skynet 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +65 -0
- data/README.txt +100 -0
- data/Rakefile +4 -0
- data/app_generators/skynet_install/USAGE +5 -0
- data/app_generators/skynet_install/skynet_install_generator.rb +84 -0
- data/app_generators/skynet_install/templates/migration.rb +60 -0
- data/app_generators/skynet_install/templates/skynet +33 -0
- data/app_generators/skynet_install/templates/skynet_console +16 -0
- data/bin/skynet +20 -0
- data/bin/skynet_console +9 -0
- data/bin/skynet_install +12 -0
- data/bin/skynet_tuplespace_server +53 -0
- data/config/hoe.rb +74 -0
- data/config/requirements.rb +17 -0
- data/lib/skynet.rb +34 -0
- data/lib/skynet/mapreduce_test.rb +25 -0
- data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
- data/lib/skynet/message_queue_adapters/mysql.rb +573 -0
- data/lib/skynet/message_queue_adapters/tuple_space.rb +327 -0
- data/lib/skynet/skynet_active_record_extensions.rb +237 -0
- data/lib/skynet/skynet_config.rb +59 -0
- data/lib/skynet/skynet_console.rb +34 -0
- data/lib/skynet/skynet_console_helper.rb +59 -0
- data/lib/skynet/skynet_debugger.rb +84 -0
- data/lib/skynet/skynet_guid_generator.rb +68 -0
- data/lib/skynet/skynet_job.rb +607 -0
- data/lib/skynet/skynet_launcher.rb +10 -0
- data/lib/skynet/skynet_logger.rb +52 -0
- data/lib/skynet/skynet_manager.rb +486 -0
- data/lib/skynet/skynet_message.rb +366 -0
- data/lib/skynet/skynet_message_queue.rb +100 -0
- data/lib/skynet/skynet_ruby_extensions.rb +36 -0
- data/lib/skynet/skynet_task.rb +76 -0
- data/lib/skynet/skynet_tuplespace_server.rb +82 -0
- data/lib/skynet/skynet_worker.rb +395 -0
- data/lib/skynet/version.rb +9 -0
- data/log/debug.log +0 -0
- data/log/skynet.log +29 -0
- data/log/skynet_tuplespace_server.log +7 -0
- data/log/skynet_worker.pid +1 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +74 -0
- data/setup.rb +1585 -0
- data/sometest.rb +23 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/website.rake +17 -0
- data/test/all_models_test.rb +139 -0
- data/test/mysql_message_queue_adaptor_test.rb +199 -0
- data/test/skynet_manager_test.rb +107 -0
- data/test/skynet_message_test.rb +42 -0
- data/test/test_generator_helper.rb +20 -0
- data/test/test_helper.rb +2 -0
- data/test/test_skynet.rb +11 -0
- data/test/test_skynet_install_generator.rb +53 -0
- data/test/tuplespace_message_queue_test.rb +179 -0
- data/tmtags +1242 -0
- data/website/index.html +93 -0
- data/website/index.txt +39 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +129 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
module Enumerable
  # Runs a Skynet map/reduce job over this collection and returns whatever
  # the job's +run+ returns.
  #
  # klass   - optional class (or class name); when given, its string form is
  #           stored under :map_reduce_class and used in the job names.
  # options - extra job options; each entry overrides the defaults below.
  # block   - optional map step; when given it is stored under :map.
  #
  # A Hash receiver is split into an array of single-pair hashes; any other
  # receiver is handed to the job unchanged.
  def mapreduce(klass=nil,options={},&block)
    map_input = is_a?(Hash) ? map { |key, value| {key => value} } : self

    jobopts = {
      :map_tasks             => 20000,
      :map_data              => map_input,
      :name                  => "#{klass} Enumerable MASTER",
      :map_name              => "#{klass} Enumerable MAP",
      :reduce_name           => "#{klass} Enumerable REDUCE",
      :map_timeout           => 3600,
      :reduce_timeout        => 3600,
      :master_timeout        => 3600,
      :master_result_timeout => 3600,
      :async                 => false
    }

    jobopts[:map_reduce_class] = klass.to_s if klass

    # Caller-supplied options win over the defaults above.
    options.each do |key, value|
      jobopts[key] = value
    end
    jobopts[:map] = block if block_given?

    # A block forces the synchronous job class even when :async was requested.
    runner = (block_given? || !jobopts[:async]) ? Skynet::Job.new(jobopts) : Skynet::AsyncJob.new(jobopts)
    runner.run
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
class Skynet
  # A unit of work: some data plus the process (a Proc, a class name String,
  # or, for master tasks, the argument handed to Skynet::Job.new) that should
  # be applied to it.
  class Task

    include SkynetDebugger

    # require 'ostruct'

    # Raised by #initialize when a required option is missing.
    class ConstructorError < StandardError
    end

    attr_reader :data, :process, :result, :map_or_reduce
    attr_accessor :name, :tuple, :result_timeout

    @@log = nil

    # Label returned for this class; presumably consumed by SkynetDebugger
    # when prefixing log lines -- TODO confirm.
    def self.debug_class_desc
      "TASK"
    end

    # opts - Hash with required keys :task_id, :process and :map_or_reduce,
    #        and optional keys :data, :name and :result_timeout.
    #
    # Raises ConstructorError when any required key is missing or falsy.
    def initialize(opts = {})
      unless opts[:task_id] and opts[:process] and opts[:map_or_reduce]
        raise ConstructorError.new("Must provide task_id, process and map_or_reduce")
      end
      @marshalable    = true
      @task_id        = opts[:task_id].to_i
      @data           = opts[:data]
      self.process    = opts[:process]
      @name           = opts[:name]
      @map_or_reduce  = opts[:map_or_reduce]
      @result_timeout = opts[:result_timeout]
    end

    # Stores the process and records whether the task can be marshalled.
    # The flag is recomputed on every assignment, so replacing a Proc with a
    # non-Proc makes the task marshalable again (previously it stayed false).
    def process=(process)
      @marshalable = !process.is_a?(Proc)
      @process = process
    end

    # False when the current process is a Proc, true otherwise.
    def can_marshal?
      @marshalable
    end

    # Returns :master for master tasks and :task for everything else.
    def task_or_master
      if @map_or_reduce == :master
        @map_or_reduce
      else
        :task
      end
    end

    # The task id as an Integer.
    def task_id
      @task_id.to_i
    end

    # Executes the task and returns its result.
    #
    # * Proc process   -> called with the task data.
    # * master task    -> a Skynet::Job is built from the process and run.
    # * String process -> treated as a class name; the map_or_reduce message
    #                     is sent to that class with the task data.
    #
    # Any other combination returns nil. Every exception is caught and
    # logged, in which case the return value is whatever +error+ returns.
    def run
      debug "running task #{name} task_id:#{task_id} MorR:#{map_or_reduce} PROCESS CLASS: #{@process.class}"
      begin
        # is_a? keeps this check consistent with the one in #process=.
        if @process.is_a?(Proc)
          debug " - #{@map_or_reduce} using Proc"
          @process.call @data
        elsif @map_or_reduce == :master
          debug " - as master"
          job = Skynet::Job.new(@process)
          job.run
        elsif @process.is_a?(String)
          debug " - #{@map_or_reduce} using class #{@process}"
          # String#constantize is not core Ruby; presumably ActiveSupport -- TODO confirm.
          @process.constantize.send(@map_or_reduce,@data)
        end
      # NOTE(review): rescuing Exception also traps SystemExit/Interrupt raised
      # while a task is running; left as-is because callers rely on #run
      # never raising.
      rescue Exception => e
        error "Error running task #{e.inspect} TASK:", self, e.backtrace.join("\n")
      end
    end

  end ## END class Task
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
#!/usr/bin/env ruby -w
|
2
|
+
|
3
|
+
# Rinda RingServer
|
4
|
+
|
5
|
+
require 'rinda/ring'
|
6
|
+
require 'rinda/tuplespace'
|
7
|
+
require 'rubygems'
|
8
|
+
require 'logger'
|
9
|
+
require 'optparse'
|
10
|
+
require 'pp'
|
11
|
+
|
12
|
+
class Rinda::TupleSpaceProxy
  # Replacement for the stock #take: asks the wrapped tuple space to move the
  # matching tuple with a nil port and returns the value that #move hands
  # back.
  #
  # tuple - template to match.
  # sec   - optional timeout passed through to #move.
  # block - optional block passed through to #move.
  def take(tuple, sec=nil, &block)
    @ts.move(nil, tuple, sec, &block)
  end
end
|
19
|
+
|
20
|
+
class Rinda::Tuple

  require 'ostruct'

  # Populates @tuple from +ary+.
  #
  # When +ary+ arrived as a DRb::DRbUnknown (DRb could not unmarshal it on
  # receipt), the raw buffer is unmarshalled again. On success the recovered
  # object is used as the tuple contents (previously the result was thrown
  # away and @tuple was left unset). On failure a Rinda::RindaError naming
  # the offending type is raised.
  #
  # NOTE(review): Marshal.load runs on a buffer received over DRb; only safe
  # when every peer on the tuple space is trusted.
  def init_with_ary(ary)
    if ary.instance_of?(DRb::DRbUnknown)
      begin
        ary = Marshal.load(ary.buf)
      rescue StandardError => e
        # +ary+ is still the DRbUnknown here because the assignment above never completed.
        raise Rinda::RindaError.new("DRb couldn't marshall tuple of type #{ary.name}, it was turned into a DRb::DRbUnknown object.\nMarshal exception #{e.inspect}\nOriginal object:\n\t#{ary.buf}.\n\nStacktrace:\n")
      end
    end

    @tuple = Array.new(ary.size)
    @tuple.size.times do |i|
      @tuple[i] = ary[i]
    end
  end
end
|
39
|
+
|
40
|
+
class Skynet
  # Empty declarations of classes defined elsewhere in the project.
  # NOTE(review): presumably here so this standalone server process knows the
  # class names when tuples containing them pass through DRb -- TODO confirm.
  class Task
  end

  class Message
    class Payload
    end
  end

  class AsyncJob
  end

  class Job
  end

  # Starts a Rinda tuple space, advertises it through a Rinda ring server and
  # then blocks on the DRb thread.
  class Server

    # options - Hash with:
    #           :logfile  - log destination handed to Logger.new (rotated weekly)
    #           :loglevel - name of a Logger severity constant, e.g. "debug"
    #           :port     - port for the Rinda ring server
    #           :drburi   - optional URI on which to serve the tuple space
    #
    # Does not return while the DRb service is running. Start-up failures are
    # written to the log rather than raised.
    def initialize(options)
      log = Logger.new(options[:logfile], 'weekly')
      # Resolve the severity with a constant lookup rather than evaluating
      # the option string as Ruby code.
      log.level = Logger.const_get(options[:loglevel].upcase)
      log.info "STARTING SKYNET SERVER ON PORT: #{options[:port]} Logging to #{options[:logfile]}"

      # Create a TupleSpace to hold named services, and start running
      begin
        ts = Rinda::TupleSpace.new
        if options[:drburi]
          DRb.start_service(options[:drburi], ts)
        else
          DRb.start_service
        end
        tuple = [:name,:TupleSpace, ts, 'Tuple Space']
        renewer = Rinda::SimpleRenewer.new
        ring_ts = Rinda::TupleSpace.new
        ring_ts.write(tuple, renewer)

        Rinda::RingServer.new(ring_ts, options[:port])
        DRb.thread.join
      # Exception (not StandardError) is kept on purpose: the original also
      # logged interrupts and exits raised while blocked in join.
      rescue Exception => e
        log.fatal "Couldn't start Skynet Server #{e.inspect}"
      end

    end
  end
end
|
@@ -0,0 +1,395 @@
|
|
1
|
+
class Skynet
  # A worker process: takes task messages from the message queue, runs them,
  # writes the result back, and keeps a status record in the queue. It asks
  # to be respawned on HUP, when the worker version stored in the queue
  # changes, or when its memory use passes MAX_MEMORY.
  class Worker

    include SkynetDebugger
    include Skynet::GuidGenerator

    RETRY_TIME = 2              # seconds between reconnection attempts
    VERSION_CHECK_DELAY = 5     # minimum seconds between version checks
    MAX_MEMORY = 500            # compared against get_memory_size.to_i
    MEMORY_CHECK_DELAY = 30     # minimum seconds between memory checks
    MANAGER_PING_INTERVAL = 60

    attr_accessor :message,:task, :mq, :processed
    attr_reader :worker_id, :worker_info, :worker_type

    class Error < StandardError
    end

    # Raised to make the launcher (see Worker.start) replace this process.
    class RespawnWorker < Skynet::Error
    end

    class ConnectionFailure < Skynet::Error
    end

    class NoManagerError < Skynet::Error
    end

    # Label for this class that includes the current pid.
    def self.debug_class_desc
      "WORKER-#{$$}"
    end

    # worker_type - :any, :master or :task (String or Symbol); decides which
    #               payload type is requested from the queue.
    def initialize(worker_type=:any)
      @worker_id = get_unique_id(1).to_i
      @mq = Skynet::MessageQueue.new
      @worker_type = worker_type.to_sym
      @processed = 0
      debug "THIS WORKER TAKES #{worker_type}"

      @worker_info = {
        :hostname    => hostname,
        :process_id  => process_id,
        :worker_type => payload_type,
        :worker_id   => worker_id,
        :version     => mq.get_worker_version
      }
    end

    # Pid of the current process.
    def process_id
      $$
    end

    # Host name of this machine (memoised).
    def hostname
      @machine_name ||= Socket.gethostname
    end

    # Worker version this process last read from the queue.
    def version
      @curver
    end

    # Returns true when the worker version in the queue differs from the one
    # this worker started with. The first call only records the current
    # version; later calls hit the queue at most once every
    # VERSION_CHECK_DELAY seconds.
    def new_version_respawn?
      unless @verchecktime
        @verchecktime = Time.now
        begin
          @curver = mq.get_worker_version
          debug "FINDING INITIAL VER #{@curver}"
        rescue Skynet::RequestExpiredError
          warn "NO INITIAL VER IN MQ using 1"
          @curver = 1
        end
        return false
      end

      return false if Time.now < (@verchecktime + VERSION_CHECK_DELAY)

      @verchecktime = Time.now
      begin
        newver = mq.get_worker_version
        # debug "CURVER #{@curver} NEWVER: #{newver}"
        if newver != @curver
          info "RESTARTING WORKER ON PID #{$$}"
          return true
        end
      rescue Skynet::RequestExpiredError
        warn "NO CURRENT WORKER REV IN MQ still using 1"
        mq.set_worker_version(1)
      end
      false
    end

    # Removes this worker's status record from the queue. Queue timeouts are
    # logged, not raised.
    def take_worker_status
      # @mq is set to nil after a connection failure; without this guard the
      # resulting NoMethodError would hide the connection error being handled.
      return unless mq
      begin
        mq.take_worker_status(@worker_info,0.00001)
      rescue Skynet::RequestExpiredError, Skynet::QueueTimeout
        error "Couldnt take worker status for #{hostname} pid: #{process_id}"
      end
    end

    # Writes the initial "waiting" status record.
    def notify_worker_started
      mq.write_worker_status(
        @worker_info.merge({
          :name       => "waiting for #{@worker_type}",
          :processed  => 0,
          :started_at => Time.now.to_i
        })
      )
    end

    # Writes a status record for the task about to run.
    # task - Hash describing the task; :processed and :started_at are added
    #        to it in place.
    def notify_task_begun(task)
      task[:processed]  = @processed
      task[:started_at] = Time.now.to_i
      mq.write_worker_status(@worker_info.merge(task))
    end

    # Bumps the processed counter and resets the status record to "waiting".
    def notify_task_complete
      @processed += 1

      mq.write_worker_status(
        @worker_info.merge({
          :task_id       => 0,
          :job_id        => 0,
          :name          => "waiting for #{@worker_type}",
          :processed     => @processed,
          :map_or_reduce => nil,
          :started_at    => Time.now.to_i
        })
      )
    end

    # Logs the shutdown and removes the status record.
    def notify_worker_stop
      info "Worker #{process_id} stopping..."
      take_worker_status
    end

    # Payload type requested from the queue: nil for :any workers, otherwise
    # the worker type itself.
    def payload_type
      worker_type == :any ? nil : worker_type
    end

    # Main loop. Installs signal handlers (HUP requests a respawn, INT and
    # the first TERM request a clean stop, a second TERM exits immediately),
    # then repeatedly takes a task, runs it and writes the result.
    #
    # Raises RespawnWorker when the process should be replaced, and re-raises
    # the connection error after more than 20 failed reconnection attempts.
    def start
      conerror = 0
      @curver = nil
      # setup signal handlers for manager
      Signal.trap("HUP")  { @respawn = true }
      Signal.trap("TERM") do
        if @die
          exit
        else
          @die = true
        end
      end
      Signal.trap("INT")  { @die = true }

      raise Skynet::Worker::RespawnWorker.new if new_version_respawn?

      info "STARTING WORKER @ VER #{@curver} (#{@worker_type})"

      notify_worker_started

      message = nil
      task = nil

      loop do
        message = nil
        begin
          if @die
            exit
          elsif @respawn
            raise Skynet::Worker::RespawnWorker.new
          end

          if local_mem = max_memory_reached?
            raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX MEM AT: #{local_mem} MAX: #{MAX_MEMORY}")
          end

          # A previous iteration lost the connection: build a fresh queue.
          if conerror > 0
            @mq = Skynet::MessageQueue.new
            warn "WORKER RECONNECTED AFTER #{conerror} tries"
            conerror = 0
          end

          # debug "1 START LOOPSSS at VER #{@curver}"
          #
          # debug "LOOK FOR WORK USING TEMPLATE", Skynet::Message.task_template(@curver)
          # message = Skynet::Message.new(mq.take(Skynet::Message.task_template(@curver),0.00001))
          message = mq.take_next_task(@curver,0.00001,payload_type)

          next unless message.respond_to?(:payload)

          task = message.payload
          error "BAD MESSAGE", task unless task.respond_to?(:map_or_reduce)

          info "STEP 2 GOT MESSAGE #{message.name} type:#{task.map_or_reduce}, jobid: #{message.job_id}, taskid:#{message.task_id} it: #{message.iteration}"
          debug "STEP 2.1 message=", message.to_a
          # info "STEP 3 GOT TASK taskid: #{task.task_id}"
          # debug "STEP 3.1 task=", task
          next unless task
          # maybe instead of putting a time in the future, it puts the start time and an offset in seconds

          # task.debug "taking task #{task.task_id} name:#{task.name}..."

          info "STEP 4 RUNNING TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
          notify_task_begun({
            :job_id        => message.job_id,
            :task_id       => message.task_id,
            :iteration     => message.iteration,
            :name          => message.name,
            :map_or_reduce => task.map_or_reduce
          })
          result = task.run

          info "STEP 5 GOT RESULT FROM RUN TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
          debug "STEP 5.1 RESULT DATA:", result

          ## XXX need better result timeout
          mq.write_result(message,result,task.result_timeout)
          info "STEP 6 WROTE RESULT MESSAGE #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
          # debug "STEP 6.1 RESULT_MESSAGE:", result_message
          notify_task_complete
        rescue Skynet::Worker::RespawnWorker => e
          info "Respawning and taking worker status"
          notify_worker_stop
          raise e
        rescue Skynet::RequestExpiredError
          # Nothing to take right now: check for a new version, pause, retry.
          # debug "request expired"
          if new_version_respawn?
            notify_worker_stop
            raise Skynet::Worker::RespawnWorker.new
          end
          sleep 1
          # debug "WORKER [#{$$}] LOOPING AGAIN"
          next
        rescue Skynet::ConnectionError, DRb::DRbConnError => e
          conerror += 1
          # Back off to three times the base delay after six straight failures.
          retry_time = conerror > 6 ? RETRY_TIME * 3 : RETRY_TIME
          error "#{e.message}, RETRY #{conerror} in #{retry_time} seconds !!"
          @mq = nil
          sleep retry_time
          if conerror > 20
            fatal "TOO MANY RECONNECTION EXCEPTIONS #{e.message}"
            notify_worker_stop
            raise e
          end
          next
        rescue NoManagerError => e
          fatal e.message
          break
        rescue Interrupt, SystemExit
          warn "Exiting..."
          notify_worker_stop
          break
        rescue Exception => e
          error "#{e.inspect} #{e.backtrace.join("\n")}"
          #mq.take(@next_worker_message.task_template,0.0005) if message
          if message
            mq.write_error(message,"#{e.inspect} #{e.backtrace.join("\n")}",(task.respond_to?(:result_timeout) ? task.result_timeout : 200))
          else
            # what do we do here
            # mq.write_error(message,"ERROR in WORKER [#{$$}] #{e.inspect} #{e.backtrace.join("\n")}")
          end
          # mq.write_error("ERROR in WORKER [#{$$}] #{e.inspect} #{e.backtrace.join("\n")}")
          next
        end
      end
    end

    @@ok_to_mem_check = false
    @@lastmem = nil
    @@memct = 0

    # Returns the current memory size (truthy) when it exceeds MAX_MEMORY,
    # otherwise a falsy value. The size is sampled at most once every
    # MEMORY_CHECK_DELAY seconds, and never on hosts without
    # /proc/self/status.
    def max_memory_reached?
      return false unless ok_to_mem_check?
      if !@memchecktime
        @memchecktime = Time.now
        return false
      elsif Time.now > (@memchecktime + MEMORY_CHECK_DELAY)
        @memchecktime = Time.now
        local_mem = get_memory_size.to_i
        return local_mem if local_mem > MAX_MEMORY
      else
        false
      end
    end

    # Reads the VmSize line from a /proc-style status +file+ and returns its
    # number divided by 1000, as a Float or, with format == :pretty, as a
    # String truncated to five decimals.
    #
    # Returns '0' when the file cannot be read or has no VmSize line.
    # File.foreach closes the file on every exit path; the previous
    # open(file).each never closed it and returned the File object itself
    # when no line matched.
    def find_pid_size(file, format=:notpretty)
      begin
        File.foreach(file) do |line|
          next unless line.index('VmSize')
          # Drops the 7-character "VmSize:" label and the trailing " kB\n".
          temp = line[7..-5].strip.to_f/1000
          return BigDecimal(temp.to_s).truncate(5).to_s('F') if format == :pretty
          return temp
        end
        '0'
      rescue StandardError => e
        warn "ERROR #{e.inspect}"
        '0'
      end
    end

    # Memory size of the current process, read from /proc/self/status.
    def get_memory_size
      find_pid_size("/proc/self/status")
    end

    # True when /proc/self/status exists, i.e. memory checks are possible.
    # The answer is cached in a class variable after the first call.
    def ok_to_mem_check?
      return true if @@ok_to_mem_check == true
      return false if @@ok_to_mem_check == :notok
      # File.exist? replaces File.exists?, which Ruby 3.2 removed.
      if File.exist?('/proc/self/status')
        @@lastmem ||= get_memory_size.to_i
        return @@ok_to_mem_check = true
      else
        @@ok_to_mem_check = :notok
        return false
      end
    end


    # kinda like system() but gives me back a pid
    #
    # Forks, runs +command+ through /bin/sh in the child, detaches the child
    # and returns its pid.
    def self.fork_and_exec(command)
      sleep 0.01 # remove contention on manager drb object
      # NOTE(review): the logger is fetched but not used here; kept in case
      # Skynet::Logger.get has side effects -- TODO confirm.
      log = Skynet::Logger.get
      info "executing /bin/sh -c \"#{command}\""
      pid = fork do
        exec("/bin/sh -c \"#{command}\"")
        exit
      end
      Process.detach(pid)
      pid
    end

    # Command-line entry point: parses ARGV, requires the requested
    # libraries, then creates a worker and runs it. When the worker asks for
    # a respawn, a replacement process is launched with the same options and
    # this process exits.
    #
    # options - Hash; :worker_type defaults to :any and :required_libs to [].
    #           Both can be overridden from ARGV (--worker_type, -r).
    def self.start(options={})
      options[:worker_type]   ||= :any
      options[:required_libs] ||= []

      OptionParser.new do |opt|
        opt.banner = "Usage: worker [options]"
        opt.on('-r', '--required LIBRARY', 'Include the specified libraries') do |v|
          options[:required_libs] << v
        end
        opt.on('-ot', '--worker_type WORKERTYPE', "master, task or any") do |v|
          if ["any","master","task"].include?(v)
            options[:worker_type] = v
          else
            raise Skynet::Error.new("#{v} is not a valid worker_type")
          end
        end
        opt.parse!(ARGV)
      end

      options[:required_libs].each do |adlib|
        begin
          require adlib
        # LoadError is what require raises; it replaces MissingSourceFile,
        # which only exists when ActiveSupport is loaded.
        rescue LoadError => e
          error "The included lib #{adlib} was not found: #{e.inspect}"
          exit
        end
      end

      # worker_script_path = (Skynet::CONFIG[:WORKER_SCRIPT_PATH] || File.dirname(__FILE__)) << "/skynet_worker"

      debug "WORKER STARTING WORKER_TYPE?:#{options[:worker_type]}"

      begin
        worker = Skynet::Worker.new(options[:worker_type])
        worker.start
      rescue Skynet::Worker::NoManagerError => e
        fatal e.message
        exit
      rescue Skynet::Worker::RespawnWorker
        warn "WORKER #{$$} SCRIPT CAUGHT RESPAWN.  RESTARTING"
        cmd = "RAILS_ENV=#{RAILS_ENV} ruby #{Skynet::CONFIG[:LAUNCHER_PATH]} --worker_type=#{options[:worker_type]}"
        # The leading space keeps "-r" from being glued onto the worker_type value.
        cmd << " -r #{options[:required_libs].join(' -r ')}" if options[:required_libs] and not options[:required_libs].empty?
        pid = fork_and_exec(cmd)
        warn "parent_pid: #{$$}, child_pid: #{pid}"
        exit
      rescue SystemExit
        info "WORKER #{$$} EXITING GRACEFULLY"
      rescue Exception => e
        fatal "WORKER #{$$} DYING #{e.class} #{e.message} #{e.backtrace}"
        report = ExceptionReport.new(e)
        report.save
      end
    end
  end
end
|
388
|
+
|
389
|
+
# Stand-in exception reporter: accepts any constructor arguments and does
# nothing when saved.
class ExceptionReport
  # Ignores every argument.
  def initialize(*_ignored); end

  # No-op; returns nil.
  def save; end
end
|