skynet 0.9.1
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +65 -0
- data/README.txt +100 -0
- data/Rakefile +4 -0
- data/app_generators/skynet_install/USAGE +5 -0
- data/app_generators/skynet_install/skynet_install_generator.rb +84 -0
- data/app_generators/skynet_install/templates/migration.rb +60 -0
- data/app_generators/skynet_install/templates/skynet +33 -0
- data/app_generators/skynet_install/templates/skynet_console +16 -0
- data/bin/skynet +20 -0
- data/bin/skynet_console +9 -0
- data/bin/skynet_install +12 -0
- data/bin/skynet_tuplespace_server +53 -0
- data/config/hoe.rb +74 -0
- data/config/requirements.rb +17 -0
- data/lib/skynet.rb +34 -0
- data/lib/skynet/mapreduce_test.rb +25 -0
- data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
- data/lib/skynet/message_queue_adapters/mysql.rb +573 -0
- data/lib/skynet/message_queue_adapters/tuple_space.rb +327 -0
- data/lib/skynet/skynet_active_record_extensions.rb +237 -0
- data/lib/skynet/skynet_config.rb +59 -0
- data/lib/skynet/skynet_console.rb +34 -0
- data/lib/skynet/skynet_console_helper.rb +59 -0
- data/lib/skynet/skynet_debugger.rb +84 -0
- data/lib/skynet/skynet_guid_generator.rb +68 -0
- data/lib/skynet/skynet_job.rb +607 -0
- data/lib/skynet/skynet_launcher.rb +10 -0
- data/lib/skynet/skynet_logger.rb +52 -0
- data/lib/skynet/skynet_manager.rb +486 -0
- data/lib/skynet/skynet_message.rb +366 -0
- data/lib/skynet/skynet_message_queue.rb +100 -0
- data/lib/skynet/skynet_ruby_extensions.rb +36 -0
- data/lib/skynet/skynet_task.rb +76 -0
- data/lib/skynet/skynet_tuplespace_server.rb +82 -0
- data/lib/skynet/skynet_worker.rb +395 -0
- data/lib/skynet/version.rb +9 -0
- data/log/debug.log +0 -0
- data/log/skynet.log +29 -0
- data/log/skynet_tuplespace_server.log +7 -0
- data/log/skynet_worker.pid +1 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +74 -0
- data/setup.rb +1585 -0
- data/sometest.rb +23 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/website.rake +17 -0
- data/test/all_models_test.rb +139 -0
- data/test/mysql_message_queue_adaptor_test.rb +199 -0
- data/test/skynet_manager_test.rb +107 -0
- data/test/skynet_message_test.rb +42 -0
- data/test/test_generator_helper.rb +20 -0
- data/test/test_helper.rb +2 -0
- data/test/test_skynet.rb +11 -0
- data/test/test_skynet_install_generator.rb +53 -0
- data/test/tuplespace_message_queue_test.rb +179 -0
- data/tmtags +1242 -0
- data/website/index.html +93 -0
- data/website/index.txt +39 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +129 -0
data/lib/skynet/skynet_logger.rb
@@ -0,0 +1,52 @@
+# http://darwinweb.net/article/Undoing_Rails_Monkey_Patch_To_Logger
+
+require 'logger'
+
+class Skynet
+
+  class Error < StandardError
+  end
+
+  class Logger < ::Logger
+    if respond_to?(:format_message)
+      alias format_message old_format_message
+    end
+
+    @@log = nil
+
+    def self.get
+      if not @@log
+        @@log = self.new(Skynet::CONFIG[:SKYNET_LOG_FILE])
+        @@log.level = Skynet::CONFIG[:SKYNET_LOG_LEVEL]
+      end
+      @@log
+    end
+
+    def self.log=(log)
+      @@log = log
+    end
+  end
+
+  # This module can be mixed in to add logging methods to your class.
+  module Loggable
+    def debug
+      log = Skynet::Logger.get
+    end
+
+    def info
+      log = Skynet::Logger.get
+    end
+
+    def warn
+      log = Skynet::Logger.get
+    end
+
+    def error
+      log = Skynet::Logger.get
+    end
+
+    def fatal
+      log = Skynet::Logger.get
+    end
+  end
+end
data/lib/skynet/skynet_manager.rb
@@ -0,0 +1,486 @@
+class Skynet
+  begin
+    require 'fastthread'
+  rescue LoadError
+    # puts 'fastthread not installed, using thread instead'
+    require 'thread'
+  end
+
+  class Manager
+    include SkynetDebugger
+
+    Skynet::CONFIG[:PERCENTAGE_OF_TASK_ONLY_WORKERS] ||= 0.7
+    Skynet::CONFIG[:PERCENTAGE_OF_MASTER_ONLY_WORKERS] ||= 0.2
+
+    def self.debug_class_desc
+      "MANAGER"
+    end
+
+
+    attr_accessor :required_libs
+
+    def initialize(script_path,workers_requested,adlibs=[])
+      info "Skynet Launcher Path: [#{@script_path}]"
+      @script_path = script_path
+      @mutex = Mutex.new
+      @workers_requested = workers_requested
+      @required_libs = adlibs
+      @number_of_workers = 0
+      @workers_by_type = {:master => [], :task => [], :any => []}
+      @signaled_workers = []
+      @workers_running = {}
+      @all_workers_started = false
+    end
+
+    def start_workers
+      setup_signals
+
+      starting = workers_to_start(@workers_requested)
+      warn "Starting #{starting} workers. #{@workers_requested - starting} already running."
+      add_worker(starting)
+    end
+
+    ### maybe workers_to_start should be a method
+    def workers_to_start(workers_to_start)
+      pids = worker_queue_pids
+      if not pids.empty?
+        pids.each do |worker_pid|
+          if worker_alive?(worker_pid)
+            @workers_running[worker_pid] = Time.now
+            @number_of_workers += 1
+            workers_to_start -= 1
+          else
+            take_worker_status(worker_pid)
+          end
+          return 0 if workers_to_start < 1
+        end
+      end
+      return workers_to_start
+    end
+
+    def check_started_workers
+      workers = []
+      begin
+        50.times do |ii|
+          workers = worker_queue
+          warn "Checking started workers, #{workers.size} out of #{@number_of_workers} after the #{(ii+1)}th try..."
+          break if workers.size >= @number_of_workers
+          sleep (@number_of_workers - workers.size)
+        end
+      rescue Exception => e
+        fatal "Something bad happened #{e.inspect} #{e.backtrace.join("\n")}"
+      end
+
+      @all_workers_started = true
+
+      warn "FINISHED STARTING ALL #{workers.size} WORKERS"
+      if workers.size > @number_of_workers
+        warn "EXPECTED #{@number_of_workers}"
+        @number_of_workers = workers.size
+      end
+    end
+
+    # the main application loop
+    def run
+      loop do
+        next unless @all_workers_started
+        begin
+          check_workers
+          sleep Skynet::CONFIG[:WORKER_CHECK_DELAY]
+        rescue SystemExit, Interrupt => e
+          fatal "Manager Exiting!"
+          exit
+        rescue Exception => e
+          fatal "Something bad happened #{e.inspect} #{e.backtrace.join("\n")}"
+        end
+      end
+    end
+
+    def check_workers
+      q_pids = worker_queue_pids || []
+      info "Checking on #{@number_of_workers} workers..." unless @shutdown
+      check_running_pids(q_pids)
+      check_number_of_workers(q_pids)
+      true
+    end
+
+    def check_running_pids(q_pids)
+      if @workers_running.keys.size > q_pids.size
+        (@workers_running.keys - q_pids).each do |wpid|
+          error "Missing worker #{wpid} from worker queue. Removing and/or killing."
+          Process.kill("TERM",wpid) if worker_alive?(wpid)
+          @workers_running.delete(wpid)
+          q_pids.delete(wpid)
+        end
+      end
+
+      q_pids.each do |wpid|
+        if not worker_alive?(wpid)
+          error "Worker #{wpid} was in queue and but was not running. Removing from queue."
+          take_worker_status(wpid)
+          @workers_running.delete(wpid)
+          @number_of_workers -= 1
+          q_pids.delete(wpid)
+        end
+      end
+      q_pids
+    end
+
+    def worker_shutdown(q_pids)
+      if not @masters_dead
+        warn "Shutting down masters. #{q_pids.size} workers still running." if q_pids.size > 0
+        workers_to_kill = worker_queue.select do |w|
+          w.map_or_reduce == "master" and @workers_running.include?(w.process_id)
+        end
+
+        worker_pids_to_kill = workers_to_kill.collect { |w| w.process_id }
+        if worker_pids_to_kill and not worker_pids_to_kill.empty?
+          warn "FOUND MORE RUNNING MASTERS WE HAVEN'T KILLED:", worker_pids_to_kill
+          remove_worker(worker_pids_to_kill)
+        end
+
+        if not worker_queue.detect { |w| w.map_or_reduce == "master" }
+          signal_workers("INT")
+          @masters_dead = true
+          sleep 1
+          return check_number_of_workers(worker_queue_pids)
+        else
+          sleep 4
+          return check_number_of_workers(worker_queue_pids)
+        end
+      else
+        warn "Shutting down. #{q_pids.size} workers still running." if q_pids.size > 0
+      end
+      if q_pids.size < 1
+        info "No more workers running."
+      end
+    end
+
+    def check_number_of_workers(q_pids)
+      if @shutdown
+        worker_shutdown(q_pids)
+        if q_pids.size < 1
+          exit
+        end
+      elsif q_pids.size != @number_of_workers
+        if q_pids.size.to_f / @workers_requested.to_f < 0.85
+          starting = @workers_requested - q_pids.size
+          error "Expected #{@number_of_workers} workers. #{q_pids.size} running. Starting #{starting}"
+          @number_of_workers += starting
+          add_worker(starting)
+        else
+          error "Expected #{@number_of_workers} workers. #{q_pids.size} running."
+          @number_of_workers = q_pids.size
+        end
+      end
+
+    end
+
+    def take_worker_status(worker_process_id)
+      begin
+        mq.take_worker_status({
+          :hostname => hostname,
+          :process_id => worker_process_id
+        },0.00001)
+      rescue Skynet::QueueTimeout => e
+        error "Couldnt take worker status for #{hostname} #{worker_process_id}"
+      end
+    end
+
+    def worker_alive?(worker_pid)
+      begin
+        IO.popen("ps -o pid,command -p #{worker_pid}", "r") do |ps|
+          return ps.detect {|line| line =~ /worker_type/}
+        end
+      rescue Errno::ENOENT => e
+        return false
+      end
+      false
+    end
+
+
+    def add_workers(*args)
+      add_worker(*args)
+    end
+
+    def add_worker(workers=1)
+      num_task_only_workers = (workers * Skynet::CONFIG[:PERCENTAGE_OF_TASK_ONLY_WORKERS]).to_i
+      num_master_only_workers = (workers * Skynet::CONFIG[:PERCENTAGE_OF_MASTER_ONLY_WORKERS]).to_i
+      warn "Adding #{workers} WORKERS. Task Workers: #{num_task_only_workers}, Master Workers: #{num_master_only_workers} Master & Task Workers: #{workers - num_task_only_workers - num_master_only_workers}"
+
+      @all_workers_started = false
+      worker_types = {:task => 0, :master => 0, :any => 0}
+      (1..workers).collect do |ii|
+        worker_type = :any
+        if (ii <= num_master_only_workers)
+          worker_type = :master
+          worker_types[:master] += 1
+        elsif (ii > num_master_only_workers and ii <= num_master_only_workers + num_task_only_workers)
+          worker_type = :task
+          worker_types[:task] += 1
+        else
+          worker_types[:any] += 1
+        end
+        cmd = "#{@script_path} --worker_type=#{worker_type}"
+        cmd << " -r #{required_libs.join(' -r ')}" if required_libs and not required_libs.empty?
+        wpid = self.fork_and_exec(cmd)
+        @workers_by_type[worker_type] ||= []
+        @workers_by_type[worker_type] << wpid
+        warn "Adding Worker ##{ii} PID: #{wpid} WORKER_TYPE?:#{worker_type}"
+        @mutex.synchronize do
+          @number_of_workers += 1
+        end
+        @workers_running[wpid] = Time.now
+        sleep 0.01
+        wpid
+      end
+      info "DISTRO", worker_types
+      check_started_workers
+    end
+
+    def remove_workers(workers=1)
+      pids = worker_queue_pids[0...workers]
+      remove_worker(pids)
+    end
+
+    def remove_worker(pids = nil)
+      pids = [pids] unless pids.kind_of?(Array)
+      info "Removing workers #{pids.join(",")} from worker queue. They will die gracefully when they finish what they're doing."
+      wq = worker_queue
+      pids.collect do |wpid|
+        @workers_running.delete(wpid)
+        @number_of_workers -= 1
+        @workers_running.delete(wpid)
+        warn "REMOVING WORKER #{wpid}"
+        # error "SHUTTING DOWN #{wpid} MR:",worker_queue.detect{|w|w.process_id == wpid}
+        @signaled_workers << wpid
+        Process.kill("INT",wpid)
+      end
+      pids
+    end
+
+    def signal_workers(signal,worker_type=nil)
+      worker_queue.each do |worker|
+        next if worker_type and not @workers_by_type[worker_type].include?(worker.process_id)
+        warn "SHUTTING DOWN #{worker.process_id} MR: #{worker.map_or_reduce}"
+        @workers_running.delete(worker.process_id)
+        Process.kill(signal,worker.process_id)
+        @signaled_workers << worker.process_id
+      end
+    end
+
+    def restart_all_workers
+      hostnames = {}
+      mq.read_all_worker_statuses.each do |status|
+        hostnames[status.hostname] = true
+      end
+      hostnames.keys.each do |remote_hostname|
+        manager = DRbObject.new(nil,"druby://#{remote_hostname}:40000")
+        manager.restart_workers
+      end
+    end
+
+    def hard_restart_workers
+      @all_workers_started = false
+      signal_workers("TERM")
+      @restart = true
+      signal_workers("INT",:master)
+      signal_workers("INT",:any)
+      sleep @number_of_workers
+      check_started_workers
+    end
+
+    def restart_workers
+      @all_workers_started = false
+      signal_workers("HUP")
+      @workers_running = {}
+      sleep @number_of_workers
+      check_started_workers
+    end
+
+    def setup_signals
+      Signal.trap("HUP") do
+        restart_workers
+      end
+      Signal.trap("TERM") do
+        if @term
+          terminate
+        else
+          @term=true
+          shutdown
+        end
+      end
+
+      Signal.trap("INT") do
+        if @shutdown
+          terminate
+        else
+          shutdown
+        end
+      end
+    end
+
+    def shutdown
+      info(:shutdown)
+      @shutdown = true
+      signal_workers("INT",:master)
+      signal_workers("INT",:any)
+    end
+
+    def terminate
+      info(:terminate)
+      signal_workers("TERM")
+      exit
+    end
+
+    def fork_and_exec(command)
+      pid = fork do
+        exec("/bin/sh -c \"#{command}\"")
+        exit
+      end
+      Process.detach(pid) if (pid != 0)
+      pid
+    end
+
+    def mq
+      @mq ||= Skynet::MessageQueue.new
+    end
+
+    def worker_queue
+      mq.read_all_worker_statuses(hostname)
+    end
+
+    def worker_queue_pids
+      worker_queue.collect {|w| w.process_id}
+    end
+
+    def worker_pids
+      worker_queue_pids
+    end
+
+    def parent_pid
+      $$
+    end
+
+    def hostname
+      @machine_name ||= Socket.gethostname
+    end
+
+    def ping
+      true
+    end
+
+    def self.start(options={})
+      options[:add_workers] ||= nil
+      options[:remove_workers] ||= nil
+      options[:use_rails] ||= false
+      options[:required_libs] ||= []
+      OptionParser.new do |opt|
+        opt.banner = "Usage: skynet [options]"
+        opt.on('', '--restart-all-workers', 'Restart All Workers') do |v|
+          puts "Restarting ALL workers on ALL machines."
+          begin
+            manager = DRbObject.new(nil, Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL])
+            manager.restart_all_workers
+            exit
+          rescue DRb::DRbConnError => e
+            puts "No manager running at #{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL]} ERROR: #{e.inspect}"
+            exit
+          end
+        end
+        opt.on('', '--restart-workers', 'Restart Workers') do |v|
+          puts "Restarting workers on this machine."
+          begin
+            manager = DRbObject.new(nil, Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL])
+            manager.restart_workers
+            exit
+          rescue DRb::DRbConnError => e
+            puts "No manager running at #{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL]} ERROR: #{e.inspect}"
+            exit
+          end
+        end
+        opt.on('-i', '--increment-worker-version', 'Increment Worker Version') do |v|
+          ver = Skynet::MessageQueue.new.increment_worker_version
+          puts "Incrementing Worker Version to #{ver}"
+          exit
+        end
+        opt.on('-a', '--add-workers WORKERS', 'Number of workers to add.') do |v|
+          options[:add_workers] = v.to_i
+        end
+        opt.on('-k', '--remove-workers WORKERS', 'Number of workers to remove.') do |v|
+          options[:remove_workers] = v.to_i
+        end
+        opt.on('-w', '--workers WORKERS', 'Number of workers to start.') do |v|
+          options[:workers] = v.to_i
+        end
+        opt.on('-r', '--required LIBRARY', 'Require the specified libraries') do |v|
+          options[:required_libs] << File.expand_path(v)
+        end
+
+        opt.parse!(ARGV)
+      end
+
+      options[:workers] ||= Skynet::CONFIG[:NUMBER_OF_WORKERS] || 4
+      options[:pid_file] ||= File.dirname(Skynet::CONFIG[:SKYNET_PIDS_FILE]) + "/skynet_worker.pid"
+
+      options[:required_libs].each do |adlib|
+        begin
+          require adlib
+        rescue MissingSourceFile => e
+          error "The included lib #{adlib} was not found: #{e.inspect}"
+          exit
+        end
+      end
+
+      # Handle add or remove workers
+      if options[:add_workers] or options[:remove_workers]
+        begin
+          manager = DRbObject.new(nil, Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL])
+          if options[:add_workers]
+            pids = manager.add_worker(options[:add_workers])
+            warn "ADDING #{options[:add_workers]} workers PIDS: #{pids.inspect}"
+          elsif options[:remove_workers]
+            pids = manager.remove_workers(options[:remove_workers])
+            warn "REMOVING #{options[:remove_workers]} workers PIDS: #{pids.inspect}"
+          end
+        rescue DRb::DRbConnError => e
+          warn "Couldnt add or remove workers. There are probably no workers running. At least I couldn't find a skynet_manager around at #{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL]} #{e.inspect}"
+        rescue Exception => e
+          warn "Couldnt add or remove workers #{e.inspect} #{e.backtrace.join("\n")}"
+        end
+        exit
+
+      else
+
+        begin
+          debug "Making sure there's an available MessageQueue"
+          ts = Skynet::MessageQueue.new
+        rescue Skynet::ConnectionError => e
+          fatal "Couldn't get MessageQueue! #{e.message}"
+          raise Skynet::ConnectionError.new("ERROR! Couldn't get MessageQueue! #{e.message}")
+        end
+
+        debug "CONTINUING TO START : There IS an available MessageQueue", options
+
+        # create main pid file
+        File.open(options[:pid_file], 'w') do |file|
+          file.puts($$)
+        end
+
+        begin
+          info "STARTING THE MANAGER!!!!!!!!!!!"
+          @manager = Skynet::Manager.new(Skynet::CONFIG[:LAUNCHER_PATH],options[:workers],options[:required_libs])
+          DRb.start_service(Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL], @manager)
+          info "WORKER MANAGER URI: #{DRb.uri}"
+          @manager.start_workers
+          @manager.run
+          DRb.thread.join
+        rescue SystemExit, Interrupt
+        rescue Exception => e
+          fatal("Error in Manager. Manager Dying. #{e.inspect} #{e.backtrace}")
+        end
+      end
+    end
+
+  end
+end