skynet 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. data/History.txt +4 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +65 -0
  4. data/README.txt +100 -0
  5. data/Rakefile +4 -0
  6. data/app_generators/skynet_install/USAGE +5 -0
  7. data/app_generators/skynet_install/skynet_install_generator.rb +84 -0
  8. data/app_generators/skynet_install/templates/migration.rb +60 -0
  9. data/app_generators/skynet_install/templates/skynet +33 -0
  10. data/app_generators/skynet_install/templates/skynet_console +16 -0
  11. data/bin/skynet +20 -0
  12. data/bin/skynet_console +9 -0
  13. data/bin/skynet_install +12 -0
  14. data/bin/skynet_tuplespace_server +53 -0
  15. data/config/hoe.rb +74 -0
  16. data/config/requirements.rb +17 -0
  17. data/lib/skynet.rb +34 -0
  18. data/lib/skynet/mapreduce_test.rb +25 -0
  19. data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
  20. data/lib/skynet/message_queue_adapters/mysql.rb +573 -0
  21. data/lib/skynet/message_queue_adapters/tuple_space.rb +327 -0
  22. data/lib/skynet/skynet_active_record_extensions.rb +237 -0
  23. data/lib/skynet/skynet_config.rb +59 -0
  24. data/lib/skynet/skynet_console.rb +34 -0
  25. data/lib/skynet/skynet_console_helper.rb +59 -0
  26. data/lib/skynet/skynet_debugger.rb +84 -0
  27. data/lib/skynet/skynet_guid_generator.rb +68 -0
  28. data/lib/skynet/skynet_job.rb +607 -0
  29. data/lib/skynet/skynet_launcher.rb +10 -0
  30. data/lib/skynet/skynet_logger.rb +52 -0
  31. data/lib/skynet/skynet_manager.rb +486 -0
  32. data/lib/skynet/skynet_message.rb +366 -0
  33. data/lib/skynet/skynet_message_queue.rb +100 -0
  34. data/lib/skynet/skynet_ruby_extensions.rb +36 -0
  35. data/lib/skynet/skynet_task.rb +76 -0
  36. data/lib/skynet/skynet_tuplespace_server.rb +82 -0
  37. data/lib/skynet/skynet_worker.rb +395 -0
  38. data/lib/skynet/version.rb +9 -0
  39. data/log/debug.log +0 -0
  40. data/log/skynet.log +29 -0
  41. data/log/skynet_tuplespace_server.log +7 -0
  42. data/log/skynet_worker.pid +1 -0
  43. data/script/destroy +14 -0
  44. data/script/generate +14 -0
  45. data/script/txt2html +74 -0
  46. data/setup.rb +1585 -0
  47. data/sometest.rb +23 -0
  48. data/tasks/deployment.rake +34 -0
  49. data/tasks/environment.rake +7 -0
  50. data/tasks/website.rake +17 -0
  51. data/test/all_models_test.rb +139 -0
  52. data/test/mysql_message_queue_adaptor_test.rb +199 -0
  53. data/test/skynet_manager_test.rb +107 -0
  54. data/test/skynet_message_test.rb +42 -0
  55. data/test/test_generator_helper.rb +20 -0
  56. data/test/test_helper.rb +2 -0
  57. data/test/test_skynet.rb +11 -0
  58. data/test/test_skynet_install_generator.rb +53 -0
  59. data/test/tuplespace_message_queue_test.rb +179 -0
  60. data/tmtags +1242 -0
  61. data/website/index.html +93 -0
  62. data/website/index.txt +39 -0
  63. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  64. data/website/stylesheets/screen.css +138 -0
  65. data/website/template.rhtml +48 -0
  66. metadata +129 -0
@@ -0,0 +1,10 @@
1
# Launcher facade: Skynet.new does not build an instance, it starts a process role.
class Skynet
  include SkynetDebugger

  # Starts a worker when a worker type was requested (either through
  # options[:worker_type] or through a command-line argument mentioning
  # "worker_type"); otherwise starts the manager.
  #
  # Returns whatever the selected +start+ method returns.
  def self.new(options = {})
    worker_requested = options[:worker_type] || ARGV.any? { |arg| arg =~ /worker_type/ }
    return Skynet::Worker.start(options) if worker_requested

    Skynet::Manager.start(options)
  end
end
@@ -0,0 +1,52 @@
1
+ # http://darwinweb.net/article/Undoing_Rails_Monkey_Patch_To_Logger
2
+
3
+ require 'logger'
4
+
5
class Skynet

  # Base class for Skynet-specific errors.
  class Error < StandardError
  end

  # Shared logger for Skynet, built lazily from Skynet::CONFIG.
  class Logger < ::Logger
    # Restore the stock formatter when something (presumably Rails, per the
    # article linked at the top of this file) has stashed it under the name
    # old_format_message.  The check has to look at *instance* methods of the
    # class (private ones included); asking the class object whether it
    # responds to :format_message is always false, so the alias never ran.
    if private_method_defined?(:old_format_message) || method_defined?(:old_format_message)
      alias format_message old_format_message
    end

    @@log = nil

    # Returns the shared logger, creating it on first use from
    # Skynet::CONFIG[:SKYNET_LOG_FILE] and Skynet::CONFIG[:SKYNET_LOG_LEVEL].
    def self.get
      unless @@log
        @@log = new(Skynet::CONFIG[:SKYNET_LOG_FILE])
        @@log.level = Skynet::CONFIG[:SKYNET_LOG_LEVEL]
      end
      @@log
    end

    # Replaces the shared logger with +log+.
    def self.log=(log)
      @@log = log
    end
  end

  # This module can be mixed in to add logging methods to your class.
  # Each method takes no arguments and returns the shared Skynet::Logger.
  module Loggable
    def debug
      Skynet::Logger.get
    end

    def info
      Skynet::Logger.get
    end

    def warn
      Skynet::Logger.get
    end

    def error
      Skynet::Logger.get
    end

    def fatal
      Skynet::Logger.get
    end
  end
end
@@ -0,0 +1,486 @@
1
+ class Skynet
2
+ begin
3
+ require 'fastthread'
4
+ rescue LoadError
5
+ # puts 'fastthread not installed, using thread instead'
6
+ require 'thread'
7
+ end
8
+
9
+ class Manager
10
+ include SkynetDebugger
11
+
12
+ Skynet::CONFIG[:PERCENTAGE_OF_TASK_ONLY_WORKERS] ||= 0.7
13
+ Skynet::CONFIG[:PERCENTAGE_OF_MASTER_ONLY_WORKERS] ||= 0.2
14
+
15
# Short label for this class; presumably consumed by SkynetDebugger when
# tagging log lines (confirm against SkynetDebugger).
def self.debug_class_desc
  'MANAGER'
end
18
+
19
+
20
+ attr_accessor :required_libs
21
+
22
# Builds a manager.
#
# script_path       - path of the launcher script used to spawn workers.
# workers_requested - number of workers this manager should keep running.
# adlibs            - extra libraries passed to each worker with "-r".
def initialize(script_path, workers_requested, adlibs = [])
  @script_path = script_path
  @mutex = Mutex.new
  @workers_requested = workers_requested
  @required_libs = adlibs
  @number_of_workers = 0
  @workers_by_type = {:master => [], :task => [], :any => []}
  @signaled_workers = []
  @workers_running = {}
  @all_workers_started = false
  # Logged only after @script_path is set; logging first always printed "[]".
  info "Skynet Launcher Path: [#{@script_path}]"
end
34
+
35
# Installs the signal handlers, works out how many of the requested workers
# still need launching, and launches them.
def start_workers
  setup_signals

  to_launch = workers_to_start(@workers_requested)
  already_up = @workers_requested - to_launch
  warn "Starting #{to_launch} workers. #{already_up} already running."
  add_worker(to_launch)
end
42
+
43
+ ### maybe workers_to_start should be a method
44
# Returns how many of +workers_to_start+ workers still have to be launched.
#
# Every PID registered in the worker queue is examined: a live worker is
# adopted (recorded in @workers_running, counted in @number_of_workers) and
# reduces the number still needed; a dead one has its status removed from the
# queue.  Scanning stops, returning 0, as soon as nothing more is needed.
def workers_to_start(workers_to_start)
  remaining = workers_to_start
  worker_queue_pids.each do |pid|
    if worker_alive?(pid)
      @workers_running[pid] = Time.now
      @number_of_workers += 1
      remaining -= 1
    else
      take_worker_status(pid)
    end
    return 0 if remaining < 1
  end
  remaining
end
60
+
61
# Waits for the launched workers to register themselves in the worker queue.
#
# Polls the queue up to 50 times, sleeping one second per still-missing worker
# between polls.  Any exception raised while polling is logged and swallowed.
# Afterwards @all_workers_started is set, and if more workers registered than
# expected, @number_of_workers is raised to match.
def check_started_workers
  registered = []
  begin
    1.upto(50) do |attempt|
      registered = worker_queue
      warn "Checking started workers, #{registered.size} out of #{@number_of_workers} after the #{attempt}th try..."
      missing = @number_of_workers - registered.size
      break if missing <= 0
      sleep(missing)
    end
  rescue Exception => e
    fatal "Something bad happened #{e.inspect} #{e.backtrace.join("\n")}"
  end

  @all_workers_started = true

  warn "FINISHED STARTING ALL #{registered.size} WORKERS"
  if registered.size > @number_of_workers
    warn "EXPECTED #{@number_of_workers}"
    @number_of_workers = registered.size
  end
end
82
+
83
+ # the main application loop
84
# The main application loop.
#
# Repeatedly checks on the workers, pausing
# Skynet::CONFIG[:WORKER_CHECK_DELAY] seconds between checks.  While workers
# are still being started the loop idles with a short sleep instead of
# spinning at full speed.  SystemExit/Interrupt end the process; any other
# exception is logged and the loop carries on (deliberate: the manager is
# meant to keep supervising).
def run
  loop do
    begin
      unless @all_workers_started
        sleep 0.1
        next
      end
      check_workers
      sleep Skynet::CONFIG[:WORKER_CHECK_DELAY]
    rescue SystemExit, Interrupt
      fatal "Manager Exiting!"
      exit
    rescue Exception => e
      fatal "Something bad happened #{e.inspect} #{e.backtrace.join("\n")}"
    end
  end
end
98
+
99
# One supervision pass: reads the PIDs registered in the worker queue,
# reconciles them with the processes actually running, then reconciles the
# worker count.  Both steps receive the same array.  Always returns true.
def check_workers
  queued_pids = worker_queue_pids || []
  unless @shutdown
    info "Checking on #{@number_of_workers} workers..."
  end
  check_running_pids(queued_pids)
  check_number_of_workers(queued_pids)
  true
end
106
+
107
# Reconciles the manager's bookkeeping with the worker queue.
#
# q_pids - array of PIDs currently registered in the worker queue.  It is
#          modified in place (callers keep using the same array) and also
#          returned.
#
# 1. Workers this manager launched that are no longer in the queue are
#    forgotten, and sent TERM if their process is still alive.
# 2. Queue entries whose process is no longer alive are removed from the queue
#    and from the bookkeeping, and the expected worker count is decremented.
def check_running_pids(q_pids)
  if @workers_running.keys.size > q_pids.size
    (@workers_running.keys - q_pids).each do |wpid|
      error "Missing worker #{wpid} from worker queue. Removing and/or killing."
      Process.kill("TERM", wpid) if worker_alive?(wpid)
      @workers_running.delete(wpid)
    end
  end

  # Iterate over a snapshot: deleting from the array being iterated makes
  # #each skip the element that follows every deletion.
  q_pids.dup.each do |wpid|
    next if worker_alive?(wpid)
    error "Worker #{wpid} was in queue and but was not running. Removing from queue."
    take_worker_status(wpid)
    @workers_running.delete(wpid)
    @number_of_workers -= 1
    q_pids.delete(wpid)
  end
  q_pids
end
128
+
129
# Drives one step of an orderly shutdown.
#
# While masters may still be alive: removes any master workers this manager
# still tracks, then either signals every worker with INT (once no master is
# left in the queue) or waits and tries again.  In both cases it hands a
# fresh PID list back to check_number_of_workers and returns its result.
#
# Once the masters are gone it only reports how many workers remain.
def worker_shutdown(q_pids)
  unless @masters_dead
    warn "Shutting down masters. #{q_pids.size} workers still running." if q_pids.size > 0

    tracked_masters = worker_queue.select do |w|
      w.map_or_reduce == "master" && @workers_running.include?(w.process_id)
    end
    master_pids = tracked_masters.collect { |w| w.process_id }
    unless master_pids.empty?
      warn "FOUND MORE RUNNING MASTERS WE HAVEN'T KILLED:", master_pids
      remove_worker(master_pids)
    end

    if worker_queue.detect { |w| w.map_or_reduce == "master" }
      sleep 4
    else
      signal_workers("INT")
      @masters_dead = true
      sleep 1
    end
    return check_number_of_workers(worker_queue_pids)
  end

  warn "Shutting down. #{q_pids.size} workers still running." if q_pids.size > 0
  info "No more workers running." if q_pids.size < 1
end
158
+
159
# Reconciles the number of registered workers with what is expected.
#
# q_pids - PIDs currently registered in the worker queue.
#
# During shutdown it advances the shutdown and exits once no worker is left.
# Otherwise, when the registered count differs from the expected count:
# * below 85% of the requested workers, replacements are started to get back
#   to @workers_requested;
# * at or above that, the expected count is simply lowered/raised to match.
def check_number_of_workers(q_pids)
  if @shutdown
    worker_shutdown(q_pids)
    exit if q_pids.size < 1
  elsif q_pids.size != @number_of_workers
    if q_pids.size.to_f / @workers_requested.to_f < 0.85
      starting = @workers_requested - q_pids.size
      error "Expected #{@number_of_workers} workers. #{q_pids.size} running. Starting #{starting}"
      # add_worker bumps @number_of_workers once per worker it launches, so
      # only resync to what is actually registered here; adding +starting+
      # as well would count every replacement twice.
      @number_of_workers = q_pids.size
      add_worker(starting)
    else
      error "Expected #{@number_of_workers} workers. #{q_pids.size} running."
      @number_of_workers = q_pids.size
    end
  end
end
178
+
179
# Takes the status entry for +worker_process_id+ on this host out of the
# message queue, waiting at most 0.00001 seconds.  A queue timeout is logged
# rather than raised.
def take_worker_status(worker_process_id)
  status_template = { :hostname => hostname, :process_id => worker_process_id }
  mq.take_worker_status(status_template, 0.00001)
rescue Skynet::QueueTimeout
  error "Couldnt take worker status for #{hostname} #{worker_process_id}"
end
189
+
190
# Returns true when +worker_pid+ is a running process whose command line
# mentions "worker_type" (the flag add_worker puts on every worker it
# launches), false otherwise - including when +ps+ itself cannot be run.
#
# +ps+ is invoked with an argument list rather than a shell string, so the
# pid is never interpreted by a shell.
def worker_alive?(worker_pid)
  IO.popen(["ps", "-o", "pid,command", "-p", worker_pid.to_s], "r") do |ps|
    ps.any? { |line| line =~ /worker_type/ }
  end
rescue Errno::ENOENT
  false
end
200
+
201
+
202
# Plural-named convenience wrapper; forwards every argument to add_worker.
def add_workers(*counts)
  add_worker(*counts)
end
205
+
206
# Launches +workers+ new worker processes and waits for them to register.
#
# The first PERCENTAGE_OF_MASTER_ONLY_WORKERS share is started as :master
# workers, the next PERCENTAGE_OF_TASK_ONLY_WORKERS share as :task workers and
# the remainder as :any.  Each launched PID is recorded in @workers_by_type
# and @workers_running, and @number_of_workers is incremented per worker.
#
# Returns the array of launched PIDs (callers such as Manager.start report
# them); previously the PIDs were collected and then thrown away.
def add_worker(workers = 1)
  num_task_only_workers = (workers * Skynet::CONFIG[:PERCENTAGE_OF_TASK_ONLY_WORKERS]).to_i
  num_master_only_workers = (workers * Skynet::CONFIG[:PERCENTAGE_OF_MASTER_ONLY_WORKERS]).to_i
  warn "Adding #{workers} WORKERS. Task Workers: #{num_task_only_workers}, Master Workers: #{num_master_only_workers} Master & Task Workers: #{workers - num_task_only_workers - num_master_only_workers}"

  @all_workers_started = false
  worker_types = {:task => 0, :master => 0, :any => 0}
  pids = (1..workers).collect do |ii|
    worker_type =
      if ii <= num_master_only_workers
        :master
      elsif ii <= num_master_only_workers + num_task_only_workers
        :task
      else
        :any
      end
    worker_types[worker_type] += 1

    cmd = "#{@script_path} --worker_type=#{worker_type}"
    cmd << " -r #{required_libs.join(' -r ')}" if required_libs and not required_libs.empty?
    wpid = self.fork_and_exec(cmd)
    @workers_by_type[worker_type] ||= []
    @workers_by_type[worker_type] << wpid
    warn "Adding Worker ##{ii} PID: #{wpid} WORKER_TYPE?:#{worker_type}"
    @mutex.synchronize do
      @number_of_workers += 1
    end
    @workers_running[wpid] = Time.now
    sleep 0.01
    wpid
  end
  info "DISTRO", worker_types
  check_started_workers
  pids
end
240
+
241
# Removes the first +workers+ PIDs listed in the worker queue and returns
# whatever remove_worker returns for them.
def remove_workers(workers = 1)
  remove_worker(worker_queue_pids.slice(0...workers))
end
245
+
246
# Stops tracking the given worker(s) and asks each one to stop with INT.
#
# pids - a single PID, an array of PIDs, or nil (nothing to do).
#
# A PID whose process has already gone is logged and skipped so that the
# remaining PIDs are still signalled.  Returns the array of PIDs handled.
def remove_worker(pids = nil)
  pids = Array(pids)
  return pids if pids.empty?

  info "Removing workers #{pids.join(",")} from worker queue. They will die gracefully when they finish what they're doing."
  pids.each do |wpid|
    @workers_running.delete(wpid)
    @number_of_workers -= 1
    warn "REMOVING WORKER #{wpid}"
    @signaled_workers << wpid
    begin
      Process.kill("INT", wpid)
    rescue Errno::ESRCH
      warn "WORKER #{wpid} WAS ALREADY GONE"
    end
  end
  pids
end
261
+
262
# Sends +signal+ to the workers registered in the worker queue.
#
# signal      - signal name, e.g. "INT", "TERM", "HUP".
# worker_type - when given (:master, :task, :any), only workers this manager
#               launched with that type are signalled; an unknown type
#               matches nothing.
#
# Each signalled worker is dropped from @workers_running and appended to
# @signaled_workers.  A worker whose process has already gone is logged and
# skipped so the remaining workers still get the signal.
def signal_workers(signal, worker_type = nil)
  worker_queue.each do |worker|
    pid = worker.process_id
    next if worker_type and not (@workers_by_type[worker_type] || []).include?(pid)
    warn "SHUTTING DOWN #{pid} MR: #{worker.map_or_reduce}"
    @workers_running.delete(pid)
    begin
      Process.kill(signal, pid)
    rescue Errno::ESRCH
      warn "WORKER #{pid} WAS ALREADY GONE"
      next
    end
    @signaled_workers << pid
  end
end
271
+
272
# Asks the manager on every host that has a worker status in the message
# queue to restart its workers.  Each remote manager is reached over DRb at
# port 40000 of that host.
def restart_all_workers
  remote_hosts = mq.read_all_worker_statuses.collect { |status| status.hostname }.uniq
  remote_hosts.each do |remote_hostname|
    DRbObject.new(nil, "druby://#{remote_hostname}:40000").restart_workers
  end
end
282
+
283
# Forceful restart: sends TERM to every worker, then INT to the :master and
# :any workers, sleeps one second per expected worker and finally waits for
# the workers to show up in the queue again.
def hard_restart_workers
  @all_workers_started = false
  signal_workers("TERM")
  @restart = true
  [:master, :any].each { |type| signal_workers("INT", type) }
  sleep(@number_of_workers)
  check_started_workers
end
292
+
293
# Graceful restart: sends HUP to every worker, forgets the PIDs it was
# tracking, sleeps one second per expected worker and then waits for the
# workers to show up in the queue again.
def restart_workers
  @all_workers_started = false
  signal_workers("HUP")
  @workers_running = Hash.new
  sleep(@number_of_workers)
  check_started_workers
end
300
+
301
# Installs the manager's signal handlers:
# * HUP  - restart the workers.
# * TERM - first time: orderly shutdown; any later time: terminate.
# * INT  - orderly shutdown, or terminate if a shutdown is already under way.
def setup_signals
  Signal.trap("HUP") { restart_workers }

  Signal.trap("TERM") do
    asked_before = @term
    @term = true
    asked_before ? terminate : shutdown
  end

  Signal.trap("INT") { @shutdown ? terminate : shutdown }
end
322
+
323
# Begins an orderly shutdown: flags it, then sends INT to the :master workers
# followed by the :any workers.  (No signal is sent to :task workers here.)
def shutdown
  info :shutdown
  @shutdown = true
  signal_workers("INT", :master)
  signal_workers("INT", :any)
end
329
+
330
# Immediate stop: sends TERM to every worker and exits the manager process.
def terminate
  info :terminate
  signal_workers("TERM")
  exit
end
335
+
336
# Forks a child that replaces itself with +command+ run through /bin/sh, and
# detaches from it so the child does not linger as a zombie.
#
# Returns the child's PID.
def fork_and_exec(command)
  child_pid = fork do
    exec("/bin/sh -c \"#{command}\"")
    exit
  end
  Process.detach(child_pid) unless child_pid == 0
  child_pid
end
344
+
345
# Message queue handle, created on first use and reused afterwards.
def mq
  @mq = Skynet::MessageQueue.new unless @mq
  @mq
end
348
+
349
# Worker statuses the message queue holds for this host.
def worker_queue
  queue = mq
  queue.read_all_worker_statuses(hostname)
end
352
+
353
# PIDs of the workers registered in the queue for this host.
def worker_queue_pids
  worker_queue.map(&:process_id)
end
356
+
357
# Shorter name for worker_queue_pids.
def worker_pids
  return worker_queue_pids
end
360
+
361
# PID of the process this method runs in (the manager itself).
def parent_pid
  Process.pid
end
364
+
365
# Name of the local host, looked up once and cached in @machine_name.
def hostname
  @machine_name = Socket.gethostname unless @machine_name
  @machine_name
end
368
+
369
# Liveness probe; always answers true.
def ping
  return true
end
372
+
373
# Command-line entry point for the manager.
#
# options - hash of settings, completed from ARGV:
#           :add_workers / :remove_workers - ask an already running manager
#               (reached over DRb) to add/remove that many workers, then exit;
#           :workers       - number of workers to start
#               (default Skynet::CONFIG[:NUMBER_OF_WORKERS] or 4);
#           :required_libs - libraries to require here and pass to workers;
#           :pid_file      - where to write this process's PID.
#
# The --restart-all-workers, --restart-workers and --increment-worker-version
# switches perform their action and exit immediately.
#
# Without add/remove it verifies a message queue is reachable, writes the PID
# file, publishes a new Manager over DRb, starts the workers and runs the
# manager loop.
#
# Raises Skynet::ConnectionError when no message queue is available.
def self.start(options = {})
  options[:add_workers]    ||= nil
  options[:remove_workers] ||= nil
  options[:use_rails]      ||= false
  options[:required_libs]  ||= []
  OptionParser.new do |opt|
    opt.banner = "Usage: skynet [options]"
    opt.on('', '--restart-all-workers', 'Restart All Workers') do |v|
      puts "Restarting ALL workers on ALL machines."
      begin
        manager = DRbObject.new(nil, Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL])
        manager.restart_all_workers
        exit
      rescue DRb::DRbConnError => e
        puts "No manager running at #{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL]} ERROR: #{e.inspect}"
        exit
      end
    end
    opt.on('', '--restart-workers', 'Restart Workers') do |v|
      puts "Restarting workers on this machine."
      begin
        manager = DRbObject.new(nil, Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL])
        manager.restart_workers
        exit
      rescue DRb::DRbConnError => e
        puts "No manager running at #{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL]} ERROR: #{e.inspect}"
        exit
      end
    end
    opt.on('-i', '--increment-worker-version', 'Increment Worker Version') do |v|
      ver = Skynet::MessageQueue.new.increment_worker_version
      puts "Incrementing Worker Version to #{ver}"
      exit
    end
    opt.on('-a', '--add-workers WORKERS', 'Number of workers to add.') do |v|
      options[:add_workers] = v.to_i
    end
    opt.on('-k', '--remove-workers WORKERS', 'Number of workers to remove.') do |v|
      options[:remove_workers] = v.to_i
    end
    opt.on('-w', '--workers WORKERS', 'Number of workers to start.') do |v|
      options[:workers] = v.to_i
    end
    opt.on('-r', '--required LIBRARY', 'Require the specified libraries') do |v|
      options[:required_libs] << File.expand_path(v)
    end

    opt.parse!(ARGV)
  end

  options[:workers] ||= Skynet::CONFIG[:NUMBER_OF_WORKERS] || 4
  options[:pid_file] ||= File.dirname(Skynet::CONFIG[:SKYNET_PIDS_FILE]) + "/skynet_worker.pid"

  options[:required_libs].each do |adlib|
    begin
      require adlib
    rescue LoadError => e
      # LoadError is what Kernel#require raises for a missing file.  The
      # previous MissingSourceFile constant is not part of plain Ruby
      # (presumably an ActiveSupport name and a kind of LoadError - confirm),
      # so without Rails this handler itself failed with a NameError.
      error "The included lib #{adlib} was not found: #{e.inspect}"
      exit
    end
  end

  # Handle add or remove workers
  if options[:add_workers] or options[:remove_workers]
    begin
      manager = DRbObject.new(nil, Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL])
      if options[:add_workers]
        pids = manager.add_worker(options[:add_workers])
        warn "ADDING #{options[:add_workers]} workers PIDS: #{pids.inspect}"
      elsif options[:remove_workers]
        pids = manager.remove_workers(options[:remove_workers])
        warn "REMOVING #{options[:remove_workers]} workers PIDS: #{pids.inspect}"
      end
    rescue DRb::DRbConnError => e
      warn "Couldnt add or remove workers. There are probably no workers running. At least I couldn't find a skynet_manager around at #{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL]} #{e.inspect}"
    rescue Exception => e
      warn "Couldnt add or remove workers #{e.inspect} #{e.backtrace.join("\n")}"
    end
    exit

  else

    begin
      debug "Making sure there's an available MessageQueue"
      # Only constructed to prove a queue can be reached; the instance itself
      # is not needed afterwards.
      Skynet::MessageQueue.new
    rescue Skynet::ConnectionError => e
      fatal "Couldn't get MessageQueue! #{e.message}"
      raise Skynet::ConnectionError.new("ERROR! Couldn't get MessageQueue! #{e.message}")
    end

    debug "CONTINUING TO START : There IS an available MessageQueue", options

    # create main pid file
    File.open(options[:pid_file], 'w') do |file|
      file.puts($$)
    end

    begin
      info "STARTING THE MANAGER!!!!!!!!!!!"
      @manager = Skynet::Manager.new(Skynet::CONFIG[:LAUNCHER_PATH],options[:workers],options[:required_libs])
      DRb.start_service(Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL], @manager)
      info "WORKER MANAGER URI: #{DRb.uri}"
      @manager.start_workers
      @manager.run
      DRb.thread.join
    rescue SystemExit, Interrupt
    rescue Exception => e
      fatal("Error in Manager. Manager Dying. #{e.inspect} #{e.backtrace}")
    end
  end
end
484
+
485
+ end
486
+ end