skynet 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. data/History.txt +4 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +65 -0
  4. data/README.txt +100 -0
  5. data/Rakefile +4 -0
  6. data/app_generators/skynet_install/USAGE +5 -0
  7. data/app_generators/skynet_install/skynet_install_generator.rb +84 -0
  8. data/app_generators/skynet_install/templates/migration.rb +60 -0
  9. data/app_generators/skynet_install/templates/skynet +33 -0
  10. data/app_generators/skynet_install/templates/skynet_console +16 -0
  11. data/bin/skynet +20 -0
  12. data/bin/skynet_console +9 -0
  13. data/bin/skynet_install +12 -0
  14. data/bin/skynet_tuplespace_server +53 -0
  15. data/config/hoe.rb +74 -0
  16. data/config/requirements.rb +17 -0
  17. data/lib/skynet.rb +34 -0
  18. data/lib/skynet/mapreduce_test.rb +25 -0
  19. data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
  20. data/lib/skynet/message_queue_adapters/mysql.rb +573 -0
  21. data/lib/skynet/message_queue_adapters/tuple_space.rb +327 -0
  22. data/lib/skynet/skynet_active_record_extensions.rb +237 -0
  23. data/lib/skynet/skynet_config.rb +59 -0
  24. data/lib/skynet/skynet_console.rb +34 -0
  25. data/lib/skynet/skynet_console_helper.rb +59 -0
  26. data/lib/skynet/skynet_debugger.rb +84 -0
  27. data/lib/skynet/skynet_guid_generator.rb +68 -0
  28. data/lib/skynet/skynet_job.rb +607 -0
  29. data/lib/skynet/skynet_launcher.rb +10 -0
  30. data/lib/skynet/skynet_logger.rb +52 -0
  31. data/lib/skynet/skynet_manager.rb +486 -0
  32. data/lib/skynet/skynet_message.rb +366 -0
  33. data/lib/skynet/skynet_message_queue.rb +100 -0
  34. data/lib/skynet/skynet_ruby_extensions.rb +36 -0
  35. data/lib/skynet/skynet_task.rb +76 -0
  36. data/lib/skynet/skynet_tuplespace_server.rb +82 -0
  37. data/lib/skynet/skynet_worker.rb +395 -0
  38. data/lib/skynet/version.rb +9 -0
  39. data/log/debug.log +0 -0
  40. data/log/skynet.log +29 -0
  41. data/log/skynet_tuplespace_server.log +7 -0
  42. data/log/skynet_worker.pid +1 -0
  43. data/script/destroy +14 -0
  44. data/script/generate +14 -0
  45. data/script/txt2html +74 -0
  46. data/setup.rb +1585 -0
  47. data/sometest.rb +23 -0
  48. data/tasks/deployment.rake +34 -0
  49. data/tasks/environment.rake +7 -0
  50. data/tasks/website.rake +17 -0
  51. data/test/all_models_test.rb +139 -0
  52. data/test/mysql_message_queue_adaptor_test.rb +199 -0
  53. data/test/skynet_manager_test.rb +107 -0
  54. data/test/skynet_message_test.rb +42 -0
  55. data/test/test_generator_helper.rb +20 -0
  56. data/test/test_helper.rb +2 -0
  57. data/test/test_skynet.rb +11 -0
  58. data/test/test_skynet_install_generator.rb +53 -0
  59. data/test/tuplespace_message_queue_test.rb +179 -0
  60. data/tmtags +1242 -0
  61. data/website/index.html +93 -0
  62. data/website/index.txt +39 -0
  63. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  64. data/website/stylesheets/screen.css +138 -0
  65. data/website/template.rhtml +48 -0
  66. metadata +129 -0
@@ -0,0 +1,10 @@
1
# Launcher facade for the skynet executable.
class Skynet
  include SkynetDebugger

  # Overrides the constructor: rather than building a Skynet instance it
  # starts a worker or a manager and returns whatever that +start+ call returns.
  #
  # options - Hash forwarded untouched to the chosen +start+ method.
  #
  # A worker is started when options[:worker_type] is set or when any
  # command-line argument mentions "worker_type"; otherwise a manager is started.
  def self.new(options={})
    wants_worker = options[:worker_type] || ARGV.any? { |arg| arg =~ /worker_type/ }
    return Skynet::Worker.start(options) if wants_worker

    Skynet::Manager.start(options)
  end
end
@@ -0,0 +1,52 @@
1
+ # http://darwinweb.net/article/Undoing_Rails_Monkey_Patch_To_Logger
2
+
3
+ require 'logger'
4
+
5
class Skynet

  # Base class for Skynet-specific errors.
  class Error < StandardError
  end

  # Logger subclass holding one shared logger instance for the process.
  class Logger < ::Logger
    # If some other library saved the stock formatter under the name
    # old_format_message (see the article linked at the top of this file),
    # put it back. The alias is only attempted when that method really
    # exists; format_message is normally private, so both visibilities are
    # checked.
    if method_defined?(:old_format_message) || private_method_defined?(:old_format_message)
      alias format_message old_format_message
    end

    # Shared logger instance; created on first use by Logger.get.
    @@log = nil

    # Returns the shared logger, creating it from Skynet::CONFIG on first
    # use (:SKYNET_LOG_FILE is the log target, :SKYNET_LOG_LEVEL the level).
    def self.get
      unless @@log
        @@log = new(Skynet::CONFIG[:SKYNET_LOG_FILE])
        @@log.level = Skynet::CONFIG[:SKYNET_LOG_LEVEL]
      end
      @@log
    end

    # Replaces the shared logger with +log+.
    def self.log=(log)
      @@log = log
    end
  end

  # This module can be mixed in to add logging methods to your class.
  # Every method takes no arguments and returns the shared Skynet::Logger.
  module Loggable
    def debug
      Skynet::Logger.get
    end

    def info
      Skynet::Logger.get
    end

    # NOTE: shadows Kernel#warn in any class that includes this module.
    def warn
      Skynet::Logger.get
    end

    def error
      Skynet::Logger.get
    end

    def fatal
      Skynet::Logger.get
    end
  end
end
@@ -0,0 +1,486 @@
1
+ class Skynet
2
+ begin
3
+ require 'fastthread'
4
+ rescue LoadError
5
+ # puts 'fastthread not installed, using thread instead'
6
+ require 'thread'
7
+ end
8
+
9
+ class Manager
10
+ include SkynetDebugger
11
+
12
+ Skynet::CONFIG[:PERCENTAGE_OF_TASK_ONLY_WORKERS] ||= 0.7
13
+ Skynet::CONFIG[:PERCENTAGE_OF_MASTER_ONLY_WORKERS] ||= 0.2
14
+
15
# Short label for this class.
# NOTE(review): presumably used by SkynetDebugger to tag log lines - confirm there.
def self.debug_class_desc
  "MANAGER"
end
18
+
19
+
20
+ attr_accessor :required_libs
21
+
22
# Builds a manager that has not started any workers yet.
#
# script_path       - command used to launch each worker (see add_worker).
# workers_requested - number of workers this manager should keep running.
# adlibs            - library paths passed to every worker via "-r".
def initialize(script_path,workers_requested,adlibs=[])
  # Assign first so the log line below shows the real path instead of an
  # empty string.
  @script_path = script_path
  info "Skynet Launcher Path: [#{@script_path}]"
  @mutex = Mutex.new
  @workers_requested = workers_requested
  @required_libs = adlibs
  @number_of_workers = 0
  # pids of the workers this manager launched, grouped by worker type
  @workers_by_type = {:master => [], :task => [], :any => []}
  @signaled_workers = []
  # pid => Time the manager started (or first saw) that worker
  @workers_running = {}
  @all_workers_started = false
end
34
+
35
# Installs the signal handlers, works out how many workers still need to be
# launched (requested minus those already alive) and launches them.
def start_workers
  setup_signals

  to_launch = workers_to_start(@workers_requested)
  already_running = @workers_requested - to_launch
  warn "Starting #{to_launch} workers. #{already_running} already running."
  add_worker(to_launch)
end
42
+
43
+ ### maybe workers_to_start should be a method
44
# Returns how many of the +workers_to_start+ requested workers still have to
# be launched.
#
# Every pid in the worker queue is checked: a live worker is adopted
# (recorded in @workers_running and counted in @number_of_workers) and
# reduces the remainder by one; a dead one has its status taken off the
# queue. Returns 0 as soon as the remainder drops below 1.
def workers_to_start(workers_to_start)
  remaining = workers_to_start
  worker_queue_pids.each do |queued_pid|
    if worker_alive?(queued_pid)
      @workers_running[queued_pid] = Time.now
      @number_of_workers += 1
      remaining -= 1
    else
      take_worker_status(queued_pid)
    end
    return 0 if remaining < 1
  end
  remaining
end
60
+
61
# Waits (at most 50 polls) until the worker queue lists at least
# @number_of_workers workers, then marks startup as finished.
#
# Between polls it sleeps one second per worker still missing. When more
# workers than expected are found, @number_of_workers is raised to match.
# SystemExit and Interrupt are re-raised so that an exit requested while
# waiting is honoured (the same way #run treats them); any other exception
# is logged and startup is still marked finished.
def check_started_workers
  workers = []
  begin
    50.times do |ii|
      workers = worker_queue
      warn "Checking started workers, #{workers.size} out of #{@number_of_workers} after the #{(ii+1)}th try..."
      break if workers.size >= @number_of_workers
      sleep(@number_of_workers - workers.size)
    end
  rescue SystemExit, Interrupt
    raise
  rescue Exception => e
    fatal "Something bad happened #{e.inspect} #{e.backtrace.join("\n")}"
  end

  @all_workers_started = true

  warn "FINISHED STARTING ALL #{workers.size} WORKERS"
  if workers.size > @number_of_workers
    warn "EXPECTED #{@number_of_workers}"
    @number_of_workers = workers.size
  end
end
82
+
83
+ # the main application loop
84
# The main application loop: checks the workers, sleeps for
# Skynet::CONFIG[:WORKER_CHECK_DELAY] and repeats forever.
#
# While workers are still being (re)started the check is skipped; the loop
# then naps briefly instead of spinning at full speed. SystemExit and
# Interrupt end the process; any other exception is logged and the loop
# carries on.
def run
  loop do
    unless @all_workers_started
      sleep 0.1 # avoid a busy-wait while a (re)start is in progress
      next
    end
    begin
      check_workers
      sleep Skynet::CONFIG[:WORKER_CHECK_DELAY]
    rescue SystemExit, Interrupt
      fatal "Manager Exiting!"
      exit
    rescue Exception => e
      fatal "Something bad happened #{e.inspect} #{e.backtrace.join("\n")}"
    end
  end
end
98
+
99
# One monitoring pass: reads the queued worker pids, reconciles them with
# the running processes, then reconciles the worker count. Always returns true.
def check_workers
  queued_pids = worker_queue_pids || []
  unless @shutdown
    info "Checking on #{@number_of_workers} workers..."
  end
  check_running_pids(queued_pids)
  check_number_of_workers(queued_pids)
  true
end
106
+
107
# Reconciles the pids this manager believes are running with the pids
# listed in the worker queue.
#
# q_pids - Array of pids read from the worker queue. It is modified in
#          place (dead workers are removed) and also returned.
#
# Workers tracked in @workers_running but absent from the queue are sent
# TERM (if still alive) and forgotten. Queue entries whose process is no
# longer alive have their status taken off the queue, are forgotten and
# decrement @number_of_workers.
def check_running_pids(q_pids)
  if @workers_running.keys.size > q_pids.size
    (@workers_running.keys - q_pids).each do |wpid|
      error "Missing worker #{wpid} from worker queue. Removing and/or killing."
      Process.kill("TERM",wpid) if worker_alive?(wpid)
      @workers_running.delete(wpid)
    end
  end

  # Walk a copy: deleting from q_pids while iterating q_pids itself would
  # skip the entry that follows every dead worker.
  q_pids.uniq.each do |wpid|
    next if worker_alive?(wpid)
    error "Worker #{wpid} was in queue and but was not running. Removing from queue."
    take_worker_status(wpid)
    @workers_running.delete(wpid)
    @number_of_workers -= 1
    q_pids.delete(wpid)
  end
  q_pids
end
128
+
129
# One step of the shutdown sequence.
#
# q_pids - Array of pids currently listed in the worker queue.
#
# Until the masters are known to be gone (@masters_dead): any master still
# tracked in @workers_running is removed; if the queue then lists no master,
# every worker is sent INT and @masters_dead is set. Either way the method
# sleeps (1s after signalling, 4s otherwise) and hands over to
# check_number_of_workers with a fresh pid list, returning its result.
# Once the masters are gone it only reports how many workers remain.
def worker_shutdown(q_pids)
  if @masters_dead
    warn "Shutting down. #{q_pids.size} workers still running." if q_pids.size > 0
  else
    warn "Shutting down masters. #{q_pids.size} workers still running." if q_pids.size > 0
    running_masters = worker_queue.select do |w|
      w.map_or_reduce == "master" and @workers_running.include?(w.process_id)
    end

    master_pids = running_masters.collect { |w| w.process_id }
    unless master_pids.empty?
      warn "FOUND MORE RUNNING MASTERS WE HAVEN'T KILLED:", master_pids
      remove_worker(master_pids)
    end

    if worker_queue.detect { |w| w.map_or_reduce == "master" }
      sleep 4
    else
      signal_workers("INT")
      @masters_dead = true
      sleep 1
    end
    return check_number_of_workers(worker_queue_pids)
  end
  info "No more workers running." if q_pids.size < 1
end
158
+
159
# Compares the number of queued workers with the number expected and
# reacts.
#
# q_pids - Array of pids currently listed in the worker queue.
#
# During shutdown it runs a shutdown step and exits the process once no
# workers are left. Otherwise, when the counts differ: if fewer than 85% of
# the requested workers are present the shortfall is launched; if not, the
# expected count is simply lowered/raised to what is actually there.
def check_number_of_workers(q_pids)
  if @shutdown
    worker_shutdown(q_pids)
    exit if q_pids.size < 1
  elsif q_pids.size != @number_of_workers
    if q_pids.size.to_f / @workers_requested.to_f < 0.85
      starting = @workers_requested - q_pids.size
      error "Expected #{@number_of_workers} workers. #{q_pids.size} running. Starting #{starting}"
      # add_worker increments @number_of_workers once per worker it
      # launches, so start from what is really running instead of adding
      # +starting+ here as well (which counted every replacement twice).
      @number_of_workers = q_pids.size
      add_worker(starting)
    else
      error "Expected #{@number_of_workers} workers. #{q_pids.size} running."
      @number_of_workers = q_pids.size
    end
  end
end
178
+
179
# Takes the status entry of the worker with pid +worker_process_id+ on this
# host off the message queue, using a 0.00001 timeout argument. A
# Skynet::QueueTimeout is logged as an error instead of being raised.
def take_worker_status(worker_process_id)
  queue = mq
  status_template = {
    :hostname   => hostname,
    :process_id => worker_process_id
  }
  queue.take_worker_status(status_template, 0.00001)
rescue Skynet::QueueTimeout
  error "Couldnt take worker status for #{hostname} #{worker_process_id}"
end
189
+
190
# Tells whether +worker_pid+ belongs to a running skynet worker.
#
# Runs `ps` for that pid and looks for a line mentioning "worker_type".
# Returns that line (truthy) when found, nil when not, and false when
# running `ps` raises Errno::ENOENT.
def worker_alive?(worker_pid)
  IO.popen("ps -o pid,command -p #{worker_pid}", "r") do |ps_output|
    return ps_output.detect { |ps_line| ps_line =~ /worker_type/ }
  end
  false
rescue Errno::ENOENT
  false
end
200
+
201
+
202
# Plural alias: forwards every argument to add_worker and returns its result.
def add_workers(*worker_args)
  add_worker(*worker_args)
end
205
+
206
# Launches +workers+ new worker processes and waits for them to register.
#
# workers - number of workers to launch (default 1).
#
# The first PERCENTAGE_OF_MASTER_ONLY_WORKERS share are launched as :master
# workers, the next PERCENTAGE_OF_TASK_ONLY_WORKERS share as :task workers
# and the rest as :any. Each launch is recorded in @workers_by_type and
# @workers_running and counted in @number_of_workers.
#
# Returns the Array of pids that were launched (Manager.start logs this
# value as the pid list when adding workers through DRb).
def add_worker(workers=1)
  num_task_only_workers = (workers * Skynet::CONFIG[:PERCENTAGE_OF_TASK_ONLY_WORKERS]).to_i
  num_master_only_workers = (workers * Skynet::CONFIG[:PERCENTAGE_OF_MASTER_ONLY_WORKERS]).to_i
  warn "Adding #{workers} WORKERS. Task Workers: #{num_task_only_workers}, Master Workers: #{num_master_only_workers} Master & Task Workers: #{workers - num_task_only_workers - num_master_only_workers}"

  @all_workers_started = false
  worker_types = {:task => 0, :master => 0, :any => 0}
  started_pids = (1..workers).collect do |ii|
    worker_type =
      if ii <= num_master_only_workers
        :master
      elsif ii <= num_master_only_workers + num_task_only_workers
        :task
      else
        :any
      end
    worker_types[worker_type] += 1

    cmd = "#{@script_path} --worker_type=#{worker_type}"
    cmd << " -r #{required_libs.join(' -r ')}" if required_libs and not required_libs.empty?
    wpid = fork_and_exec(cmd)
    @workers_by_type[worker_type] ||= []
    @workers_by_type[worker_type] << wpid
    warn "Adding Worker ##{ii} PID: #{wpid} WORKER_TYPE?:#{worker_type}"
    @mutex.synchronize do
      @number_of_workers += 1
    end
    @workers_running[wpid] = Time.now
    sleep 0.01
    wpid
  end
  info "DISTRO", worker_types
  check_started_workers
  started_pids
end
240
+
241
# Removes the first +workers+ workers listed in the worker queue (default 1)
# and returns whatever remove_worker returns for those pids.
def remove_workers(workers=1)
  remove_worker(worker_queue_pids[0...workers])
end
245
+
246
# Stops tracking the given workers and sends each of them INT.
#
# pids - a single pid, an Array of pids, or nil (nothing to remove).
#
# For every pid: it is dropped from @workers_running, @number_of_workers is
# decremented, the pid is remembered in @signaled_workers and the process
# is sent INT. A pid whose process no longer exists is reported and
# skipped rather than aborting the remaining removals.
#
# Returns the Array of pids that were handled.
def remove_worker(pids = nil)
  pids = Array(pids) # nil => [], single pid => [pid], Array unchanged
  return pids if pids.empty?

  info "Removing workers #{pids.join(",")} from worker queue. They will die gracefully when they finish what they're doing."
  pids.each do |wpid|
    @workers_running.delete(wpid)
    @number_of_workers -= 1
    warn "REMOVING WORKER #{wpid}"
    @signaled_workers << wpid
    begin
      Process.kill("INT",wpid)
    rescue Errno::ESRCH
      warn "WORKER #{wpid} was already gone."
    end
  end
  pids
end
261
+
262
# Sends +signal+ to the workers listed in the worker queue.
#
# signal      - signal name handed to Process.kill (e.g. "INT").
# worker_type - optional key of @workers_by_type; when given, only workers
#               whose pid is recorded under that type are signalled.
#
# Each signalled pid is dropped from @workers_running and appended to
# @signaled_workers.
def signal_workers(signal,worker_type=nil)
  worker_queue.each do |worker|
    if worker_type
      next unless @workers_by_type[worker_type].include?(worker.process_id)
    end
    warn "SHUTTING DOWN #{worker.process_id} MR: #{worker.map_or_reduce}"
    @workers_running.delete(worker.process_id)
    Process.kill(signal,worker.process_id)
    @signaled_workers << worker.process_id
  end
end
271
+
272
# Asks the manager on every host that appears in the worker statuses to
# restart its workers. Each distinct hostname is contacted once, over DRb
# on port 40000.
def restart_all_workers
  remote_hosts = mq.read_all_worker_statuses.collect { |status| status.hostname }.uniq
  remote_hosts.each do |remote_hostname|
    remote_manager = DRbObject.new(nil,"druby://#{remote_hostname}:40000")
    remote_manager.restart_workers
  end
end
282
+
283
# Forceful restart: sends TERM to every queued worker, flags @restart, sends
# INT to the :master and then the :any workers, sleeps one second per
# expected worker and finally waits for the workers to show up again.
def hard_restart_workers
  @all_workers_started = false
  signal_workers("TERM")
  @restart = true
  [:master, :any].each { |type| signal_workers("INT", type) }
  sleep(@number_of_workers)
  check_started_workers
end
292
+
293
# Graceful restart: sends HUP to every queued worker, forgets all tracked
# pids, sleeps one second per expected worker and then waits for the
# workers to show up in the queue again.
def restart_workers
  @all_workers_started = false
  signal_workers("HUP")
  @workers_running = {}
  sleep(@number_of_workers)
  check_started_workers
end
300
+
301
# Installs the manager's signal handlers:
#   HUP  - restart the workers.
#   TERM - first time: remember it and begin a shutdown; again: terminate.
#   INT  - begin a shutdown, or terminate if one is already under way.
def setup_signals
  Signal.trap("HUP") { restart_workers }

  Signal.trap("TERM") do
    if @term
      terminate
    else
      @term = true
      shutdown
    end
  end

  Signal.trap("INT") { @shutdown ? terminate : shutdown }
end
322
+
323
# Begins a graceful shutdown: sets @shutdown and sends INT to the :master
# workers, then to the :any workers.
def shutdown
  info(:shutdown)
  @shutdown = true
  [:master, :any].each { |type| signal_workers("INT", type) }
end
329
+
330
# Immediate stop: sends TERM to every queued worker and exits the manager
# process.
def terminate
  info(:terminate)
  signal_workers("TERM")
  exit
end
335
+
336
# Runs +command+ through /bin/sh in a forked, detached child process.
#
# command - shell command line to run.
#
# The command is handed to the shell as a single argument, so quotes and
# "$" inside it reach the shell exactly as written (wrapping it in double
# quotes inside one big string let an outer shell re-interpret them).
#
# Returns the child's pid.
def fork_and_exec(command)
  pid = fork do
    exec("/bin/sh", "-c", command)
  end
  # Reap the child in the background so it never lingers as a zombie.
  Process.detach(pid)
  pid
end
344
+
345
# Returns this manager's Skynet::MessageQueue, creating it on first use.
def mq
  return @mq if @mq
  @mq = Skynet::MessageQueue.new
end
348
+
349
# Returns the worker statuses the message queue holds for this host.
def worker_queue
  queue = mq
  queue.read_all_worker_statuses(hostname)
end
352
+
353
# Returns the process id of every worker status in the worker queue.
def worker_queue_pids
  worker_queue.map { |status| status.process_id }
end
356
+
357
# Same as worker_queue_pids, under a shorter name.
def worker_pids
  worker_queue_pids
end
360
+
361
# Returns the pid of the current (manager) process.
def parent_pid
  Process.pid
end
364
+
365
# Returns this machine's hostname; looked up once and cached in @machine_name.
def hostname
  return @machine_name if @machine_name
  @machine_name = Socket.gethostname
end
368
+
369
# Always returns true.
# NOTE(review): presumably a liveness probe for remote (DRb) callers - confirm.
def ping
  true
end
372
+
373
# Command-line entry point for the manager.
#
# options - Hash of settings; command-line switches parsed from ARGV are
#           merged into it. Keys used here: :add_workers, :remove_workers,
#           :workers, :required_libs, :pid_file (:use_rails is only given
#           a default).
#
# Depending on the switches this either
#   * asks an already running manager (over DRb) to restart workers, then exits;
#   * increments the worker version on the message queue, then exits;
#   * asks an already running manager to add or remove workers, then exits; or
#   * writes the pid file, starts a new Manager as a DRb service, launches
#     its workers and runs its main loop.
#
# Raises Skynet::ConnectionError when no message queue can be reached while
# starting a new manager.
def self.start(options={})
  options[:add_workers]    ||= nil
  options[:remove_workers] ||= nil
  options[:use_rails]      ||= false
  options[:required_libs]  ||= []

  OptionParser.new do |opt|
    opt.banner = "Usage: skynet [options]"
    opt.on('', '--restart-all-workers', 'Restart All Workers') do |v|
      puts "Restarting ALL workers on ALL machines."
      begin
        manager = DRbObject.new(nil, Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL])
        manager.restart_all_workers
        exit
      rescue DRb::DRbConnError => e
        puts "No manager running at #{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL]} ERROR: #{e.inspect}"
        exit
      end
    end
    opt.on('', '--restart-workers', 'Restart Workers') do |v|
      puts "Restarting workers on this machine."
      begin
        manager = DRbObject.new(nil, Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL])
        manager.restart_workers
        exit
      rescue DRb::DRbConnError => e
        puts "No manager running at #{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL]} ERROR: #{e.inspect}"
        exit
      end
    end
    opt.on('-i', '--increment-worker-version', 'Increment Worker Version') do |v|
      ver = Skynet::MessageQueue.new.increment_worker_version
      puts "Incrementing Worker Version to #{ver}"
      exit
    end
    opt.on('-a', '--add-workers WORKERS', 'Number of workers to add.') do |v|
      options[:add_workers] = v.to_i
    end
    opt.on('-k', '--remove-workers WORKERS', 'Number of workers to remove.') do |v|
      options[:remove_workers] = v.to_i
    end
    opt.on('-w', '--workers WORKERS', 'Number of workers to start.') do |v|
      options[:workers] = v.to_i
    end
    opt.on('-r', '--required LIBRARY', 'Require the specified libraries') do |v|
      options[:required_libs] << File.expand_path(v)
    end

    opt.parse!(ARGV)
  end

  options[:workers]  ||= Skynet::CONFIG[:NUMBER_OF_WORKERS] || 4
  options[:pid_file] ||= File.join(File.dirname(Skynet::CONFIG[:SKYNET_PIDS_FILE]), "skynet_worker.pid")

  options[:required_libs].each do |adlib|
    begin
      require adlib
    # LoadError is what a failed require raises. MissingSourceFile only
    # exists when ActiveSupport is loaded (where it is a LoadError), so
    # naming it here raised NameError in plain Ruby.
    rescue LoadError => e
      error "The included lib #{adlib} was not found: #{e.inspect}"
      exit
    end
  end

  # Handle add or remove workers
  if options[:add_workers] or options[:remove_workers]
    begin
      manager = DRbObject.new(nil, Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL])
      if options[:add_workers]
        pids = manager.add_worker(options[:add_workers])
        warn "ADDING #{options[:add_workers]} workers PIDS: #{pids.inspect}"
      elsif options[:remove_workers]
        pids = manager.remove_workers(options[:remove_workers])
        warn "REMOVING #{options[:remove_workers]} workers PIDS: #{pids.inspect}"
      end
    rescue DRb::DRbConnError => e
      warn "Couldnt add or remove workers. There are probably no workers running. At least I couldn't find a skynet_manager around at #{Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL]} #{e.inspect}"
    rescue Exception => e
      warn "Couldnt add or remove workers #{e.inspect} #{e.backtrace.join("\n")}"
    end
    exit

  else

    begin
      debug "Making sure there's an available MessageQueue"
      # Constructed only to prove a queue can be reached; the instance is
      # not used afterwards.
      Skynet::MessageQueue.new
    rescue Skynet::ConnectionError => e
      fatal "Couldn't get MessageQueue! #{e.message}"
      raise Skynet::ConnectionError.new("ERROR! Couldn't get MessageQueue! #{e.message}")
    end

    debug "CONTINUING TO START : There IS an available MessageQueue", options

    # create main pid file
    File.open(options[:pid_file], 'w') do |file|
      file.puts($$)
    end

    begin
      info "STARTING THE MANAGER!!!!!!!!!!!"
      @manager = Skynet::Manager.new(Skynet::CONFIG[:LAUNCHER_PATH],options[:workers],options[:required_libs])
      DRb.start_service(Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL], @manager)
      info "WORKER MANAGER URI: #{DRb.uri}"
      @manager.start_workers
      @manager.run
      DRb.thread.join
    rescue SystemExit, Interrupt
      # normal ways for the manager to stop; nothing to report
    rescue Exception => e
      fatal("Error in Manager. Manager Dying. #{e.inspect} #{e.backtrace}")
    end
  end
end
484
+
485
+ end
486
+ end