skynet 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. data/History.txt +4 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +65 -0
  4. data/README.txt +100 -0
  5. data/Rakefile +4 -0
  6. data/app_generators/skynet_install/USAGE +5 -0
  7. data/app_generators/skynet_install/skynet_install_generator.rb +84 -0
  8. data/app_generators/skynet_install/templates/migration.rb +60 -0
  9. data/app_generators/skynet_install/templates/skynet +33 -0
  10. data/app_generators/skynet_install/templates/skynet_console +16 -0
  11. data/bin/skynet +20 -0
  12. data/bin/skynet_console +9 -0
  13. data/bin/skynet_install +12 -0
  14. data/bin/skynet_tuplespace_server +53 -0
  15. data/config/hoe.rb +74 -0
  16. data/config/requirements.rb +17 -0
  17. data/lib/skynet.rb +34 -0
  18. data/lib/skynet/mapreduce_test.rb +25 -0
  19. data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
  20. data/lib/skynet/message_queue_adapters/mysql.rb +573 -0
  21. data/lib/skynet/message_queue_adapters/tuple_space.rb +327 -0
  22. data/lib/skynet/skynet_active_record_extensions.rb +237 -0
  23. data/lib/skynet/skynet_config.rb +59 -0
  24. data/lib/skynet/skynet_console.rb +34 -0
  25. data/lib/skynet/skynet_console_helper.rb +59 -0
  26. data/lib/skynet/skynet_debugger.rb +84 -0
  27. data/lib/skynet/skynet_guid_generator.rb +68 -0
  28. data/lib/skynet/skynet_job.rb +607 -0
  29. data/lib/skynet/skynet_launcher.rb +10 -0
  30. data/lib/skynet/skynet_logger.rb +52 -0
  31. data/lib/skynet/skynet_manager.rb +486 -0
  32. data/lib/skynet/skynet_message.rb +366 -0
  33. data/lib/skynet/skynet_message_queue.rb +100 -0
  34. data/lib/skynet/skynet_ruby_extensions.rb +36 -0
  35. data/lib/skynet/skynet_task.rb +76 -0
  36. data/lib/skynet/skynet_tuplespace_server.rb +82 -0
  37. data/lib/skynet/skynet_worker.rb +395 -0
  38. data/lib/skynet/version.rb +9 -0
  39. data/log/debug.log +0 -0
  40. data/log/skynet.log +29 -0
  41. data/log/skynet_tuplespace_server.log +7 -0
  42. data/log/skynet_worker.pid +1 -0
  43. data/script/destroy +14 -0
  44. data/script/generate +14 -0
  45. data/script/txt2html +74 -0
  46. data/setup.rb +1585 -0
  47. data/sometest.rb +23 -0
  48. data/tasks/deployment.rake +34 -0
  49. data/tasks/environment.rake +7 -0
  50. data/tasks/website.rake +17 -0
  51. data/test/all_models_test.rb +139 -0
  52. data/test/mysql_message_queue_adaptor_test.rb +199 -0
  53. data/test/skynet_manager_test.rb +107 -0
  54. data/test/skynet_message_test.rb +42 -0
  55. data/test/test_generator_helper.rb +20 -0
  56. data/test/test_helper.rb +2 -0
  57. data/test/test_skynet.rb +11 -0
  58. data/test/test_skynet_install_generator.rb +53 -0
  59. data/test/tuplespace_message_queue_test.rb +179 -0
  60. data/tmtags +1242 -0
  61. data/website/index.html +93 -0
  62. data/website/index.txt +39 -0
  63. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  64. data/website/stylesheets/screen.css +138 -0
  65. data/website/template.rhtml +48 -0
  66. metadata +129 -0
@@ -0,0 +1,36 @@
1
module Enumerable
  # Runs the receiver through a Skynet map/reduce job and returns whatever
  # the job's #run returns.
  #
  # klass   - optional class (or class name); its string form is used in the
  #           job/task names and stored as :map_reduce_class.
  # options - hash of job options; each entry overrides the defaults below.
  # block   - optional map step, stored as :map. When a block is given the
  #           job is always a Skynet::Job, even if :async is set.
  #
  # A Hash receiver is sent as an array of single-pair hashes; any other
  # receiver is sent as-is.
  def mapreduce(klass=nil,options={},&block)
    data = is_a?(Hash) ? map { |key, value| { key => value } } : self

    jobopts = {
      :map_tasks             => 20000,
      :map_data              => data,
      :name                  => "#{klass} Enumerable MASTER",
      :map_name              => "#{klass} Enumerable MAP",
      :reduce_name           => "#{klass} Enumerable REDUCE",
      :map_timeout           => 3600,
      :reduce_timeout        => 3600,
      :master_timeout        => 3600,
      :master_result_timeout => 3600,
      :async                 => false
    }
    jobopts[:map_reduce_class] = klass.to_s if klass

    # Caller-supplied options win over the defaults.
    options.each { |key, value| jobopts[key] = value }
    jobopts[:map] = block if block

    # Only block-less jobs that explicitly ask for :async run asynchronously.
    job_class = (block || !jobopts[:async]) ? Skynet::Job : Skynet::AsyncJob
    job_class.new(jobopts).run
  end
end
@@ -0,0 +1,76 @@
1
class Skynet
  # A single unit of work. A task carries its input data plus a "process",
  # which #run interprets as a Proc, as the argument for a nested
  # Skynet::Job (when map_or_reduce is :master), or as the name of a class
  # that responds to the map_or_reduce message.
  class Task

    include SkynetDebugger

    # require 'ostruct'

    # Raised when a required constructor option is missing.
    class ConstructorError < StandardError
    end

    attr_reader :data, :process, :result, :map_or_reduce
    attr_accessor :name, :tuple, :result_timeout

    @@log = nil

    # Label returned for this class; presumably consumed by SkynetDebugger
    # when formatting log lines -- confirm against that module.
    def self.debug_class_desc
      "TASK"
    end

    # opts must contain truthy :task_id, :process and :map_or_reduce;
    # :data, :name and :result_timeout are optional.
    #
    # Raises ConstructorError when a required option is missing.
    def initialize(opts = {})
      if !opts[:task_id] || !opts[:process] || !opts[:map_or_reduce]
        raise ConstructorError.new("Must provide task_id, process and map_or_reduce")
      end

      # Must be set before the process= writer runs, which may clear it.
      @marshalable    = true
      @task_id        = opts[:task_id].to_i
      @data           = opts[:data]
      self.process    = opts[:process]
      @name           = opts[:name]
      @map_or_reduce  = opts[:map_or_reduce]
      @result_timeout = opts[:result_timeout]
    end

    # Stores the process and flags the task as not marshalable when it is
    # a Proc.
    def process=(process)
      @marshalable = false if process.is_a?(Proc)
      @process = process
    end

    # True unless a Proc has been assigned as the process.
    def can_marshal?
      @marshalable
    end

    # Returns the stored map_or_reduce value when it equals :master,
    # otherwise :task.
    def task_or_master
      @map_or_reduce == :master ? @map_or_reduce : :task
    end

    def task_id
      @task_id.to_i
    end

    # Executes the task and returns the result of the selected strategy.
    # Any exception is logged through #error and not re-raised, so the
    # return value is then whatever #error returns. Returns nil when the
    # process matches none of the three supported shapes.
    def run
      debug "running task #{name} task_id:#{task_id} MorR:#{map_or_reduce} PROCESS CLASS: #{@process.class}"
      begin
        if @process.instance_of?(Proc)
          debug " - #{@map_or_reduce} using Proc"
          @process.call @data
        elsif @map_or_reduce == :master
          debug " - as master"
          Skynet::Job.new(@process).run
        elsif @process.instance_of?(String)
          debug " - #{@map_or_reduce} using class #{@process}"
          handler = @process.constantize
          handler.send(@map_or_reduce,@data)
        end
      rescue Exception => err
        error "Error running task #{err.inspect} TASK:", self, err.backtrace.join("\n")
      end
    end

  end ## END class Task
end
@@ -0,0 +1,82 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ # Rinda RingServer
4
+
5
+ require 'rinda/ring'
6
+ require 'rinda/tuplespace'
7
+ require 'rubygems'
8
+ require 'logger'
9
+ require 'optparse'
10
+ require 'pp'
11
+
12
class Rinda::TupleSpaceProxy
  # Replacement for the stock #take: asks the wrapped tuple space to move
  # the first tuple matching +tuple+ (waiting up to +sec+) and returns what
  # #move returns. No port object is handed to the tuple space (nil is
  # passed instead).
  def take(tuple, sec=nil, &block)
    @ts.move(nil, tuple, sec, &block)
  end
end
19
+
20
class Rinda::Tuple

  require 'ostruct'

  # Replacement for the stock array initialiser.
  #
  # For ordinary input the elements of +ary+ are copied one by one into a
  # fresh @tuple array. When +ary+ arrived as a DRb::DRbUnknown (DRb could
  # not unmarshal it), its raw buffer is passed to Marshal.load; if that
  # fails, a Rinda::RindaError describing the failure is raised.
  # NOTE(review): @tuple is left unset when the Marshal.load attempt
  # succeeds -- confirm whether that path is reachable.
  def init_with_ary(ary)
    if !ary.instance_of?(DRb::DRbUnknown)
      @tuple = Array.new(ary.size)
      @tuple.size.times { |idx| @tuple[idx] = ary[idx] }
    else
      begin
        Marshal.load(ary.buf)
      rescue Exception => err
        raise Rinda::RindaError.new("DRb couldn't marshall tuple of type #{ary.name}, it was turned into a DRb::DRbUnknown object.\nMarshal exception #{err.inspect}\nOriginal object:\n\t#{ary.buf}.\n\nStacktrace:\n")
      end
    end
  end
end
39
+
40
class Skynet
  # Empty placeholder definitions for Skynet classes.
  # NOTE(review): presumably present so marshalled Skynet objects can be
  # loaded inside the server process without the full library -- confirm.
  class Task
  end
  class Message
    class Payload
    end
  end

  class AsyncJob
  end

  class Job
  end

  # Starts a Rinda tuple space, advertises it through a Rinda ring server
  # and then blocks on the DRb thread.
  class Server

    # options - Hash with:
    #   :logfile  - log device handed to Logger.new (rotated 'weekly')
    #   :loglevel - name of a Logger severity ("debug", "info", ...),
    #               matched case-insensitively
    #   :port     - port for the Rinda ring server
    #   :drburi   - optional URI; when given, the tuple space is served at it
    #
    # Raises NameError when :loglevel is not a Logger severity name.
    # Failures while starting the services are logged as fatal rather than
    # raised.
    def initialize(options)
      log = Logger.new(options[:logfile], 'weekly')
      # Resolve the severity by constant lookup instead of evaluating the
      # option string as Ruby code; inherit=false restricts the lookup to
      # the severities Logger itself defines.
      log.level = Logger::Severity.const_get(options[:loglevel].to_s.upcase, false)
      log.info "STARTING SKYNET SERVER ON PORT: #{options[:port]} Logging to #{options[:logfile]}"

      # Create a TupleSpace to hold named services, and start running
      begin
        ts = Rinda::TupleSpace.new
        if options[:drburi]
          DRb.start_service(options[:drburi], ts)
        else
          DRb.start_service
        end
        tuple = [:name,:TupleSpace, ts, 'Tuple Space']
        renewer = Rinda::SimpleRenewer.new
        ring_ts = Rinda::TupleSpace.new
        ring_ts.write(tuple, renewer)

        Rinda::RingServer.new(ring_ts, options[:port])
        DRb.thread.join
      rescue Exception => e
        # Exception already covers RuntimeError. The broad rescue is kept
        # so that every way of leaving the join above is still logged.
        log.fatal "Couldn't start Skynet Server #{e.inspect}"
      end

    end
  end
end
@@ -0,0 +1,395 @@
1
+ class Skynet
2
+ class Worker
3
+
4
+ include SkynetDebugger
5
+ include Skynet::GuidGenerator
6
+
7
+ RETRY_TIME = 2
8
+ VERSION_CHECK_DELAY = 5
9
+ MAX_MEMORY = 500
10
+ MEMORY_CHECK_DELAY = 30
11
+ MANAGER_PING_INTERVAL = 60
12
+
13
+ attr_accessor :message,:task, :mq, :processed
14
+ attr_reader :worker_id, :worker_info, :worker_type
15
+
16
+ class Error < StandardError
17
+ end
18
+
19
+ class RespawnWorker < Skynet::Error
20
+ end
21
+
22
+ class ConnectionFailure < Skynet::Error
23
+ end
24
+
25
+ class NoManagerError < Skynet::Error
26
+ end
27
+
28
# Label for this class that embeds the current process id.
def self.debug_class_desc
  "WORKER-#{Process.pid}"
end
31
+
32
# Sets up a worker: obtains an id from get_unique_id, opens a message
# queue, records the worker type (as a symbol) and builds the
# @worker_info hash that the status notifications merge into.
#
# worker_type - anything responding to #to_sym; defaults to :any.
def initialize(worker_type=:any)
  @worker_id   = get_unique_id(1).to_i
  @mq          = Skynet::MessageQueue.new
  @worker_type = worker_type.to_sym
  @processed   = 0
  debug "THIS WORKER TAKES #{worker_type}"

  snapshot = {}
  snapshot[:hostname]    = hostname
  snapshot[:process_id]  = process_id
  snapshot[:worker_type] = payload_type   # nil when the worker takes :any
  snapshot[:worker_id]   = worker_id
  snapshot[:version]     = mq.get_worker_version
  @worker_info = snapshot
end
47
+
48
# Process id of the running Ruby process.
def process_id
  Process.pid
end
51
+
52
# Host name from Socket.gethostname, looked up once and then cached in
# @machine_name.
def hostname
  return @machine_name if @machine_name
  @machine_name = Socket.gethostname
end
55
+
56
# The worker version last stored in @curver (nil until one is recorded).
def version
  return @curver
end
59
+
60
# Tells whether the worker version published on the message queue differs
# from the one this worker recorded, i.e. whether it should respawn.
#
# The first call only records the current version (falling back to 1 when
# the queue has none) and returns false. Later calls consult the queue at
# most once per VERSION_CHECK_DELAY seconds; if the lookup expires, the
# queue's version is set to 1 and false is returned.
def new_version_respawn?
  unless @verchecktime
    @verchecktime = Time.now
    begin
      @curver = mq.get_worker_version
      debug "FINDING INITIAL VER #{@curver}"
    rescue Skynet::RequestExpiredError
      warn "NO INITIAL VER IN MQ using 1"
      @curver = 1
    end
    return false
  end

  # Checked too recently: skip the queue round trip.
  return false if Time.now < (@verchecktime + VERSION_CHECK_DELAY)

  @verchecktime = Time.now
  begin
    latest = mq.get_worker_version
    # debug "CURVER #{@curver} NEWVER: #{latest}"
    if latest != @curver
      info "RESTARTING WORKER ON PID #{$$}"
      return true
    end
  rescue Skynet::RequestExpiredError
    warn "NO CURRENT WORKER REV IN MQ still using 1"
    mq.set_worker_version(1)
    return false
  end

  false
end
91
+
92
+
93
# Takes this worker's status entry off the message queue using a
# 0.00001 second timeout. An expired request or queue timeout is logged
# through #error instead of being raised.
def take_worker_status
  mq.take_worker_status(@worker_info,0.00001)
rescue Skynet::RequestExpiredError, Skynet::QueueTimeout
  error "Couldnt take worker status for #{hostname} pid: #{process_id}"
end
100
+
101
# Writes the initial "waiting" status for this worker to the message
# queue: the worker info merged with a zero processed count and the
# current time.
def notify_worker_started
  idle_status = {
    :name       => "waiting for #{@worker_type}",
    :processed  => 0,
    :started_at => Time.now.to_i
  }
  mq.write_worker_status(@worker_info.merge(idle_status))
end
110
+
111
# Publishes a "task started" status. The given task hash is updated in
# place with the processed count and the start time, then merged over
# the worker info and written to the message queue.
def notify_task_begun(task)
  { :processed => @processed, :started_at => Time.now.to_i }.each do |field, value|
    task[field] = value
  end
  status = @worker_info.merge(task)
  mq.write_worker_status(status)
end
116
+
117
# Counts one more processed task and writes a "waiting" status (task and
# job ids zeroed, map_or_reduce cleared) to the message queue.
def notify_task_complete
  @processed = @processed + 1

  idle_status = {
    :task_id       => 0,
    :job_id        => 0,
    :name          => "waiting for #{@worker_type}",
    :processed     => @processed,
    :map_or_reduce => nil,
    :started_at    => Time.now.to_i
  }
  mq.write_worker_status(@worker_info.merge(idle_status))
end
131
+
132
# Logs that the worker is stopping and takes its status entry back off
# the message queue. Returns what take_worker_status returns.
def notify_worker_stop
  pid = process_id
  info "Worker #{pid} stopping..."
  take_worker_status
end
136
+
137
# The payload type this worker asks the queue for: nil when the worker
# type is :any, otherwise the worker type itself.
def payload_type
  worker_type == :any ? nil : worker_type
end
141
+
142
# Main worker loop. Installs signal handlers, announces the worker, then
# repeatedly takes the next task message from the queue, runs it and
# writes the result back.
#
# Signals: HUP requests a respawn; INT and the first TERM request a
# graceful stop at the top of the next iteration; a second TERM exits
# immediately.
#
# Raises Skynet::Worker::RespawnWorker when a respawn is needed (new
# version, HUP, or memory limit) and re-raises the connection error after
# more than 20 consecutive failed reconnects. Returns normally after an
# interrupt/exit or a NoManagerError.
def start
  conerror = 0
  @curver  = nil

  # setup signal handlers for manager
  Signal.trap("HUP")  { @respawn = true }
  Signal.trap("TERM") { @die ? exit : (@die = true) }
  Signal.trap("INT")  { @die = true }

  raise Skynet::Worker::RespawnWorker.new if new_version_respawn?

  info "STARTING WORKER @ VER #{@curver} (#{@worker_type})"

  notify_worker_started

  # Declared outside the loop so the rescue clauses can see them; task
  # deliberately keeps its value from the previous iteration.
  message = nil
  task    = nil

  loop do
    message = nil
    begin
      # exit raises SystemExit, which the Interrupt/SystemExit clause
      # below turns into a clean shutdown.
      exit if @die
      raise Skynet::Worker::RespawnWorker.new if @respawn

      local_mem = max_memory_reached?
      if local_mem
        raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX MEM AT: #{local_mem} MAX: #{MAX_MEMORY}")
      end

      # A previous iteration lost the connection: open a fresh queue.
      if conerror > 0
        @mq = Skynet::MessageQueue.new
        warn "WORKER RECONNECTED AFTER #{conerror} tries"
        conerror = 0
      end

      message = mq.take_next_task(@curver,0.00001,payload_type)

      next unless message.respond_to?(:payload)

      task = message.payload
      error "BAD MESSAGE", task unless task.respond_to?(:map_or_reduce)

      info "STEP 2 GOT MESSAGE #{message.name} type:#{task.map_or_reduce}, jobid: #{message.job_id}, taskid:#{message.task_id} it: #{message.iteration}"
      debug "STEP 2.1 message=", message.to_a
      next unless task
      # maybe instead of putting a time in the future, it puts the start time and an offset in seconds

      info "STEP 4 RUNNING TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
      notify_task_begun({
        :job_id        => message.job_id,
        :task_id       => message.task_id,
        :iteration     => message.iteration,
        :name          => message.name,
        :map_or_reduce => task.map_or_reduce
      })
      result = task.run

      info "STEP 5 GOT RESULT FROM RUN TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
      debug "STEP 5.1 RESULT DATA:", result

      ## XXX need better result timeout
      mq.write_result(message,result,task.result_timeout)
      info "STEP 6 WROTE RESULT MESSAGE #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
      notify_task_complete
    rescue Skynet::Worker::RespawnWorker => respawn
      info "Respawning and taking worker status"
      notify_worker_stop
      raise respawn
    rescue Skynet::RequestExpiredError
      # Nothing to take right now; use the idle time for a version check.
      if new_version_respawn?
        notify_worker_stop
        raise Skynet::Worker::RespawnWorker.new
      end
      sleep 1
      next
    rescue Skynet::ConnectionError, DRb::DRbConnError => conn_err
      conerror += 1
      # Back off harder once several attempts in a row have failed.
      if conerror > 6
        retry_time = RETRY_TIME * 3
      else
        retry_time = RETRY_TIME
      end
      error "#{conn_err.message}, RETRY #{conerror} in #{retry_time} seconds !!"
      @mq = nil
      sleep retry_time
      if conerror > 20
        fatal "TOO MANY RECONNECTION EXCEPTIONS #{conn_err.message}"
        notify_worker_stop
        raise conn_err
      end
      next
    rescue NoManagerError => no_manager
      fatal no_manager.message
      break
    rescue Interrupt, SystemExit
      warn "Exiting..."
      notify_worker_stop
      break
    rescue Exception => failure
      error "#{failure.inspect} #{failure.backtrace.join("\n")}"
      if message
        # Report the failure to whoever is waiting on this message.
        timeout = task.respond_to?(:result_timeout) ? task.result_timeout : 200
        mq.write_error(message,"#{failure.inspect} #{failure.backtrace.join("\n")}",timeout)
      else
        # what do we do here
      end
      next
    end
  end
end
269
+
270
+ @@ok_to_mem_check = false
271
+ @@lastmem = nil
272
+ @@memct = 0
273
+
274
# Returns the current memory size (a truthy integer) when it exceeds
# MAX_MEMORY, otherwise a falsy value.
#
# Nothing is measured when ok_to_mem_check? is false, on the very first
# call (which only starts the timer), or while fewer than
# MEMORY_CHECK_DELAY seconds have passed since the last measurement.
def max_memory_reached?
  return false unless ok_to_mem_check?

  unless @memchecktime
    @memchecktime = Time.now
    return false
  end

  return false unless Time.now > (@memchecktime + MEMORY_CHECK_DELAY)

  @memchecktime = Time.now
  used = get_memory_size.to_i
  used > MAX_MEMORY ? used : nil
end
287
+
288
# Reads the VmSize entry from a /proc-style status file.
#
# file   - path of the status file.
# format - :pretty returns the size as a decimal string truncated to five
#          places; anything else returns a Float.
#
# The size is the number on the first line containing "VmSize", divided
# by 1000. Returns '0' when the file has no such line or cannot be read
# (the read error is logged through #warn), so callers can always apply
# #to_i to the result.
def find_pid_size(file, format=:notpretty)
  # File.foreach closes the file itself, including on the early returns.
  File.foreach(file) do |line|
    next unless line.index('VmSize')
    # Drop the "VmSize:" label and the trailing " kB\n".
    temp = line[7..-5].strip.to_f/1000
    return BigDecimal(temp.to_s).truncate(5).to_s('F') if format == :pretty
    return temp
  end
  '0'
rescue StandardError => e
  # StandardError only: a SystemExit or Interrupt raised by a signal
  # handler while the file is being read must not be swallowed here.
  warn "ERROR #{e.inspect}"
  '0'
end
302
+
303
# Memory size of the current process as reported by find_pid_size for
# /proc/self/status.
def get_memory_size
  status_path = "/proc/self/status"
  find_pid_size(status_path)
end
306
+
307
# Whether memory checks are possible on this machine, i.e. whether
# /proc/self/status exists. The answer is cached in @@ok_to_mem_check
# (true, or :notok once the file was found missing), so the filesystem is
# probed only until a result is known. The first successful probe also
# records the current memory size in @@lastmem.
def ok_to_mem_check?
  return true if @@ok_to_mem_check == true
  return false if @@ok_to_mem_check == :notok
  # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
  if File.exist?('/proc/self/status')
    @@lastmem ||= get_memory_size.to_i
    return @@ok_to_mem_check = true
  else
    @@ok_to_mem_check = :notok
    return false
  end
end
318
+
319
+
320
+ # kinda like system() but gives me back a pid
321
# Runs +command+ through /bin/sh in a forked child (kinda like system()
# but gives back a pid). The child is detached so it is reaped without
# the caller waiting on it.
#
# command - shell command line.
#
# Returns the pid of the forked child.
def self.fork_and_exec(command)
  sleep 0.01 # remove contention on manager drb object
  # NOTE(review): the result was never used; the call is kept in case
  # Skynet::Logger.get has side effects -- confirm and drop if not.
  Skynet::Logger.get
  info "executing /bin/sh -c \"#{command}\""
  pid = fork do
    # Hand the command to the shell as a single argument. Splicing it into
    # a double-quoted string broke commands containing double quotes and
    # had two shells expand it in turn.
    exec("/bin/sh", "-c", command)
    exit
  end
  Process.detach(pid)
  pid
end
332
+
333
# Command-line entry point: parses ARGV, requires the requested
# libraries, then creates a worker and runs it.
#
# options - Hash, modified in place:
#   :worker_type   - defaults to :any; may be replaced by the
#                    --worker_type switch ("any", "master" or "task")
#   :required_libs - libraries to require; -r/--required switches append
#
# When the worker asks to be respawned, a replacement process is launched
# through fork_and_exec with the same worker type and libraries, and this
# process exits. Any other exception is logged as fatal and handed to
# ExceptionReport.
#
# Raises Skynet::Error for an invalid --worker_type value.
def self.start(options={})
  options[:worker_type]    ||= :any
  options[:required_libs]  ||= []

  OptionParser.new do |opt|
    opt.banner = "Usage: worker [options]"
    opt.on('-r', '--required LIBRARY', 'Include the specified libraries') do |v|
      options[:required_libs] << v
    end
    opt.on('-ot', '--worker_type WORKERTYPE', "master, task or any") do |v|
      if ["any","master","task"].include?(v)
        options[:worker_type] = v
      else
        raise Skynet::Error.new("#{v} is not a valid worker_type")
      end
    end
    opt.parse!(ARGV)
  end

  options[:required_libs].each do |adlib|
    begin
      require adlib
    rescue MissingSourceFile => e
      error "The included lib #{adlib} was not found: #{e.inspect}"
      exit
    end
  end

  # worker_script_path = (Skynet::CONFIG[:WORKER_SCRIPT_PATH] || File.dirname(__FILE__)) << "/skynet_worker"

  debug "WORKER STARTING WORKER_TYPE?:#{options[:worker_type]}"

  begin
    worker = Skynet::Worker.new(options[:worker_type])
    worker.start
  rescue Skynet::Worker::NoManagerError => e
    fatal e.message
    exit
  rescue Skynet::Worker::RespawnWorker => e
    warn "WORKER #{$$} SCRIPT CAUGHT RESPAWN. RESTARTING"
    cmd = "RAILS_ENV=#{RAILS_ENV} ruby #{Skynet::CONFIG[:LAUNCHER_PATH]} --worker_type=#{options[:worker_type]}"
    # The leading space keeps the first -r switch from being glued onto
    # the --worker_type value.
    cmd << " -r #{options[:required_libs].join(' -r ')}" if options[:required_libs] and not options[:required_libs].empty?
    pid = fork_and_exec(cmd)
    warn "parent_pid: #{$$}, child_pid: #{pid}"
    exit
  rescue SystemExit
    info "WORKER #{$$} EXITING GRACEFULLY"
  rescue Exception => e
    fatal "WORKER #{$$} DYING #{e.class} #{e.message} #{e.backtrace}"
    report = ExceptionReport.new(e)
    report.save
  end
end
386
+ end
387
+ end
388
+
389
# Do-nothing stand-in: accepts any constructor arguments and has a #save
# that does nothing and returns nil.
class ExceptionReport
  def initialize(*args)
  end

  def save
    nil
  end
end