skynet 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. data/History.txt +4 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +65 -0
  4. data/README.txt +100 -0
  5. data/Rakefile +4 -0
  6. data/app_generators/skynet_install/USAGE +5 -0
  7. data/app_generators/skynet_install/skynet_install_generator.rb +84 -0
  8. data/app_generators/skynet_install/templates/migration.rb +60 -0
  9. data/app_generators/skynet_install/templates/skynet +33 -0
  10. data/app_generators/skynet_install/templates/skynet_console +16 -0
  11. data/bin/skynet +20 -0
  12. data/bin/skynet_console +9 -0
  13. data/bin/skynet_install +12 -0
  14. data/bin/skynet_tuplespace_server +53 -0
  15. data/config/hoe.rb +74 -0
  16. data/config/requirements.rb +17 -0
  17. data/lib/skynet.rb +34 -0
  18. data/lib/skynet/mapreduce_test.rb +25 -0
  19. data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
  20. data/lib/skynet/message_queue_adapters/mysql.rb +573 -0
  21. data/lib/skynet/message_queue_adapters/tuple_space.rb +327 -0
  22. data/lib/skynet/skynet_active_record_extensions.rb +237 -0
  23. data/lib/skynet/skynet_config.rb +59 -0
  24. data/lib/skynet/skynet_console.rb +34 -0
  25. data/lib/skynet/skynet_console_helper.rb +59 -0
  26. data/lib/skynet/skynet_debugger.rb +84 -0
  27. data/lib/skynet/skynet_guid_generator.rb +68 -0
  28. data/lib/skynet/skynet_job.rb +607 -0
  29. data/lib/skynet/skynet_launcher.rb +10 -0
  30. data/lib/skynet/skynet_logger.rb +52 -0
  31. data/lib/skynet/skynet_manager.rb +486 -0
  32. data/lib/skynet/skynet_message.rb +366 -0
  33. data/lib/skynet/skynet_message_queue.rb +100 -0
  34. data/lib/skynet/skynet_ruby_extensions.rb +36 -0
  35. data/lib/skynet/skynet_task.rb +76 -0
  36. data/lib/skynet/skynet_tuplespace_server.rb +82 -0
  37. data/lib/skynet/skynet_worker.rb +395 -0
  38. data/lib/skynet/version.rb +9 -0
  39. data/log/debug.log +0 -0
  40. data/log/skynet.log +29 -0
  41. data/log/skynet_tuplespace_server.log +7 -0
  42. data/log/skynet_worker.pid +1 -0
  43. data/script/destroy +14 -0
  44. data/script/generate +14 -0
  45. data/script/txt2html +74 -0
  46. data/setup.rb +1585 -0
  47. data/sometest.rb +23 -0
  48. data/tasks/deployment.rake +34 -0
  49. data/tasks/environment.rake +7 -0
  50. data/tasks/website.rake +17 -0
  51. data/test/all_models_test.rb +139 -0
  52. data/test/mysql_message_queue_adaptor_test.rb +199 -0
  53. data/test/skynet_manager_test.rb +107 -0
  54. data/test/skynet_message_test.rb +42 -0
  55. data/test/test_generator_helper.rb +20 -0
  56. data/test/test_helper.rb +2 -0
  57. data/test/test_skynet.rb +11 -0
  58. data/test/test_skynet_install_generator.rb +53 -0
  59. data/test/tuplespace_message_queue_test.rb +179 -0
  60. data/tmtags +1242 -0
  61. data/website/index.html +93 -0
  62. data/website/index.txt +39 -0
  63. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  64. data/website/stylesheets/screen.css +138 -0
  65. data/website/template.rhtml +48 -0
  66. metadata +129 -0
@@ -0,0 +1,36 @@
1
module Enumerable
  # Runs a Skynet map/reduce job over the receiver and returns whatever the
  # job's +run+ returns.
  #
  # klass   - optional class (or class name). When given, its +to_s+ is stored
  #           under :map_reduce_class and used in the job/task names.
  # options - Hash merged over the defaults below; any key may be overridden.
  # block   - optional map proc. When given it is stored under :map (taking
  #           precedence over options[:map]) and the job is always a
  #           Skynet::Job, even if :async was requested.
  #
  # A Skynet::AsyncJob is only used when no block is given and :async is truthy.
  def mapreduce(klass=nil, options={}, &block)
    # A Hash is split into one single-pair Hash per entry; any other
    # Enumerable is handed to the job untouched.
    data = is_a?(Hash) ? map { |k, v| {k => v} } : self

    jobopts = {
      :map_tasks             => 20000,
      :map_data              => data,
      :name                  => "#{klass} Enumerable MASTER",
      :map_name              => "#{klass} Enumerable MAP",
      :reduce_name           => "#{klass} Enumerable REDUCE",
      :map_timeout           => 3600,
      :reduce_timeout        => 3600,
      :master_timeout        => 3600,
      :master_result_timeout => 3600,
      :async                 => false
    }
    jobopts[:map_reduce_class] = klass.to_s if klass

    # Caller options win over the defaults; an explicit block wins over both.
    jobopts.merge!(options)
    jobopts[:map] = block if block

    job_class = (block || !jobopts[:async]) ? Skynet::Job : Skynet::AsyncJob
    job_class.new(jobopts).run
  end
end
@@ -0,0 +1,76 @@
1
class Skynet
  # A single unit of work. A task carries some data plus a "process" that is
  # either a Proc, a class name String, or (for :master tasks) the options for
  # a nested Skynet::Job.
  class Task

    include SkynetDebugger

    # require 'ostruct'

    # Raised by #initialize when a required option is missing.
    class ConstructorError < StandardError
    end

    attr_reader :data, :process, :result, :map_or_reduce
    attr_accessor :name, :tuple, :result_timeout

    @@log = nil

    # Label returned for this class; NOTE(review): presumably consumed by
    # SkynetDebugger when formatting log lines - confirm.
    def self.debug_class_desc
      "TASK"
    end

    # opts - Hash; :task_id, :process and :map_or_reduce are required,
    #        :data, :name and :result_timeout are optional.
    #
    # Raises ConstructorError when any required option is missing or falsy.
    def initialize(opts = {})
      unless opts[:task_id] && opts[:process] && opts[:map_or_reduce]
        raise ConstructorError.new("Must provide task_id, process and map_or_reduce")
      end
      @task_id        = opts[:task_id].to_i
      @data           = opts[:data]
      self.process    = opts[:process]
      @name           = opts[:name]
      @map_or_reduce  = opts[:map_or_reduce]
      @result_timeout = opts[:result_timeout]
    end

    # Stores the process and records whether the task can still be marshalled.
    # The flag is recomputed on every assignment, so replacing a Proc with a
    # String makes the task marshalable again.
    def process=(process)
      @marshalable = !process.is_a?(Proc)
      @process = process
    end

    # False when the current process is a Proc, true otherwise.
    def can_marshal?
      @marshalable
    end

    # Returns :master for master tasks and :task for everything else.
    def task_or_master
      @map_or_reduce == :master ? :master : :task
    end

    # The task id as an Integer.
    def task_id
      @task_id.to_i
    end

    # Executes the task and returns its result:
    #
    # * Proc process   - called with the task data
    # * :master task   - process is passed to Skynet::Job.new and the job is run
    # * String process - constantized, then sent map_or_reduce with the data
    #
    # Any other combination does nothing and returns nil. StandardError and
    # ScriptError raised by the work are logged and not re-raised (the return
    # value is then whatever +error+ returns). SystemExit, Interrupt and other
    # non-standard exceptions propagate to the caller.
    def run
      debug "running task #{name} task_id:#{task_id} MorR:#{map_or_reduce} PROCESS CLASS: #{@process.class}"
      begin
        if @process.is_a?(Proc)
          debug " - #{@map_or_reduce} using Proc"
          @process.call @data
        elsif @map_or_reduce == :master
          debug " - as master"
          Skynet::Job.new(@process).run
        elsif @process.is_a?(String)
          debug " - #{@map_or_reduce} using class #{@process}"
          @process.constantize.send(@map_or_reduce, @data)
        end
      rescue StandardError, ScriptError => e
        error "Error running task #{e.inspect} TASK:", self, e.backtrace.join("\n")
      end
    end

  end ## END class Task
end
@@ -0,0 +1,82 @@
1
+ #!/usr/bin/env ruby -w
2
+
3
+ # Rinda RingServer
4
+
5
+ require 'rinda/ring'
6
+ require 'rinda/tuplespace'
7
+ require 'rubygems'
8
+ require 'logger'
9
+ require 'optparse'
10
+ require 'pp'
11
+
12
class Rinda::TupleSpaceProxy
  # Replacement for the stock #take: asks the wrapped tuple space to +move+
  # the first tuple matching +tuple+, passing nil as the port, and returns
  # what +move+ returns.
  #
  # tuple - template to match
  # sec   - optional timeout handed straight to +move+
  # block - forwarded to +move+
  def take(tuple, sec=nil, &block)
    @ts.move(nil, tuple, sec, &block)
  end
end
19
+
20
class Rinda::Tuple

  require 'ostruct'

  # Builds the tuple from an Array-like object.
  #
  # When +ary+ arrived as a DRb::DRbUnknown (DRb could not unmarshal it on
  # receipt), its raw buffer is unmarshalled again here and the recovered
  # object is used as the tuple source. If that still fails, a
  # Rinda::RindaError describing the original object and the marshal failure
  # is raised.
  #
  # NOTE(review): Marshal.load runs on bytes received over DRb; only use this
  # with trusted peers.
  def init_with_ary(ary)
    if ary.instance_of?(DRb::DRbUnknown)
      unknown = ary
      begin
        ary = Marshal.load(unknown.buf)
      rescue StandardError => e
        raise Rinda::RindaError.new("DRb couldn't marshall tuple of type #{unknown.name}, it was turned into a DRb::DRbUnknown object.\nMarshal exception #{e.inspect}\nOriginal object:\n\t#{unknown.buf}.\n\nStacktrace:\n#{Array(e.backtrace).join("\n")}")
      end
      # The recovered object may turn out to be a Hash tuple.
      return init_with_hash(ary) if ary.respond_to?(:keys)
    end

    @tuple = Array.new(ary.size)
    @tuple.size.times do |i|
      @tuple[i] = ary[i]
    end
  end
end
39
+
40
class Skynet
  # Empty placeholder declarations: they only make sure these constants exist
  # in the server process.
  # NOTE(review): presumably needed so marshalled Skynet objects passing
  # through the tuple space can be resolved here - confirm.
  class Task
  end

  class Message
    class Payload
    end
  end

  class AsyncJob
  end

  class Job
  end

  # Starts a Rinda tuple space plus a ring server that advertises it, then
  # blocks on the DRb thread.
  class Server

    # Logger severity names accepted for options[:loglevel].
    LOG_LEVELS = %w[DEBUG INFO WARN ERROR FATAL UNKNOWN].freeze

    # Maps a severity name (String or Symbol, any case) to the matching
    # Logger constant. The name is looked up in a fixed list instead of being
    # evaluated as Ruby code.
    #
    # Raises ArgumentError for anything that is not a Logger severity.
    def self.log_level_for(name)
      level = name.to_s.upcase
      unless LOG_LEVELS.include?(level)
        raise ArgumentError, "#{name.inspect} is not a valid log level (#{LOG_LEVELS.join(', ')})"
      end
      Logger.const_get(level)
    end

    # options - Hash with
    #           :logfile  - log destination handed to Logger.new (rotated weekly)
    #           :loglevel - Logger severity name, e.g. "debug"
    #           :port     - port for the Rinda::RingServer
    #           :drburi   - optional URI; when set the tuple space is the DRb
    #                       front object on that URI
    #
    # Does not return while the DRb thread is alive. StandardErrors raised
    # during startup are logged as fatal and swallowed; signals and exit
    # requests propagate.
    def initialize(options)
      log = Logger.new(options[:logfile], 'weekly')
      log.level = self.class.log_level_for(options[:loglevel])
      log.info "STARTING SKYNET SERVER ON PORT: #{options[:port]} Logging to #{options[:logfile]}"

      # Create a TupleSpace to hold named services, and start running
      begin
        ts = Rinda::TupleSpace.new
        if options[:drburi]
          DRb.start_service(options[:drburi], ts)
        else
          DRb.start_service
        end

        # Advertise the tuple space on a separate ring tuple space.
        ring_ts = Rinda::TupleSpace.new
        ring_ts.write([:name, :TupleSpace, ts, 'Tuple Space'], Rinda::SimpleRenewer.new)

        Rinda::RingServer.new(ring_ts, options[:port])
        DRb.thread.join
      rescue StandardError => e
        log.fatal "Couldn't start Skynet Server #{e.inspect}"
      end

    end
  end
end
@@ -0,0 +1,395 @@
1
class Skynet
  # A Worker takes task messages from the Skynet message queue, runs each
  # task, writes the result (or an error) back to the queue, and publishes its
  # own status through the queue while doing so.
  class Worker

    include SkynetDebugger
    include Skynet::GuidGenerator

    RETRY_TIME = 2             # seconds slept after a connection error (x3 after 6 failures)
    VERSION_CHECK_DELAY = 5    # minimum seconds between worker-version polls
    MAX_MEMORY = 500           # respawn threshold, compared with get_memory_size
    MEMORY_CHECK_DELAY = 30    # minimum seconds between memory checks
    MANAGER_PING_INTERVAL = 60 # not referenced inside this class

    attr_accessor :message,:task, :mq, :processed
    attr_reader :worker_id, :worker_info, :worker_type

    class Error < StandardError
    end

    # Raised to make the launcher replace this process with a fresh worker.
    class RespawnWorker < Skynet::Error
    end

    class ConnectionFailure < Skynet::Error
    end

    class NoManagerError < Skynet::Error
    end

    def self.debug_class_desc
      "WORKER-#{$$}"
    end

    # worker_type - :any, :master or :task (String or Symbol).
    #
    # Connects to the message queue and builds the static part of the status
    # record this worker publishes.
    def initialize(worker_type=:any)
      @worker_id   = get_unique_id(1).to_i
      @mq          = Skynet::MessageQueue.new
      @worker_type = worker_type.to_sym
      @processed   = 0
      debug "THIS WORKER TAKES #{worker_type}"

      @worker_info = {
        :hostname    => hostname,
        :process_id  => process_id,
        :worker_type => payload_type,
        :worker_id   => worker_id,
        :version     => mq.get_worker_version
      }
    end

    # PID of the current process.
    def process_id
      $$
    end

    # Host name, looked up once and cached.
    def hostname
      @machine_name ||= Socket.gethostname
    end

    # Worker version last read from the queue (nil before the first check).
    def version
      @curver
    end

    # Decides whether the worker must respawn because the version published
    # on the queue changed.
    #
    # The first call only records the current version (falling back to 1 when
    # the queue has none) and returns false. Later calls poll the queue at
    # most once per VERSION_CHECK_DELAY seconds and return true when the
    # queue's version differs from the recorded one.
    def new_version_respawn?
      unless @verchecktime
        @verchecktime = Time.now
        begin
          @curver = mq.get_worker_version
          debug "FINDING INITIAL VER #{@curver}"
        rescue Skynet::RequestExpiredError
          warn "NO INITIAL VER IN MQ using 1"
          @curver = 1
        end
        return false
      end

      return false if Time.now < (@verchecktime + VERSION_CHECK_DELAY)

      @verchecktime = Time.now
      begin
        if mq.get_worker_version != @curver
          info "RESTARTING WORKER ON PID #{$$}"
          return true
        end
      rescue Skynet::RequestExpiredError
        warn "NO CURRENT WORKER REV IN MQ still using 1"
        mq.set_worker_version(1)
      end
      false
    end

    # Removes this worker's status record from the queue. Does nothing when
    # there is no queue connection (the connection-error path in #start sets
    # @mq to nil), so shutdown does not fail with a NoMethodError on nil.
    def take_worker_status
      return unless mq
      begin
        mq.take_worker_status(@worker_info,0.00001)
      rescue Skynet::RequestExpiredError, Skynet::QueueTimeout
        error "Couldnt take worker status for #{hostname} pid: #{process_id}"
      end
    end

    # Publishes the initial "waiting" status.
    def notify_worker_started
      mq.write_worker_status(
        @worker_info.merge({
          :name       => "waiting for #{@worker_type}",
          :processed  => 0,
          :started_at => Time.now.to_i
        })
      )
    end

    # Publishes the status for a task that is about to run.
    #
    # task - Hash describing the task; it is not modified. :processed and
    #        :started_at are always set by this method.
    def notify_task_begun(task)
      status = @worker_info.merge(task).merge({
        :processed  => @processed,
        :started_at => Time.now.to_i
      })
      mq.write_worker_status(status)
    end

    # Bumps the processed counter and publishes the "waiting" status again.
    def notify_task_complete
      @processed += 1

      mq.write_worker_status(
        @worker_info.merge({
          :task_id       => 0,
          :job_id        => 0,
          :name          => "waiting for #{@worker_type}",
          :processed     => @processed,
          :map_or_reduce => nil,
          :started_at    => Time.now.to_i
        })
      )
    end

    def notify_worker_stop
      info "Worker #{process_id} stopping..."
      take_worker_status
    end

    # The payload type passed to the queue: nil for :any workers, otherwise
    # the worker type itself.
    def payload_type
      worker_type == :any ? nil : worker_type
    end

    # Main loop. Installs signal handlers (HUP requests a respawn, INT/TERM
    # request a stop, a second TERM exits immediately), then repeatedly takes
    # a task from the queue, runs it and writes its result back.
    #
    # Raises RespawnWorker when the worker should be replaced (HUP, a new
    # worker version, or too much memory) and re-raises the connection error
    # after more than 20 consecutive connection failures. Returns normally
    # when asked to stop.
    def start
      conerror = 0
      @curver = nil

      # setup signal handlers for manager
      Signal.trap("HUP") { @respawn = true }
      Signal.trap("TERM") do
        if @die
          exit
        else
          @die = true
        end
      end
      Signal.trap("INT") { @die = true }

      raise Skynet::Worker::RespawnWorker.new if new_version_respawn?

      info "STARTING WORKER @ VER #{@curver} (#{@worker_type})"

      notify_worker_started

      message = nil
      task = nil

      loop do
        # Reset both, so the error handlers below never see the previous
        # iteration's message or task.
        message = nil
        task = nil
        begin
          if @die
            exit
          elsif @respawn
            raise Skynet::Worker::RespawnWorker.new
          end

          local_mem = max_memory_reached?
          if local_mem
            raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX MEM AT: #{local_mem} MAX: #{MAX_MEMORY}")
          end

          # A previous iteration lost the connection: build a new queue.
          if conerror > 0
            @mq = Skynet::MessageQueue.new
            warn "WORKER RECONNECTED AFTER #{conerror} tries"
            conerror = 0
          end

          message = mq.take_next_task(@curver,0.00001,payload_type)

          next unless message.respond_to?(:payload)

          task = message.payload
          unless task.respond_to?(:map_or_reduce)
            error "BAD MESSAGE", task
            # Handled by the catch-all below, which reports the failure on
            # the queue for this message.
            raise Error, "BAD MESSAGE: payload #{task.class} does not respond to map_or_reduce"
          end

          info "STEP 2 GOT MESSAGE #{message.name} type:#{task.map_or_reduce}, jobid: #{message.job_id}, taskid:#{message.task_id} it: #{message.iteration}"
          debug "STEP 2.1 message=", message.to_a

          info "STEP 4 RUNNING TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
          notify_task_begun({
            :job_id        => message.job_id,
            :task_id       => message.task_id,
            :iteration     => message.iteration,
            :name          => message.name,
            :map_or_reduce => task.map_or_reduce
          })
          result = task.run

          info "STEP 5 GOT RESULT FROM RUN TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
          debug "STEP 5.1 RESULT DATA:", result

          ## XXX need better result timeout
          mq.write_result(message,result,task.result_timeout)
          info "STEP 6 WROTE RESULT MESSAGE #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
          notify_task_complete
        rescue Skynet::Worker::RespawnWorker => e
          info "Respawning and taking worker status"
          notify_worker_stop
          raise e
        rescue Skynet::RequestExpiredError
          # Nothing to take right now: use the idle time for a version check.
          if new_version_respawn?
            notify_worker_stop
            raise Skynet::Worker::RespawnWorker.new
          end
          sleep 1
        rescue Skynet::ConnectionError, DRb::DRbConnError => e
          conerror += 1
          retry_time = conerror > 6 ? RETRY_TIME * 3 : RETRY_TIME
          error "#{e.message}, RETRY #{conerror} in #{retry_time} seconds !!"
          @mq = nil # forces a reconnect at the top of the next iteration
          sleep retry_time
          if conerror > 20
            fatal "TOO MANY RECONNECTION EXCEPTIONS #{e.message}"
            notify_worker_stop
            raise e
          end
        rescue NoManagerError => e
          fatal e.message
          break
        rescue Interrupt, SystemExit
          warn "Exiting..."
          notify_worker_stop
          break
        rescue Exception => e
          # Deliberate catch-all: a failing task must not take the worker
          # down. The failure is logged and, when a message was taken,
          # reported on the queue.
          error "#{e.inspect} #{e.backtrace.join("\n")}"
          if message
            timeout = task.respond_to?(:result_timeout) ? task.result_timeout : 200
            mq.write_error(message,"#{e.inspect} #{e.backtrace.join("\n")}",timeout)
          end
        end
      end
    end

    @@ok_to_mem_check = false
    @@lastmem = nil
    @@memct = 0

    # Returns the current memory size when it exceeds MAX_MEMORY, otherwise
    # false. The size is sampled at most once per MEMORY_CHECK_DELAY seconds,
    # and never on the first call or when memory checks are unavailable.
    def max_memory_reached?
      return false unless ok_to_mem_check?

      unless @memchecktime
        @memchecktime = Time.now
        return false
      end
      return false unless Time.now > (@memchecktime + MEMORY_CHECK_DELAY)

      @memchecktime = Time.now
      local_mem = get_memory_size.to_i
      local_mem > MAX_MEMORY ? local_mem : false
    end

    # Reads the VmSize entry from a /proc/<pid>/status style file.
    #
    # file   - path of the status file
    # format - :pretty returns a String truncated to 5 decimals (needs
    #          BigDecimal to be loaded); anything else returns a Float
    #
    # The value is the number on the VmSize line divided by 1000. Returns 0.0
    # when the file has no VmSize line, and the String '0' (after logging a
    # warning) when the file cannot be read or parsed.
    def find_pid_size(file, format=:notpretty)
      begin
        # File.foreach closes the file once iteration ends.
        File.foreach(file) do |line|
          next unless line.index('VmSize')
          temp = line[7..-5].strip.to_f/1000
          return BigDecimal(temp.to_s).truncate(5).to_s('F') if format == :pretty
          return temp
        end
        0.0
      rescue StandardError => e
        warn "ERROR #{e.inspect}"
        '0'
      end
    end

    # Memory size of the current process, see #find_pid_size.
    def get_memory_size
      find_pid_size("/proc/self/status")
    end

    # True when /proc/self/status exists, so memory checks are possible. The
    # answer is cached in a class variable for the life of the process.
    def ok_to_mem_check?
      return true if @@ok_to_mem_check == true
      return false if @@ok_to_mem_check == :notok

      if File.exist?('/proc/self/status')
        @@lastmem ||= get_memory_size.to_i
        @@ok_to_mem_check = true
      else
        @@ok_to_mem_check = :notok
        false
      end
    end


    # kinda like system() but gives me back a pid
    #
    # Runs +command+ through /bin/sh in a detached child process and returns
    # the child's pid. The command is handed to the shell as a single
    # argument, so quotes inside it need no extra escaping.
    def self.fork_and_exec(command)
      sleep 0.01 # remove contention on manager drb object
      # Return value unused; call kept in case .get sets the logger up lazily
      # - TODO confirm.
      Skynet::Logger.get
      info "executing /bin/sh -c \"#{command}\""
      pid = fork do
        exec("/bin/sh", "-c", command)
      end
      Process.detach(pid)
      pid
    end

    # Command line entry point: parses ARGV, requires the requested
    # libraries, then runs a worker until it stops.
    #
    # options - Hash of defaults; :worker_type (default :any) and
    #           :required_libs (default []) are filled in from ARGV.
    #
    # When the worker asks to be respawned, a replacement is launched through
    # Skynet::CONFIG[:LAUNCHER_PATH] with the same worker type and libraries,
    # and this process exits.
    def self.start(options={})
      options[:worker_type]   ||= :any
      options[:required_libs] ||= []

      OptionParser.new do |opt|
        opt.banner = "Usage: worker [options]"
        opt.on('-r', '--required LIBRARY', 'Include the specified libraries') do |v|
          options[:required_libs] << v
        end
        opt.on('-ot', '--worker_type WORKERTYPE', "master, task or any") do |v|
          if ["any","master","task"].include?(v)
            options[:worker_type] = v
          else
            raise Skynet::Error.new("#{v} is not a valid worker_type")
          end
        end
        opt.parse!(ARGV)
      end

      options[:required_libs].each do |adlib|
        begin
          require adlib
        rescue LoadError => e
          error "The included lib #{adlib} was not found: #{e.inspect}"
          exit
        end
      end

      debug "WORKER STARTING WORKER_TYPE?:#{options[:worker_type]}"

      begin
        worker = Skynet::Worker.new(options[:worker_type])
        worker.start
      rescue Skynet::Worker::NoManagerError => e
        fatal e.message
        exit
      rescue Skynet::Worker::RespawnWorker
        warn "WORKER #{$$} SCRIPT CAUGHT RESPAWN. RESTARTING"
        cmd = "RAILS_ENV=#{RAILS_ENV} ruby #{Skynet::CONFIG[:LAUNCHER_PATH]} --worker_type=#{options[:worker_type]}"
        # The leading space keeps the first -r apart from the worker type.
        unless options[:required_libs].empty?
          cmd << " -r #{options[:required_libs].join(' -r ')}"
        end
        pid = fork_and_exec(cmd)
        warn "parent_pid: #{$$}, child_pid: #{pid}"
        exit
      rescue SystemExit
        info "WORKER #{$$} EXITING GRACEFULLY"
      rescue Exception => e
        # Last resort: record whatever killed the worker before returning.
        fatal "WORKER #{$$} DYING #{e.class} #{e.message} #{e.backtrace}"
        report = ExceptionReport.new(e)
        report.save
      end
    end
  end
end
388
+
389
# No-op exception reporter: it accepts anything and stores nothing.
class ExceptionReport
  # Takes any number of arguments and ignores them all.
  def initialize(*args); end

  # Does nothing and returns nil.
  def save; end
end