skynet 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. data/History.txt +4 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +65 -0
  4. data/README.txt +100 -0
  5. data/Rakefile +4 -0
  6. data/app_generators/skynet_install/USAGE +5 -0
  7. data/app_generators/skynet_install/skynet_install_generator.rb +84 -0
  8. data/app_generators/skynet_install/templates/migration.rb +60 -0
  9. data/app_generators/skynet_install/templates/skynet +33 -0
  10. data/app_generators/skynet_install/templates/skynet_console +16 -0
  11. data/bin/skynet +20 -0
  12. data/bin/skynet_console +9 -0
  13. data/bin/skynet_install +12 -0
  14. data/bin/skynet_tuplespace_server +53 -0
  15. data/config/hoe.rb +74 -0
  16. data/config/requirements.rb +17 -0
  17. data/lib/skynet.rb +34 -0
  18. data/lib/skynet/mapreduce_test.rb +25 -0
  19. data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
  20. data/lib/skynet/message_queue_adapters/mysql.rb +573 -0
  21. data/lib/skynet/message_queue_adapters/tuple_space.rb +327 -0
  22. data/lib/skynet/skynet_active_record_extensions.rb +237 -0
  23. data/lib/skynet/skynet_config.rb +59 -0
  24. data/lib/skynet/skynet_console.rb +34 -0
  25. data/lib/skynet/skynet_console_helper.rb +59 -0
  26. data/lib/skynet/skynet_debugger.rb +84 -0
  27. data/lib/skynet/skynet_guid_generator.rb +68 -0
  28. data/lib/skynet/skynet_job.rb +607 -0
  29. data/lib/skynet/skynet_launcher.rb +10 -0
  30. data/lib/skynet/skynet_logger.rb +52 -0
  31. data/lib/skynet/skynet_manager.rb +486 -0
  32. data/lib/skynet/skynet_message.rb +366 -0
  33. data/lib/skynet/skynet_message_queue.rb +100 -0
  34. data/lib/skynet/skynet_ruby_extensions.rb +36 -0
  35. data/lib/skynet/skynet_task.rb +76 -0
  36. data/lib/skynet/skynet_tuplespace_server.rb +82 -0
  37. data/lib/skynet/skynet_worker.rb +395 -0
  38. data/lib/skynet/version.rb +9 -0
  39. data/log/debug.log +0 -0
  40. data/log/skynet.log +29 -0
  41. data/log/skynet_tuplespace_server.log +7 -0
  42. data/log/skynet_worker.pid +1 -0
  43. data/script/destroy +14 -0
  44. data/script/generate +14 -0
  45. data/script/txt2html +74 -0
  46. data/setup.rb +1585 -0
  47. data/sometest.rb +23 -0
  48. data/tasks/deployment.rake +34 -0
  49. data/tasks/environment.rake +7 -0
  50. data/tasks/website.rake +17 -0
  51. data/test/all_models_test.rb +139 -0
  52. data/test/mysql_message_queue_adaptor_test.rb +199 -0
  53. data/test/skynet_manager_test.rb +107 -0
  54. data/test/skynet_message_test.rb +42 -0
  55. data/test/test_generator_helper.rb +20 -0
  56. data/test/test_helper.rb +2 -0
  57. data/test/test_skynet.rb +11 -0
  58. data/test/test_skynet_install_generator.rb +53 -0
  59. data/test/tuplespace_message_queue_test.rb +179 -0
  60. data/tmtags +1242 -0
  61. data/website/index.html +93 -0
  62. data/website/index.txt +39 -0
  63. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  64. data/website/stylesheets/screen.css +138 -0
  65. data/website/template.rhtml +48 -0
  66. metadata +129 -0
@@ -0,0 +1,59 @@
1
# Global Skynet configuration plus helpers for overriding it temporarily.
class Skynet
  LOGDIR = "/var/log"

  # Default settings. Defined only once so that a CONFIG set up before this
  # file is loaded (or a reload of this file) is not clobbered.
  CONFIG = {
    :ENABLE                            => true,
    :SOLO                              => false,
    :SKYNET_LOG_DIR                    => LOGDIR,
    :SKYNET_PID_DIR                    => "/tmp",
    :SKYNET_PIDS_FILE                  => "/tmp/skynet.pid",
    :SKYNET_LOG_FILE                   => STDOUT,
    :SKYNET_LOG_LEVEL                  => Logger::ERROR,
    :SKYNET_LOCAL_MANAGER_URL          => "druby://localhost:40000",
    :MESSAGE_QUEUE_ADAPTER             => "Skynet::MessageQueueAdapter::TupleSpace",
    # :TUPLESPACE_DRBURIS              => ["druby://localhost:47647"]
    # :MESSAGE_QUEUE_ADAPTER           => "Skynet::MessageQueueAdapter::Mysql",
    # :QUEUE_DATABASE                  => "skynet_queue",
    # :MYSQL_TEMPERATURE_CHANGE_SLEEP  => 40,
    :NEXT_TASK_TIMEOUT                 => 60,
    :USE_RINGSERVER                    => true,
    :SERVER_HOSTS                      => ["localhost:7647"],
    :NUMBER_OF_WORKERS                 => 4,
    :WORKER_CHECK_DELAY                => 40,
    # :GUID_GENERATOR                  => nil,
    :PERCENTAGE_OF_TASK_ONLY_WORKERS   => 0.7,
    :PERCENTAGE_OF_MASTER_ONLY_WORKERS => 0.2
  } unless defined?(CONFIG)

  # Merges +config+ into CONFIG.
  #
  # Without a block the change is permanent and nil is returned.
  # With a block the overrides only apply while the block runs: CONFIG is
  # put back to its previous contents afterwards -- even when the block
  # raises -- and the block's value is returned.
  def self.configure(config = {})
    old_config = CONFIG.dup
    config.each { |k, v| CONFIG[k] = v }
    return unless block_given?

    begin
      yield
    ensure
      # Restore in place: other code holds a reference to the CONFIG hash.
      CONFIG.replace(old_config)
    end
  end

  # Runs the given block with Skynet forced into enabled, solo mode.
  #
  # +config+ may carry additional overrides; it is not modified. Returns the
  # block's value, or nil when the block raised (the failure is logged).
  # Raises Skynet::Error when no block is given.
  def self.solo(config = {})
    raise Skynet::Error.new("You must provide a code block to Skynet.solo") unless block_given?

    result = nil
    Skynet::Logger.log = nil
    begin
      # Work on a copy so the caller's hash is left untouched.
      solo_config = config.merge(:ENABLE => true, :SOLO => true)
      solo_config[:SKYNET_LOG_FILE]  ||= STDOUT
      solo_config[:SKYNET_LOG_LEVEL] ||= Logger::ERROR
      configure(solo_config) do
        result = yield
      end
    rescue Exception => e
      # NOTE(review): rescuing Exception also swallows Interrupt/SystemExit.
      # Kept because callers may rely on solo never raising; consider
      # narrowing to StandardError.
      error "Something bad happened #{e.inspect} #{e.backtrace.join("\n")}"
    end
    return result
  end
end
@@ -0,0 +1,34 @@
1
class Skynet
  # Launches an irb session preloaded with Skynet and its console helpers.
  class Console
    # Parses the command line (ARGV is consumed) and replaces the current
    # process with irb.
    #
    # libs - extra libraries to require before the standard ones. The array
    #        passed in is not modified.
    #
    # Does not return on success because of +exec+.
    def self.start(libs = [])
      # Loaded lazily on purpose: only the console executable needs these.
      require 'rubygems'
      require 'optparse'
      require 'skynet'

      # Windows ships irb as a batch file.
      irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

      options = {
        :irb           => irb,
        :required_libs => []
      }

      OptionParser.new do |opt|
        opt.banner = "Usage: skynet_console [options]"
        opt.on("--irb=[#{irb}]", 'Invoke a different irb.') { |v| options[:irb] = v }
        opt.on('-r', '--required LIBRARY', 'Require the specified libraries. To include multiple libraries, include multiple -r options. ie. -r skynet -r fileutils') do |v|
          options[:required_libs] << File.expand_path(v)
        end
        opt.parse!(ARGV)
      end

      all_libs = libs +
                 ["irb/completion", "rubygems", "skynet", "skynet_console_helper"] +
                 options[:required_libs]
      cmd = "#{options[:irb]} -r #{all_libs.join(" -r ")} --simple-prompt"

      exec cmd
    end
  end
end
@@ -0,0 +1,59 @@
1
+
2
# Convenience commands made available at the top level of a skynet console
# session. Each one delegates to the message queue or to the local manager.

# Lazily created message queue shared by the helpers below.
def mq
  return @mq if @mq
  @mq = Skynet::MessageQueue.new
end

# Statistics as reported by the message queue.
def stats
  queue = mq
  queue.stats
end

# Asks the message queue to increment the worker version.
def increment_worker_version
  queue = mq
  queue.increment_worker_version
end

# Worker version as reported by the message queue.
def get_worker_version
  queue = mq
  queue.get_worker_version
end

# Passes all arguments through to the message queue's set_worker_version.
def set_worker_version(*version_args)
  queue = mq
  queue.set_worker_version(*version_args)
end

# Lazily created DRb proxy for the manager at
# Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL].
def manager
  return @manager if @manager
  manager_url = Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL]
  @manager = DRbObject.new(nil, manager_url)
end

# Appends the absolute path of +lib+ to the manager's required_libs and then
# restarts the workers.
def add_lib(lib)
  absolute_path = File.expand_path(lib)
  manager.required_libs << absolute_path
  manager.restart_workers
end

# Tells the manager to restart its workers.
def restart_workers
  local_manager = manager
  local_manager.restart_workers
end

# Tells the manager to add +num+ workers.
def add_workers(num)
  local_manager = manager
  local_manager.add_workers(num)
end

# Tells the manager to remove +num+ workers.
def remove_workers(num)
  local_manager = manager
  local_manager.remove_workers(num)
end
42
+
43
+ # ===============
44
+ # = Doesnt work =
45
+ # ===============
46
+ # def help
47
+ # puts <<-HELP
48
+ # mq
49
+ # stats
50
+ # increment_worker_version
51
+ # get_worker_version
52
+ # set_worker_version(version)
53
+ # manager
54
+ # add_lib(library_to_include) -- forces a restart
55
+ # restart_workers
56
+ # add_workers(number_of_workers)
57
+ # remove_workers(number_of_workers)
58
+ # HELP
59
+ # end
@@ -0,0 +1,84 @@
1
require 'pp' # args_pp relies on #pretty_print_inspect, which pp provides

# Logging mixin. Including it gives a class both instance-level and
# class-level debug/info/warn/error/fatal helpers that write to the logger
# returned by Skynet::Logger.get, prefixing each line with the process id, a
# timestamp and a description of the calling class.
module SkynetDebugger

  # Adds the class-level helpers to whatever class includes this module.
  def self.included(base)
    base.extend ClassMethods
  end

  # Every instance helper simply forwards to its class-level counterpart.

  def log
    self.class.log
  end

  def args_pp(*args)
    self.class.args_pp(*args)
  end

  def debug(*args)
    self.class.debug(*args)
  end

  def info(*args)
    self.class.info(*args)
  end

  # Note: inside including classes this takes the place of Kernel#warn.
  def warn(*args)
    self.class.warn(*args)
  end

  def error(*args)
    self.class.error(*args)
  end

  def fatal(*args)
    self.class.fatal(*args)
  end

  def debug_header
    self.class.debug_header
  end

  module ClassMethods

    # Label shown between <> in every log line; classes may override it.
    def debug_class_desc
      self.to_s
    end

    # "#<pid> <YYYY-MM-DD HH:MM:SS>.<usec> <<class description>>"
    def debug_header
      t = Time.now
      "##{$$} #{t.strftime("%Y-%m-%d %H:%M:%S")}.#{t.usec} <#{debug_class_desc}>"
    end

    def log
      Skynet::Logger.get
    end

    # Pretty-printed form of the extra arguments, or "" when there are none.
    def args_pp(*args)
      "#{args.length > 0 ? args.pretty_print_inspect : ''}"
    end

    # Each helper logs +msg+ followed by the pretty-printed extra arguments
    # at the corresponding logger level.

    def debug(msg, *args)
      log.debug "[DEBUG] #{debug_header} #{msg} #{args_pp(*args)}"
    end

    def info(msg, *args)
      log.info "[INFO] #{debug_header} #{msg} #{args_pp(*args)}"
    end

    def warn(msg, *args)
      log.warn "[WARN] #{debug_header} #{msg} #{args_pp(*args)}"
    end

    def error(msg, *args)
      log.error "[ERROR] #{debug_header} #{msg} #{args_pp(*args)}"
    end

    def fatal(msg, *args)
      log.fatal "[FATAL] #{debug_header} #{msg} #{args_pp(*args)}"
    end

  end

end
@@ -0,0 +1,68 @@
1
+ require 'socket'
2
+
3
+ begin
4
+ require 'fastthread'
5
+ rescue LoadError
6
+ require 'thread'
7
+ end
8
+
9
class Skynet
  # Process-wide settings that feed the default GUID generator.
  class UniqueDBNumGenerator

    class Config
      attr_accessor :lockfile, :pidfile, :server_num, :pid_id, :use_incremental_ids
    end

    @@config ||= Config.new

    # Yields the shared Config object so callers can set its attributes.
    def self.configure
      yield @@config
    end

    # Number identifying this host. Unless one was configured it is derived
    # (once) from a checksum of the hostname. +hostname+ is accepted for
    # compatibility but not used.
    def self.server_num(hostname=nil)
      @@config.server_num ||= Socket.gethostname.sum
    end

    # Id of the current process.
    def self.pid_id
      $$
    end

    def self.use_incremental_ids
      @@config.use_incremental_ids
    end
  end

  # Mixin providing get_unique_id.
  module GuidGenerator

    @@pid_ctr = 0
    # One lock shared by every caller. A mutex created per call (as this
    # code used to do) never blocks anybody and so protects nothing.
    @@guid_mutex ||= Mutex.new

    # Returns a unique Integer id.
    #
    # When Skynet::CONFIG[:GUID_GENERATOR] is set, the id is whatever that
    # callable returns. Otherwise the id is packed, most significant bits
    # first, from:
    #   30 bits  time (1/8 second units since a fixed offset)
    #    8 bits  server number
    #   14 bits  process id
    #   12 bits  per-process counter
    # each part being truncated (modulo) to its width.
    #
    # +nodb+ is accepted for compatibility but not used.
    # Raises RuntimeError when the server number or process id is missing.
    def get_unique_id(nodb=nil)
      if defined?(Skynet::CONFIG) and Skynet::CONFIG[:GUID_GENERATOR]
        Skynet::CONFIG[:GUID_GENERATOR].call
      else
        @@pid_id ||= Skynet::UniqueDBNumGenerator.pid_id

        if not Skynet::UniqueDBNumGenerator.server_num or not @@pid_id
          raise 'SERVER_NUM or PIDID not defined, please check environment.rb for the proper code.'
        end

        @@guid_mutex.synchronize do
          timeprt = Time.now.to_f - 1186210800 # figure it out
          timeprt = timeprt * (2 ** 3)
          @@pid_ctr += 1

          guid_parts = [
            [timeprt, 30],
            [Skynet::UniqueDBNumGenerator.server_num, 8],
            [@@pid_id, 14],
            [@@pid_ctr, 12]
          ]

          # Shift what we have so far to the left, then append the next part.
          guid_parts.inject(0) do |guid, (part, bitlength)|
            (guid << bitlength) + (part.to_i % (2 ** bitlength))
          end
        end
      end
    end

  end
end
@@ -0,0 +1,607 @@
1
+ # require 'ruby2ruby' # XXX this will break unless people have the fix to Ruby2Ruby
2
+ ##### ruby2ruby fix from ruby2ruby.rb ############
3
+ ### XXX This is bad. Some people rely on an exception being thrown if a method is missing! BULLSHIT!
4
+ # class NilClass # Objective-C trick
5
+ # def method_missing(msg, *args, &block)
6
+ # nil
7
+ # end
8
+ # end
9
+ ##############################
10
+
11
+ # Users should create instances of this class. Rather than subclassing,
12
+ # jobs are specialized by assigning lambdas to map, reduce, and partition.
13
+ # This allows the instance to easily create sub-tasks and marshal the map
14
+ # and reduce code for sending to workers.
15
+ #
16
+
17
+ class Skynet
18
# A map/reduce job. Users create instances of this class and specialise them
# by assigning map, reduce and (optionally) partitioner callables or class
# names, then call #run.
class Job
  include SkynetDebugger
  include Skynet::GuidGenerator

  class WorkerError < Skynet::Error
  end

  class BadMapOrReduceError < Skynet::Error
  end

  class Error < Skynet::Error
  end

  @@svn_rev        = nil
  @@worker_version = nil # cache used by #version
  @@log            = nil

  # Attributes that get an accessor and that #to_h serialises.
  FIELDS = [:map_tasks, :reduce_tasks, :silent, :name, :map_timeout, :map_data, :job_id,
            :reduce_timeout, :master_timeout, :master, :map_name, :reduce_name, :async,
            :master_result_timeout, :result_timeout, :start_after, :solo, :single,
            :map, :map_partitioner, :reduce, :reduce_partitioner
           ]

  attr_accessor(*FIELDS)

  # Label used by SkynetDebugger in log lines.
  def self.debug_class_desc
    "JOB"
  end

  # opts - Hash of settings. Timeouts and task counts fall back to defaults.
  #        :map_reduce_class, when given, takes precedence over :map/:reduce.
  #
  # NOTE(review): :job_id, :map_partitioner and :reduce_partitioner passed in
  # opts are not read here (job_id is always freshly generated; partitioners
  # must be assigned through their accessors). Confirm whether that is
  # intended before changing it.
  def initialize(opts = {})
    @name                  = opts[:name]
    @map_name              = opts[:map_name]
    @reduce_name           = opts[:reduce_name]
    @silent                = opts[:silent]
    @master                = opts[:master]
    @async                 = opts[:async]
    @solo                  = opts[:solo]
    @single                = opts[:single]
    @version               = opts[:version] if opts[:version]
    @map_tasks             = opts[:map_tasks] || 2
    @reduce_tasks          = opts[:reduce_tasks] || 1
    @map_timeout           = opts[:map_timeout] || 60
    @reduce_timeout        = opts[:reduce_timeout] || 60
    @master_timeout        = opts[:master_timeout] || 60
    @result_timeout        = opts[:result_timeout] || 1200
    @start_after           = opts[:start_after] || 0
    @master_result_timeout = opts[:master_result_timeout] || 1200

    @map_data = opts[:map_data]
    if opts[:map_reduce_class]
      self.map_reduce_class = opts[:map_reduce_class]
    else
      self.map    = opts[:map] if opts[:map]
      self.reduce = opts[:reduce] if opts[:reduce]
    end

    @job_id = task_id
  end

  # Hash of every FIELDS attribute that has a truthy value.
  # Raises Skynet::Error when map or reduce is a Proc.
  def to_h
    if @map.kind_of?(Proc) or @reduce.kind_of?(Proc)
      raise Skynet::Error.new("You have a Proc in your map or reduce. This can't be turned into a hash.")
    end
    hash = {}
    FIELDS.each do |field|
      value = self.send(field)
      hash[field] = value if value
    end
    hash
  end

  # Lazily created message queue.
  def mq
    @mq ||= Skynet::MessageQueue.new
  end

  ## set_version was supposed to know when to upgrade the version. Haven't figured out how to do this yet
  def set_version
    true
    # return 1 if solo?
    # oldver = mq.get_worker_version || 0
    # if oldver != self.version
    #   mq.set_worker_version(self.version)
    # end
  end

  # Worker version this job runs against: always 1 in solo mode, otherwise
  # the explicitly assigned version or the (class-wide cached) version
  # reported by the message queue.
  def version
    return 1 if solo?
    @@worker_version ||= mq.get_worker_version
    @version ||= @@worker_version
  end

  def version=(v)
    @version = v
  end

  def display_info
    "#{name}, job_id: #{job_id}"
  end

  # Bumps the worker version stored in the message queue; returns the new one.
  def increment_worker_version
    newver = mq.get_worker_version + 1
    mq.set_worker_version(newver)
    newver
  end

  # True when this job, or the global configuration, asks for solo mode.
  def solo?
    @solo || CONFIG[:SOLO]
  end

  def single?
    @single
  end

  # Runs +tasks+ (one task or an Array of them).
  #
  # In solo or single mode each task is run in-process and the Array of their
  # return values is returned. Otherwise one message per task is written to
  # the message queue; async jobs then return true, everything else blocks
  # collecting one result message per task and returns the Array of result
  # payloads.
  #
  # Raises WorkerError when waiting expires after at least one task reported
  # an error, Skynet::RequestExpiredError when it expires with no errors seen.
  #
  # NOTE(review): when every task reports back, payloads of tasks that
  # reported an error are logged but otherwise dropped -- nothing is raised.
  # That is the pre-existing behaviour.
  def run_tasks(tasks, timeout = 5, description = "Generic Task")
    result = {}
    errors = {}
    # Local queue on purpose: this shadows the memoised #mq for this run.
    mq = Skynet::MessageQueue.new unless solo?
    t1 = Time.now
    tasks = [tasks] unless tasks.is_a?(Array)
    info "RUN TASKS #{description} ver: #{self.version} jobid: #{job_id} @ #{t1}"

    # write tasks to the MessageQueue
    task_ids = []
    tasks.each do |task|
      debug "RUN TASKS SUBMITTING #{description} task #{task.task_id} job_id: #{job_id}"
      if solo? or single?
        result[task.task_id] = task.run
      else
        task_ids << task.task_id
        worker_message = Skynet::Message.new(
          :tasktype     => :task,
          :job_id       => job_id,
          :task_id      => task.task_id,
          :payload      => task,
          :payload_type => task.task_or_master,
          :expiry       => timeout,
          :expire_time  => @start_after,
          :iteration    => 0,
          :name         => description,
          :version      => @version
        )
        debug "RUN TASKS WORKER MESSAGE #{description} job_id: #{job_id}", worker_message.to_a
        mq.write_message(worker_message, timeout * 5)
      end
    end

    return result.values if solo? or single?
    return true if async

    debug "GATHER RESULTS for #{description} job_id: #{job_id} - NOT AN ASYNC JOB"

    # retrieve results unless async
    begin
      loop do
        result_message = mq.take_result(job_id, timeout * 2)

        ret_result  = result_message.payload
        reported_id = result_message.task_id
        if result_message.payload_type == :error
          errors[reported_id] = ret_result
          error "ERROR RESULT TASK #{reported_id} returned #{ret_result.inspect}"
        else
          result[reported_id] = ret_result
          debug "RESULT returned TASKID: #{reported_id} #{ret_result.inspect}"
        end
        reported = result.keys + errors.keys
        pending  = task_ids - reported
        debug "RESULT collected: #{reported.size}, remaining: #{pending.size}"
        # A second check that used to follow this one could only be true
        # when this break had already fired, so it has been removed.
        break if pending.empty?
      end
    rescue Skynet::RequestExpiredError
      error "A WORKER EXPIRED or ERRORED, #{description}, job_id: #{job_id}"
      if not errors.empty?
        raise WorkerError.new("WORKER ERROR #{description}, job_id: #{job_id} errors:#{errors.keys.size} out of #{task_ids.size} workers. #{errors.pretty_print_inspect}")
      else
        raise Skynet::RequestExpiredError.new("WORKER ERROR, A WORKER EXPIRED! Did not get results or even errors back from all workers!")
      end
    end
    info "RUN TASKS COMPLETE #{description} jobid: #{job_id} TOOK: #{Time.now - t1}"
    result.values
  end

  # Configures the job from a class (or class name): the class name becomes
  # the map, and also the reduce / partitioners when the class responds to
  # reduce / reduce_partitioner / map_partitioner. Missing names get defaults.
  #
  # Raises BadMapOrReduceError unless given a String or a Class.
  def map_reduce_class=(klass)
    unless klass.class == String or klass.class == Class
      raise BadMapOrReduceError.new("#{self.class}.map_reduce only accepts a class name")
    end
    klass = klass.to_s
    @map = klass
    self.name     ||= "#{klass} MASTER"
    self.map_name ||= "#{klass} MAP"
    target = klass.constantize
    if target.respond_to?(:reduce)
      @reduce = klass
      self.reduce_name ||= "#{klass} REDUCE"
    end
    @reduce_partitioner = klass if target.respond_to?(:reduce_partitioner)
    @map_partitioner    = klass if target.respond_to?(:map_partitioner)
  end

  # Unique id of this job, generated on first use.
  def task_id
    @task_id ||= get_unique_id(1)
  end

  # def run_master
  #   result = run_tasks(master_task,master_timeout,name)
  #   debug "MASTER RESULT #{self.name} job_id: #{self.job_id}", result
  #   result
  # end

  # Builds (once) the master Task: a task whose process runs a copy of this
  # job.
  #
  # Raises Exception when no map has been assigned.
  def master_task
    @master_task ||= begin
      raise Exception.new("No map provided") unless @map
      set_version

      job = Skynet::Job.new(
        :map_timeout        => map_timeout,
        :reduce_timeout     => reduce_timeout,
        :job_id             => task_id,
        :map_data           => @map_data,
        :map_name           => map_name || name,
        :reduce_name        => reduce_name || name,
        :map                => @map,
        :map_partitioner    => @map_partitioner,
        :reduce             => @reduce,
        :reduce_partitioner => @reduce_partitioner,
        :map_tasks          => @map_tasks,
        :reduce_tasks       => @reduce_tasks,
        :name               => @name,
        :version            => version
      )

      # This lambda used to be handed to Job.new (which ignores :process)
      # while the Task below referenced an undefined +process+.
      process = lambda do |data|
        debug "RUNNING MASTER RUN #{name}, job_id:#{job_id}"
        job.run
      end

      Skynet::Task.new(
        :task_id        => task_id,
        :data           => :master,
        :process        => process,
        :map_or_reduce  => :master,
        :name           => self.name,
        :result_timeout => master_result_timeout
      )
    end
  end

  # Run the job and return result arrays
  def run
    run_job
  end

  # Runs the map phase and then, when it produced data, the reduce phase.
  # Returns the final results, or the falsy map result when the map failed.
  # Raises ArgumentError when no map has been assigned.
  def run_job
    debug "RUN 1 BEGIN #{name}, job_id:#{job_id}"
    set_version
    # unless (@map && @reduce)
    raise ArgumentError, "map lambdas not assigned" unless (@map)

    # sometimes people want to run a master with just run. In this case we assume we have to set the data to the map_data
    # XXX seems like a hack

    debug "RUN 2 MAP pre run_map #{name}, job_id:#{job_id}"

    post_map_data = run_map
    debug "RUN 3 REDUCE pre run_reduce #{name}, job_id:#{job_id}"
    return post_map_data unless post_map_data
    results = run_reduce(post_map_data)
    debug "RUN 4 FINISHED run_job #{name}, job_id:#{job_id}"
    results
  end

  # Partition up starting data, create map tasks and run them.
  #
  # Array data is partitioned into at most @map_tasks tasks; other Enumerable
  # data yields one task per element; anything else becomes a single task.
  # Returns the map results (nils removed), or nil when a worker failed.
  def run_map
    map_tasks = Array.new
    # Not every kind of map data (nil, for one) responds to size.
    data_size = @map_data.respond_to?(:size) ? @map_data.size : 'unknown'
    debug "RUN MAP 2.1 #{display_info} data size before partition: #{data_size}"
    debug "RUN MAP 2.1 #{display_info} data before partition:", @map_data
    if @map_data.class == Array
      debug "RUN MAP 2.2 DATA IS Array #{display_info}"
      num_mappers  = @map_data.length < @map_tasks ? @map_data.length : @map_tasks
      pre_map_data = run_map_partitioner(@map_data, num_mappers)
      debug "RUN MAP 2.3 #{display_info} data size after partition: #{pre_map_data.size}"
      debug "RUN MAP 2.3 #{display_info} map data after partition:", pre_map_data

      num_mappers.times do |i|
        map_tasks << new_map_task(pre_map_data[i])
      end
    elsif @map_data.is_a?(Enumerable)
      debug "RUN MAP 2.2 DATA IS ENUMERABLE #{display_info} map_data_class: #{@map_data.class}"
      # NOTE(review): presumably +next+ is expected to yield every element
      # like +each+ does -- confirm against the data sources in use.
      each_method = @map_data.respond_to?(:next) ? :next : :each
      @map_data.send(each_method) do |pre_map_data|
        map_tasks << new_map_task(pre_map_data)
      end
    else
      debug "RUN MAP 2.2 DATA IS NOT ARRAY OR ENUMERABLE #{display_info} map_data_class: #{@map_data.class}"
      map_tasks = [new_map_task(@map_data)]
    end

    begin
      post_map_data = run_tasks(map_tasks, map_timeout, map_name)
    rescue WorkerError => e
      error "MAP FAILED #{display_info} #{e.class} #{e.message.inspect}"
      return nil
    end

    debug "RUN MAP 2.5 RESULTS AFTER RUN #{display_info} results:", post_map_data.inspect

    return nil unless post_map_data

    post_map_data.compact! if post_map_data.class == Array

    return post_map_data
  end

  # Splits +map_data+ into +num_mappers+ partitions using the configured map
  # partitioner: the simple default when none is set, the class-level
  # map_partitioner method when it is a class name, otherwise the callable.
  #
  # NOTE(review): the class-level signature is assumed to mirror
  # reduce_partitioner(data, count) -- confirm.
  def run_map_partitioner(map_data, num_mappers)
    if not @map_partitioner
      Partitioner::simple_partition_data(map_data, num_mappers)
    elsif @map_partitioner.class == String
      @map_partitioner.constantize.map_partitioner(map_data, num_mappers)
    else
      @map_partitioner.call(map_data, num_mappers)
    end
  end

  # Re-partition returning data for reduction, create reduce tasks and run
  # them. Returns +post_map_data+ untouched when there is no data or no
  # reduce; nil when a worker failed. When the first reduce result is a Hash,
  # all Hash results are merged into a single Hash.
  def run_reduce(post_map_data=nil)
    return post_map_data unless post_map_data and @reduce

    num_reducers = @reduce_tasks

    debug "RUN REDUCE 3.1 BEFORE PARTITION #{display_info} num_reducers: #{num_reducers}"
    # debug "RUN REDUCE 3.1 : #{num_reducers} #{name}, job_id:#{job_id}", post_map_data

    reduce_data = run_reduce_partitioner(post_map_data, num_reducers)
    reduce_data.compact!
    debug "RUN REDUCE 3.2 AFTER PARTITION #{display_info} num_reducers: #{reduce_data.length}"
    debug "RUN REDUCE 3.2 AFTER PARTITION #{display_info} data:", reduce_data

    reduce_tasks = reduce_data.map do |partition|
      Skynet::Task.new(
        :task_id        => get_unique_id(1),
        :data           => partition,
        :name           => reduce_name,
        :process        => @reduce,
        :map_or_reduce  => :reduce,
        :result_timeout => result_timeout
      )
    end
    reduce_tasks.compact!

    debug "RUN REDUCE 3.3 CREATED REDUCE TASKS #{display_info}"#, reduce_tasks

    # Reduce and return results
    begin
      results = run_tasks(reduce_tasks, reduce_timeout, reduce_name)
    rescue WorkerError => e
      error "REDUCE FAILED #{display_info} #{e.class} #{e.message.inspect}"
      return nil
    end

    if results.class == Array and results.first.class == Hash
      hash_results = Hash.new
      results.each { |h| hash_results.merge!(h) if h.class == Hash }
      # results.flatten! if results
      results = hash_results
    end
    debug "RUN REDUCE 3.4 AFTER REDUCE #{display_info} results size: #{results.size}"
    debug "RUN REDUCE 3.4 AFTER REDUCE #{display_info} results:", results
    return results
  end

  # Splits +post_map_data+ into +num_reducers+ partitions using the
  # configured reduce partitioner: recombine-and-split when none is set, the
  # class-level reduce_partitioner method when it is a class name, otherwise
  # the callable.
  def run_reduce_partitioner(post_map_data, num_reducers)
    if not @reduce_partitioner
      Partitioner::recombine_and_split.call(post_map_data, num_reducers)
    elsif @reduce_partitioner.class == String
      @reduce_partitioner.constantize.reduce_partitioner(post_map_data, num_reducers)
    else
      @reduce_partitioner.call(post_map_data, num_reducers)
    end
  end

  # Builds one map Task for +data+.
  def new_map_task(data)
    Skynet::Task.new(
      :task_id        => get_unique_id(1),
      :data           => data,
      :process        => @map,
      :name           => map_name,
      :map_or_reduce  => :map,
      :result_timeout => result_timeout
    )
  end
  private :new_map_task

end ### END class Skynet::Job
425
+
426
# A Job whose master task is handed to the message queue without waiting for
# the result. Because the job has to be turned into a Hash, map and reduce
# must be class names rather than Procs.
class AsyncJob < Skynet::Job

  ## XXX Partitioning doesn't work yet!!!!!

  # Same options as Skynet::Job; :async is forced to true. The hash passed in
  # is not modified.
  def initialize(opts = {})
    super(opts.merge(:async => true))
  end

  # Assigns the map. Accepts a String, a Class or a Symbol (stored as a
  # String); raises BadMapOrReduceError for anything else.
  def map=(klass)
    @map = checked_class_name(klass, :map)
  end

  # Assigns the reduce. Accepts a String, a Class or a Symbol (stored as a
  # String); raises BadMapOrReduceError for anything else.
  def reduce=(klass)
    @reduce = checked_class_name(klass, :reduce)
  end

  # In solo mode runs the whole job in-process and returns its results.
  # Otherwise submits the master task and returns this job's id.
  def run_master
    if solo?
      run_job
    else
      run_tasks(master_task, master_timeout, name)
      self.job_id
    end
  end

  # Builds (once) the master Task, whose process is the Hash form of a copy
  # of this job.
  #
  # Raises Exception when no map has been assigned.
  def master_task
    @master_task ||= begin
      raise Exception.new("No map provided") unless @map
      set_version
      job = Skynet::Job.new(
        :map_timeout           => map_timeout,
        :reduce_timeout        => reduce_timeout,
        :job_id                => task_id,
        :map_data              => @map_data,
        :map_name              => map_name || name,
        :reduce_name           => reduce_name || name,
        :map                   => @map,
        :map_partitioner       => @map_partitioner,
        :reduce                => @reduce,
        :reduce_partitioner    => @reduce_partitioner,
        :map_tasks             => @map_tasks,
        :reduce_tasks          => @reduce_tasks,
        :name                  => @name,
        :version               => version,
        :result_timeout        => result_timeout,
        :master_result_timeout => master_result_timeout,
        :solo                  => solo,
        :single                => single
      )
      # The copy above keeps the single flag; this job itself must still go
      # through the message queue when the master task is submitted.
      @single = false

      Skynet::Task.new(
        :task_id        => task_id,
        :data           => nil,
        :process        => job.to_h,
        :map_or_reduce  => :master,
        :name           => self.name,
        :result_timeout => master_result_timeout
      )
    end
  end

  # Runs in-process in solo mode, otherwise submits the master task.
  def run
    if solo?
      super
    else
      run_master
    end
  end

  # Validates a map/reduce assignment and returns the value to store.
  # Symbols used to be rejected before the code that converts them could run.
  def checked_class_name(klass, kind)
    unless klass.is_a?(String) or klass.is_a?(Symbol) or klass.is_a?(Class)
      raise BadMapOrReduceError.new("#{self.class}.#{kind} only accepts a class name")
    end
    klass.is_a?(Symbol) ? klass.to_s : klass
  end
  private :checked_class_name

end ### END class Skynet::AsyncJob
507
+
508
+ # Collection of partitioning utilities
509
+ #
510
# Collection of partitioning utilities
module Partitioner

  # Pretty-printed form of the extra arguments, or "" when there are none.
  def self.args_pp(*args)
    "#{args.length > 0 ? args.pretty_print_inspect : ''}"
  end

  # Writes a debug line to the Skynet logger.
  def self.debug(msg, *args)
    log = Skynet::Logger.get
    log.debug "#{self} PARTITION: #{msg} #{args_pp(*args)}"
  end

  # Split one block of data into partitions.
  #
  # data       - Array to split; nil or an empty Array is returned unchanged.
  # partitions - number of partitions wanted.
  #
  # Returns an Array of non-empty Arrays:
  # * one single-element partition per datum when there are no more data
  #   than partitions;
  # * +partitions+ contiguous slices whose sizes differ by at most one when
  #   there are at least twice as many data as partitions;
  # * otherwise +partitions+ groups filled round-robin.
  def self.simple_partition_data(data, partitions)
    return data if (not data) or data.empty?

    partitioned_data = Array.new

    if partitions >= data.length
      data.each do |datum|
        partitioned_data << [datum]
      end
    elsif (data.length >= partitions * 2)
      # Contiguous slices. The first +extra+ slices take one more element so
      # that nothing is left over; always rounding the slice size up (as this
      # used to do) could leave trailing partitions empty or nil, e.g. 11
      # items into 5 partitions.
      base, extra = data.length.divmod(partitions)
      offset = 0
      partitions.times do |i|
        size = i < extra ? base + 1 : base
        partitioned_data << data[offset, size]
        offset += size
      end
    else
      # Slower method, but partitions evenly
      partitions.times { |i| partitioned_data[i] = Array.new }

      data.each_with_index do |datum, i|
        partitioned_data[i % partitions] << datum
      end
    end

    partitioned_data
  end

  # Tries to be smart about what kind of data its getting, whether array of
  # arrays or array of arrays of arrays.
  #
  # Returns a lambda taking (post_map_data, new_partitions). Data that is not
  # a non-empty Array whose first element is a non-empty Array is returned
  # unchanged; otherwise the outer level is merged and the result re-split
  # with simple_partition_data.
  def self.recombine_and_split
    lambda do |post_map_data, new_partitions|

      return post_map_data unless post_map_data.is_a?(Array) and (not post_map_data.empty?) and post_map_data.first.is_a?(Array) and (not post_map_data.first.empty?)
      if not post_map_data.first.first.is_a?(Array)
        partitioned_data = post_map_data.flatten
      else
        partitioned_data = post_map_data.inject(Array.new) do |data, part|
          data + part
        end
      end
      partitioned_data = Partitioner::simple_partition_data(partitioned_data, new_partitions)
      debug "POST PARTITIONED DATA", partitioned_data
      partitioned_data
    end
  end


  # Smarter partitioner for array data, generates simple sum of array[0]
  # and ensures that all arrays sharing that key go into the same partition.
  #
  # Returns a lambda taking (partitioned_data, new_partitions). Entries that
  # are not two-element Arrays are skipped. Integer keys are used as they
  # are; any other key is reduced to the sum of the bytes of its String form.
  def self.array_data_split_by_first_entry
    lambda do |partitioned_data, new_partitions|
      partitions = Array.new
      new_partitions.times { |i| partitions[i] = Array.new }

      partitioned_data.each do |partition|
        partition.each do |array|
          next unless array.class == Array and array.size == 2
          # Integer rather than Fixnum: Fixnum no longer exists on current
          # Rubies.
          if array[0].kind_of?(Integer)
            key = array[0]
          else
            key = 0
            array[0].to_s.each_byte { |c| key += c }
          end
          partitions[key % new_partitions] << array
        end
      end

      partitions
    end
  end

end
606
+
607
+ end