skynet 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. data/History.txt +4 -0
  2. data/License.txt +20 -0
  3. data/Manifest.txt +65 -0
  4. data/README.txt +100 -0
  5. data/Rakefile +4 -0
  6. data/app_generators/skynet_install/USAGE +5 -0
  7. data/app_generators/skynet_install/skynet_install_generator.rb +84 -0
  8. data/app_generators/skynet_install/templates/migration.rb +60 -0
  9. data/app_generators/skynet_install/templates/skynet +33 -0
  10. data/app_generators/skynet_install/templates/skynet_console +16 -0
  11. data/bin/skynet +20 -0
  12. data/bin/skynet_console +9 -0
  13. data/bin/skynet_install +12 -0
  14. data/bin/skynet_tuplespace_server +53 -0
  15. data/config/hoe.rb +74 -0
  16. data/config/requirements.rb +17 -0
  17. data/lib/skynet.rb +34 -0
  18. data/lib/skynet/mapreduce_test.rb +25 -0
  19. data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
  20. data/lib/skynet/message_queue_adapters/mysql.rb +573 -0
  21. data/lib/skynet/message_queue_adapters/tuple_space.rb +327 -0
  22. data/lib/skynet/skynet_active_record_extensions.rb +237 -0
  23. data/lib/skynet/skynet_config.rb +59 -0
  24. data/lib/skynet/skynet_console.rb +34 -0
  25. data/lib/skynet/skynet_console_helper.rb +59 -0
  26. data/lib/skynet/skynet_debugger.rb +84 -0
  27. data/lib/skynet/skynet_guid_generator.rb +68 -0
  28. data/lib/skynet/skynet_job.rb +607 -0
  29. data/lib/skynet/skynet_launcher.rb +10 -0
  30. data/lib/skynet/skynet_logger.rb +52 -0
  31. data/lib/skynet/skynet_manager.rb +486 -0
  32. data/lib/skynet/skynet_message.rb +366 -0
  33. data/lib/skynet/skynet_message_queue.rb +100 -0
  34. data/lib/skynet/skynet_ruby_extensions.rb +36 -0
  35. data/lib/skynet/skynet_task.rb +76 -0
  36. data/lib/skynet/skynet_tuplespace_server.rb +82 -0
  37. data/lib/skynet/skynet_worker.rb +395 -0
  38. data/lib/skynet/version.rb +9 -0
  39. data/log/debug.log +0 -0
  40. data/log/skynet.log +29 -0
  41. data/log/skynet_tuplespace_server.log +7 -0
  42. data/log/skynet_worker.pid +1 -0
  43. data/script/destroy +14 -0
  44. data/script/generate +14 -0
  45. data/script/txt2html +74 -0
  46. data/setup.rb +1585 -0
  47. data/sometest.rb +23 -0
  48. data/tasks/deployment.rake +34 -0
  49. data/tasks/environment.rake +7 -0
  50. data/tasks/website.rake +17 -0
  51. data/test/all_models_test.rb +139 -0
  52. data/test/mysql_message_queue_adaptor_test.rb +199 -0
  53. data/test/skynet_manager_test.rb +107 -0
  54. data/test/skynet_message_test.rb +42 -0
  55. data/test/test_generator_helper.rb +20 -0
  56. data/test/test_helper.rb +2 -0
  57. data/test/test_skynet.rb +11 -0
  58. data/test/test_skynet_install_generator.rb +53 -0
  59. data/test/tuplespace_message_queue_test.rb +179 -0
  60. data/tmtags +1242 -0
  61. data/website/index.html +93 -0
  62. data/website/index.txt +39 -0
  63. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  64. data/website/stylesheets/screen.css +138 -0
  65. data/website/template.rhtml +48 -0
  66. metadata +129 -0
@@ -0,0 +1,59 @@
1
# Global Skynet configuration: the default CONFIG hash plus helpers for
# overriding it permanently (Skynet.configure without a block) or only for
# the duration of a block (Skynet.configure with a block, Skynet.solo).
class Skynet
  # Guarded like CONFIG below so reloading this file does not re-assign it.
  LOGDIR = "/var/log" unless defined?(LOGDIR)

  # Default settings. The hash is mutated in place by Skynet.configure.
  CONFIG = {
    :ENABLE                            => true,
    :SOLO                              => false,
    :SKYNET_LOG_DIR                    => LOGDIR,
    :SKYNET_PID_DIR                    => "/tmp",
    :SKYNET_PIDS_FILE                  => "/tmp/skynet.pid",
    :SKYNET_LOG_FILE                   => STDOUT,
    :SKYNET_LOG_LEVEL                  => Logger::ERROR,
    :SKYNET_LOCAL_MANAGER_URL          => "druby://localhost:40000",
    :MESSAGE_QUEUE_ADAPTER             => "Skynet::MessageQueueAdapter::TupleSpace",
    # :TUPLESPACE_DRBURIS              => ["druby://localhost:47647"]
    # :MESSAGE_QUEUE_ADAPTER           => "Skynet::MessageQueueAdapter::Mysql",
    # :QUEUE_DATABASE                  => "skynet_queue",
    # :MYSQL_TEMPERATURE_CHANGE_SLEEP  => 40,
    :NEXT_TASK_TIMEOUT                 => 60,
    :USE_RINGSERVER                    => true,
    :SERVER_HOSTS                      => ["localhost:7647"],
    :NUMBER_OF_WORKERS                 => 4,
    :WORKER_CHECK_DELAY                => 40,
    # :GUID_GENERATOR                  => nil,
    :PERCENTAGE_OF_TASK_ONLY_WORKERS   => 0.7,
    :PERCENTAGE_OF_MASTER_ONLY_WORKERS => 0.2
  } unless defined?(CONFIG)

  # Merges +config+ into CONFIG.
  #
  # Without a block the change is permanent and nil is returned.
  #
  # With a block the overrides are only in effect while the block runs:
  # CONFIG is restored to its previous contents afterwards, even when the
  # block raises, and the block's value is returned.
  def self.configure(config = {})
    old_config = CONFIG.dup
    CONFIG.update(config)
    return nil unless block_given?

    begin
      yield
    ensure
      # Restore in place so existing references to CONFIG stay valid.
      CONFIG.replace(old_config)
    end
  end

  # Runs the given block with :ENABLE and :SOLO forced to true and with
  # :SKYNET_LOG_FILE / :SKYNET_LOG_LEVEL defaulted when the caller did not
  # supply them. The overrides are applied through Skynet.configure, so they
  # are undone when the block finishes.
  #
  # Returns the block's value, or nil when the block raised: any exception
  # is logged and swallowed.
  #
  # Raises Skynet::Error when no block is given.
  def self.solo(config = {})
    raise Skynet::Error.new("You must provide a code block to Skynet.solo") unless block_given?

    result = nil
    Skynet::Logger.log = nil
    begin
      # Work on a copy so the caller's hash is left untouched.
      solo_config = config.merge(:ENABLE => true, :SOLO => true)
      solo_config[:SKYNET_LOG_FILE]  ||= STDOUT
      solo_config[:SKYNET_LOG_LEVEL] ||= Logger::ERROR
      configure(solo_config) do
        result = yield
      end
    rescue Exception => e
      # NOTE(review): this deliberately catches Exception (best effort) and
      # therefore also hides Interrupt/SystemExit raised inside the block.
      # `error` is not defined in this file -- presumably mixed in elsewhere
      # (e.g. SkynetDebugger); confirm.
      error "Something bad happened #{e.inspect} #{e.backtrace.join("\n")}"
    end
    result
  end
end
@@ -0,0 +1,34 @@
1
class Skynet
  # Launcher for the interactive Skynet console.
  class Console
    # Parses the console options out of ARGV and then replaces the current
    # process with an irb session that pre-requires the Skynet libraries.
    #
    # libs - extra libraries to pass to irb with -r, ahead of the built-in
    #        ones. The array is not modified.
    #
    # Recognised command line options:
    #   --irb=PROGRAM           irb executable to run
    #   -r, --required LIBRARY  additional library to require (repeatable);
    #                           the value is expanded to an absolute path
    #
    # Does not return: the process image is replaced via Kernel#exec.
    def self.start(libs=[])
      require 'rubygems'
      require 'optparse'
      require 'skynet'

      # Windows builds ship irb as a batch file.
      irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

      options = {
        :irb           => irb,
        :required_libs => []
      }

      OptionParser.new do |opt|
        opt.banner = "Usage: skynet_console [options]"
        opt.on("--irb=[#{irb}]", 'Invoke a different irb.') { |v| options[:irb] = v }
        opt.on('-r', '--required LIBRARY', 'Require the specified libraries. To include multiple libraries, include multiple -r options. ie. -r skynet -r fileutils') do |v|
          options[:required_libs] << File.expand_path(v)
        end
        opt.parse!(ARGV)
      end

      # Build a fresh list instead of appending to the caller's array.
      load_libs = libs +
                  ["irb/completion", "rubygems", "skynet", "skynet_console_helper"] +
                  options[:required_libs]
      cmd = "#{options[:irb]} -r #{load_libs.join(" -r ")} --simple-prompt"

      exec cmd
    end
  end
end
@@ -0,0 +1,59 @@
1
+
2
# Convenience commands available at the top level of the Skynet console.

# Message queue handle, created on first use and reused afterwards.
def mq
  return @mq if @mq
  @mq = Skynet::MessageQueue.new
end

# Delegates to the message queue's #stats.
def stats
  queue = mq
  queue.stats
end

# Delegates to the message queue's #increment_worker_version.
def increment_worker_version
  queue = mq
  queue.increment_worker_version
end

# Delegates to the message queue's #get_worker_version.
def get_worker_version
  queue = mq
  queue.get_worker_version
end

# Delegates to the message queue's #set_worker_version, passing all
# arguments through unchanged.
def set_worker_version(*args)
  queue = mq
  queue.set_worker_version(*args)
end

# DRb proxy for the manager at Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL],
# created on first use and reused afterwards.
def manager
  return @manager if @manager
  @manager = DRbObject.new(nil, Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL])
end

# Appends the absolute path of +lib+ to the manager's required_libs and
# then asks the manager to restart its workers.
def add_lib(lib)
  libs = manager.required_libs
  libs << File.expand_path(lib)
  manager.restart_workers
end

# Asks the manager to restart its workers.
def restart_workers
  proxy = manager
  proxy.restart_workers
end

# Asks the manager to add +num+ workers.
def add_workers(num)
  proxy = manager
  proxy.add_workers(num)
end

# Asks the manager to remove +num+ workers.
def remove_workers(num)
  proxy = manager
  proxy.remove_workers(num)
end
42
+
43
+ # ================
44
+ # = Doesn't work =
45
+ # ================
46
+ # def help
47
+ # puts <<-HELP
48
+ # mq
49
+ # stats
50
+ # increment_worker_version
51
+ # get_worker_version
52
+ # set_worker_version(version)
53
+ # manager
54
+ # add_lib(library_to_include) -- forces a restart
55
+ # restart_workers
56
+ # add_workers(number_of_workers)
57
+ # remove_workers(number_of_workers)
58
+ # HELP
59
+ # end
@@ -0,0 +1,84 @@
1
# Logging mixin. Including it gives a class both instance-level and
# class-level debug/info/warn/error/fatal helpers that write to the logger
# returned by Skynet::Logger.get, prefixing every line with the level, the
# process id, a timestamp and a per-class description.
#
# Every instance method simply forwards to the class method of the same
# name, so overriding a class method (for example debug_class_desc) changes
# the output of both.
module SkynetDebugger

  # Hook: also install the class-level helpers on the including class.
  def self.included(base)
    base.extend ClassMethods
  end

  def log
    self.class.log
  end

  def args_pp(*args)
    self.class.args_pp(*args)
  end

  def debug(*args)
    self.class.debug(*args)
  end

  def info(*args)
    self.class.info(*args)
  end

  # Note: this takes precedence over Kernel#warn inside including classes.
  def warn(*args)
    self.class.warn(*args)
  end

  def error(*args)
    self.class.error(*args)
  end

  def fatal(*args)
    self.class.fatal(*args)
  end

  def debug_header
    self.class.debug_header
  end

  module ClassMethods

    # Label placed between <> in the log header; defaults to the class name.
    def debug_class_desc
      self.to_s
    end

    # Header of the form "#<pid> YYYY-MM-DD HH:MM:SS.uuuuuu <desc>".
    # The microseconds are zero-padded to six digits so that, for example,
    # 5 microseconds is rendered as ".000005" rather than ".5".
    def debug_header
      t = Time.now
      "##{$$} #{t.strftime("%Y-%m-%d %H:%M:%S")}.#{'%06d' % t.usec} <#{debug_class_desc}>"
    end

    # The logger every helper writes to.
    def log
      Skynet::Logger.get
    end

    # Pretty-printed rendering of the extra arguments as one array, or an
    # empty string when there are none.
    def args_pp(*args)
      args.empty? ? '' : args.pretty_print_inspect
    end

    # Each helper below logs "[LEVEL] <header> <msg> <extra args>" at the
    # matching logger level.

    def debug(msg, *args)
      log.debug "[DEBUG] #{debug_header} #{msg} #{args_pp(*args)}"
    end

    def info(msg, *args)
      log.info "[INFO] #{debug_header} #{msg} #{args_pp(*args)}"
    end

    def warn(msg, *args)
      log.warn "[WARN] #{debug_header} #{msg} #{args_pp(*args)}"
    end

    def error(msg, *args)
      log.error "[ERROR] #{debug_header} #{msg} #{args_pp(*args)}"
    end

    def fatal(msg, *args)
      log.fatal "[FATAL] #{debug_header} #{msg} #{args_pp(*args)}"
    end

  end

end
@@ -0,0 +1,68 @@
1
+ require 'socket'
2
+
3
+ begin
4
+ require 'fastthread'
5
+ rescue LoadError
6
+ require 'thread'
7
+ end
8
+
9
class Skynet
  # Holds the per-process settings that feed into generated ids.
  class UniqueDBNumGenerator

    class Config
      attr_accessor :lockfile, :pidfile, :server_num, :pid_id, :use_incremental_ids
    end

    @@config ||= Config.new

    # Yields the shared Config object so callers can set its attributes.
    def self.configure
      yield @@config
    end

    # Number identifying this server. Uses the configured value when one is
    # set; otherwise it is derived once from the checksum (String#sum) of
    # +hostname+, or of the local host name when +hostname+ is nil, and then
    # remembered.
    def self.server_num(hostname=nil)
      @@config.server_num ||= (hostname || Socket.gethostname).sum
    end

    # The current process id.
    def self.pid_id
      $$
    end

    def self.use_incremental_ids
      @@config.use_incremental_ids
    end
  end

  # Mixin providing #get_unique_id.
  module GuidGenerator

    @@pid_ctr   = 0
    # One lock shared by every caller. A mutex created per call would never
    # be contended and so would not protect the counter at all.
    @@guid_lock = Mutex.new

    # Returns a new id.
    #
    # When Skynet::CONFIG[:GUID_GENERATOR] is set, the id is whatever that
    # callable returns. Otherwise a 64-bit integer is assembled from, most
    # significant first:
    #
    #   30 bits  time since a fixed epoch (1186210800), in 1/8 second units
    #    8 bits  server number
    #   14 bits  process id
    #   12 bits  per-process call counter
    #
    # Each field is reduced modulo its width, so the fields wrap around.
    #
    # nodb - accepted for compatibility with existing callers; not used.
    def get_unique_id(nodb=nil)
      if defined?(Skynet::CONFIG) and Skynet::CONFIG[:GUID_GENERATOR]
        return Skynet::CONFIG[:GUID_GENERATOR].call
      end

      server_num = Skynet::UniqueDBNumGenerator.server_num
      # Read the pid on every call rather than caching it, so a process that
      # was forked after the first id was generated reports its own pid.
      pid_id     = Skynet::UniqueDBNumGenerator.pid_id

      if not server_num or not pid_id
        raise 'SERVER_NUM or PIDID not defined, please check environment.rb for the proper code.'
      end

      @@guid_lock.synchronize do
        timeprt = (Time.now.to_f - 1186210800) * (2 ** 3)
        @@pid_ctr += 1

        guid_parts = [[timeprt, 30], [server_num, 8], [pid_id, 14], [@@pid_ctr, 12]]
        guid_parts.inject(0) do |guid, (part, bitlength)|
          (guid << bitlength) + (part.to_i % (2 ** bitlength))
        end
      end
    end

  end
end
@@ -0,0 +1,607 @@
1
+ # require 'ruby2ruby' # XXX this will break unless people have the fix to Ruby2Ruby
2
+ ##### ruby2ruby fix from ruby2ruby.rb ############
3
+ ### XXX This is bad. Some people rely on an exception being thrown if a method is missing! BULLSHIT!
4
+ # class NilClass # Objective-C trick
5
+ # def method_missing(msg, *args, &block)
6
+ # nil
7
+ # end
8
+ # end
9
+ ##############################
10
+
11
+ # Users should create instances of this class. Rather than subclassing,
12
+ # jobs are specialized by assigning lambdas to map, reduce, and partition.
13
+ # This allows the instance to easily create sub-tasks and marshal the map
14
+ # and reduce code for sending to workers.
15
+ #
16
+
17
+ class Skynet
18
# A map/reduce job. Instances are configured through the options hash
# and/or the accessors generated from FIELDS, then started with #run.
class Job
  include SkynetDebugger
  include Skynet::GuidGenerator

  class WorkerError < Skynet::Error
  end

  class BadMapOrReduceError < Skynet::Error
  end

  class Error < Skynet::Error
  end

  @@svn_rev        = nil
  @@worker_ver     = nil # not read in this class; kept in case other code refers to it
  @@worker_version = nil # cache used by #version
  @@log            = nil

  FIELDS = [:map_tasks, :reduce_tasks, :silent, :name, :map_timeout, :map_data, :job_id,
            :reduce_timeout, :master_timeout, :master, :map_name, :reduce_name, :async,
            :master_result_timeout, :result_timeout, :start_after, :solo, :single,
            :map, :map_partitioner, :reduce, :reduce_partitioner
           ]

  attr_accessor(*FIELDS)

  # Label used in the log header produced by SkynetDebugger.
  def self.debug_class_desc
    "JOB"
  end

  # opts - Hash of settings. Keys read here:
  #   :name, :map_name, :reduce_name, :silent, :master, :async, :solo,
  #   :single, :version, :map_data,
  #   :map_tasks (default 2), :reduce_tasks (default 1),
  #   :map_timeout, :reduce_timeout, :master_timeout (default 60 each),
  #   :result_timeout, :master_result_timeout (default 1200 each),
  #   :start_after (default 0),
  #   :map_partitioner, :reduce_partitioner,
  #   :map_reduce_class, or else :map and :reduce.
  #
  # The job id is always freshly generated; an :job_id option is ignored.
  def initialize(opts = {})
    @name                  = opts[:name]
    @map_name              = opts[:map_name]
    @reduce_name           = opts[:reduce_name]
    @silent                = opts[:silent]
    @master                = opts[:master]
    @async                 = opts[:async]
    @solo                  = opts[:solo]
    @single                = opts[:single]
    @version               = opts[:version] if opts[:version]
    @map_tasks             = opts[:map_tasks] || 2
    @reduce_tasks          = opts[:reduce_tasks] || 1
    @map_timeout           = opts[:map_timeout] || 60
    @reduce_timeout        = opts[:reduce_timeout] || 60
    @master_timeout        = opts[:master_timeout] || 60
    @result_timeout        = opts[:result_timeout] || 1200
    @start_after           = opts[:start_after] || 0
    @master_result_timeout = opts[:master_result_timeout] || 1200

    # Partitioners handed in through the options (master_task does this)
    # used to be dropped silently. They are assigned before
    # map_reduce_class so that a class providing its own partitioner
    # methods still takes precedence.
    @map_partitioner    = opts[:map_partitioner]
    @reduce_partitioner = opts[:reduce_partitioner]

    @map_data = opts[:map_data]
    if opts[:map_reduce_class]
      self.map_reduce_class = opts[:map_reduce_class]
    else
      self.map    = opts[:map] if opts[:map]
      self.reduce = opts[:reduce] if opts[:reduce]
    end

    @job_id = task_id
  end

  # Hash of every FIELDS entry that currently has a truthy value.
  # Raises Skynet::Error when map or reduce is a Proc.
  def to_h
    if @map.kind_of?(Proc) or @reduce.kind_of?(Proc)
      raise Skynet::Error.new("You have a Proc in your map or reduce. This can't be turned into a hash.")
    end
    hash = {}
    FIELDS.each do |field|
      value = self.send(field)
      hash[field] = value if value
    end
    hash
  end

  # Message queue handle, created on first use.
  def mq
    @mq ||= Skynet::MessageQueue.new
  end

  ## set_version was supposed to know when to upgrade the version. Haven't figured out how to do this yet
  def set_version
    true
    # return 1 if solo?
    # oldver = mq.get_worker_version || 0
    # if oldver != self.version
    #   mq.set_worker_version(self.version)
    # end
  end

  # 1 for solo jobs. Otherwise the explicitly assigned version, falling
  # back to the worker version read from the message queue; that value is
  # cached at class level after the first lookup.
  def version
    return 1 if solo?
    @@worker_version ||= mq.get_worker_version
    @version ||= @@worker_version
  end

  def version=(v)
    @version = v
  end

  def display_info
    "#{name}, job_id: #{job_id}"
  end

  # Stores and returns the queue's worker version plus one. A missing
  # (nil) version is treated as 0.
  def increment_worker_version
    newver = (mq.get_worker_version || 0) + 1
    mq.set_worker_version(newver)
    newver
  end

  # Truthy when this job, or the global CONFIG, is in solo mode.
  def solo?
    (@solo or CONFIG[:SOLO])
  end

  def single?
    @single
  end

  # Runs +tasks+ (a single task or an Array of tasks).
  #
  # In solo or single mode every task is run in-process via task.run and
  # the Array of their return values is returned.
  #
  # Otherwise one Skynet::Message per task is written to a new message
  # queue. For async jobs true is returned right after submission. For
  # other jobs result messages for this job id are taken from the queue
  # until every submitted task id has produced a result or an error, and
  # the Array of successful payloads is returned.
  #
  # Raises WorkerError when taking a result expires after at least one
  # error payload was received; re-raises Skynet::RequestExpiredError when
  # it expires without any.
  def run_tasks(tasks, timeout = 5, description = "Generic Task")
    result = Hash.new
    errors = Hash.new
    queue  = Skynet::MessageQueue.new unless solo?
    t1     = Time.now
    tasks  = [tasks] unless tasks.class == Array
    info "RUN TASKS #{description} ver: #{self.version} jobid: #{job_id} @ #{t1}"

    # write tasks to the MessageQueue
    task_ids = []
    tasks.each do |task|
      debug "RUN TASKS SUBMITTING #{description} task #{task.task_id} job_id: #{job_id}"
      if solo? or single?
        result[task.task_id] = task.run
      else
        task_ids << task.task_id
        worker_message = Skynet::Message.new(
          :tasktype     => :task,
          :job_id       => job_id,
          :task_id      => task.task_id,
          :payload      => task,
          :payload_type => task.task_or_master,
          :expiry       => timeout,
          :expire_time  => @start_after,
          :iteration    => 0,
          :name         => description,
          :version      => @version
        )
        debug "RUN TASKS WORKER MESSAGE #{description} job_id: #{job_id}", worker_message.to_a
        queue.write_message(worker_message, timeout * 5)
      end
    end

    return result.values if solo? or single?
    return true if async

    debug "GATHER RESULTS for #{description} job_id: #{job_id} - NOT AN ASYNC JOB"

    # retrieve results unless async
    begin
      loop do
        result_message = queue.take_result(job_id, timeout * 2)

        ret_result = result_message.payload
        if result_message.payload_type == :error
          errors[result_message.task_id] = ret_result
          error "ERROR RESULT TASK #{result_message.task_id} returned #{errors[result_message.task_id].inspect}"
        else
          result[result_message.task_id] = ret_result
          debug "RESULT returned TASKID: #{result_message.task_id} #{result[result_message.task_id].inspect}"
        end

        collected = result.keys + errors.keys
        remaining = task_ids - collected
        debug "RESULT collected: #{collected.size}, remaining: #{remaining.size}"
        break if remaining.empty?
        # NOTE(review): a second check used to follow here that raised
        # Skynet::Job::Error when (task_ids - (result.keys & errors.keys))
        # was empty. That can only hold when the break above has already
        # fired, so it never ran and has been dropped. As a consequence,
        # error payloads only surface through the expiry path below; when
        # every task answers, errored tasks are simply missing from the
        # returned values. Confirm whether that is intended.
      end
    rescue Skynet::RequestExpiredError => e
      error "A WORKER EXPIRED or ERRORED, #{description}, job_id: #{job_id}"
      if not errors.empty?
        raise WorkerError.new("WORKER ERROR #{description}, job_id: #{job_id} errors:#{errors.keys.size} out of #{task_ids.size} workers. #{errors.pretty_print_inspect}")
      else
        raise Skynet::RequestExpiredError.new("WORKER ERROR, A WORKER EXPIRED! Did not get results or even errors back from all workers!")
      end
    end
    info "RUN TASKS COMPLETE #{description} jobid: #{job_id} TOOK: #{Time.now - t1}"
    result.values
  end

  # Configures map (and, when the class responds to them, reduce and the
  # partitioners) from a class or class name. Everything is stored as the
  # class name String. Default names are filled in when none were set.
  #
  # Raises BadMapOrReduceError unless +klass+ is a String or a Class.
  def map_reduce_class=(klass)
    unless klass.class == String or klass.class == Class
      raise BadMapOrReduceError.new("#{self.class}.map_reduce only accepts a class name")
    end
    klass = klass.to_s
    @map = klass
    self.name     ||= "#{klass} MASTER"
    self.map_name ||= "#{klass} MAP"

    mr_class = klass.constantize
    if mr_class.respond_to?(:reduce)
      @reduce = klass
      self.reduce_name ||= "#{klass} REDUCE"
    end
    @reduce_partitioner = klass if mr_class.respond_to?(:reduce_partitioner)
    @map_partitioner    = klass if mr_class.respond_to?(:map_partitioner)
  end

  # Unique id of this job's own task, generated on first use.
  def task_id
    @task_id ||= get_unique_id(1)
  end

  # def run_master
  #   result = run_tasks(master_task,master_timeout,name)
  #   debug "MASTER RESULT #{self.name} job_id: #{self.job_id}", result
  #   result
  # end

  # Builds (once) the master Skynet::Task. Its process is a lambda that
  # runs a copy of this job.
  #
  # Raises ArgumentError when no map has been assigned.
  def master_task
    @master_task ||= begin
      raise ArgumentError, "No map provided" unless @map
      set_version

      job = Skynet::Job.new(
        :map_timeout        => map_timeout,
        :reduce_timeout     => reduce_timeout,
        :job_id             => task_id,
        :map_data           => @map_data,
        :map_name           => map_name || name,
        :reduce_name        => reduce_name || name,
        :map                => @map,
        :map_partitioner    => @map_partitioner,
        :reduce             => @reduce,
        :reduce_partitioner => @reduce_partitioner,
        :map_tasks          => @map_tasks,
        :reduce_tasks       => @reduce_tasks,
        :name               => @name,
        :version            => version
      )

      # This lambda used to be passed to Job.new as :process (an option
      # Job ignores) while the task below referenced an undefined local
      # named `process`, which raised NameError.
      process = lambda do |data|
        debug "RUNNING MASTER RUN #{name}, job_id:#{job_id}"
        job.run
      end

      Skynet::Task.new(
        :task_id        => task_id,
        :data           => :master,
        :process        => process,
        :map_or_reduce  => :master,
        :name           => self.name,
        :result_timeout => master_result_timeout
      )
    end
  end

  # Run the job and return result arrays
  def run
    run_job
  end

  # Runs the map phase and then the reduce phase. Returns the reduce
  # phase's return value, or the falsy map output when the map phase
  # produced none.
  #
  # Raises ArgumentError when no map has been assigned.
  def run_job
    debug "RUN 1 BEGIN #{name}, job_id:#{job_id}"
    set_version
    # unless (@map && @reduce)
    raise ArgumentError, "map lambdas not assigned" unless (@map)

    # sometimes people want to run a master with just run. In this case we assume we have to set the data to the map_data
    # XXX seems like a hack

    debug "RUN 2 MAP pre run_map #{name}, job_id:#{job_id}"

    post_map_data = run_map
    debug "RUN 3 REDUCE pre run_reduce #{name}, job_id:#{job_id}"
    return post_map_data unless post_map_data
    results = run_reduce(post_map_data)
    debug "RUN 4 FINISHED run_job #{name}, job_id:#{job_id}"
    results
  end

  # Partition up starting data, create map tasks, run them.
  #
  # Array data is split into at most @map_tasks partitions, one task per
  # partition. Other Enumerable data yields one task per element. Anything
  # else becomes a single task.
  #
  # Returns the map results (nil entries removed when it is an Array), or
  # nil when the map tasks failed with WorkerError.
  def run_map
    map_tasks = Array.new
    # Not every kind of map data responds to #size (nil, for one).
    data_size = @map_data.respond_to?(:size) ? @map_data.size : 'n/a'
    debug "RUN MAP 2.1 #{display_info} data size before partition: #{data_size}"
    debug "RUN MAP 2.1 #{display_info} data before partition:", @map_data

    if @map_data.class == Array
      debug "RUN MAP 2.2 DATA IS Array #{display_info}"
      num_mappers  = [@map_data.length, @map_tasks].min
      pre_map_data = run_map_partitioner(@map_data, num_mappers)
      debug "RUN MAP 2.3 #{display_info} data size after partition: #{pre_map_data.size}"
      debug "RUN MAP 2.3 #{display_info} map data after partition:", pre_map_data

      map_tasks = (0...num_mappers).map { |i| new_map_task(pre_map_data[i]) }
    elsif @map_data.is_a?(Enumerable)
      debug "RUN MAP 2.2 DATA IS ENUMERABLE #{display_info} map_data_class: #{@map_data.class}"
      each_method = @map_data.respond_to?(:next) ? :next : :each
      @map_data.send(each_method) do |pre_map_data|
        map_tasks << new_map_task(pre_map_data)
      end
    else
      debug "RUN MAP 2.2 DATA IS NOT ARRAY OR ENUMERABLE #{display_info} map_data_class: #{@map_data.class}"
      map_tasks = [new_map_task(@map_data)]
    end

    # Run map tasks
    begin
      post_map_data = run_tasks(map_tasks, map_timeout, map_name)
    rescue WorkerError => e
      error "MAP FAILED #{display_info} #{e.class} #{e.message.inspect}"
      return nil
    end

    debug "RUN MAP 2.5 RESULTS AFTER RUN #{display_info} results:", post_map_data.inspect

    return nil unless post_map_data

    post_map_data.compact! if post_map_data.class == Array

    return post_map_data
  end

  # Re-partition returning data for reduction, create reduce tasks, run
  # them.
  #
  # Returns +post_map_data+ untouched when it is falsy or no reduce is
  # configured, and nil when the reduce tasks failed with WorkerError.
  # When the results are an Array whose first element is a Hash, all Hash
  # elements are merged into one Hash and that Hash is returned.
  def run_reduce(post_map_data=nil)
    return post_map_data unless post_map_data and @reduce

    num_reducers = @reduce_tasks

    debug "RUN REDUCE 3.1 BEFORE PARTITION #{display_info} num_reducers: #{num_reducers}"
    # debug "RUN REDUCE 3.1 : #{num_reducers} #{name}, job_id:#{job_id}", post_map_data

    reduce_data = run_reduce_partitioner(post_map_data, num_reducers)
    reduce_data.compact!
    debug "RUN REDUCE 3.2 AFTER PARTITION #{display_info} num_reducers: #{reduce_data.length}"
    debug "RUN REDUCE 3.2 AFTER PARTITION #{display_info} data:", reduce_data

    reduce_tasks = (0...reduce_data.length).map do |i|
      Skynet::Task.new(
        :task_id        => get_unique_id(1),
        :data           => reduce_data[i],
        :name           => reduce_name,
        :process        => @reduce,
        :map_or_reduce  => :reduce,
        :result_timeout => result_timeout
      )
    end
    reduce_tasks.compact! if reduce_tasks

    debug "RUN REDUCE 3.3 CREATED REDUCE TASKS #{display_info}"#, reduce_tasks

    # Reduce and return results
    begin
      results = run_tasks(reduce_tasks, reduce_timeout, reduce_name)
    rescue WorkerError => e
      error "REDUCE FAILED #{display_info} #{e.class} #{e.message.inspect}"
      return nil
    end

    if results.class == Array and results.first.class == Hash
      hash_results = Hash.new
      results.each { |h| hash_results.merge!(h) if h.class == Hash }
      # results.flatten! if results
      results = hash_results
    end
    debug "RUN REDUCE 3.4 AFTER REDUCE #{display_info} results size: #{results.size}"
    debug "RUN REDUCE 3.4 AFTER REDUCE #{display_info} results:", results
    return results
  end

  # Splits the map output for the reducers using, in order of preference:
  # the default Partitioner::recombine_and_split when no partitioner is
  # set, the named class's reduce_partitioner method when the partitioner
  # is a String, or the partitioner itself via #call.
  def run_reduce_partitioner(post_map_data, num_reducers)
    if not @reduce_partitioner
      Partitioner::recombine_and_split.call(post_map_data, num_reducers)
    elsif @reduce_partitioner.class == String
      @reduce_partitioner.constantize.reduce_partitioner(post_map_data, num_reducers)
    else
      @reduce_partitioner.call(post_map_data, num_reducers)
    end
  end

  private

  # Splits the map input. Mirrors run_reduce_partitioner, including the
  # String case: map_reduce_class= stores the partitioner as a class name.
  def run_map_partitioner(map_data, num_mappers)
    if not @map_partitioner
      Partitioner::simple_partition_data(map_data, num_mappers)
    elsif @map_partitioner.class == String
      @map_partitioner.constantize.map_partitioner(map_data, num_mappers)
    else
      @map_partitioner.call(map_data, num_mappers)
    end
  end

  # Builds one map task for the given slice of data.
  def new_map_task(data)
    Skynet::Task.new(
      :task_id        => get_unique_id(1),
      :data           => data,
      :process        => @map,
      :name           => map_name,
      :map_or_reduce  => :map,
      :result_timeout => result_timeout
    )
  end

end ### END class Skynet::Job
425
+
426
# A Job that is always asynchronous. Outside solo mode, #run only submits
# a master task describing the job and returns the job id; it does not
# wait for results.
class AsyncJob < Skynet::Job

  ## XXX Partitioning doesn't work yet!!!!!

  # Same options as Skynet::Job, with :async forced to true.
  # Note: the flag is set on the hash that was passed in.
  def initialize(opts = {})
    opts[:async] = true
    super(opts)
  end

  # Sets the map. Only a class name (String or Symbol) or a Class is
  # accepted; a Symbol is stored as a String.
  # Raises BadMapOrReduceError for anything else.
  def map=(klass)
    @map = checked_class_reference(klass, "map")
  end

  # Sets the reduce. Same rules as #map=.
  def reduce=(klass)
    @reduce = checked_class_reference(klass, "reduce")
  end

  # In solo mode runs the job in-process and returns its results.
  # Otherwise submits the master task and returns this job's id.
  def run_master
    if solo?
      run_job
    else
      run_tasks(master_task, master_timeout, name)
      self.job_id
    end
  end

  # Builds (once) the master Skynet::Task. Its process is the Hash form
  # (Job#to_h) of a copy of this job.
  #
  # Raises ArgumentError when no map has been assigned.
  def master_task
    @master_task ||= begin
      raise ArgumentError, "No map provided" unless @map
      set_version
      job = Skynet::Job.new(
        :map_timeout           => map_timeout,
        :reduce_timeout        => reduce_timeout,
        :job_id                => task_id,
        :map_data              => @map_data,
        :map_name              => map_name || name,
        :reduce_name           => reduce_name || name,
        :map                   => @map,
        :map_partitioner       => @map_partitioner,
        :reduce                => @reduce,
        :reduce_partitioner    => @reduce_partitioner,
        :map_tasks             => @map_tasks,
        :reduce_tasks          => @reduce_tasks,
        :name                  => @name,
        :version               => version,
        :result_timeout        => result_timeout,
        :master_result_timeout => master_result_timeout,
        :solo                  => solo,
        :single                => single
      )
      # The copy above carries the single flag; it is cleared on this job
      # once the copy has been built.
      @single = false

      Skynet::Task.new(
        :task_id        => task_id,
        :data           => nil,
        :process        => job.to_h,
        :map_or_reduce  => :master,
        :name           => self.name,
        :result_timeout => master_result_timeout
      )
    end
  end

  # In solo mode behaves like Job#run; otherwise like #run_master.
  def run
    if solo?
      super
    else
      run_master
    end
  end

  private

  # Validates +klass+ for the setter named +setter+ and returns the value
  # to store. The Symbol conversion used to sit behind a check that
  # rejected Symbols, so it could never run.
  def checked_class_reference(klass, setter)
    unless [String, Symbol, Class].include?(klass.class)
      raise BadMapOrReduceError.new("#{self.class}.#{setter} only accepts a class name")
    end
    klass.class == Symbol ? klass.to_s : klass
  end

end ### END class Skynet::AsyncJob
507
+
508
# Collection of partitioning utilities
#
module Partitioner

  # Pretty-printed rendering of the extra arguments as one array, or an
  # empty string when there are none.
  def self.args_pp(*args)
    args.empty? ? '' : args.pretty_print_inspect
  end

  # Writes a debug line, prefixed with this module's name, to the logger
  # returned by Skynet::Logger.get.
  def self.debug(msg,*args)
    log = Skynet::Logger.get
    log.debug "#{self} PARTITION: #{msg} #{args_pp(*args)}"
  end

  # Split one block of data into partitions.
  #
  # data       - Array to split. nil or an empty Array is returned as is.
  # partitions - number of partitions wanted.
  #
  # Returns an Array of Arrays:
  # * at least as many partitions as items: one single-item partition per
  #   item;
  # * at least twice as many items as partitions: exactly +partitions+
  #   contiguous slices in the original order, whose sizes differ by at
  #   most one (the larger slices come first);
  # * otherwise: items are dealt out round-robin over +partitions+ buckets.
  #
  # No returned partition is ever empty or nil.
  def self.simple_partition_data(data, partitions)
    return data if (not data) or data.empty?

    if partitions >= data.length
      data.map { |datum| [datum] }
    elsif data.length >= partitions * 2
      # Rounding the slice size up for every slice, as was done before,
      # can exhaust the data early and leave trailing slices empty or nil
      # (11 items over 5 partitions, for example). Spread the remainder
      # over the leading slices instead.
      base, extra = data.length.divmod(partitions)
      offset = 0
      (0...partitions).map do |i|
        size   = i < extra ? base + 1 : base
        slice  = data[offset, size]
        offset += size
        slice
      end
    else
      buckets = Array.new(partitions) { [] }
      data.each_with_index do |datum, i|
        buckets[i % partitions] << datum
      end
      buckets
    end
  end

  # Returns a lambda |post_map_data, new_partitions| that merges the map
  # output back into one list and re-splits it with
  # simple_partition_data.
  #
  # Tries to be smart about what kind of data it is getting: when the
  # items of the first partition are not Arrays the whole input is
  # flattened completely; when they are Arrays only the outer level is
  # joined. Input that is not a non-empty Array starting with a non-empty
  # Array is returned unchanged.
  def self.recombine_and_split
    lambda do |post_map_data, new_partitions|
      return post_map_data unless post_map_data.is_a?(Array) and (not post_map_data.empty?) and post_map_data.first.is_a?(Array) and (not post_map_data.first.empty?)

      if not post_map_data.first.first.is_a?(Array)
        combined = post_map_data.flatten
      else
        combined = []
        post_map_data.each { |part| combined.concat(part) }
      end
      partitioned_data = Partitioner::simple_partition_data(combined, new_partitions)
      debug "POST PARTITIONED DATA", partitioned_data
      partitioned_data
    end
  end

  # Smarter partitioner for array data. Returns a lambda
  # |partitioned_data, new_partitions| that puts every two-element Array
  # [key, value] found in the given partitions into bucket
  # (key number % new_partitions), so that all pairs sharing a key land in
  # the same partition. The key number is the key itself for Integers and
  # the sum of the bytes of key.to_s for everything else. Entries that are
  # not two-element Arrays are skipped.
  def self.array_data_split_by_first_entry
    lambda do |partitioned_data, new_partitions|
      partitions = Array.new(new_partitions) { [] }

      partitioned_data.each do |partition|
        partition.each do |array|
          next unless array.class == Array and array.size == 2
          # Integer rather than Fixnum: Fixnum no longer exists on current
          # Rubies, and this also covers large integers.
          if array[0].kind_of?(Integer)
            key = array[0]
          else
            key = 0
            array[0].to_s.each_byte { |c| key += c }
          end
          partitions[key % new_partitions] << array
        end
      end

      partitions
    end
  end

end
606
+
607
+ end