timocratic-skynet 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +152 -0
- data/License.txt +20 -0
- data/Manifest.txt +144 -0
- data/README.txt +178 -0
- data/Rakefile +5 -0
- data/app_generators/skynet_install/USAGE +5 -0
- data/app_generators/skynet_install/skynet_install_generator.rb +94 -0
- data/app_generators/skynet_install/templates/migration.rb +43 -0
- data/app_generators/skynet_install/templates/skynet_config.rb +50 -0
- data/app_generators/skynet_install/templates/skynet_initializer.rb +1 -0
- data/app_generators/skynet_install/templates/skynet_mysql_schema.sql +33 -0
- data/bin/skynet +71 -0
- data/bin/skynet_install +36 -0
- data/bin/skynet_tuplespace_server +74 -0
- data/config/hoe.rb +75 -0
- data/config/requirements.rb +17 -0
- data/examples/dgrep/README +70 -0
- data/examples/dgrep/config/skynet_config.rb +26 -0
- data/examples/dgrep/data/shakespeare/README +2 -0
- data/examples/dgrep/data/shakespeare/poetry/loverscomplaint +381 -0
- data/examples/dgrep/data/shakespeare/poetry/rapeoflucrece +2199 -0
- data/examples/dgrep/data/shakespeare/poetry/sonnets +2633 -0
- data/examples/dgrep/data/shakespeare/poetry/various +640 -0
- data/examples/dgrep/data/shakespeare/poetry/venusandadonis +1423 -0
- data/examples/dgrep/data/testfile1.txt +1 -0
- data/examples/dgrep/data/testfile2.txt +1 -0
- data/examples/dgrep/data/testfile3.txt +1 -0
- data/examples/dgrep/data/testfile4.txt +1 -0
- data/examples/dgrep/lib/dgrep.rb +59 -0
- data/examples/dgrep/lib/mapreduce_test.rb +32 -0
- data/examples/dgrep/lib/most_common_words.rb +45 -0
- data/examples/dgrep/script/dgrep +75 -0
- data/examples/rails_mysql_example/README +66 -0
- data/examples/rails_mysql_example/Rakefile +10 -0
- data/examples/rails_mysql_example/app/controllers/application.rb +10 -0
- data/examples/rails_mysql_example/app/helpers/application_helper.rb +3 -0
- data/examples/rails_mysql_example/app/models/user.rb +21 -0
- data/examples/rails_mysql_example/app/models/user_favorite.rb +5 -0
- data/examples/rails_mysql_example/app/models/user_mailer.rb +12 -0
- data/examples/rails_mysql_example/app/views/user_mailer/welcome.erb +5 -0
- data/examples/rails_mysql_example/config/boot.rb +109 -0
- data/examples/rails_mysql_example/config/database.yml +42 -0
- data/examples/rails_mysql_example/config/environment.rb +59 -0
- data/examples/rails_mysql_example/config/environments/development.rb +18 -0
- data/examples/rails_mysql_example/config/environments/production.rb +19 -0
- data/examples/rails_mysql_example/config/environments/test.rb +22 -0
- data/examples/rails_mysql_example/config/initializers/inflections.rb +10 -0
- data/examples/rails_mysql_example/config/initializers/mime_types.rb +5 -0
- data/examples/rails_mysql_example/config/initializers/skynet.rb +1 -0
- data/examples/rails_mysql_example/config/routes.rb +35 -0
- data/examples/rails_mysql_example/config/skynet_config.rb +36 -0
- data/examples/rails_mysql_example/db/migrate/001_create_skynet_tables.rb +43 -0
- data/examples/rails_mysql_example/db/migrate/002_create_users.rb +16 -0
- data/examples/rails_mysql_example/db/migrate/003_create_user_favorites.rb +14 -0
- data/examples/rails_mysql_example/db/schema.rb +85 -0
- data/examples/rails_mysql_example/db/skynet_mysql_schema.sql +33 -0
- data/examples/rails_mysql_example/doc/README_FOR_APP +2 -0
- data/examples/rails_mysql_example/lib/tasks/rails_mysql_example.rake +20 -0
- data/examples/rails_mysql_example/public/.htaccess +40 -0
- data/examples/rails_mysql_example/public/404.html +30 -0
- data/examples/rails_mysql_example/public/422.html +30 -0
- data/examples/rails_mysql_example/public/500.html +30 -0
- data/examples/rails_mysql_example/public/dispatch.cgi +10 -0
- data/examples/rails_mysql_example/public/dispatch.fcgi +24 -0
- data/examples/rails_mysql_example/public/dispatch.rb +10 -0
- data/examples/rails_mysql_example/public/favicon.ico +0 -0
- data/examples/rails_mysql_example/public/images/rails.png +0 -0
- data/examples/rails_mysql_example/public/index.html +277 -0
- data/examples/rails_mysql_example/public/javascripts/application.js +2 -0
- data/examples/rails_mysql_example/public/javascripts/controls.js +963 -0
- data/examples/rails_mysql_example/public/javascripts/dragdrop.js +972 -0
- data/examples/rails_mysql_example/public/javascripts/effects.js +1120 -0
- data/examples/rails_mysql_example/public/javascripts/prototype.js +4225 -0
- data/examples/rails_mysql_example/public/robots.txt +5 -0
- data/examples/rails_mysql_example/script/about +3 -0
- data/examples/rails_mysql_example/script/console +3 -0
- data/examples/rails_mysql_example/script/destroy +3 -0
- data/examples/rails_mysql_example/script/generate +3 -0
- data/examples/rails_mysql_example/script/performance/benchmarker +3 -0
- data/examples/rails_mysql_example/script/performance/profiler +3 -0
- data/examples/rails_mysql_example/script/performance/request +3 -0
- data/examples/rails_mysql_example/script/plugin +3 -0
- data/examples/rails_mysql_example/script/process/inspector +3 -0
- data/examples/rails_mysql_example/script/process/reaper +3 -0
- data/examples/rails_mysql_example/script/process/spawner +3 -0
- data/examples/rails_mysql_example/script/runner +3 -0
- data/examples/rails_mysql_example/script/server +3 -0
- data/examples/rails_mysql_example/test/fixtures/user_favorites.yml +9 -0
- data/examples/rails_mysql_example/test/fixtures/users.yml +11 -0
- data/examples/rails_mysql_example/test/test_helper.rb +38 -0
- data/examples/rails_mysql_example/test/unit/user_favorite_test.rb +8 -0
- data/examples/rails_mysql_example/test/unit/user_test.rb +8 -0
- data/extras/README +7 -0
- data/extras/init.d/skynet +87 -0
- data/extras/nagios/check_skynet.sh +121 -0
- data/extras/rails/controllers/skynet_controller.rb +43 -0
- data/extras/rails/views/skynet/index.rhtml +137 -0
- data/lib/skynet.rb +95 -0
- data/lib/skynet/mapreduce_helper.rb +74 -0
- data/lib/skynet/mapreduce_test.rb +56 -0
- data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
- data/lib/skynet/message_queue_adapters/mysql.rb +509 -0
- data/lib/skynet/message_queue_adapters/tuple_space.rb +316 -0
- data/lib/skynet/skynet_active_record_extensions.rb +280 -0
- data/lib/skynet/skynet_config.rb +232 -0
- data/lib/skynet/skynet_console.rb +50 -0
- data/lib/skynet/skynet_console_helper.rb +66 -0
- data/lib/skynet/skynet_debugger.rb +138 -0
- data/lib/skynet/skynet_guid_generator.rb +68 -0
- data/lib/skynet/skynet_job.rb +892 -0
- data/lib/skynet/skynet_launcher.rb +40 -0
- data/lib/skynet/skynet_logger.rb +62 -0
- data/lib/skynet/skynet_manager.rb +706 -0
- data/lib/skynet/skynet_message.rb +359 -0
- data/lib/skynet/skynet_message_queue.rb +136 -0
- data/lib/skynet/skynet_partitioners.rb +96 -0
- data/lib/skynet/skynet_ruby_extensions.rb +53 -0
- data/lib/skynet/skynet_task.rb +118 -0
- data/lib/skynet/skynet_tuplespace_server.rb +83 -0
- data/lib/skynet/skynet_worker.rb +451 -0
- data/lib/skynet/version.rb +9 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +74 -0
- data/setup.rb +1585 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/website.rake +17 -0
- data/test/test_active_record_extensions.rb +138 -0
- data/test/test_generator_helper.rb +20 -0
- data/test/test_helper.rb +10 -0
- data/test/test_mysql_message_queue_adapter.rb +263 -0
- data/test/test_skynet.rb +19 -0
- data/test/test_skynet_install_generator.rb +49 -0
- data/test/test_skynet_job.rb +717 -0
- data/test/test_skynet_manager.rb +157 -0
- data/test/test_skynet_message.rb +229 -0
- data/test/test_skynet_task.rb +24 -0
- data/test/test_tuplespace_message_queue.rb +174 -0
- data/website/index.html +181 -0
- data/website/index.txt +98 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +247 -0
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'socket'
|
2
|
+
|
3
|
+
begin
|
4
|
+
require 'fastthread'
|
5
|
+
rescue LoadError
|
6
|
+
require 'thread'
|
7
|
+
end
|
8
|
+
|
9
|
+
class Skynet
|
10
|
+
# Supplies the per-host and per-process numbers that Skynet::GuidGenerator
# packs into the ids it builds.
class UniqueDBNumGenerator

  # Plain settings holder; populated through UniqueDBNumGenerator.configure.
  class Config
    attr_accessor :lockfile, :pidfile, :server_num, :pid_id, :use_incremental_ids
  end

  @@config ||= Config.new

  # Yields the shared Config object so the caller can assign settings on it.
  def self.configure
    yield @@config
  end

  # Returns the server number, computing and memoizing it on first use.
  #
  # hostname:: optional host name to derive the number from. When omitted the
  #            local host name (Socket.gethostname) is used. Previously this
  #            argument was accepted but ignored.
  #
  # The value is the String#sum checksum of the host name. Because the result
  # is memoized in the shared Config, only the first call (or an explicit
  # +configure+ assignment) decides the value; later +hostname+ arguments do
  # not change it.
  def self.server_num(hostname = nil)
    @@config.server_num ||= (hostname || Socket.gethostname).sum
  end

  # Returns the current process id.
  def self.pid_id
    $$
  end

  # Returns the configured use_incremental_ids setting (nil unless configured).
  def self.use_incremental_ids
    @@config.use_incremental_ids
  end
end
|
34
|
+
|
35
|
+
# Mixin providing #get_unique_id, which builds an integer id from the current
# time, the server number, the process id and a per-process counter.
module GuidGenerator

  @@pid_ctr = 0

  # A single lock shared by every caller. The previous code built a new Mutex
  # on each call, which can never be contended and therefore did not protect
  # the counter increment below.
  @@guid_lock = Mutex.new

  # Returns a new unique id.
  #
  # If Skynet::CONFIG[:GUID_GENERATOR] is set, the id is whatever that callable
  # returns. Otherwise the id is assembled from four bit fields, most
  # significant first:
  #
  #   30 bits  time (seconds since a fixed offset, in 1/8 second steps)
  #    8 bits  server number
  #   14 bits  process id
  #   12 bits  per-process call counter
  #
  # Each part is reduced modulo its field width, so every field wraps around.
  #
  # nodb:: unused; kept so existing callers that pass it keep working.
  #
  # Raises RuntimeError when the server number or process id is unavailable.
  def get_unique_id(nodb = nil)
    if defined?(Skynet::CONFIG) && Skynet::CONFIG[:GUID_GENERATOR]
      return Skynet::CONFIG[:GUID_GENERATOR].call
    end

    # NOTE(review): the pid is cached for the life of this process image, so a
    # child created by a bare fork would reuse the parent's value -- confirm
    # that workers are started in a way that reloads this module.
    @@pid_id ||= Skynet::UniqueDBNumGenerator.pid_id
    server_num = Skynet::UniqueDBNumGenerator.server_num

    unless server_num && @@pid_id
      raise 'SERVER_NUM or PIDID not defined, please check environment.rb for the proper code.'
    end

    @@guid_lock.synchronize do
      # Multiplying by 2 ** 3 before truncation keeps three fractional bits.
      timeprt = (Time.now.to_f - 1186210800) * (2 ** 3)
      @@pid_ctr += 1

      guid_parts = [[timeprt, 30], [server_num, 8], [@@pid_id, 14], [@@pid_ctr, 12]]

      # Shift the accumulated id left and append each field in turn.
      guid_parts.inject(0) do |guid, (part, bitlength)|
        (guid << bitlength) + (part.to_i % (2 ** bitlength))
      end
    end
  end

end
|
68
|
+
end
|
@@ -0,0 +1,892 @@
|
|
1
|
+
class Skynet
|
2
|
+
# Skynet::Job is the main interface to Skynet. You create a job object giving
|
3
|
+
# it the starting data (map_data), along with what class has the map/reduce
|
4
|
+
# functions in it. Even though Skynet is distributed, when you call #run on
|
5
|
+
# a plain Skynet::Job, it will still block in your current process until it has completed
|
6
|
+
# your task. If you want to go on to do other things you'll want to pass :async => true
|
7
|
+
# when creating a new job. Then later call job.results to retrieve your results.
|
8
|
+
#
|
9
|
+
# There are also many global configuration options which can be controlled through Skynet::CONFIG
|
10
|
+
#
|
11
|
+
# Example Usage:
|
12
|
+
# Create a file called mapreduce_test.rb with the following.
|
13
|
+
#
|
14
|
+
# class MapreduceTest
|
15
|
+
# include SkynetDebugger ## This gives you logging methods such as log, error, info, fatal
|
16
|
+
#
|
17
|
+
# def self.run
|
18
|
+
# job = Skynet::Job.new(
|
19
|
+
# :mappers => 2,
|
20
|
+
# :reducers => 1,
|
21
|
+
# :map_reduce_class => self,
|
22
|
+
# :map_data => [OpenStruct.new({:created_by => 2}),OpenStruct.new({:created_by => 2}),OpenStruct.new({:created_by => 3})]
|
23
|
+
# )
|
24
|
+
# results = job.run
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# def self.map(profiles)
|
28
|
+
# result = Array.new
|
29
|
+
# profiles.each do |profile|
|
30
|
+
# result << [profile.created_by, 1] if profile.created_by
|
31
|
+
# end
|
32
|
+
# result
|
33
|
+
# end
|
34
|
+
#
|
35
|
+
# def self.reduce(pairs)
|
36
|
+
# totals = Hash.new
|
37
|
+
# pairs.each do |pair|
|
38
|
+
# created_by, count = pair[0], pair[1]
|
39
|
+
# totals[created_by] ||= 0
|
40
|
+
# totals[created_by] += count
|
41
|
+
# end
|
42
|
+
# totals
|
43
|
+
# end
|
44
|
+
# end
|
45
|
+
#
|
46
|
+
#
|
47
|
+
# You need to make sure Skynet is running with your class loaded. That is how Skynet works.
|
48
|
+
# Since there is no easy way to actually pass code around the network, each skynet worker must
|
49
|
+
# already have your code loaded. If you have skynet started, stop it and then start it with the -r flag
|
50
|
+
# to tell it where to find your class it should require.
|
51
|
+
# $ skynet -r mapreduce_test.rb
|
52
|
+
# Then go into the skynet console to test running your map reduce task.
|
53
|
+
# $ skynet console -r mapreduce_test.rb
|
54
|
+
# skynet>> MapreduceTest.run # returns {2=>2, 3=>1}
|
55
|
+
#
|
56
|
+
# In the example above, you might notice that self.map and self.reduce both accept Arrays.
|
57
|
+
# If you do not want to deal with getting arrays of map_data or reduce_data, you can include MapreduceHelper
|
58
|
+
# into your class and then implement self.map_each and self.reduce_each methods.
|
59
|
+
# The included self.map and self.reduce methods will handle iterating over the map_data and reduce_data,
|
60
|
+
# passing each element to your map_each and reduce_each methods respectively. They will also handle error
|
61
|
+
# handling within that loop to make sure even if a single map or reduce fails, processing will continue.
|
62
|
+
# If you do not want processing to continue if a map fails, do not use the MapreduceHelper mixin.
|
63
|
+
#
|
64
|
+
# Since Skynet must have your code, you will probably want to install skynet into the application
|
65
|
+
# that skynet needs access to in order to run your jobs. See bin/skynet_install[link:files/bin/skynet_install.html] for more info.
|
66
|
+
#
|
67
|
+
# See new for the many other options to control various Skynet::Job settings.
|
68
|
+
class Job
|
69
|
+
include SkynetDebugger
include Skynet::GuidGenerator

# Error classes raised by Job; all descend from Skynet::Error.
class WorkerError < Skynet::Error; end
class BadMapOrReduceError < Skynet::Error; end
class Error < Skynet::Error; end

@@worker_ver = nil

# Every option name that Job.new and Job#run accept.
FIELDS = [
  :queue_id, :mappers, :reducers, :silent, :name, :map_timeout, :map_data, :job_id,
  :reduce_timeout, :master_timeout, :map_name, :reduce_name,
  :master_result_timeout, :result_timeout, :start_after, :solo, :single, :version,
  :map, :map_partitioner, :reduce, :reduce_partition, :map_reduce_class,
  :master_retry, :map_retry, :reduce_retry,
  :keep_map_tasks, :keep_reduce_tasks,
  :local_master, :async, :data_debug
]

# Generate the plain accessors. Some fields get only a reader or only a
# writer here; the remaining fields get both.
FIELDS.each do |field|
  case field
  when :map_reduce_class, :version, :map, :reduce, :map_data, :start_after
    attr_reader field
  when :master_retry, :map_retry, :reduce_retry, :keep_map_tasks, :keep_reduce_tasks
    attr_writer field
  else
    attr_accessor field
  end
end

attr_accessor :use_local_queue, :data_debug

# Values applied by #initialize to any field the caller did not pass.
Skynet::CONFIG[:JOB_DEFAULTS] = {
  :queue_id              => 0,
  :mappers               => 2,
  :reducers              => 1,
  :map_timeout           => 60,
  :reduce_timeout        => 60,
  :master_timeout        => 60,
  :result_timeout        => 1200,
  :start_after           => 0,
  :master_result_timeout => 1200,
  :local_master          => true
}
|
113
|
+
|
114
|
+
# Short label for this class.
# NOTE(review): presumably picked up by SkynetDebugger as the log prefix -- confirm.
def self.debug_class_desc
  'JOB'
end
|
117
|
+
|
118
|
+
# Most of the time you will merely call #new(options) and then #run on the returned object.
|
119
|
+
#
|
120
|
+
# Options are:
|
121
|
+
# <tt>:local_master</tt> BOOL (DEFAULT true)
|
122
|
+
# By default, your Skynet::Job will act as the master for your map/reduce job, doling out
|
123
|
+
# tasks, waiting for other workers to complete and return their results and dealing with
|
124
|
+
# merging and partitioning the data. If you call #run in async mode, another worker will handle
|
125
|
+
# being the master for your job without blocking. If you run :async => false, :local_master => false
|
126
|
+
# Skynet will let another worker be the master for your job, but will block waiting for the
|
127
|
+
# final results. The benefit of this is that if your process dies, the Job will continue to
|
128
|
+
# run remotely.
|
129
|
+
#
|
130
|
+
# <tt>:async</tt> BOOL (DEFAULT false)
|
131
|
+
# If you run in async mode, another worker will handle being the master for your job without blocking.
|
132
|
+
# You can not pass :local_master => true, :async => true since the only way to allow your
|
133
|
+
# job to run asynchronously is to have a remote_master.
|
134
|
+
#
|
135
|
+
# <tt>:map_data</tt>(Array or Enumerable)
|
136
|
+
# map_data should be an Array or Enumerable that data Skynet::Job will split up
|
137
|
+
# and distribute among your workers. You can stream data to Skynet::Job by passing
|
138
|
+
# an Enumerable that implements next or each.
|
139
|
+
#
|
140
|
+
# <tt>:map_reduce_class</tt>(Class or Class Name)
|
141
|
+
# Skynet::Job will look for class methods named self.map, self.reduce, self.map_partitioner,
|
142
|
+
# self.reduce_partition in your map_reduce_class. The only required method is self.map.
|
143
|
+
# Each of these methods must accept an array. Examples above.
|
144
|
+
#
|
145
|
+
# <tt>:map</tt>(Class Name)
|
146
|
+
# You can pass a classname, or a proc. If you pass a classname, Job will look for a method
|
147
|
+
# called self.map in that class.
|
148
|
+
# WARNING: Passing a proc does not work right now.
|
149
|
+
#
|
150
|
+
# <tt>:reduce</tt>(Class Name)
|
151
|
+
# You can pass a classname, or a proc. If you pass a classname, Job will look for a method
|
152
|
+
# called self.reduce in that class.
|
153
|
+
# WARNING: Passing a proc does not work right now.
|
154
|
+
#
|
155
|
+
# <tt>:reduce_partition</tt>(Class Name)
|
156
|
+
# You can pass a classname, or a proc. If you pass a classname, Job will look for a method
|
157
|
+
# called self.reduce_partition in that class.
|
158
|
+
# WARNING: Passing a proc does not work right now.
|
159
|
+
#
|
160
|
+
# <tt>:mappers</tt> Fixnum
|
161
|
+
# The number of mappers to partition map data for.
|
162
|
+
#
|
163
|
+
# <tt>:reducers</tt> Fixnum
|
164
|
+
# The number of reducers to partition the returned map_data for.
|
165
|
+
#
|
166
|
+
# <tt>:master_retry</tt> Fixnum
|
167
|
+
# If the master fails for any reason, how many times should it be retried? You can also set
|
168
|
+
# Skynet::CONFIG[:DEFAULT_MASTER_RETRY] (DEFAULT 0)
|
169
|
+
#
|
170
|
+
# <tt>:map_retry</tt> Fixnum
|
171
|
+
# If a map task fails for any reason, how many times should it be retried? You can also set
|
172
|
+
# Skynet::CONFIG[:DEFAULT_MAP_RETRY] (DEFAULT 3)
|
173
|
+
#
|
174
|
+
# <tt>:reduce_retry</tt> Fixnum
|
175
|
+
# If a reduce task fails for any reason, how many times should it be retried? You can also set
|
176
|
+
# Skynet::CONFIG[:DEFAULT_REDUCE_RETRY] (DEFAULT 3)
|
177
|
+
#
|
178
|
+
# <tt>:master_timeout</tt>, <tt>:map_timeout</tt>, <tt>:reduce_timeout</tt>, <tt>master_result_timeout</tt>, <tt>result_timeout</tt>
|
179
|
+
# These control how long skynet should wait for particular actions to be finished.
|
180
|
+
# The master_timeout controls how long the master should wait for ALL map/reduce tasks ie. the entire job to finish.
|
181
|
+
# The master_result_timeout controls how long the final result should wait in the queue before being expired.
|
182
|
+
# The map and reduce timeouts control how long individual map and reduce tasks should take.
|
183
|
+
#
|
184
|
+
# <tt>:single</tt> BOOL
|
185
|
+
# By default the master task distributes the map and reduce tasks to other workers.
|
186
|
+
# In single mode the master will take care of the map and reduce tasks by itself.
|
187
|
+
# This is handy when you really want to just perform some single action asynchronously.
|
188
|
+
# In this case you're merely using Skynet to postpone some action. In single mode, the
|
189
|
+
# first worker that picks up your task will just complete it as opposed to trying to distribute
|
190
|
+
# it to another worker.
|
191
|
+
#
|
192
|
+
# <tt>:start_after</tt> Time or Time.to_i
|
193
|
+
# Do not start job until :start_after has passed
|
194
|
+
#
|
195
|
+
# <tt>:queue</tt> String
|
196
|
+
# Which queue should this Job go in to? The queue provided is merely used to
|
197
|
+
# determine the queue_id.
|
198
|
+
# Queues are defined in Skynet::CONFIG[:MESSAGE_QUEUES]
|
199
|
+
#
|
200
|
+
# <tt>:queue_id</tt> Fixnum (DEFAULT 0)
|
201
|
+
# Which queue should this Job go in to?
|
202
|
+
# Queues are defined in Skynet::CONFIG[:MESSAGE_QUEUES]
|
203
|
+
#
|
204
|
+
# <tt>:solo</tt> BOOL
|
205
|
+
# One normally turns solo mode on in Skynet::Config using Skynet::CONFIG[:SOLO] = true
|
206
|
+
# In solo mode, Skynet jobs do not add items to a Skynet queue. Instead they do all
|
207
|
+
# work in place. It's like a Skynet simulation mode. It will complete all tasks
|
208
|
+
# without Skynet running. Great for testing. You can also wrap code blocks in
|
209
|
+
# Skynet.solo {} to run that code in solo mode.
|
210
|
+
#
|
211
|
+
# <tt>:version</tt> Fixnum
|
212
|
+
# If you do not provide a version the current worker version will be used.
|
213
|
+
# Skynet workers start at a specific version and only look for jobs that match that version.
|
214
|
+
# A worker will continue looking for jobs at that version until there are no more jobs left on
|
215
|
+
# the queue for that version. At that time, the worker will check to see if there is a new version.
|
216
|
+
# If there is, it will restart itself at the new version (assuming you had already pushed code to
|
217
|
+
# said workers.)
|
218
|
+
# To retrieve the current version, set the current version or increment the current version, see
|
219
|
+
# Skynet::Job.set_worker_version, Skynet::Job.get_worker_version, Skynet::Job.increment_worker_version
|
220
|
+
#
|
221
|
+
# <tt>:name</tt>, <tt>:map_name</tt>, <tt>:reduce_name</tt>
|
222
|
+
# These name methods are merely for debugging while watching the Skynet logs or the Skynet queue.
|
223
|
+
# If you do not supply names, it will try and provide sensible ones based on your class names.
|
224
|
+
#
|
225
|
+
# <tt>:keep_map_tasks</tt> BOOL or Fixnum (DEFAULT 1)
|
226
|
+
# If true, the master will run the map_tasks locally.
|
227
|
+
# If a number is provided, the master will run the map_tasks locally if there are
|
228
|
+
# LESS THAN OR EQUAL TO the number provided.
|
229
|
+
# You may also set Skynet::CONFIG[:DEFAULT_KEEP_MAP_TASKS] DEFAULT 1
|
230
|
+
#
|
231
|
+
# <tt>:keep_reduce_tasks</tt> BOOL or Fixnum (DEFAULT 1)
|
232
|
+
# If true, the master will run the reduce_tasks locally.
|
233
|
+
# If a number is provided, the master will run the reduce_tasks locally if there are
|
234
|
+
# LESS THAN OR EQUAL TO the number provided.
|
235
|
+
# You may also set Skynet::CONFIG[:DEFAULT_KEEP_REDUCE_TASKS] DEFAULT 1
|
236
|
+
# Builds a job from +options+ (any key listed in FIELDS, plus :queue and the
# legacy :map_tasks / :reduce_tasks names).
#
# For each field the value is taken from, in order of precedence:
#   1. options[field], when the key is present (even if nil);
#   2. the legacy option (:map_tasks for :mappers, :reduce_tasks for
#      :reducers), when given;
#   3. Skynet::CONFIG[:JOB_DEFAULTS][field], when set.
#
# Raises Job::Error when :queue names a queue missing from
# Skynet::CONFIG[:MESSAGE_QUEUES], or when both :async and :local_master
# end up truthy.
def initialize(options = {})
  defaults = Skynet::CONFIG[:JOB_DEFAULTS]

  # Backward compatibility: old option names and the fields they map to.
  # Previously these were consulted only after the defaults had already been
  # applied, so a default for :mappers/:reducers always shadowed them.
  legacy_names = { :mappers => :map_tasks, :reducers => :reduce_tasks }

  FIELDS.each do |field|
    legacy = legacy_names[field]
    if options.has_key?(field)
      send("#{field}=", options[field])
    elsif legacy && options[legacy]
      send("#{field}=", options[legacy])
    elsif defaults[field]
      send("#{field}=", defaults[field])
    end
  end

  # An explicit nil for the new name still falls back to the legacy option.
  self.mappers  ||= options[:map_tasks]
  self.reducers ||= options[:reduce_tasks]

  # A named queue overrides any :queue_id. This lookup used to run once per
  # field inside the loop above; once is enough.
  if options[:queue]
    queue_index = Skynet::CONFIG[:MESSAGE_QUEUES].index(options[:queue])
    unless queue_index
      raise Error.new("The provided queue (#{options[:queue]}) does not exist in Skynet::CONFIG[:MESSAGE_QUEUES]")
    end
    self.queue_id = queue_index
  end

  raise Error.new("You can not run a local master in async mode.") if async && local_master

  @job_id = task_id
end
|
257
|
+
|
258
|
+
# Options are:
|
259
|
+
# <tt>:local_master</tt> BOOL (DEFAULT true)
|
260
|
+
# By default, your Skynet::Job will act as the master for your map/reduce job, doling out
|
261
|
+
# tasks, waiting for other workers to complete and return their results and dealing with
|
262
|
+
# merging and partitioning the data. If you run in async mode, another worker will handle
|
263
|
+
# being the master for your job without blocking. If you run :async => false, :local_master => false
|
264
|
+
# Skynet will let another worker be the master for your job, but will block waiting for the
|
265
|
+
# final results. The benefit of this is that if your process dies, the Job will continue to
|
266
|
+
# run remotely.
|
267
|
+
#
|
268
|
+
# <tt>:async</tt> BOOL (DEFAULT false)
|
269
|
+
# If you run in async mode, another worker will handle being the master for your job without blocking.
|
270
|
+
# You can not pass :local_master => true, :async => true since the only way to allow your
|
271
|
+
# job to run asynchronously is to have a remote_master.
|
272
|
+
#
|
273
|
+
# You can pass any options you might pass to Skynet::Job.new. Warning: Passing options to run
|
274
|
+
# will permanently change properties of the job.
|
275
|
+
def run(options = {})
  # Any recognised option passed here overwrites the job's stored setting.
  FIELDS.each do |field|
    send("#{field}=", options[field]) if options.has_key?(field)
  end

  if async && local_master
    raise Error.new("You can not run a local master in async mode.")
  end

  info "RUN 1 BEGIN #{name}, job_id:#{job_id} vers: #{version} async:#{async}, local_master: #{local_master}, master?: #{master?}"

  # Master path: enqueue a single master task, then either hand back the
  # job id (async) or wait for the master's result.
  if master?
    master_enqueue
    # FIXME If async, return a handle to an object that can be used to
    # retrieve the results later.
    return job_id if async?
    return master_results
  end

  # Otherwise drive the map phase, then the reduce phase, from here.
  queued = map_enqueue
  mapped = map_results(queued)
  return mapped unless mapped && reduce

  partitions = partition_data(mapped)
  return unless partitions

  @results = reduce_results(reduce_enqueue(partitions))
end
|
304
|
+
|
305
|
+
# Wraps the master task in a message and enqueues it, using the local queue
# exactly when this job runs its master locally.
def master_enqueue
  self.use_local_queue = local_master?
  enqueue_messages(tasks_to_messages([master_task]))
end
|
310
|
+
|
311
|
+
# Returns the final results of this map/reduce job. If results is called on an :async job
|
312
|
+
# calling results will block until results are found or the master_timeout is reached.
|
313
|
+
def results
  # FIXME: maybe this could give better warnings if the results aren't ready yet.
  # Simply delegates to #master_results.
  master_results
end
|
319
|
+
|
320
|
+
# Returns the master task's result, gathering it from the queue on first use
# and serving the stored value afterwards.
#
# The assignment is skipped once @results exists (even if it is nil), so the
# stored value is returned explicitly; a bare `x = ... unless cond` evaluates
# to nil when the condition holds, which made every call after the first
# return nil.
def master_results
  @results = gather_results(1, master_timeout, name) unless defined?(@results)
  @results
end
|
323
|
+
|
324
|
+
# Enqueues one message batch per map task and returns how many tasks were
# enqueued. Returns 0 (rather than nil, as before) when there are no map
# tasks, so callers always receive a count.
def map_enqueue
  tasks = map_tasks
  self.use_local_queue = map_local?

  queued = 0
  return queued unless tasks

  # The debug countdown is based on size - 1, as in the original logging.
  remaining = tasks.size - 1
  printlog "MESSAGES TO MAP ENQUEUE #{remaining}" if data_debug?

  tasks.each do |task|
    printlog "#{remaining - queued} MAP TASKS LEFT TO ENQUEUE" if data_debug?
    queued += 1
    enqueue_messages(tasks_to_messages(task))
  end

  queued
end
|
340
|
+
|
341
|
+
# Waits for +number_of_tasks+ map results and returns them, with nil entries
# stripped in place when the result is an Array. Returns nil when nothing
# was gathered.
def map_results(number_of_tasks)
  debug "RUN MAP 2.4 BEFORE MAP #{display_info} MAP_LOCAL?:#{map_local?} USE_LOCAL_QUEUE?:#{use_local_queue?}"
  gathered = gather_results(number_of_tasks, map_timeout, map_name)

  if gathered
    gathered.compact! if Array === gathered
    debug "RUN MAP 2.5 RESULTS AFTER RUN #{display_info} MAP_LOCAL:#{map_local?} USE_LOCAL_QUEUE?:#{use_local_queue?} results:", gathered.inspect
    gathered
  end
end
|
349
|
+
|
350
|
+
# Splits the map output into one bucket per reducer and returns the buckets
# with nil entries removed. Returns nil when there is no map output.
#
# The partitioner is chosen from @reduce_partition:
# * unset     -- the @map class's reduce_partition if @map is a class name
#                that responds to it, otherwise
#                Skynet::Partitioners::RecombineAndSplit;
# * a String  -- that class's reduce_partition;
# * otherwise -- it is invoked with #call.
def partition_data(post_map_data)
  info "RUN REDUCE 3.1 BEFORE PARTITION #{display_info} reducers: #{reducers}"
  debug "RUN REDUCE 3.1 : #{reducers} #{name}, job_id:#{job_id}", post_map_data
  printlog "RUN REDUCE 3.1 : #{reducers} #{name}, job_id:#{job_id}", post_map_data if data_debug?
  return unless post_map_data

  buckets =
    case @reduce_partition
    when nil, false
      # XXX HACK: master jobs once failed to carry their reduce_partition.
      # Falling back to the map class's own partitioner covers that case.
      map_class = @map.is_a?(String) ? @map.constantize : nil
      if map_class && map_class.respond_to?(:reduce_partition)
        @map.constantize.reduce_partition(post_map_data, reducers)
      else
        Skynet::Partitioners::RecombineAndSplit.reduce_partition(post_map_data, reducers)
      end
    when String
      @reduce_partition.constantize.reduce_partition(post_map_data, reducers)
    else
      @reduce_partition.call(post_map_data, reducers)
    end

  buckets.compact! if buckets
  info "RUN REDUCE 3.2 AFTER PARTITION #{display_info} reducers: #{reducers}"
  debug "RUN REDUCE 3.2 AFTER PARTITION #{display_info} data:", buckets if buckets
  printlog "RUN REDUCE 3.2 AFTER PARTITION #{display_info} data:", buckets if data_debug?
  buckets
end
|
378
|
+
|
379
|
+
# Builds the reduce tasks for +partitioned_data+, enqueues a message batch
# for each, and returns the number of tasks enqueued.
#
# When no reducer is configured, or reducers is unset or not positive, the
# partitioned data itself is returned untouched.
def reduce_enqueue(partitioned_data)
  return partitioned_data if !@reduce || !reducers || !(reducers > 0)

  debug "RUN REDUCE 3.3 CREATED REDUCE TASKS #{display_info}", partitioned_data
  total = partitioned_data.size
  printlog "REDUCE MESSAGES TO ENQUEUE #{total}" if data_debug?

  tasks = reduce_tasks(partitioned_data)
  self.use_local_queue = reduce_local?(tasks)

  queued = 0
  tasks.each do |task|
    printlog "#{total - queued} REDUCE TASKS LEFT TO ENQUEUE" if data_debug?
    queued += 1
    enqueue_messages(tasks_to_messages(task))
  end
  queued
end
|
395
|
+
|
396
|
+
# Waits for +number_of_tasks+ reduce results and returns them. When the
# gathered value is an Array whose first element is an Array, the per-task
# arrays are concatenated into one flat list (one level only).
def reduce_results(number_of_tasks)
  gathered = gather_results(number_of_tasks, reduce_timeout, reduce_name)
  printlog "REDUCE RESULTS", gathered if data_debug?

  if gathered.is_a?(Array) && gathered.first.is_a?(Array)
    gathered = gathered.inject([]) { |merged, chunk| merged + chunk }
  end

  debug "RUN REDUCE 3.4 AFTER REDUCE #{display_info} results size: #{gathered ? gathered.size : ''}"
  debug "RUN REDUCE 3.4 AFTER REDUCE #{display_info} results:", gathered if gathered
  printlog "POST REDUCE RESULTS", gathered if data_debug?
  gathered
end
|
411
|
+
|
412
|
+
# Writes each message to the message queue and returns +messages+.
#
# The timeout passed to the queue is five times the message's expiry, with
# an expiry of 5 assumed when the message has none.
#
# (The unused size and index locals from the earlier version were removed.)
def enqueue_messages(messages)
  messages.each do |message|
    timeout = message.expiry || 5
    debug "RUN TASKS SUBMITTING #{message.name} job_id: #{job_id} #{message.payload.is_a?(Skynet::Task) ? 'task' + message.payload.task_id.to_s : ''}"
    debug "RUN TASKS WORKER MESSAGE #{message.name} job_id: #{job_id}", message.to_a
    mq.write_message(message, timeout * 5)
  end
end
|
421
|
+
|
422
|
+
# Given a job_id, returns the results from the message queue. Used to retrieve results of asynchronous jobs.
|
423
|
+
def self.results_by_job_id(job_id,timeout=2)
|
424
|
+
result_message = mq.take_result(job_id,timeout)
|
425
|
+
result = result_message.payload
|
426
|
+
return nil unless result
|
427
|
+
return result
|
428
|
+
end
|
429
|
+
|
430
|
+
# Takes result messages for this job off the message queue until a result
# or an error has been seen for number_of_tasks distinct task ids.
#
# number_of_tasks - number of distinct task ids to wait for.
# timeout         - per-take timeout; doubled before being passed to
#                   mq.take_result. nil (the default) is passed through as nil.
# description     - text included in error messages.
#
# Returns the Array of collected result payloads, or nil when every collected
# payload is nil.
#
# Raises WorkerError when the queue raises Skynet::RequestExpiredError and at
# least one task reported an error. Raises Skynet::RequestExpiredError when
# the queue expires without any task errors recorded.
def gather_results(number_of_tasks, timeout=nil, description=nil)
  debug "GATHER RESULTS job_id: #{job_id} - NOT AN ASYNC JOB"
  results = {}
  errors  = {}
  # timeout defaults to nil; the unguarded `timeout * 2` raised NoMethodError.
  take_timeout = timeout ? timeout * 2 : nil

  begin
    loop do
      # debug "LOOKING FOR RESULT MESSAGE TEMPLATE"
      result_message = mq.take_result(job_id, take_timeout)
      ret_result     = result_message.payload
      task_id        = result_message.task_id

      if result_message.payload_type == :error
        errors[task_id] = ret_result
        error "ERROR RESULT TASK #{task_id} returned #{ret_result.inspect}"
      else
        results[task_id] = ret_result
        debug "RESULT returned TASKID: #{task_id} #{ret_result.inspect}"
      end

      # A task id can appear in both hashes, so "remaining" counts unique ids.
      seen_ids  = results.keys + errors.keys
      remaining = number_of_tasks - seen_ids.uniq.size
      progress  = "RESULT collected: #{seen_ids.size}, remaining: #{remaining}"
      debug progress
      printlog progress if data_debug?
      break if remaining <= 0
    end
  rescue Skynet::RequestExpiredError
    local_mq_reset!
    error "A WORKER EXPIRED or ERRORED, #{description}, job_id: #{job_id}"
    if not errors.empty?
      raise WorkerError.new("WORKER ERROR #{description}, job_id: #{job_id} errors:#{errors.keys.size} out of #{number_of_tasks} workers. #{errors.pretty_print_inspect}")
    else
      raise Skynet::RequestExpiredError.new("WORKER ERROR, A WORKER EXPIRED! Did not get results or even errors back from all workers!")
    end
  end
  local_mq_reset!

  # ==========
  # = FIXME Tricky one.  Should we throw an exception if we didn't get all the results back, or should we keep going.
  # = Maybe this is another needed option.
  # ==========
  # if not (errors.keys - results.keys).empty?
  #   raise WorkerError.new("WORKER ERROR #{description}, job_id: #{job_id} errors:#{errors.keys.size} out of #{number_of_tasks} workers. #{errors.pretty_print_inspect}")
  # end

  return nil if results.values.compact.empty?
  return results.values
end
|
475
|
+
|
476
|
+
# When the local queue is in use, resets it and switches this job back to
# the non-local queue. Does nothing otherwise.
def local_mq_reset!
  return unless use_local_queue?
  local_mq.reset!
  self.use_local_queue = false
end
|
482
|
+
|
483
|
+
|
484
|
+
# Returns the memoized master task for this job, building it on first use
# through Skynet::Task.master_task(self).
#
# Raises Exception ("No map provided") when @map is not set.
def master_task
  return @master_task if @master_task
  raise Exception.new("No map provided") unless @map

  # Make sure to set single to false in our own Job object.
  # We're just passing along whether they set us to single.
  # If we were single, we'd never send off the master to be run externally.
  @single = false

  @master_task = Skynet::Task.master_task(self)
end
|
496
|
+
|
497
|
+
# Returns the memoized Skynet::TaskIterator holding this job's map tasks.
#
# How @map_data is turned into task data:
# * Array      - split into at most @mappers partitions, using
#                @map_partitioner.call when one is set, otherwise
#                Skynet::Partitioners::SimplePartitionData.reduce_partition.
# * Enumerable - used as is.
# * other      - wrapped in a one-element Array.
def map_tasks
  return @map_tasks if @map_tasks

  debug "RUN MAP 2.1 #{display_info} data size before partition: #{@map_data.size}" if @map_data.respond_to?(:size)
  debug "RUN MAP 2.1 #{display_info} data before partition:", @map_data

  task_options = {
    :process        => @map,
    :name           => map_name,
    :map_or_reduce  => :map,
    :result_timeout => map_timeout,
    :retry          => map_retry || Skynet::CONFIG[:DEFAULT_MAP_RETRY]
  }

  map_data =
    case @map_data
    when Array
      debug "RUN MAP 2.2 DATA IS Array #{display_info}"
      # Never ask for more partitions than there are items.
      num_mappers = @map_data.length < @mappers ? @map_data.length : @mappers
      partitions =
        if @map_partitioner
          @map_partitioner.call(@map_data, num_mappers)
        else
          Skynet::Partitioners::SimplePartitionData.reduce_partition(@map_data, num_mappers)
        end
      debug "RUN MAP 2.3 #{display_info} data size after partition: #{partitions.size}"
      debug "RUN MAP 2.3 #{display_info} map data after partition:", partitions
      partitions
    when Enumerable
      debug "RUN MAP 2.2 DATA IS ENUMERABLE #{display_info} map_data_class: #{@map_data.class}"
      @map_data
    else
      debug "RUN MAP 2.2 DATA IS NOT ARRAY OR ENUMERABLE #{display_info} map_data_class: #{@map_data.class}"
      [ @map_data ]
    end

  @map_tasks = Skynet::TaskIterator.new(task_options, map_data)
end
|
533
|
+
|
534
|
+
# Returns the memoized Skynet::TaskIterator holding this job's reduce tasks.
#
# partitioned_data - data handed to the iterator. It is only used on the
#                    first call; later calls return the cached iterator
#                    until @reduce_tasks is cleared.
def reduce_tasks(partitioned_data)
  return @reduce_tasks if @reduce_tasks

  options = {
    :name           => reduce_name,
    :process        => @reduce,
    :map_or_reduce  => :reduce,
    :result_timeout => reduce_timeout,
    :retry          => reduce_retry || Skynet::CONFIG[:DEFAULT_REDUCE_RETRY]
  }
  @reduce_tasks = Skynet::TaskIterator.new(options, partitioned_data)
end
|
546
|
+
|
547
|
+
# Converts one task, an Array of tasks or a Skynet::TaskIterator into an
# Array of messages built with Skynet::Message.new_task_message(task, self).
def tasks_to_messages(tasks)
  task_list =
    case tasks
    when Skynet::TaskIterator then tasks.to_a
    when Array                then tasks
    else [tasks]
    end

  task_list.map { |task| Skynet::Message.new_task_message(task, self) }
end
|
558
|
+
|
559
|
+
# Retry setting for the master task: @master_retry when set, otherwise
# Skynet::CONFIG[:DEFAULT_MASTER_RETRY].
def master_retry
  return @master_retry if @master_retry
  Skynet::CONFIG[:DEFAULT_MASTER_RETRY]
end
|
562
|
+
|
563
|
+
# Retry setting for map tasks: @map_retry when set, otherwise
# Skynet::CONFIG[:DEFAULT_MAP_RETRY].
def map_retry
  return @map_retry if @map_retry
  Skynet::CONFIG[:DEFAULT_MAP_RETRY]
end
|
566
|
+
|
567
|
+
# Retry setting for reduce tasks: @reduce_retry when set, otherwise
# Skynet::CONFIG[:DEFAULT_REDUCE_RETRY].
def reduce_retry
  return @reduce_retry if @reduce_retry
  Skynet::CONFIG[:DEFAULT_REDUCE_RETRY]
end
|
570
|
+
|
571
|
+
# Keep-map-tasks setting: @keep_map_tasks when set, otherwise
# Skynet::CONFIG[:DEFAULT_KEEP_MAP_TASKS]. Read by map_local?.
def keep_map_tasks
  return @keep_map_tasks if @keep_map_tasks
  Skynet::CONFIG[:DEFAULT_KEEP_MAP_TASKS]
end
|
574
|
+
|
575
|
+
# Keep-reduce-tasks setting: @keep_reduce_tasks when set, otherwise
# Skynet::CONFIG[:DEFAULT_KEEP_REDUCE_TASKS]. Read by reduce_local?.
def keep_reduce_tasks
  return @keep_reduce_tasks if @keep_reduce_tasks
  Skynet::CONFIG[:DEFAULT_KEEP_REDUCE_TASKS]
end
|
578
|
+
|
579
|
+
# Decides whether the map tasks should be handled locally.
#
# True when the job is solo or single, when keep_map_tasks is exactly true,
# or when keep_map_tasks is set and the map task data is an Array no larger
# than keep_map_tasks. False otherwise.
def map_local?
  return true if solo? || single?

  limit = keep_map_tasks
  return true if limit == true
  return false unless limit

  # error "RUN MAP 2.4 BEFORE MAP #{display_info} KEEPMT:#{keep_map_tasks} DKMT:#{Skynet::CONFIG[:DEFAULT_KEEP_MAP_TASKS]} MDCLASS: #{map_tasks.data.class} #{(map_tasks.data.is_a?(Array) ? map_tasks.data.size : '')}"
  task_data = map_tasks.data
  task_data.is_a?(Array) && task_data.size <= limit
end
|
586
|
+
|
587
|
+
# Decides whether the given reduce tasks should be handled locally.
#
# reduce_tasks - object whose #data is inspected.
#
# True when the job is solo or single, when keep_reduce_tasks is exactly
# true, or when keep_reduce_tasks is set and reduce_tasks.data is an Array
# no larger than keep_reduce_tasks. False otherwise.
def reduce_local?(reduce_tasks)
  return true if solo? || single?

  limit = keep_reduce_tasks
  return true if limit == true
  return false unless limit

  task_data = reduce_tasks.data
  task_data.is_a?(Array) && task_data.size <= limit
end
|
593
|
+
|
594
|
+
# Returns the current value of @use_local_queue. The mq method uses it to
# choose between the local queue and Skynet::MessageQueue.
def use_local_queue?
  return @use_local_queue
end
|
597
|
+
|
598
|
+
# async is true if the async flag is set and the job is not a 'single' job,
# not in solo mode and not running with a local master.
# async only applies to whether we run the master locally and whether we poll for the result
def async?
  @async && !solo? && !single? && !local_master?
end
|
603
|
+
|
604
|
+
# True when the job is async, or when it is not set to use a local master.
def master?
  async? || !local_master?
end
|
607
|
+
|
608
|
+
# Truthy when @local_master is set or the job is in solo mode.
def local_master?
  @local_master || solo?
end
|
611
|
+
|
612
|
+
# Truthy when @solo is set on this job or CONFIG[:SOLO] is set.
def solo?
  @solo || CONFIG[:SOLO]
end
|
615
|
+
|
616
|
+
# Returns the current value of @single.
def single?
  return @single
end
|
619
|
+
|
620
|
+
# Data-level debug switch: @data_debug when set, otherwise
# Skynet::CONFIG[:SKYNET_JOB_DEBUG_DATA_LEVEL].
def data_debug?
  return @data_debug if @data_debug
  Skynet::CONFIG[:SKYNET_JOB_DEBUG_DATA_LEVEL]
end
|
623
|
+
|
624
|
+
# Clears the memoized map and reduce task iterators so the next call to
# map_tasks / reduce_tasks rebuilds them.
def reset!
  @map_tasks = @reduce_tasks = nil
end
|
628
|
+
|
629
|
+
# Builds a Hash of this job's settings.
#
# Every name in FIELDS is called as a method on the job, and each truthy
# value is stored under that field name. Each getter is invoked exactly once
# per call; it was previously invoked twice.
#
# Raises Skynet::Error when @map or @reduce is a Proc.
def to_h
  if @map.kind_of?(Proc) or @reduce.kind_of?(Proc)
    raise Skynet::Error.new("You have a Proc in your map or reduce. This can't be turned into a hash.")
  end
  hash = {}
  FIELDS.each do |field|
    value = send(field)
    hash[field] = value if value
  end
  hash
end
|
639
|
+
|
640
|
+
# Returns this job's task id, generating it on first use from
# get_unique_id(1) converted to an Integer.
def task_id
  @task_id = get_unique_id(1).to_i unless @task_id
  @task_id
end
|
643
|
+
|
644
|
+
# Returns the worker version this job runs against.
#
# Always 1 in solo mode. Otherwise the memoized @version, which is first
# filled from the class-wide @@worker_version. That in turn is looked up
# once through mq.get_worker_version and falls back to 1.
def version
  return 1 if solo?
  return @version if @version

  @@worker_version ||= (mq.get_worker_version || 1)
  @version = @@worker_version
end
|
651
|
+
|
652
|
+
# Sets @version to the given value, overriding the looked-up worker version.
def version=(v)
  @version = v
end
|
655
|
+
|
656
|
+
# Short label used in log messages: the job name followed by its job_id.
def display_info
  label = name
  "#{label}, job_id: #{job_id}"
end
|
659
|
+
|
660
|
+
# Stores the start_after setting. A Time is stored as its integer epoch
# seconds (Time#to_i); any other value is stored as given.
def start_after=(time)
  @start_after =
    case time
    when Time then time.to_i
    else time
    end
end
|
663
|
+
|
664
|
+
# Replaces the job's map data. reset! is called first so task iterators
# built from the previous data are discarded.
def map_data=(map_data)
  reset!
  @map_data = map_data
end
|
668
|
+
|
669
|
+
# Sets the map step after calling reset!.
#
# map - nil (ignored, @map is left untouched), a String or Class (stored as
#       its to_s), or a Proc (stored as is). Only objects whose class is
#       exactly String or Class count as class names.
#
# Raises BadMapOrReduceError for any other value.
def map=(map)
  reset!
  return unless map

  if map.is_a?(Proc)
    @map = map
  elsif [String, Class].include?(map.class)
    @map = map.to_s
  else
    raise BadMapOrReduceError.new("#{self.class}.map accepts a class name or a proc. Got #{map}")
  end
end
|
680
|
+
|
681
|
+
# Sets the reduce step after calling reset!.
#
# reduce - nil (ignored, @reduce is left untouched), a String or Class
#          (stored as its to_s), or a Proc (stored as is). Only objects whose
#          class is exactly String or Class count as class names.
#
# Raises BadMapOrReduceError for any other value.
def reduce=(reduce)
  reset!
  return unless reduce

  if reduce.is_a?(Proc)
    @reduce = reduce
  elsif [String, Class].include?(reduce.class)
    @reduce = reduce.to_s
  else
    raise BadMapOrReduceError.new("#{self.class}.reduce accepts a class name or a proc. Got #{reduce}")
  end
end
|
692
|
+
|
693
|
+
# Configures the job from a single map/reduce class.
#
# klass - a String or Class; anything else raises BadMapOrReduceError.
#
# The class name becomes @map. name and map_name get "<class> MASTER" /
# "<class> MAP" unless already set. When the class responds to :reduce,
# @reduce and reduce_name are defaulted as well. @reduce_partitioner and
# @map_partitioner default to the class name when the class responds to
# :reduce_partition / :map_partitioner respectively.
def map_reduce_class=(klass)
  reset!
  unless [String, Class].include?(klass.class)
    raise BadMapOrReduceError.new("#{self.class}.map_reduce only accepts a class name: #{klass} #{klass.class}")
  end

  class_name = klass.to_s
  @map = class_name
  self.name     ||= "#{class_name} MASTER"
  self.map_name ||= "#{class_name} MAP"

  # Resolve the constant only after the names are set, as before.
  target = class_name.constantize
  if target.respond_to?(:reduce)
    @reduce ||= class_name
    self.reduce_name ||= "#{class_name} REDUCE"
  end
  # NOTE(review): the map side checks :map_partitioner while the reduce side
  # checks :reduce_partition -- confirm the intended method names.
  @reduce_partitioner ||= class_name if target.respond_to?(:reduce_partition)
  @map_partitioner    ||= class_name if target.respond_to?(:map_partitioner)
end
|
709
|
+
|
710
|
+
# Deprecated entry point: logs a deprecation error, then delegates to
# run with :local_master => false.
def run_master
  error "run_master has been deprecated, please use run"
  options = { :local_master => false }
  run(options)
end
|
714
|
+
|
715
|
+
# Returns the message queue this job currently talks to: the local queue
# when use_local_queue? is set, otherwise a memoized Skynet::MessageQueue.
def mq
  return local_mq if use_local_queue?
  @mq ||= Skynet::MessageQueue.new
end
|
722
|
+
|
723
|
+
# Returns this job's LocalMessageQueue, creating it on first use.
def local_mq
  @local_mq = LocalMessageQueue.new unless @local_mq
  @local_mq
end
|
726
|
+
|
727
|
+
# Class-level queue accessor: returns a new Skynet::MessageQueue on every
# call (no memoization).
def self.mq
  queue = Skynet::MessageQueue.new
  queue
end
|
730
|
+
|
731
|
+
end ### END class Skynet::Job
|
732
|
+
end
|
733
|
+
|
734
|
+
class Skynet::AsyncJob < Skynet::Job
  # Skynet::AsyncJob is for Skynet jobs you want to run asynchronously.
  # Normally when you run a Skynet::Job it blocks until the job is complete.
  # Running an Async job merely returns a job_id which can be used later to retrieve the results.
  # See Skynet::Job for full documentation

  # Builds the job with :async forced to true and :local_master forced to
  # false. The caller's options hash is copied, not modified.
  def initialize(options = {})
    super(options.merge(:async => true, :local_master => false))
  end

  # Sets the map step. Only a String or Class is accepted (stored as its
  # to_s); anything else raises BadMapOrReduceError. reset! is called first,
  # as in Skynet::Job#map=, so cached task iterators are discarded.
  def map=(klass)
    reset!
    unless klass.class == String or klass.class == Class
      raise BadMapOrReduceError.new("#{self.class}.map only accepts a class name")
    end
    @map = klass.to_s
  end

  # Sets the reduce step. Only a String or Class is accepted (stored as its
  # to_s); anything else raises BadMapOrReduceError. reset! is called first,
  # as in Skynet::Job#reduce=, so cached task iterators are discarded.
  def reduce=(klass)
    reset!
    unless klass.class == String or klass.class == Class
      raise BadMapOrReduceError.new("#{self.class}.reduce only accepts a class name")
    end
    @reduce = klass.to_s
  end
end # class Skynet::AsyncJob
|
760
|
+
|
761
|
+
# In-process stand-in for the message queue: written messages are kept in
# an Array, and taking a result runs the next message's task on the spot.
class Skynet::Job::LocalMessageQueue
  include SkynetDebugger

  attr_reader :messages, :results

  def initialize
    @messages = []
    @results  = []
  end

  # The local queue always reports worker version 1.
  def get_worker_version
    1
  end

  # Runs the oldest queued message and returns its result message.
  # job_id and timeout are accepted but not used.
  #
  # Raises Skynet::RequestExpiredError when no messages are queued.
  def take_result(job_id, timeout = nil)
    if @messages.empty?
      raise Skynet::RequestExpiredError.new
    end
    run_message(@messages.shift)
  end

  # Queues a message. timeout is accepted but not used.
  def write_message(message, timeout = nil)
    @messages << message
  end

  def empty?
    @messages.empty?
  end

  def in_use?
    !empty?
  end

  # Drops all queued messages and stored results.
  def reset!
    @messages = []
    @results  = []
  end

  # Runs the task carried by message, making up to message.retry + 1
  # attempts. The first successful run ends the loop. If every attempt
  # raises, the last exception becomes the result. Returns
  # message.result_message(result).
  def run_message(message)
    outcome = nil
    attempts = message.retry + 1
    attempts.times do
      task = message.payload
      debug "RUN TASKS LOCALLY SUBMITTING #{message.name} task #{task.task_id}", task
      begin
        outcome = task.run
        break
      rescue Skynet::Task::TimeoutError => timed_out
        outcome = timed_out
        error "Skynet::Job::LocalMessageQueue Task timed out while executing #{timed_out.inspect} #{timed_out.backtrace.join("\n")}"
      rescue Exception => failure
        error "Skynet::Job::LocalMessageQueue :#{__LINE__} #{failure.inspect} #{failure.backtrace.join("\n")}"
        outcome = failure
      end
    end
    message.result_message(outcome)
  end
end # class LocalMessageQueue
|
818
|
+
|
819
|
+
# Wraps a data collection and exposes it as a sequence of Skynet::Task
# objects. Each task is built from the shared task_options plus one data
# element and a freshly generated task id.
class Skynet::TaskIterator
  include SkynetDebugger
  include Skynet::GuidGenerator

  class Error < StandardError
  end

  include Enumerable

  attr_accessor :task_options, :data

  # task_options - Hash merged into every task's options.
  # data         - the collection whose elements become task data.
  def initialize(task_options, data)
    @task_options = task_options
    @data         = data
  end

  # Returns the memoized task for data.first. each reuses the same task
  # object for its first iteration.
  #
  # Raises Error when data does not respond to first.
  def first
    if data.respond_to?(:first)
      @first ||= build_task(data.first)
    else
      raise Error.new("#{data.class} does not implement 'first'")
    end
  end

  # Returns data.size. Raises Error when data does not respond to size.
  def size
    if data.respond_to?(:size)
      data.size
    else
      raise Error.new("#{data.class} does not implement 'size'")
    end
  end

  # Builds a new task (with a new task id) for data[index].
  # Raises Error when data does not respond to [].
  def [](index)
    if data.respond_to?(:[])
      build_task(data[index])
    else
      raise Error.new("#{data.class} does not implement '[]'")
    end
  end

  # Name of the method used to walk data: :next when data responds to it,
  # otherwise :each.
  # NOTE(review): a stdlib Enumerator responds to :next but ignores the block
  # it is given -- confirm which iterator types :next is meant for.
  def each_method
    data.respond_to?(:next) ? :next : :each
  end

  def to_a
    self.collect { |task| task }
  end

  # Yields one task per data element. The first yielded task is memoized in
  # @first (and reused if first was already called).
  #
  # Without a block, returns an Enumerator instead of raising LocalJumpError.
  def each
    return enum_for(:each) unless block_given?

    iteration = 0
    data.send(each_method) do |task_data|
      task = nil
      if @first and iteration == 0
        task = @first
      else
        task = build_task(task_data)
        @first = task if iteration == 0
      end
      iteration += 1
      yield task
    end
  end

  private

  # Builds a Skynet::Task for one data element with a freshly generated id.
  def build_task(task_data)
    Skynet::Task.new(task_options.merge(:data => task_data, :task_id => get_unique_id(1).to_i))
  end
end # class TaskIterator
|
882
|
+
|
883
|
+
|
884
|
+
# require 'ruby2ruby' # XXX this will break unless people have the fix to Ruby2Ruby
|
885
|
+
##### ruby2ruby fix from ruby2ruby.rb ############
|
886
|
+
### XXX This is bad. Some people rely on an exception being thrown if a method is missing! BULLSHIT!
|
887
|
+
# class NilClass # Objective-C trick
|
888
|
+
# def method_missing(msg, *args, &block)
|
889
|
+
# nil
|
890
|
+
# end
|
891
|
+
# end
|
892
|
+
##############################
|