timocratic-skynet 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +152 -0
- data/License.txt +20 -0
- data/Manifest.txt +144 -0
- data/README.txt +178 -0
- data/Rakefile +5 -0
- data/app_generators/skynet_install/USAGE +5 -0
- data/app_generators/skynet_install/skynet_install_generator.rb +94 -0
- data/app_generators/skynet_install/templates/migration.rb +43 -0
- data/app_generators/skynet_install/templates/skynet_config.rb +50 -0
- data/app_generators/skynet_install/templates/skynet_initializer.rb +1 -0
- data/app_generators/skynet_install/templates/skynet_mysql_schema.sql +33 -0
- data/bin/skynet +71 -0
- data/bin/skynet_install +36 -0
- data/bin/skynet_tuplespace_server +74 -0
- data/config/hoe.rb +75 -0
- data/config/requirements.rb +17 -0
- data/examples/dgrep/README +70 -0
- data/examples/dgrep/config/skynet_config.rb +26 -0
- data/examples/dgrep/data/shakespeare/README +2 -0
- data/examples/dgrep/data/shakespeare/poetry/loverscomplaint +381 -0
- data/examples/dgrep/data/shakespeare/poetry/rapeoflucrece +2199 -0
- data/examples/dgrep/data/shakespeare/poetry/sonnets +2633 -0
- data/examples/dgrep/data/shakespeare/poetry/various +640 -0
- data/examples/dgrep/data/shakespeare/poetry/venusandadonis +1423 -0
- data/examples/dgrep/data/testfile1.txt +1 -0
- data/examples/dgrep/data/testfile2.txt +1 -0
- data/examples/dgrep/data/testfile3.txt +1 -0
- data/examples/dgrep/data/testfile4.txt +1 -0
- data/examples/dgrep/lib/dgrep.rb +59 -0
- data/examples/dgrep/lib/mapreduce_test.rb +32 -0
- data/examples/dgrep/lib/most_common_words.rb +45 -0
- data/examples/dgrep/script/dgrep +75 -0
- data/examples/rails_mysql_example/README +66 -0
- data/examples/rails_mysql_example/Rakefile +10 -0
- data/examples/rails_mysql_example/app/controllers/application.rb +10 -0
- data/examples/rails_mysql_example/app/helpers/application_helper.rb +3 -0
- data/examples/rails_mysql_example/app/models/user.rb +21 -0
- data/examples/rails_mysql_example/app/models/user_favorite.rb +5 -0
- data/examples/rails_mysql_example/app/models/user_mailer.rb +12 -0
- data/examples/rails_mysql_example/app/views/user_mailer/welcome.erb +5 -0
- data/examples/rails_mysql_example/config/boot.rb +109 -0
- data/examples/rails_mysql_example/config/database.yml +42 -0
- data/examples/rails_mysql_example/config/environment.rb +59 -0
- data/examples/rails_mysql_example/config/environments/development.rb +18 -0
- data/examples/rails_mysql_example/config/environments/production.rb +19 -0
- data/examples/rails_mysql_example/config/environments/test.rb +22 -0
- data/examples/rails_mysql_example/config/initializers/inflections.rb +10 -0
- data/examples/rails_mysql_example/config/initializers/mime_types.rb +5 -0
- data/examples/rails_mysql_example/config/initializers/skynet.rb +1 -0
- data/examples/rails_mysql_example/config/routes.rb +35 -0
- data/examples/rails_mysql_example/config/skynet_config.rb +36 -0
- data/examples/rails_mysql_example/db/migrate/001_create_skynet_tables.rb +43 -0
- data/examples/rails_mysql_example/db/migrate/002_create_users.rb +16 -0
- data/examples/rails_mysql_example/db/migrate/003_create_user_favorites.rb +14 -0
- data/examples/rails_mysql_example/db/schema.rb +85 -0
- data/examples/rails_mysql_example/db/skynet_mysql_schema.sql +33 -0
- data/examples/rails_mysql_example/doc/README_FOR_APP +2 -0
- data/examples/rails_mysql_example/lib/tasks/rails_mysql_example.rake +20 -0
- data/examples/rails_mysql_example/public/.htaccess +40 -0
- data/examples/rails_mysql_example/public/404.html +30 -0
- data/examples/rails_mysql_example/public/422.html +30 -0
- data/examples/rails_mysql_example/public/500.html +30 -0
- data/examples/rails_mysql_example/public/dispatch.cgi +10 -0
- data/examples/rails_mysql_example/public/dispatch.fcgi +24 -0
- data/examples/rails_mysql_example/public/dispatch.rb +10 -0
- data/examples/rails_mysql_example/public/favicon.ico +0 -0
- data/examples/rails_mysql_example/public/images/rails.png +0 -0
- data/examples/rails_mysql_example/public/index.html +277 -0
- data/examples/rails_mysql_example/public/javascripts/application.js +2 -0
- data/examples/rails_mysql_example/public/javascripts/controls.js +963 -0
- data/examples/rails_mysql_example/public/javascripts/dragdrop.js +972 -0
- data/examples/rails_mysql_example/public/javascripts/effects.js +1120 -0
- data/examples/rails_mysql_example/public/javascripts/prototype.js +4225 -0
- data/examples/rails_mysql_example/public/robots.txt +5 -0
- data/examples/rails_mysql_example/script/about +3 -0
- data/examples/rails_mysql_example/script/console +3 -0
- data/examples/rails_mysql_example/script/destroy +3 -0
- data/examples/rails_mysql_example/script/generate +3 -0
- data/examples/rails_mysql_example/script/performance/benchmarker +3 -0
- data/examples/rails_mysql_example/script/performance/profiler +3 -0
- data/examples/rails_mysql_example/script/performance/request +3 -0
- data/examples/rails_mysql_example/script/plugin +3 -0
- data/examples/rails_mysql_example/script/process/inspector +3 -0
- data/examples/rails_mysql_example/script/process/reaper +3 -0
- data/examples/rails_mysql_example/script/process/spawner +3 -0
- data/examples/rails_mysql_example/script/runner +3 -0
- data/examples/rails_mysql_example/script/server +3 -0
- data/examples/rails_mysql_example/test/fixtures/user_favorites.yml +9 -0
- data/examples/rails_mysql_example/test/fixtures/users.yml +11 -0
- data/examples/rails_mysql_example/test/test_helper.rb +38 -0
- data/examples/rails_mysql_example/test/unit/user_favorite_test.rb +8 -0
- data/examples/rails_mysql_example/test/unit/user_test.rb +8 -0
- data/extras/README +7 -0
- data/extras/init.d/skynet +87 -0
- data/extras/nagios/check_skynet.sh +121 -0
- data/extras/rails/controllers/skynet_controller.rb +43 -0
- data/extras/rails/views/skynet/index.rhtml +137 -0
- data/lib/skynet.rb +95 -0
- data/lib/skynet/mapreduce_helper.rb +74 -0
- data/lib/skynet/mapreduce_test.rb +56 -0
- data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
- data/lib/skynet/message_queue_adapters/mysql.rb +509 -0
- data/lib/skynet/message_queue_adapters/tuple_space.rb +316 -0
- data/lib/skynet/skynet_active_record_extensions.rb +280 -0
- data/lib/skynet/skynet_config.rb +232 -0
- data/lib/skynet/skynet_console.rb +50 -0
- data/lib/skynet/skynet_console_helper.rb +66 -0
- data/lib/skynet/skynet_debugger.rb +138 -0
- data/lib/skynet/skynet_guid_generator.rb +68 -0
- data/lib/skynet/skynet_job.rb +892 -0
- data/lib/skynet/skynet_launcher.rb +40 -0
- data/lib/skynet/skynet_logger.rb +62 -0
- data/lib/skynet/skynet_manager.rb +706 -0
- data/lib/skynet/skynet_message.rb +359 -0
- data/lib/skynet/skynet_message_queue.rb +136 -0
- data/lib/skynet/skynet_partitioners.rb +96 -0
- data/lib/skynet/skynet_ruby_extensions.rb +53 -0
- data/lib/skynet/skynet_task.rb +118 -0
- data/lib/skynet/skynet_tuplespace_server.rb +83 -0
- data/lib/skynet/skynet_worker.rb +451 -0
- data/lib/skynet/version.rb +9 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +74 -0
- data/setup.rb +1585 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/website.rake +17 -0
- data/test/test_active_record_extensions.rb +138 -0
- data/test/test_generator_helper.rb +20 -0
- data/test/test_helper.rb +10 -0
- data/test/test_mysql_message_queue_adapter.rb +263 -0
- data/test/test_skynet.rb +19 -0
- data/test/test_skynet_install_generator.rb +49 -0
- data/test/test_skynet_job.rb +717 -0
- data/test/test_skynet_manager.rb +157 -0
- data/test/test_skynet_message.rb +229 -0
- data/test/test_skynet_task.rb +24 -0
- data/test/test_tuplespace_message_queue.rb +174 -0
- data/website/index.html +181 -0
- data/website/index.txt +98 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +247 -0
@@ -0,0 +1,96 @@
|
|
1
|
+
class Skynet
|
2
|
+
# Collection of partitioning utilities
|
3
|
+
class Partitioners
|
4
|
+
include SkynetDebugger
|
5
|
+
|
6
|
+
# Split one block of data into partitions
|
7
|
+
#
|
8
|
+
class SimplePartitionData < Partitioners
|
9
|
+
|
10
|
+
# Splits one flat block of data into at most +partitions+ sub-arrays.
#
# data       - Array of items to distribute. nil or empty input is returned
#              unchanged (not wrapped).
# partitions - Integer number of partitions requested.
#
# Returns an Array of non-empty Arrays. Three strategies are used:
#   * partitions >= data.length     -> one datum per partition
#   * data.length >= partitions * 2 -> contiguous slices of equal size (the
#                                      last slice may be shorter)
#   * otherwise                     -> round-robin, which balances evenly
#
# Fewer than +partitions+ arrays may be returned. The contiguous strategy
# rounds the slice size up, so the data can run out before every requested
# slot is filled; slots that would be nil or empty are omitted.
def self.reduce_partition(data, partitions)
  return data if (not data) or data.empty?

  partitioned_data = Array.new

  if partitions >= data.length
    data.each do |datum|
      partitioned_data << [datum]
    end
  elsif (data.length >= partitions * 2)
    # Use quicker but less "fair" method
    size = data.length / partitions
    size += 1 if (data.length % partitions != 0) # Last slice of leftovers

    (0..partitions - 1).each do |i|
      slice = data[i * size, size]
      # Array#[] yields nil past the end and [] exactly at the end; once
      # that happens every later slice is empty as well.
      break if slice.nil? or slice.empty?
      partitioned_data << slice
    end
  else
    # Slower method, but partitions evenly
    (0..partitions - 1).each { |i| partitioned_data[i] = Array.new }

    data.each_with_index do |datum, i|
      partitioned_data[i % partitions] << datum
    end
  end

  partitioned_data
end
|
46
|
+
end
|
47
|
+
|
48
|
+
|
49
|
+
class RecombineAndSplit < Partitioners
|
50
|
+
# Tries to be smart about what kind of data its getting, whether array of arrays or array of arrays of arrays.
|
51
|
+
#
|
52
|
+
# Recombines the per-partition map output into one list and re-splits it
# into +new_partitions+ groups via SimplePartitionData.
#
# post_map_data  - expected to be a non-empty Array whose first element is
#                  an Array; anything else is returned untouched.
# new_partitions - Integer passed through to SimplePartitionData.
#
# Only one level of nesting is merged. An earlier variant that fully
# flattened the data when the inner elements were not arrays was disabled
# by the original author because it broke.
def self.reduce_partition(post_map_data,new_partitions)
  recombinable = post_map_data.is_a?(Array) &&
                 !post_map_data.empty? &&
                 post_map_data.first.is_a?(Array)
  return post_map_data unless recombinable

  merged = post_map_data.inject([]) { |combined, part| combined + part }
  repartitioned = Skynet::Partitioners::SimplePartitionData.reduce_partition(merged, new_partitions)

  debug "POST PARTITIONED DATA_SIZE", repartitioned.size
  debug "POST PARTITIONED DATA", repartitioned
  repartitioned
end
|
67
|
+
end
|
68
|
+
|
69
|
+
class ArrayDataSplitByFirstEntry < Partitioners
|
70
|
+
# Smarter partitioner for array data, generates simple sum of array[0]
|
71
|
+
# and ensures that all arrays sharing that key go into the same partition.
|
72
|
+
#
|
73
|
+
# Smarter partitioner for array data: routes every [key, ...] entry to a
# partition derived from its first element.
#
# post_map_data  - Array of partitions, each an Array of entries.
# new_partitions - Integer number of output partitions.
#
# Returns an Array of +new_partitions+ Arrays.
#
# Routing rules:
#   * Integer key -> key % new_partitions
#   * String key  -> String#sum checksum % new_partitions
#   * other key   -> round-robin on a running counter, so equal keys of
#                    other types are NOT guaranteed to share a partition
# Entries that are not Arrays, or have fewer than two elements, are dropped.
#
# Integer is used rather than Fixnum: Fixnum no longer exists on current
# Ruby, and on older Ruby it excluded Bignum keys.
def self.reduce_partition(post_map_data, new_partitions)
  partitions = (0..new_partitions - 1).map { Array.new }
  cnt = 0
  post_map_data.each do |partition|
    partition.each do |array|
      next unless array.is_a?(Array) and array.size >= 2
      if array[0].kind_of?(Integer)
        key = array[0] % new_partitions
      elsif array[0].kind_of?(String)
        key = array[0].sum % new_partitions
      else
        cnt += 1
        key = cnt % new_partitions
      end
      partitions[key] << array
    end
  end
  partitions
end
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
96
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Enumerable
  # Runs this collection through a Skynet map/reduce job and returns
  # whatever the job's #run returns.
  #
  # klass   - optional class (or name); its to_s becomes :map_reduce_class
  #           and is used in the job/map/reduce names.
  # options - extra job options; they override the defaults built here.
  # block   - optional map block, stored under :map (overrides options[:map]).
  #
  # A Hash receiver is turned into an Array of single-pair hashes; any other
  # receiver is passed as the map data itself. A Skynet::AsyncJob is used
  # only when no block is given and options[:async] is truthy; otherwise a
  # Skynet::Job with :local_master => true is run.
  def mapreduce(klass=nil,options={},&block)
    map_data = is_a?(Hash) ? map { |key, value| { key => value } } : self

    jobopts = {
      :mappers               => 20000,
      :map_data              => map_data,
      :name                  => "#{klass} Enumerable MASTER",
      :map_name              => "#{klass} Enumerable MAP",
      :reduce_name           => "#{klass} Enumerable REDUCE",
      :map_timeout           => 3600,
      :reduce_timeout        => 3600,
      :master_timeout        => 3600,
      :master_result_timeout => 3600
    }
    jobopts[:map_reduce_class] = klass.to_s if klass

    options.each { |key, value| jobopts[key] = value }
    jobopts[:map] = block if block_given?

    job = if block_given? || !jobopts[:async]
            Skynet::Job.new(jobopts.merge(:local_master => true))
          else
            Skynet::AsyncJob.new(jobopts)
          end
    job.run
  end
end
|
36
|
+
|
37
|
+
class String
  ### THIS IS TAKEN DIRECTLY FROM ActiveSupport::Inflector
  # Constantize tries to find a declared constant with the name specified
  # in the string. It raises a NameError when the name is not in CamelCase
  # or is not initialized.
  #
  # Examples
  #   "Module".constantize #=> Module
  #   "Class".constantize  #=> Class
  #
  # The copied code interpolated +camel_cased_word+, a parameter of the
  # ActiveSupport method that does not exist here, so the intended message
  # was never produced. The receiver itself is reported instead.
  def constantize
    unless /\A(?:::)?([A-Z]\w*(?:::[A-Z]\w*)*)\z/ =~ self
      raise NameError, "#{inspect} is not a valid constant name!"
    end

    # The regexp above restricts $1 to a plain (optionally nested) constant
    # path, so only a constant lookup is evaluated here.
    Object.module_eval("::#{$1}", __FILE__, __LINE__)
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
class Skynet
|
2
|
+
class Task
|
3
|
+
include SkynetDebugger
|
4
|
+
|
5
|
+
class ConstructorError < StandardError; end
|
6
|
+
class TimeoutError < StandardError; end
|
7
|
+
|
8
|
+
attr_reader :data, :process, :result, :map_or_reduce, :marshalable
|
9
|
+
attr_accessor :name, :result_timeout, :retry
|
10
|
+
|
11
|
+
@@log = nil
|
12
|
+
|
13
|
+
# Short label identifying this class; presumably consumed by SkynetDebugger
# when tagging log lines (the debugger is not visible here - confirm).
def self.debug_class_desc
  "TASK"
end
|
16
|
+
|
17
|
+
# Builds the :master task that will run +job+ on a worker.
#
# A synchronous, local-master copy of the job is created from every
# Skynet::Job::FIELDS value the job has set (map/reduce names fall back to
# the job name), and that copy's hash form becomes the task's process.
#
# job - the Skynet::Job to wrap.
#
# Returns a new task whose retry count falls back to
# Skynet::CONFIG[:DEFAULT_MASTER_RETRY] when the job sets none.
def self.master_task(job)
  options = {
    :async        => false,
    :local_master => true,
    :map_name     => job.map_name || job.name,
    :reduce_name  => job.reduce_name || job.name
  }

  Skynet::Job::FIELDS.each do |field|
    unless options.has_key?(field)
      options[field] = job.send(field) if job.send(field)
    end
  end

  master_job = Skynet::Job.new(options)
  task_retry = master_job.master_retry || Skynet::CONFIG[:DEFAULT_MASTER_RETRY]

  self.new(
    :task_id        => master_job.task_id,
    :data           => nil,
    :process        => master_job.to_h,
    :map_or_reduce  => :master,
    :name           => master_job.name,
    :result_timeout => master_job.master_timeout,
    :retry          => task_retry
  )
end
|
41
|
+
|
42
|
+
# Creates a task from an options hash.
#
# opts - Hash with required keys :task_id, :process and :map_or_reduce, and
#        optional :data, :name, :result_timeout and :retry.
#
# Raises ConstructorError when any required key is missing or falsy.
def initialize(opts = {})
  required = [:task_id, :process, :map_or_reduce]
  unless required.all? { |key| opts[key] }
    raise ConstructorError.new("Must provide task_id, process and map_or_reduce")
  end

  # Must be set before process= runs: the setter flips it to false for Procs.
  @marshalable    = true
  @task_id        = opts[:task_id].to_i
  @data           = opts[:data]
  self.process    = opts[:process]
  @name           = opts[:name]
  @map_or_reduce  = opts[:map_or_reduce]
  @result_timeout = opts[:result_timeout]
  @retry          = opts[:retry]
end
|
55
|
+
|
56
|
+
# Stores the process and marks the task as not marshalable when the process
# is a Proc. The flag is never reset to true here.
def process=(process)
  @marshalable = false if process.is_a?(Proc)
  @process = process
end
|
62
|
+
|
63
|
+
# Returns the marshalable flag: true unless a Proc has been assigned as the
# process.
def can_marshal?
  @marshalable
end
|
66
|
+
|
67
|
+
# Collapses the task kind to :master or :task; every non-master kind is
# reported as :task.
def task_or_master
  @map_or_reduce == :master ? @map_or_reduce : :task
end
|
74
|
+
|
75
|
+
# The task id, always coerced to an Integer.
def task_id
  @task_id.to_i
end
|
78
|
+
|
79
|
+
# Executes the task under its result timeout and returns the result.
#
# iteration - only used in the timeout error message.
#
# Dispatch order:
#   1. process is a Proc     -> called with the task data
#   2. map_or_reduce :master -> process is treated as job options; a
#                               Skynet::Job is built from it and run
#   3. process is a String   -> constantized, then the map_or_reduce method
#                               is sent to it with the data
# Any other combination returns nil.
#
# Raises TimeoutError when the timeout fires; every other exception is
# logged and re-raised unchanged.
def run(iteration=nil)
  info "running task #{name} TIMEOUT: #{result_timeout} task_id:#{task_id} MorR:#{map_or_reduce} PROCESS CLASS: #{@process.class}"
  begin
    Timeout.timeout(@result_timeout) do
      if @process.class == Proc
        debug " - #{@map_or_reduce} using Proc"
        @process.call @data
      elsif @map_or_reduce == :master
        debug " - as master"
        Skynet::Job.new(@process).run
      elsif @process.class == String
        debug " - #{@map_or_reduce} using class #{@process}"
        @process.constantize.send(@map_or_reduce,@data)
      end
    end
  rescue Timeout::Error => timeout_error
    # XXX NEWSFEED HACK (kept from the original): the data hashes pick up an
    # :event_object entry while being processed. It is stripped before the
    # data is dumped into the error message, which would otherwise be too
    # large to read.
    if @data.is_a?(Array) and @data.first.is_a?(Hash)
      @data.each { |entry| entry.delete(:event_object) }
    end
    raise TimeoutError.new("TASK TIMED OUT! #{name} IT:[#{iteration}] timeout:#{@result_timeout} #{timeout_error.inspect} DATA: #{@data.inspect} #{timeout_error.backtrace.join("\n")}")
  rescue Exception => failure
    # XXX Probably not necessary; the original keeps it for debugging only.
    error "Error running task #{failure.inspect} TASK:", self, failure.backtrace.join("\n")
    raise failure
  end
end
|
116
|
+
|
117
|
+
end ## END class Task
|
118
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
#!/usr/bin/env ruby -w
|
2
|
+
|
3
|
+
require 'rinda/ring'
|
4
|
+
require 'rinda/tuplespace'
|
5
|
+
require 'rubygems'
|
6
|
+
require 'logger'
|
7
|
+
require 'optparse'
|
8
|
+
require 'pp'
|
9
|
+
|
10
|
+
class Rinda::TupleSpaceProxy
  # Replacement for the stock #take: asks the wrapped tuplespace to move the
  # matching tuple with a nil port and returns whatever #move returns.
  #
  # tuple - template to match.
  # sec   - optional timeout handed to the tuplespace.
  # block - forwarded to #move.
  def take(tuple, sec=nil, &block)
    moved = @ts.move(nil, tuple, sec, &block)
    moved
  end
end
|
17
|
+
|
18
|
+
class Rinda::Tuple

  require 'ostruct'

  # Replacement for the stock initializer helper. Ordinary arrays are copied
  # element by element into @tuple. When DRb delivered a DRb::DRbUnknown
  # (it could not unmarshal the object), the buffer is unmarshalled again so
  # that the failure can be re-raised as a Rinda::RindaError naming the type.
  #
  # NOTE(review): when that second Marshal.load succeeds, @tuple is left
  # unset - confirm this is intended.
  def init_with_ary(ary)
    unless ary.instance_of?(DRb::DRbUnknown)
      @tuple = Array.new(ary.size)
      0.upto(@tuple.size - 1) { |idx| @tuple[idx] = ary[idx] }
      return @tuple.size
    end

    begin
      Marshal.load(ary.buf)
    rescue Exception => marshal_error
      raise Rinda::RindaError.new("DRb couldn't marshall tuple of type #{ary.name}, it was turned into a DRb::DRbUnknown object.\nMarshal exception #{marshal_error.inspect}\nOriginal object:\n\t#{ary.buf}.\n\nStacktrace:\n")
    end
  end
end
|
37
|
+
|
38
|
+
class Skynet
  # Empty declarations of the Skynet classes. Presumably they exist so that
  # marshalled instances passing through the tuplespace resolve to a known
  # class without loading the full library - TODO confirm.
  class Task
  end

  class Message
    class Payload
    end
  end

  class AsyncJob
  end

  class Job
  end

  class TuplespaceServer

    # Starts a Rinda tuplespace and blocks on the DRb thread.
    #
    # options - Hash:
    #   :port           - ring server port; defaults to 7647 (written back
    #                     into +options+)
    #   :logfile        - handed to Logger.new
    #   :loglevel       - name of a Logger severity constant ("debug",
    #                     "INFO", ...)
    #   :use_ringserver - when truthy, publishes the tuplespace through a
    #                     Rinda::RingServer on :port
    #   :drburi         - when set, serves the tuplespace directly at this URI
    #
    # SystemExit and Interrupt end the server silently; any other exception
    # is logged as fatal and swallowed.
    #
    # Raises NameError when :loglevel does not name a Logger constant.
    def self.start(options)
      options[:port] ||= 7647
      log = Logger.new(options[:logfile])
      # Plain constant lookup: the level is an external option and must
      # never be evaluated as Ruby source.
      log.level = Logger.const_get(options[:loglevel].to_s.upcase) if options[:loglevel]
      log.info "STARTING TUPLESPACE SERVER ON PORT: #{options[:port]} Logging to #{options[:logfile]}"

      # Create a TupleSpace to hold named services, and start running
      ts = Rinda::TupleSpace.new
      begin
        if options[:use_ringserver] and options[:port]
          DRb.start_service
          tuple = [:name,:TupleSpace, ts, 'Tuple Space']
          renewer = Rinda::SimpleRenewer.new
          ring_ts = Rinda::TupleSpace.new
          ring_ts.write(tuple, renewer)

          Rinda::RingServer.new(ring_ts, options[:port])
        end
        if options[:drburi]
          DRb.start_service(options[:drburi], ts)
        end
        DRb.thread.join
      rescue SystemExit, Interrupt
      rescue Exception => e
        log.fatal "Couldn't start Skynet Server #{e.inspect}"
      end

    end
  end
end
|
@@ -0,0 +1,451 @@
|
|
1
|
+
class Skynet
|
2
|
+
class Worker
|
3
|
+
|
4
|
+
include SkynetDebugger
|
5
|
+
include Skynet::GuidGenerator
|
6
|
+
|
7
|
+
RETRY_TIME = 2
|
8
|
+
Skynet::CONFIG[:WORKER_VERSION_CHECK_DELAY] ||= 30
|
9
|
+
|
10
|
+
Skynet::CONFIG[:WORKER_MAX_MEMORY] ||= 500
|
11
|
+
|
12
|
+
MEMORY_CHECK_DELAY = 30
|
13
|
+
MANAGER_PING_INTERVAL = 60
|
14
|
+
|
15
|
+
attr_accessor :message,:task, :mq, :processed
|
16
|
+
attr_reader :worker_id, :worker_info, :worker_type, :queue_id
|
17
|
+
|
18
|
+
class Error < StandardError; end
|
19
|
+
class RespawnWorker < Skynet::Error; end
|
20
|
+
class ConnectionFailure < Skynet::Error; end
|
21
|
+
class NoManagerError < Skynet::Error; end
|
22
|
+
|
23
|
+
# Short label for this class that embeds the current process id; presumably
# consumed by SkynetDebugger when tagging log lines (not visible here).
def self.debug_class_desc
  "WORKER-#{$$}"
end
|
26
|
+
|
27
|
+
# Sets up a worker: generates its id, opens a message queue, and assembles
# the status hash later reported to the manager.
#
# worker_type - stored as a Symbol in @worker_type; the value as given is
#               reported under :tasktype.
# options     - Hash; :queue_id selects the queue (default 0). Every entry is
#               merged over the status hash and so can override its fields.
def initialize(worker_type, options = {})
  @worker_id   = get_unique_id(1).to_i
  @worker_type = worker_type.to_sym
  @queue_id    = options[:queue_id] || 0
  @processed   = 0
  @in_process  = false
  @mq          = Skynet::MessageQueue.new

  debug "THIS WORKER TAKES #{worker_type}"

  base_info = {
    :tasktype    => worker_type,
    :hostname    => hostname,
    :process_id  => process_id,
    :worker_type => payload_type,
    :worker_id   => worker_id,
    :version     => mq.get_worker_version
  }
  @worker_info = base_info.merge(options)
end
|
47
|
+
|
48
|
+
# Pid of the current process.
def process_id
  Process.pid
end
|
51
|
+
|
52
|
+
# Host name of this machine, looked up once and memoized in @machine_name.
def hostname
  @machine_name = Socket.gethostname unless @machine_name
  @machine_name
end
|
55
|
+
|
56
|
+
# Worker version this process last read from the queue (nil until
# new_version_respawn? has run).
def version
  @curver
end
|
59
|
+
|
60
|
+
# Decides whether this worker should be respawned because the worker
# version stored in the queue has changed.
#
# The first call only records the current version (falling back to 1 when
# the queue has none) and returns false. Later calls hit the queue at most
# once every Skynet::CONFIG[:WORKER_VERSION_CHECK_DELAY] seconds and return
# true only when the queue version differs from ours and our version is no
# longer active on this worker's queue. When the queue has lost its version
# it is reset to 1 and false is returned.
def new_version_respawn?
  unless @verchecktime
    @verchecktime = Time.now
    begin
      @curver = mq.get_worker_version
      debug "FINDING INITIAL VER #{@curver}"
    rescue Skynet::RequestExpiredError
      warn "NO INITIAL VER IN MQ using 1"
      @curver = 1
    end
    return false
  end

  next_check_at = @verchecktime + Skynet::CONFIG[:WORKER_VERSION_CHECK_DELAY]
  return false if Time.now < next_check_at

  @verchecktime = Time.now
  begin
    queue_version = mq.get_worker_version
    if queue_version != @curver && !mq.version_active?(@curver, queue_id)
      info "RESTARTING WORKER ON PID #{$$}"
      return true
    end
  rescue Skynet::RequestExpiredError
    warn "NO CURRENT WORKER REV IN MQ still using 1"
    mq.set_worker_version(1)
  end
  false
end
|
91
|
+
|
92
|
+
# Reports the initial idle status to the manager: zero tasks processed,
# started now.
def notify_worker_started
  startup_status = @worker_info.merge(
    :name       => "waiting for #{@worker_type}",
    :processed  => 0,
    :started_at => Time.now.to_i
  )
  write_worker_status(startup_status)
end
|
101
|
+
|
102
|
+
# Marks the worker busy and reports the task it is starting.
#
# task - Hash describing the task; :processed and :started_at are written
#        into it (the caller's hash is mutated) before it is merged over the
#        worker info and sent to the manager.
def notify_task_begun(task)
  task.merge!(:processed => @processed, :started_at => Time.now.to_i)
  @in_process = true
  write_worker_status(@worker_info.merge(task))
end
|
108
|
+
|
109
|
+
# Counts the finished task, marks the worker idle, and reports the idle
# status (task and job ids zeroed) to the manager.
def notify_task_complete
  @processed += 1
  @in_process = false

  idle_status = @worker_info.merge(
    :task_id       => 0,
    :job_id        => 0,
    :name          => "waiting for #{@worker_type}",
    :processed     => @processed,
    :map_or_reduce => nil,
    :started_at    => Time.now.to_i
  )
  write_worker_status(idle_status)
end
|
124
|
+
|
125
|
+
# Logs that the worker is stopping and reports a final status to the
# manager; the status carries :process_id => nil.
def notify_worker_stop
  info "Worker #{process_id} stopping..."

  final_status = @worker_info.merge(
    :task_id       => 0,
    :job_id        => 0,
    :name          => "waiting for #{@worker_type}",
    :processed     => @processed,
    :process_id    => nil,
    :map_or_reduce => nil,
    :started_at    => Time.now.to_i
  )
  write_worker_status(final_status)
end
|
139
|
+
|
140
|
+
# Best-effort call of +method+ on the manager.
#
# Returns the manager's answer. Connection failures and every other
# exception are logged and swallowed, so a missing manager never takes the
# worker down; in that case the return value is whatever +error+ returns.
def manager_send(method,*args)
  manager.send(method,*args)
rescue DRb::DRbConnError, Errno::ECONNREFUSED => conn_error
  error "Worker could not connect to manager to call #{method} on manager #{conn_error.inspect}"
rescue Exception => failure
  error "Worker could not connect call #{method} on manager #{failure.inspect} args:", args
end
|
149
|
+
|
150
|
+
# Sends a status hash to the manager through its :worker_notify call.
def write_worker_status(status)
  manager_send(:worker_notify, status)
end
|
153
|
+
|
154
|
+
# Handle on the manager, fetched through Skynet::Manager.get on every call
# (nothing is cached here).
def manager
  Skynet::Manager.get
end
|
157
|
+
|
158
|
+
# Payload type used when taking tasks: nil for an :any worker, otherwise
# the worker type itself.
def payload_type
  worker_type == :any ? nil : worker_type
end
|
162
|
+
|
163
|
+
# TERM/INT handler. The first signal requests shutdown: an idle worker
# reports its stop and exits at once, a busy one only gets @die set (the
# main loop exits on its next pass). A second signal exits immediately.
def interrupt
  exit if @die

  @die = true
  return if @in_process

  notify_worker_stop
  exit
end
|
174
|
+
|
175
|
+
# Main worker loop: takes tasks from the message queue, runs them, and
# writes their results back until the worker is told to stop.
#
# Signals: HUP requests a respawn (raised immediately when idle); TERM and
# INT go through #interrupt.
#
# Raises Skynet::Worker::RespawnWorker when the worker should be replaced
# (new version, HUP, memory limit, or WORKER_MAX_PROCESSED reached), and
# re-raises the connection error after more than 20 failed reconnects.
# Returns normally on Interrupt/SystemExit, NoManagerError, or after more
# than 1000 unexpected exceptions.
#
# The WORKER_MAX_PROCESSED respawn used to reuse the memory-limit message;
# it now reports the processed count and the configured maximum.
def start
  exceptions = 0
  conerror = 0
  @curver = nil

  # setup signal handlers for manager
  Signal.trap("HUP") do
    @respawn = true
    raise Skynet::Worker::RespawnWorker.new if not @in_process
  end
  Signal.trap("TERM") { interrupt }
  Signal.trap("INT") { interrupt }

  raise Skynet::Worker::RespawnWorker.new if new_version_respawn?

  printlog "STARTING WORKER @ VER:#{@curver} type:#{@worker_type} QUEUE_ID:#{queue_id}"

  notify_worker_started

  message = nil
  task = nil

  loop do
    message = nil
    begin
      if Skynet::CONFIG[:WORKER_MAX_PROCESSED] and Skynet::CONFIG[:WORKER_MAX_PROCESSED] > 0 and @processed >= Skynet::CONFIG[:WORKER_MAX_PROCESSED]
        raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX PROCESSED AT: #{@processed} MAX: #{Skynet::CONFIG[:WORKER_MAX_PROCESSED]}")
      end
      if @die
        exit
      elsif @respawn
        raise Skynet::Worker::RespawnWorker.new()
      end

      if local_mem = max_memory_reached?
        raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX MEM AT: #{local_mem} MAX: #{Skynet::CONFIG[:WORKER_MAX_MEMORY]}")
      end

      # A previous pass lost the queue connection (@mq was cleared); reconnect.
      if conerror > 0
        @mq = Skynet::MessageQueue.new
        warn "WORKER RECONNECTED AFTER #{conerror} tries"
        conerror = 0
      end

      message = mq.take_next_task(@curver, 0.00001, payload_type, queue_id)

      next unless message.respond_to?(:payload)

      task = message.payload
      error "BAD MESSAGE", task unless task.respond_to?(:map_or_reduce)

      info "STEP 2 GOT MESSAGE #{message.name} type:#{task.map_or_reduce}, jobid: #{message.job_id}, taskid:#{message.task_id} it: #{message.iteration}"
      debug "STEP 2.1 message=", message.to_a
      next unless task
      # maybe instead of putting a time in the future, it puts the start time and an offset in seconds

      info "STEP 4 RUNNING TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
      notify_task_begun({
        :job_id => message.job_id,
        :task_id => message.task_id,
        :iteration => message.iteration,
        :name => message.name,
        :map_or_reduce => task.map_or_reduce
      })
      result = task.run(message.iteration)

      info "STEP 5 GOT RESULT FROM RUN TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
      debug "STEP 5.1 RESULT DATA:", result

      result_message = mq.write_result(message,result,task.result_timeout)
      info "STEP 6 WROTE RESULT MESSAGE #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
      notify_task_complete

    rescue Skynet::Task::TimeoutError => e
      error "Task timed out while executing #{e.inspect} #{e.backtrace.join("\n")}"
      @in_process = false
      next

    rescue Skynet::Worker::RespawnWorker => e
      info "Respawning and taking worker status #{e.message}"
      notify_worker_stop
      raise e

    rescue Skynet::RequestExpiredError => e
      # Raised when no task was available: use the idle moment to check
      # for a new worker version.
      if new_version_respawn?
        notify_worker_stop
        manager_send(:restart_worker,$$)
      end
      sleep 1
      next

    rescue Skynet::ConnectionError, DRb::DRbConnError => e
      conerror += 1
      retry_time = conerror > 6 ? RETRY_TIME * 3 : RETRY_TIME
      error "#{e.message}, RETRY #{conerror} in #{retry_time} seconds !!"
      @mq = nil
      sleep retry_time
      if conerror > 20
        fatal "TOO MANY RECONNECTION EXCEPTIONS #{e.message}"
        notify_worker_stop
        raise e
      end
      next

    rescue NoManagerError => e
      fatal e.message
      break
    rescue Interrupt, SystemExit => e
      info "Exiting..."
      notify_worker_stop
      break
    rescue Exception => e
      error "skynet_worker.rb:#{__LINE__} #{e.inspect} #{e.backtrace.join("\n")}"
      exceptions += 1
      break if exceptions > 1000
      if message
        # Report the failure against the message that was being processed.
        mq.write_error(message,"#{e.inspect} #{e.backtrace.join("\n")}",(task.respond_to?(:result_timeout) ? task.result_timeout : 200))
      else
        # what do we do here
      end
      @in_process = false
      next
    end
  end
end
|
312
|
+
|
313
|
+
@@ok_to_mem_check = false
|
314
|
+
@@lastmem = nil
|
315
|
+
@@memct = 0
|
316
|
+
|
317
|
+
# Checks, at most once every MEMORY_CHECK_DELAY seconds, whether this
# process has grown past Skynet::CONFIG[:WORKER_MAX_MEMORY].
#
# Returns the measured size (truthy) when over the limit. Returns false when
# memory checks are unavailable, on the first call, or between checks, and
# nil when a check ran and the worker is within the limit.
def max_memory_reached?
  return false unless ok_to_mem_check?

  unless @memchecktime
    @memchecktime = Time.now
    return false
  end

  return false unless Time.now > (@memchecktime + MEMORY_CHECK_DELAY)

  @memchecktime = Time.now
  current_mem = get_memory_size.to_i
  current_mem > Skynet::CONFIG[:WORKER_MAX_MEMORY] ? current_mem : nil
end
|
330
|
+
|
331
|
+
# Reads the VmSize entry from a /proc-style status file.
#
# file   - path of the status file.
# format - :pretty returns a String truncated to 5 decimals (needs
#          BigDecimal to be loaded); anything else returns a Float.
#
# The raw figure is divided by 1000. This assumes the usual
# "VmSize:\t<n> kB" layout, since the slice below drops the 7-character
# label and the trailing " kB\n".
#
# Returns '0' when the file cannot be read or has no VmSize line. The file
# is opened in block form so the handle is always closed, and File.open is
# used so the path is never interpreted as a command.
def find_pid_size(file, format=:notpretty)
  begin
    File.open(file) do |status|
      status.each_line do |line|
        if line.index('VmSize')
          temp = line[7..-5].strip.to_f/1000
          return BigDecimal(temp.to_s).truncate(5).to_s('F') if format == :pretty
          return temp
        end
      end
    end
    # No VmSize line: report the same "unknown" value as the error path
    # rather than leaking the File object to callers.
    '0'
  rescue StandardError => e
    warn "ERROR #{e.inspect}"
    '0'
  end
end
|
345
|
+
|
346
|
+
# Virtual memory size of the current process as reported by find_pid_size
# for /proc/self/status.
def get_memory_size
  status_file = "/proc/self/status"
  find_pid_size(status_file)
end
|
349
|
+
|
350
|
+
# Whether memory checks are possible on this machine, i.e. whether
# /proc/self/status exists. The answer is cached in the class variable
# @@ok_to_mem_check (true, or :notok once the file was found missing), so
# the filesystem is probed only once.
#
# File.exist? is used because File.exists? is no longer available on
# current Ruby versions.
def ok_to_mem_check?
  return true if @@ok_to_mem_check == true
  return false if @@ok_to_mem_check == :notok
  if File.exist?('/proc/self/status')
    # Record a baseline reading the first time the check is enabled.
    @@lastmem ||= get_memory_size.to_i
    return @@ok_to_mem_check = true
  else
    @@ok_to_mem_check = :notok
    return false
  end
end
|
361
|
+
|
362
|
+
# Command-line entry point: parses ARGV into +options+, loads the required
# libraries and the config file, then runs a worker until it stops.
#
# options - Hash of defaults; mutated in place. Recognised keys are
#           :worker_type (default :any), :required_libs (default []),
#           :config_file, :queue and :queue_id.
#
# A queue may be chosen by name (:queue) or by id (:queue_id), not both.
# The name is resolved through Skynet::Config, the same class this method
# uses for the reverse lookup in its startup log line; the bare +config+
# used before is not defined in this scope.
# NOTE(review): the name lookup runs before the config file is required -
# confirm queue names defined in that file are already known at this point.
#
# Missing libraries or a missing config file are logged and end the process
# via exit. LoadError is rescued (rather than ActiveSupport's
# MissingSourceFile) so this works whether or not ActiveSupport is loaded.
#
# On RespawnWorker a replacement process is launched with the same options
# before exiting.
#
# Raises Skynet::Error for an invalid worker type or when both :queue and
# :queue_id are given.
def self.start(options={})
  options[:worker_type]    ||= :any
  options[:required_libs]  ||= []

  OptionParser.new do |opt|
    opt.banner = "Usage: worker [options]"
    opt.on('-r', '--required LIBRARY', 'Include the specified libraries') do |v|
      options[:required_libs] << v
    end
    opt.on('--worker_type=WORKERTYPE', "master, task or any") do |v|
      if ["any","master","task"].include?(v)
        options[:worker_type] = v
      else
        raise Skynet::Error.new("#{v} is not a valid worker_type")
      end
    end
    opt.on('--config=CONFIG_FILE', 'Where to find the skynet.rb config file') do |v|
      options[:config_file] = File.expand_path(v)
    end
    opt.on('--queue=QUEUE_NAME', 'Which queue should these workers use (default "default").') do |v|
      options[:queue] = v
    end
    opt.on('--queue_id=queue_id', 'Which queue should these workers use (default 0).') do |v|
      options[:queue_id] = v.to_i
    end
    opt.parse!(ARGV)
  end

  if options[:queue]
    if options[:queue_id]
      raise Skynet::Error.new("You may either provide a queue_id or a queue, but not both.")
    end
    options[:queue_id] = Skynet::Config.new.queue_id_by_name(options[:queue])
  end

  options[:required_libs].each do |adlib|
    begin
      require adlib
    rescue LoadError => e
      error "The included lib #{adlib} was not found: #{e.inspect}"
      exit
    end
  end

  options[:config_file] ||= Skynet::CONFIG[:CONFIG_FILE]
  if options[:config_file]
    begin
      require options[:config_file]
    rescue LoadError => e
      error "The config file at #{options[:config_file]} was not found: #{e.inspect}"
      exit
    end
  else
    error "Config file missing. Please add a config/skynet_config.rb before starting."
    exit
  end

  debug "WORKER STARTING WORKER_TYPE?:#{options[:worker_type]}. QUEUE: #{Skynet::Config.new.queue_name_by_id(options[:queue_id])}"

  begin
    worker = Skynet::Worker.new(options[:worker_type], options)
    worker.start
  rescue Skynet::Worker::NoManagerError => e
    fatal e.message
    exit
  rescue Skynet::Worker::RespawnWorker => e
    warn "WORKER #{$$} SCRIPT CAUGHT RESPAWN.  RESTARTING #{e.message}"
    # Relaunch through the launcher with the options this worker ran with.
    cmd = "ruby #{Skynet::CONFIG[:LAUNCHER_PATH]} --worker_type=#{options[:worker_type]} --queue_id=#{options[:queue_id]} "
    cmd << "--config=#{options[:config_file]} "
    cmd << "-r #{options[:required_libs].join(' -r ')}" if options[:required_libs] and not options[:required_libs].empty?
    Skynet.fork_and_exec(cmd)
    exit
  rescue SystemExit
    info "WORKER #{$$} EXITING GRACEFULLY"
  rescue Exception => e
    fatal "WORKER #{$$} DYING #{e.class} #{e.message} #{e.backtrace}"
    report = ExceptionReport.new(e)
    report.save
  end
end
|
442
|
+
end
|
443
|
+
end
|
444
|
+
|
445
|
+
# No-op stand-in for an exception reporter: accepts any constructor
# arguments and does nothing on #save. Worker.start instantiates it for
# fatal errors.
class ExceptionReport
  def initialize(*args); end

  def save; end
end
|