skynet 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +99 -0
- data/Manifest.txt +10 -9
- data/README.txt +74 -7
- data/app_generators/skynet_install/skynet_install_generator.rb +26 -22
- data/app_generators/skynet_install/templates/migration.rb +11 -5
- data/app_generators/skynet_install/templates/skynet +25 -12
- data/app_generators/skynet_install/templates/skynet_schema.sql +56 -0
- data/bin/skynet +26 -2
- data/bin/skynet_install +24 -0
- data/bin/skynet_tuplespace_server +13 -0
- data/config/hoe.rb +1 -0
- data/lib/skynet.rb +3 -0
- data/lib/skynet/mapreduce_helper.rb +74 -0
- data/lib/skynet/message_queue_adapters/mysql.rb +225 -172
- data/lib/skynet/message_queue_adapters/tuple_space.rb +31 -16
- data/lib/skynet/skynet_active_record_extensions.rb +78 -46
- data/lib/skynet/skynet_config.rb +162 -23
- data/lib/skynet/skynet_console.rb +23 -10
- data/lib/skynet/skynet_console_helper.rb +61 -58
- data/lib/skynet/skynet_job.rb +741 -493
- data/lib/skynet/skynet_launcher.rb +5 -1
- data/lib/skynet/skynet_manager.rb +106 -49
- data/lib/skynet/skynet_message.rb +169 -174
- data/lib/skynet/skynet_message_queue.rb +29 -16
- data/lib/skynet/skynet_partitioners.rb +92 -0
- data/lib/skynet/skynet_ruby_extensions.rb +3 -4
- data/lib/skynet/skynet_task.rb +61 -19
- data/lib/skynet/skynet_tuplespace_server.rb +0 -2
- data/lib/skynet/skynet_worker.rb +73 -51
- data/lib/skynet/version.rb +1 -1
- data/test/test_active_record_extensions.rb +138 -0
- data/test/test_helper.rb +6 -0
- data/test/{mysql_message_queue_adaptor_test.rb → test_mysql_message_queue_adapter.rb} +94 -30
- data/test/test_skynet.rb +11 -11
- data/test/test_skynet_install_generator.rb +0 -4
- data/test/test_skynet_job.rb +717 -0
- data/test/test_skynet_manager.rb +142 -0
- data/test/test_skynet_message.rb +229 -0
- data/test/test_skynet_task.rb +24 -0
- data/test/{tuplespace_message_queue_test.rb → test_tuplespace_message_queue.rb} +25 -30
- data/website/index.html +56 -16
- data/website/index.txt +55 -25
- data/website/template.rhtml +1 -1
- metadata +29 -13
- data/app_generators/skynet_install/templates/skynet_console +0 -16
- data/bin/skynet_console +0 -9
- data/sometest.rb +0 -23
- data/test/all_models_test.rb +0 -139
- data/test/skynet_manager_test.rb +0 -107
- data/test/skynet_message_test.rb +0 -42
- data/tmtags +0 -1242
@@ -12,15 +12,14 @@ class Skynet
|
|
12
12
|
class RequestExpiredError < Skynet::Error
|
13
13
|
end
|
14
14
|
|
15
|
+
# This class is the interface to the Skynet Message Queue.
|
15
16
|
class MessageQueue
|
16
17
|
|
17
18
|
include SkynetDebugger
|
18
19
|
|
19
20
|
require 'forwardable'
|
20
21
|
extend Forwardable
|
21
|
-
|
22
|
-
# require 'skynet_message'
|
23
|
-
|
22
|
+
|
24
23
|
def self.adapter
|
25
24
|
Object.module_eval(Skynet::CONFIG[:MESSAGE_QUEUE_ADAPTER], __FILE__, __LINE__).adapter
|
26
25
|
end
|
@@ -34,8 +33,26 @@ class Skynet
|
|
34
33
|
mq
|
35
34
|
end
|
36
35
|
|
37
|
-
|
38
|
-
|
36
|
+
# Is this version still active in the queue?
|
37
|
+
def version_active?(version,queue_id=0)
|
38
|
+
mq.version_active?(version,queue_id)
|
39
|
+
end
|
40
|
+
|
41
|
+
# Retrieves the current worker version
|
42
|
+
def get_worker_version
|
43
|
+
mq.get_worker_version
|
44
|
+
end
|
45
|
+
|
46
|
+
# Sets the current worker version (causing workers to restart)
|
47
|
+
def set_worker_version(version)
|
48
|
+
mq.set_worker_version(version)
|
49
|
+
end
|
50
|
+
|
51
|
+
# Increments the current worker version (causing workers to restart)
|
52
|
+
def increment_worker_version
|
53
|
+
newver = self.get_worker_version + 1
|
54
|
+
self.set_worker_version(newver)
|
55
|
+
newver
|
39
56
|
end
|
40
57
|
|
41
58
|
def mq
|
@@ -43,14 +60,16 @@ class Skynet
|
|
43
60
|
end
|
44
61
|
|
45
62
|
def_delegators :mq, :take_next_task, :write_message, :take_result, :write_error, :write_result,
|
46
|
-
:list_tasks, :list_results,
|
63
|
+
:list_tasks, :list_results, :stats,
|
47
64
|
:clear_outstanding_tasks, :clear_outstanding_results,
|
48
|
-
:take_worker_status, :write_worker_status, :read_all_worker_statuses, :clear_worker_status
|
49
|
-
:get_worker_version, :set_worker_version, :stats
|
50
|
-
|
51
|
-
|
65
|
+
:take_worker_status, :write_worker_status, :read_all_worker_statuses, :clear_worker_status
|
52
66
|
|
53
67
|
|
68
|
+
|
69
|
+
def message_fields
|
70
|
+
Skynet::Message.fields
|
71
|
+
end
|
72
|
+
|
54
73
|
def print_stats
|
55
74
|
"TAKEN TASKS: #{list_tasks(1).size}, UNTAKEN_TASKS: #{list_tasks(0).size} RESULTS: #{list_results.size}"
|
56
75
|
end
|
@@ -59,12 +78,6 @@ class Skynet
|
|
59
78
|
list_tasks + list_results
|
60
79
|
end
|
61
80
|
|
62
|
-
def increment_worker_version
|
63
|
-
newver = self.get_worker_version + 1
|
64
|
-
self.set_worker_version(newver)
|
65
|
-
newver
|
66
|
-
end
|
67
|
-
|
68
81
|
def ansi_clear
|
69
82
|
puts "\033[2J\033[H"
|
70
83
|
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
class Skynet
|
2
|
+
# Collection of partitioning utilities
|
3
|
+
class Partitioners
|
4
|
+
include SkynetDebugger
|
5
|
+
|
6
|
+
# Split one block of data into partitions
|
7
|
+
#
|
8
|
+
class SimplePartitionData < Partitioners
|
9
|
+
|
10
|
+
def self.reduce_partition(data, partitions)
|
11
|
+
partitioned_data = Array.new
|
12
|
+
|
13
|
+
# If data size is significantly greater than the number of desired
|
14
|
+
# partitions, we can divide the data roughly but the last partition
|
15
|
+
# may be smaller than the others.
|
16
|
+
#
|
17
|
+
return data if (not data) or data.empty?
|
18
|
+
|
19
|
+
if partitions >= data.length
|
20
|
+
data.each do |datum|
|
21
|
+
partitioned_data << [datum]
|
22
|
+
end
|
23
|
+
elsif (data.length >= partitions * 2)
|
24
|
+
# Use quicker but less "fair" method
|
25
|
+
size = data.length / partitions
|
26
|
+
|
27
|
+
if (data.length % partitions != 0)
|
28
|
+
size += 1 # Last slice of leftovers
|
29
|
+
end
|
30
|
+
|
31
|
+
(0..partitions - 1).each do |i|
|
32
|
+
partitioned_data[i] = data[i * size, size]
|
33
|
+
end
|
34
|
+
else
|
35
|
+
# Slower method, but partitions evenly
|
36
|
+
partitions = (data.size < partitions ? data.size : partitions)
|
37
|
+
(0..partitions - 1).each { |i| partitioned_data[i] = Array.new }
|
38
|
+
|
39
|
+
data.each_with_index do |datum, i|
|
40
|
+
partitioned_data[i % partitions] << datum
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
partitioned_data
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
|
49
|
+
class RecombineAndSplit < Partitioners
|
50
|
+
# Tries to be smart about what kind of data its getting, whether array of arrays or array of arrays of arrays.
|
51
|
+
#
|
52
|
+
def self.reduce_partition(post_map_data,new_partitions)
|
53
|
+
return post_map_data unless post_map_data.is_a?(Array) and (not post_map_data.empty?) and post_map_data.first.is_a?(Array)
|
54
|
+
if not post_map_data.first.first.is_a?(Array)
|
55
|
+
partitioned_data = post_map_data.flatten
|
56
|
+
else
|
57
|
+
partitioned_data = post_map_data.inject(Array.new) do |data,part|
|
58
|
+
data += part
|
59
|
+
end
|
60
|
+
end
|
61
|
+
partitioned_data = Skynet::Partitioners::SimplePartitionData.reduce_partition(partitioned_data, new_partitions)
|
62
|
+
debug "POST PARTITIONED DATA", partitioned_data
|
63
|
+
partitioned_data
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
class ArrayDataSplitByFirstEntry < Partitioners
|
68
|
+
# Smarter partitioner for array data, generates simple sum of array[0]
|
69
|
+
# and ensures that all arrays sharing that key go into the same partition.
|
70
|
+
#
|
71
|
+
def self.reduce_partition(partitioned_data, new_partitions)
|
72
|
+
partitions = Array.new
|
73
|
+
(0..new_partitions - 1).each { |i| partitions[i] = Array.new }
|
74
|
+
|
75
|
+
partitioned_data.each do |partition|
|
76
|
+
partition.each do |array|
|
77
|
+
next unless array.class == Array and array.size == 2
|
78
|
+
if array[0].kind_of?(Fixnum)
|
79
|
+
key = array[0]
|
80
|
+
else
|
81
|
+
key = 0
|
82
|
+
array[0].each_byte { |c| key += c }
|
83
|
+
end
|
84
|
+
partitions[key % new_partitions] << array
|
85
|
+
end
|
86
|
+
end
|
87
|
+
partitions
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
92
|
+
end
|
@@ -7,7 +7,7 @@ module Enumerable
|
|
7
7
|
data = self
|
8
8
|
end
|
9
9
|
jobopts = {
|
10
|
-
:
|
10
|
+
:mappers => 20000,
|
11
11
|
:map_data => data,
|
12
12
|
:name => "#{klass} Enumerable MASTER",
|
13
13
|
:map_name => "#{klass} Enumerable MAP",
|
@@ -15,8 +15,7 @@ module Enumerable
|
|
15
15
|
:map_timeout => 3600,
|
16
16
|
:reduce_timeout => 3600,
|
17
17
|
:master_timeout => 3600,
|
18
|
-
:master_result_timeout => 3600
|
19
|
-
:async => false
|
18
|
+
:master_result_timeout => 3600
|
20
19
|
}
|
21
20
|
|
22
21
|
jobopts[:map_reduce_class] = klass.to_s if klass
|
@@ -27,7 +26,7 @@ module Enumerable
|
|
27
26
|
end
|
28
27
|
|
29
28
|
if block_given? or not jobopts[:async]
|
30
|
-
job = Skynet::Job.new(jobopts)
|
29
|
+
job = Skynet::Job.new(jobopts.merge(:local_master => true))
|
31
30
|
else
|
32
31
|
job = Skynet::AsyncJob.new(jobopts)
|
33
32
|
end
|
data/lib/skynet/skynet_task.rb
CHANGED
@@ -1,15 +1,12 @@
|
|
1
1
|
class Skynet
|
2
2
|
class Task
|
3
|
-
|
4
3
|
include SkynetDebugger
|
5
|
-
|
6
|
-
# require 'ostruct'
|
7
4
|
|
8
|
-
class ConstructorError < StandardError
|
9
|
-
end
|
5
|
+
class ConstructorError < StandardError; end
|
6
|
+
class TimeoutError < StandardError; end
|
10
7
|
|
11
|
-
attr_reader :data, :process, :result, :map_or_reduce
|
12
|
-
attr_accessor :name, :
|
8
|
+
attr_reader :data, :process, :result, :map_or_reduce, :marshalable
|
9
|
+
attr_accessor :name, :result_timeout, :retry
|
13
10
|
|
14
11
|
@@log = nil
|
15
12
|
|
@@ -17,6 +14,31 @@ class Skynet
|
|
17
14
|
"TASK"
|
18
15
|
end
|
19
16
|
|
17
|
+
def self.master_task(job)
|
18
|
+
options = {
|
19
|
+
:async => false,
|
20
|
+
:local_master => true,
|
21
|
+
:map_name => job.map_name || job.name,
|
22
|
+
:reduce_name => job.reduce_name || job.name,
|
23
|
+
}
|
24
|
+
Skynet::Job::FIELDS.each do |field|
|
25
|
+
next if options.has_key?(field)
|
26
|
+
options[field] = job.send(field) if job.send(field)
|
27
|
+
end
|
28
|
+
|
29
|
+
master_job = Skynet::Job.new(options)
|
30
|
+
|
31
|
+
self.new(
|
32
|
+
:task_id => master_job.task_id,
|
33
|
+
:data => nil,
|
34
|
+
:process => master_job.to_h,
|
35
|
+
:map_or_reduce => :master,
|
36
|
+
:name => master_job.name,
|
37
|
+
:result_timeout => master_job.master_timeout,
|
38
|
+
:retry => master_job.master_retry || Skynet::CONFIG[:DEFAULT_MASTER_RETRY]
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
20
42
|
def initialize(opts = {})
|
21
43
|
unless opts[:task_id] and opts[:process] and opts[:map_or_reduce]
|
22
44
|
raise ConstructorError.new("Must provide task_id, process and map_or_reduce")
|
@@ -28,6 +50,7 @@ class Skynet
|
|
28
50
|
@name = opts[:name]
|
29
51
|
@map_or_reduce = opts[:map_or_reduce]
|
30
52
|
@result_timeout = opts[:result_timeout]
|
53
|
+
@retry = opts[:retry]
|
31
54
|
end
|
32
55
|
|
33
56
|
def process=(process)
|
@@ -53,22 +76,41 @@ class Skynet
|
|
53
76
|
@task_id.to_i
|
54
77
|
end
|
55
78
|
|
56
|
-
def run
|
57
|
-
|
79
|
+
def run(iteration=nil)
|
80
|
+
info "running task #{name} TIMEOUT: #{result_timeout} task_id:#{task_id} MorR:#{map_or_reduce} PROCESS CLASS: #{@process.class}"
|
58
81
|
begin
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
82
|
+
Timeout::timeout(@result_timeout) do
|
83
|
+
if @process.class == Proc
|
84
|
+
debug " - #{@map_or_reduce} using Proc"
|
85
|
+
@process.call @data
|
86
|
+
elsif @map_or_reduce == :master
|
87
|
+
debug " - as master"
|
88
|
+
job = Skynet::Job.new(@process)
|
89
|
+
job.run
|
90
|
+
elsif @process.class == String
|
91
|
+
debug " - #{@map_or_reduce} using class #{@process}"
|
92
|
+
@process.constantize.send(@map_or_reduce,@data)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
rescue Timeout::Error => e
|
96
|
+
# ==========
|
97
|
+
# = XXX NEWSFEED HACK
|
98
|
+
# = I'm printing the data hash, but that hash has all this shit added to it after runing through newsfeed.
|
99
|
+
# = It's actually nice to be able to see what was added, but sometimes its too much data.
|
100
|
+
# = Though the handy part will be adding instrumentation to the event_hash and seeing it onyl during a timeout.
|
101
|
+
# ==========
|
102
|
+
|
103
|
+
if @data.is_a?(Array) and @data.first.is_a?(Hash)
|
104
|
+
@data.each {|h|h.delete(:event_object)}
|
69
105
|
end
|
106
|
+
raise TimeoutError.new("TASK TIMED OUT! #{name} IT:[#{iteration}] timeout:#{@result_timeout} #{e.inspect} DATA: #{@data.inspect} #{e.backtrace.join("\n")}")
|
107
|
+
|
108
|
+
# ==========
|
109
|
+
# = XXX This rescue block is probably not necessary. Just for debugging for now. =
|
110
|
+
# ==========
|
70
111
|
rescue Exception => e
|
71
112
|
error "Error running task #{e.inspect} TASK:", self, e.backtrace.join("\n")
|
113
|
+
raise e
|
72
114
|
end
|
73
115
|
end
|
74
116
|
|
data/lib/skynet/skynet_worker.rb
CHANGED
@@ -5,35 +5,32 @@ class Skynet
|
|
5
5
|
include Skynet::GuidGenerator
|
6
6
|
|
7
7
|
RETRY_TIME = 2
|
8
|
-
|
9
|
-
|
8
|
+
Skynet::CONFIG[:WORKER_VERSION_CHECK_DELAY] ||= 30
|
9
|
+
|
10
|
+
Skynet::CONFIG[:WORKER_MAX_MEMORY] ||= 500
|
11
|
+
|
10
12
|
MEMORY_CHECK_DELAY = 30
|
11
13
|
MANAGER_PING_INTERVAL = 60
|
12
14
|
|
13
15
|
attr_accessor :message,:task, :mq, :processed
|
14
|
-
attr_reader :worker_id, :worker_info, :worker_type
|
16
|
+
attr_reader :worker_id, :worker_info, :worker_type, :queue_id
|
15
17
|
|
16
|
-
class Error
|
17
|
-
end
|
18
|
-
|
19
|
-
class
|
20
|
-
end
|
21
|
-
|
22
|
-
class ConnectionFailure < Skynet::Error
|
23
|
-
end
|
24
|
-
|
25
|
-
class NoManagerError < Skynet::Error
|
26
|
-
end
|
18
|
+
class Error < StandardError; end
|
19
|
+
class RespawnWorker < Skynet::Error; end
|
20
|
+
class ConnectionFailure < Skynet::Error; end
|
21
|
+
class NoManagerError < Skynet::Error; end
|
27
22
|
|
28
23
|
def self.debug_class_desc
|
29
24
|
"WORKER-#{$$}"
|
30
25
|
end
|
31
26
|
|
32
|
-
def initialize(worker_type
|
27
|
+
def initialize(worker_type, options = {})
|
33
28
|
@worker_id = get_unique_id(1).to_i
|
34
|
-
@mq = Skynet::MessageQueue.new
|
35
29
|
@worker_type = worker_type.to_sym
|
30
|
+
@queue_id = options[:queue_id] || 0
|
36
31
|
@processed = 0
|
32
|
+
@mq = Skynet::MessageQueue.new
|
33
|
+
|
37
34
|
debug "THIS WORKER TAKES #{worker_type}"
|
38
35
|
|
39
36
|
@worker_info = {
|
@@ -41,8 +38,9 @@ class Skynet
|
|
41
38
|
:process_id => process_id,
|
42
39
|
:worker_type => payload_type,
|
43
40
|
:worker_id => worker_id,
|
44
|
-
:version => mq.get_worker_version
|
45
|
-
}
|
41
|
+
:version => mq.get_worker_version,
|
42
|
+
}
|
43
|
+
@worker_info.merge!(options)
|
46
44
|
end
|
47
45
|
|
48
46
|
def process_id
|
@@ -68,14 +66,14 @@ class Skynet
|
|
68
66
|
@curver = 1
|
69
67
|
end
|
70
68
|
else
|
71
|
-
if Time.now < (@verchecktime +
|
69
|
+
if Time.now < (@verchecktime + Skynet::CONFIG[:WORKER_VERSION_CHECK_DELAY])
|
72
70
|
return false
|
73
71
|
else
|
74
72
|
@verchecktime = Time.now
|
75
73
|
begin
|
76
74
|
newver = mq.get_worker_version
|
77
75
|
# debug "CURVER #{@curver} NEWVER: #{newver}"
|
78
|
-
if newver != @curver
|
76
|
+
if newver != @curver and not mq.version_active?(@curver, queue_id)
|
79
77
|
info "RESTARTING WORKER ON PID #{$$}"
|
80
78
|
return true
|
81
79
|
end
|
@@ -138,42 +136,48 @@ class Skynet
|
|
138
136
|
return nil if worker_type == :any
|
139
137
|
return worker_type
|
140
138
|
end
|
139
|
+
|
140
|
+
def interrupt
|
141
|
+
if @die
|
142
|
+
exit
|
143
|
+
else
|
144
|
+
@die = true
|
145
|
+
end
|
146
|
+
end
|
141
147
|
|
142
148
|
def start
|
143
149
|
exceptions = 0
|
144
|
-
conerror
|
145
|
-
@curver
|
150
|
+
conerror = 0
|
151
|
+
@curver = nil
|
152
|
+
|
146
153
|
# setup signal handlers for manager
|
147
154
|
Signal.trap("HUP") { @respawn = true }
|
148
|
-
Signal.trap("TERM")
|
149
|
-
|
150
|
-
|
151
|
-
else
|
152
|
-
@die = true
|
153
|
-
end
|
154
|
-
end
|
155
|
-
Signal.trap("INT") { @die = true }
|
156
|
-
|
155
|
+
Signal.trap("TERM") { interrupt }
|
156
|
+
Signal.trap("INT") { @die = true }
|
157
|
+
|
157
158
|
raise Skynet::Worker::RespawnWorker.new if new_version_respawn?
|
158
159
|
|
159
|
-
info "STARTING WORKER @ VER #{@curver} (#{@worker_type})"
|
160
|
+
info "STARTING WORKER @ VER #{@curver} (#{@worker_type}) QUEUE_ID: #{queue_id}"
|
160
161
|
|
161
162
|
notify_worker_started
|
162
163
|
|
163
164
|
message = nil
|
164
|
-
task
|
165
|
+
task = nil
|
165
166
|
|
166
167
|
loop do
|
167
168
|
message = nil
|
168
169
|
begin
|
170
|
+
if Skynet::CONFIG[:WORKER_MAX_PROCESSED] and Skynet::CONFIG[:WORKER_MAX_PROCESSED] > 0 and @processed >= Skynet::CONFIG[:WORKER_MAX_PROCESSED]
|
171
|
+
raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX MEM AT: #{get_memory_size} MAX: #{Skynet::CONFIG[:WORKER_MAX_MEMORY]}")
|
172
|
+
end
|
169
173
|
if @die
|
170
174
|
exit
|
171
175
|
elsif @respawn
|
172
|
-
raise Skynet::Worker::RespawnWorker.new
|
176
|
+
raise Skynet::Worker::RespawnWorker.new()
|
173
177
|
end
|
174
178
|
|
175
179
|
if local_mem = max_memory_reached?
|
176
|
-
raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX MEM AT: #{local_mem} MAX: #{
|
180
|
+
raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX MEM AT: #{local_mem} MAX: #{Skynet::CONFIG[:WORKER_MAX_MEMORY]}")
|
177
181
|
end
|
178
182
|
|
179
183
|
if conerror > 0
|
@@ -186,7 +190,7 @@ class Skynet
|
|
186
190
|
#
|
187
191
|
# debug "LOOK FOR WORK USING TEMPLATE", Skynet::Message.task_template(@curver)
|
188
192
|
# message = Skynet::Message.new(mq.take(Skynet::Message.task_template(@curver),0.00001))
|
189
|
-
message = mq.take_next_task(@curver,0.00001,payload_type)
|
193
|
+
message = mq.take_next_task(@curver, 0.00001, payload_type, queue_id)
|
190
194
|
|
191
195
|
next unless message.respond_to?(:payload)
|
192
196
|
|
@@ -210,29 +214,34 @@ class Skynet
|
|
210
214
|
:name => message.name,
|
211
215
|
:map_or_reduce => task.map_or_reduce
|
212
216
|
})
|
213
|
-
result = task.run
|
217
|
+
result = task.run(message.iteration)
|
214
218
|
|
215
219
|
info "STEP 5 GOT RESULT FROM RUN TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
|
216
220
|
debug "STEP 5.1 RESULT DATA:", result
|
217
221
|
|
218
|
-
## XXX need better result timeout
|
219
222
|
result_message = mq.write_result(message,result,task.result_timeout)
|
220
223
|
info "STEP 6 WROTE RESULT MESSAGE #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
|
221
224
|
# debug "STEP 6.1 RESULT_MESSAGE:", result_message
|
222
225
|
notify_task_complete
|
226
|
+
|
227
|
+
rescue Skynet::Task::TimeoutError => e
|
228
|
+
error "Task timed out while executing #{e.inspect} #{e.backtrace.join("\n")}"
|
229
|
+
next
|
230
|
+
|
223
231
|
rescue Skynet::Worker::RespawnWorker => e
|
224
|
-
info "Respawning and taking worker status"
|
232
|
+
info "Respawning and taking worker status #{e.message}"
|
225
233
|
notify_worker_stop
|
226
234
|
raise e
|
235
|
+
|
227
236
|
rescue Skynet::RequestExpiredError => e
|
228
|
-
# debug "request expired"
|
229
237
|
if new_version_respawn?
|
230
238
|
notify_worker_stop
|
231
|
-
|
239
|
+
manager = DRbObject.new(nil, Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL])
|
240
|
+
manager.restart_worker($$) if manager
|
232
241
|
end
|
233
242
|
sleep 1
|
234
|
-
# debug "WORKER [#{$$}] LOOPING AGAIN"
|
235
243
|
next
|
244
|
+
|
236
245
|
rescue Skynet::ConnectionError, DRb::DRbConnError => e
|
237
246
|
conerror += 1
|
238
247
|
retry_time = conerror > 6 ? RETRY_TIME * 3 : RETRY_TIME
|
@@ -245,6 +254,7 @@ class Skynet
|
|
245
254
|
raise e
|
246
255
|
end
|
247
256
|
next
|
257
|
+
|
248
258
|
rescue NoManagerError => e
|
249
259
|
fatal e.message
|
250
260
|
break
|
@@ -253,7 +263,9 @@ class Skynet
|
|
253
263
|
notify_worker_stop
|
254
264
|
break
|
255
265
|
rescue Exception => e
|
256
|
-
error "#{e.inspect} #{e.backtrace.join("\n")}"
|
266
|
+
error "skynet_worker.rb:#{__LINE__} #{e.inspect} #{e.backtrace.join("\n")}"
|
267
|
+
exceptions += 1
|
268
|
+
break if exceptions > 1000
|
257
269
|
#mq.take(@next_worker_message.task_template,0.0005) if message
|
258
270
|
if message
|
259
271
|
mq.write_error(message,"#{e.inspect} #{e.backtrace.join("\n")}",(task.respond_to?(:result_timeout) ? task.result_timeout : 200))
|
@@ -279,7 +291,7 @@ class Skynet
|
|
279
291
|
elsif Time.now > (@memchecktime + MEMORY_CHECK_DELAY)
|
280
292
|
@memchecktime = Time.now
|
281
293
|
local_mem = get_memory_size.to_i
|
282
|
-
return local_mem if local_mem >
|
294
|
+
return local_mem if local_mem > Skynet::CONFIG[:WORKER_MAX_MEMORY]
|
283
295
|
else
|
284
296
|
false
|
285
297
|
end
|
@@ -346,9 +358,22 @@ class Skynet
|
|
346
358
|
raise Skynet::Error.new("#{v} is not a valid worker_type")
|
347
359
|
end
|
348
360
|
end
|
361
|
+
opt.on('-q', '--queue QUEUE_NAME', 'Which queue should these workers use (default "default").') do |v|
|
362
|
+
options[:queue] = v
|
363
|
+
end
|
364
|
+
opt.on('-i', '--queue_id queue_id', 'Which queue should these workers use (default 0).') do |v|
|
365
|
+
options[:queue_id] = v.to_i
|
366
|
+
end
|
349
367
|
opt.parse!(ARGV)
|
350
368
|
end
|
351
369
|
|
370
|
+
if options[:queue]
|
371
|
+
if options[:queue_id]
|
372
|
+
raise Skynet::Error.new("You may either provide a queue_id or a queue, but not both.")
|
373
|
+
end
|
374
|
+
options[:queue_id] = config.queue_id_by_name(options[:queue])
|
375
|
+
end
|
376
|
+
|
352
377
|
options[:required_libs].each do |adlib|
|
353
378
|
begin
|
354
379
|
require adlib
|
@@ -358,22 +383,19 @@ class Skynet
|
|
358
383
|
end
|
359
384
|
end
|
360
385
|
|
361
|
-
|
362
|
-
|
363
|
-
debug "WORKER STARTING WORKER_TYPE?:#{options[:worker_type]}"
|
386
|
+
debug "WORKER STARTING WORKER_TYPE?:#{options[:worker_type]}. QUEUE: #{Skynet::Config.new.queue_name_by_id(options[:queue_id])}"
|
364
387
|
|
365
388
|
begin
|
366
|
-
worker = Skynet::Worker.new(options[:worker_type])
|
389
|
+
worker = Skynet::Worker.new(options[:worker_type], options)
|
367
390
|
worker.start
|
368
391
|
rescue Skynet::Worker::NoManagerError => e
|
369
392
|
fatal e.message
|
370
393
|
exit
|
371
394
|
rescue Skynet::Worker::RespawnWorker => e
|
372
|
-
warn "WORKER #{$$} SCRIPT CAUGHT RESPAWN. RESTARTING"
|
373
|
-
cmd = "
|
395
|
+
warn "WORKER #{$$} SCRIPT CAUGHT RESPAWN. RESTARTING #{e.message}"
|
396
|
+
cmd = "ruby #{Skynet::CONFIG[:LAUNCHER_PATH]} --worker_type=#{options[:worker_type]} --queue_id=#{options[:queue_id]}"
|
374
397
|
cmd << "-r #{options[:required_libs].join(' -r ')}" if options[:required_libs] and not options[:required_libs].empty?
|
375
398
|
pid = fork_and_exec(cmd)
|
376
|
-
warn "parent_pid: #{$$}, child_pid: #{pid}"
|
377
399
|
exit
|
378
400
|
rescue SystemExit
|
379
401
|
info "WORKER #{$$} EXITING GRACEFULLY"
|