skynet 0.9.1 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +99 -0
- data/Manifest.txt +10 -9
- data/README.txt +74 -7
- data/app_generators/skynet_install/skynet_install_generator.rb +26 -22
- data/app_generators/skynet_install/templates/migration.rb +11 -5
- data/app_generators/skynet_install/templates/skynet +25 -12
- data/app_generators/skynet_install/templates/skynet_schema.sql +56 -0
- data/bin/skynet +26 -2
- data/bin/skynet_install +24 -0
- data/bin/skynet_tuplespace_server +13 -0
- data/config/hoe.rb +1 -0
- data/lib/skynet.rb +3 -0
- data/lib/skynet/mapreduce_helper.rb +74 -0
- data/lib/skynet/message_queue_adapters/mysql.rb +225 -172
- data/lib/skynet/message_queue_adapters/tuple_space.rb +31 -16
- data/lib/skynet/skynet_active_record_extensions.rb +78 -46
- data/lib/skynet/skynet_config.rb +162 -23
- data/lib/skynet/skynet_console.rb +23 -10
- data/lib/skynet/skynet_console_helper.rb +61 -58
- data/lib/skynet/skynet_job.rb +741 -493
- data/lib/skynet/skynet_launcher.rb +5 -1
- data/lib/skynet/skynet_manager.rb +106 -49
- data/lib/skynet/skynet_message.rb +169 -174
- data/lib/skynet/skynet_message_queue.rb +29 -16
- data/lib/skynet/skynet_partitioners.rb +92 -0
- data/lib/skynet/skynet_ruby_extensions.rb +3 -4
- data/lib/skynet/skynet_task.rb +61 -19
- data/lib/skynet/skynet_tuplespace_server.rb +0 -2
- data/lib/skynet/skynet_worker.rb +73 -51
- data/lib/skynet/version.rb +1 -1
- data/test/test_active_record_extensions.rb +138 -0
- data/test/test_helper.rb +6 -0
- data/test/{mysql_message_queue_adaptor_test.rb → test_mysql_message_queue_adapter.rb} +94 -30
- data/test/test_skynet.rb +11 -11
- data/test/test_skynet_install_generator.rb +0 -4
- data/test/test_skynet_job.rb +717 -0
- data/test/test_skynet_manager.rb +142 -0
- data/test/test_skynet_message.rb +229 -0
- data/test/test_skynet_task.rb +24 -0
- data/test/{tuplespace_message_queue_test.rb → test_tuplespace_message_queue.rb} +25 -30
- data/website/index.html +56 -16
- data/website/index.txt +55 -25
- data/website/template.rhtml +1 -1
- metadata +29 -13
- data/app_generators/skynet_install/templates/skynet_console +0 -16
- data/bin/skynet_console +0 -9
- data/sometest.rb +0 -23
- data/test/all_models_test.rb +0 -139
- data/test/skynet_manager_test.rb +0 -107
- data/test/skynet_message_test.rb +0 -42
- data/tmtags +0 -1242
@@ -12,15 +12,14 @@ class Skynet
|
|
12
12
|
class RequestExpiredError < Skynet::Error
|
13
13
|
end
|
14
14
|
|
15
|
+
# This class is the interface to the Skynet Message Queue.
|
15
16
|
class MessageQueue
|
16
17
|
|
17
18
|
include SkynetDebugger
|
18
19
|
|
19
20
|
require 'forwardable'
|
20
21
|
extend Forwardable
|
21
|
-
|
22
|
-
# require 'skynet_message'
|
23
|
-
|
22
|
+
|
24
23
|
def self.adapter
|
25
24
|
Object.module_eval(Skynet::CONFIG[:MESSAGE_QUEUE_ADAPTER], __FILE__, __LINE__).adapter
|
26
25
|
end
|
@@ -34,8 +33,26 @@ class Skynet
|
|
34
33
|
mq
|
35
34
|
end
|
36
35
|
|
37
|
-
|
38
|
-
|
36
|
+
# Is this version still active in the queue?
|
37
|
+
def version_active?(version,queue_id=0)
|
38
|
+
mq.version_active?(version,queue_id)
|
39
|
+
end
|
40
|
+
|
41
|
+
# Retrieves the current worker version
|
42
|
+
def get_worker_version
|
43
|
+
mq.get_worker_version
|
44
|
+
end
|
45
|
+
|
46
|
+
# Sets the current worker version (causing workers to restart)
|
47
|
+
def set_worker_version(version)
|
48
|
+
mq.set_worker_version(version)
|
49
|
+
end
|
50
|
+
|
51
|
+
# Increments the current worker version (causing workers to restart)
|
52
|
+
def increment_worker_version
|
53
|
+
newver = self.get_worker_version + 1
|
54
|
+
self.set_worker_version(newver)
|
55
|
+
newver
|
39
56
|
end
|
40
57
|
|
41
58
|
def mq
|
@@ -43,14 +60,16 @@ class Skynet
|
|
43
60
|
end
|
44
61
|
|
45
62
|
def_delegators :mq, :take_next_task, :write_message, :take_result, :write_error, :write_result,
|
46
|
-
:list_tasks, :list_results,
|
63
|
+
:list_tasks, :list_results, :stats,
|
47
64
|
:clear_outstanding_tasks, :clear_outstanding_results,
|
48
|
-
:take_worker_status, :write_worker_status, :read_all_worker_statuses, :clear_worker_status
|
49
|
-
:get_worker_version, :set_worker_version, :stats
|
50
|
-
|
51
|
-
|
65
|
+
:take_worker_status, :write_worker_status, :read_all_worker_statuses, :clear_worker_status
|
52
66
|
|
53
67
|
|
68
|
+
|
69
|
+
def message_fields
|
70
|
+
Skynet::Message.fields
|
71
|
+
end
|
72
|
+
|
54
73
|
def print_stats
|
55
74
|
"TAKEN TASKS: #{list_tasks(1).size}, UNTAKEN_TASKS: #{list_tasks(0).size} RESULTS: #{list_results.size}"
|
56
75
|
end
|
@@ -59,12 +78,6 @@ class Skynet
|
|
59
78
|
list_tasks + list_results
|
60
79
|
end
|
61
80
|
|
62
|
-
def increment_worker_version
|
63
|
-
newver = self.get_worker_version + 1
|
64
|
-
self.set_worker_version(newver)
|
65
|
-
newver
|
66
|
-
end
|
67
|
-
|
68
81
|
def ansi_clear
|
69
82
|
puts "\033[2J\033[H"
|
70
83
|
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
class Skynet
|
2
|
+
# Collection of partitioning utilities
|
3
|
+
class Partitioners
|
4
|
+
include SkynetDebugger
|
5
|
+
|
6
|
+
# Split one block of data into partitions
|
7
|
+
#
|
8
|
+
class SimplePartitionData < Partitioners
|
9
|
+
|
10
|
+
def self.reduce_partition(data, partitions)
|
11
|
+
partitioned_data = Array.new
|
12
|
+
|
13
|
+
# If data size is significantly greater than the number of desired
|
14
|
+
# partitions, we can divide the data roughly but the last partition
|
15
|
+
# may be smaller than the others.
|
16
|
+
#
|
17
|
+
return data if (not data) or data.empty?
|
18
|
+
|
19
|
+
if partitions >= data.length
|
20
|
+
data.each do |datum|
|
21
|
+
partitioned_data << [datum]
|
22
|
+
end
|
23
|
+
elsif (data.length >= partitions * 2)
|
24
|
+
# Use quicker but less "fair" method
|
25
|
+
size = data.length / partitions
|
26
|
+
|
27
|
+
if (data.length % partitions != 0)
|
28
|
+
size += 1 # Last slice of leftovers
|
29
|
+
end
|
30
|
+
|
31
|
+
(0..partitions - 1).each do |i|
|
32
|
+
partitioned_data[i] = data[i * size, size]
|
33
|
+
end
|
34
|
+
else
|
35
|
+
# Slower method, but partitions evenly
|
36
|
+
partitions = (data.size < partitions ? data.size : partitions)
|
37
|
+
(0..partitions - 1).each { |i| partitioned_data[i] = Array.new }
|
38
|
+
|
39
|
+
data.each_with_index do |datum, i|
|
40
|
+
partitioned_data[i % partitions] << datum
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
partitioned_data
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
|
49
|
+
class RecombineAndSplit < Partitioners
|
50
|
+
# Tries to be smart about what kind of data it's getting, whether an array of arrays or an array of arrays of arrays.
|
51
|
+
#
|
52
|
+
def self.reduce_partition(post_map_data,new_partitions)
|
53
|
+
return post_map_data unless post_map_data.is_a?(Array) and (not post_map_data.empty?) and post_map_data.first.is_a?(Array)
|
54
|
+
if not post_map_data.first.first.is_a?(Array)
|
55
|
+
partitioned_data = post_map_data.flatten
|
56
|
+
else
|
57
|
+
partitioned_data = post_map_data.inject(Array.new) do |data,part|
|
58
|
+
data += part
|
59
|
+
end
|
60
|
+
end
|
61
|
+
partitioned_data = Skynet::Partitioners::SimplePartitionData.reduce_partition(partitioned_data, new_partitions)
|
62
|
+
debug "POST PARTITIONED DATA", partitioned_data
|
63
|
+
partitioned_data
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
class ArrayDataSplitByFirstEntry < Partitioners
|
68
|
+
# Smarter partitioner for array data, generates simple sum of array[0]
|
69
|
+
# and ensures that all arrays sharing that key go into the same partition.
|
70
|
+
#
|
71
|
+
def self.reduce_partition(partitioned_data, new_partitions)
|
72
|
+
partitions = Array.new
|
73
|
+
(0..new_partitions - 1).each { |i| partitions[i] = Array.new }
|
74
|
+
|
75
|
+
partitioned_data.each do |partition|
|
76
|
+
partition.each do |array|
|
77
|
+
next unless array.class == Array and array.size == 2
|
78
|
+
if array[0].kind_of?(Fixnum)
|
79
|
+
key = array[0]
|
80
|
+
else
|
81
|
+
key = 0
|
82
|
+
array[0].each_byte { |c| key += c }
|
83
|
+
end
|
84
|
+
partitions[key % new_partitions] << array
|
85
|
+
end
|
86
|
+
end
|
87
|
+
partitions
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
92
|
+
end
|
@@ -7,7 +7,7 @@ module Enumerable
|
|
7
7
|
data = self
|
8
8
|
end
|
9
9
|
jobopts = {
|
10
|
-
:
|
10
|
+
:mappers => 20000,
|
11
11
|
:map_data => data,
|
12
12
|
:name => "#{klass} Enumerable MASTER",
|
13
13
|
:map_name => "#{klass} Enumerable MAP",
|
@@ -15,8 +15,7 @@ module Enumerable
|
|
15
15
|
:map_timeout => 3600,
|
16
16
|
:reduce_timeout => 3600,
|
17
17
|
:master_timeout => 3600,
|
18
|
-
:master_result_timeout => 3600
|
19
|
-
:async => false
|
18
|
+
:master_result_timeout => 3600
|
20
19
|
}
|
21
20
|
|
22
21
|
jobopts[:map_reduce_class] = klass.to_s if klass
|
@@ -27,7 +26,7 @@ module Enumerable
|
|
27
26
|
end
|
28
27
|
|
29
28
|
if block_given? or not jobopts[:async]
|
30
|
-
job = Skynet::Job.new(jobopts)
|
29
|
+
job = Skynet::Job.new(jobopts.merge(:local_master => true))
|
31
30
|
else
|
32
31
|
job = Skynet::AsyncJob.new(jobopts)
|
33
32
|
end
|
data/lib/skynet/skynet_task.rb
CHANGED
@@ -1,15 +1,12 @@
|
|
1
1
|
class Skynet
|
2
2
|
class Task
|
3
|
-
|
4
3
|
include SkynetDebugger
|
5
|
-
|
6
|
-
# require 'ostruct'
|
7
4
|
|
8
|
-
class ConstructorError < StandardError
|
9
|
-
end
|
5
|
+
class ConstructorError < StandardError; end
|
6
|
+
class TimeoutError < StandardError; end
|
10
7
|
|
11
|
-
attr_reader :data, :process, :result, :map_or_reduce
|
12
|
-
attr_accessor :name, :
|
8
|
+
attr_reader :data, :process, :result, :map_or_reduce, :marshalable
|
9
|
+
attr_accessor :name, :result_timeout, :retry
|
13
10
|
|
14
11
|
@@log = nil
|
15
12
|
|
@@ -17,6 +14,31 @@ class Skynet
|
|
17
14
|
"TASK"
|
18
15
|
end
|
19
16
|
|
17
|
+
def self.master_task(job)
|
18
|
+
options = {
|
19
|
+
:async => false,
|
20
|
+
:local_master => true,
|
21
|
+
:map_name => job.map_name || job.name,
|
22
|
+
:reduce_name => job.reduce_name || job.name,
|
23
|
+
}
|
24
|
+
Skynet::Job::FIELDS.each do |field|
|
25
|
+
next if options.has_key?(field)
|
26
|
+
options[field] = job.send(field) if job.send(field)
|
27
|
+
end
|
28
|
+
|
29
|
+
master_job = Skynet::Job.new(options)
|
30
|
+
|
31
|
+
self.new(
|
32
|
+
:task_id => master_job.task_id,
|
33
|
+
:data => nil,
|
34
|
+
:process => master_job.to_h,
|
35
|
+
:map_or_reduce => :master,
|
36
|
+
:name => master_job.name,
|
37
|
+
:result_timeout => master_job.master_timeout,
|
38
|
+
:retry => master_job.master_retry || Skynet::CONFIG[:DEFAULT_MASTER_RETRY]
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
20
42
|
def initialize(opts = {})
|
21
43
|
unless opts[:task_id] and opts[:process] and opts[:map_or_reduce]
|
22
44
|
raise ConstructorError.new("Must provide task_id, process and map_or_reduce")
|
@@ -28,6 +50,7 @@ class Skynet
|
|
28
50
|
@name = opts[:name]
|
29
51
|
@map_or_reduce = opts[:map_or_reduce]
|
30
52
|
@result_timeout = opts[:result_timeout]
|
53
|
+
@retry = opts[:retry]
|
31
54
|
end
|
32
55
|
|
33
56
|
def process=(process)
|
@@ -53,22 +76,41 @@ class Skynet
|
|
53
76
|
@task_id.to_i
|
54
77
|
end
|
55
78
|
|
56
|
-
def run
|
57
|
-
|
79
|
+
def run(iteration=nil)
|
80
|
+
info "running task #{name} TIMEOUT: #{result_timeout} task_id:#{task_id} MorR:#{map_or_reduce} PROCESS CLASS: #{@process.class}"
|
58
81
|
begin
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
82
|
+
Timeout::timeout(@result_timeout) do
|
83
|
+
if @process.class == Proc
|
84
|
+
debug " - #{@map_or_reduce} using Proc"
|
85
|
+
@process.call @data
|
86
|
+
elsif @map_or_reduce == :master
|
87
|
+
debug " - as master"
|
88
|
+
job = Skynet::Job.new(@process)
|
89
|
+
job.run
|
90
|
+
elsif @process.class == String
|
91
|
+
debug " - #{@map_or_reduce} using class #{@process}"
|
92
|
+
@process.constantize.send(@map_or_reduce,@data)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
rescue Timeout::Error => e
|
96
|
+
# ==========
|
97
|
+
# = XXX NEWSFEED HACK
|
98
|
+
# = I'm printing the data hash, but that hash has all this shit added to it after running through newsfeed.
|
99
|
+
# = It's actually nice to be able to see what was added, but sometimes it's too much data.
|
100
|
+
# = Though the handy part will be adding instrumentation to the event_hash and seeing it only during a timeout.
|
101
|
+
# ==========
|
102
|
+
|
103
|
+
if @data.is_a?(Array) and @data.first.is_a?(Hash)
|
104
|
+
@data.each {|h|h.delete(:event_object)}
|
69
105
|
end
|
106
|
+
raise TimeoutError.new("TASK TIMED OUT! #{name} IT:[#{iteration}] timeout:#{@result_timeout} #{e.inspect} DATA: #{@data.inspect} #{e.backtrace.join("\n")}")
|
107
|
+
|
108
|
+
# ==========
|
109
|
+
# = XXX This rescue block is probably not necessary. Just for debugging for now. =
|
110
|
+
# ==========
|
70
111
|
rescue Exception => e
|
71
112
|
error "Error running task #{e.inspect} TASK:", self, e.backtrace.join("\n")
|
113
|
+
raise e
|
72
114
|
end
|
73
115
|
end
|
74
116
|
|
data/lib/skynet/skynet_worker.rb
CHANGED
@@ -5,35 +5,32 @@ class Skynet
|
|
5
5
|
include Skynet::GuidGenerator
|
6
6
|
|
7
7
|
RETRY_TIME = 2
|
8
|
-
|
9
|
-
|
8
|
+
Skynet::CONFIG[:WORKER_VERSION_CHECK_DELAY] ||= 30
|
9
|
+
|
10
|
+
Skynet::CONFIG[:WORKER_MAX_MEMORY] ||= 500
|
11
|
+
|
10
12
|
MEMORY_CHECK_DELAY = 30
|
11
13
|
MANAGER_PING_INTERVAL = 60
|
12
14
|
|
13
15
|
attr_accessor :message,:task, :mq, :processed
|
14
|
-
attr_reader :worker_id, :worker_info, :worker_type
|
16
|
+
attr_reader :worker_id, :worker_info, :worker_type, :queue_id
|
15
17
|
|
16
|
-
class Error
|
17
|
-
end
|
18
|
-
|
19
|
-
class
|
20
|
-
end
|
21
|
-
|
22
|
-
class ConnectionFailure < Skynet::Error
|
23
|
-
end
|
24
|
-
|
25
|
-
class NoManagerError < Skynet::Error
|
26
|
-
end
|
18
|
+
class Error < StandardError; end
|
19
|
+
class RespawnWorker < Skynet::Error; end
|
20
|
+
class ConnectionFailure < Skynet::Error; end
|
21
|
+
class NoManagerError < Skynet::Error; end
|
27
22
|
|
28
23
|
def self.debug_class_desc
|
29
24
|
"WORKER-#{$$}"
|
30
25
|
end
|
31
26
|
|
32
|
-
def initialize(worker_type
|
27
|
+
def initialize(worker_type, options = {})
|
33
28
|
@worker_id = get_unique_id(1).to_i
|
34
|
-
@mq = Skynet::MessageQueue.new
|
35
29
|
@worker_type = worker_type.to_sym
|
30
|
+
@queue_id = options[:queue_id] || 0
|
36
31
|
@processed = 0
|
32
|
+
@mq = Skynet::MessageQueue.new
|
33
|
+
|
37
34
|
debug "THIS WORKER TAKES #{worker_type}"
|
38
35
|
|
39
36
|
@worker_info = {
|
@@ -41,8 +38,9 @@ class Skynet
|
|
41
38
|
:process_id => process_id,
|
42
39
|
:worker_type => payload_type,
|
43
40
|
:worker_id => worker_id,
|
44
|
-
:version => mq.get_worker_version
|
45
|
-
}
|
41
|
+
:version => mq.get_worker_version,
|
42
|
+
}
|
43
|
+
@worker_info.merge!(options)
|
46
44
|
end
|
47
45
|
|
48
46
|
def process_id
|
@@ -68,14 +66,14 @@ class Skynet
|
|
68
66
|
@curver = 1
|
69
67
|
end
|
70
68
|
else
|
71
|
-
if Time.now < (@verchecktime +
|
69
|
+
if Time.now < (@verchecktime + Skynet::CONFIG[:WORKER_VERSION_CHECK_DELAY])
|
72
70
|
return false
|
73
71
|
else
|
74
72
|
@verchecktime = Time.now
|
75
73
|
begin
|
76
74
|
newver = mq.get_worker_version
|
77
75
|
# debug "CURVER #{@curver} NEWVER: #{newver}"
|
78
|
-
if newver != @curver
|
76
|
+
if newver != @curver and not mq.version_active?(@curver, queue_id)
|
79
77
|
info "RESTARTING WORKER ON PID #{$$}"
|
80
78
|
return true
|
81
79
|
end
|
@@ -138,42 +136,48 @@ class Skynet
|
|
138
136
|
return nil if worker_type == :any
|
139
137
|
return worker_type
|
140
138
|
end
|
139
|
+
|
140
|
+
def interrupt
|
141
|
+
if @die
|
142
|
+
exit
|
143
|
+
else
|
144
|
+
@die = true
|
145
|
+
end
|
146
|
+
end
|
141
147
|
|
142
148
|
def start
|
143
149
|
exceptions = 0
|
144
|
-
conerror
|
145
|
-
@curver
|
150
|
+
conerror = 0
|
151
|
+
@curver = nil
|
152
|
+
|
146
153
|
# setup signal handlers for manager
|
147
154
|
Signal.trap("HUP") { @respawn = true }
|
148
|
-
Signal.trap("TERM")
|
149
|
-
|
150
|
-
|
151
|
-
else
|
152
|
-
@die = true
|
153
|
-
end
|
154
|
-
end
|
155
|
-
Signal.trap("INT") { @die = true }
|
156
|
-
|
155
|
+
Signal.trap("TERM") { interrupt }
|
156
|
+
Signal.trap("INT") { @die = true }
|
157
|
+
|
157
158
|
raise Skynet::Worker::RespawnWorker.new if new_version_respawn?
|
158
159
|
|
159
|
-
info "STARTING WORKER @ VER #{@curver} (#{@worker_type})"
|
160
|
+
info "STARTING WORKER @ VER #{@curver} (#{@worker_type}) QUEUE_ID: #{queue_id}"
|
160
161
|
|
161
162
|
notify_worker_started
|
162
163
|
|
163
164
|
message = nil
|
164
|
-
task
|
165
|
+
task = nil
|
165
166
|
|
166
167
|
loop do
|
167
168
|
message = nil
|
168
169
|
begin
|
170
|
+
if Skynet::CONFIG[:WORKER_MAX_PROCESSED] and Skynet::CONFIG[:WORKER_MAX_PROCESSED] > 0 and @processed >= Skynet::CONFIG[:WORKER_MAX_PROCESSED]
|
171
|
+
raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX MEM AT: #{get_memory_size} MAX: #{Skynet::CONFIG[:WORKER_MAX_MEMORY]}")
|
172
|
+
end
|
169
173
|
if @die
|
170
174
|
exit
|
171
175
|
elsif @respawn
|
172
|
-
raise Skynet::Worker::RespawnWorker.new
|
176
|
+
raise Skynet::Worker::RespawnWorker.new()
|
173
177
|
end
|
174
178
|
|
175
179
|
if local_mem = max_memory_reached?
|
176
|
-
raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX MEM AT: #{local_mem} MAX: #{
|
180
|
+
raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX MEM AT: #{local_mem} MAX: #{Skynet::CONFIG[:WORKER_MAX_MEMORY]}")
|
177
181
|
end
|
178
182
|
|
179
183
|
if conerror > 0
|
@@ -186,7 +190,7 @@ class Skynet
|
|
186
190
|
#
|
187
191
|
# debug "LOOK FOR WORK USING TEMPLATE", Skynet::Message.task_template(@curver)
|
188
192
|
# message = Skynet::Message.new(mq.take(Skynet::Message.task_template(@curver),0.00001))
|
189
|
-
message = mq.take_next_task(@curver,0.00001,payload_type)
|
193
|
+
message = mq.take_next_task(@curver, 0.00001, payload_type, queue_id)
|
190
194
|
|
191
195
|
next unless message.respond_to?(:payload)
|
192
196
|
|
@@ -210,29 +214,34 @@ class Skynet
|
|
210
214
|
:name => message.name,
|
211
215
|
:map_or_reduce => task.map_or_reduce
|
212
216
|
})
|
213
|
-
result = task.run
|
217
|
+
result = task.run(message.iteration)
|
214
218
|
|
215
219
|
info "STEP 5 GOT RESULT FROM RUN TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
|
216
220
|
debug "STEP 5.1 RESULT DATA:", result
|
217
221
|
|
218
|
-
## XXX need better result timeout
|
219
222
|
result_message = mq.write_result(message,result,task.result_timeout)
|
220
223
|
info "STEP 6 WROTE RESULT MESSAGE #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
|
221
224
|
# debug "STEP 6.1 RESULT_MESSAGE:", result_message
|
222
225
|
notify_task_complete
|
226
|
+
|
227
|
+
rescue Skynet::Task::TimeoutError => e
|
228
|
+
error "Task timed out while executing #{e.inspect} #{e.backtrace.join("\n")}"
|
229
|
+
next
|
230
|
+
|
223
231
|
rescue Skynet::Worker::RespawnWorker => e
|
224
|
-
info "Respawning and taking worker status"
|
232
|
+
info "Respawning and taking worker status #{e.message}"
|
225
233
|
notify_worker_stop
|
226
234
|
raise e
|
235
|
+
|
227
236
|
rescue Skynet::RequestExpiredError => e
|
228
|
-
# debug "request expired"
|
229
237
|
if new_version_respawn?
|
230
238
|
notify_worker_stop
|
231
|
-
|
239
|
+
manager = DRbObject.new(nil, Skynet::CONFIG[:SKYNET_LOCAL_MANAGER_URL])
|
240
|
+
manager.restart_worker($$) if manager
|
232
241
|
end
|
233
242
|
sleep 1
|
234
|
-
# debug "WORKER [#{$$}] LOOPING AGAIN"
|
235
243
|
next
|
244
|
+
|
236
245
|
rescue Skynet::ConnectionError, DRb::DRbConnError => e
|
237
246
|
conerror += 1
|
238
247
|
retry_time = conerror > 6 ? RETRY_TIME * 3 : RETRY_TIME
|
@@ -245,6 +254,7 @@ class Skynet
|
|
245
254
|
raise e
|
246
255
|
end
|
247
256
|
next
|
257
|
+
|
248
258
|
rescue NoManagerError => e
|
249
259
|
fatal e.message
|
250
260
|
break
|
@@ -253,7 +263,9 @@ class Skynet
|
|
253
263
|
notify_worker_stop
|
254
264
|
break
|
255
265
|
rescue Exception => e
|
256
|
-
error "#{e.inspect} #{e.backtrace.join("\n")}"
|
266
|
+
error "skynet_worker.rb:#{__LINE__} #{e.inspect} #{e.backtrace.join("\n")}"
|
267
|
+
exceptions += 1
|
268
|
+
break if exceptions > 1000
|
257
269
|
#mq.take(@next_worker_message.task_template,0.0005) if message
|
258
270
|
if message
|
259
271
|
mq.write_error(message,"#{e.inspect} #{e.backtrace.join("\n")}",(task.respond_to?(:result_timeout) ? task.result_timeout : 200))
|
@@ -279,7 +291,7 @@ class Skynet
|
|
279
291
|
elsif Time.now > (@memchecktime + MEMORY_CHECK_DELAY)
|
280
292
|
@memchecktime = Time.now
|
281
293
|
local_mem = get_memory_size.to_i
|
282
|
-
return local_mem if local_mem >
|
294
|
+
return local_mem if local_mem > Skynet::CONFIG[:WORKER_MAX_MEMORY]
|
283
295
|
else
|
284
296
|
false
|
285
297
|
end
|
@@ -346,9 +358,22 @@ class Skynet
|
|
346
358
|
raise Skynet::Error.new("#{v} is not a valid worker_type")
|
347
359
|
end
|
348
360
|
end
|
361
|
+
opt.on('-q', '--queue QUEUE_NAME', 'Which queue should these workers use (default "default").') do |v|
|
362
|
+
options[:queue] = v
|
363
|
+
end
|
364
|
+
opt.on('-i', '--queue_id queue_id', 'Which queue should these workers use (default 0).') do |v|
|
365
|
+
options[:queue_id] = v.to_i
|
366
|
+
end
|
349
367
|
opt.parse!(ARGV)
|
350
368
|
end
|
351
369
|
|
370
|
+
if options[:queue]
|
371
|
+
if options[:queue_id]
|
372
|
+
raise Skynet::Error.new("You may either provide a queue_id or a queue, but not both.")
|
373
|
+
end
|
374
|
+
options[:queue_id] = config.queue_id_by_name(options[:queue])
|
375
|
+
end
|
376
|
+
|
352
377
|
options[:required_libs].each do |adlib|
|
353
378
|
begin
|
354
379
|
require adlib
|
@@ -358,22 +383,19 @@ class Skynet
|
|
358
383
|
end
|
359
384
|
end
|
360
385
|
|
361
|
-
|
362
|
-
|
363
|
-
debug "WORKER STARTING WORKER_TYPE?:#{options[:worker_type]}"
|
386
|
+
debug "WORKER STARTING WORKER_TYPE?:#{options[:worker_type]}. QUEUE: #{Skynet::Config.new.queue_name_by_id(options[:queue_id])}"
|
364
387
|
|
365
388
|
begin
|
366
|
-
worker = Skynet::Worker.new(options[:worker_type])
|
389
|
+
worker = Skynet::Worker.new(options[:worker_type], options)
|
367
390
|
worker.start
|
368
391
|
rescue Skynet::Worker::NoManagerError => e
|
369
392
|
fatal e.message
|
370
393
|
exit
|
371
394
|
rescue Skynet::Worker::RespawnWorker => e
|
372
|
-
warn "WORKER #{$$} SCRIPT CAUGHT RESPAWN. RESTARTING"
|
373
|
-
cmd = "
|
395
|
+
warn "WORKER #{$$} SCRIPT CAUGHT RESPAWN. RESTARTING #{e.message}"
|
396
|
+
cmd = "ruby #{Skynet::CONFIG[:LAUNCHER_PATH]} --worker_type=#{options[:worker_type]} --queue_id=#{options[:queue_id]}"
|
374
397
|
cmd << "-r #{options[:required_libs].join(' -r ')}" if options[:required_libs] and not options[:required_libs].empty?
|
375
398
|
pid = fork_and_exec(cmd)
|
376
|
-
warn "parent_pid: #{$$}, child_pid: #{pid}"
|
377
399
|
exit
|
378
400
|
rescue SystemExit
|
379
401
|
info "WORKER #{$$} EXITING GRACEFULLY"
|