skynet 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +65 -0
- data/README.txt +100 -0
- data/Rakefile +4 -0
- data/app_generators/skynet_install/USAGE +5 -0
- data/app_generators/skynet_install/skynet_install_generator.rb +84 -0
- data/app_generators/skynet_install/templates/migration.rb +60 -0
- data/app_generators/skynet_install/templates/skynet +33 -0
- data/app_generators/skynet_install/templates/skynet_console +16 -0
- data/bin/skynet +20 -0
- data/bin/skynet_console +9 -0
- data/bin/skynet_install +12 -0
- data/bin/skynet_tuplespace_server +53 -0
- data/config/hoe.rb +74 -0
- data/config/requirements.rb +17 -0
- data/lib/skynet.rb +34 -0
- data/lib/skynet/mapreduce_test.rb +25 -0
- data/lib/skynet/message_queue_adapters/message_queue_adapter.rb +70 -0
- data/lib/skynet/message_queue_adapters/mysql.rb +573 -0
- data/lib/skynet/message_queue_adapters/tuple_space.rb +327 -0
- data/lib/skynet/skynet_active_record_extensions.rb +237 -0
- data/lib/skynet/skynet_config.rb +59 -0
- data/lib/skynet/skynet_console.rb +34 -0
- data/lib/skynet/skynet_console_helper.rb +59 -0
- data/lib/skynet/skynet_debugger.rb +84 -0
- data/lib/skynet/skynet_guid_generator.rb +68 -0
- data/lib/skynet/skynet_job.rb +607 -0
- data/lib/skynet/skynet_launcher.rb +10 -0
- data/lib/skynet/skynet_logger.rb +52 -0
- data/lib/skynet/skynet_manager.rb +486 -0
- data/lib/skynet/skynet_message.rb +366 -0
- data/lib/skynet/skynet_message_queue.rb +100 -0
- data/lib/skynet/skynet_ruby_extensions.rb +36 -0
- data/lib/skynet/skynet_task.rb +76 -0
- data/lib/skynet/skynet_tuplespace_server.rb +82 -0
- data/lib/skynet/skynet_worker.rb +395 -0
- data/lib/skynet/version.rb +9 -0
- data/log/debug.log +0 -0
- data/log/skynet.log +29 -0
- data/log/skynet_tuplespace_server.log +7 -0
- data/log/skynet_worker.pid +1 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +74 -0
- data/setup.rb +1585 -0
- data/sometest.rb +23 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/website.rake +17 -0
- data/test/all_models_test.rb +139 -0
- data/test/mysql_message_queue_adaptor_test.rb +199 -0
- data/test/skynet_manager_test.rb +107 -0
- data/test/skynet_message_test.rb +42 -0
- data/test/test_generator_helper.rb +20 -0
- data/test/test_helper.rb +2 -0
- data/test/test_skynet.rb +11 -0
- data/test/test_skynet_install_generator.rb +53 -0
- data/test/tuplespace_message_queue_test.rb +179 -0
- data/tmtags +1242 -0
- data/website/index.html +93 -0
- data/website/index.txt +39 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +129 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
module Enumerable
  # Runs this collection through a Skynet job and returns the value of the
  # job's +run+ call.
  #
  # klass   - optional class (or name); when given, its +to_s+ is passed as
  #           :map_reduce_class. It is also interpolated into the job names.
  # options - entries that override the default job options built below.
  # block   - optional map step; stored under :map.
  #
  # A Hash receiver is sent as an array of single-pair hashes; any other
  # receiver is handed to the job unchanged. A Skynet::AsyncJob is used only
  # when no block is given and the merged options have a truthy :async;
  # otherwise a Skynet::Job is used.
  def mapreduce(klass=nil,options={},&block)
    map_data = is_a?(Hash) ? map { |key, value| { key => value } } : self

    jobopts = {
      :map_tasks             => 20000,
      :map_data              => map_data,
      :name                  => "#{klass} Enumerable MASTER",
      :map_name              => "#{klass} Enumerable MAP",
      :reduce_name           => "#{klass} Enumerable REDUCE",
      :map_timeout           => 3600,
      :reduce_timeout        => 3600,
      :master_timeout        => 3600,
      :master_result_timeout => 3600,
      :async                 => false
    }

    jobopts[:map_reduce_class] = klass.to_s if klass

    # Caller-supplied options win over the defaults above.
    options.each { |key, value| jobopts[key] = value }
    jobopts[:map] = block if block

    job_class = (block || !jobopts[:async]) ? Skynet::Job : Skynet::AsyncJob
    job_class.new(jobopts).run
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
class Skynet
  # A single piece of work: a +process+ (Proc, class name String, or job
  # options when map_or_reduce is :master) applied to +data+.
  class Task

    include SkynetDebugger

    # require 'ostruct'

    # Raised by #initialize when a required option is missing.
    class ConstructorError < StandardError
    end

    attr_reader :data, :process, :result, :map_or_reduce
    attr_accessor :name, :tuple, :result_timeout

    @@log = nil

    # Short label for this class.
    # NOTE(review): presumably consumed by SkynetDebugger output - confirm.
    def self.debug_class_desc
      "TASK"
    end

    # opts - hash; :task_id, :process and :map_or_reduce are required,
    #        :data, :name and :result_timeout are optional.
    #
    # Raises ConstructorError when any required option is nil or false.
    def initialize(opts = {})
      if !opts[:task_id] || !opts[:process] || !opts[:map_or_reduce]
        raise ConstructorError.new("Must provide task_id, process and map_or_reduce")
      end

      # Must be set before process= runs, which may flip it to false.
      @marshalable    = true
      @task_id        = opts[:task_id].to_i
      @data           = opts[:data]
      self.process    = opts[:process]
      @name           = opts[:name]
      @map_or_reduce  = opts[:map_or_reduce]
      @result_timeout = opts[:result_timeout]
    end

    # Stores the process and marks the task as not marshalable when it is a Proc.
    def process=(process)
      @marshalable = false if process.is_a?(Proc)
      @process = process
    end

    # Returns the marshalable flag maintained by #initialize and #process=.
    def can_marshal?
      @marshalable
    end

    # Returns :master for master tasks and :task for everything else.
    def task_or_master
      @map_or_reduce == :master ? @map_or_reduce : :task
    end

    # Returns the task id as an Integer.
    def task_id
      @task_id.to_i
    end

    # Executes the task and returns its result.
    #
    # A Proc process is called with the data; a :master task builds and runs a
    # Skynet::Job from the process; a String process is constantized and sent
    # the map_or_reduce message with the data. Any other combination returns
    # nil. Every exception raised while running is logged through +error+
    # and not re-raised, so the return value is then whatever +error+ returns.
    def run
      debug "running task #{name} task_id:#{task_id} MorR:#{map_or_reduce} PROCESS CLASS: #{@process.class}"
      begin
        if @process.instance_of?(Proc)
          debug " - #{@map_or_reduce} using Proc"
          @process.call @data
        elsif @map_or_reduce == :master
          debug " - as master"
          Skynet::Job.new(@process).run
        elsif @process.instance_of?(String)
          debug " - #{@map_or_reduce} using class #{@process}"
          @process.constantize.send(@map_or_reduce,@data)
        end
      rescue Exception => e
        error "Error running task #{e.inspect} TASK:", self, e.backtrace.join("\n")
      end
    end

  end ## END class Task
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
#!/usr/bin/env ruby -w
|
2
|
+
|
3
|
+
# Rinda RingServer
|
4
|
+
|
5
|
+
require 'rinda/ring'
|
6
|
+
require 'rinda/tuplespace'
|
7
|
+
require 'rubygems'
|
8
|
+
require 'logger'
|
9
|
+
require 'optparse'
|
10
|
+
require 'pp'
|
11
|
+
|
12
|
+
class Rinda::TupleSpaceProxy
  # Replacement for the stock +take+: asks the wrapped tuple space to move the
  # matching tuple with a nil port and returns whatever +move+ returns.
  #
  # tuple - template to match.
  # sec   - optional timeout passed straight through to +move+.
  # block - optional block passed straight through to +move+.
  def take(tuple, sec=nil, &block)
    @ts.move(nil, tuple, sec, &block)
  end
end
|
19
|
+
|
20
|
+
class Rinda::Tuple

  require 'ostruct'

  # Populates @tuple from +ary+.
  #
  # ary - an array-like object (anything answering +size+ and +[]+), or a
  #       DRb::DRbUnknown wrapping a marshalled one.
  #
  # A DRb::DRbUnknown is re-loaded from its buffer first. If that works, the
  # loaded object is used as the tuple contents (previously the loaded value
  # was thrown away and @tuple was left unset).
  #
  # Raises Rinda::RindaError when the buffer still cannot be unmarshalled.
  def init_with_ary(ary)
    if ary.instance_of?(DRb::DRbUnknown)
      unknown = ary
      begin
        # NOTE(review): Marshal.load runs on a buffer that arrived over DRb;
        # only safe when every peer is trusted.
        ary = Marshal.load(unknown.buf)
      rescue StandardError => e
        raise Rinda::RindaError.new("DRb couldn't marshall tuple of type #{unknown.name}, it was turned into a DRb::DRbUnknown object.\nMarshal exception #{e.inspect}\nOriginal object:\n\t#{unknown.buf}.\n\nStacktrace:\n")
      end
    end

    @tuple = Array.new(ary.size)
    @tuple.size.times do |i|
      @tuple[i] = ary[i]
    end
  end
end
|
39
|
+
|
40
|
+
class Skynet
  # Empty declarations of Skynet classes.
  # NOTE(review): presumably present so these constants exist in the server
  # process when objects of these classes pass through it - confirm.
  class Task
  end
  class Message
    class Payload
    end
  end

  class AsyncJob
  end

  class Job
  end

  class Server

    # Creates a logger, publishes a Rinda::TupleSpace through a
    # Rinda::RingServer and then blocks on the DRb thread.
    #
    # options - hash read for :logfile, :loglevel (name of a Logger severity
    #           constant, any case), :port and the optional :drburi.
    #
    # Raises NameError when :loglevel does not name a constant reachable from
    # Logger. Exceptions raised while starting the services are logged as
    # fatal and not re-raised.
    def initialize(options)
      log = Logger.new(options[:logfile], 'weekly')
      # Resolve the severity by constant lookup; the option string is never
      # evaluated as Ruby source.
      log.level = Logger.const_get(options[:loglevel].upcase)
      log.info "STARTING SKYNET SERVER ON PORT: #{options[:port]} Logging to #{options[:logfile]}"

      # Create a TupleSpace to hold named services, and start running
      begin
        ts = Rinda::TupleSpace.new
        if options[:drburi]
          DRb.start_service(options[:drburi], ts)
        else
          DRb.start_service
        end
        tuple = [:name,:TupleSpace, ts, 'Tuple Space']
        renewer = Rinda::SimpleRenewer.new
        ring_ts = Rinda::TupleSpace.new
        ring_ts.write(tuple, renewer)

        Rinda::RingServer.new(ring_ts, options[:port])
        DRb.thread.join
      rescue Exception => e
        log.fatal "Couldn't start Skynet Server #{e.inspect}"
      end

    end
  end
end
|
@@ -0,0 +1,395 @@
|
|
1
|
+
class Skynet
|
2
|
+
class Worker
|
3
|
+
|
4
|
+
include SkynetDebugger
|
5
|
+
include Skynet::GuidGenerator
|
6
|
+
|
7
|
+
RETRY_TIME = 2
|
8
|
+
VERSION_CHECK_DELAY = 5
|
9
|
+
MAX_MEMORY = 500
|
10
|
+
MEMORY_CHECK_DELAY = 30
|
11
|
+
MANAGER_PING_INTERVAL = 60
|
12
|
+
|
13
|
+
attr_accessor :message,:task, :mq, :processed
|
14
|
+
attr_reader :worker_id, :worker_info, :worker_type
|
15
|
+
|
16
|
+
class Error < StandardError
|
17
|
+
end
|
18
|
+
|
19
|
+
class RespawnWorker < Skynet::Error
|
20
|
+
end
|
21
|
+
|
22
|
+
class ConnectionFailure < Skynet::Error
|
23
|
+
end
|
24
|
+
|
25
|
+
class NoManagerError < Skynet::Error
|
26
|
+
end
|
27
|
+
|
28
|
+
# Label for this class that embeds the current process id.
def self.debug_class_desc
  "WORKER-#{Process.pid}"
end
|
31
|
+
|
32
|
+
# worker_type - anything answering +to_sym+; defaults to :any.
#
# Obtains a worker id from +get_unique_id+, opens a message queue and builds
# the @worker_info hash that the status notifications merge into.
def initialize(worker_type=:any)
  @processed   = 0
  @worker_id   = get_unique_id(1).to_i
  @mq          = Skynet::MessageQueue.new
  @worker_type = worker_type.to_sym
  debug "THIS WORKER TAKES #{worker_type}"

  @worker_info = {
    :hostname    => hostname,
    :process_id  => process_id,
    :worker_type => payload_type,
    :worker_id   => worker_id,
    :version     => mq.get_worker_version
  }
end
|
47
|
+
|
48
|
+
# Returns the pid of the current process.
def process_id
  Process.pid
end
|
51
|
+
|
52
|
+
# Returns the local host name, looked up once and cached in @machine_name.
def hostname
  return @machine_name if @machine_name
  @machine_name = Socket.gethostname
end
|
55
|
+
|
56
|
+
# Returns the worker version last stored in @curver (nil until one is read).
def version
  return @curver
end
|
59
|
+
|
60
|
+
# Reports whether the worker version published on the message queue differs
# from the one this worker recorded.
#
# The first call only records the current version (falling back to 1 when the
# queue request expires) and returns false. Later calls return false without
# asking the queue until VERSION_CHECK_DELAY seconds have passed since the
# previous check. When a later check expires, version 1 is written to the
# queue and false is returned.
def new_version_respawn?
  unless @verchecktime
    @verchecktime = Time.now
    begin
      @curver = mq.get_worker_version
      debug "FINDING INITIAL VER #{@curver}"
    rescue Skynet::RequestExpiredError
      warn "NO INITIAL VER IN MQ using 1"
      @curver = 1
    end
    return false
  end

  return false if Time.now < (@verchecktime + VERSION_CHECK_DELAY)

  @verchecktime = Time.now
  begin
    if mq.get_worker_version != @curver
      info "RESTARTING WORKER ON PID #{$$}"
      return true
    end
  rescue Skynet::RequestExpiredError
    warn "NO CURRENT WORKER REV IN MQ still using 1"
    mq.set_worker_version(1)
    return false
  end

  false
end
|
91
|
+
|
92
|
+
|
93
|
+
# Takes this worker's status entry from the message queue, using a very
# short timeout. An expired request or queue timeout is logged through
# +error+ instead of being raised.
def take_worker_status
  mq.take_worker_status(@worker_info,0.00001)
rescue Skynet::RequestExpiredError, Skynet::QueueTimeout
  error "Couldnt take worker status for #{hostname} pid: #{process_id}"
end
|
100
|
+
|
101
|
+
# Writes the initial "waiting" status for this worker to the message queue:
# the worker info plus a name, a zero processed count and the start time in
# epoch seconds.
def notify_worker_started
  status = @worker_info.merge({
    :name       => "waiting for #{@worker_type}",
    :processed  => 0,
    :started_at => Time.now.to_i
  })
  mq.write_worker_status(status)
end
|
110
|
+
|
111
|
+
# Publishes a "task started" status.
#
# task - hash describing the task; it is modified in place with the current
#        processed count and the start time (epoch seconds) before being
#        merged over the worker info and written to the message queue.
def notify_task_begun(task)
  started = Time.now.to_i
  task[:processed]  = @processed
  task[:started_at] = started
  status = @worker_info.merge(task)
  mq.write_worker_status(status)
end
|
116
|
+
|
117
|
+
# Counts one more processed task and writes a "waiting" status with cleared
# task fields to the message queue.
def notify_task_complete
  @processed += 1

  idle_status = @worker_info.merge({
    :task_id       => 0,
    :job_id        => 0,
    :name          => "waiting for #{@worker_type}",
    :processed     => @processed,
    :map_or_reduce => nil,
    :started_at    => Time.now.to_i
  })
  mq.write_worker_status(idle_status)
end
|
131
|
+
|
132
|
+
# Logs that this worker is stopping, then takes its status entry via
# +take_worker_status+ and returns that call's result.
def notify_worker_stop
  info("Worker #{process_id} stopping...")
  take_worker_status
end
|
136
|
+
|
137
|
+
# Returns nil when the worker type is :any, otherwise the worker type itself.
def payload_type
  worker_type == :any ? nil : worker_type
end
|
141
|
+
|
142
|
+
# Main worker loop: installs signal handlers, announces the worker, then
# repeatedly takes a task message from the queue, runs it and writes the
# result back.
#
# Raises Skynet::Worker::RespawnWorker when a HUP was received, the memory
# limit was exceeded or the published worker version changed. Re-raises the
# connection error after more than 20 consecutive connection failures.
# Returns normally after an exit/interrupt or a NoManagerError.
def start
  connection_failures = 0
  @curver = nil

  # setup signal handlers for manager
  Signal.trap("HUP") { @respawn = true }
  # The first TERM only flags the loop to stop; a second one exits at once.
  Signal.trap("TERM") { @die ? exit : (@die = true) }
  Signal.trap("INT") { @die = true }

  raise Skynet::Worker::RespawnWorker.new if new_version_respawn?

  info "STARTING WORKER @ VER #{@curver} (#{@worker_type})"

  notify_worker_started

  # Declared outside the loop; +task+ is not reset between iterations.
  message = nil
  task = nil

  loop do
    message = nil
    begin
      exit if @die
      raise Skynet::Worker::RespawnWorker.new if @respawn

      local_mem = max_memory_reached?
      if local_mem
        raise Skynet::Worker::RespawnWorker.new("WORKER OVER MAX MEM AT: #{local_mem} MAX: #{MAX_MEMORY}")
      end

      # A previous iteration dropped the queue after a connection error.
      if connection_failures > 0
        @mq = Skynet::MessageQueue.new
        warn "WORKER RECONNECTED AFTER #{connection_failures} tries"
        connection_failures = 0
      end

      message = mq.take_next_task(@curver,0.00001,payload_type)

      next unless message.respond_to?(:payload)

      task = message.payload
      error "BAD MESSAGE", task unless task.respond_to?(:map_or_reduce)

      info "STEP 2 GOT MESSAGE #{message.name} type:#{task.map_or_reduce}, jobid: #{message.job_id}, taskid:#{message.task_id} it: #{message.iteration}"
      debug "STEP 2.1 message=", message.to_a
      next unless task
      # maybe instead of putting a time in the future, it puts the start time and an offset in seconds

      info "STEP 4 RUNNING TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
      notify_task_begun({
        :job_id        => message.job_id,
        :task_id       => message.task_id,
        :iteration     => message.iteration,
        :name          => message.name,
        :map_or_reduce => task.map_or_reduce
      })
      result = task.run

      info "STEP 5 GOT RESULT FROM RUN TASK #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
      debug "STEP 5.1 RESULT DATA:", result

      ## XXX need better result timeout
      mq.write_result(message,result,task.result_timeout)
      info "STEP 6 WROTE RESULT MESSAGE #{message.name} jobid: #{message.job_id} taskid: #{task.task_id}"
      notify_task_complete
    rescue Skynet::Worker::RespawnWorker => e
      info "Respawning and taking worker status"
      notify_worker_stop
      raise e
    rescue Skynet::RequestExpiredError => e
      # The request expired (no message was returned): check for a new
      # worker version, then pause before asking again.
      if new_version_respawn?
        notify_worker_stop
        raise Skynet::Worker::RespawnWorker.new
      end
      sleep 1
    rescue Skynet::ConnectionError, DRb::DRbConnError => e
      connection_failures += 1
      pause = connection_failures > 6 ? RETRY_TIME * 3 : RETRY_TIME
      error "#{e.message}, RETRY #{connection_failures} in #{pause} seconds !!"
      @mq = nil
      sleep pause
      if connection_failures > 20
        fatal "TOO MANY RECONNECTION EXCEPTIONS #{e.message}"
        notify_worker_stop
        raise e
      end
    rescue NoManagerError => e
      fatal e.message
      break
    rescue Interrupt, SystemExit => e
      warn "Exiting..."
      notify_worker_stop
      break
    rescue Exception => e
      detail = "#{e.inspect} #{e.backtrace.join("\n")}"
      error detail
      # Only a failure that happened after a message was taken is reported
      # back to the queue; what to do otherwise is an open question.
      if message
        mq.write_error(message,detail,(task.respond_to?(:result_timeout) ? task.result_timeout : 200))
      end
    end
  end
end
|
269
|
+
|
270
|
+
@@ok_to_mem_check = false
|
271
|
+
@@lastmem = nil
|
272
|
+
@@memct = 0
|
273
|
+
|
274
|
+
# Checks the process memory size against MAX_MEMORY, at most once every
# MEMORY_CHECK_DELAY seconds.
#
# Returns the measured size (an Integer) when it exceeds MAX_MEMORY, nil when
# a measurement was taken and is within the limit, and false when no
# measurement was taken (checks unavailable, first call, or too soon).
def max_memory_reached?
  return false unless ok_to_mem_check?

  # The first call only starts the clock.
  unless @memchecktime
    @memchecktime = Time.now
    return false
  end

  return false unless Time.now > (@memchecktime + MEMORY_CHECK_DELAY)

  @memchecktime = Time.now
  used = get_memory_size.to_i
  used > MAX_MEMORY ? used : nil
end
|
287
|
+
|
288
|
+
# Reads the VmSize entry from a /proc-style status file.
#
# file   - path of the status file.
# format - :pretty returns the value as a string truncated to 5 decimals
#          (needs BigDecimal to be loaded); anything else returns a Float.
#
# The value is the number on the first line containing "VmSize" divided by
# 1000. Returns the string '0' when the file has no such line or when
# reading or parsing fails (the failure is reported through +warn+).
def find_pid_size(file, format=:notpretty)
  # File.foreach closes the file even when the block returns early.
  File.foreach(file) do |line|
    next unless line.index('VmSize')
    # Drop the 7-character "VmSize:" label and the 4-character " kB\n" tail.
    size = line[7..-5].strip.to_f/1000
    return BigDecimal(size.to_s).truncate(5).to_s('F') if format == :pretty
    return size
  end
  # No VmSize line: same fallback as the error path.
  '0'
rescue StandardError => e
  warn "ERROR #{e.inspect}"
  '0'
end
|
302
|
+
|
303
|
+
# Returns the size +find_pid_size+ reports for the current process
# (/proc/self/status), in the default non-pretty format.
def get_memory_size
  find_pid_size("/proc/self/status", :notpretty)
end
|
306
|
+
|
307
|
+
# Reports whether memory checks are possible, i.e. whether
# /proc/self/status exists. The answer is cached in @@ok_to_mem_check
# (true, or :notok when the file is missing) so the file system is only
# consulted until the first answer is known.
def ok_to_mem_check?
  return true if @@ok_to_mem_check == true
  return false if @@ok_to_mem_check == :notok

  # File.exist? rather than File.exists?, which no longer exists in Ruby 3.2+.
  if File.exist?('/proc/self/status')
    @@lastmem ||= get_memory_size.to_i
    @@ok_to_mem_check = true
  else
    @@ok_to_mem_check = :notok
    false
  end
end
|
318
|
+
|
319
|
+
|
320
|
+
# kinda like system() but gives me back a pid
|
321
|
+
# Forks a child that replaces itself with `/bin/sh -c "<command>"`, detaches
# it and returns the child's pid.
#
# command - shell command text; it is embedded between double quotes as-is.
def self.fork_and_exec(command)
  sleep 0.01 # remove contention on manager drb object
  Skynet::Logger.get
  shell_line = "/bin/sh -c \"#{command}\""
  info "executing #{shell_line}"
  child_pid = fork do
    exec(shell_line)
    exit
  end
  Process.detach(child_pid)
  child_pid
end
|
332
|
+
|
333
|
+
# Command-line entry point: parses ARGV, requires the requested libraries,
# then creates and starts a worker.
#
# options - hash, modified in place; :worker_type defaults to :any and
#           :required_libs to []. ARGV switches -r/--required and
#           --worker_type add to / replace these values.
#
# When the worker raises RespawnWorker, a replacement process is launched
# through +fork_and_exec+ with the same worker type and -r libraries, and
# this process exits. Exits as well when a required library cannot be
# loaded or no manager is available. Any other exception is logged as fatal
# and handed to ExceptionReport.
def self.start(options={})
  options[:worker_type]   ||= :any
  options[:required_libs] ||= []

  OptionParser.new do |opt|
    opt.banner = "Usage: worker [options]"
    opt.on('-r', '--required LIBRARY', 'Include the specified libraries') do |v|
      options[:required_libs] << v
    end
    opt.on('-ot', '--worker_type WORKERTYPE', "master, task or any") do |v|
      if ["any","master","task"].include?(v)
        options[:worker_type] = v
      else
        raise Skynet::Error.new("#{v} is not a valid worker_type")
      end
    end
    opt.parse!(ARGV)
  end

  options[:required_libs].each do |adlib|
    begin
      require adlib
    rescue LoadError => e
      # LoadError is what a failed require raises in plain Ruby.
      error "The included lib #{adlib} was not found: #{e.inspect}"
      exit
    end
  end

  # worker_script_path = (Skynet::CONFIG[:WORKER_SCRIPT_PATH] || File.dirname(__FILE__)) << "/skynet_worker"

  debug "WORKER STARTING WORKER_TYPE?:#{options[:worker_type]}"

  begin
    worker = Skynet::Worker.new(options[:worker_type])
    worker.start
  rescue Skynet::Worker::NoManagerError => e
    fatal e.message
    exit
  rescue Skynet::Worker::RespawnWorker => e
    warn "WORKER #{$$} SCRIPT CAUGHT RESPAWN. RESTARTING"
    cmd = "RAILS_ENV=#{RAILS_ENV} ruby #{Skynet::CONFIG[:LAUNCHER_PATH]} --worker_type=#{options[:worker_type]}"
    # The leading space keeps the first -r from fusing with --worker_type=...
    unless options[:required_libs].empty?
      cmd << " -r #{options[:required_libs].join(' -r ')}"
    end
    pid = fork_and_exec(cmd)
    warn "parent_pid: #{$$}, child_pid: #{pid}"
    exit
  rescue SystemExit
    info "WORKER #{$$} EXITING GRACEFULLY"
  rescue Exception => e
    fatal "WORKER #{$$} DYING #{e.class} #{e.message} #{e.backtrace}"
    report = ExceptionReport.new(e)
    report.save
  end
end
|
386
|
+
end
|
387
|
+
end
|
388
|
+
|
389
|
+
# Do-nothing exception report: accepts any constructor arguments and
# +save+ has no effect.
class ExceptionReport
  # Ignores every argument.
  def initialize(*_ignored)
  end

  # No-op; returns nil.
  def save
    nil
  end
end
|