job_dispatch 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/job_dispatch/broker.rb +36 -13
- data/lib/job_dispatch/identity.rb +1 -1
- data/lib/job_dispatch/version.rb +1 -1
- data/lib/job_dispatch/worker.rb +16 -8
- data/lib/job_dispatch/worker/item.rb +5 -1
- data/spec/job_dispatch/broker_spec.rb +62 -17
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e00ac3ea419f26f9e3d8ae4a52a73c267f306ce7
|
4
|
+
data.tar.gz: 2f2941010e8775f2e6ec0759952438fd899f17a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 03c7b5606808c97b37c0c3903ec9058db29f8f96741a5b4b42a2220e459017249b9b48cb6b701c8fbd75392e0ba212bee8dc38ff29257512fd119e39b3739671
|
7
|
+
data.tar.gz: 414171d3052dcfdc29f5c8c30f3216aeb755d4bdf96c205452016241c1ac493bd859c374d29c24d1287709092bc16e0d04cda87b020a13b9f4228bf9f8bf13a7
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# job_dispatch change log
|
2
|
+
|
3
|
+
## Version 0.0.2
|
4
|
+
|
5
|
+
* Broker sends an idle command to a worker immediately upon connect. This helps recover from a case where a worker
|
6
|
+
has been running for some time before the dispatcher starts. (Particularly with a Windows worker using NetMQ where
|
7
|
+
a closed socket may still send messages.)
|
8
|
+
* Improve Ruby worker serialisation of exceptions into job result.
|
9
|
+
|
10
|
+
## Version 0.0.1
|
11
|
+
|
12
|
+
* First release
|
data/lib/job_dispatch/broker.rb
CHANGED
@@ -14,7 +14,7 @@ module JobDispatch
|
|
14
14
|
POLL_TIME = 5.123
|
15
15
|
STOP_SIGNALS = %w[INT TERM KILL]
|
16
16
|
|
17
|
-
IdleWorker = Struct.new :worker_id, :idle_since, :queue, :worker_name
|
17
|
+
IdleWorker = Struct.new :worker_id, :idle_since, :queue, :worker_name, :idle_count
|
18
18
|
|
19
19
|
|
20
20
|
# any object that will respond to `next_job_for_queue`, which should return a job, or nil if there
|
@@ -67,6 +67,7 @@ module JobDispatch
|
|
67
67
|
puts "JobDispatch::Broker running in process #{Process.pid}"
|
68
68
|
JobDispatch.logger.info("JobDispatch::Broker running in process #{Process.pid}")
|
69
69
|
@running = true
|
70
|
+
@running_thread = Thread.current
|
70
71
|
poller = ZMQ::Poller.new
|
71
72
|
|
72
73
|
@socket = JobDispatch::Broker::Socket.new(@worker_bind_address)
|
@@ -99,10 +100,12 @@ module JobDispatch
|
|
99
100
|
puts "JobDispatch::Broker shutting down, due to #{signal_name} signal"
|
100
101
|
@running = false
|
101
102
|
@status = "SHUTDOWN"
|
102
|
-
sleep 1
|
103
|
+
# sleep 1
|
103
104
|
process_quit
|
104
|
-
sleep 1
|
105
|
+
sleep 1 # let ZMQ send the messages before we close the socket.
|
105
106
|
end
|
107
|
+
rescue StandardError => e
|
108
|
+
JobDispatch.logger.error "Unexpected exception: #{e}"
|
106
109
|
end
|
107
110
|
end
|
108
111
|
ensure
|
@@ -112,6 +115,14 @@ module JobDispatch
|
|
112
115
|
end
|
113
116
|
|
114
117
|
|
118
|
+
def stop
|
119
|
+
if running?
|
120
|
+
@running = false
|
121
|
+
@running_thread.raise SignalException.new("TERM") unless Thread.current == @running_thread
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
|
115
126
|
def process_messages(poller)
|
116
127
|
# TODO: calculate the amount of time to sleep to wake up such that a scheduled event happens as close
|
117
128
|
# as possible to the time it was supposed to happen. This could additionally mean that the POLL_TIME
|
@@ -158,7 +169,7 @@ module JobDispatch
|
|
158
169
|
case command.command
|
159
170
|
when "ready"
|
160
171
|
# add to list of workers who are ready for work
|
161
|
-
add_available_worker(command)
|
172
|
+
add_available_worker(command, 0)
|
162
173
|
|
163
174
|
# don't reply, leaves worker blocked waiting for a job to do.
|
164
175
|
reply = nil
|
@@ -172,7 +183,7 @@ module JobDispatch
|
|
172
183
|
|
173
184
|
if command.worker_ready?
|
174
185
|
# a completed job also means the worker is available for more work.
|
175
|
-
add_available_worker(command)
|
186
|
+
add_available_worker(command, 1)
|
176
187
|
reply = nil
|
177
188
|
else
|
178
189
|
reply.parameters = {:status => 'thanks'}
|
@@ -235,9 +246,10 @@ module JobDispatch
|
|
235
246
|
def send_idle_commands(idle_time=nil)
|
236
247
|
idle_time ||= Time.now
|
237
248
|
idle_time -= WORKER_IDLE_TIME
|
238
|
-
idle_workers = @workers_waiting_for_jobs.select { |worker_id, worker| worker.idle_since < idle_time }
|
249
|
+
idle_workers = @workers_waiting_for_jobs.select { |worker_id, worker| worker.idle_since < idle_time || worker.idle_count == 0 }
|
239
250
|
idle_workers.each do |worker_id, worker|
|
240
251
|
send_job_to_worker(InternalJob.new('idle', worker.queue), worker_id)
|
252
|
+
worker.idle_count += 1
|
241
253
|
end
|
242
254
|
end
|
243
255
|
|
@@ -268,10 +280,18 @@ module JobDispatch
|
|
268
280
|
|
269
281
|
|
270
282
|
# add a worker to the list of workers available for jobs.
|
271
|
-
def add_available_worker(command)
|
283
|
+
def add_available_worker(command, idle_count=0)
|
272
284
|
JobDispatch.logger.info("JobDispatch::Broker Worker '#{command.worker_id.to_json}' available for work on queue '#{command.queue}'")
|
285
|
+
|
286
|
+
# immediately remove any existing workers with the given name. If a worker has closed its connection and opened
|
287
|
+
# a new one (perhaps it started a long time before the broker did), the entry under its old connection is stale.
|
288
|
+
|
289
|
+
if command.worker_name # this is only sent on initial requests.
|
290
|
+
remove_worker_named(command.worker_name)
|
291
|
+
end
|
292
|
+
|
273
293
|
queue = command.queue
|
274
|
-
idle_worker = IdleWorker.new(command.worker_id, Time.now, queue, command.worker_name)
|
294
|
+
idle_worker = IdleWorker.new(command.worker_id, Time.now, queue, command.worker_name, idle_count)
|
275
295
|
workers_waiting_for_jobs[command.worker_id] = idle_worker
|
276
296
|
queues[queue] << command.worker_id
|
277
297
|
if command.worker_name # this is only sent on initial requests.
|
@@ -282,20 +302,23 @@ module JobDispatch
|
|
282
302
|
# remove a worker from available list. Worker is shutting down or indicating that it will no longer
|
283
303
|
# be available for doing work.
|
284
304
|
def remove_available_worker(command)
|
285
|
-
JobDispatch.logger.info("JobDispatch::Broker Worker '#{command.worker_id.to_json}' available for work on queue '#{command.queue}'")
|
305
|
+
JobDispatch.logger.info("JobDispatch::Broker Removing Worker '#{command.worker_id.to_json}' available for work on queue '#{command.queue}'")
|
286
306
|
|
287
307
|
# the goodbye command is sent by another socket connection, so the worker_id (socket identity) will
|
288
|
-
# not match the socket actually waiting for work.
|
308
|
+
# not match the socket actually waiting for work. Remove the worker by its name, not socket identity
|
309
|
+
|
310
|
+
remove_worker_named(command.worker_name)
|
311
|
+
{status: "see ya later"}
|
312
|
+
end
|
289
313
|
|
290
|
-
|
314
|
+
def remove_worker_named(worker_name)
|
315
|
+
keys = worker_names.select { |id, name| name == worker_name }.keys
|
291
316
|
keys.each do |worker_id|
|
292
317
|
workers_waiting_for_reply.delete(worker_id) # socket will be closing, no need to send it anything.
|
293
318
|
worker = workers_waiting_for_jobs.delete(worker_id)
|
294
319
|
queues[worker.queue].delete(worker_id) if worker
|
295
320
|
worker_names.delete(worker_id)
|
296
321
|
end
|
297
|
-
|
298
|
-
{status: "see ya later"}
|
299
322
|
end
|
300
323
|
|
301
324
|
def dispatch_jobs_to_workers
|
data/lib/job_dispatch/version.rb
CHANGED
data/lib/job_dispatch/worker.rb
CHANGED
@@ -8,6 +8,9 @@ module JobDispatch
|
|
8
8
|
#
|
9
9
|
class Worker
|
10
10
|
|
11
|
+
class StopError < StandardError
|
12
|
+
end
|
13
|
+
|
11
14
|
IDLE_TIME = 3
|
12
15
|
IDLE_COUNT = 10
|
13
16
|
|
@@ -38,11 +41,12 @@ module JobDispatch
|
|
38
41
|
|
39
42
|
def run
|
40
43
|
@running = true
|
44
|
+
@running_thread = Thread.current
|
41
45
|
while running?
|
42
|
-
puts "connecting"
|
46
|
+
# puts "connecting"
|
43
47
|
connect
|
44
|
-
puts "asking for work"
|
45
|
-
ask_for_work
|
48
|
+
# puts "asking for work"
|
49
|
+
ask_for_work rescue StopError
|
46
50
|
|
47
51
|
# if we are idle for too many times, the broker has restarted or gone away, and we will be stuck in receive
|
48
52
|
# state, so we need to close the socket and make a new one to ask for work again.
|
@@ -60,13 +64,14 @@ module JobDispatch
|
|
60
64
|
idle
|
61
65
|
idle_count += 1
|
62
66
|
end
|
63
|
-
rescue Interrupt
|
64
|
-
|
65
|
-
JobDispatch.logger.info("Worker #{}")
|
67
|
+
rescue Interrupt, StopError
|
68
|
+
JobDispatch.logger.info("Worker stopping.")
|
66
69
|
stop
|
67
70
|
disconnect
|
71
|
+
# Tell the broker goodbye so that we are removed from the idle worker list and no more jobs will come here.
|
68
72
|
connect
|
69
73
|
send_goodbye
|
74
|
+
sleep(0.1) # let the socket send the message before we disconnect...
|
70
75
|
end
|
71
76
|
end
|
72
77
|
disconnect
|
@@ -86,7 +91,10 @@ module JobDispatch
|
|
86
91
|
end
|
87
92
|
|
88
93
|
def stop
|
89
|
-
|
94
|
+
if running?
|
95
|
+
@running_thread.raise StopError unless @running_thread == Thread.current
|
96
|
+
@running = false
|
97
|
+
end
|
90
98
|
end
|
91
99
|
|
92
100
|
def self.touch(timeout=nil)
|
@@ -111,7 +119,7 @@ module JobDispatch
|
|
111
119
|
end
|
112
120
|
|
113
121
|
def idle
|
114
|
-
puts "waiting for job to do…"
|
122
|
+
# puts "waiting for job to do…"
|
115
123
|
end
|
116
124
|
end
|
117
125
|
end
|
@@ -30,7 +30,11 @@ module JobDispatch
|
|
30
30
|
@result = @klass.__send__(method.to_sym, *params)
|
31
31
|
@status = :success
|
32
32
|
rescue StandardError => ex
|
33
|
-
@result =
|
33
|
+
@result = {
|
34
|
+
class: ex.class.to_s,
|
35
|
+
message: ex.to_s,
|
36
|
+
backtrace: ex.backtrace,
|
37
|
+
}
|
34
38
|
@status = :error
|
35
39
|
ensure
|
36
40
|
Thread.current["JobDispatch::Worker.job_id"] = nil
|
@@ -81,7 +81,6 @@ describe JobDispatch::Broker do
|
|
81
81
|
subject.workers_waiting_for_reply << worker_id3
|
82
82
|
subject.process_command(Command.new(worker_id3, {command: 'ready', worker_name: 'test worker 2'}))
|
83
83
|
|
84
|
-
|
85
84
|
@job = FactoryGirl.build :job
|
86
85
|
@socket = double('Broker::Socket', :send_command => nil)
|
87
86
|
subject.stub(:socket => @socket)
|
@@ -166,6 +165,14 @@ describe JobDispatch::Broker do
|
|
166
165
|
@result = subject.process_command(command)
|
167
166
|
expect(subject.worker_names[worker_id]).to eq('ruby worker')
|
168
167
|
end
|
168
|
+
|
169
|
+
let(:command2) { Command.new(worker_id2, {command: 'ready', queue: 'example', worker_name: 'ruby worker'}) }
|
170
|
+
|
171
|
+
it "duplicate ready has only 1 worker" do
|
172
|
+
@result = subject.process_command(command)
|
173
|
+
@result = subject.process_command(command2)
|
174
|
+
expect(subject.queues[:example].count).to eq(1)
|
175
|
+
end
|
169
176
|
end
|
170
177
|
|
171
178
|
|
@@ -405,34 +412,72 @@ describe JobDispatch::Broker do
|
|
405
412
|
command = Command.new(worker_id2, {command: 'ready', queue: 'example'})
|
406
413
|
@result = subject.process_command(command)
|
407
414
|
end
|
408
|
-
|
409
415
|
end
|
410
416
|
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
expect(cmd.parameters[:command]).to eq('idle')
|
417
|
+
context "already done an idle" do
|
418
|
+
before do
|
419
|
+
JobDispatch::Broker::IdleWorker.any_instance.stub(:idle_count => 1)
|
415
420
|
end
|
416
421
|
|
417
|
-
|
418
|
-
|
422
|
+
it "that have waited long enough receive idle commands" do
|
423
|
+
@socket.should_receive(:send_command) do |cmd|
|
424
|
+
expect(cmd.worker_id).to eq(worker_id)
|
425
|
+
expect(cmd.parameters[:command]).to eq('idle')
|
426
|
+
end
|
427
|
+
|
428
|
+
Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
|
429
|
+
subject.send_idle_commands
|
430
|
+
end
|
431
|
+
|
432
|
+
expect(subject.workers_waiting_for_reply).not_to include(worker_id)
|
433
|
+
expect(subject.queues[:example]).not_to include(worker_id)
|
419
434
|
end
|
420
435
|
|
421
|
-
|
422
|
-
|
436
|
+
it "that have not waited long enough are still waiting" do
|
437
|
+
@socket.should_receive(:send_command) do |cmd|
|
438
|
+
expect(cmd.worker_id).not_to eq(worker_id2)
|
439
|
+
end
|
440
|
+
|
441
|
+
Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
|
442
|
+
subject.send_idle_commands
|
443
|
+
end
|
444
|
+
|
445
|
+
expect(subject.workers_waiting_for_reply).to include(worker_id2)
|
446
|
+
expect(subject.queues[:example]).to include(worker_id2)
|
447
|
+
end
|
423
448
|
end
|
424
449
|
|
425
|
-
|
426
|
-
|
427
|
-
|
450
|
+
context "have not done an idle" do
|
451
|
+
before do
|
452
|
+
JobDispatch::Broker::IdleWorker.any_instance.stub(:idle_count => 0)
|
428
453
|
end
|
429
454
|
|
430
|
-
|
431
|
-
|
455
|
+
it "that have waited long enough receive idle commands" do
|
456
|
+
@socket.should_receive(:send_command) do |cmd|
|
457
|
+
expect(cmd.worker_id).to eq(worker_id)
|
458
|
+
expect(cmd.parameters[:command]).to eq('idle')
|
459
|
+
end
|
460
|
+
|
461
|
+
Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
|
462
|
+
subject.send_idle_commands
|
463
|
+
end
|
464
|
+
|
465
|
+
expect(subject.workers_waiting_for_reply).not_to include(worker_id)
|
466
|
+
expect(subject.queues[:example]).not_to include(worker_id)
|
432
467
|
end
|
433
468
|
|
434
|
-
|
435
|
-
|
469
|
+
it "that have not waited long enough are still waiting" do
|
470
|
+
@socket.should_receive(:send_command) do |cmd|
|
471
|
+
expect(cmd.worker_id).not_to eq(worker_id2)
|
472
|
+
end
|
473
|
+
|
474
|
+
Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
|
475
|
+
subject.send_idle_commands
|
476
|
+
end
|
477
|
+
|
478
|
+
expect(subject.workers_waiting_for_reply).not_to include(worker_id2)
|
479
|
+
expect(subject.queues[:example]).not_to include(worker_id2)
|
480
|
+
end
|
436
481
|
end
|
437
482
|
end
|
438
483
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: job_dispatch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Connolly
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbczmq
|
@@ -177,6 +177,7 @@ files:
|
|
177
177
|
- ".gitignore"
|
178
178
|
- ".rspec"
|
179
179
|
- ".travis.yml"
|
180
|
+
- CHANGELOG.md
|
180
181
|
- Gemfile
|
181
182
|
- Guardfile
|
182
183
|
- LICENSE.txt
|