job_dispatch 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/lib/job_dispatch/broker.rb +36 -13
- data/lib/job_dispatch/identity.rb +1 -1
- data/lib/job_dispatch/version.rb +1 -1
- data/lib/job_dispatch/worker.rb +16 -8
- data/lib/job_dispatch/worker/item.rb +5 -1
- data/spec/job_dispatch/broker_spec.rb +62 -17
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e00ac3ea419f26f9e3d8ae4a52a73c267f306ce7
|
4
|
+
data.tar.gz: 2f2941010e8775f2e6ec0759952438fd899f17a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 03c7b5606808c97b37c0c3903ec9058db29f8f96741a5b4b42a2220e459017249b9b48cb6b701c8fbd75392e0ba212bee8dc38ff29257512fd119e39b3739671
|
7
|
+
data.tar.gz: 414171d3052dcfdc29f5c8c30f3216aeb755d4bdf96c205452016241c1ac493bd859c374d29c24d1287709092bc16e0d04cda87b020a13b9f4228bf9f8bf13a7
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# job_dispatch change log
|
2
|
+
|
3
|
+
## Version 0.0.2
|
4
|
+
|
5
|
+
* Broker sends an idle command to a worker immediately upon connect. This helps recover from a case where a worker
|
6
|
+
has been running for some time before the dispatcher starts. (Particularly with a Windows worker using NetMQ where
|
7
|
+
a closed socket may still send messages.)
|
8
|
+
* Improve Ruby worker serialisation of exceptions into job result.
|
9
|
+
|
10
|
+
## Version 0.0.1
|
11
|
+
|
12
|
+
* First release
|
data/lib/job_dispatch/broker.rb
CHANGED
@@ -14,7 +14,7 @@ module JobDispatch
|
|
14
14
|
POLL_TIME = 5.123
|
15
15
|
STOP_SIGNALS = %w[INT TERM KILL]
|
16
16
|
|
17
|
-
IdleWorker = Struct.new :worker_id, :idle_since, :queue, :worker_name
|
17
|
+
IdleWorker = Struct.new :worker_id, :idle_since, :queue, :worker_name, :idle_count
|
18
18
|
|
19
19
|
|
20
20
|
# any object that will respond to `next_job_for_queue`, which should return a job, or nil if there
|
@@ -67,6 +67,7 @@ module JobDispatch
|
|
67
67
|
puts "JobDispatch::Broker running in process #{Process.pid}"
|
68
68
|
JobDispatch.logger.info("JobDispatch::Broker running in process #{Process.pid}")
|
69
69
|
@running = true
|
70
|
+
@running_thread = Thread.current
|
70
71
|
poller = ZMQ::Poller.new
|
71
72
|
|
72
73
|
@socket = JobDispatch::Broker::Socket.new(@worker_bind_address)
|
@@ -99,10 +100,12 @@ module JobDispatch
|
|
99
100
|
puts "JobDispatch::Broker shutting down, due to #{signal_name} signal"
|
100
101
|
@running = false
|
101
102
|
@status = "SHUTDOWN"
|
102
|
-
sleep 1
|
103
|
+
# sleep 1
|
103
104
|
process_quit
|
104
|
-
sleep 1
|
105
|
+
sleep 1 # let ZMQ send the messages before we close the socket.
|
105
106
|
end
|
107
|
+
rescue StandardError => e
|
108
|
+
JobDispatch.logger.error "Unexpected exception: #{e}"
|
106
109
|
end
|
107
110
|
end
|
108
111
|
ensure
|
@@ -112,6 +115,14 @@ module JobDispatch
|
|
112
115
|
end
|
113
116
|
|
114
117
|
|
118
|
+
def stop
|
119
|
+
if running?
|
120
|
+
@running = false
|
121
|
+
@running_thread.raise SignalException.new("TERM") unless Thread.current == @running_thread
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
|
115
126
|
def process_messages(poller)
|
116
127
|
# TODO: calculate the amount of time to sleep to wake up such that a scheduled event happens as close
|
117
128
|
# as possible to the time it was supposed to happen. This could additionally mean that the POLL_TIME
|
@@ -158,7 +169,7 @@ module JobDispatch
|
|
158
169
|
case command.command
|
159
170
|
when "ready"
|
160
171
|
# add to list of workers who are ready for work
|
161
|
-
add_available_worker(command)
|
172
|
+
add_available_worker(command, 0)
|
162
173
|
|
163
174
|
# don't reply, leaves worker blocked waiting for a job to do.
|
164
175
|
reply = nil
|
@@ -172,7 +183,7 @@ module JobDispatch
|
|
172
183
|
|
173
184
|
if command.worker_ready?
|
174
185
|
# a completed job also means the worker is available for more work.
|
175
|
-
add_available_worker(command)
|
186
|
+
add_available_worker(command, 1)
|
176
187
|
reply = nil
|
177
188
|
else
|
178
189
|
reply.parameters = {:status => 'thanks'}
|
@@ -235,9 +246,10 @@ module JobDispatch
|
|
235
246
|
def send_idle_commands(idle_time=nil)
|
236
247
|
idle_time ||= Time.now
|
237
248
|
idle_time -= WORKER_IDLE_TIME
|
238
|
-
idle_workers = @workers_waiting_for_jobs.select { |worker_id, worker| worker.idle_since < idle_time }
|
249
|
+
idle_workers = @workers_waiting_for_jobs.select { |worker_id, worker| worker.idle_since < idle_time || worker.idle_count == 0 }
|
239
250
|
idle_workers.each do |worker_id, worker|
|
240
251
|
send_job_to_worker(InternalJob.new('idle', worker.queue), worker_id)
|
252
|
+
worker.idle_count += 1
|
241
253
|
end
|
242
254
|
end
|
243
255
|
|
@@ -268,10 +280,18 @@ module JobDispatch
|
|
268
280
|
|
269
281
|
|
270
282
|
# add a worker to the list of workers available for jobs.
|
271
|
-
def add_available_worker(command)
|
283
|
+
def add_available_worker(command, idle_count=0)
|
272
284
|
JobDispatch.logger.info("JobDispatch::Broker Worker '#{command.worker_id.to_json}' available for work on queue '#{command.queue}'")
|
285
|
+
|
286
|
+
# immediately remove any existing workers with the given name. If a worker has closed its connection and opened
|
287
|
+
# a new one (perhaps it started a long time before the broker did), its stale entry must not linger here.
|
288
|
+
|
289
|
+
if command.worker_name # this is only sent on initial requests.
|
290
|
+
remove_worker_named(command.worker_name)
|
291
|
+
end
|
292
|
+
|
273
293
|
queue = command.queue
|
274
|
-
idle_worker = IdleWorker.new(command.worker_id, Time.now, queue, command.worker_name)
|
294
|
+
idle_worker = IdleWorker.new(command.worker_id, Time.now, queue, command.worker_name, idle_count)
|
275
295
|
workers_waiting_for_jobs[command.worker_id] = idle_worker
|
276
296
|
queues[queue] << command.worker_id
|
277
297
|
if command.worker_name # this is only sent on initial requests.
|
@@ -282,20 +302,23 @@ module JobDispatch
|
|
282
302
|
# remove a worker from available list. Worker is shutting down or indicating that it will no longer
|
283
303
|
# be available for doing work.
|
284
304
|
def remove_available_worker(command)
|
285
|
-
JobDispatch.logger.info("JobDispatch::Broker Worker '#{command.worker_id.to_json}' available for work on queue '#{command.queue}'")
|
305
|
+
JobDispatch.logger.info("JobDispatch::Broker Removing Worker '#{command.worker_id.to_json}' available for work on queue '#{command.queue}'")
|
286
306
|
|
287
307
|
# the goodbye command is sent by another socket connection, so the worker_id (socket identity) will
|
288
|
-
# not match the socket actually waiting for work.
|
308
|
+
# not match the socket actually waiting for work. Remove the worker by its name, not socket identity
|
309
|
+
|
310
|
+
remove_worker_named(command.worker_name)
|
311
|
+
{status: "see ya later"}
|
312
|
+
end
|
289
313
|
|
290
|
-
|
314
|
+
def remove_worker_named(worker_name)
|
315
|
+
keys = worker_names.select { |id, name| name == worker_name }.keys
|
291
316
|
keys.each do |worker_id|
|
292
317
|
workers_waiting_for_reply.delete(worker_id) # socket will be closing, no need to send it anything.
|
293
318
|
worker = workers_waiting_for_jobs.delete(worker_id)
|
294
319
|
queues[worker.queue].delete(worker_id) if worker
|
295
320
|
worker_names.delete(worker_id)
|
296
321
|
end
|
297
|
-
|
298
|
-
{status: "see ya later"}
|
299
322
|
end
|
300
323
|
|
301
324
|
def dispatch_jobs_to_workers
|
data/lib/job_dispatch/version.rb
CHANGED
data/lib/job_dispatch/worker.rb
CHANGED
@@ -8,6 +8,9 @@ module JobDispatch
|
|
8
8
|
#
|
9
9
|
class Worker
|
10
10
|
|
11
|
+
class StopError < StandardError
|
12
|
+
end
|
13
|
+
|
11
14
|
IDLE_TIME = 3
|
12
15
|
IDLE_COUNT = 10
|
13
16
|
|
@@ -38,11 +41,12 @@ module JobDispatch
|
|
38
41
|
|
39
42
|
def run
|
40
43
|
@running = true
|
44
|
+
@running_thread = Thread.current
|
41
45
|
while running?
|
42
|
-
puts "connecting"
|
46
|
+
# puts "connecting"
|
43
47
|
connect
|
44
|
-
puts "asking for work"
|
45
|
-
ask_for_work
|
48
|
+
# puts "asking for work"
|
49
|
+
ask_for_work rescue StopError
|
46
50
|
|
47
51
|
# if we are idle for too many times, the broker has restarted or gone away, and we will be stuck in receive
|
48
52
|
# state, so we need to close the socket and make a new one to ask for work again.
|
@@ -60,13 +64,14 @@ module JobDispatch
|
|
60
64
|
idle
|
61
65
|
idle_count += 1
|
62
66
|
end
|
63
|
-
rescue Interrupt
|
64
|
-
|
65
|
-
JobDispatch.logger.info("Worker #{}")
|
67
|
+
rescue Interrupt, StopError
|
68
|
+
JobDispatch.logger.info("Worker stopping.")
|
66
69
|
stop
|
67
70
|
disconnect
|
71
|
+
# Tell the broker goodbye so that we are removed from the idle worker list and no more jobs will come here.
|
68
72
|
connect
|
69
73
|
send_goodbye
|
74
|
+
sleep(0.1) # let the socket send the message before we disconnect...
|
70
75
|
end
|
71
76
|
end
|
72
77
|
disconnect
|
@@ -86,7 +91,10 @@ module JobDispatch
|
|
86
91
|
end
|
87
92
|
|
88
93
|
def stop
|
89
|
-
|
94
|
+
if running?
|
95
|
+
@running_thread.raise StopError unless @running_thread == Thread.current
|
96
|
+
@running = false
|
97
|
+
end
|
90
98
|
end
|
91
99
|
|
92
100
|
def self.touch(timeout=nil)
|
@@ -111,7 +119,7 @@ module JobDispatch
|
|
111
119
|
end
|
112
120
|
|
113
121
|
def idle
|
114
|
-
puts "waiting for job to do…"
|
122
|
+
# puts "waiting for job to do…"
|
115
123
|
end
|
116
124
|
end
|
117
125
|
end
|
@@ -30,7 +30,11 @@ module JobDispatch
|
|
30
30
|
@result = @klass.__send__(method.to_sym, *params)
|
31
31
|
@status = :success
|
32
32
|
rescue StandardError => ex
|
33
|
-
@result =
|
33
|
+
@result = {
|
34
|
+
class: ex.class.to_s,
|
35
|
+
message: ex.to_s,
|
36
|
+
backtrace: ex.backtrace,
|
37
|
+
}
|
34
38
|
@status = :error
|
35
39
|
ensure
|
36
40
|
Thread.current["JobDispatch::Worker.job_id"] = nil
|
@@ -81,7 +81,6 @@ describe JobDispatch::Broker do
|
|
81
81
|
subject.workers_waiting_for_reply << worker_id3
|
82
82
|
subject.process_command(Command.new(worker_id3, {command: 'ready', worker_name: 'test worker 2'}))
|
83
83
|
|
84
|
-
|
85
84
|
@job = FactoryGirl.build :job
|
86
85
|
@socket = double('Broker::Socket', :send_command => nil)
|
87
86
|
subject.stub(:socket => @socket)
|
@@ -166,6 +165,14 @@ describe JobDispatch::Broker do
|
|
166
165
|
@result = subject.process_command(command)
|
167
166
|
expect(subject.worker_names[worker_id]).to eq('ruby worker')
|
168
167
|
end
|
168
|
+
|
169
|
+
let(:command2) { Command.new(worker_id2, {command: 'ready', queue: 'example', worker_name: 'ruby worker'}) }
|
170
|
+
|
171
|
+
it "duplicate ready has only 1 worker" do
|
172
|
+
@result = subject.process_command(command)
|
173
|
+
@result = subject.process_command(command2)
|
174
|
+
expect(subject.queues[:example].count).to eq(1)
|
175
|
+
end
|
169
176
|
end
|
170
177
|
|
171
178
|
|
@@ -405,34 +412,72 @@ describe JobDispatch::Broker do
|
|
405
412
|
command = Command.new(worker_id2, {command: 'ready', queue: 'example'})
|
406
413
|
@result = subject.process_command(command)
|
407
414
|
end
|
408
|
-
|
409
415
|
end
|
410
416
|
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
expect(cmd.parameters[:command]).to eq('idle')
|
417
|
+
context "already done an idle" do
|
418
|
+
before do
|
419
|
+
JobDispatch::Broker::IdleWorker.any_instance.stub(:idle_count => 1)
|
415
420
|
end
|
416
421
|
|
417
|
-
|
418
|
-
|
422
|
+
it "that have waited long enough receive idle commands" do
|
423
|
+
@socket.should_receive(:send_command) do |cmd|
|
424
|
+
expect(cmd.worker_id).to eq(worker_id)
|
425
|
+
expect(cmd.parameters[:command]).to eq('idle')
|
426
|
+
end
|
427
|
+
|
428
|
+
Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
|
429
|
+
subject.send_idle_commands
|
430
|
+
end
|
431
|
+
|
432
|
+
expect(subject.workers_waiting_for_reply).not_to include(worker_id)
|
433
|
+
expect(subject.queues[:example]).not_to include(worker_id)
|
419
434
|
end
|
420
435
|
|
421
|
-
|
422
|
-
|
436
|
+
it "that have not waited long enough are still waiting" do
|
437
|
+
@socket.should_receive(:send_command) do |cmd|
|
438
|
+
expect(cmd.worker_id).not_to eq(worker_id2)
|
439
|
+
end
|
440
|
+
|
441
|
+
Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
|
442
|
+
subject.send_idle_commands
|
443
|
+
end
|
444
|
+
|
445
|
+
expect(subject.workers_waiting_for_reply).to include(worker_id2)
|
446
|
+
expect(subject.queues[:example]).to include(worker_id2)
|
447
|
+
end
|
423
448
|
end
|
424
449
|
|
425
|
-
|
426
|
-
|
427
|
-
|
450
|
+
context "have not done an idle" do
|
451
|
+
before do
|
452
|
+
JobDispatch::Broker::IdleWorker.any_instance.stub(:idle_count => 0)
|
428
453
|
end
|
429
454
|
|
430
|
-
|
431
|
-
|
455
|
+
it "that have waited long enough receive idle commands" do
|
456
|
+
@socket.should_receive(:send_command) do |cmd|
|
457
|
+
expect(cmd.worker_id).to eq(worker_id)
|
458
|
+
expect(cmd.parameters[:command]).to eq('idle')
|
459
|
+
end
|
460
|
+
|
461
|
+
Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
|
462
|
+
subject.send_idle_commands
|
463
|
+
end
|
464
|
+
|
465
|
+
expect(subject.workers_waiting_for_reply).not_to include(worker_id)
|
466
|
+
expect(subject.queues[:example]).not_to include(worker_id)
|
432
467
|
end
|
433
468
|
|
434
|
-
|
435
|
-
|
469
|
+
it "that have not waited long enough are still waiting" do
|
470
|
+
@socket.should_receive(:send_command) do |cmd|
|
471
|
+
expect(cmd.worker_id).not_to eq(worker_id2)
|
472
|
+
end
|
473
|
+
|
474
|
+
Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
|
475
|
+
subject.send_idle_commands
|
476
|
+
end
|
477
|
+
|
478
|
+
expect(subject.workers_waiting_for_reply).not_to include(worker_id2)
|
479
|
+
expect(subject.queues[:example]).not_to include(worker_id2)
|
480
|
+
end
|
436
481
|
end
|
437
482
|
end
|
438
483
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: job_dispatch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matt Connolly
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-05-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbczmq
|
@@ -177,6 +177,7 @@ files:
|
|
177
177
|
- ".gitignore"
|
178
178
|
- ".rspec"
|
179
179
|
- ".travis.yml"
|
180
|
+
- CHANGELOG.md
|
180
181
|
- Gemfile
|
181
182
|
- Guardfile
|
182
183
|
- LICENSE.txt
|