job_dispatch 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b05fed659c186b001b926e08650b3a281160668f
4
- data.tar.gz: 6ed34896dfbe1a5dcc1382a36b39e7c3e73f2f2f
3
+ metadata.gz: e00ac3ea419f26f9e3d8ae4a52a73c267f306ce7
4
+ data.tar.gz: 2f2941010e8775f2e6ec0759952438fd899f17a5
5
5
  SHA512:
6
- metadata.gz: 4a211880bafb99581f6a4d8bd884ddcc37d68170f5ba28fde9943d0cdf2eb9735a6cd3c78f683ef4c923f02bc4c19d5479461904d8b25d060088c088048e2747
7
- data.tar.gz: 30656712d978383424f23945daa8ae77e3f61dd6c4314d324b8063203e6ecf604347588074b9db0f9ea39fed4b0289869d55d20dc5e3dfaed901caea7d5ad674
6
+ metadata.gz: 03c7b5606808c97b37c0c3903ec9058db29f8f96741a5b4b42a2220e459017249b9b48cb6b701c8fbd75392e0ba212bee8dc38ff29257512fd119e39b3739671
7
+ data.tar.gz: 414171d3052dcfdc29f5c8c30f3216aeb755d4bdf96c205452016241c1ac493bd859c374d29c24d1287709092bc16e0d04cda87b020a13b9f4228bf9f8bf13a7
data/CHANGELOG.md ADDED
@@ -0,0 +1,12 @@
1
+ # job_dispatch change log
2
+
3
+ ## Version 0.0.2
4
+
5
+ * Broker sends an idle command to a worker immediately upon connect. This helps recover from a case where a worker
6
+ has been running for some time before the dispatcher starts. (Particularly with a Windows worker using NetMQ where
7
+ a closed socket may still send messages.)
8
+ * Improve Ruby worker serialisation of exceptions into job result.
9
+
10
+ ## Version 0.0.1
11
+
12
+ * First release
@@ -14,7 +14,7 @@ module JobDispatch
14
14
  POLL_TIME = 5.123
15
15
  STOP_SIGNALS = %w[INT TERM KILL]
16
16
 
17
- IdleWorker = Struct.new :worker_id, :idle_since, :queue, :worker_name
17
+ IdleWorker = Struct.new :worker_id, :idle_since, :queue, :worker_name, :idle_count
18
18
 
19
19
 
20
20
  # any object that will respond to `next_job_for_queue`, which should return a job, or nil if there
@@ -67,6 +67,7 @@ module JobDispatch
67
67
  puts "JobDispatch::Broker running in process #{Process.pid}"
68
68
  JobDispatch.logger.info("JobDispatch::Broker running in process #{Process.pid}")
69
69
  @running = true
70
+ @running_thread = Thread.current
70
71
  poller = ZMQ::Poller.new
71
72
 
72
73
  @socket = JobDispatch::Broker::Socket.new(@worker_bind_address)
@@ -99,10 +100,12 @@ module JobDispatch
99
100
  puts "JobDispatch::Broker shutting down, due to #{signal_name} signal"
100
101
  @running = false
101
102
  @status = "SHUTDOWN"
102
- sleep 1
103
+ # sleep 1
103
104
  process_quit
104
- sleep 1
105
+ sleep 1 # let ZMQ send the messages before we close the socket.
105
106
  end
107
+ rescue StandardError => e
108
+ JobDispatch.logger.error "Unexpected exception: #{e}"
106
109
  end
107
110
  end
108
111
  ensure
@@ -112,6 +115,14 @@ module JobDispatch
112
115
  end
113
116
 
114
117
 
118
+ def stop
119
+ if running?
120
+ @running = false
121
+ @running_thread.raise SignalException.new("TERM") unless Thread.current == @running_thread
122
+ end
123
+ end
124
+
125
+
115
126
  def process_messages(poller)
116
127
  # TODO: calculate the amount of time to sleep to wake up such that a scheduled event happens as close
117
128
  # as possible to the time it was supposed to happen. This could additionally mean that the POLL_TIME
@@ -158,7 +169,7 @@ module JobDispatch
158
169
  case command.command
159
170
  when "ready"
160
171
  # add to list of workers who are ready for work
161
- add_available_worker(command)
172
+ add_available_worker(command, 0)
162
173
 
163
174
  # don't reply, leaves worker blocked waiting for a job to do.
164
175
  reply = nil
@@ -172,7 +183,7 @@ module JobDispatch
172
183
 
173
184
  if command.worker_ready?
174
185
  # a completed job also means the worker is available for more work.
175
- add_available_worker(command)
186
+ add_available_worker(command, 1)
176
187
  reply = nil
177
188
  else
178
189
  reply.parameters = {:status => 'thanks'}
@@ -235,9 +246,10 @@ module JobDispatch
235
246
  def send_idle_commands(idle_time=nil)
236
247
  idle_time ||= Time.now
237
248
  idle_time -= WORKER_IDLE_TIME
238
- idle_workers = @workers_waiting_for_jobs.select { |worker_id, worker| worker.idle_since < idle_time }
249
+ idle_workers = @workers_waiting_for_jobs.select { |worker_id, worker| worker.idle_since < idle_time || worker.idle_count == 0 }
239
250
  idle_workers.each do |worker_id, worker|
240
251
  send_job_to_worker(InternalJob.new('idle', worker.queue), worker_id)
252
+ worker.idle_count += 1
241
253
  end
242
254
  end
243
255
 
@@ -268,10 +280,18 @@ module JobDispatch
268
280
 
269
281
 
270
282
  # add a worker to the list of workers available for jobs.
271
- def add_available_worker(command)
283
+ def add_available_worker(command, idle_count=0)
272
284
  JobDispatch.logger.info("JobDispatch::Broker Worker '#{command.worker_id.to_json}' available for work on queue '#{command.queue}'")
285
+
286
+ # Immediately remove any existing workers with the given name: a worker may have closed its connection and opened
287
+ # a new one (perhaps it started a long time before the broker did), leaving an entry under its old socket identity.
288
+
289
+ if command.worker_name # this is only sent on initial requests.
290
+ remove_worker_named(command.worker_name)
291
+ end
292
+
273
293
  queue = command.queue
274
- idle_worker = IdleWorker.new(command.worker_id, Time.now, queue, command.worker_name)
294
+ idle_worker = IdleWorker.new(command.worker_id, Time.now, queue, command.worker_name, idle_count)
275
295
  workers_waiting_for_jobs[command.worker_id] = idle_worker
276
296
  queues[queue] << command.worker_id
277
297
  if command.worker_name # this is only sent on initial requests.
@@ -282,20 +302,23 @@ module JobDispatch
282
302
  # remove a worker from available list. Worker is shutting down or indicating that it will no longer
283
303
  # be available for doing work.
284
304
  def remove_available_worker(command)
285
- JobDispatch.logger.info("JobDispatch::Broker Worker '#{command.worker_id.to_json}' available for work on queue '#{command.queue}'")
305
+ JobDispatch.logger.info("JobDispatch::Broker Removing Worker '#{command.worker_id.to_json}' available for work on queue '#{command.queue}'")
286
306
 
287
307
  # the goodbye command is sent by another socket connection, so the worker_id (socket identity) will
288
- # not match the socket actually waiting for work.
308
+ # not match the socket actually waiting for work. Remove the worker by its name, not by its socket identity.
309
+
310
+ remove_worker_named(command.worker_name)
311
+ {status: "see ya later"}
312
+ end
289
313
 
290
- keys = worker_names.select { |id, name| name == command.worker_name }.keys
314
+ def remove_worker_named(worker_name)
315
+ keys = worker_names.select { |id, name| name == worker_name }.keys
291
316
  keys.each do |worker_id|
292
317
  workers_waiting_for_reply.delete(worker_id) # socket will be closing, no need to send it anything.
293
318
  worker = workers_waiting_for_jobs.delete(worker_id)
294
319
  queues[worker.queue].delete(worker_id) if worker
295
320
  worker_names.delete(worker_id)
296
321
  end
297
-
298
- {status: "see ya later"}
299
322
  end
300
323
 
301
324
  def dispatch_jobs_to_workers
@@ -1,5 +1,5 @@
1
1
  require 'active_support/core_ext'
2
- require 'active_support/core_ext/object/json'
2
+ require 'active_support/core_ext/object/to_json'
3
3
 
4
4
  module JobDispatch
5
5
  # Identity encapsulates a ZeroMQ socket identity, which is a string of binary characters, typically
@@ -1,3 +1,3 @@
1
1
  module JobDispatch
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -8,6 +8,9 @@ module JobDispatch
8
8
  #
9
9
  class Worker
10
10
 
11
+ class StopError < StandardError
12
+ end
13
+
11
14
  IDLE_TIME = 3
12
15
  IDLE_COUNT = 10
13
16
 
@@ -38,11 +41,12 @@ module JobDispatch
38
41
 
39
42
  def run
40
43
  @running = true
44
+ @running_thread = Thread.current
41
45
  while running?
42
- puts "connecting"
46
+ # puts "connecting"
43
47
  connect
44
- puts "asking for work"
45
- ask_for_work
48
+ # puts "asking for work"
49
+ ask_for_work rescue StopError
46
50
 
47
51
  # if we are idle too many times, the broker has restarted or gone away, and we will be stuck in receive
48
52
  # state, so we need to close the socket and make a new one to ask for work again.
@@ -60,13 +64,14 @@ module JobDispatch
60
64
  idle
61
65
  idle_count += 1
62
66
  end
63
- rescue Interrupt
64
- puts "Worker stopping."
65
- JobDispatch.logger.info("Worker #{}")
67
+ rescue Interrupt, StopError
68
+ JobDispatch.logger.info("Worker stopping.")
66
69
  stop
67
70
  disconnect
71
+ # Tell the broker goodbye so that we are removed from the idle worker list and no more jobs will come here.
68
72
  connect
69
73
  send_goodbye
74
+ sleep(0.1) # let the socket send the message before we disconnect...
70
75
  end
71
76
  end
72
77
  disconnect
@@ -86,7 +91,10 @@ module JobDispatch
86
91
  end
87
92
 
88
93
  def stop
89
- @running = false
94
+ if running?
95
+ @running_thread.raise StopError unless @running_thread == Thread.current
96
+ @running = false
97
+ end
90
98
  end
91
99
 
92
100
  def self.touch(timeout=nil)
@@ -111,7 +119,7 @@ module JobDispatch
111
119
  end
112
120
 
113
121
  def idle
114
- puts "waiting for job to do…"
122
+ # puts "waiting for job to do…"
115
123
  end
116
124
  end
117
125
  end
@@ -30,7 +30,11 @@ module JobDispatch
30
30
  @result = @klass.__send__(method.to_sym, *params)
31
31
  @status = :success
32
32
  rescue StandardError => ex
33
- @result = ex
33
+ @result = {
34
+ class: ex.class.to_s,
35
+ message: ex.to_s,
36
+ backtrace: ex.backtrace,
37
+ }
34
38
  @status = :error
35
39
  ensure
36
40
  Thread.current["JobDispatch::Worker.job_id"] = nil
@@ -81,7 +81,6 @@ describe JobDispatch::Broker do
81
81
  subject.workers_waiting_for_reply << worker_id3
82
82
  subject.process_command(Command.new(worker_id3, {command: 'ready', worker_name: 'test worker 2'}))
83
83
 
84
-
85
84
  @job = FactoryGirl.build :job
86
85
  @socket = double('Broker::Socket', :send_command => nil)
87
86
  subject.stub(:socket => @socket)
@@ -166,6 +165,14 @@ describe JobDispatch::Broker do
166
165
  @result = subject.process_command(command)
167
166
  expect(subject.worker_names[worker_id]).to eq('ruby worker')
168
167
  end
168
+
169
+ let(:command2) { Command.new(worker_id2, {command: 'ready', queue: 'example', worker_name: 'ruby worker'}) }
170
+
171
+ it "duplicate ready has only 1 worker" do
172
+ @result = subject.process_command(command)
173
+ @result = subject.process_command(command2)
174
+ expect(subject.queues[:example].count).to eq(1)
175
+ end
169
176
  end
170
177
 
171
178
 
@@ -405,34 +412,72 @@ describe JobDispatch::Broker do
405
412
  command = Command.new(worker_id2, {command: 'ready', queue: 'example'})
406
413
  @result = subject.process_command(command)
407
414
  end
408
-
409
415
  end
410
416
 
411
- it "that have waited long enough receive idle commands" do
412
- @socket.should_receive(:send_command) do |cmd|
413
- expect(cmd.worker_id).to eq(worker_id)
414
- expect(cmd.parameters[:command]).to eq('idle')
417
+ context "already done an idle" do
418
+ before do
419
+ JobDispatch::Broker::IdleWorker.any_instance.stub(:idle_count => 1)
415
420
  end
416
421
 
417
- Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
418
- subject.send_idle_commands
422
+ it "that have waited long enough receive idle commands" do
423
+ @socket.should_receive(:send_command) do |cmd|
424
+ expect(cmd.worker_id).to eq(worker_id)
425
+ expect(cmd.parameters[:command]).to eq('idle')
426
+ end
427
+
428
+ Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
429
+ subject.send_idle_commands
430
+ end
431
+
432
+ expect(subject.workers_waiting_for_reply).not_to include(worker_id)
433
+ expect(subject.queues[:example]).not_to include(worker_id)
419
434
  end
420
435
 
421
- expect(subject.workers_waiting_for_reply).not_to include(worker_id)
422
- expect(subject.queues[:example]).not_to include(worker_id)
436
+ it "that have not waited long enough are still waiting" do
437
+ @socket.should_receive(:send_command) do |cmd|
438
+ expect(cmd.worker_id).not_to eq(worker_id2)
439
+ end
440
+
441
+ Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
442
+ subject.send_idle_commands
443
+ end
444
+
445
+ expect(subject.workers_waiting_for_reply).to include(worker_id2)
446
+ expect(subject.queues[:example]).to include(worker_id2)
447
+ end
423
448
  end
424
449
 
425
- it "that have not waited long enough are still waiting" do
426
- @socket.should_receive(:send_command) do |cmd|
427
- expect(cmd.worker_id).not_to eq(worker_id2)
450
+ context "have not done an idle" do
451
+ before do
452
+ JobDispatch::Broker::IdleWorker.any_instance.stub(:idle_count => 0)
428
453
  end
429
454
 
430
- Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
431
- subject.send_idle_commands
455
+ it "that have waited long enough receive idle commands" do
456
+ @socket.should_receive(:send_command) do |cmd|
457
+ expect(cmd.worker_id).to eq(worker_id)
458
+ expect(cmd.parameters[:command]).to eq('idle')
459
+ end
460
+
461
+ Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
462
+ subject.send_idle_commands
463
+ end
464
+
465
+ expect(subject.workers_waiting_for_reply).not_to include(worker_id)
466
+ expect(subject.queues[:example]).not_to include(worker_id)
432
467
  end
433
468
 
434
- expect(subject.workers_waiting_for_reply).to include(worker_id2)
435
- expect(subject.queues[:example]).to include(worker_id2)
469
+ it "that have not waited long enough are still waiting" do
470
+ @socket.should_receive(:send_command) do |cmd|
471
+ expect(cmd.worker_id).not_to eq(worker_id2)
472
+ end
473
+
474
+ Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
475
+ subject.send_idle_commands
476
+ end
477
+
478
+ expect(subject.workers_waiting_for_reply).not_to include(worker_id2)
479
+ expect(subject.queues[:example]).not_to include(worker_id2)
480
+ end
436
481
  end
437
482
  end
438
483
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: job_dispatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Connolly
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-20 00:00:00.000000000 Z
11
+ date: 2014-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbczmq
@@ -177,6 +177,7 @@ files:
177
177
  - ".gitignore"
178
178
  - ".rspec"
179
179
  - ".travis.yml"
180
+ - CHANGELOG.md
180
181
  - Gemfile
181
182
  - Guardfile
182
183
  - LICENSE.txt