job_dispatch 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b05fed659c186b001b926e08650b3a281160668f
4
- data.tar.gz: 6ed34896dfbe1a5dcc1382a36b39e7c3e73f2f2f
3
+ metadata.gz: e00ac3ea419f26f9e3d8ae4a52a73c267f306ce7
4
+ data.tar.gz: 2f2941010e8775f2e6ec0759952438fd899f17a5
5
5
  SHA512:
6
- metadata.gz: 4a211880bafb99581f6a4d8bd884ddcc37d68170f5ba28fde9943d0cdf2eb9735a6cd3c78f683ef4c923f02bc4c19d5479461904d8b25d060088c088048e2747
7
- data.tar.gz: 30656712d978383424f23945daa8ae77e3f61dd6c4314d324b8063203e6ecf604347588074b9db0f9ea39fed4b0289869d55d20dc5e3dfaed901caea7d5ad674
6
+ metadata.gz: 03c7b5606808c97b37c0c3903ec9058db29f8f96741a5b4b42a2220e459017249b9b48cb6b701c8fbd75392e0ba212bee8dc38ff29257512fd119e39b3739671
7
+ data.tar.gz: 414171d3052dcfdc29f5c8c30f3216aeb755d4bdf96c205452016241c1ac493bd859c374d29c24d1287709092bc16e0d04cda87b020a13b9f4228bf9f8bf13a7
data/CHANGELOG.md ADDED
@@ -0,0 +1,12 @@
1
+ # job_dispatch change log
2
+
3
+ ## Version 0.0.2
4
+
5
+ * Broker sends an idle command to a worker immediately upon connect. This helps recover from a case where a worker
6
+ has been running for some time before the dispatcher starts. (Particularly with a Windows worker using NetMQ where
7
+ a closed socket may still send messages.)
8
+ * Improve Ruby worker serialisation of exceptions into job result.
9
+
10
+ ## Version 0.0.1
11
+
12
+ * First release
@@ -14,7 +14,7 @@ module JobDispatch
14
14
  POLL_TIME = 5.123
15
15
  STOP_SIGNALS = %w[INT TERM KILL]
16
16
 
17
- IdleWorker = Struct.new :worker_id, :idle_since, :queue, :worker_name
17
+ IdleWorker = Struct.new :worker_id, :idle_since, :queue, :worker_name, :idle_count
18
18
 
19
19
 
20
20
  # any object that will respond to `next_job_for_queue`, which should return a job, or nil if there
@@ -67,6 +67,7 @@ module JobDispatch
67
67
  puts "JobDispatch::Broker running in process #{Process.pid}"
68
68
  JobDispatch.logger.info("JobDispatch::Broker running in process #{Process.pid}")
69
69
  @running = true
70
+ @running_thread = Thread.current
70
71
  poller = ZMQ::Poller.new
71
72
 
72
73
  @socket = JobDispatch::Broker::Socket.new(@worker_bind_address)
@@ -99,10 +100,12 @@ module JobDispatch
99
100
  puts "JobDispatch::Broker shutting down, due to #{signal_name} signal"
100
101
  @running = false
101
102
  @status = "SHUTDOWN"
102
- sleep 1
103
+ # sleep 1
103
104
  process_quit
104
- sleep 1
105
+ sleep 1 # let ZMQ send the messages before we close the socket.
105
106
  end
107
+ rescue StandardError => e
108
+ JobDispatch.logger.error "Unexpected exception: #{e}"
106
109
  end
107
110
  end
108
111
  ensure
@@ -112,6 +115,14 @@ module JobDispatch
112
115
  end
113
116
 
114
117
 
118
+ def stop
119
+ if running?
120
+ @running = false
121
+ @running_thread.raise SignalException.new("TERM") unless Thread.current == @running_thread
122
+ end
123
+ end
124
+
125
+
115
126
  def process_messages(poller)
116
127
  # TODO: calculate the amount of time to sleep to wake up such that a scheduled event happens as close
117
128
  # as possible to the time it was supposed to happen. This could additionally mean that the POLL_TIME
@@ -158,7 +169,7 @@ module JobDispatch
158
169
  case command.command
159
170
  when "ready"
160
171
  # add to list of workers who are ready for work
161
- add_available_worker(command)
172
+ add_available_worker(command, 0)
162
173
 
163
174
  # don't reply, leaves worker blocked waiting for a job to do.
164
175
  reply = nil
@@ -172,7 +183,7 @@ module JobDispatch
172
183
 
173
184
  if command.worker_ready?
174
185
  # a completed job also means the worker is available for more work.
175
- add_available_worker(command)
186
+ add_available_worker(command, 1)
176
187
  reply = nil
177
188
  else
178
189
  reply.parameters = {:status => 'thanks'}
@@ -235,9 +246,10 @@ module JobDispatch
235
246
  def send_idle_commands(idle_time=nil)
236
247
  idle_time ||= Time.now
237
248
  idle_time -= WORKER_IDLE_TIME
238
- idle_workers = @workers_waiting_for_jobs.select { |worker_id, worker| worker.idle_since < idle_time }
249
+ idle_workers = @workers_waiting_for_jobs.select { |worker_id, worker| worker.idle_since < idle_time || worker.idle_count == 0 }
239
250
  idle_workers.each do |worker_id, worker|
240
251
  send_job_to_worker(InternalJob.new('idle', worker.queue), worker_id)
252
+ worker.idle_count += 1
241
253
  end
242
254
  end
243
255
 
@@ -268,10 +280,18 @@ module JobDispatch
268
280
 
269
281
 
270
282
  # add a worker to the list of workers available for jobs.
271
- def add_available_worker(command)
283
+ def add_available_worker(command, idle_count=0)
272
284
  JobDispatch.logger.info("JobDispatch::Broker Worker '#{command.worker_id.to_json}' available for work on queue '#{command.queue}'")
285
+
286
+ # immediately remove any existing workers with the given name. If a worker has closed its connection and opened
287
+ # a new one (perhaps it started a long time before the broker did), its stale entry is dropped first.
288
+
289
+ if command.worker_name # this is only sent on initial requests.
290
+ remove_worker_named(command.worker_name)
291
+ end
292
+
273
293
  queue = command.queue
274
- idle_worker = IdleWorker.new(command.worker_id, Time.now, queue, command.worker_name)
294
+ idle_worker = IdleWorker.new(command.worker_id, Time.now, queue, command.worker_name, idle_count)
275
295
  workers_waiting_for_jobs[command.worker_id] = idle_worker
276
296
  queues[queue] << command.worker_id
277
297
  if command.worker_name # this is only sent on initial requests.
@@ -282,20 +302,23 @@ module JobDispatch
282
302
  # remove a worker from available list. Worker is shutting down or indicating that it will no longer
283
303
  # be available for doing work.
284
304
  def remove_available_worker(command)
285
- JobDispatch.logger.info("JobDispatch::Broker Worker '#{command.worker_id.to_json}' available for work on queue '#{command.queue}'")
305
+ JobDispatch.logger.info("JobDispatch::Broker Removing Worker '#{command.worker_id.to_json}' available for work on queue '#{command.queue}'")
286
306
 
287
307
  # the goodbye command is sent by another socket connection, so the worker_id (socket identity) will
288
- # not match the socket actually waiting for work.
308
+ # not match the socket actually waiting for work. Remove the worker by its name, not socket identity
309
+
310
+ remove_worker_named(command.worker_name)
311
+ {status: "see ya later"}
312
+ end
289
313
 
290
- keys = worker_names.select { |id, name| name == command.worker_name }.keys
314
+ def remove_worker_named(worker_name)
315
+ keys = worker_names.select { |id, name| name == worker_name }.keys
291
316
  keys.each do |worker_id|
292
317
  workers_waiting_for_reply.delete(worker_id) # socket will be closing, no need to send it anything.
293
318
  worker = workers_waiting_for_jobs.delete(worker_id)
294
319
  queues[worker.queue].delete(worker_id) if worker
295
320
  worker_names.delete(worker_id)
296
321
  end
297
-
298
- {status: "see ya later"}
299
322
  end
300
323
 
301
324
  def dispatch_jobs_to_workers
@@ -1,5 +1,5 @@
1
1
  require 'active_support/core_ext'
2
- require 'active_support/core_ext/object/json'
2
+ require 'active_support/core_ext/object/to_json'
3
3
 
4
4
  module JobDispatch
5
5
  # Identity encapsulates a ZeroMQ socket identity, which is a string of binary characters, typically
@@ -1,3 +1,3 @@
1
1
  module JobDispatch
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -8,6 +8,9 @@ module JobDispatch
8
8
  #
9
9
  class Worker
10
10
 
11
+ class StopError < StandardError
12
+ end
13
+
11
14
  IDLE_TIME = 3
12
15
  IDLE_COUNT = 10
13
16
 
@@ -38,11 +41,12 @@ module JobDispatch
38
41
 
39
42
  def run
40
43
  @running = true
44
+ @running_thread = Thread.current
41
45
  while running?
42
- puts "connecting"
46
+ # puts "connecting"
43
47
  connect
44
- puts "asking for work"
45
- ask_for_work
48
+ # puts "asking for work"
49
+ ask_for_work rescue StopError
46
50
 
47
51
  # if we are idle for too many times, the broker has restarted or gone away, and we will be stuck in receive
48
52
  # state, so we need to close the socket and make a new one to ask for work again.
@@ -60,13 +64,14 @@ module JobDispatch
60
64
  idle
61
65
  idle_count += 1
62
66
  end
63
- rescue Interrupt
64
- puts "Worker stopping."
65
- JobDispatch.logger.info("Worker #{}")
67
+ rescue Interrupt, StopError
68
+ JobDispatch.logger.info("Worker stopping.")
66
69
  stop
67
70
  disconnect
71
+ # Tell the broker goodbye so that we are removed from the idle worker list and no more jobs will come here.
68
72
  connect
69
73
  send_goodbye
74
+ sleep(0.1) # let the socket send the message before we disconnect...
70
75
  end
71
76
  end
72
77
  disconnect
@@ -86,7 +91,10 @@ module JobDispatch
86
91
  end
87
92
 
88
93
  def stop
89
- @running = false
94
+ if running?
95
+ @running_thread.raise StopError unless @running_thread == Thread.current
96
+ @running = false
97
+ end
90
98
  end
91
99
 
92
100
  def self.touch(timeout=nil)
@@ -111,7 +119,7 @@ module JobDispatch
111
119
  end
112
120
 
113
121
  def idle
114
- puts "waiting for job to do…"
122
+ # puts "waiting for job to do…"
115
123
  end
116
124
  end
117
125
  end
@@ -30,7 +30,11 @@ module JobDispatch
30
30
  @result = @klass.__send__(method.to_sym, *params)
31
31
  @status = :success
32
32
  rescue StandardError => ex
33
- @result = ex
33
+ @result = {
34
+ class: ex.class.to_s,
35
+ message: ex.to_s,
36
+ backtrace: ex.backtrace,
37
+ }
34
38
  @status = :error
35
39
  ensure
36
40
  Thread.current["JobDispatch::Worker.job_id"] = nil
@@ -81,7 +81,6 @@ describe JobDispatch::Broker do
81
81
  subject.workers_waiting_for_reply << worker_id3
82
82
  subject.process_command(Command.new(worker_id3, {command: 'ready', worker_name: 'test worker 2'}))
83
83
 
84
-
85
84
  @job = FactoryGirl.build :job
86
85
  @socket = double('Broker::Socket', :send_command => nil)
87
86
  subject.stub(:socket => @socket)
@@ -166,6 +165,14 @@ describe JobDispatch::Broker do
166
165
  @result = subject.process_command(command)
167
166
  expect(subject.worker_names[worker_id]).to eq('ruby worker')
168
167
  end
168
+
169
+ let(:command2) { Command.new(worker_id2, {command: 'ready', queue: 'example', worker_name: 'ruby worker'}) }
170
+
171
+ it "duplicate ready has only 1 worker" do
172
+ @result = subject.process_command(command)
173
+ @result = subject.process_command(command2)
174
+ expect(subject.queues[:example].count).to eq(1)
175
+ end
169
176
  end
170
177
 
171
178
 
@@ -405,34 +412,72 @@ describe JobDispatch::Broker do
405
412
  command = Command.new(worker_id2, {command: 'ready', queue: 'example'})
406
413
  @result = subject.process_command(command)
407
414
  end
408
-
409
415
  end
410
416
 
411
- it "that have waited long enough receive idle commands" do
412
- @socket.should_receive(:send_command) do |cmd|
413
- expect(cmd.worker_id).to eq(worker_id)
414
- expect(cmd.parameters[:command]).to eq('idle')
417
+ context "already done an idle" do
418
+ before do
419
+ JobDispatch::Broker::IdleWorker.any_instance.stub(:idle_count => 1)
415
420
  end
416
421
 
417
- Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
418
- subject.send_idle_commands
422
+ it "that have waited long enough receive idle commands" do
423
+ @socket.should_receive(:send_command) do |cmd|
424
+ expect(cmd.worker_id).to eq(worker_id)
425
+ expect(cmd.parameters[:command]).to eq('idle')
426
+ end
427
+
428
+ Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
429
+ subject.send_idle_commands
430
+ end
431
+
432
+ expect(subject.workers_waiting_for_reply).not_to include(worker_id)
433
+ expect(subject.queues[:example]).not_to include(worker_id)
419
434
  end
420
435
 
421
- expect(subject.workers_waiting_for_reply).not_to include(worker_id)
422
- expect(subject.queues[:example]).not_to include(worker_id)
436
+ it "that have not waited long enough are still waiting" do
437
+ @socket.should_receive(:send_command) do |cmd|
438
+ expect(cmd.worker_id).not_to eq(worker_id2)
439
+ end
440
+
441
+ Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
442
+ subject.send_idle_commands
443
+ end
444
+
445
+ expect(subject.workers_waiting_for_reply).to include(worker_id2)
446
+ expect(subject.queues[:example]).to include(worker_id2)
447
+ end
423
448
  end
424
449
 
425
- it "that have not waited long enough are still waiting" do
426
- @socket.should_receive(:send_command) do |cmd|
427
- expect(cmd.worker_id).not_to eq(worker_id2)
450
+ context "have not done an idle" do
451
+ before do
452
+ JobDispatch::Broker::IdleWorker.any_instance.stub(:idle_count => 0)
428
453
  end
429
454
 
430
- Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
431
- subject.send_idle_commands
455
+ it "that have waited long enough receive idle commands" do
456
+ @socket.should_receive(:send_command) do |cmd|
457
+ expect(cmd.worker_id).to eq(worker_id)
458
+ expect(cmd.parameters[:command]).to eq('idle')
459
+ end
460
+
461
+ Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
462
+ subject.send_idle_commands
463
+ end
464
+
465
+ expect(subject.workers_waiting_for_reply).not_to include(worker_id)
466
+ expect(subject.queues[:example]).not_to include(worker_id)
432
467
  end
433
468
 
434
- expect(subject.workers_waiting_for_reply).to include(worker_id2)
435
- expect(subject.queues[:example]).to include(worker_id2)
469
+ it "that have not waited long enough are still waiting" do
470
+ @socket.should_receive(:send_command) do |cmd|
471
+ expect(cmd.worker_id).not_to eq(worker_id2)
472
+ end
473
+
474
+ Timecop.freeze(@time + JobDispatch::Broker::WORKER_IDLE_TIME + 1) do
475
+ subject.send_idle_commands
476
+ end
477
+
478
+ expect(subject.workers_waiting_for_reply).not_to include(worker_id2)
479
+ expect(subject.queues[:example]).not_to include(worker_id2)
480
+ end
436
481
  end
437
482
  end
438
483
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: job_dispatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matt Connolly
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-20 00:00:00.000000000 Z
11
+ date: 2014-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbczmq
@@ -177,6 +177,7 @@ files:
177
177
  - ".gitignore"
178
178
  - ".rspec"
179
179
  - ".travis.yml"
180
+ - CHANGELOG.md
180
181
  - Gemfile
181
182
  - Guardfile
182
183
  - LICENSE.txt