rocketjob 1.3.0 → 2.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +201 -0
  3. data/README.md +15 -10
  4. data/bin/rocketjob +3 -1
  5. data/bin/rocketjob_perf +92 -0
  6. data/lib/rocket_job/cli.rb +71 -31
  7. data/lib/rocket_job/config.rb +21 -23
  8. data/lib/rocket_job/dirmon_entry.rb +63 -45
  9. data/lib/rocket_job/extensions/aasm.rb +56 -0
  10. data/lib/rocket_job/extensions/mongo.rb +23 -0
  11. data/lib/rocket_job/job.rb +9 -433
  12. data/lib/rocket_job/jobs/dirmon_job.rb +20 -20
  13. data/lib/rocket_job/jobs/simple_job.rb +12 -0
  14. data/lib/rocket_job/plugins/document.rb +69 -0
  15. data/lib/rocket_job/plugins/job/callbacks.rb +92 -0
  16. data/lib/rocket_job/plugins/job/defaults.rb +40 -0
  17. data/lib/rocket_job/plugins/job/logger.rb +36 -0
  18. data/lib/rocket_job/plugins/job/model.rb +288 -0
  19. data/lib/rocket_job/plugins/job/persistence.rb +167 -0
  20. data/lib/rocket_job/plugins/job/state_machine.rb +166 -0
  21. data/lib/rocket_job/plugins/job/worker.rb +167 -0
  22. data/lib/rocket_job/plugins/restart.rb +54 -0
  23. data/lib/rocket_job/plugins/singleton.rb +26 -0
  24. data/lib/rocket_job/plugins/state_machine.rb +105 -0
  25. data/lib/rocket_job/version.rb +1 -1
  26. data/lib/rocket_job/worker.rb +150 -119
  27. data/lib/rocketjob.rb +43 -21
  28. data/test/config_test.rb +12 -0
  29. data/test/dirmon_entry_test.rb +81 -85
  30. data/test/dirmon_job_test.rb +40 -28
  31. data/test/job_test.rb +14 -257
  32. data/test/plugins/job/callbacks_test.rb +163 -0
  33. data/test/plugins/job/defaults_test.rb +52 -0
  34. data/test/plugins/job/logger_test.rb +58 -0
  35. data/test/plugins/job/model_test.rb +97 -0
  36. data/test/plugins/job/persistence_test.rb +81 -0
  37. data/test/plugins/job/state_machine_test.rb +118 -0
  38. data/test/plugins/job/worker_test.rb +183 -0
  39. data/test/plugins/restart_test.rb +185 -0
  40. data/test/plugins/singleton_test.rb +94 -0
  41. data/test/plugins/state_machine_event_callbacks_test.rb +101 -0
  42. data/test/plugins/state_machine_test.rb +64 -0
  43. data/test/test_helper.rb +3 -36
  44. metadata +64 -19
  45. data/lib/rocket_job/concerns/singleton.rb +0 -33
  46. data/lib/rocket_job/concerns/worker.rb +0 -214
  47. data/test/files/_archive/archived.txt +0 -3
  48. data/test/job_worker_test.rb +0 -86
  49. data/test/jobs/test_job.rb +0 -46
  50. data/test/worker_test.rb +0 -97
@@ -1,6 +1,6 @@
1
1
  # encoding: UTF-8
2
2
  require 'socket'
3
- require 'aasm'
3
+ require 'concurrent'
4
4
  module RocketJob
5
5
  # Worker
6
6
  #
@@ -27,13 +27,10 @@ module RocketJob
27
27
  # immediately. Via the UI or Ruby code the worker can take up to 15 seconds
28
28
  # (the heartbeat interval) to start shutting down.
29
29
  class Worker
30
- include MongoMapper::Document
31
- include AASM
30
+ include Plugins::Document
31
+ include Plugins::StateMachine
32
32
  include SemanticLogger::Loggable
33
33
 
34
- # Prevent data in MongoDB from re-defining the model behavior
35
- #self.static_keys = true
36
-
37
34
  # @formatter:off
38
35
  # Unique Name of this worker instance
39
36
  # Defaults to the `hostname` but _must_ be overridden if multiple Worker instances
@@ -73,12 +70,15 @@ module RocketJob
73
70
  self.started_at = Time.now
74
71
  end
75
72
  end
73
+
76
74
  event :pause do
77
75
  transitions from: :running, to: :paused
78
76
  end
77
+
79
78
  event :resume do
80
79
  transitions from: :paused, to: :running
81
80
  end
81
+
82
82
  event :stop do
83
83
  transitions from: :running, to: :stopping
84
84
  transitions from: :paused, to: :stopping
@@ -87,24 +87,32 @@ module RocketJob
87
87
  end
88
88
  # @formatter:on
89
89
 
90
- attr_reader :thread_pool
91
-
92
90
  # Requeue any jobs being worked by this worker when it is destroyed
93
91
  before_destroy :requeue_jobs
94
92
 
95
93
  # Run the worker process
96
94
  # Attributes supplied are passed to #new
97
95
  def self.run(attrs={})
98
- worker = new(attrs)
99
- worker.build_heartbeat
100
- worker.save!
96
+ Thread.current.name = 'rocketjob main'
101
97
  create_indexes
102
98
  register_signal_handlers
103
- if defined?(RocketJobPro) && (RocketJob::Job.database.name != RocketJob::SlicedJob.database.name)
99
+ if defined?(RocketJobPro) && (RocketJob::Job.database.name != RocketJob::Jobs::PerformanceJob.database.name)
104
100
  raise 'The RocketJob configuration is being applied after the system has been initialized'
105
101
  end
106
- logger.info "Using MongoDB Database: #{RocketJob::Job.database.name}"
107
- worker.run
102
+
103
+ worker = create!(attrs)
104
+ if worker.max_threads == 0
105
+ # Does not start any additional threads and runs the worker in the current thread.
106
+ # No heartbeats are performed. So this worker will appear as a zombie in RJMC.
107
+ # Designed for profiling purposes where a single thread is much simpler to profile.
108
+ worker.started!
109
+ worker.send(:worker, 0)
110
+ else
111
+ worker.send(:run)
112
+ end
113
+
114
+ ensure
115
+ worker.destroy if worker
108
116
  end
109
117
 
110
118
  # Create indexes
@@ -127,11 +135,6 @@ module RocketJob
127
135
  count
128
136
  end
129
137
 
130
- def self.destroy_dead_workers
131
- warn 'RocketJob::Worker.destroy_dead_workers is deprecated, use RocketJob::Worker.destroy_zombies'
132
- destroy_zombies
133
- end
134
-
135
138
  # Stop all running, paused, or starting workers
136
139
  def self.stop_all
137
140
  where(state: [:running, :paused, :starting]).each(&:stop!)
@@ -147,55 +150,117 @@ module RocketJob
147
150
  paused.each(&:resume!)
148
151
  end
149
152
 
153
+ # Returns [Hash<String:Integer>] of the number of workers in each state.
154
+ # Note: If there are no workers in that particular state then the hash will not have a value for it.
155
+ #
156
+ # Example workers in every state:
157
+ # RocketJob::Worker.counts_by_state
158
+ # # => {
159
+ # :aborted => 1,
160
+ # :completed => 37,
161
+ # :failed => 1,
162
+ # :paused => 3,
163
+ # :queued => 4,
164
+ # :running => 1,
165
+ # :queued_now => 1,
166
+ # :scheduled => 3
167
+ # }
168
+ #
169
+ # Example no workers active:
170
+ # RocketJob::Worker.counts_by_state
171
+ # # => {}
172
+ def self.counts_by_state
173
+ counts = {}
174
+ collection.aggregate([
175
+ {
176
+ '$group' => {
177
+ _id: '$state',
178
+ count: {'$sum' => 1}
179
+ }
180
+ }
181
+ ]
182
+ ).each do |result|
183
+ counts[result['_id']] = result['count']
184
+ end
185
+ counts
186
+ end
187
+
150
188
  # Returns [Boolean] whether the worker is shutting down
151
189
  def shutting_down?
152
- if self.class.shutdown
153
- stop! if running?
154
- true
155
- else
156
- !running?
190
+ self.class.shutdown? || !running?
191
+ end
192
+
193
+ # Returns [true|false] if this worker has missed at least the last 4 heartbeats
194
+ #
195
+ # Possible causes for a worker to miss its heartbeats:
196
+ # - The worker process has died
197
+ # - The worker process is "hanging"
198
+ # - The worker is no longer able to communicate with the MongoDB Server
199
+ def zombie?(missed = 4)
200
+ return false unless running?
201
+ return true if heartbeat.updated_at.nil?
202
+ dead_seconds = Config.instance.heartbeat_seconds * missed
203
+ (Time.now - heartbeat.updated_at) >= dead_seconds
204
+ end
205
+
206
+ # On MRI the 'concurrent-ruby-ext' gem may not be loaded
207
+ if defined?(Concurrent::JavaAtomicBoolean) || defined?(Concurrent::CAtomicBoolean)
208
+ # Returns [true|false] whether the shutdown indicator has been set for this worker process
209
+ def self.shutdown?
210
+ @@shutdown.value
211
+ end
212
+
213
+ # Set shutdown indicator for this worker process
214
+ def self.shutdown!
215
+ @@shutdown.make_true
216
+ end
217
+
218
+ @@shutdown = Concurrent::AtomicBoolean.new(false)
219
+ else
220
+ # Returns [true|false] whether the shutdown indicator has been set for this worker process
221
+ def self.shutdown?
222
+ @@shutdown
223
+ end
224
+
225
+ # Set shutdown indicator for this worker process
226
+ def self.shutdown!
227
+ @@shutdown = true
157
228
  end
229
+
230
+ @@shutdown = false
158
231
  end
159
232
 
160
- # Returns [Array<Thread>] threads in the thread_pool
161
- def thread_pool
162
- @thread_pool ||= []
233
+ private
234
+
235
+ attr_reader :worker_threads
236
+
237
+ # Returns [Array<Thread>] collection of created worker threads
238
+ def worker_threads
239
+ @worker_threads ||= []
163
240
  end
164
241
 
165
242
  # Management Thread
166
243
  def run
167
- Thread.current.name = 'rocketjob main'
168
- build_heartbeat unless heartbeat
169
-
170
- started
171
- adjust_thread_pool(true)
172
- save
244
+ logger.info "Using MongoDB Database: #{RocketJob::Job.database.name}"
245
+ build_heartbeat(updated_at: Time.now, current_threads: 0)
246
+ started!
247
+ adjust_worker_threads(true)
173
248
  logger.info "RocketJob Worker started with #{max_threads} workers running"
174
249
 
175
250
  count = 0
176
- loop do
177
- # Update heartbeat so that monitoring tools know that this worker is alive
178
- set(
251
+ while running? || paused?
252
+ sleep Config.instance.heartbeat_seconds
253
+
254
+ update_attributes_and_reload(
179
255
  'heartbeat.updated_at' => Time.now,
180
- 'heartbeat.current_threads' => thread_pool_count
256
+ 'heartbeat.current_threads' => worker_count
181
257
  )
182
258
 
183
- # Reload the worker model every few heartbeats in case its config was changed
184
- # TODO make 3 configurable
185
- if count >= 3
186
- reload
187
- adjust_thread_pool
188
- count = 0
189
- else
190
- count += 1
191
- end
192
-
193
- # Stop worker if shutdown signal was raised
194
- stop! if self.class.shutdown && !stopping?
195
-
196
- break if stopping?
259
+ # In case number of threads has been modified
260
+ adjust_worker_threads
197
261
 
198
- sleep Config.instance.heartbeat_seconds
262
+ # Stop worker if shutdown indicator was set
263
+ stop! if self.class.shutdown? && may_stop?
199
264
  end
200
265
  logger.info 'Waiting for worker threads to stop'
201
266
  # TODO Put a timeout on join.
@@ -203,33 +268,17 @@ module RocketJob
203
268
  # Compare thread dumps for any changes, force down if no change?
204
269
  # reload, if model missing: Send Shutdown exception to each thread
205
270
  # 5 more seconds then exit
206
- thread_pool.each { |t| t.join }
271
+ worker_threads.each { |t| t.join }
207
272
  logger.info 'Shutdown'
208
273
  rescue Exception => exc
209
274
  logger.error('RocketJob::Worker is stopping due to an exception', exc)
210
- ensure
211
- # Destroy this worker instance
212
- destroy
213
275
  end
214
276
 
215
- def thread_pool_count
216
- thread_pool.count { |i| i.alive? }
277
+ # Returns [Fixnum] number of workers (threads) that are alive
278
+ def worker_count
279
+ worker_threads.count { |i| i.alive? }
217
280
  end
218
281
 
219
- # Returns [true|false] if this worker has missed at least the last 4 heartbeats
220
- #
221
- # Possible causes for a worker to miss its heartbeats:
222
- # - The worker process has died
223
- # - The worker process is "hanging"
224
- # - The worker is no longer able to communicate with the MongoDB Server
225
- def zombie?(missed = 4)
226
- return false unless running?
227
- dead_seconds = Config.instance.heartbeat_seconds * missed
228
- (Time.now - heartbeat.updated_at) >= dead_seconds
229
- end
230
-
231
- protected
232
-
233
282
  def next_worker_id
234
283
  @worker_id ||= 0
235
284
  @worker_id += 1
@@ -244,12 +293,12 @@ module RocketJob
244
293
  # that not all workers poll at the same time
245
294
  # The worker also responds faster than max_poll_seconds when a new
246
295
  # job is added.
247
- def adjust_thread_pool(stagger_threads=false)
248
- count = thread_pool_count
296
+ def adjust_worker_threads(stagger_threads=false)
297
+ count = worker_count
249
298
  # Cleanup threads that have stopped
250
- if count != thread_pool.count
251
- logger.info "Cleaning up #{thread_pool.count - count} threads that went away"
252
- thread_pool.delete_if { |t| !t.alive? }
299
+ if count != worker_threads.count
300
+ logger.info "Cleaning up #{worker_threads.count - count} threads that went away"
301
+ worker_threads.delete_if { |t| !t.alive? }
253
302
  end
254
303
 
255
304
  return if shutting_down?
@@ -260,7 +309,7 @@ module RocketJob
260
309
  logger.info "Starting #{thread_count} threads"
261
310
  thread_count.times.each do
262
311
  # Start worker thread
263
- thread_pool << Thread.new(next_worker_id) do |id|
312
+ worker_threads << Thread.new(next_worker_id) do |id|
264
313
  begin
265
314
  sleep (Config.instance.max_poll_seconds.to_f / max_threads) * (id - 1) if stagger_threads
266
315
  worker(id)
@@ -274,14 +323,15 @@ module RocketJob
274
323
 
275
324
  # Keep processing jobs until worker stops running
276
325
  def worker(worker_id)
277
- Thread.current.name = "rocketjob #{worker_id}"
326
+ Thread.current.name = 'rocketjob %03i' % worker_id
278
327
  logger.info 'Started'
279
328
  while !shutting_down?
280
- if process_next_job
281
- # Keeps workers staggered across the poll interval so that not
282
- # all workers poll at the same time
329
+ if process_available_jobs
330
+ # Keeps workers staggered across the poll interval so that
331
+ # all workers don't poll at the same time
283
332
  sleep rand(RocketJob::Config.instance.max_poll_seconds * 1000) / 1000
284
333
  else
334
+ break if shutting_down?
285
335
  sleep RocketJob::Config.instance.max_poll_seconds
286
336
  end
287
337
  end
@@ -292,35 +342,22 @@ module RocketJob
292
342
 
293
343
  # Process the next available job
294
344
  # Returns [Boolean] whether any job was actually processed
295
- def process_next_job
345
+ def process_available_jobs
296
346
  skip_job_ids = []
297
- while job = Job.next_job(name, skip_job_ids)
298
- logger.tagged("Job #{job.id}") do
299
- if job.work(self)
300
- return true if shutting_down?
347
+ processed = false
348
+ while (job = Job.rocket_job_next_job(name, skip_job_ids)) && !shutting_down?
349
+ logger.fast_tag("Job #{job.id}") do
350
+ if job.rocket_job_work(self)
301
351
  # Need to skip the specified job due to throttling or no work available
302
352
  skip_job_ids << job.id
303
353
  else
304
- return true
354
+ processed = true
305
355
  end
306
356
  end
307
357
  end
308
- false
358
+ processed
309
359
  end
310
360
 
311
- # Requeue any jobs assigned to this worker
312
- def requeue_jobs
313
- stop! if running? || paused?
314
- RocketJob::Job.requeue_dead_worker(name)
315
- end
316
-
317
- # Shutdown indicator
318
- def self.shutdown
319
- @@shutdown
320
- end
321
-
322
- @@shutdown = false
323
-
324
361
  # Register handlers for the various signals
325
362
  # Term:
326
363
  # Perform clean shutdown
@@ -328,32 +365,26 @@ module RocketJob
328
365
  def self.register_signal_handlers
329
366
  begin
330
367
  Signal.trap 'SIGTERM' do
331
- # Cannot use Mutex protected writer here since it is in a signal handler
332
- @@shutdown = true
333
- logger.warn 'Shutdown signal (SIGTERM) received. Will shutdown as soon as active jobs/slices have completed.'
368
+ shutdown!
369
+ message = 'Shutdown signal (SIGTERM) received. Will shutdown as soon as active jobs/slices have completed.'
370
+ # Logging uses a mutex to access Queue on MRI/CRuby
371
+ defined?(JRuby) ? logger.warn(message) : puts(message)
334
372
  end
335
373
 
336
374
  Signal.trap 'INT' do
337
- # Cannot use Mutex protected writer here since it is in a signal handler
338
- @@shutdown = true
339
- logger.warn 'Shutdown signal (INT) received. Will shutdown as soon as active jobs/slices have completed.'
375
+ shutdown!
376
+ message = 'Shutdown signal (INT) received. Will shutdown as soon as active jobs/slices have completed.'
377
+ # Logging uses a mutex to access Queue on MRI/CRuby
378
+ defined?(JRuby) ? logger.warn(message) : puts(message)
340
379
  end
341
380
  rescue StandardError
342
381
  logger.warn 'SIGTERM handler not installed. Not able to shutdown gracefully'
343
382
  end
344
383
  end
345
384
 
346
- # Patch the way MongoMapper reloads a model
347
- def reload
348
- if doc = collection.find_one(:_id => id)
349
- # Clear out keys that are not returned during the reload from MongoDB
350
- (keys.keys - doc.keys).each { |key| send("#{key}=", nil) }
351
- initialize_default_values
352
- load_from_database(doc)
353
- self
354
- else
355
- raise MongoMapper::DocumentNotFound, "Document match #{_id.inspect} does not exist in #{collection.name} collection"
356
- end
385
+ # Requeue any jobs assigned to this worker when it is destroyed
386
+ def requeue_jobs
387
+ RocketJob::Job.requeue_dead_worker(name)
357
388
  end
358
389
 
359
390
  end
data/lib/rocketjob.rb CHANGED
@@ -1,39 +1,61 @@
1
1
  # encoding: UTF-8
2
- require 'mongo'
2
+ require 'semantic_logger'
3
+ require 'rocket_job/extensions/mongo'
3
4
  require 'mongo_ha'
4
5
  require 'mongo_mapper'
5
- require 'semantic_logger'
6
6
  require 'rocket_job/version'
7
7
 
8
8
  # @formatter:off
9
9
  module RocketJob
10
- autoload :CLI, 'rocket_job/cli'
11
- autoload :Config, 'rocket_job/config'
12
- autoload :DirmonEntry, 'rocket_job/dirmon_entry'
13
- autoload :Heartbeat, 'rocket_job/heartbeat'
14
- autoload :Job, 'rocket_job/job'
15
- autoload :JobException, 'rocket_job/job_exception'
16
- autoload :Worker, 'rocket_job/worker'
17
- module Concerns
18
- autoload :Worker, 'rocket_job/concerns/worker'
19
- autoload :Singleton, 'rocket_job/concerns/singleton'
10
+ autoload :CLI, 'rocket_job/cli'
11
+ autoload :Config, 'rocket_job/config'
12
+ autoload :DirmonEntry, 'rocket_job/dirmon_entry'
13
+ autoload :Heartbeat, 'rocket_job/heartbeat'
14
+ autoload :Job, 'rocket_job/job'
15
+ autoload :JobException, 'rocket_job/job_exception'
16
+ autoload :Worker, 'rocket_job/worker'
17
+
18
+ module Plugins
19
+ module Job
20
+ autoload :Callbacks, 'rocket_job/plugins/job/callbacks'
21
+ autoload :Defaults, 'rocket_job/plugins/job/defaults'
22
+ autoload :StateMachine, 'rocket_job/plugins/job/state_machine'
23
+ autoload :Logger, 'rocket_job/plugins/job/logger'
24
+ autoload :Model, 'rocket_job/plugins/job/model'
25
+ autoload :Persistence, 'rocket_job/plugins/job/persistence'
26
+ autoload :Worker, 'rocket_job/plugins/job/worker'
27
+ end
28
+ autoload :Document, 'rocket_job/plugins/document'
29
+ autoload :Restart, 'rocket_job/plugins/restart'
30
+ autoload :Singleton, 'rocket_job/plugins/singleton'
31
+ autoload :StateMachine, 'rocket_job/plugins/state_machine'
20
32
  end
33
+
21
34
  module Jobs
22
- autoload :DirmonJob, 'rocket_job/jobs/dirmon_job'
35
+ autoload :DirmonJob, 'rocket_job/jobs/dirmon_job'
36
+ autoload :SimpleJob, 'rocket_job/jobs/simple_job'
23
37
  end
24
38
 
25
39
  # @formatter:on
26
40
  # Returns a human readable duration from the supplied [Float] number of seconds
27
41
  def self.seconds_as_duration(seconds)
28
- time = Time.at(seconds)
29
- if seconds >= 1.day
30
- "#{(seconds / 1.day).to_i}d #{time.strftime('%-Hh %-Mm %-Ss')}"
31
- elsif seconds >= 1.hour
32
- time.strftime('%-Hh %-Mm %-Ss')
33
- elsif seconds >= 1.minute
34
- time.strftime('%-Mm %-Ss')
42
+ return nil unless seconds
43
+ if seconds >= 86400.0 # 1 day
44
+ "#{(seconds / 86400).to_i}d #{Time.at(seconds).strftime('%-Hh %-Mm')}"
45
+ elsif seconds >= 3600.0 # 1 hour
46
+ Time.at(seconds).strftime('%-Hh %-Mm')
47
+ elsif seconds >= 60.0 # 1 minute
48
+ Time.at(seconds).strftime('%-Mm %-Ss')
49
+ elsif seconds >= 1.0 # 1 second
50
+ "#{'%.3f' % seconds}s"
35
51
  else
36
- time.strftime('%-Ss')
52
+ duration = seconds * 1000
53
+ if defined? JRuby
54
+ "#{duration.to_i}ms"
55
+ else
56
+ duration < 10.0 ? "#{'%.3f' % duration}ms" : "#{'%.1f' % duration}ms"
57
+ end
37
58
  end
38
59
  end
60
+
39
61
  end
@@ -0,0 +1,12 @@
1
+ require_relative 'test_helper'
2
+ class ConfigTest < Minitest::Test
3
+ describe RocketJob::Config do
4
+
5
+ describe '.config' do
6
+ it 'support multiple databases' do
7
+ assert_equal 'test_rocketjob', RocketJob::Job.collection.db.name
8
+ end
9
+ end
10
+
11
+ end
12
+ end