rocketjob 2.1.3 → 3.0.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/README.md +36 -0
  3. data/lib/rocket_job/active_server.rb +48 -0
  4. data/lib/rocket_job/cli.rb +29 -17
  5. data/lib/rocket_job/config.rb +19 -31
  6. data/lib/rocket_job/dirmon_entry.rb +15 -45
  7. data/lib/rocket_job/extensions/mongo/logging.rb +26 -0
  8. data/lib/rocket_job/extensions/rocket_job_adapter.rb +3 -5
  9. data/lib/rocket_job/heartbeat.rb +18 -23
  10. data/lib/rocket_job/job.rb +0 -1
  11. data/lib/rocket_job/job_exception.rb +11 -13
  12. data/lib/rocket_job/jobs/dirmon_job.rb +8 -8
  13. data/lib/rocket_job/jobs/housekeeping_job.rb +13 -15
  14. data/lib/rocket_job/performance.rb +5 -5
  15. data/lib/rocket_job/plugins/cron.rb +3 -10
  16. data/lib/rocket_job/plugins/document.rb +58 -33
  17. data/lib/rocket_job/plugins/job/model.rb +43 -71
  18. data/lib/rocket_job/plugins/job/persistence.rb +7 -63
  19. data/lib/rocket_job/plugins/job/worker.rb +24 -26
  20. data/lib/rocket_job/plugins/processing_window.rb +6 -9
  21. data/lib/rocket_job/plugins/retry.rb +3 -8
  22. data/lib/rocket_job/plugins/singleton.rb +1 -1
  23. data/lib/rocket_job/plugins/state_machine.rb +1 -7
  24. data/lib/rocket_job/server.rb +352 -0
  25. data/lib/rocket_job/version.rb +1 -1
  26. data/lib/rocket_job/worker.rb +46 -336
  27. data/lib/rocketjob.rb +5 -4
  28. data/test/config/mongoid.yml +88 -0
  29. data/test/config_test.rb +1 -1
  30. data/test/dirmon_entry_test.rb +15 -79
  31. data/test/dirmon_job_test.rb +6 -6
  32. data/test/job_test.rb +2 -2
  33. data/test/plugins/job/callbacks_test.rb +40 -32
  34. data/test/plugins/job/defaults_test.rb +10 -8
  35. data/test/plugins/job/model_test.rb +1 -3
  36. data/test/plugins/job/persistence_test.rb +11 -13
  37. data/test/plugins/job/worker_test.rb +45 -26
  38. data/test/plugins/processing_window_test.rb +4 -4
  39. data/test/plugins/restart_test.rb +11 -12
  40. data/test/plugins/state_machine_event_callbacks_test.rb +20 -18
  41. data/test/plugins/state_machine_test.rb +5 -5
  42. data/test/test_helper.rb +4 -1
  43. metadata +15 -29
  44. data/lib/rocket_job/extensions/mongo.rb +0 -23
  45. data/lib/rocket_job/extensions/mongo_mapper.rb +0 -30
  46. data/lib/rocket_job/plugins/job/defaults.rb +0 -40
  47. data/test/config/mongo.yml +0 -46
data/lib/rocket_job/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # encoding: UTF-8
2
2
  module RocketJob #:nodoc
3
- VERSION = '2.1.3'
3
+ VERSION = '3.0.0.alpha'
4
4
  end
data/lib/rocket_job/worker.rb CHANGED
@@ -1,354 +1,93 @@
1
1
  # encoding: UTF-8
2
2
  require 'concurrent'
3
+ require 'forwardable'
3
4
  module RocketJob
4
5
  # Worker
5
6
  #
6
- # On startup a worker instance will automatically register itself
7
- # if not already present
8
- #
9
- # Starting a worker in the foreground:
10
- # - Using a Rails runner:
11
- # bin/rocketjob
12
- #
13
- # Starting a worker in the background:
14
- # - Using a Rails runner:
15
- # nohup bin/rocketjob --quiet 2>&1 1>output.log &
16
- #
17
- # Stopping a worker:
18
- # - Stop the worker via the Web UI
19
- # - Send a regular kill signal to make it shutdown once all active work is complete
20
- # kill <pid>
21
- # - Or, use the following Ruby code:
22
- # worker = RocketJob::Worker.where(name: 'worker name').first
23
- # worker.stop!
24
- #
25
- # Sending the kill signal locally will result in starting the shutdown process
26
- # immediately. Via the UI or Ruby code the worker can take up to 15 seconds
27
- # (the heartbeat interval) to start shutting down.
7
+ # A worker runs on a single operating system thread
8
+ # Is usually started under a RocketJob server process.
28
9
  class Worker
29
- include Plugins::Document
30
- include Plugins::StateMachine
31
10
  include SemanticLogger::Loggable
11
+ include ActiveSupport::Callbacks
12
+ extend Forwardable
32
13
 
33
- # @formatter:off
34
- # Unique Name of this worker instance
35
- # Default: `host name:PID`
36
- # The unique name is used on re-start to re-queue any jobs that were being processed
37
- # at the time the worker or host unexpectedly terminated, if any
38
- key :name, String, default: -> { "#{SemanticLogger.host}:#{$$}" }
39
-
40
- # The maximum number of threads that this worker should use
41
- # If set, it will override the default value in RocketJob::Config
42
- key :max_threads, Integer, default: -> { Config.instance.max_worker_threads }
43
-
44
- # When this worker process was started
45
- key :started_at, Time
46
-
47
- # The heartbeat information for this worker
48
- has_one :heartbeat, class_name: 'RocketJob::Heartbeat'
49
-
50
- # Current state
51
- # Internal use only. Do not set this field directly
52
- key :state, Symbol, default: :starting
53
-
54
- validates_presence_of :state, :name, :max_threads
55
-
56
- # States
57
- # :starting -> :running -> :paused
58
- # -> :stopping
59
- aasm column: :state do
60
- state :starting, initial: true
61
- state :running
62
- state :paused
63
- state :stopping
64
-
65
- event :started do
66
- transitions from: :starting, to: :running
67
- before do
68
- self.started_at = Time.now
69
- end
70
- end
71
-
72
- event :pause do
73
- transitions from: :running, to: :paused
74
- end
75
-
76
- event :resume do
77
- transitions from: :paused, to: :running
78
- end
79
-
80
- event :stop do
81
- transitions from: :running, to: :stopping
82
- transitions from: :paused, to: :stopping
83
- transitions from: :starting, to: :stopping
84
- end
85
- end
86
- # @formatter:on
87
-
88
- # Requeue any jobs being worked by this worker when it is destroyed
89
- before_destroy :requeue_jobs
90
-
91
- # Run the worker process
92
- # Attributes supplied are passed to #new
93
- def self.run(attrs={})
94
- Thread.current.name = 'rocketjob main'
95
- create_indexes
96
- register_signal_handlers
97
- if defined?(RocketJobPro) && (RocketJob::Job.database.name != RocketJob::Jobs::PerformanceJob.database.name)
98
- raise 'The RocketJob configuration is being applied after the system has been initialized'
99
- end
100
-
101
- worker = create!(attrs)
102
- if worker.max_threads == 0
103
- # Does not start any additional threads and runs the worker in the current thread.
104
- # No heartbeats are performed. So this worker will appear as a zombie in RJMC.
105
- # Designed for profiling purposes where a single thread is much simpler to profile.
106
- worker.started!
107
- worker.send(:worker, 0)
108
- else
109
- worker.send(:run)
110
- end
111
-
112
- ensure
113
- worker.destroy if worker
114
- end
14
+ def_delegator :@thread, :alive?
15
+ def_delegator :@thread, :backtrace
16
+ def_delegator :@thread, :join
115
17
 
116
- # Create indexes
117
- def self.create_indexes
118
- ensure_index [[:name, 1]], background: true, unique: true
119
- # Also create indexes for the jobs collection
120
- Job.create_indexes
121
- end
18
+ define_callbacks :running
122
19
 
123
- # Destroy's all instances of zombie workers and requeues any jobs still "running"
124
- # on those workers
125
- def self.destroy_zombies
126
- count = 0
127
- each do |worker|
128
- next unless worker.zombie?
129
- logger.warn "Destroying zombie worker #{worker.name}, and requeueing its jobs"
130
- worker.destroy
131
- count += 1
132
- end
133
- count
134
- end
20
+ attr_accessor :id, :worker_name, :inline
21
+ attr_reader :thread, :name
135
22
 
136
- # Stop all running, paused, or starting workers
137
- def self.stop_all
138
- where(state: [:running, :paused, :starting]).each(&:stop!)
23
+ def self.before_running(*filters, &blk)
24
+ set_callback(:running, :before, *filters, &blk)
139
25
  end
140
26
 
141
- # Pause all running workers
142
- def self.pause_all
143
- running.each(&:pause!)
27
+ def self.after_running(*filters, &blk)
28
+ set_callback(:running, :after, *filters, &blk)
144
29
  end
145
30
 
146
- # Resume all paused workers
147
- def self.resume_all
148
- paused.each(&:resume!)
31
+ def self.around_running(*filters, &blk)
32
+ set_callback(:running, :around, *filters, &blk)
149
33
  end
150
34
 
151
- # Returns [Hash<String:Integer>] of the number of workers in each state.
152
- # Note: If there are no workers in that particular state then the hash will not have a value for it.
153
- #
154
- # Example workers in every state:
155
- # RocketJob::Worker.counts_by_state
156
- # # => {
157
- # :aborted => 1,
158
- # :completed => 37,
159
- # :failed => 1,
160
- # :paused => 3,
161
- # :queued => 4,
162
- # :running => 1,
163
- # :queued_now => 1,
164
- # :scheduled => 3
165
- # }
166
- #
167
- # Example no workers active:
168
- # RocketJob::Worker.counts_by_state
169
- # # => {}
170
- def self.counts_by_state
171
- counts = {}
172
- collection.aggregate([
173
- {
174
- '$group' => {
175
- _id: '$state',
176
- count: {'$sum' => 1}
177
- }
178
- }
179
- ]
180
- ).each do |result|
181
- counts[result['_id'].to_sym] = result['count']
35
+ def initialize(id: 0, server_name: 'inline', inline: false)
36
+ @id = id
37
+ @server_name = server_name
38
+ if defined?(Concurrent::JavaAtomicBoolean) || defined?(Concurrent::CAtomicBoolean)
39
+ @shutdown = Concurrent::AtomicBoolean.new(false)
40
+ else
41
+ @shutdown = false
182
42
  end
183
- counts
184
- end
185
-
186
- # Returns [Boolean] whether the worker is shutting down
187
- def shutting_down?
188
- self.class.shutdown? || !running?
189
- end
190
-
191
- # Returns [true|false] if this worker has missed at least the last 4 heartbeats
192
- #
193
- # Possible causes for a worker to miss its heartbeats:
194
- # - The worker process has died
195
- # - The worker process is "hanging"
196
- # - The worker is no longer able to communicate with the MongoDB Server
197
- def zombie?(missed = 4)
198
- return false unless running? || stopping?
199
- return true if heartbeat.nil? || heartbeat.updated_at.nil?
200
- dead_seconds = Config.instance.heartbeat_seconds * missed
201
- (Time.now - heartbeat.updated_at) >= dead_seconds
43
+ @name = "#{server_name}:#{id}"
44
+ @thread = Thread.new { run } unless inline
202
45
  end
203
46
 
204
- # On MRI the 'concurrent-ruby-ext' gem may not be loaded
205
47
  if defined?(Concurrent::JavaAtomicBoolean) || defined?(Concurrent::CAtomicBoolean)
206
- # Returns [true|false] whether the shutdown indicator has been set for this worker process
207
- def self.shutdown?
208
- @@shutdown.value
48
+ # Tells this worker to shutdown as soon the current job/slice is complete
49
+ def shutdown!
50
+ @shutdown.make_true
209
51
  end
210
52
 
211
- # Set shutdown indicator for this worker process
212
- def self.shutdown!
213
- @@shutdown.make_true
53
+ def shutdown?
54
+ @shutdown.value
214
55
  end
215
-
216
- @@shutdown = Concurrent::AtomicBoolean.new(false)
217
56
  else
218
- # Returns [true|false] whether the shutdown indicator has been set for this worker process
219
- def self.shutdown?
220
- @@shutdown
57
+ def shutdown!
58
+ @shutdown = true
221
59
  end
222
60
 
223
- # Set shutdown indicator for this worker process
224
- def self.shutdown!
225
- @@shutdown = true
61
+ def shutdown?
62
+ @shutdown
226
63
  end
227
-
228
- @@shutdown = false
229
64
  end
230
65
 
231
66
  private
232
67
 
233
- attr_reader :worker_threads
234
-
235
- # Returns [Array<Thread>] collection of created worker threads
236
- def worker_threads
237
- @worker_threads ||= []
238
- end
239
-
240
- # Management Thread
68
+ # Process jobs until it shuts down
69
+ #
70
+ # Params
71
+ # worker_id [Integer]
72
+ # The number of this worker for logging purposes
241
73
  def run
242
- logger.info "Using MongoDB Database: #{RocketJob::Job.database.name}"
243
- build_heartbeat(updated_at: Time.now, current_threads: 0)
244
- started!
245
- adjust_worker_threads(true)
246
- logger.info "RocketJob Worker started with #{max_threads} workers running"
247
-
248
- count = 0
249
- while running? || paused?
250
- sleep Config.instance.heartbeat_seconds
251
-
252
- update_attributes_and_reload(
253
- 'heartbeat.updated_at' => Time.now,
254
- 'heartbeat.current_threads' => worker_count
255
- )
256
-
257
- # In case number of threads has been modified
258
- adjust_worker_threads
259
-
260
- # Stop worker if shutdown indicator was set
261
- stop! if self.class.shutdown? && may_stop?
262
- end
263
-
264
- logger.info 'Waiting for worker threads to stop'
265
- while thread = worker_threads.first
266
- if thread.join(5)
267
- # Worker thread is dead
268
- worker_threads.shift
269
- else
270
- # Timeout waiting for thread to stop
271
- begin
272
- update_attributes_and_reload(
273
- 'heartbeat.updated_at' => Time.now,
274
- 'heartbeat.current_threads' => worker_count
275
- )
276
- rescue MongoMapper::DocumentNotFound
277
- logger.warn('Worker has been destroyed. Going down hard!')
278
- break
279
- end
280
- end
281
- end
282
- logger.info 'Shutdown'
283
- rescue Exception => exc
284
- logger.error('RocketJob::Worker is stopping due to an exception', exc)
285
- end
286
-
287
- # Returns [Fixnum] number of workers (threads) that are alive
288
- def worker_count
289
- worker_threads.count { |i| i.alive? }
290
- end
291
-
292
- def next_worker_id
293
- @worker_id ||= 0
294
- @worker_id += 1
295
- end
296
-
297
- # Re-adjust the number of running threads to get it up to the
298
- # required number of threads
299
- # Parameters
300
- # stagger_threads
301
- # Whether to stagger when the threads poll for work the first time
302
- # It spreads out the queue polling over the max_poll_seconds so
303
- # that not all workers poll at the same time
304
- # The worker also respond faster than max_poll_seconds when a new
305
- # job is added.
306
- def adjust_worker_threads(stagger_threads=false)
307
- count = worker_count
308
- # Cleanup threads that have stopped
309
- if count != worker_threads.count
310
- logger.info "Cleaning up #{worker_threads.count - count} threads that went away"
311
- worker_threads.delete_if { |t| !t.alive? }
312
- end
313
-
314
- return if shutting_down?
315
-
316
- # Need to add more threads?
317
- if count < max_threads
318
- thread_count = max_threads - count
319
- logger.info "Starting #{thread_count} threads"
320
- thread_count.times.each do
321
- # Start worker thread
322
- worker_threads << Thread.new(next_worker_id) do |id|
323
- begin
324
- sleep (Config.instance.max_poll_seconds.to_f / max_threads) * (id - 1) if stagger_threads
325
- worker(id)
326
- rescue Exception => exc
327
- logger.fatal('Cannot start worker thread', exc)
328
- end
329
- end
330
- end
331
- end
332
- end
333
-
334
- # Keep processing jobs until worker stops running
335
- def worker(worker_id)
336
- Thread.current.name = 'rocketjob %03i' % worker_id
74
+ Thread.current.name = 'rocketjob %03i' % id
337
75
  logger.info 'Started'
338
- while !shutting_down?
76
+ while !shutdown?
339
77
  if process_available_jobs
340
78
  # Keeps workers staggered across the poll interval so that
341
79
  # all workers don't poll at the same time
342
80
  sleep rand(RocketJob::Config.instance.max_poll_seconds * 1000) / 1000
343
81
  else
344
- break if shutting_down?
82
+ break if shutdown?
345
83
  sleep RocketJob::Config.instance.max_poll_seconds
346
84
  end
347
85
  end
348
- logger.info "Stopping. Worker state: #{state.inspect}"
86
+ logger.info 'Stopping'
349
87
  rescue Exception => exc
350
88
  logger.fatal('Unhandled exception in job processing thread', exc)
351
89
  ensure
90
+ # TODO: Move to after_running callback
352
91
  ActiveRecord::Base.clear_active_connections! if defined?(ActiveRecord::Base)
353
92
  end
354
93
 
@@ -357,7 +96,7 @@ module RocketJob
357
96
  def process_available_jobs
358
97
  skip_job_ids = []
359
98
  processed = false
360
- while (job = Job.rocket_job_next_job(name, skip_job_ids)) && !shutting_down?
99
+ while (job = Job.rocket_job_next_job(worker_name, skip_job_ids)) && !shutdown?
361
100
  logger.fast_tag("job:#{job.id}") do
362
101
  if job.rocket_job_work(self)
363
102
  # Need to skip the specified job due to throttling or no work available
@@ -370,35 +109,6 @@ module RocketJob
370
109
  processed
371
110
  end
372
111
 
373
- # Register handlers for the various signals
374
- # Term:
375
- # Perform clean shutdown
376
- #
377
- def self.register_signal_handlers
378
- begin
379
- Signal.trap 'SIGTERM' do
380
- shutdown!
381
- message = 'Shutdown signal (SIGTERM) received. Will shutdown as soon as active jobs/slices have completed.'
382
- # Logging uses a mutex to access Queue on MRI/CRuby
383
- defined?(JRuby) ? logger.warn(message) : puts(message)
384
- end
385
-
386
- Signal.trap 'INT' do
387
- shutdown!
388
- message = 'Shutdown signal (INT) received. Will shutdown as soon as active jobs/slices have completed.'
389
- # Logging uses a mutex to access Queue on MRI/CRuby
390
- defined?(JRuby) ? logger.warn(message) : puts(message)
391
- end
392
- rescue StandardError
393
- logger.warn 'SIGTERM handler not installed. Not able to shutdown gracefully'
394
- end
395
- end
396
-
397
- # Requeue any jobs assigned to this worker when it is destroyed
398
- def requeue_jobs
399
- RocketJob::Job.requeue_dead_worker(name)
400
- end
401
-
402
112
  end
403
113
  end
404
114
 
data/lib/rocketjob.rb CHANGED
@@ -1,13 +1,12 @@
1
1
  # encoding: UTF-8
2
2
  require 'semantic_logger'
3
- require 'rocket_job/extensions/mongo'
4
- require 'mongo_ha'
5
- require 'mongo_mapper'
6
- require 'rocket_job/extensions/mongo_mapper'
3
+ require 'mongoid'
4
+ require 'rocket_job/extensions/mongo/logging'
7
5
  require 'rocket_job/version'
8
6
 
9
7
  # @formatter:off
10
8
  module RocketJob
9
+ autoload :ActiveServer, 'rocket_job/active_server'
11
10
  autoload :CLI, 'rocket_job/cli'
12
11
  autoload :Config, 'rocket_job/config'
13
12
  autoload :DirmonEntry, 'rocket_job/dirmon_entry'
@@ -15,6 +14,8 @@ module RocketJob
15
14
  autoload :Job, 'rocket_job/job'
16
15
  autoload :JobException, 'rocket_job/job_exception'
17
16
  autoload :Worker, 'rocket_job/worker'
17
+ autoload :Performance, 'rocket_job/performance'
18
+ autoload :Server, 'rocket_job/server'
18
19
 
19
20
  module Plugins
20
21
  module Job