rocketjob 2.1.3 → 3.0.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +36 -0
- data/lib/rocket_job/active_server.rb +48 -0
- data/lib/rocket_job/cli.rb +29 -17
- data/lib/rocket_job/config.rb +19 -31
- data/lib/rocket_job/dirmon_entry.rb +15 -45
- data/lib/rocket_job/extensions/mongo/logging.rb +26 -0
- data/lib/rocket_job/extensions/rocket_job_adapter.rb +3 -5
- data/lib/rocket_job/heartbeat.rb +18 -23
- data/lib/rocket_job/job.rb +0 -1
- data/lib/rocket_job/job_exception.rb +11 -13
- data/lib/rocket_job/jobs/dirmon_job.rb +8 -8
- data/lib/rocket_job/jobs/housekeeping_job.rb +13 -15
- data/lib/rocket_job/performance.rb +5 -5
- data/lib/rocket_job/plugins/cron.rb +3 -10
- data/lib/rocket_job/plugins/document.rb +58 -33
- data/lib/rocket_job/plugins/job/model.rb +43 -71
- data/lib/rocket_job/plugins/job/persistence.rb +7 -63
- data/lib/rocket_job/plugins/job/worker.rb +24 -26
- data/lib/rocket_job/plugins/processing_window.rb +6 -9
- data/lib/rocket_job/plugins/retry.rb +3 -8
- data/lib/rocket_job/plugins/singleton.rb +1 -1
- data/lib/rocket_job/plugins/state_machine.rb +1 -7
- data/lib/rocket_job/server.rb +352 -0
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +46 -336
- data/lib/rocketjob.rb +5 -4
- data/test/config/mongoid.yml +88 -0
- data/test/config_test.rb +1 -1
- data/test/dirmon_entry_test.rb +15 -79
- data/test/dirmon_job_test.rb +6 -6
- data/test/job_test.rb +2 -2
- data/test/plugins/job/callbacks_test.rb +40 -32
- data/test/plugins/job/defaults_test.rb +10 -8
- data/test/plugins/job/model_test.rb +1 -3
- data/test/plugins/job/persistence_test.rb +11 -13
- data/test/plugins/job/worker_test.rb +45 -26
- data/test/plugins/processing_window_test.rb +4 -4
- data/test/plugins/restart_test.rb +11 -12
- data/test/plugins/state_machine_event_callbacks_test.rb +20 -18
- data/test/plugins/state_machine_test.rb +5 -5
- data/test/test_helper.rb +4 -1
- metadata +15 -29
- data/lib/rocket_job/extensions/mongo.rb +0 -23
- data/lib/rocket_job/extensions/mongo_mapper.rb +0 -30
- data/lib/rocket_job/plugins/job/defaults.rb +0 -40
- data/test/config/mongo.yml +0 -46
data/lib/rocket_job/version.rb
CHANGED
data/lib/rocket_job/worker.rb
CHANGED
@@ -1,354 +1,93 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require 'concurrent'
|
3
|
+
require 'forwardable'
|
3
4
|
module RocketJob
|
4
5
|
# Worker
|
5
6
|
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
# Starting a worker in the foreground:
|
10
|
-
# - Using a Rails runner:
|
11
|
-
# bin/rocketjob
|
12
|
-
#
|
13
|
-
# Starting a worker in the background:
|
14
|
-
# - Using a Rails runner:
|
15
|
-
# nohup bin/rocketjob --quiet 2>&1 1>output.log &
|
16
|
-
#
|
17
|
-
# Stopping a worker:
|
18
|
-
# - Stop the worker via the Web UI
|
19
|
-
# - Send a regular kill signal to make it shutdown once all active work is complete
|
20
|
-
# kill <pid>
|
21
|
-
# - Or, use the following Ruby code:
|
22
|
-
# worker = RocketJob::Worker.where(name: 'worker name').first
|
23
|
-
# worker.stop!
|
24
|
-
#
|
25
|
-
# Sending the kill signal locally will result in starting the shutdown process
|
26
|
-
# immediately. Via the UI or Ruby code the worker can take up to 15 seconds
|
27
|
-
# (the heartbeat interval) to start shutting down.
|
7
|
+
# A worker runs on a single operating system thread
|
8
|
+
# It is usually started under a RocketJob server process.
|
28
9
|
class Worker
|
29
|
-
include Plugins::Document
|
30
|
-
include Plugins::StateMachine
|
31
10
|
include SemanticLogger::Loggable
|
11
|
+
include ActiveSupport::Callbacks
|
12
|
+
extend Forwardable
|
32
13
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
# The unique name is used on re-start to re-queue any jobs that were being processed
|
37
|
-
# at the time the worker or host unexpectedly terminated, if any
|
38
|
-
key :name, String, default: -> { "#{SemanticLogger.host}:#{$$}" }
|
39
|
-
|
40
|
-
# The maximum number of threads that this worker should use
|
41
|
-
# If set, it will override the default value in RocketJob::Config
|
42
|
-
key :max_threads, Integer, default: -> { Config.instance.max_worker_threads }
|
43
|
-
|
44
|
-
# When this worker process was started
|
45
|
-
key :started_at, Time
|
46
|
-
|
47
|
-
# The heartbeat information for this worker
|
48
|
-
has_one :heartbeat, class_name: 'RocketJob::Heartbeat'
|
49
|
-
|
50
|
-
# Current state
|
51
|
-
# Internal use only. Do not set this field directly
|
52
|
-
key :state, Symbol, default: :starting
|
53
|
-
|
54
|
-
validates_presence_of :state, :name, :max_threads
|
55
|
-
|
56
|
-
# States
|
57
|
-
# :starting -> :running -> :paused
|
58
|
-
# -> :stopping
|
59
|
-
aasm column: :state do
|
60
|
-
state :starting, initial: true
|
61
|
-
state :running
|
62
|
-
state :paused
|
63
|
-
state :stopping
|
64
|
-
|
65
|
-
event :started do
|
66
|
-
transitions from: :starting, to: :running
|
67
|
-
before do
|
68
|
-
self.started_at = Time.now
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
event :pause do
|
73
|
-
transitions from: :running, to: :paused
|
74
|
-
end
|
75
|
-
|
76
|
-
event :resume do
|
77
|
-
transitions from: :paused, to: :running
|
78
|
-
end
|
79
|
-
|
80
|
-
event :stop do
|
81
|
-
transitions from: :running, to: :stopping
|
82
|
-
transitions from: :paused, to: :stopping
|
83
|
-
transitions from: :starting, to: :stopping
|
84
|
-
end
|
85
|
-
end
|
86
|
-
# @formatter:on
|
87
|
-
|
88
|
-
# Requeue any jobs being worked by this worker when it is destroyed
|
89
|
-
before_destroy :requeue_jobs
|
90
|
-
|
91
|
-
# Run the worker process
|
92
|
-
# Attributes supplied are passed to #new
|
93
|
-
def self.run(attrs={})
|
94
|
-
Thread.current.name = 'rocketjob main'
|
95
|
-
create_indexes
|
96
|
-
register_signal_handlers
|
97
|
-
if defined?(RocketJobPro) && (RocketJob::Job.database.name != RocketJob::Jobs::PerformanceJob.database.name)
|
98
|
-
raise 'The RocketJob configuration is being applied after the system has been initialized'
|
99
|
-
end
|
100
|
-
|
101
|
-
worker = create!(attrs)
|
102
|
-
if worker.max_threads == 0
|
103
|
-
# Does not start any additional threads and runs the worker in the current thread.
|
104
|
-
# No heartbeats are performed. So this worker will appear as a zombie in RJMC.
|
105
|
-
# Designed for profiling purposes where a single thread is much simpler to profile.
|
106
|
-
worker.started!
|
107
|
-
worker.send(:worker, 0)
|
108
|
-
else
|
109
|
-
worker.send(:run)
|
110
|
-
end
|
111
|
-
|
112
|
-
ensure
|
113
|
-
worker.destroy if worker
|
114
|
-
end
|
14
|
+
def_delegator :@thread, :alive?
|
15
|
+
def_delegator :@thread, :backtrace
|
16
|
+
def_delegator :@thread, :join
|
115
17
|
|
116
|
-
|
117
|
-
def self.create_indexes
|
118
|
-
ensure_index [[:name, 1]], background: true, unique: true
|
119
|
-
# Also create indexes for the jobs collection
|
120
|
-
Job.create_indexes
|
121
|
-
end
|
18
|
+
define_callbacks :running
|
122
19
|
|
123
|
-
|
124
|
-
|
125
|
-
def self.destroy_zombies
|
126
|
-
count = 0
|
127
|
-
each do |worker|
|
128
|
-
next unless worker.zombie?
|
129
|
-
logger.warn "Destroying zombie worker #{worker.name}, and requeueing its jobs"
|
130
|
-
worker.destroy
|
131
|
-
count += 1
|
132
|
-
end
|
133
|
-
count
|
134
|
-
end
|
20
|
+
attr_accessor :id, :worker_name, :inline
|
21
|
+
attr_reader :thread, :name
|
135
22
|
|
136
|
-
|
137
|
-
|
138
|
-
where(state: [:running, :paused, :starting]).each(&:stop!)
|
23
|
+
def self.before_running(*filters, &blk)
|
24
|
+
set_callback(:running, :before, *filters, &blk)
|
139
25
|
end
|
140
26
|
|
141
|
-
|
142
|
-
|
143
|
-
running.each(&:pause!)
|
27
|
+
def self.after_running(*filters, &blk)
|
28
|
+
set_callback(:running, :after, *filters, &blk)
|
144
29
|
end
|
145
30
|
|
146
|
-
|
147
|
-
|
148
|
-
paused.each(&:resume!)
|
31
|
+
def self.around_running(*filters, &blk)
|
32
|
+
set_callback(:running, :around, *filters, &blk)
|
149
33
|
end
|
150
34
|
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
# :completed => 37,
|
159
|
-
# :failed => 1,
|
160
|
-
# :paused => 3,
|
161
|
-
# :queued => 4,
|
162
|
-
# :running => 1,
|
163
|
-
# :queued_now => 1,
|
164
|
-
# :scheduled => 3
|
165
|
-
# }
|
166
|
-
#
|
167
|
-
# Example no workers active:
|
168
|
-
# RocketJob::Worker.counts_by_state
|
169
|
-
# # => {}
|
170
|
-
def self.counts_by_state
|
171
|
-
counts = {}
|
172
|
-
collection.aggregate([
|
173
|
-
{
|
174
|
-
'$group' => {
|
175
|
-
_id: '$state',
|
176
|
-
count: {'$sum' => 1}
|
177
|
-
}
|
178
|
-
}
|
179
|
-
]
|
180
|
-
).each do |result|
|
181
|
-
counts[result['_id'].to_sym] = result['count']
|
35
|
+
def initialize(id: 0, server_name: 'inline', inline: false)
|
36
|
+
@id = id
|
37
|
+
@server_name = server_name
|
38
|
+
if defined?(Concurrent::JavaAtomicBoolean) || defined?(Concurrent::CAtomicBoolean)
|
39
|
+
@shutdown = Concurrent::AtomicBoolean.new(false)
|
40
|
+
else
|
41
|
+
@shutdown = false
|
182
42
|
end
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
# Returns [Boolean] whether the worker is shutting down
|
187
|
-
def shutting_down?
|
188
|
-
self.class.shutdown? || !running?
|
189
|
-
end
|
190
|
-
|
191
|
-
# Returns [true|false] if this worker has missed at least the last 4 heartbeats
|
192
|
-
#
|
193
|
-
# Possible causes for a worker to miss its heartbeats:
|
194
|
-
# - The worker process has died
|
195
|
-
# - The worker process is "hanging"
|
196
|
-
# - The worker is no longer able to communicate with the MongoDB Server
|
197
|
-
def zombie?(missed = 4)
|
198
|
-
return false unless running? || stopping?
|
199
|
-
return true if heartbeat.nil? || heartbeat.updated_at.nil?
|
200
|
-
dead_seconds = Config.instance.heartbeat_seconds * missed
|
201
|
-
(Time.now - heartbeat.updated_at) >= dead_seconds
|
43
|
+
@name = "#{server_name}:#{id}"
|
44
|
+
@thread = Thread.new { run } unless inline
|
202
45
|
end
|
203
46
|
|
204
|
-
# On MRI the 'concurrent-ruby-ext' gem may not be loaded
|
205
47
|
if defined?(Concurrent::JavaAtomicBoolean) || defined?(Concurrent::CAtomicBoolean)
|
206
|
-
#
|
207
|
-
def
|
208
|
-
|
48
|
+
# Tells this worker to shutdown as soon as the current job/slice is complete
|
49
|
+
def shutdown!
|
50
|
+
@shutdown.make_true
|
209
51
|
end
|
210
52
|
|
211
|
-
|
212
|
-
|
213
|
-
@@shutdown.make_true
|
53
|
+
def shutdown?
|
54
|
+
@shutdown.value
|
214
55
|
end
|
215
|
-
|
216
|
-
@@shutdown = Concurrent::AtomicBoolean.new(false)
|
217
56
|
else
|
218
|
-
|
219
|
-
|
220
|
-
@@shutdown
|
57
|
+
def shutdown!
|
58
|
+
@shutdown = true
|
221
59
|
end
|
222
60
|
|
223
|
-
|
224
|
-
|
225
|
-
@@shutdown = true
|
61
|
+
def shutdown?
|
62
|
+
@shutdown
|
226
63
|
end
|
227
|
-
|
228
|
-
@@shutdown = false
|
229
64
|
end
|
230
65
|
|
231
66
|
private
|
232
67
|
|
233
|
-
|
234
|
-
|
235
|
-
#
|
236
|
-
|
237
|
-
|
238
|
-
end
|
239
|
-
|
240
|
-
# Management Thread
|
68
|
+
# Process jobs until it shuts down
|
69
|
+
#
|
70
|
+
# Params
|
71
|
+
# worker_id [Integer]
|
72
|
+
# The number of this worker for logging purposes
|
241
73
|
def run
|
242
|
-
|
243
|
-
build_heartbeat(updated_at: Time.now, current_threads: 0)
|
244
|
-
started!
|
245
|
-
adjust_worker_threads(true)
|
246
|
-
logger.info "RocketJob Worker started with #{max_threads} workers running"
|
247
|
-
|
248
|
-
count = 0
|
249
|
-
while running? || paused?
|
250
|
-
sleep Config.instance.heartbeat_seconds
|
251
|
-
|
252
|
-
update_attributes_and_reload(
|
253
|
-
'heartbeat.updated_at' => Time.now,
|
254
|
-
'heartbeat.current_threads' => worker_count
|
255
|
-
)
|
256
|
-
|
257
|
-
# In case number of threads has been modified
|
258
|
-
adjust_worker_threads
|
259
|
-
|
260
|
-
# Stop worker if shutdown indicator was set
|
261
|
-
stop! if self.class.shutdown? && may_stop?
|
262
|
-
end
|
263
|
-
|
264
|
-
logger.info 'Waiting for worker threads to stop'
|
265
|
-
while thread = worker_threads.first
|
266
|
-
if thread.join(5)
|
267
|
-
# Worker thread is dead
|
268
|
-
worker_threads.shift
|
269
|
-
else
|
270
|
-
# Timeout waiting for thread to stop
|
271
|
-
begin
|
272
|
-
update_attributes_and_reload(
|
273
|
-
'heartbeat.updated_at' => Time.now,
|
274
|
-
'heartbeat.current_threads' => worker_count
|
275
|
-
)
|
276
|
-
rescue MongoMapper::DocumentNotFound
|
277
|
-
logger.warn('Worker has been destroyed. Going down hard!')
|
278
|
-
break
|
279
|
-
end
|
280
|
-
end
|
281
|
-
end
|
282
|
-
logger.info 'Shutdown'
|
283
|
-
rescue Exception => exc
|
284
|
-
logger.error('RocketJob::Worker is stopping due to an exception', exc)
|
285
|
-
end
|
286
|
-
|
287
|
-
# Returns [Fixnum] number of workers (threads) that are alive
|
288
|
-
def worker_count
|
289
|
-
worker_threads.count { |i| i.alive? }
|
290
|
-
end
|
291
|
-
|
292
|
-
def next_worker_id
|
293
|
-
@worker_id ||= 0
|
294
|
-
@worker_id += 1
|
295
|
-
end
|
296
|
-
|
297
|
-
# Re-adjust the number of running threads to get it up to the
|
298
|
-
# required number of threads
|
299
|
-
# Parameters
|
300
|
-
# stagger_threads
|
301
|
-
# Whether to stagger when the threads poll for work the first time
|
302
|
-
# It spreads out the queue polling over the max_poll_seconds so
|
303
|
-
# that not all workers poll at the same time
|
304
|
-
# The worker also respond faster than max_poll_seconds when a new
|
305
|
-
# job is added.
|
306
|
-
def adjust_worker_threads(stagger_threads=false)
|
307
|
-
count = worker_count
|
308
|
-
# Cleanup threads that have stopped
|
309
|
-
if count != worker_threads.count
|
310
|
-
logger.info "Cleaning up #{worker_threads.count - count} threads that went away"
|
311
|
-
worker_threads.delete_if { |t| !t.alive? }
|
312
|
-
end
|
313
|
-
|
314
|
-
return if shutting_down?
|
315
|
-
|
316
|
-
# Need to add more threads?
|
317
|
-
if count < max_threads
|
318
|
-
thread_count = max_threads - count
|
319
|
-
logger.info "Starting #{thread_count} threads"
|
320
|
-
thread_count.times.each do
|
321
|
-
# Start worker thread
|
322
|
-
worker_threads << Thread.new(next_worker_id) do |id|
|
323
|
-
begin
|
324
|
-
sleep (Config.instance.max_poll_seconds.to_f / max_threads) * (id - 1) if stagger_threads
|
325
|
-
worker(id)
|
326
|
-
rescue Exception => exc
|
327
|
-
logger.fatal('Cannot start worker thread', exc)
|
328
|
-
end
|
329
|
-
end
|
330
|
-
end
|
331
|
-
end
|
332
|
-
end
|
333
|
-
|
334
|
-
# Keep processing jobs until worker stops running
|
335
|
-
def worker(worker_id)
|
336
|
-
Thread.current.name = 'rocketjob %03i' % worker_id
|
74
|
+
Thread.current.name = 'rocketjob %03i' % id
|
337
75
|
logger.info 'Started'
|
338
|
-
while !
|
76
|
+
while !shutdown?
|
339
77
|
if process_available_jobs
|
340
78
|
# Keeps workers staggered across the poll interval so that
|
341
79
|
# all workers don't poll at the same time
|
342
80
|
sleep rand(RocketJob::Config.instance.max_poll_seconds * 1000) / 1000
|
343
81
|
else
|
344
|
-
break if
|
82
|
+
break if shutdown?
|
345
83
|
sleep RocketJob::Config.instance.max_poll_seconds
|
346
84
|
end
|
347
85
|
end
|
348
|
-
logger.info
|
86
|
+
logger.info 'Stopping'
|
349
87
|
rescue Exception => exc
|
350
88
|
logger.fatal('Unhandled exception in job processing thread', exc)
|
351
89
|
ensure
|
90
|
+
# TODO: Move to after_running callback
|
352
91
|
ActiveRecord::Base.clear_active_connections! if defined?(ActiveRecord::Base)
|
353
92
|
end
|
354
93
|
|
@@ -357,7 +96,7 @@ module RocketJob
|
|
357
96
|
def process_available_jobs
|
358
97
|
skip_job_ids = []
|
359
98
|
processed = false
|
360
|
-
while (job = Job.rocket_job_next_job(
|
99
|
+
while (job = Job.rocket_job_next_job(worker_name, skip_job_ids)) && !shutdown?
|
361
100
|
logger.fast_tag("job:#{job.id}") do
|
362
101
|
if job.rocket_job_work(self)
|
363
102
|
# Need to skip the specified job due to throttling or no work available
|
@@ -370,35 +109,6 @@ module RocketJob
|
|
370
109
|
processed
|
371
110
|
end
|
372
111
|
|
373
|
-
# Register handlers for the various signals
|
374
|
-
# Term:
|
375
|
-
# Perform clean shutdown
|
376
|
-
#
|
377
|
-
def self.register_signal_handlers
|
378
|
-
begin
|
379
|
-
Signal.trap 'SIGTERM' do
|
380
|
-
shutdown!
|
381
|
-
message = 'Shutdown signal (SIGTERM) received. Will shutdown as soon as active jobs/slices have completed.'
|
382
|
-
# Logging uses a mutex to access Queue on MRI/CRuby
|
383
|
-
defined?(JRuby) ? logger.warn(message) : puts(message)
|
384
|
-
end
|
385
|
-
|
386
|
-
Signal.trap 'INT' do
|
387
|
-
shutdown!
|
388
|
-
message = 'Shutdown signal (INT) received. Will shutdown as soon as active jobs/slices have completed.'
|
389
|
-
# Logging uses a mutex to access Queue on MRI/CRuby
|
390
|
-
defined?(JRuby) ? logger.warn(message) : puts(message)
|
391
|
-
end
|
392
|
-
rescue StandardError
|
393
|
-
logger.warn 'SIGTERM handler not installed. Not able to shutdown gracefully'
|
394
|
-
end
|
395
|
-
end
|
396
|
-
|
397
|
-
# Requeue any jobs assigned to this worker when it is destroyed
|
398
|
-
def requeue_jobs
|
399
|
-
RocketJob::Job.requeue_dead_worker(name)
|
400
|
-
end
|
401
|
-
|
402
112
|
end
|
403
113
|
end
|
404
114
|
|
data/lib/rocketjob.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require 'semantic_logger'
|
3
|
-
require '
|
4
|
-
require '
|
5
|
-
require 'mongo_mapper'
|
6
|
-
require 'rocket_job/extensions/mongo_mapper'
|
3
|
+
require 'mongoid'
|
4
|
+
require 'rocket_job/extensions/mongo/logging'
|
7
5
|
require 'rocket_job/version'
|
8
6
|
|
9
7
|
# @formatter:off
|
10
8
|
module RocketJob
|
9
|
+
autoload :ActiveServer, 'rocket_job/active_server'
|
11
10
|
autoload :CLI, 'rocket_job/cli'
|
12
11
|
autoload :Config, 'rocket_job/config'
|
13
12
|
autoload :DirmonEntry, 'rocket_job/dirmon_entry'
|
@@ -15,6 +14,8 @@ module RocketJob
|
|
15
14
|
autoload :Job, 'rocket_job/job'
|
16
15
|
autoload :JobException, 'rocket_job/job_exception'
|
17
16
|
autoload :Worker, 'rocket_job/worker'
|
17
|
+
autoload :Performance, 'rocket_job/performance'
|
18
|
+
autoload :Server, 'rocket_job/server'
|
18
19
|
|
19
20
|
module Plugins
|
20
21
|
module Job
|