rocketjob 2.1.3 → 3.0.0.alpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +36 -0
  3. data/lib/rocket_job/active_server.rb +48 -0
  4. data/lib/rocket_job/cli.rb +29 -17
  5. data/lib/rocket_job/config.rb +19 -31
  6. data/lib/rocket_job/dirmon_entry.rb +15 -45
  7. data/lib/rocket_job/extensions/mongo/logging.rb +26 -0
  8. data/lib/rocket_job/extensions/rocket_job_adapter.rb +3 -5
  9. data/lib/rocket_job/heartbeat.rb +18 -23
  10. data/lib/rocket_job/job.rb +0 -1
  11. data/lib/rocket_job/job_exception.rb +11 -13
  12. data/lib/rocket_job/jobs/dirmon_job.rb +8 -8
  13. data/lib/rocket_job/jobs/housekeeping_job.rb +13 -15
  14. data/lib/rocket_job/performance.rb +5 -5
  15. data/lib/rocket_job/plugins/cron.rb +3 -10
  16. data/lib/rocket_job/plugins/document.rb +58 -33
  17. data/lib/rocket_job/plugins/job/model.rb +43 -71
  18. data/lib/rocket_job/plugins/job/persistence.rb +7 -63
  19. data/lib/rocket_job/plugins/job/worker.rb +24 -26
  20. data/lib/rocket_job/plugins/processing_window.rb +6 -9
  21. data/lib/rocket_job/plugins/retry.rb +3 -8
  22. data/lib/rocket_job/plugins/singleton.rb +1 -1
  23. data/lib/rocket_job/plugins/state_machine.rb +1 -7
  24. data/lib/rocket_job/server.rb +352 -0
  25. data/lib/rocket_job/version.rb +1 -1
  26. data/lib/rocket_job/worker.rb +46 -336
  27. data/lib/rocketjob.rb +5 -4
  28. data/test/config/mongoid.yml +88 -0
  29. data/test/config_test.rb +1 -1
  30. data/test/dirmon_entry_test.rb +15 -79
  31. data/test/dirmon_job_test.rb +6 -6
  32. data/test/job_test.rb +2 -2
  33. data/test/plugins/job/callbacks_test.rb +40 -32
  34. data/test/plugins/job/defaults_test.rb +10 -8
  35. data/test/plugins/job/model_test.rb +1 -3
  36. data/test/plugins/job/persistence_test.rb +11 -13
  37. data/test/plugins/job/worker_test.rb +45 -26
  38. data/test/plugins/processing_window_test.rb +4 -4
  39. data/test/plugins/restart_test.rb +11 -12
  40. data/test/plugins/state_machine_event_callbacks_test.rb +20 -18
  41. data/test/plugins/state_machine_test.rb +5 -5
  42. data/test/test_helper.rb +4 -1
  43. metadata +15 -29
  44. data/lib/rocket_job/extensions/mongo.rb +0 -23
  45. data/lib/rocket_job/extensions/mongo_mapper.rb +0 -30
  46. data/lib/rocket_job/plugins/job/defaults.rb +0 -40
  47. data/test/config/mongo.yml +0 -46
@@ -10,18 +10,7 @@ module RocketJob
10
10
 
11
11
  included do
12
12
  # Store all job types in this collection
13
- set_collection_name 'rocket_job.jobs'
14
-
15
- # Create indexes
16
- def self.create_indexes
17
- # Used by find_and_modify in .rocket_job_retrieve
18
- ensure_index({state: 1, priority: 1, _id: 1}, background: true)
19
- # Remove outdated indexes if present
20
- drop_index('state_1_run_at_1_priority_1_created_at_1_sub_state_1') rescue nil
21
- drop_index('state_1_priority_1_created_at_1_sub_state_1') rescue nil
22
- drop_index('state_1_priority_1_created_at_1') rescue nil
23
- drop_index('created_at_1') rescue nil
24
- end
13
+ store_in collection: 'rocket_job.jobs'
25
14
 
26
15
  # Retrieves the next job to work on in priority based order
27
16
  # and assigns it to this worker
@@ -35,40 +24,12 @@ module RocketJob
35
24
  # skip_job_ids [Array<BSON::ObjectId>]
36
25
  # Job ids to exclude when looking for the next job
37
26
  def self.rocket_job_retrieve(worker_name, skip_job_ids = nil)
38
- run_at = [
39
- {run_at: {'$exists' => false}},
40
- {run_at: {'$lte' => Time.now}}
41
- ]
42
- update = query = nil
43
- if defined?(RocketJobPro)
44
- query = {
45
- '$and' => [
46
- {
47
- '$or' => [
48
- {'state' => 'queued'}, # Jobs
49
- {'state' => 'running', 'sub_state' => :processing} # Slices
50
- ]
51
- },
52
- {
53
- '$or' => run_at
54
- }
55
- ]
56
- }
57
- update = {'$set' => {'worker_name' => worker_name, 'state' => 'running'}}
58
- else
59
- query = {'state' => 'queued', '$or' => run_at}
60
- update = {'$set' => {'worker_name' => worker_name, 'state' => 'running', 'started_at' => Time.now}}
61
- end
27
+ query = queued_now
28
+ update = {'$set' => {'worker_name' => worker_name, 'state' => 'running', 'started_at' => Time.now}}
62
29
 
63
- query['_id'] = {'$nin' => skip_job_ids} if skip_job_ids && skip_job_ids.size > 0
30
+ query = query.where(:id.nin => skip_job_ids) if skip_job_ids && skip_job_ids.size > 0
64
31
 
65
- if doc = find_and_modify(
66
- query: query,
67
- sort: {priority: 1, _id: 1},
68
- update: update
69
- )
70
- load(doc)
71
- end
32
+ query.sort(priority: 1, _id: 1).find_one_and_update(update)
72
33
  end
73
34
 
74
35
  # Returns [Hash<String:Integer>] of the number of jobs in each state
@@ -114,7 +75,7 @@ module RocketJob
114
75
 
115
76
  # Calculate :queued_now and :scheduled if there are queued jobs
116
77
  if queued_count = counts[:queued]
117
- scheduled_count = RocketJob::Job.where(state: :queued, run_at: {'$gt' => Time.now}).count
78
+ scheduled_count = RocketJob::Job.scheduled.count
118
79
  if scheduled_count > 0
119
80
  queued_now_count = queued_count - scheduled_count
120
81
  counts[:queued_now] = queued_count - scheduled_count if queued_now_count > 0
@@ -133,7 +94,7 @@ module RocketJob
133
94
  return super unless destroy_on_complete
134
95
  begin
135
96
  super
136
- rescue MongoMapper::DocumentNotFound
97
+ rescue Mongoid::Errors::DocumentNotFound
137
98
  unless completed?
138
99
  self.state = :completed
139
100
  rocket_job_set_completed_at
@@ -143,23 +104,6 @@ module RocketJob
143
104
  end
144
105
  end
145
106
 
146
- private
147
-
148
- # After this model is loaded, convert any hashes in the arguments list to HashWithIndifferentAccess
149
- def load_from_database(*args)
150
- super
151
- if arguments.present?
152
- self.arguments = arguments.collect { |i| i.is_a?(BSON::OrderedHash) ? i.with_indifferent_access : i }
153
- end
154
- end
155
-
156
- # Apply RocketJob defaults after initializing default values
157
- # but before setting attributes. after_initialize is too late
158
- def initialize_default_values(except = {})
159
- super
160
- rocket_job_set_defaults
161
- end
162
-
163
107
  end
164
108
  end
165
109
  end
@@ -8,15 +8,15 @@ module RocketJob
8
8
  module Worker
9
9
  extend ActiveSupport::Concern
10
10
 
11
- included do
11
+ module ClassMethods
12
12
  # Run this job later
13
13
  #
14
14
  # Saves it to the database for processing later by workers
15
- def self.perform_later(*args, &block)
15
+ def perform_later(args, &block)
16
16
  if RocketJob::Config.inline_mode
17
- perform_now(*args, &block)
17
+ perform_now(args, &block)
18
18
  else
19
- job = new(arguments: args)
19
+ job = new(args)
20
20
  block.call(job) if block
21
21
  job.save!
22
22
  job
@@ -28,8 +28,8 @@ module RocketJob
28
28
  # The job is not saved to the database since it is processed entirely in memory
29
29
  # As a result before_save and before_destroy callbacks will not be called.
30
30
  # Validations are still called however prior to calling #perform
31
- def self.perform_now(*args, &block)
32
- job = new(arguments: args)
31
+ def perform_now(args, &block)
32
+ job = new(args)
33
33
  block.call(job) if block
34
34
  job.perform_now
35
35
  job
@@ -48,7 +48,7 @@ module RocketJob
48
48
  #
49
49
  # Note:
50
50
  # If a job is in queued state it will be started
51
- def self.rocket_job_next_job(worker_name, skip_job_ids = nil)
51
+ def rocket_job_next_job(worker_name, skip_job_ids = nil)
52
52
  while (job = rocket_job_retrieve(worker_name, skip_job_ids))
53
53
  case
54
54
  when job.running?
@@ -67,16 +67,13 @@ module RocketJob
67
67
  end
68
68
  end
69
69
 
70
- # Requeues all jobs that were running on worker that died
71
- def self.requeue_dead_worker(worker_name)
72
- # TODO Need to requeue paused, failed since user may have transitioned job before it finished
73
- running.each do |job|
74
- job.requeue!(worker_name) if job.may_requeue?(worker_name)
70
+ # Requeues all jobs that were running on a server that died
71
+ def requeue_dead_server(server_name)
72
+ # Need to requeue paused, failed since user may have transitioned job before it finished
73
+ where(:state.in => [:running, :paused, :failed]).each do |job|
74
+ job.requeue!(server_name) if job.may_requeue?(server_name)
75
75
  end
76
76
  end
77
-
78
- # Turn off embedded callbacks. Slow and not used for Jobs
79
- embedded_callbacks_off
80
77
  end
81
78
 
82
79
  # Runs the job now in the current thread.
@@ -91,14 +88,9 @@ module RocketJob
91
88
  #
92
89
  # Exceptions are _not_ suppressed and should be handled by the caller.
93
90
  def perform_now
94
- # Call validations
95
- if respond_to?(:validate!)
96
- validate!
97
- elsif invalid?
98
- raise(MongoMapper::DocumentNotValid, self)
99
- end
100
- worker = RocketJob::Worker.new(name: 'inline')
101
- worker.started
91
+ raise(Mongoid::Errors::Validations, self) unless valid?
92
+
93
+ worker = RocketJob::Worker.new(inline: true)
102
94
  start if may_start?
103
95
  # Re-Raise exceptions
104
96
  rocket_job_work(worker, true) if running?
@@ -114,7 +106,7 @@ module RocketJob
114
106
  # The job is automatically saved only if an exception is raised in the supplied block.
115
107
  #
116
108
  # worker_name: [String]
117
- # Name of the worker on which the exception has occurred
109
+ # Name of the server on which the exception has occurred
118
110
  #
119
111
  # re_raise_exceptions: [true|false]
120
112
  # Re-raise the exception after updating the job
@@ -150,10 +142,10 @@ module RocketJob
150
142
  run_callbacks :perform do
151
143
  # Allow callbacks to fail, complete or abort the job
152
144
  if running?
153
- ret = perform(*arguments)
145
+ ret = perform
154
146
  if collect_output?
155
147
  # Result must be a Hash, if not put it in a Hash
156
- self.result = (ret.is_a?(Hash) || ret.is_a?(BSON::OrderedHash)) ? ret : {result: ret}
148
+ self.result = ret.is_a?(Hash) ? ret : {'result' => ret}
157
149
  end
158
150
  end
159
151
  end
@@ -166,6 +158,12 @@ module RocketJob
166
158
  false
167
159
  end
168
160
 
161
+ # Returns [Hash<String:[Array<ActiveServer>]>] All servers actively working on this job
162
+ def rocket_job_active_servers
163
+ return {} unless running?
164
+ {worker_name => [ActiveServer.new(worker_name, started_at, self)]}
165
+ end
166
+
169
167
  end
170
168
  end
171
169
  end
@@ -20,13 +20,10 @@ module RocketJob
20
20
  # class BusinessHoursJob < RocketJob::Job
21
21
  # include RocketJob::Plugins::ProcessingWindow
22
22
  #
23
- # # Set the default processing_window
24
- # rocket_job do |job|
25
- # # The start of the processing window
26
- # job.processing_schedule = "30 8 * * * America/New_York"
27
- # # How long the processing window is:
28
- # job.processing_duration = 12.hours
29
- # end
23
+ # # The start of the processing window
24
+ # self.processing_schedule = "30 8 * * * America/New_York"
25
+ # # How long the processing window is:
26
+ # self.processing_duration = 12.hours
30
27
  #
31
28
  # def perform
32
29
  # # Job will only run between 8:30am and 8:30pm Eastern
@@ -41,8 +38,8 @@ module RocketJob
41
38
  extend ActiveSupport::Concern
42
39
 
43
40
  included do
44
- key :processing_schedule, String
45
- key :processing_duration, Integer
41
+ field :processing_schedule, type: String, class_attribute: true
42
+ field :processing_duration, type: Integer, class_attribute: true
46
43
 
47
44
  before_create :rocket_job_processing_window_set_run_at
48
45
  before_retry :rocket_job_processing_window_set_run_at
@@ -26,9 +26,7 @@ module RocketJob
26
26
  # include RocketJob::Plugins::Retry
27
27
  #
28
28
  # # Set the default retry_count
29
- # rocket_job do |job|
30
- # job.max_retries = 3
31
- # end
29
+ # self.max_retries = 3
32
30
  #
33
31
  # def perform
34
32
  # puts "DONE"
@@ -52,13 +50,10 @@ module RocketJob
52
50
 
53
51
  # Maximum number of times to retry this job
54
52
  # 25 is approximately 3 weeks of retries
55
- key :max_retries, Integer, default: 25
53
+ field :max_retries, type: Integer, default: 25, class_attribute: true, user_editable: true
56
54
 
57
55
  # List of times when this job failed
58
- key :failed_times, Array
59
-
60
- # Make max_retries editable in Rocket Job Mission Control
61
- public_rocket_job_properties :max_retries
56
+ field :failed_times, type: Array, default: []
62
57
 
63
58
  validates_presence_of :max_retries
64
59
  end
@@ -17,7 +17,7 @@ module RocketJob
17
17
 
18
18
  # Returns [true|false] whether another instance of this job is already active
19
19
  def rocket_job_singleton_active?
20
- self.class.where(state: [:running, :queued], _id: {'$ne' => id}).exists?
20
+ self.class.where(:state.in => [:running, :queued], :id.ne => id).exists?
21
21
  end
22
22
  end
23
23
 
@@ -86,13 +86,7 @@ module RocketJob
86
86
  write_attribute(attr_name, state)
87
87
 
88
88
  begin
89
- if aasm_skipping_validations(name)
90
- saved = save(validate: false)
91
- write_attribute(attr_name, old_value) unless saved
92
- saved
93
- else
94
- save!
95
- end
89
+ save!
96
90
  rescue Exception => exc
97
91
  write_attribute(attr_name, old_value)
98
92
  raise(exc)
@@ -0,0 +1,352 @@
1
+ # encoding: UTF-8
2
+ require 'concurrent'
3
+ module RocketJob
4
+ # Server
5
+ #
6
+ # On startup a server instance will automatically register itself
7
+ # if not already present
8
+ #
9
+ # Starting a server in the foreground:
10
+ # - Using a Rails runner:
11
+ # bin/rocketjob
12
+ #
13
+ # Starting a server in the background:
14
+ # - Using a Rails runner:
15
+ # nohup bin/rocketjob --quiet 2>&1 1>output.log &
16
+ #
17
+ # Stopping a server:
18
+ # - Stop the server via the Web UI
19
+ # - Send a regular kill signal to make it shutdown once all active work is complete
20
+ # kill <pid>
21
+ # - Or, use the following Ruby code:
22
+ # server = RocketJob::Server.where(name: 'server name').first
23
+ # server.stop!
24
+ #
25
+ # Sending the kill signal locally will result in starting the shutdown process
26
+ # immediately. Via the UI or Ruby code the server can take up to 15 seconds
27
+ # (the heartbeat interval) to start shutting down.
28
+ class Server
29
+ include Plugins::Document
30
+ include Plugins::StateMachine
31
+ include SemanticLogger::Loggable
32
+
33
+ # Unique Name of this server instance
34
+ # Default: `host name:PID`
35
+ # The unique name is used on re-start to re-queue any jobs that were being processed
36
+ # at the time the server unexpectedly terminated, if any
37
+ field :name, type: String, default: -> { "#{SemanticLogger.host}:#{$$}" }
38
+
39
+ # The maximum number of workers this server should start
40
+ # If set, it will override the default value in RocketJob::Config
41
+ field :max_workers, type: Integer, default: -> { Config.instance.max_worker_threads }
42
+
43
+ # When this server process was started
44
+ field :started_at, type: Time
45
+
46
+ # The heartbeat information for this server
47
+ embeds_one :heartbeat, class_name: 'RocketJob::Heartbeat'
48
+
49
+ # Current state
50
+ # Internal use only. Do not set this field directly
51
+ field :state, type: Symbol, default: :starting
52
+
53
+ index({name: 1}, background: true, unique: true, drop_dups: true)
54
+
55
+ validates_presence_of :state, :name, :max_workers
56
+
57
+ # States
58
+ # :starting -> :running -> :paused
59
+ # -> :stopping
60
+ aasm column: :state do
61
+ state :starting, initial: true
62
+ state :running
63
+ state :paused
64
+ state :stopping
65
+
66
+ event :started do
67
+ transitions from: :starting, to: :running
68
+ before do
69
+ self.started_at = Time.now
70
+ end
71
+ end
72
+
73
+ event :pause do
74
+ transitions from: :running, to: :paused
75
+ end
76
+
77
+ event :resume do
78
+ transitions from: :paused, to: :running
79
+ end
80
+
81
+ event :stop do
82
+ transitions from: :running, to: :stopping
83
+ transitions from: :paused, to: :stopping
84
+ transitions from: :starting, to: :stopping
85
+ end
86
+ end
87
+
88
+ # Requeue any jobs being worked by this server when it is destroyed
89
+ before_destroy :requeue_jobs
90
+
91
+ # Destroys all instances of zombie servers and requeues any jobs still "running"
92
+ # on those servers
93
+ def self.destroy_zombies
94
+ count = 0
95
+ each do |server|
96
+ next unless server.zombie?
97
+ logger.warn "Destroying zombie server #{server.name}, and requeueing its jobs"
98
+ server.destroy
99
+ count += 1
100
+ end
101
+ count
102
+ end
103
+
104
+ # Stop all running, paused, or starting servers
105
+ def self.stop_all
106
+ where(:state.in => [:running, :paused, :starting]).each(&:stop!)
107
+ end
108
+
109
+ # Pause all running servers
110
+ def self.pause_all
111
+ running.each(&:pause!)
112
+ end
113
+
114
+ # Resume all paused servers
115
+ def self.resume_all
116
+ paused.each(&:resume!)
117
+ end
118
+
119
+ # Returns [Hash<String:Integer>] of the number of servers in each state.
120
+ # Note: If there are no servers in that particular state then the hash will not have a value for it.
121
+ #
122
+ # Example servers in every state:
123
+ # RocketJob::Server.counts_by_state
124
+ # # => {
125
+ # :aborted => 1,
126
+ # :completed => 37,
127
+ # :failed => 1,
128
+ # :paused => 3,
129
+ # :queued => 4,
130
+ # :running => 1,
131
+ # :queued_now => 1,
132
+ # :scheduled => 3
133
+ # }
134
+ #
135
+ # Example no servers active:
136
+ # RocketJob::Server.counts_by_state
137
+ # # => {}
138
+ def self.counts_by_state
139
+ counts = {}
140
+ collection.aggregate([
141
+ {
142
+ '$group' => {
143
+ _id: '$state',
144
+ count: {'$sum' => 1}
145
+ }
146
+ }
147
+ ]
148
+ ).each do |result|
149
+ counts[result['_id'].to_sym] = result['count']
150
+ end
151
+ counts
152
+ end
153
+
154
+ # On MRI the 'concurrent-ruby-ext' gem may not be loaded
155
+ if defined?(Concurrent::JavaAtomicBoolean) || defined?(Concurrent::CAtomicBoolean)
156
+ # Returns [true|false] whether the shutdown indicator has been set for this server process
157
+ def self.shutdown?
158
+ @@shutdown.value
159
+ end
160
+
161
+ # Set shutdown indicator for this server process
162
+ def self.shutdown!
163
+ @@shutdown.make_true
164
+ end
165
+
166
+ @@shutdown = Concurrent::AtomicBoolean.new(false)
167
+ else
168
+ # Returns [true|false] whether the shutdown indicator has been set for this server process
169
+ def self.shutdown?
170
+ @@shutdown
171
+ end
172
+
173
+ # Set shutdown indicator for this server process
174
+ def self.shutdown!
175
+ @@shutdown = true
176
+ end
177
+
178
+ @@shutdown = false
179
+ end
180
+
181
+ # Run the server process
182
+ # Attributes supplied are passed to #new
183
+ def self.run(attrs = {})
184
+ Thread.current.name = 'rocketjob main'
185
+ # Create Indexes on server startup
186
+ Mongoid::Tasks::Database.create_indexes
187
+ register_signal_handlers
188
+
189
+ server = create!(attrs)
190
+ server.send(:run)
191
+
192
+ ensure
193
+ server.destroy if server
194
+ end
195
+
196
+ # Returns [Boolean] whether the server is shutting down
197
+ def shutdown?
198
+ self.class.shutdown? || !running?
199
+ end
200
+
201
+ # Returns [true|false] if this server has missed at least the last 4 heartbeats
202
+ #
203
+ # Possible causes for a server to miss its heartbeats:
204
+ # - The server process has died
205
+ # - The server process is "hanging"
206
+ # - The server is no longer able to communicate with the MongoDB Server
207
+ def zombie?(missed = 4)
208
+ return false unless running? || stopping?
209
+ return true if heartbeat.nil? || heartbeat.updated_at.nil?
210
+ dead_seconds = Config.instance.heartbeat_seconds * missed
211
+ (Time.now - heartbeat.updated_at) >= dead_seconds
212
+ end
213
+
214
+ private
215
+
216
+ attr_reader :workers
217
+
218
+ # Returns [Array<Worker>] collection of workers
219
+ def workers
220
+ @workers ||= []
221
+ end
222
+
223
+ # Management Thread
224
+ def run
225
+ logger.info "Using MongoDB Database: #{RocketJob::Job.collection.database.name}"
226
+ build_heartbeat(updated_at: Time.now, current_threads: 0)
227
+ started!
228
+ adjust_workers(true)
229
+ logger.info "RocketJob Server started with #{workers.size} workers running"
230
+
231
+ while running? || paused?
232
+ sleep Config.instance.heartbeat_seconds
233
+
234
+ find_and_update(
235
+ 'heartbeat.updated_at' => Time.now,
236
+ 'heartbeat.current_threads' => worker_count
237
+ )
238
+
239
+ # In case number of threads has been modified
240
+ adjust_workers
241
+
242
+ # Stop server if shutdown indicator was set
243
+ stop! if self.class.shutdown? && may_stop?
244
+ end
245
+
246
+ logger.info 'Waiting for workers to stop'
247
+ # Tell each worker to shutdown cleanly
248
+ workers.each(&:shutdown!)
249
+
250
+ while worker = workers.first
251
+ if worker.join(5)
252
+ # Worker thread is dead
253
+ workers.shift
254
+ else
255
+ # Timeout waiting for worker to stop
256
+ begin
257
+ find_and_update(
258
+ 'heartbeat.updated_at' => Time.now,
259
+ 'heartbeat.current_threads' => worker_count
260
+ )
261
+ rescue Mongoid::Errors::DocumentNotFound
262
+ logger.warn('Server has been destroyed. Going down hard!')
263
+ break
264
+ end
265
+ end
266
+ end
267
+
268
+ # Logs the backtrace for each running worker
269
+ if SemanticLogger::VERSION.to_i >= 4
270
+ workers.each { |thread| logger.backtrace(thread: thread) }
271
+ end
272
+ logger.info 'Shutdown'
273
+ rescue Exception => exc
274
+ logger.error('RocketJob::Server is stopping due to an exception', exc)
275
+ end
276
+
277
+ # Returns [Fixnum] number of workers (threads) that are alive
278
+ def worker_count
279
+ workers.count(&:alive?)
280
+ end
281
+
282
+ def next_worker_id
283
+ @worker_id ||= 0
284
+ @worker_id += 1
285
+ end
286
+
287
+ # Re-adjust the number of running workers to get it up to the
288
+ # required number of workers
289
+ # Parameters
290
+ # stagger_workers
291
+ # Whether to stagger when the workers poll for work the first time
292
+ # It spreads out the queue polling over the max_poll_seconds so
293
+ # that not all workers poll at the same time
294
+ # The workers also respond faster than max_poll_seconds when a new
295
+ # job is added.
296
+ def adjust_workers(stagger_workers=false)
297
+ count = worker_count
298
+ # Cleanup workers that have stopped
299
+ if count != workers.count
300
+ logger.info "Cleaning up #{workers.count - count} workers that went away"
301
+ workers.delete_if { |t| !t.alive? }
302
+ end
303
+
304
+ # Need to add more workers?
305
+ if count < max_workers
306
+ worker_count = max_workers - count
307
+ logger.info "Starting #{worker_count} workers"
308
+ worker_count.times.each do
309
+ sleep (Config.instance.max_poll_seconds.to_f / max_workers) * (next_worker_id - 1) if stagger_workers
310
+ return if shutdown?
311
+ # Start worker
312
+ begin
313
+ workers << Worker.new(id: next_worker_id, server_name: name)
314
+ rescue Exception => exc
315
+ logger.fatal('Cannot start worker', exc)
316
+ end
317
+ end
318
+ end
319
+ end
320
+
321
+ # Register handlers for the various signals
322
+ # Term:
323
+ # Perform clean shutdown
324
+ #
325
+ def self.register_signal_handlers
326
+ begin
327
+ Signal.trap 'SIGTERM' do
328
+ shutdown!
329
+ message = 'Shutdown signal (SIGTERM) received. Will shutdown as soon as active jobs/slices have completed.'
330
+ # Logging uses a mutex to access Queue on MRI/CRuby
331
+ defined?(JRuby) ? logger.warn(message) : puts(message)
332
+ end
333
+
334
+ Signal.trap 'INT' do
335
+ shutdown!
336
+ message = 'Shutdown signal (INT) received. Will shutdown as soon as active jobs/slices have completed.'
337
+ # Logging uses a mutex to access Queue on MRI/CRuby
338
+ defined?(JRuby) ? logger.warn(message) : puts(message)
339
+ end
340
+ rescue StandardError
341
+ logger.warn 'SIGTERM handler not installed. Not able to shutdown gracefully'
342
+ end
343
+ end
344
+
345
+ # Requeue any jobs assigned to this server when it is destroyed
346
+ def requeue_jobs
347
+ RocketJob::Job.requeue_dead_server(name)
348
+ end
349
+
350
+ end
351
+ end
352
+