rocketjob 2.1.3 → 3.0.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +36 -0
  3. data/lib/rocket_job/active_server.rb +48 -0
  4. data/lib/rocket_job/cli.rb +29 -17
  5. data/lib/rocket_job/config.rb +19 -31
  6. data/lib/rocket_job/dirmon_entry.rb +15 -45
  7. data/lib/rocket_job/extensions/mongo/logging.rb +26 -0
  8. data/lib/rocket_job/extensions/rocket_job_adapter.rb +3 -5
  9. data/lib/rocket_job/heartbeat.rb +18 -23
  10. data/lib/rocket_job/job.rb +0 -1
  11. data/lib/rocket_job/job_exception.rb +11 -13
  12. data/lib/rocket_job/jobs/dirmon_job.rb +8 -8
  13. data/lib/rocket_job/jobs/housekeeping_job.rb +13 -15
  14. data/lib/rocket_job/performance.rb +5 -5
  15. data/lib/rocket_job/plugins/cron.rb +3 -10
  16. data/lib/rocket_job/plugins/document.rb +58 -33
  17. data/lib/rocket_job/plugins/job/model.rb +43 -71
  18. data/lib/rocket_job/plugins/job/persistence.rb +7 -63
  19. data/lib/rocket_job/plugins/job/worker.rb +24 -26
  20. data/lib/rocket_job/plugins/processing_window.rb +6 -9
  21. data/lib/rocket_job/plugins/retry.rb +3 -8
  22. data/lib/rocket_job/plugins/singleton.rb +1 -1
  23. data/lib/rocket_job/plugins/state_machine.rb +1 -7
  24. data/lib/rocket_job/server.rb +352 -0
  25. data/lib/rocket_job/version.rb +1 -1
  26. data/lib/rocket_job/worker.rb +46 -336
  27. data/lib/rocketjob.rb +5 -4
  28. data/test/config/mongoid.yml +88 -0
  29. data/test/config_test.rb +1 -1
  30. data/test/dirmon_entry_test.rb +15 -79
  31. data/test/dirmon_job_test.rb +6 -6
  32. data/test/job_test.rb +2 -2
  33. data/test/plugins/job/callbacks_test.rb +40 -32
  34. data/test/plugins/job/defaults_test.rb +10 -8
  35. data/test/plugins/job/model_test.rb +1 -3
  36. data/test/plugins/job/persistence_test.rb +11 -13
  37. data/test/plugins/job/worker_test.rb +45 -26
  38. data/test/plugins/processing_window_test.rb +4 -4
  39. data/test/plugins/restart_test.rb +11 -12
  40. data/test/plugins/state_machine_event_callbacks_test.rb +20 -18
  41. data/test/plugins/state_machine_test.rb +5 -5
  42. data/test/test_helper.rb +4 -1
  43. metadata +15 -29
  44. data/lib/rocket_job/extensions/mongo.rb +0 -23
  45. data/lib/rocket_job/extensions/mongo_mapper.rb +0 -30
  46. data/lib/rocket_job/plugins/job/defaults.rb +0 -40
  47. data/test/config/mongo.yml +0 -46
@@ -10,18 +10,7 @@ module RocketJob
10
10
 
11
11
  included do
12
12
  # Store all job types in this collection
13
- set_collection_name 'rocket_job.jobs'
14
-
15
- # Create indexes
16
- def self.create_indexes
17
- # Used by find_and_modify in .rocket_job_retrieve
18
- ensure_index({state: 1, priority: 1, _id: 1}, background: true)
19
- # Remove outdated indexes if present
20
- drop_index('state_1_run_at_1_priority_1_created_at_1_sub_state_1') rescue nil
21
- drop_index('state_1_priority_1_created_at_1_sub_state_1') rescue nil
22
- drop_index('state_1_priority_1_created_at_1') rescue nil
23
- drop_index('created_at_1') rescue nil
24
- end
13
+ store_in collection: 'rocket_job.jobs'
25
14
 
26
15
  # Retrieves the next job to work on in priority based order
27
16
  # and assigns it to this worker
@@ -35,40 +24,12 @@ module RocketJob
35
24
  # skip_job_ids [Array<BSON::ObjectId>]
36
25
  # Job ids to exclude when looking for the next job
37
26
  def self.rocket_job_retrieve(worker_name, skip_job_ids = nil)
38
- run_at = [
39
- {run_at: {'$exists' => false}},
40
- {run_at: {'$lte' => Time.now}}
41
- ]
42
- update = query = nil
43
- if defined?(RocketJobPro)
44
- query = {
45
- '$and' => [
46
- {
47
- '$or' => [
48
- {'state' => 'queued'}, # Jobs
49
- {'state' => 'running', 'sub_state' => :processing} # Slices
50
- ]
51
- },
52
- {
53
- '$or' => run_at
54
- }
55
- ]
56
- }
57
- update = {'$set' => {'worker_name' => worker_name, 'state' => 'running'}}
58
- else
59
- query = {'state' => 'queued', '$or' => run_at}
60
- update = {'$set' => {'worker_name' => worker_name, 'state' => 'running', 'started_at' => Time.now}}
61
- end
27
+ query = queued_now
28
+ update = {'$set' => {'worker_name' => worker_name, 'state' => 'running', 'started_at' => Time.now}}
62
29
 
63
- query['_id'] = {'$nin' => skip_job_ids} if skip_job_ids && skip_job_ids.size > 0
30
+ query = query.where(:id.nin => skip_job_ids) if skip_job_ids && skip_job_ids.size > 0
64
31
 
65
- if doc = find_and_modify(
66
- query: query,
67
- sort: {priority: 1, _id: 1},
68
- update: update
69
- )
70
- load(doc)
71
- end
32
+ query.sort(priority: 1, _id: 1).find_one_and_update(update)
72
33
  end
73
34
 
74
35
  # Returns [Hash<String:Integer>] of the number of jobs in each state
@@ -114,7 +75,7 @@ module RocketJob
114
75
 
115
76
  # Calculate :queued_now and :scheduled if there are queued jobs
116
77
  if queued_count = counts[:queued]
117
- scheduled_count = RocketJob::Job.where(state: :queued, run_at: {'$gt' => Time.now}).count
78
+ scheduled_count = RocketJob::Job.scheduled.count
118
79
  if scheduled_count > 0
119
80
  queued_now_count = queued_count - scheduled_count
120
81
  counts[:queued_now] = queued_count - scheduled_count if queued_now_count > 0
@@ -133,7 +94,7 @@ module RocketJob
133
94
  return super unless destroy_on_complete
134
95
  begin
135
96
  super
136
- rescue MongoMapper::DocumentNotFound
97
+ rescue Mongoid::Errors::DocumentNotFound
137
98
  unless completed?
138
99
  self.state = :completed
139
100
  rocket_job_set_completed_at
@@ -143,23 +104,6 @@ module RocketJob
143
104
  end
144
105
  end
145
106
 
146
- private
147
-
148
- # After this model is loaded, convert any hashes in the arguments list to HashWithIndifferentAccess
149
- def load_from_database(*args)
150
- super
151
- if arguments.present?
152
- self.arguments = arguments.collect { |i| i.is_a?(BSON::OrderedHash) ? i.with_indifferent_access : i }
153
- end
154
- end
155
-
156
- # Apply RocketJob defaults after initializing default values
157
- # but before setting attributes. after_initialize is too late
158
- def initialize_default_values(except = {})
159
- super
160
- rocket_job_set_defaults
161
- end
162
-
163
107
  end
164
108
  end
165
109
  end
@@ -8,15 +8,15 @@ module RocketJob
8
8
  module Worker
9
9
  extend ActiveSupport::Concern
10
10
 
11
- included do
11
+ module ClassMethods
12
12
  # Run this job later
13
13
  #
14
14
  # Saves it to the database for processing later by workers
15
- def self.perform_later(*args, &block)
15
+ def perform_later(args, &block)
16
16
  if RocketJob::Config.inline_mode
17
- perform_now(*args, &block)
17
+ perform_now(args, &block)
18
18
  else
19
- job = new(arguments: args)
19
+ job = new(args)
20
20
  block.call(job) if block
21
21
  job.save!
22
22
  job
@@ -28,8 +28,8 @@ module RocketJob
28
28
  # The job is not saved to the database since it is processed entirely in memory
29
29
  # As a result before_save and before_destroy callbacks will not be called.
30
30
  # Validations are still called however prior to calling #perform
31
- def self.perform_now(*args, &block)
32
- job = new(arguments: args)
31
+ def perform_now(args, &block)
32
+ job = new(args)
33
33
  block.call(job) if block
34
34
  job.perform_now
35
35
  job
@@ -48,7 +48,7 @@ module RocketJob
48
48
  #
49
49
  # Note:
50
50
  # If a job is in queued state it will be started
51
- def self.rocket_job_next_job(worker_name, skip_job_ids = nil)
51
+ def rocket_job_next_job(worker_name, skip_job_ids = nil)
52
52
  while (job = rocket_job_retrieve(worker_name, skip_job_ids))
53
53
  case
54
54
  when job.running?
@@ -67,16 +67,13 @@ module RocketJob
67
67
  end
68
68
  end
69
69
 
70
- # Requeues all jobs that were running on worker that died
71
- def self.requeue_dead_worker(worker_name)
72
- # TODO Need to requeue paused, failed since user may have transitioned job before it finished
73
- running.each do |job|
74
- job.requeue!(worker_name) if job.may_requeue?(worker_name)
70
+ # Requeues all jobs that were running on a server that died
71
+ def requeue_dead_server(server_name)
72
+ # Need to requeue paused, failed since user may have transitioned job before it finished
73
+ where(:state.in => [:running, :paused, :faled]).each do |job|
74
+ job.requeue!(server_name) if job.may_requeue?(server_name)
75
75
  end
76
76
  end
77
-
78
- # Turn off embedded callbacks. Slow and not used for Jobs
79
- embedded_callbacks_off
80
77
  end
81
78
 
82
79
  # Runs the job now in the current thread.
@@ -91,14 +88,9 @@ module RocketJob
91
88
  #
92
89
  # Exceptions are _not_ suppressed and should be handled by the caller.
93
90
  def perform_now
94
- # Call validations
95
- if respond_to?(:validate!)
96
- validate!
97
- elsif invalid?
98
- raise(MongoMapper::DocumentNotValid, self)
99
- end
100
- worker = RocketJob::Worker.new(name: 'inline')
101
- worker.started
91
+ raise(Mongoid::Errors::Validations, self) unless valid?
92
+
93
+ worker = RocketJob::Worker.new(inline: true)
102
94
  start if may_start?
103
95
  # Re-Raise exceptions
104
96
  rocket_job_work(worker, true) if running?
@@ -114,7 +106,7 @@ module RocketJob
114
106
  # The job is automatically saved only if an exception is raised in the supplied block.
115
107
  #
116
108
  # worker_name: [String]
117
- # Name of the worker on which the exception has occurred
109
+ # Name of the server on which the exception has occurred
118
110
  #
119
111
  # re_raise_exceptions: [true|false]
120
112
  # Re-raise the exception after updating the job
@@ -150,10 +142,10 @@ module RocketJob
150
142
  run_callbacks :perform do
151
143
  # Allow callbacks to fail, complete or abort the job
152
144
  if running?
153
- ret = perform(*arguments)
145
+ ret = perform
154
146
  if collect_output?
155
147
  # Result must be a Hash, if not put it in a Hash
156
- self.result = (ret.is_a?(Hash) || ret.is_a?(BSON::OrderedHash)) ? ret : {result: ret}
148
+ self.result = ret.is_a?(Hash) ? ret : {'result' => ret}
157
149
  end
158
150
  end
159
151
  end
@@ -166,6 +158,12 @@ module RocketJob
166
158
  false
167
159
  end
168
160
 
161
+ # Returns [Hash<String:[Array<ActiveServer>]>] All servers actively working on this job
162
+ def rocket_job_active_servers
163
+ return {} unless running?
164
+ {worker_name => [ActiveServer.new(worker_name, started_at, self)]}
165
+ end
166
+
169
167
  end
170
168
  end
171
169
  end
@@ -20,13 +20,10 @@ module RocketJob
20
20
  # class BusinessHoursJob < RocketJob::Job
21
21
  # include RocketJob::Plugins::ProcessingWindow
22
22
  #
23
- # # Set the default processing_window
24
- # rocket_job do |job|
25
- # # The start of the processing window
26
- # job.processing_schedule = "30 8 * * * America/New_York"
27
- # # How long the processing window is:
28
- # job.processing_duration = 12.hours
29
- # end
23
+ # # The start of the processing window
24
+ # self.processing_schedule = "30 8 * * * America/New_York"
25
+ # # How long the processing window is:
26
+ # self.processing_duration = 12.hours
30
27
  #
31
28
  # def perform
32
29
  # # Job will only run between 8:30am and 8:30pm Eastern
@@ -41,8 +38,8 @@ module RocketJob
41
38
  extend ActiveSupport::Concern
42
39
 
43
40
  included do
44
- key :processing_schedule, String
45
- key :processing_duration, Integer
41
+ field :processing_schedule, type: String, class_attribute: true
42
+ field :processing_duration, type: Integer, class_attribute: true
46
43
 
47
44
  before_create :rocket_job_processing_window_set_run_at
48
45
  before_retry :rocket_job_processing_window_set_run_at
@@ -26,9 +26,7 @@ module RocketJob
26
26
  # include RocketJob::Plugins::Retry
27
27
  #
28
28
  # # Set the default retry_count
29
- # rocket_job do |job|
30
- # job.max_retries = 3
31
- # end
29
+ # self.max_retries = 3
32
30
  #
33
31
  # def perform
34
32
  # puts "DONE"
@@ -52,13 +50,10 @@ module RocketJob
52
50
 
53
51
  # Maximum number of times to retry this job
54
52
  # 25 is approximately 3 weeks of retries
55
- key :max_retries, Integer, default: 25
53
+ field :max_retries, type: Integer, default: 25, class_attribute: true, user_editable: true
56
54
 
57
55
  # List of times when this job failed
58
- key :failed_times, Array
59
-
60
- # Make max_retries editable in Rocket Job Mission Control
61
- public_rocket_job_properties :max_retries
56
+ field :failed_times, type: Array, default: []
62
57
 
63
58
  validates_presence_of :max_retries
64
59
  end
@@ -17,7 +17,7 @@ module RocketJob
17
17
 
18
18
  # Returns [true|false] whether another instance of this job is already active
19
19
  def rocket_job_singleton_active?
20
- self.class.where(state: [:running, :queued], _id: {'$ne' => id}).exists?
20
+ self.class.where(:state.in => [:running, :queued], :id.ne => id).exists?
21
21
  end
22
22
  end
23
23
 
@@ -86,13 +86,7 @@ module RocketJob
86
86
  write_attribute(attr_name, state)
87
87
 
88
88
  begin
89
- if aasm_skipping_validations(name)
90
- saved = save(validate: false)
91
- write_attribute(attr_name, old_value) unless saved
92
- saved
93
- else
94
- save!
95
- end
89
+ save!
96
90
  rescue Exception => exc
97
91
  write_attribute(attr_name, old_value)
98
92
  raise(exc)
@@ -0,0 +1,352 @@
1
+ # encoding: UTF-8
2
+ require 'concurrent'
3
+ module RocketJob
4
+ # Server
5
+ #
6
+ # On startup a server instance will automatically register itself
7
+ # if not already present
8
+ #
9
+ # Starting a server in the foreground:
10
+ # - Using a Rails runner:
11
+ # bin/rocketjob
12
+ #
13
+ # Starting a server in the background:
14
+ # - Using a Rails runner:
15
+ # nohup bin/rocketjob --quiet 2>&1 1>output.log &
16
+ #
17
+ # Stopping a server:
18
+ # - Stop the server via the Web UI
19
+ # - Send a regular kill signal to make it shutdown once all active work is complete
20
+ # kill <pid>
21
+ # - Or, use the following Ruby code:
22
+ # server = RocketJob::Server.where(name: 'server name').first
23
+ # server.stop!
24
+ #
25
+ # Sending the kill signal locally will result in starting the shutdown process
26
+ # immediately. Via the UI or Ruby code the server can take up to 15 seconds
27
+ # (the heartbeat interval) to start shutting down.
28
+ class Server
29
+ include Plugins::Document
30
+ include Plugins::StateMachine
31
+ include SemanticLogger::Loggable
32
+
33
+ # Unique Name of this server instance
34
+ # Default: `host name:PID`
35
+ # The unique name is used on re-start to re-queue any jobs that were being processed
36
+ # at the time the server unexpectedly terminated, if any
37
+ field :name, type: String, default: -> { "#{SemanticLogger.host}:#{$$}" }
38
+
39
+ # The maximum number of workers this server should start
40
+ # If set, it will override the default value in RocketJob::Config
41
+ field :max_workers, type: Integer, default: -> { Config.instance.max_worker_threads }
42
+
43
+ # When this server process was started
44
+ field :started_at, type: Time
45
+
46
+ # The heartbeat information for this server
47
+ embeds_one :heartbeat, class_name: 'RocketJob::Heartbeat'
48
+
49
+ # Current state
50
+ # Internal use only. Do not set this field directly
51
+ field :state, type: Symbol, default: :starting
52
+
53
+ index({name: 1}, background: true, unique: true, drop_dups: true)
54
+
55
+ validates_presence_of :state, :name, :max_workers
56
+
57
+ # States
58
+ # :starting -> :running -> :paused
59
+ # -> :stopping
60
+ aasm column: :state do
61
+ state :starting, initial: true
62
+ state :running
63
+ state :paused
64
+ state :stopping
65
+
66
+ event :started do
67
+ transitions from: :starting, to: :running
68
+ before do
69
+ self.started_at = Time.now
70
+ end
71
+ end
72
+
73
+ event :pause do
74
+ transitions from: :running, to: :paused
75
+ end
76
+
77
+ event :resume do
78
+ transitions from: :paused, to: :running
79
+ end
80
+
81
+ event :stop do
82
+ transitions from: :running, to: :stopping
83
+ transitions from: :paused, to: :stopping
84
+ transitions from: :starting, to: :stopping
85
+ end
86
+ end
87
+
88
+ # Requeue any jobs being worked by this server when it is destroyed
89
+ before_destroy :requeue_jobs
90
+
91
+ # Destroys all instances of zombie servers and requeues any jobs still "running"
92
+ # on those servers
93
+ def self.destroy_zombies
94
+ count = 0
95
+ each do |server|
96
+ next unless server.zombie?
97
+ logger.warn "Destroying zombie server #{server.name}, and requeueing its jobs"
98
+ server.destroy
99
+ count += 1
100
+ end
101
+ count
102
+ end
103
+
104
+ # Stop all running, paused, or starting servers
105
+ def self.stop_all
106
+ where(:state.in => [:running, :paused, :starting]).each(&:stop!)
107
+ end
108
+
109
+ # Pause all running servers
110
+ def self.pause_all
111
+ running.each(&:pause!)
112
+ end
113
+
114
+ # Resume all paused servers
115
+ def self.resume_all
116
+ paused.each(&:resume!)
117
+ end
118
+
119
+ # Returns [Hash<String:Integer>] of the number of servers in each state.
120
+ # Note: If there are no servers in that particular state then the hash will not have a value for it.
121
+ #
122
+ # Example servers in every state:
123
+ # RocketJob::Server.counts_by_state
124
+ # # => {
125
+ # :aborted => 1,
126
+ # :completed => 37,
127
+ # :failed => 1,
128
+ # :paused => 3,
129
+ # :queued => 4,
130
+ # :running => 1,
131
+ # :queued_now => 1,
132
+ # :scheduled => 3
133
+ # }
134
+ #
135
+ # Example no servers active:
136
+ # RocketJob::Server.counts_by_state
137
+ # # => {}
138
+ def self.counts_by_state
139
+ counts = {}
140
+ collection.aggregate([
141
+ {
142
+ '$group' => {
143
+ _id: '$state',
144
+ count: {'$sum' => 1}
145
+ }
146
+ }
147
+ ]
148
+ ).each do |result|
149
+ counts[result['_id'].to_sym] = result['count']
150
+ end
151
+ counts
152
+ end
153
+
154
+ # On MRI the 'concurrent-ruby-ext' gem may not be loaded
155
+ if defined?(Concurrent::JavaAtomicBoolean) || defined?(Concurrent::CAtomicBoolean)
156
+ # Returns [true|false] whether the shutdown indicator has been set for this server process
157
+ def self.shutdown?
158
+ @@shutdown.value
159
+ end
160
+
161
+ # Set shutdown indicator for this server process
162
+ def self.shutdown!
163
+ @@shutdown.make_true
164
+ end
165
+
166
+ @@shutdown = Concurrent::AtomicBoolean.new(false)
167
+ else
168
+ # Returns [true|false] whether the shutdown indicator has been set for this server process
169
+ def self.shutdown?
170
+ @@shutdown
171
+ end
172
+
173
+ # Set shutdown indicator for this server process
174
+ def self.shutdown!
175
+ @@shutdown = true
176
+ end
177
+
178
+ @@shutdown = false
179
+ end
180
+
181
+ # Run the server process
182
+ # Attributes supplied are passed to #new
183
+ def self.run(attrs = {})
184
+ Thread.current.name = 'rocketjob main'
185
+ # Create Indexes on server startup
186
+ Mongoid::Tasks::Database.create_indexes
187
+ register_signal_handlers
188
+
189
+ server = create!(attrs)
190
+ server.send(:run)
191
+
192
+ ensure
193
+ server.destroy if server
194
+ end
195
+
196
+ # Returns [Boolean] whether the server is shutting down
197
+ def shutdown?
198
+ self.class.shutdown? || !running?
199
+ end
200
+
201
+ # Returns [true|false] if this server has missed at least the last 4 heartbeats
202
+ #
203
+ # Possible causes for a server to miss its heartbeats:
204
+ # - The server process has died
205
+ # - The server process is "hanging"
206
+ # - The server is no longer able to communicate with the MongoDB Server
207
+ def zombie?(missed = 4)
208
+ return false unless running? || stopping?
209
+ return true if heartbeat.nil? || heartbeat.updated_at.nil?
210
+ dead_seconds = Config.instance.heartbeat_seconds * missed
211
+ (Time.now - heartbeat.updated_at) >= dead_seconds
212
+ end
213
+
214
+ private
215
+
216
+ attr_reader :workers
217
+
218
+ # Returns [Array<Worker>] collection of workers
219
+ def workers
220
+ @workers ||= []
221
+ end
222
+
223
+ # Management Thread
224
+ def run
225
+ logger.info "Using MongoDB Database: #{RocketJob::Job.collection.database.name}"
226
+ build_heartbeat(updated_at: Time.now, current_threads: 0)
227
+ started!
228
+ adjust_workers(true)
229
+ logger.info "RocketJob Server started with #{workers.size} workers running"
230
+
231
+ while running? || paused?
232
+ sleep Config.instance.heartbeat_seconds
233
+
234
+ find_and_update(
235
+ 'heartbeat.updated_at' => Time.now,
236
+ 'heartbeat.current_threads' => worker_count
237
+ )
238
+
239
+ # In case number of threads has been modified
240
+ adjust_workers
241
+
242
+ # Stop server if shutdown indicator was set
243
+ stop! if self.class.shutdown? && may_stop?
244
+ end
245
+
246
+ logger.info 'Waiting for workers to stop'
247
+ # Tell each worker to shutdown cleanly
248
+ workers.each(&:shutdown!)
249
+
250
+ while worker = workers.first
251
+ if worker.join(5)
252
+ # Worker thread is dead
253
+ workers.shift
254
+ else
255
+ # Timeout waiting for worker to stop
256
+ begin
257
+ find_and_update(
258
+ 'heartbeat.updated_at' => Time.now,
259
+ 'heartbeat.current_threads' => worker_count
260
+ )
261
+ rescue Mongoid::Errors::DocumentNotFound
262
+ logger.warn('Server has been destroyed. Going down hard!')
263
+ break
264
+ end
265
+ end
266
+ end
267
+
268
+ # Logs the backtrace for each running worker
269
+ if SemanticLogger::VERSION.to_i >= 4
270
+ workers.each { |thread| logger.backtrace(thread: thread) }
271
+ end
272
+ logger.info 'Shutdown'
273
+ rescue Exception => exc
274
+ logger.error('RocketJob::Server is stopping due to an exception', exc)
275
+ end
276
+
277
+ # Returns [Fixnum] number of workers (threads) that are alive
278
+ def worker_count
279
+ workers.count(&:alive?)
280
+ end
281
+
282
+ def next_worker_id
283
+ @worker_id ||= 0
284
+ @worker_id += 1
285
+ end
286
+
287
+ # Re-adjust the number of running workers to get it up to the
288
+ # required number of workers
289
+ # Parameters
290
+ # stagger_workers
291
+ # Whether to stagger when the workers poll for work the first time
292
+ # It spreads out the queue polling over the max_poll_seconds so
293
+ # that not all workers poll at the same time
294
+ # The workers also respond faster than max_poll_seconds when a new
295
+ # job is added.
296
+ def adjust_workers(stagger_workers=false)
297
+ count = worker_count
298
+ # Cleanup workers that have stopped
299
+ if count != workers.count
300
+ logger.info "Cleaning up #{workers.count - count} workers that went away"
301
+ workers.delete_if { |t| !t.alive? }
302
+ end
303
+
304
+ # Need to add more workers?
305
+ if count < max_workers
306
+ worker_count = max_workers - count
307
+ logger.info "Starting #{worker_count} workers"
308
+ worker_count.times.each do
309
+ sleep (Config.instance.max_poll_seconds.to_f / max_workers) * (next_worker_id - 1) if stagger_workers
310
+ return if shutdown?
311
+ # Start worker
312
+ begin
313
+ workers << Worker.new(id: next_worker_id, server_name: name)
314
+ rescue Exception => exc
315
+ logger.fatal('Cannot start worker', exc)
316
+ end
317
+ end
318
+ end
319
+ end
320
+
321
+ # Register handlers for the various signals
322
+ # Term:
323
+ # Perform clean shutdown
324
+ #
325
+ def self.register_signal_handlers
326
+ begin
327
+ Signal.trap 'SIGTERM' do
328
+ shutdown!
329
+ message = 'Shutdown signal (SIGTERM) received. Will shutdown as soon as active jobs/slices have completed.'
330
+ # Logging uses a mutex to access Queue on MRI/CRuby
331
+ defined?(JRuby) ? logger.warn(message) : puts(message)
332
+ end
333
+
334
+ Signal.trap 'INT' do
335
+ shutdown!
336
+ message = 'Shutdown signal (INT) received. Will shutdown as soon as active jobs/slices have completed.'
337
+ # Logging uses a mutex to access Queue on MRI/CRuby
338
+ defined?(JRuby) ? logger.warn(message) : puts(message)
339
+ end
340
+ rescue StandardError
341
+ logger.warn 'SIGTERM handler not installed. Not able to shutdown gracefully'
342
+ end
343
+ end
344
+
345
+ # Requeue any jobs assigned to this server when it is destroyed
346
+ def requeue_jobs
347
+ RocketJob::Job.requeue_dead_server(name)
348
+ end
349
+
350
+ end
351
+ end
352
+