rocketjob 4.0.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rocket_job/cli.rb +2 -2
- data/lib/rocket_job/event.rb +163 -0
- data/lib/rocket_job/jobs/housekeeping_job.rb +7 -7
- data/lib/rocket_job/plugins/transaction.rb +1 -1
- data/lib/rocket_job/rocket_job.rb +7 -0
- data/lib/rocket_job/server.rb +5 -356
- data/lib/rocket_job/server/model.rb +138 -0
- data/lib/rocket_job/server/state_machine.rb +60 -0
- data/lib/rocket_job/subscriber.rb +79 -0
- data/lib/rocket_job/subscribers/logger.rb +75 -0
- data/lib/rocket_job/subscribers/server.rb +71 -0
- data/lib/rocket_job/subscribers/worker.rb +61 -0
- data/lib/rocket_job/supervisor.rb +96 -0
- data/lib/rocket_job/supervisor/shutdown.rb +63 -0
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +41 -31
- data/lib/rocket_job/worker_pool.rb +103 -0
- data/lib/rocketjob.rb +17 -7
- metadata +15 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d14befce83747b072cf398356d5a5a798630d65c9aa50ec5f5de1e1f4d0e8d69
|
4
|
+
data.tar.gz: 17c5e295968836458ef1b998d1ffb79228ab6b5971c6339ec1bdc74d429d2511
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 07b25f25ca1fd804e6c3d98ebcf3724acb8bcb564281f1b5ffd21bd815e8a05d7a99988721c5e3a25063da44f800ea3c998fcbecd7751a8b10755b214804feab
|
7
|
+
data.tar.gz: 48093ed5e152571a097e07f8e606df99fc48fcf1f8364e78b77d371e5e668acf384c972a002c078930b1af809dd7e34483ba28194fbcd52a9ae06bfb60f2dfce
|
data/lib/rocket_job/cli.rb
CHANGED
@@ -47,7 +47,7 @@ module RocketJob
|
|
47
47
|
opts[:max_workers] = workers if workers
|
48
48
|
opts[:filter] = filter if filter
|
49
49
|
|
50
|
-
|
50
|
+
Supervisor.run(opts)
|
51
51
|
end
|
52
52
|
|
53
53
|
def rails?
|
@@ -96,7 +96,7 @@ module RocketJob
|
|
96
96
|
|
97
97
|
require 'rocketjob'
|
98
98
|
begin
|
99
|
-
require '
|
99
|
+
require 'rocketjob_enterprise'
|
100
100
|
rescue LoadError
|
101
101
|
nil
|
102
102
|
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
require 'concurrent-ruby'
|
2
|
+
|
3
|
+
module RocketJob
|
4
|
+
# RocketJob::Event
|
5
|
+
#
|
6
|
+
# Publish and Subscribe to events. Events are published immediately and usually consumed
|
7
|
+
# almost immediately by all subscriber processes.
|
8
|
+
class Event
|
9
|
+
include SemanticLogger::Loggable
|
10
|
+
include Plugins::Document
|
11
|
+
include Mongoid::Timestamps
|
12
|
+
|
13
|
+
ALL_EVENTS = '*'.freeze
|
14
|
+
|
15
|
+
# Capped collection long polling interval.
|
16
|
+
class_attribute :long_poll_seconds, instance_accessor: false
|
17
|
+
self.long_poll_seconds = 300
|
18
|
+
|
19
|
+
# Capped collection size.
|
20
|
+
# Only used the first time the collection is created.
|
21
|
+
#
|
22
|
+
# Default: 128MB.
|
23
|
+
class_attribute :capped_collection_size, instance_accessor: false
|
24
|
+
self.capped_collection_size = 128 * 1024 * 1024
|
25
|
+
|
26
|
+
# Mandatory Event Name
|
27
|
+
# Examples:
|
28
|
+
# '/rocket_job/config'
|
29
|
+
# '/rocket_job/server'
|
30
|
+
# '/rocket_job/worker'
|
31
|
+
field :name, type: String
|
32
|
+
|
33
|
+
# Event Action
|
34
|
+
# Examples:
|
35
|
+
# :shutdown
|
36
|
+
# :pause
|
37
|
+
# :updated
|
38
|
+
field :action, type: Symbol
|
39
|
+
|
40
|
+
# Hash Parameters to be sent with the event (event specific).
|
41
|
+
field :parameters, type: Hash
|
42
|
+
|
43
|
+
validates_presence_of :name
|
44
|
+
|
45
|
+
store_in collection: 'rocket_job.events'
|
46
|
+
index({created_at: 1}, background: true)
|
47
|
+
|
48
|
+
# Add a subscriber for its events.
|
49
|
+
# Returns a handle to the subscription that can be used to unsubscribe
|
50
|
+
# this particular subscription
|
51
|
+
#
|
52
|
+
# Example:
|
53
|
+
# class MySubscriber
|
54
|
+
# include RocketJob::Subscriber
|
55
|
+
#
|
56
|
+
# def hello
|
57
|
+
# logger.info "Hello Action Received"
|
58
|
+
# end
|
59
|
+
#
|
60
|
+
# def show(message:)
|
61
|
+
# logger.info "Received: #{message}"
|
62
|
+
# end
|
63
|
+
# end
|
64
|
+
#
|
65
|
+
# MySubscriber.subscribe
|
66
|
+
def self.subscribe(subscriber)
|
67
|
+
if block_given?
|
68
|
+
begin
|
69
|
+
handle = add_subscriber(subscriber)
|
70
|
+
yield(subscriber)
|
71
|
+
ensure
|
72
|
+
unsubscribe(handle) if handle
|
73
|
+
end
|
74
|
+
else
|
75
|
+
add_subscriber(subscriber)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
# Unsubscribes a previous subscription
|
80
|
+
def self.unsubscribe(handle)
|
81
|
+
@subscribers.each_value { |v| v.delete_if { |i| i.object_id == handle } }
|
82
|
+
end
|
83
|
+
|
84
|
+
# Indefinitely tail the capped collection looking for new events.
|
85
|
+
# time: the start time from which to start looking for new events.
|
86
|
+
def self.listener(time: @load_time)
|
87
|
+
Thread.current.name = 'rocketjob event'
|
88
|
+
create_capped_collection
|
89
|
+
|
90
|
+
logger.info('Event listener started')
|
91
|
+
tail_capped_collection(time) { |event| process_event(event) }
|
92
|
+
rescue Exception => exc
|
93
|
+
logger.error('#listener Event listener is terminating due to unhandled exception', exc)
|
94
|
+
raise(exc)
|
95
|
+
end
|
96
|
+
|
97
|
+
# Create the capped collection only if it does not exist.
|
98
|
+
# Drop the collection before calling this method to re-create it.
|
99
|
+
def self.create_capped_collection(size: capped_collection_size)
|
100
|
+
if collection_exists?
|
101
|
+
convert_to_capped_collection(size) unless collection.capped?
|
102
|
+
else
|
103
|
+
collection.client[collection_name, {capped: true, size: size}].create
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
private
|
108
|
+
|
109
|
+
@load_time = Time.now.utc
|
110
|
+
@subscribers = Concurrent::Map.new { Concurrent::Array.new }
|
111
|
+
|
112
|
+
def self.add_subscriber(subscriber)
|
113
|
+
name = subscriber.class.event_name
|
114
|
+
@subscribers[name] = @subscribers[name] << subscriber
|
115
|
+
subscriber.object_id
|
116
|
+
end
|
117
|
+
|
118
|
+
def self.tail_capped_collection(time)
|
119
|
+
with(socket_timeout: long_poll_seconds + 10) do
|
120
|
+
filter = {created_at: {'$gt' => time}}
|
121
|
+
collection.
|
122
|
+
find(filter).
|
123
|
+
await_data.
|
124
|
+
cursor_type(:tailable_await).
|
125
|
+
max_await_time_ms(long_poll_seconds * 1000).
|
126
|
+
sort('$natural' => 1).
|
127
|
+
each do |doc|
|
128
|
+
event = Mongoid::Factory.from_db(Event, doc)
|
129
|
+
# Recovery will occur from after the last message read
|
130
|
+
time = event.created_at
|
131
|
+
yield(event)
|
132
|
+
end
|
133
|
+
end
|
134
|
+
rescue Mongo::Error::SocketError, Mongo::Error::SocketTimeoutError, Mongo::Error::OperationFailure, Timeout::Error => exc
|
135
|
+
logger.info("Creating a new cursor and trying again: #{exc.class.name} #{exc.message}")
|
136
|
+
retry
|
137
|
+
end
|
138
|
+
|
139
|
+
# Process a new event, calling registered subscribers.
|
140
|
+
def self.process_event(event)
|
141
|
+
logger.info('Event Received', event.attributes)
|
142
|
+
|
143
|
+
if @subscribers.key?(event.name)
|
144
|
+
@subscribers[event.name].each { |subscriber| subscriber.process_action(event.action, event.parameters) }
|
145
|
+
end
|
146
|
+
|
147
|
+
if @subscribers.key?(ALL_EVENTS)
|
148
|
+
@subscribers[ALL_EVENTS].each { |subscriber| subscriber.process_event(event.name, event.action, event.parameters) }
|
149
|
+
end
|
150
|
+
rescue StandardError => exc
|
151
|
+
logger.error('Unknown subscriber. Continuing..', exc)
|
152
|
+
end
|
153
|
+
|
154
|
+
def self.collection_exists?
|
155
|
+
collection.database.collection_names.include?(collection_name.to_s)
|
156
|
+
end
|
157
|
+
|
158
|
+
# Convert a non-capped collection to capped
|
159
|
+
def self.convert_to_capped_collection(size)
|
160
|
+
collection.database.command('convertToCapped' => collection_name.to_s, 'size' => size)
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
@@ -46,13 +46,6 @@ module RocketJob
|
|
46
46
|
field :queued_retention, type: Integer, user_editable: true, copy_on_restart: true
|
47
47
|
|
48
48
|
def perform
|
49
|
-
if destroy_zombies
|
50
|
-
# Cleanup zombie servers
|
51
|
-
RocketJob::Server.destroy_zombies
|
52
|
-
# Requeue jobs where the worker is in the zombie state and its server has gone away
|
53
|
-
RocketJob::ActiveWorker.requeue_zombies
|
54
|
-
end
|
55
|
-
|
56
49
|
RocketJob::Job.aborted.where(completed_at: {'$lte' => aborted_retention.seconds.ago}).destroy_all if aborted_retention
|
57
50
|
if completed_retention
|
58
51
|
RocketJob::Job.completed.where(completed_at: {'$lte' => completed_retention.seconds.ago}).destroy_all
|
@@ -60,6 +53,13 @@ module RocketJob
|
|
60
53
|
RocketJob::Job.failed.where(completed_at: {'$lte' => failed_retention.seconds.ago}).destroy_all if failed_retention
|
61
54
|
RocketJob::Job.paused.where(completed_at: {'$lte' => paused_retention.seconds.ago}).destroy_all if paused_retention
|
62
55
|
RocketJob::Job.queued.where(created_at: {'$lte' => queued_retention.seconds.ago}).destroy_all if queued_retention
|
56
|
+
|
57
|
+
if destroy_zombies
|
58
|
+
# Cleanup zombie servers
|
59
|
+
RocketJob::Server.destroy_zombies
|
60
|
+
# Requeue jobs where the worker is in the zombie state and its server has gone away
|
61
|
+
RocketJob::ActiveWorker.requeue_zombies
|
62
|
+
end
|
63
63
|
end
|
64
64
|
end
|
65
65
|
end
|
@@ -25,7 +25,7 @@ module RocketJob
|
|
25
25
|
# end
|
26
26
|
#
|
27
27
|
# Performance
|
28
|
-
# - On
|
28
|
+
# - On CRuby an empty transaction block call takes about 1ms.
|
29
29
|
# - On JRuby an empty transaction block call takes about 55ms.
|
30
30
|
#
|
31
31
|
# Note:
|
@@ -1,4 +1,11 @@
|
|
1
1
|
module RocketJob
|
2
|
+
def self.create_indexes
|
3
|
+
# Ensure models with indexes are loaded into memory first
|
4
|
+
Job.create_indexes
|
5
|
+
Server.create_indexes
|
6
|
+
DirmonEntry.create_indexes
|
7
|
+
end
|
8
|
+
|
2
9
|
# Whether the current process is running inside a Rocket Job server process.
|
3
10
|
def self.server?
|
4
11
|
@server
|
data/lib/rocket_job/server.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
1
|
+
require 'rocket_job/server/model'
|
2
|
+
require 'rocket_job/server/state_machine'
|
3
|
+
|
3
4
|
module RocketJob
|
4
5
|
# Server
|
5
6
|
#
|
@@ -29,359 +30,7 @@ module RocketJob
|
|
29
30
|
include Plugins::Document
|
30
31
|
include Plugins::StateMachine
|
31
32
|
include SemanticLogger::Loggable
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
# Unique Name of this server instance
|
36
|
-
# Default: `host name:PID`
|
37
|
-
# The unique name is used on re-start to re-queue any jobs that were being processed
|
38
|
-
# at the time the server unexpectedly terminated, if any
|
39
|
-
field :name, type: String, default: -> { "#{SemanticLogger.host}:#{$$}" }
|
40
|
-
|
41
|
-
# The maximum number of workers this server should start
|
42
|
-
# If set, it will override the default value in RocketJob::Config
|
43
|
-
field :max_workers, type: Integer, default: -> { Config.instance.max_workers }
|
44
|
-
|
45
|
-
# When this server process was started
|
46
|
-
field :started_at, type: Time
|
47
|
-
|
48
|
-
# Filter to apply to control which job classes this server can process
|
49
|
-
field :yaml_filter, type: String
|
50
|
-
|
51
|
-
# The heartbeat information for this server
|
52
|
-
embeds_one :heartbeat, class_name: 'RocketJob::Heartbeat'
|
53
|
-
|
54
|
-
# Current state
|
55
|
-
# Internal use only. Do not set this field directly
|
56
|
-
field :state, type: Symbol, default: :starting
|
57
|
-
|
58
|
-
index({name: 1}, background: true, unique: true, drop_dups: true)
|
59
|
-
|
60
|
-
validates_presence_of :state, :name, :max_workers
|
61
|
-
|
62
|
-
# States
|
63
|
-
# :starting -> :running -> :paused
|
64
|
-
# -> :stopping
|
65
|
-
aasm column: :state, whiny_persistence: true do
|
66
|
-
state :starting, initial: true
|
67
|
-
state :running
|
68
|
-
state :paused
|
69
|
-
state :stopping
|
70
|
-
|
71
|
-
event :started do
|
72
|
-
transitions from: :starting, to: :running
|
73
|
-
before do
|
74
|
-
self.started_at = Time.now
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
event :pause do
|
79
|
-
transitions from: :running, to: :paused
|
80
|
-
end
|
81
|
-
|
82
|
-
event :resume do
|
83
|
-
transitions from: :paused, to: :running
|
84
|
-
end
|
85
|
-
|
86
|
-
event :stop do
|
87
|
-
transitions from: :running, to: :stopping
|
88
|
-
transitions from: :paused, to: :stopping
|
89
|
-
transitions from: :starting, to: :stopping
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
# Requeue any jobs being worked by this server when it is destroyed
|
94
|
-
before_destroy :requeue_jobs
|
95
|
-
|
96
|
-
# Destroy's all instances of zombie servers and requeues any jobs still "running"
|
97
|
-
# on those servers.
|
98
|
-
def self.destroy_zombies
|
99
|
-
count = 0
|
100
|
-
each do |server|
|
101
|
-
next unless server.zombie?
|
102
|
-
logger.warn "Destroying zombie server #{server.name}, and requeueing its jobs"
|
103
|
-
server.destroy
|
104
|
-
count += 1
|
105
|
-
end
|
106
|
-
count
|
107
|
-
end
|
108
|
-
|
109
|
-
# Stop all running, paused, or starting servers
|
110
|
-
def self.stop_all
|
111
|
-
where(:state.in => %i[running paused starting]).each(&:stop!)
|
112
|
-
end
|
113
|
-
|
114
|
-
# Pause all running servers
|
115
|
-
def self.pause_all
|
116
|
-
running.each(&:pause!)
|
117
|
-
end
|
118
|
-
|
119
|
-
# Resume all paused servers
|
120
|
-
def self.resume_all
|
121
|
-
paused.each(&:resume!)
|
122
|
-
end
|
123
|
-
|
124
|
-
# Returns [Hash<String:Integer>] of the number of servers in each state.
|
125
|
-
# Note: If there are no servers in that particular state then the hash will not have a value for it.
|
126
|
-
#
|
127
|
-
# Example servers in every state:
|
128
|
-
# RocketJob::Server.counts_by_state
|
129
|
-
# # => {
|
130
|
-
# :aborted => 1,
|
131
|
-
# :completed => 37,
|
132
|
-
# :failed => 1,
|
133
|
-
# :paused => 3,
|
134
|
-
# :queued => 4,
|
135
|
-
# :running => 1,
|
136
|
-
# :queued_now => 1,
|
137
|
-
# :scheduled => 3
|
138
|
-
# }
|
139
|
-
#
|
140
|
-
# Example no servers active:
|
141
|
-
# RocketJob::Server.counts_by_state
|
142
|
-
# # => {}
|
143
|
-
def self.counts_by_state
|
144
|
-
counts = {}
|
145
|
-
collection.aggregate(
|
146
|
-
[
|
147
|
-
{
|
148
|
-
'$group' => {
|
149
|
-
_id: '$state',
|
150
|
-
count: {'$sum' => 1}
|
151
|
-
}
|
152
|
-
}
|
153
|
-
]
|
154
|
-
).each do |result|
|
155
|
-
counts[result['_id'].to_sym] = result['count']
|
156
|
-
end
|
157
|
-
counts
|
158
|
-
end
|
159
|
-
|
160
|
-
# On MRI the 'concurrent-ruby-ext' gem may not be loaded
|
161
|
-
if defined?(Concurrent::JavaAtomicBoolean) || defined?(Concurrent::CAtomicBoolean)
|
162
|
-
# Returns [true|false] whether the shutdown indicator has been set for this server process
|
163
|
-
def self.shutdown?
|
164
|
-
@shutdown.value
|
165
|
-
end
|
166
|
-
|
167
|
-
# Set shutdown indicator for this server process
|
168
|
-
def self.shutdown!
|
169
|
-
@shutdown.make_true
|
170
|
-
end
|
171
|
-
|
172
|
-
@shutdown = Concurrent::AtomicBoolean.new(false)
|
173
|
-
else
|
174
|
-
# Returns [true|false] whether the shutdown indicator has been set for this server process
|
175
|
-
def self.shutdown?
|
176
|
-
@shutdown
|
177
|
-
end
|
178
|
-
|
179
|
-
# Set shutdown indicator for this server process
|
180
|
-
def self.shutdown!
|
181
|
-
@shutdown = true
|
182
|
-
end
|
183
|
-
|
184
|
-
@shutdown = false
|
185
|
-
end
|
186
|
-
|
187
|
-
# Run the server process
|
188
|
-
# Attributes supplied are passed to #new
|
189
|
-
def self.run(attrs = {})
|
190
|
-
Thread.current.name = 'rocketjob main'
|
191
|
-
# Create Indexes on server startup
|
192
|
-
::Mongoid::Tasks::Database.create_indexes
|
193
|
-
register_signal_handlers
|
194
|
-
|
195
|
-
server = create!(attrs)
|
196
|
-
server.send(:run)
|
197
|
-
ensure
|
198
|
-
server&.destroy
|
199
|
-
end
|
200
|
-
|
201
|
-
# Returns [Boolean] whether the server is shutting down
|
202
|
-
def shutdown?
|
203
|
-
self.class.shutdown? || !running?
|
204
|
-
end
|
205
|
-
|
206
|
-
# Scope for all zombie servers
|
207
|
-
def self.zombies(missed = 4)
|
208
|
-
dead_seconds = Config.instance.heartbeat_seconds * missed
|
209
|
-
last_heartbeat_time = Time.now - dead_seconds
|
210
|
-
where(
|
211
|
-
:state.in => %i[stopping running paused],
|
212
|
-
'$or' => [
|
213
|
-
{'heartbeat.updated_at' => {'$exists' => false}},
|
214
|
-
{'heartbeat.updated_at' => {'$lte' => last_heartbeat_time}}
|
215
|
-
]
|
216
|
-
)
|
217
|
-
end
|
218
|
-
|
219
|
-
# Returns [true|false] if this server has missed at least the last 4 heartbeats
|
220
|
-
#
|
221
|
-
# Possible causes for a server to miss its heartbeats:
|
222
|
-
# - The server process has died
|
223
|
-
# - The server process is "hanging"
|
224
|
-
# - The server is no longer able to communicate with the MongoDB Server
|
225
|
-
def zombie?(missed = 4)
|
226
|
-
return false unless running? || stopping? || paused?
|
227
|
-
return true if heartbeat.nil? || heartbeat.updated_at.nil?
|
228
|
-
dead_seconds = Config.instance.heartbeat_seconds * missed
|
229
|
-
(Time.now - heartbeat.updated_at) >= dead_seconds
|
230
|
-
end
|
231
|
-
|
232
|
-
# Where clause filter to apply to workers looking for jobs
|
233
|
-
def filter
|
234
|
-
YAML.load(yaml_filter) if yaml_filter
|
235
|
-
end
|
236
|
-
|
237
|
-
def filter=(hash)
|
238
|
-
self.yaml_filter = hash.nil? ? nil : hash.to_yaml
|
239
|
-
end
|
240
|
-
|
241
|
-
private
|
242
|
-
|
243
|
-
# Returns [Array<Worker>] collection of workers
|
244
|
-
def workers
|
245
|
-
@workers ||= []
|
246
|
-
end
|
247
|
-
|
248
|
-
# Management Thread
|
249
|
-
def run
|
250
|
-
logger.info "Using MongoDB Database: #{RocketJob::Job.collection.database.name}"
|
251
|
-
logger.info('Running with filter', filter) if filter
|
252
|
-
build_heartbeat(updated_at: Time.now, workers: 0)
|
253
|
-
started!
|
254
|
-
logger.info 'Rocket Job Server started'
|
255
|
-
|
256
|
-
run_workers
|
257
|
-
|
258
|
-
logger.info 'Waiting for workers to stop'
|
259
|
-
# Tell each worker to shutdown cleanly
|
260
|
-
workers.each(&:shutdown!)
|
261
|
-
|
262
|
-
while (worker = workers.first)
|
263
|
-
if worker.join(5)
|
264
|
-
# Worker thread is dead
|
265
|
-
workers.shift
|
266
|
-
else
|
267
|
-
# Timeout waiting for worker to stop
|
268
|
-
find_and_update(
|
269
|
-
'heartbeat.updated_at' => Time.now,
|
270
|
-
'heartbeat.workers' => worker_count
|
271
|
-
)
|
272
|
-
end
|
273
|
-
end
|
274
|
-
|
275
|
-
logger.info 'Shutdown'
|
276
|
-
rescue ::Mongoid::Errors::DocumentNotFound
|
277
|
-
logger.warn('Server has been destroyed. Going down hard!')
|
278
|
-
rescue Exception => exc
|
279
|
-
logger.error('RocketJob::Server is stopping due to an exception', exc)
|
280
|
-
ensure
|
281
|
-
# Logs the backtrace for each running worker
|
282
|
-
workers.each { |worker| logger.backtrace(thread: worker.thread) if worker.thread && worker.alive? }
|
283
|
-
end
|
284
|
-
|
285
|
-
def run_workers
|
286
|
-
stagger = true
|
287
|
-
while running? || paused?
|
288
|
-
SemanticLogger.silence(:info) do
|
289
|
-
find_and_update(
|
290
|
-
'heartbeat.updated_at' => Time.now,
|
291
|
-
'heartbeat.workers' => worker_count
|
292
|
-
)
|
293
|
-
end
|
294
|
-
if paused?
|
295
|
-
workers.each(&:shutdown!)
|
296
|
-
stagger = true
|
297
|
-
end
|
298
|
-
|
299
|
-
# In case number of threads has been modified
|
300
|
-
adjust_workers(stagger)
|
301
|
-
stagger = false
|
302
|
-
|
303
|
-
# Stop server if shutdown indicator was set
|
304
|
-
if self.class.shutdown? && may_stop?
|
305
|
-
stop!
|
306
|
-
else
|
307
|
-
sleep Config.instance.heartbeat_seconds
|
308
|
-
end
|
309
|
-
end
|
310
|
-
end
|
311
|
-
|
312
|
-
# Returns [Fixnum] number of workers (threads) that are alive
|
313
|
-
def worker_count
|
314
|
-
workers.count(&:alive?)
|
315
|
-
end
|
316
|
-
|
317
|
-
def next_worker_id
|
318
|
-
@worker_id ||= 0
|
319
|
-
@worker_id += 1
|
320
|
-
end
|
321
|
-
|
322
|
-
# Re-adjust the number of running workers to get it up to the
|
323
|
-
# required number of workers
|
324
|
-
# Parameters
|
325
|
-
# stagger_workers
|
326
|
-
# Whether to stagger when the workers poll for work the first time
|
327
|
-
# It spreads out the queue polling over the max_poll_seconds so
|
328
|
-
# that not all workers poll at the same time
|
329
|
-
# The worker also respond faster than max_poll_seconds when a new
|
330
|
-
# job is added.
|
331
|
-
def adjust_workers(stagger_workers = false)
|
332
|
-
count = worker_count
|
333
|
-
# Cleanup workers that have stopped
|
334
|
-
if count != workers.count
|
335
|
-
logger.info "Cleaning up #{workers.count - count} workers that went away"
|
336
|
-
workers.delete_if { |t| !t.alive? }
|
337
|
-
end
|
338
|
-
|
339
|
-
return unless running?
|
340
|
-
|
341
|
-
# Need to add more workers?
|
342
|
-
return unless count < max_workers
|
343
|
-
|
344
|
-
worker_count = max_workers - count
|
345
|
-
logger.info "Starting #{worker_count} workers"
|
346
|
-
worker_count.times.each do
|
347
|
-
sleep(Config.instance.max_poll_seconds.to_f / max_workers) if stagger_workers
|
348
|
-
return if shutdown?
|
349
|
-
# Start worker
|
350
|
-
begin
|
351
|
-
workers << Worker.new(id: next_worker_id, server_name: name, filter: filter)
|
352
|
-
rescue Exception => exc
|
353
|
-
logger.fatal('Cannot start worker', exc)
|
354
|
-
end
|
355
|
-
end
|
356
|
-
end
|
357
|
-
|
358
|
-
# Register handlers for the various signals
|
359
|
-
# Term:
|
360
|
-
# Perform clean shutdown
|
361
|
-
#
|
362
|
-
def self.register_signal_handlers
|
363
|
-
Signal.trap 'SIGTERM' do
|
364
|
-
shutdown!
|
365
|
-
message = 'Shutdown signal (SIGTERM) received. Will shutdown as soon as active jobs/slices have completed.'
|
366
|
-
# Logging uses a mutex to access Queue on MRI/CRuby
|
367
|
-
defined?(JRuby) ? logger.warn(message) : puts(message)
|
368
|
-
end
|
369
|
-
|
370
|
-
Signal.trap 'INT' do
|
371
|
-
shutdown!
|
372
|
-
message = 'Shutdown signal (INT) received. Will shutdown as soon as active jobs/slices have completed.'
|
373
|
-
# Logging uses a mutex to access Queue on MRI/CRuby
|
374
|
-
defined?(JRuby) ? logger.warn(message) : puts(message)
|
375
|
-
end
|
376
|
-
rescue StandardError
|
377
|
-
logger.warn 'SIGTERM handler not installed. Not able to shutdown gracefully'
|
378
|
-
end
|
379
|
-
|
380
|
-
private_class_method :register_signal_handlers
|
381
|
-
|
382
|
-
# Requeue any jobs assigned to this server when it is destroyed
|
383
|
-
def requeue_jobs
|
384
|
-
RocketJob::Job.requeue_dead_server(name)
|
385
|
-
end
|
33
|
+
include Server::Model
|
34
|
+
include Server::StateMachine
|
386
35
|
end
|
387
36
|
end
|