rocketjob 4.0.0 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rocket_job/cli.rb +2 -2
- data/lib/rocket_job/event.rb +163 -0
- data/lib/rocket_job/jobs/housekeeping_job.rb +7 -7
- data/lib/rocket_job/plugins/transaction.rb +1 -1
- data/lib/rocket_job/rocket_job.rb +7 -0
- data/lib/rocket_job/server.rb +5 -356
- data/lib/rocket_job/server/model.rb +138 -0
- data/lib/rocket_job/server/state_machine.rb +60 -0
- data/lib/rocket_job/subscriber.rb +79 -0
- data/lib/rocket_job/subscribers/logger.rb +75 -0
- data/lib/rocket_job/subscribers/server.rb +71 -0
- data/lib/rocket_job/subscribers/worker.rb +61 -0
- data/lib/rocket_job/supervisor.rb +96 -0
- data/lib/rocket_job/supervisor/shutdown.rb +63 -0
- data/lib/rocket_job/version.rb +1 -1
- data/lib/rocket_job/worker.rb +41 -31
- data/lib/rocket_job/worker_pool.rb +103 -0
- data/lib/rocketjob.rb +17 -7
- metadata +15 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d14befce83747b072cf398356d5a5a798630d65c9aa50ec5f5de1e1f4d0e8d69
|
4
|
+
data.tar.gz: 17c5e295968836458ef1b998d1ffb79228ab6b5971c6339ec1bdc74d429d2511
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 07b25f25ca1fd804e6c3d98ebcf3724acb8bcb564281f1b5ffd21bd815e8a05d7a99988721c5e3a25063da44f800ea3c998fcbecd7751a8b10755b214804feab
|
7
|
+
data.tar.gz: 48093ed5e152571a097e07f8e606df99fc48fcf1f8364e78b77d371e5e668acf384c972a002c078930b1af809dd7e34483ba28194fbcd52a9ae06bfb60f2dfce
|
data/lib/rocket_job/cli.rb
CHANGED
@@ -47,7 +47,7 @@ module RocketJob
|
|
47
47
|
opts[:max_workers] = workers if workers
|
48
48
|
opts[:filter] = filter if filter
|
49
49
|
|
50
|
-
|
50
|
+
Supervisor.run(opts)
|
51
51
|
end
|
52
52
|
|
53
53
|
def rails?
|
@@ -96,7 +96,7 @@ module RocketJob
|
|
96
96
|
|
97
97
|
require 'rocketjob'
|
98
98
|
begin
|
99
|
-
require '
|
99
|
+
require 'rocketjob_enterprise'
|
100
100
|
rescue LoadError
|
101
101
|
nil
|
102
102
|
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
require 'concurrent-ruby'
|
2
|
+
|
3
|
+
module RocketJob
|
4
|
+
# RocketJob::Event
|
5
|
+
#
|
6
|
+
# Publish and Subscribe to events. Events are published immediately and usually consumed
|
7
|
+
# almost immediately by all subscriber processes.
|
8
|
+
class Event
|
9
|
+
include SemanticLogger::Loggable
|
10
|
+
include Plugins::Document
|
11
|
+
include Mongoid::Timestamps
|
12
|
+
|
13
|
+
ALL_EVENTS = '*'.freeze
|
14
|
+
|
15
|
+
# Capped collection long polling interval.
|
16
|
+
class_attribute :long_poll_seconds, instance_accessor: false
|
17
|
+
self.long_poll_seconds = 300
|
18
|
+
|
19
|
+
# Capped collection size.
|
20
|
+
# Only used the first time the collection is created.
|
21
|
+
#
|
22
|
+
# Default: 128MB.
|
23
|
+
class_attribute :capped_collection_size, instance_accessor: false
|
24
|
+
self.capped_collection_size = 128 * 1024 * 1024
|
25
|
+
|
26
|
+
# Mandatory Event Name
|
27
|
+
# Examples:
|
28
|
+
# '/rocket_job/config'
|
29
|
+
# '/rocket_job/server'
|
30
|
+
# '/rocket_job/worker'
|
31
|
+
field :name, type: String
|
32
|
+
|
33
|
+
# Event Action
|
34
|
+
# Examples:
|
35
|
+
# :shutdown
|
36
|
+
# :pause
|
37
|
+
# :updated
|
38
|
+
field :action, type: Symbol
|
39
|
+
|
40
|
+
# Hash Parameters to be sent with the event (event specific).
|
41
|
+
field :parameters, type: Hash
|
42
|
+
|
43
|
+
validates_presence_of :name
|
44
|
+
|
45
|
+
store_in collection: 'rocket_job.events'
|
46
|
+
index({created_at: 1}, background: true)
|
47
|
+
|
48
|
+
# Add a subscriber for its events.
|
49
|
+
# Returns a handle to the subscription that can be used to unsubscribe
|
50
|
+
# this particular subscription
|
51
|
+
#
|
52
|
+
# Example:
|
53
|
+
# def MySubscriber
|
54
|
+
# include RocketJob::Subscriber
|
55
|
+
#
|
56
|
+
# def hello
|
57
|
+
# logger.info "Hello Action Received"
|
58
|
+
# end
|
59
|
+
#
|
60
|
+
# def show(message:)
|
61
|
+
# logger.info "Received: #{message}"
|
62
|
+
# end
|
63
|
+
# end
|
64
|
+
#
|
65
|
+
# MySubscriber.subscribe
|
66
|
+
def self.subscribe(subscriber)
|
67
|
+
if block_given?
|
68
|
+
begin
|
69
|
+
handle = add_subscriber(subscriber)
|
70
|
+
yield(subscriber)
|
71
|
+
ensure
|
72
|
+
unsubscribe(handle) if handle
|
73
|
+
end
|
74
|
+
else
|
75
|
+
add_subscriber(subscriber)
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
# Unsubscribes a previous subscription
|
80
|
+
def self.unsubscribe(handle)
|
81
|
+
@subscribers.each_value { |v| v.delete_if { |i| i.object_id == handle } }
|
82
|
+
end
|
83
|
+
|
84
|
+
# Indefinitely tail the capped collection looking for new events.
|
85
|
+
# time: the start time from which to start looking for new events.
|
86
|
+
def self.listener(time: @load_time)
|
87
|
+
Thread.current.name = 'rocketjob event'
|
88
|
+
create_capped_collection
|
89
|
+
|
90
|
+
logger.info('Event listener started')
|
91
|
+
tail_capped_collection(time) { |event| process_event(event) }
|
92
|
+
rescue Exception => exc
|
93
|
+
logger.error('#listener Event listener is terminating due to unhandled exception', exc)
|
94
|
+
raise(exc)
|
95
|
+
end
|
96
|
+
|
97
|
+
# Create the capped collection only if it does not exist.
|
98
|
+
# Drop the collection before calling this method to re-create it.
|
99
|
+
def self.create_capped_collection(size: capped_collection_size)
|
100
|
+
if collection_exists?
|
101
|
+
convert_to_capped_collection(size) unless collection.capped?
|
102
|
+
else
|
103
|
+
collection.client[collection_name, {capped: true, size: size}].create
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
private
|
108
|
+
|
109
|
+
@load_time = Time.now.utc
|
110
|
+
@subscribers = Concurrent::Map.new { Concurrent::Array.new }
|
111
|
+
|
112
|
+
def self.add_subscriber(subscriber)
|
113
|
+
name = subscriber.class.event_name
|
114
|
+
@subscribers[name] = @subscribers[name] << subscriber
|
115
|
+
subscriber.object_id
|
116
|
+
end
|
117
|
+
|
118
|
+
def self.tail_capped_collection(time)
|
119
|
+
with(socket_timeout: long_poll_seconds + 10) do
|
120
|
+
filter = {created_at: {'$gt' => time}}
|
121
|
+
collection.
|
122
|
+
find(filter).
|
123
|
+
await_data.
|
124
|
+
cursor_type(:tailable_await).
|
125
|
+
max_await_time_ms(long_poll_seconds * 1000).
|
126
|
+
sort('$natural' => 1).
|
127
|
+
each do |doc|
|
128
|
+
event = Mongoid::Factory.from_db(Event, doc)
|
129
|
+
# Recovery will occur from after the last message read
|
130
|
+
time = event.created_at
|
131
|
+
yield(event)
|
132
|
+
end
|
133
|
+
end
|
134
|
+
rescue Mongo::Error::SocketError, Mongo::Error::SocketTimeoutError, Mongo::Error::OperationFailure, Timeout::Error => exc
|
135
|
+
logger.info("Creating a new cursor and trying again: #{exc.class.name} #{exc.message}")
|
136
|
+
retry
|
137
|
+
end
|
138
|
+
|
139
|
+
# Process a new event, calling registered subscribers.
|
140
|
+
def self.process_event(event)
|
141
|
+
logger.info('Event Received', event.attributes)
|
142
|
+
|
143
|
+
if @subscribers.key?(event.name)
|
144
|
+
@subscribers[event.name].each { |subscriber| subscriber.process_action(event.action, event.parameters) }
|
145
|
+
end
|
146
|
+
|
147
|
+
if @subscribers.key?(ALL_EVENTS)
|
148
|
+
@subscribers[ALL_EVENTS].each { |subscriber| subscriber.process_event(event.name, event.action, event.parameters) }
|
149
|
+
end
|
150
|
+
rescue StandardError => exc
|
151
|
+
logger.error('Unknown subscriber. Continuing..', exc)
|
152
|
+
end
|
153
|
+
|
154
|
+
def self.collection_exists?
|
155
|
+
collection.database.collection_names.include?(collection_name.to_s)
|
156
|
+
end
|
157
|
+
|
158
|
+
# Convert a non-capped collection to capped
|
159
|
+
def self.convert_to_capped_collection(size)
|
160
|
+
collection.database.command('convertToCapped' => collection_name.to_s, 'size' => size)
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
@@ -46,13 +46,6 @@ module RocketJob
|
|
46
46
|
field :queued_retention, type: Integer, user_editable: true, copy_on_restart: true
|
47
47
|
|
48
48
|
def perform
|
49
|
-
if destroy_zombies
|
50
|
-
# Cleanup zombie servers
|
51
|
-
RocketJob::Server.destroy_zombies
|
52
|
-
# Requeue jobs where the worker is in the zombie state and its server has gone away
|
53
|
-
RocketJob::ActiveWorker.requeue_zombies
|
54
|
-
end
|
55
|
-
|
56
49
|
RocketJob::Job.aborted.where(completed_at: {'$lte' => aborted_retention.seconds.ago}).destroy_all if aborted_retention
|
57
50
|
if completed_retention
|
58
51
|
RocketJob::Job.completed.where(completed_at: {'$lte' => completed_retention.seconds.ago}).destroy_all
|
@@ -60,6 +53,13 @@ module RocketJob
|
|
60
53
|
RocketJob::Job.failed.where(completed_at: {'$lte' => failed_retention.seconds.ago}).destroy_all if failed_retention
|
61
54
|
RocketJob::Job.paused.where(completed_at: {'$lte' => paused_retention.seconds.ago}).destroy_all if paused_retention
|
62
55
|
RocketJob::Job.queued.where(created_at: {'$lte' => queued_retention.seconds.ago}).destroy_all if queued_retention
|
56
|
+
|
57
|
+
if destroy_zombies
|
58
|
+
# Cleanup zombie servers
|
59
|
+
RocketJob::Server.destroy_zombies
|
60
|
+
# Requeue jobs where the worker is in the zombie state and its server has gone away
|
61
|
+
RocketJob::ActiveWorker.requeue_zombies
|
62
|
+
end
|
63
63
|
end
|
64
64
|
end
|
65
65
|
end
|
@@ -25,7 +25,7 @@ module RocketJob
|
|
25
25
|
# end
|
26
26
|
#
|
27
27
|
# Performance
|
28
|
-
# - On
|
28
|
+
# - On CRuby an empty transaction block call takes about 1ms.
|
29
29
|
# - On JRuby an empty transaction block call takes about 55ms.
|
30
30
|
#
|
31
31
|
# Note:
|
@@ -1,4 +1,11 @@
|
|
1
1
|
module RocketJob
|
2
|
+
def self.create_indexes
|
3
|
+
# Ensure models with indexes are loaded into memory first
|
4
|
+
Job.create_indexes
|
5
|
+
Server.create_indexes
|
6
|
+
DirmonEntry.create_indexes
|
7
|
+
end
|
8
|
+
|
2
9
|
# Whether the current process is running inside a Rocket Job server process.
|
3
10
|
def self.server?
|
4
11
|
@server
|
data/lib/rocket_job/server.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
require '
|
2
|
-
require '
|
1
|
+
require 'rocket_job/server/model'
|
2
|
+
require 'rocket_job/server/state_machine'
|
3
|
+
|
3
4
|
module RocketJob
|
4
5
|
# Server
|
5
6
|
#
|
@@ -29,359 +30,7 @@ module RocketJob
|
|
29
30
|
include Plugins::Document
|
30
31
|
include Plugins::StateMachine
|
31
32
|
include SemanticLogger::Loggable
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
# Unique Name of this server instance
|
36
|
-
# Default: `host name:PID`
|
37
|
-
# The unique name is used on re-start to re-queue any jobs that were being processed
|
38
|
-
# at the time the server unexpectedly terminated, if any
|
39
|
-
field :name, type: String, default: -> { "#{SemanticLogger.host}:#{$$}" }
|
40
|
-
|
41
|
-
# The maximum number of workers this server should start
|
42
|
-
# If set, it will override the default value in RocketJob::Config
|
43
|
-
field :max_workers, type: Integer, default: -> { Config.instance.max_workers }
|
44
|
-
|
45
|
-
# When this server process was started
|
46
|
-
field :started_at, type: Time
|
47
|
-
|
48
|
-
# Filter to apply to control which job classes this server can process
|
49
|
-
field :yaml_filter, type: String
|
50
|
-
|
51
|
-
# The heartbeat information for this server
|
52
|
-
embeds_one :heartbeat, class_name: 'RocketJob::Heartbeat'
|
53
|
-
|
54
|
-
# Current state
|
55
|
-
# Internal use only. Do not set this field directly
|
56
|
-
field :state, type: Symbol, default: :starting
|
57
|
-
|
58
|
-
index({name: 1}, background: true, unique: true, drop_dups: true)
|
59
|
-
|
60
|
-
validates_presence_of :state, :name, :max_workers
|
61
|
-
|
62
|
-
# States
|
63
|
-
# :starting -> :running -> :paused
|
64
|
-
# -> :stopping
|
65
|
-
aasm column: :state, whiny_persistence: true do
|
66
|
-
state :starting, initial: true
|
67
|
-
state :running
|
68
|
-
state :paused
|
69
|
-
state :stopping
|
70
|
-
|
71
|
-
event :started do
|
72
|
-
transitions from: :starting, to: :running
|
73
|
-
before do
|
74
|
-
self.started_at = Time.now
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
event :pause do
|
79
|
-
transitions from: :running, to: :paused
|
80
|
-
end
|
81
|
-
|
82
|
-
event :resume do
|
83
|
-
transitions from: :paused, to: :running
|
84
|
-
end
|
85
|
-
|
86
|
-
event :stop do
|
87
|
-
transitions from: :running, to: :stopping
|
88
|
-
transitions from: :paused, to: :stopping
|
89
|
-
transitions from: :starting, to: :stopping
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
# Requeue any jobs being worked by this server when it is destroyed
|
94
|
-
before_destroy :requeue_jobs
|
95
|
-
|
96
|
-
# Destroy's all instances of zombie servers and requeues any jobs still "running"
|
97
|
-
# on those servers.
|
98
|
-
def self.destroy_zombies
|
99
|
-
count = 0
|
100
|
-
each do |server|
|
101
|
-
next unless server.zombie?
|
102
|
-
logger.warn "Destroying zombie server #{server.name}, and requeueing its jobs"
|
103
|
-
server.destroy
|
104
|
-
count += 1
|
105
|
-
end
|
106
|
-
count
|
107
|
-
end
|
108
|
-
|
109
|
-
# Stop all running, paused, or starting servers
|
110
|
-
def self.stop_all
|
111
|
-
where(:state.in => %i[running paused starting]).each(&:stop!)
|
112
|
-
end
|
113
|
-
|
114
|
-
# Pause all running servers
|
115
|
-
def self.pause_all
|
116
|
-
running.each(&:pause!)
|
117
|
-
end
|
118
|
-
|
119
|
-
# Resume all paused servers
|
120
|
-
def self.resume_all
|
121
|
-
paused.each(&:resume!)
|
122
|
-
end
|
123
|
-
|
124
|
-
# Returns [Hash<String:Integer>] of the number of servers in each state.
|
125
|
-
# Note: If there are no servers in that particular state then the hash will not have a value for it.
|
126
|
-
#
|
127
|
-
# Example servers in every state:
|
128
|
-
# RocketJob::Server.counts_by_state
|
129
|
-
# # => {
|
130
|
-
# :aborted => 1,
|
131
|
-
# :completed => 37,
|
132
|
-
# :failed => 1,
|
133
|
-
# :paused => 3,
|
134
|
-
# :queued => 4,
|
135
|
-
# :running => 1,
|
136
|
-
# :queued_now => 1,
|
137
|
-
# :scheduled => 3
|
138
|
-
# }
|
139
|
-
#
|
140
|
-
# Example no servers active:
|
141
|
-
# RocketJob::Server.counts_by_state
|
142
|
-
# # => {}
|
143
|
-
def self.counts_by_state
|
144
|
-
counts = {}
|
145
|
-
collection.aggregate(
|
146
|
-
[
|
147
|
-
{
|
148
|
-
'$group' => {
|
149
|
-
_id: '$state',
|
150
|
-
count: {'$sum' => 1}
|
151
|
-
}
|
152
|
-
}
|
153
|
-
]
|
154
|
-
).each do |result|
|
155
|
-
counts[result['_id'].to_sym] = result['count']
|
156
|
-
end
|
157
|
-
counts
|
158
|
-
end
|
159
|
-
|
160
|
-
# On MRI the 'concurrent-ruby-ext' gem may not be loaded
|
161
|
-
if defined?(Concurrent::JavaAtomicBoolean) || defined?(Concurrent::CAtomicBoolean)
|
162
|
-
# Returns [true|false] whether the shutdown indicator has been set for this server process
|
163
|
-
def self.shutdown?
|
164
|
-
@shutdown.value
|
165
|
-
end
|
166
|
-
|
167
|
-
# Set shutdown indicator for this server process
|
168
|
-
def self.shutdown!
|
169
|
-
@shutdown.make_true
|
170
|
-
end
|
171
|
-
|
172
|
-
@shutdown = Concurrent::AtomicBoolean.new(false)
|
173
|
-
else
|
174
|
-
# Returns [true|false] whether the shutdown indicator has been set for this server process
|
175
|
-
def self.shutdown?
|
176
|
-
@shutdown
|
177
|
-
end
|
178
|
-
|
179
|
-
# Set shutdown indicator for this server process
|
180
|
-
def self.shutdown!
|
181
|
-
@shutdown = true
|
182
|
-
end
|
183
|
-
|
184
|
-
@shutdown = false
|
185
|
-
end
|
186
|
-
|
187
|
-
# Run the server process
|
188
|
-
# Attributes supplied are passed to #new
|
189
|
-
def self.run(attrs = {})
|
190
|
-
Thread.current.name = 'rocketjob main'
|
191
|
-
# Create Indexes on server startup
|
192
|
-
::Mongoid::Tasks::Database.create_indexes
|
193
|
-
register_signal_handlers
|
194
|
-
|
195
|
-
server = create!(attrs)
|
196
|
-
server.send(:run)
|
197
|
-
ensure
|
198
|
-
server&.destroy
|
199
|
-
end
|
200
|
-
|
201
|
-
# Returns [Boolean] whether the server is shutting down
|
202
|
-
def shutdown?
|
203
|
-
self.class.shutdown? || !running?
|
204
|
-
end
|
205
|
-
|
206
|
-
# Scope for all zombie servers
|
207
|
-
def self.zombies(missed = 4)
|
208
|
-
dead_seconds = Config.instance.heartbeat_seconds * missed
|
209
|
-
last_heartbeat_time = Time.now - dead_seconds
|
210
|
-
where(
|
211
|
-
:state.in => %i[stopping running paused],
|
212
|
-
'$or' => [
|
213
|
-
{'heartbeat.updated_at' => {'$exists' => false}},
|
214
|
-
{'heartbeat.updated_at' => {'$lte' => last_heartbeat_time}}
|
215
|
-
]
|
216
|
-
)
|
217
|
-
end
|
218
|
-
|
219
|
-
# Returns [true|false] if this server has missed at least the last 4 heartbeats
|
220
|
-
#
|
221
|
-
# Possible causes for a server to miss its heartbeats:
|
222
|
-
# - The server process has died
|
223
|
-
# - The server process is "hanging"
|
224
|
-
# - The server is no longer able to communicate with the MongoDB Server
|
225
|
-
def zombie?(missed = 4)
|
226
|
-
return false unless running? || stopping? || paused?
|
227
|
-
return true if heartbeat.nil? || heartbeat.updated_at.nil?
|
228
|
-
dead_seconds = Config.instance.heartbeat_seconds * missed
|
229
|
-
(Time.now - heartbeat.updated_at) >= dead_seconds
|
230
|
-
end
|
231
|
-
|
232
|
-
# Where clause filter to apply to workers looking for jobs
|
233
|
-
def filter
|
234
|
-
YAML.load(yaml_filter) if yaml_filter
|
235
|
-
end
|
236
|
-
|
237
|
-
def filter=(hash)
|
238
|
-
self.yaml_filter = hash.nil? ? nil : hash.to_yaml
|
239
|
-
end
|
240
|
-
|
241
|
-
private
|
242
|
-
|
243
|
-
# Returns [Array<Worker>] collection of workers
|
244
|
-
def workers
|
245
|
-
@workers ||= []
|
246
|
-
end
|
247
|
-
|
248
|
-
# Management Thread
|
249
|
-
def run
|
250
|
-
logger.info "Using MongoDB Database: #{RocketJob::Job.collection.database.name}"
|
251
|
-
logger.info('Running with filter', filter) if filter
|
252
|
-
build_heartbeat(updated_at: Time.now, workers: 0)
|
253
|
-
started!
|
254
|
-
logger.info 'Rocket Job Server started'
|
255
|
-
|
256
|
-
run_workers
|
257
|
-
|
258
|
-
logger.info 'Waiting for workers to stop'
|
259
|
-
# Tell each worker to shutdown cleanly
|
260
|
-
workers.each(&:shutdown!)
|
261
|
-
|
262
|
-
while (worker = workers.first)
|
263
|
-
if worker.join(5)
|
264
|
-
# Worker thread is dead
|
265
|
-
workers.shift
|
266
|
-
else
|
267
|
-
# Timeout waiting for worker to stop
|
268
|
-
find_and_update(
|
269
|
-
'heartbeat.updated_at' => Time.now,
|
270
|
-
'heartbeat.workers' => worker_count
|
271
|
-
)
|
272
|
-
end
|
273
|
-
end
|
274
|
-
|
275
|
-
logger.info 'Shutdown'
|
276
|
-
rescue ::Mongoid::Errors::DocumentNotFound
|
277
|
-
logger.warn('Server has been destroyed. Going down hard!')
|
278
|
-
rescue Exception => exc
|
279
|
-
logger.error('RocketJob::Server is stopping due to an exception', exc)
|
280
|
-
ensure
|
281
|
-
# Logs the backtrace for each running worker
|
282
|
-
workers.each { |worker| logger.backtrace(thread: worker.thread) if worker.thread && worker.alive? }
|
283
|
-
end
|
284
|
-
|
285
|
-
def run_workers
|
286
|
-
stagger = true
|
287
|
-
while running? || paused?
|
288
|
-
SemanticLogger.silence(:info) do
|
289
|
-
find_and_update(
|
290
|
-
'heartbeat.updated_at' => Time.now,
|
291
|
-
'heartbeat.workers' => worker_count
|
292
|
-
)
|
293
|
-
end
|
294
|
-
if paused?
|
295
|
-
workers.each(&:shutdown!)
|
296
|
-
stagger = true
|
297
|
-
end
|
298
|
-
|
299
|
-
# In case number of threads has been modified
|
300
|
-
adjust_workers(stagger)
|
301
|
-
stagger = false
|
302
|
-
|
303
|
-
# Stop server if shutdown indicator was set
|
304
|
-
if self.class.shutdown? && may_stop?
|
305
|
-
stop!
|
306
|
-
else
|
307
|
-
sleep Config.instance.heartbeat_seconds
|
308
|
-
end
|
309
|
-
end
|
310
|
-
end
|
311
|
-
|
312
|
-
# Returns [Fixnum] number of workers (threads) that are alive
|
313
|
-
def worker_count
|
314
|
-
workers.count(&:alive?)
|
315
|
-
end
|
316
|
-
|
317
|
-
def next_worker_id
|
318
|
-
@worker_id ||= 0
|
319
|
-
@worker_id += 1
|
320
|
-
end
|
321
|
-
|
322
|
-
# Re-adjust the number of running workers to get it up to the
|
323
|
-
# required number of workers
|
324
|
-
# Parameters
|
325
|
-
# stagger_workers
|
326
|
-
# Whether to stagger when the workers poll for work the first time
|
327
|
-
# It spreads out the queue polling over the max_poll_seconds so
|
328
|
-
# that not all workers poll at the same time
|
329
|
-
# The worker also respond faster than max_poll_seconds when a new
|
330
|
-
# job is added.
|
331
|
-
def adjust_workers(stagger_workers = false)
|
332
|
-
count = worker_count
|
333
|
-
# Cleanup workers that have stopped
|
334
|
-
if count != workers.count
|
335
|
-
logger.info "Cleaning up #{workers.count - count} workers that went away"
|
336
|
-
workers.delete_if { |t| !t.alive? }
|
337
|
-
end
|
338
|
-
|
339
|
-
return unless running?
|
340
|
-
|
341
|
-
# Need to add more workers?
|
342
|
-
return unless count < max_workers
|
343
|
-
|
344
|
-
worker_count = max_workers - count
|
345
|
-
logger.info "Starting #{worker_count} workers"
|
346
|
-
worker_count.times.each do
|
347
|
-
sleep(Config.instance.max_poll_seconds.to_f / max_workers) if stagger_workers
|
348
|
-
return if shutdown?
|
349
|
-
# Start worker
|
350
|
-
begin
|
351
|
-
workers << Worker.new(id: next_worker_id, server_name: name, filter: filter)
|
352
|
-
rescue Exception => exc
|
353
|
-
logger.fatal('Cannot start worker', exc)
|
354
|
-
end
|
355
|
-
end
|
356
|
-
end
|
357
|
-
|
358
|
-
# Register handlers for the various signals
|
359
|
-
# Term:
|
360
|
-
# Perform clean shutdown
|
361
|
-
#
|
362
|
-
def self.register_signal_handlers
|
363
|
-
Signal.trap 'SIGTERM' do
|
364
|
-
shutdown!
|
365
|
-
message = 'Shutdown signal (SIGTERM) received. Will shutdown as soon as active jobs/slices have completed.'
|
366
|
-
# Logging uses a mutex to access Queue on MRI/CRuby
|
367
|
-
defined?(JRuby) ? logger.warn(message) : puts(message)
|
368
|
-
end
|
369
|
-
|
370
|
-
Signal.trap 'INT' do
|
371
|
-
shutdown!
|
372
|
-
message = 'Shutdown signal (INT) received. Will shutdown as soon as active jobs/slices have completed.'
|
373
|
-
# Logging uses a mutex to access Queue on MRI/CRuby
|
374
|
-
defined?(JRuby) ? logger.warn(message) : puts(message)
|
375
|
-
end
|
376
|
-
rescue StandardError
|
377
|
-
logger.warn 'SIGTERM handler not installed. Not able to shutdown gracefully'
|
378
|
-
end
|
379
|
-
|
380
|
-
private_class_method :register_signal_handlers
|
381
|
-
|
382
|
-
# Requeue any jobs assigned to this server when it is destroyed
|
383
|
-
def requeue_jobs
|
384
|
-
RocketJob::Job.requeue_dead_server(name)
|
385
|
-
end
|
33
|
+
include Server::Model
|
34
|
+
include Server::StateMachine
|
386
35
|
end
|
387
36
|
end
|