inst-jobs 2.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/db/migrate/20101216224513_create_delayed_jobs.rb +9 -7
- data/db/migrate/20110531144916_cleanup_delayed_jobs_indexes.rb +8 -13
- data/db/migrate/20110610213249_optimize_delayed_jobs.rb +8 -8
- data/db/migrate/20110831210257_add_delayed_jobs_next_in_strand.rb +25 -25
- data/db/migrate/20120510004759_delayed_jobs_delete_trigger_lock_for_update.rb +4 -8
- data/db/migrate/20120531150712_drop_psql_jobs_pop_fn.rb +1 -3
- data/db/migrate/20120607164022_delayed_jobs_use_advisory_locks.rb +11 -15
- data/db/migrate/20120607181141_index_jobs_on_locked_by.rb +1 -1
- data/db/migrate/20120608191051_add_jobs_run_at_index.rb +2 -2
- data/db/migrate/20120927184213_change_delayed_jobs_handler_to_text.rb +1 -1
- data/db/migrate/20140505215510_copy_failed_jobs_original_id.rb +2 -3
- data/db/migrate/20150807133223_add_max_concurrent_to_jobs.rb +9 -13
- data/db/migrate/20151210162949_improve_max_concurrent.rb +4 -8
- data/db/migrate/20161206323555_add_back_default_string_limits_jobs.rb +3 -2
- data/db/migrate/20181217155351_speed_up_max_concurrent_triggers.rb +13 -17
- data/db/migrate/20200330230722_add_id_to_get_delayed_jobs_index.rb +8 -8
- data/db/migrate/20200824222232_speed_up_max_concurrent_delete_trigger.rb +72 -77
- data/db/migrate/20200825011002_add_strand_order_override.rb +93 -97
- data/db/migrate/20210809145804_add_n_strand_index.rb +12 -0
- data/db/migrate/20210812210128_add_singleton_column.rb +200 -0
- data/db/migrate/20210917232626_add_delete_conflicting_singletons_before_unlock_trigger.rb +27 -0
- data/db/migrate/20210928174754_fix_singleton_condition_in_before_insert.rb +56 -0
- data/db/migrate/20210929204903_update_conflicting_singleton_function_to_use_index.rb +27 -0
- data/db/migrate/20211101190934_update_after_delete_trigger_for_singleton_index.rb +137 -0
- data/db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb +171 -0
- data/db/migrate/20211220112800_fix_singleton_race_condition_insert.rb +59 -0
- data/db/migrate/20211220113000_fix_singleton_race_condition_delete.rb +207 -0
- data/db/migrate/20220127091200_fix_singleton_unique_constraint.rb +31 -0
- data/db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb +60 -0
- data/db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb +209 -0
- data/db/migrate/20220203063200_remove_old_singleton_index.rb +31 -0
- data/db/migrate/20220328152900_add_failed_jobs_indicies.rb +12 -0
- data/exe/inst_jobs +3 -2
- data/lib/delayed/backend/active_record.rb +226 -168
- data/lib/delayed/backend/base.rb +119 -72
- data/lib/delayed/batch.rb +11 -9
- data/lib/delayed/cli.rb +98 -84
- data/lib/delayed/core_ext/kernel.rb +4 -2
- data/lib/delayed/daemon.rb +70 -74
- data/lib/delayed/job_tracking.rb +26 -25
- data/lib/delayed/lifecycle.rb +28 -23
- data/lib/delayed/log_tailer.rb +17 -17
- data/lib/delayed/logging.rb +13 -16
- data/lib/delayed/message_sending.rb +43 -52
- data/lib/delayed/performable_method.rb +6 -8
- data/lib/delayed/periodic.rb +72 -68
- data/lib/delayed/plugin.rb +2 -4
- data/lib/delayed/pool.rb +205 -168
- data/lib/delayed/rails_reloader_plugin.rb +30 -0
- data/lib/delayed/server/helpers.rb +6 -6
- data/lib/delayed/server.rb +51 -54
- data/lib/delayed/settings.rb +96 -81
- data/lib/delayed/testing.rb +21 -22
- data/lib/delayed/version.rb +1 -1
- data/lib/delayed/work_queue/in_process.rb +21 -17
- data/lib/delayed/work_queue/parent_process/client.rb +55 -53
- data/lib/delayed/work_queue/parent_process/server.rb +245 -207
- data/lib/delayed/work_queue/parent_process.rb +52 -53
- data/lib/delayed/worker/consul_health_check.rb +32 -33
- data/lib/delayed/worker/health_check.rb +35 -27
- data/lib/delayed/worker/null_health_check.rb +3 -1
- data/lib/delayed/worker/process_helper.rb +11 -12
- data/lib/delayed/worker.rb +257 -244
- data/lib/delayed/yaml_extensions.rb +12 -10
- data/lib/delayed_job.rb +37 -37
- data/lib/inst-jobs.rb +1 -1
- data/spec/active_record_job_spec.rb +152 -139
- data/spec/delayed/cli_spec.rb +7 -7
- data/spec/delayed/daemon_spec.rb +10 -9
- data/spec/delayed/message_sending_spec.rb +16 -9
- data/spec/delayed/periodic_spec.rb +14 -21
- data/spec/delayed/server_spec.rb +38 -38
- data/spec/delayed/settings_spec.rb +26 -25
- data/spec/delayed/work_queue/in_process_spec.rb +8 -9
- data/spec/delayed/work_queue/parent_process/client_spec.rb +17 -12
- data/spec/delayed/work_queue/parent_process/server_spec.rb +118 -42
- data/spec/delayed/work_queue/parent_process_spec.rb +21 -23
- data/spec/delayed/worker/consul_health_check_spec.rb +37 -50
- data/spec/delayed/worker/health_check_spec.rb +60 -52
- data/spec/delayed/worker_spec.rb +53 -24
- data/spec/sample_jobs.rb +45 -15
- data/spec/shared/delayed_batch.rb +74 -67
- data/spec/shared/delayed_method.rb +143 -102
- data/spec/shared/performable_method.rb +39 -38
- data/spec/shared/shared_backend.rb +801 -440
- data/spec/shared/testing.rb +14 -14
- data/spec/shared/worker.rb +157 -149
- data/spec/shared_jobs_specs.rb +13 -13
- data/spec/spec_helper.rb +57 -56
- metadata +183 -103
- data/lib/delayed/backend/redis/bulk_update.lua +0 -50
- data/lib/delayed/backend/redis/destroy_job.lua +0 -2
- data/lib/delayed/backend/redis/enqueue.lua +0 -29
- data/lib/delayed/backend/redis/fail_job.lua +0 -5
- data/lib/delayed/backend/redis/find_available.lua +0 -3
- data/lib/delayed/backend/redis/functions.rb +0 -59
- data/lib/delayed/backend/redis/get_and_lock_next_available.lua +0 -17
- data/lib/delayed/backend/redis/includes/jobs_common.lua +0 -203
- data/lib/delayed/backend/redis/job.rb +0 -535
- data/lib/delayed/backend/redis/set_running.lua +0 -5
- data/lib/delayed/backend/redis/tickle_strand.lua +0 -2
- data/spec/gemfiles/42.gemfile +0 -7
- data/spec/gemfiles/50.gemfile +0 -7
- data/spec/gemfiles/51.gemfile +0 -7
- data/spec/gemfiles/52.gemfile +0 -7
- data/spec/gemfiles/60.gemfile +0 -7
- data/spec/redis_job_spec.rb +0 -148
@@ -1,243 +1,281 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require "activerecord-pg-extensions"
|
4
|
+
|
3
5
|
module Delayed
|
4
|
-
module WorkQueue
|
5
|
-
class ParentProcess
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
include Delayed::Logging
|
10
|
-
SIGNALS = %i{INT TERM QUIT}
|
11
|
-
|
12
|
-
def initialize(listen_socket, parent_pid: nil, config: Settings.parent_process)
|
13
|
-
@listen_socket = listen_socket
|
14
|
-
@parent_pid = parent_pid
|
15
|
-
@clients = {}
|
16
|
-
@waiting_clients = {}
|
17
|
-
@prefetched_jobs = {}
|
18
|
-
|
19
|
-
@config = config
|
20
|
-
@client_timeout = config['server_socket_timeout'] || 10.0 # left for backwards compat
|
21
|
-
|
22
|
-
@exit = false
|
23
|
-
@self_pipe = IO.pipe
|
24
|
-
end
|
6
|
+
module WorkQueue
|
7
|
+
class ParentProcess
|
8
|
+
class Server
|
9
|
+
attr_reader :clients, :listen_socket
|
25
10
|
|
26
|
-
|
27
|
-
|
28
|
-
end
|
11
|
+
include Delayed::Logging
|
12
|
+
SIGNALS = %i[INT TERM QUIT].freeze
|
29
13
|
|
30
|
-
|
31
|
-
|
32
|
-
|
14
|
+
def initialize(listen_socket, parent_pid: nil, config: Settings.parent_process)
|
15
|
+
@listen_socket = listen_socket
|
16
|
+
@parent_pid = parent_pid
|
17
|
+
@clients = {}
|
18
|
+
@waiting_clients = {}
|
19
|
+
@prefetched_jobs = {}
|
33
20
|
|
34
|
-
|
35
|
-
|
36
|
-
def run
|
37
|
-
logger.debug "Starting work queue process"
|
21
|
+
@config = config
|
22
|
+
@client_timeout = config["server_socket_timeout"] || 10.0 # left for backwards compat
|
38
23
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
# from the IO.select we're using to wait on clients.
|
43
|
-
trap(sig) { @exit = true; @self_pipe[1].write_nonblock('.', exception: false) }
|
44
|
-
end
|
24
|
+
@exit = false
|
25
|
+
@self_pipe = IO.pipe
|
26
|
+
end
|
45
27
|
|
46
|
-
|
47
|
-
|
48
|
-
run_once
|
49
|
-
if last_orphaned_prefetched_jobs_purge + 15 * 60 < Job.db_time_now
|
50
|
-
Job.unlock_orphaned_prefetched_jobs
|
51
|
-
last_orphaned_prefetched_jobs_purge = Job.db_time_now
|
28
|
+
def connected_clients
|
29
|
+
@clients.size
|
52
30
|
end
|
53
|
-
end
|
54
31
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
ensure
|
59
|
-
unlock_all_prefetched_jobs
|
60
|
-
end
|
32
|
+
def all_workers_idle?
|
33
|
+
@clients.none? { |_, c| c.working }
|
34
|
+
end
|
61
35
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
# if they're not keeping up, the jobs will slip back in time, and suddenly we'll become
|
67
|
-
# active and quickly pick up all the jobs we can. The latency is calculated to ensure that
|
68
|
-
# an active worker is guaranteed to have attempted to fetch new jobs in the meantime
|
69
|
-
forced_latency = Settings.sleep_delay + Settings.sleep_delay_stagger * 2 if all_workers_idle?
|
70
|
-
timeout = Settings.sleep_delay + (rand * Settings.sleep_delay_stagger)
|
71
|
-
readable, _, _ = IO.select(handles, nil, nil, timeout)
|
72
|
-
if readable
|
73
|
-
readable.each { |s| handle_read(s) }
|
74
|
-
end
|
75
|
-
Delayed::Worker.lifecycle.run_callbacks(:check_for_work, self) do
|
76
|
-
check_for_work(forced_latency: forced_latency)
|
77
|
-
end
|
78
|
-
unlock_timed_out_prefetched_jobs
|
79
|
-
end
|
36
|
+
# run the server queue worker
|
37
|
+
# this method does not return, only exits or raises an exception
|
38
|
+
def run
|
39
|
+
logger.debug "Starting work queue process"
|
80
40
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
end
|
91
|
-
end
|
41
|
+
SIGNALS.each do |sig|
|
42
|
+
# We're not doing any aggressive exiting here since we really want
|
43
|
+
# prefetched jobs to be unlocked and we're going to wake up the process
|
44
|
+
# from the IO.select we're using to wait on clients.
|
45
|
+
trap(sig) do
|
46
|
+
@exit = true
|
47
|
+
@self_pipe[1].write_nonblock(".", exception: false)
|
48
|
+
end
|
49
|
+
end
|
92
50
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
51
|
+
last_orphaned_prefetched_jobs_purge = Job.db_time_now - rand(15 * 60)
|
52
|
+
until exit?
|
53
|
+
run_once
|
54
|
+
if last_orphaned_prefetched_jobs_purge + (15 * 60) < Job.db_time_now
|
55
|
+
Job.unlock_orphaned_prefetched_jobs
|
56
|
+
last_orphaned_prefetched_jobs_purge = Job.db_time_now
|
57
|
+
end
|
58
|
+
end
|
59
|
+
rescue => e
|
60
|
+
logger.error "WorkQueue Server died: #{e.inspect}"
|
61
|
+
raise
|
62
|
+
ensure
|
63
|
+
unlock_all_prefetched_jobs
|
64
|
+
end
|
104
65
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
drop_socket(socket)
|
122
|
-
end
|
66
|
+
def run_once
|
67
|
+
handles = @clients.keys + [@listen_socket, @self_pipe[0]]
|
68
|
+
# if we're currently idle, then force a "latency" to job fetching - don't
|
69
|
+
# fetch recently queued jobs, allowing busier workers to fetch them first.
|
70
|
+
# if they're not keeping up, the jobs will slip back in time, and suddenly we'll become
|
71
|
+
# active and quickly pick up all the jobs we can. The latency is calculated to ensure that
|
72
|
+
# an active worker is guaranteed to have attempted to fetch new jobs in the meantime
|
73
|
+
forced_latency = Settings.sleep_delay + (Settings.sleep_delay_stagger * 2) if all_workers_idle?
|
74
|
+
timeout = Settings.sleep_delay + (rand * Settings.sleep_delay_stagger)
|
75
|
+
readable, = IO.select(handles, nil, nil, timeout)
|
76
|
+
readable&.each { |s| handle_read(s) }
|
77
|
+
Delayed::Worker.lifecycle.run_callbacks(:check_for_work, self) do
|
78
|
+
check_for_work(forced_latency: forced_latency)
|
79
|
+
end
|
80
|
+
unlock_timed_out_prefetched_jobs
|
81
|
+
end
|
123
82
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
unless job.transfer_lock!(from: prefetch_owner, to: client.name)
|
134
|
-
workers.unshift(client)
|
135
|
-
next
|
83
|
+
def handle_read(socket)
|
84
|
+
if socket == @listen_socket
|
85
|
+
handle_accept
|
86
|
+
elsif socket == @self_pipe[0]
|
87
|
+
# We really don't care about the contents of the pipe, we just need to
|
88
|
+
# wake up.
|
89
|
+
@self_pipe[0].read_nonblock(11, exception: false)
|
90
|
+
else
|
91
|
+
handle_request(socket)
|
136
92
|
end
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
93
|
+
end
|
94
|
+
|
95
|
+
# Any error on the listen socket other than WaitReadable will bubble up
|
96
|
+
# and terminate the work queue process, to be restarted by the parent daemon.
|
97
|
+
def handle_accept
|
98
|
+
socket, _addr = @listen_socket.accept_nonblock
|
99
|
+
@clients[socket] = ClientState.new(false, socket) if socket
|
100
|
+
rescue IO::WaitReadable
|
101
|
+
logger.error("Server attempted to read listen_socket but failed with IO::WaitReadable")
|
102
|
+
# ignore and just try accepting again next time through the loop
|
103
|
+
end
|
104
|
+
|
105
|
+
def handle_request(socket)
|
106
|
+
# There is an assumption here that the client will never send a partial
|
107
|
+
# request and then leave the socket open. Doing so would leave us hanging
|
108
|
+
# in Marshal.load forever. This is only a reasonable assumption because we
|
109
|
+
# control the client.
|
110
|
+
client = @clients[socket]
|
111
|
+
if socket.eof?
|
112
|
+
logger.debug("Client #{client.name} closed connection")
|
113
|
+
return drop_socket(socket)
|
114
|
+
end
|
115
|
+
worker_name, worker_config = Marshal.load(socket)
|
116
|
+
client.name = worker_name
|
117
|
+
client.working = false
|
118
|
+
(@waiting_clients[worker_config] ||= []) << client
|
119
|
+
rescue SystemCallError, IOError => e
|
120
|
+
logger.error("Receiving message from client (#{socket}) failed: #{e.inspect}")
|
121
|
+
drop_socket(socket)
|
122
|
+
end
|
123
|
+
|
124
|
+
def check_for_work(forced_latency: nil)
|
125
|
+
@waiting_clients.each do |(worker_config, workers)|
|
126
|
+
prefetched_jobs = @prefetched_jobs[worker_config] ||= []
|
127
|
+
logger.debug("I have #{prefetched_jobs.length} jobs for #{workers.length} waiting workers")
|
128
|
+
while !prefetched_jobs.empty? && !workers.empty?
|
129
|
+
job = prefetched_jobs.shift
|
130
|
+
client = workers.shift
|
131
|
+
# couldn't re-lock it for some reason
|
132
|
+
logger.debug("Transferring prefetched job to #{client.name}")
|
133
|
+
unless job.transfer_lock!(from: prefetch_owner, to: client.name)
|
134
|
+
workers.unshift(client)
|
135
|
+
next
|
136
|
+
end
|
137
|
+
client.working = true
|
138
|
+
begin
|
139
|
+
logger.debug("Sending prefetched job #{job.id} to #{client.name}")
|
140
|
+
client_timeout { Marshal.dump(job, client.socket) }
|
141
|
+
rescue SystemCallError, IOError, Timeout::Error => e
|
142
|
+
logger.error("Failed to send pre-fetched job to #{client.name}: #{e.inspect}")
|
143
|
+
drop_socket(client.socket)
|
144
|
+
Delayed::Job.unlock([job])
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
next if workers.empty?
|
149
|
+
|
150
|
+
logger.debug("Fetching new work for #{workers.length} workers")
|
151
|
+
jobs_to_send = []
|
152
|
+
|
153
|
+
Delayed::Worker.lifecycle.run_callbacks(:work_queue_pop, self, worker_config) do
|
154
|
+
recipients = workers.map(&:name)
|
155
|
+
|
156
|
+
response = Delayed::Job.get_and_lock_next_available(
|
157
|
+
recipients,
|
158
|
+
worker_config[:queue],
|
159
|
+
worker_config[:min_priority],
|
160
|
+
worker_config[:max_priority],
|
161
|
+
prefetch: (Settings.fetch_batch_size * (worker_config[:workers] || 1)) - recipients.length,
|
162
|
+
prefetch_owner: prefetch_owner,
|
163
|
+
forced_latency: forced_latency
|
164
|
+
)
|
165
|
+
logger.debug(
|
166
|
+
"Fetched and locked #{response.values.flatten.size} new jobs for workers (#{response.keys.join(', ')})."
|
167
|
+
)
|
168
|
+
response.each do |(worker_name, locked_jobs)|
|
169
|
+
if worker_name == prefetch_owner
|
170
|
+
# it's actually an array of all the extra jobs
|
171
|
+
logger.debug(
|
172
|
+
"Adding prefetched jobs #{locked_jobs.length} to prefetched array (size: #{prefetched_jobs.count})"
|
173
|
+
)
|
174
|
+
prefetched_jobs.concat(locked_jobs)
|
175
|
+
next
|
176
|
+
end
|
177
|
+
client = workers.find { |worker| worker.name == worker_name }
|
178
|
+
client.working = true
|
179
|
+
jobs_to_send << [client, locked_jobs]
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
jobs_to_send.each do |(recipient, job_to_send)|
|
184
|
+
@waiting_clients[worker_config].delete(recipient)
|
185
|
+
begin
|
186
|
+
logger.debug("Sending job #{job_to_send.id} to #{recipient.name}")
|
187
|
+
client_timeout { Marshal.dump(job_to_send, recipient.socket) }
|
188
|
+
rescue SystemCallError, IOError, Timeout::Error => e
|
189
|
+
logger.error("Failed to send job to #{recipient.name}: #{e.inspect}")
|
190
|
+
drop_socket(recipient.socket)
|
191
|
+
Delayed::Job.unlock([job_to_send])
|
192
|
+
end
|
193
|
+
end
|
145
194
|
end
|
146
195
|
end
|
147
196
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
prefetched_jobs
|
170
|
-
next
|
197
|
+
def unlock_prefetched_jobs
|
198
|
+
@prefetched_jobs.each do |(worker_config, jobs)|
|
199
|
+
next if jobs.empty?
|
200
|
+
next if block_given? && !yield(jobs)
|
201
|
+
|
202
|
+
connection = Delayed::Job.connection
|
203
|
+
connection.transaction do
|
204
|
+
# make absolutely sure we don't get hung up and leave things
|
205
|
+
# locked in the database
|
206
|
+
if connection.postgresql_version >= 9_06_00 # rubocop:disable Style/NumericLiterals
|
207
|
+
connection.idle_in_transaction_session_timeout = 5
|
208
|
+
end
|
209
|
+
# relatively short timeout for acquiring the lock
|
210
|
+
connection.statement_timeout = Settings.sleep_delay
|
211
|
+
Delayed::Job.advisory_lock(Delayed::Job.prefetch_jobs_lock_name)
|
212
|
+
|
213
|
+
# this query might take longer, and we really want to get it
|
214
|
+
# done if we got the lock, but still don't want an inadvertent
|
215
|
+
# hang
|
216
|
+
connection.statement_timeout = 30
|
217
|
+
Delayed::Job.unlock(jobs)
|
218
|
+
@prefetched_jobs[worker_config] = []
|
171
219
|
end
|
172
|
-
|
173
|
-
|
174
|
-
|
220
|
+
rescue ActiveRecord::QueryCanceled
|
221
|
+
# ignore; we'll retry anyway
|
222
|
+
logger.warn("unable to unlock prefetched jobs; skipping for now")
|
223
|
+
rescue ActiveRecord::StatementInvalid
|
224
|
+
# see if we dropped the connection
|
225
|
+
raise if connection.active?
|
226
|
+
|
227
|
+
# otherwise just reconnect and let it retry
|
228
|
+
logger.warn("failed to unlock prefetched jobs - connection terminated; skipping for now")
|
229
|
+
Delayed::Job.clear_all_connections!
|
175
230
|
end
|
176
231
|
end
|
177
232
|
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
logger.debug("Sending job #{job.id} to #{client.name}")
|
182
|
-
client_timeout { Marshal.dump(job, client.socket) }
|
183
|
-
rescue SystemCallError, IOError, Timeout::Error => ex
|
184
|
-
logger.error("Failed to send job to #{client.name}: #{ex.inspect}")
|
185
|
-
drop_socket(client.socket)
|
186
|
-
Delayed::Job.unlock([job])
|
233
|
+
def unlock_timed_out_prefetched_jobs
|
234
|
+
unlock_prefetched_jobs do |jobs|
|
235
|
+
jobs.first.locked_at < Time.now.utc - Settings.parent_process[:prefetched_jobs_timeout]
|
187
236
|
end
|
188
237
|
end
|
189
|
-
end
|
190
|
-
end
|
191
238
|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
239
|
+
def unlock_all_prefetched_jobs
|
240
|
+
# we try really hard; it may not have done any work if it timed out
|
241
|
+
10.times do
|
242
|
+
unlock_prefetched_jobs
|
243
|
+
break if @prefetched_jobs.each_value.all?(&:empty?)
|
244
|
+
end
|
198
245
|
end
|
199
|
-
end
|
200
|
-
end
|
201
246
|
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
247
|
+
def drop_socket(socket)
|
248
|
+
# this socket went away
|
249
|
+
begin
|
250
|
+
socket.close
|
251
|
+
rescue IOError
|
252
|
+
nil
|
253
|
+
end
|
254
|
+
client = @clients[socket]
|
255
|
+
@clients.delete(socket)
|
256
|
+
@waiting_clients.each do |(_config, workers)|
|
257
|
+
workers.delete(client)
|
258
|
+
end
|
259
|
+
end
|
209
260
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
socket.close
|
214
|
-
rescue IOError
|
215
|
-
end
|
216
|
-
client = @clients[socket]
|
217
|
-
@clients.delete(socket)
|
218
|
-
@waiting_clients.each do |(_config, workers)|
|
219
|
-
workers.delete(client)
|
220
|
-
end
|
221
|
-
end
|
261
|
+
def exit?
|
262
|
+
!!@exit || parent_exited?
|
263
|
+
end
|
222
264
|
|
223
|
-
|
224
|
-
|
225
|
-
|
265
|
+
def prefetch_owner
|
266
|
+
"prefetch:#{Socket.gethostname rescue 'X'}"
|
267
|
+
end
|
226
268
|
|
227
|
-
|
228
|
-
|
229
|
-
|
269
|
+
def parent_exited?
|
270
|
+
@parent_pid && @parent_pid != Process.ppid
|
271
|
+
end
|
230
272
|
|
231
|
-
|
232
|
-
|
233
|
-
|
273
|
+
def client_timeout(&block)
|
274
|
+
Timeout.timeout(@client_timeout, &block)
|
275
|
+
end
|
234
276
|
|
235
|
-
|
236
|
-
|
277
|
+
ClientState = Struct.new(:working, :socket, :name)
|
278
|
+
end
|
237
279
|
end
|
238
|
-
|
239
|
-
ClientState = Struct.new(:working, :socket, :name)
|
240
280
|
end
|
241
281
|
end
|
242
|
-
end
|
243
|
-
end
|
@@ -1,69 +1,68 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
3
|
+
require "pathname"
|
4
|
+
require "socket"
|
5
|
+
require "timeout"
|
6
6
|
|
7
|
-
require_relative
|
8
|
-
require_relative
|
7
|
+
require_relative "parent_process/client"
|
8
|
+
require_relative "parent_process/server"
|
9
9
|
|
10
10
|
module Delayed
|
11
|
-
module WorkQueue
|
12
|
-
# ParentProcess is a WorkQueue implementation that spawns a separate worker
|
13
|
-
# process for querying the queue. Each Worker child process sends requests to
|
14
|
-
# the ParentProcess via IPC, and receives responses. This centralized queue
|
15
|
-
# querying cuts down on db queries and lock contention, and allows the
|
16
|
-
# possibility for other centralized logic such as notifications when all workers
|
17
|
-
# are idle.
|
18
|
-
#
|
19
|
-
# The IPC implementation uses Unix stream sockets and Ruby's built-in Marshal
|
20
|
-
# functionality. The ParentProcess creates a Unix socket on the filesystem in
|
21
|
-
# the tmp directory, so that if a worker process dies and is restarted it can
|
22
|
-
# reconnect to the socket.
|
23
|
-
#
|
24
|
-
# While Unix and IP sockets are API compatible, we take a lot of shortcuts
|
25
|
-
# because we know it's just a local Unix socket. If we ever wanted to swap this
|
26
|
-
# out for a TCP/IP socket and have the WorkQueue running on another host, we'd
|
27
|
-
# want to be a lot more robust about partial reads/writes and timeouts.
|
28
|
-
class ParentProcess
|
29
|
-
|
30
|
-
|
11
|
+
module WorkQueue
|
12
|
+
# ParentProcess is a WorkQueue implementation that spawns a separate worker
|
13
|
+
# process for querying the queue. Each Worker child process sends requests to
|
14
|
+
# the ParentProcess via IPC, and receives responses. This centralized queue
|
15
|
+
# querying cuts down on db queries and lock contention, and allows the
|
16
|
+
# possibility for other centralized logic such as notifications when all workers
|
17
|
+
# are idle.
|
18
|
+
#
|
19
|
+
# The IPC implementation uses Unix stream sockets and Ruby's built-in Marshal
|
20
|
+
# functionality. The ParentProcess creates a Unix socket on the filesystem in
|
21
|
+
# the tmp directory, so that if a worker process dies and is restarted it can
|
22
|
+
# reconnect to the socket.
|
23
|
+
#
|
24
|
+
# While Unix and IP sockets are API compatible, we take a lot of shortcuts
|
25
|
+
# because we know it's just a local Unix socket. If we ever wanted to swap this
|
26
|
+
# out for a TCP/IP socket and have the WorkQueue running on another host, we'd
|
27
|
+
# want to be a lot more robust about partial reads/writes and timeouts.
|
28
|
+
class ParentProcess
|
29
|
+
class ProtocolError < RuntimeError
|
30
|
+
end
|
31
31
|
|
32
|
-
|
32
|
+
attr_reader :server_address
|
33
33
|
|
34
|
-
|
35
|
-
|
34
|
+
DEFAULT_SOCKET_NAME = "inst-jobs.sock"
|
35
|
+
private_constant :DEFAULT_SOCKET_NAME
|
36
36
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
37
|
+
def initialize(config = Settings.parent_process)
|
38
|
+
@config = config
|
39
|
+
@server_address = generate_socket_path(config["server_address"])
|
40
|
+
end
|
41
41
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
42
|
+
def server(parent_pid: nil)
|
43
|
+
# The unix_server_socket method takes care of cleaning up any existing
|
44
|
+
# socket for us if the work queue process dies and is restarted.
|
45
|
+
listen_socket = Socket.unix_server_socket(@server_address)
|
46
|
+
Server.new(listen_socket, parent_pid: parent_pid, config: @config)
|
47
|
+
end
|
48
48
|
|
49
|
-
|
50
|
-
|
51
|
-
|
49
|
+
def client
|
50
|
+
Client.new(Addrinfo.unix(@server_address), config: @config)
|
51
|
+
end
|
52
52
|
|
53
|
-
|
53
|
+
private
|
54
54
|
|
55
|
-
|
56
|
-
|
55
|
+
def generate_socket_path(supplied_path)
|
56
|
+
pathname = Pathname.new(supplied_path)
|
57
57
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
58
|
+
if pathname.absolute? && pathname.directory?
|
59
|
+
pathname.join(DEFAULT_SOCKET_NAME).to_s
|
60
|
+
elsif pathname.absolute?
|
61
|
+
supplied_path
|
62
|
+
else
|
63
|
+
generate_socket_path(Settings.expand_rails_path(supplied_path))
|
64
|
+
end
|
65
|
+
end
|
64
66
|
end
|
65
67
|
end
|
66
68
|
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|