inst-jobs 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/delayed/backend/base.rb +9 -1
- data/lib/delayed/lifecycle.rb +1 -0
- data/lib/delayed/periodic.rb +1 -4
- data/lib/delayed/pool.rb +31 -0
- data/lib/delayed/settings.rb +1 -0
- data/lib/delayed/version.rb +1 -1
- data/lib/delayed/worker.rb +46 -25
- data/lib/delayed/worker/health_check.rb +7 -6
- data/spec/delayed/periodic_spec.rb +1 -9
- data/spec/delayed/worker_spec.rb +23 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c062c222e731bd490efe572108a508bf78faabee4479f7fe6927a89688d9ef0b
|
4
|
+
data.tar.gz: 3b1678fc017230e990bc7e8d4e652c23ab59413953ce72a312b13adfa7626193
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8c1a722c17c9abc8f5c8a44cb28f6584dc9fb16c1edcccc8df566ad21a5f81af7a54fb70282e2689aee11947dcd96f44ca01dfe542d71c8d3d6b7f145a572ce7
|
7
|
+
data.tar.gz: 9a7a65c71820d4b04f1e1ac2bf498cf030490a597d075d87d4399a392a7da1bbf50cfb3d5eeb1dea9c357d11e00aabf5e469f062c1fe9cc4b02cc8ed08e1a192
|
data/lib/delayed/backend/base.rb
CHANGED
@@ -87,6 +87,10 @@ module Delayed
|
|
87
87
|
batches[batch_enqueue_args] << kwargs
|
88
88
|
return true
|
89
89
|
else
|
90
|
+
if kwargs[:on_conflict].present?
|
91
|
+
Delayed::Logging.logger.warn("[DELAYED_JOB] WARNING: providing 'on_conflict' as an option to a non-singleton job will have no effect. Discarding.")
|
92
|
+
kwargs.delete(:on_conflict)
|
93
|
+
end
|
90
94
|
job = self.create(**kwargs)
|
91
95
|
end
|
92
96
|
|
@@ -178,6 +182,10 @@ module Delayed
|
|
178
182
|
expires_at && (self.class.db_time_now >= expires_at)
|
179
183
|
end
|
180
184
|
|
185
|
+
def inferred_max_attempts
|
186
|
+
self.max_attempts || Delayed::Settings.max_attempts
|
187
|
+
end
|
188
|
+
|
181
189
|
# Reschedule the job in the future (when a job fails).
|
182
190
|
# Uses an exponential scale depending on the number of failed attempts.
|
183
191
|
def reschedule(error = nil, time = nil)
|
@@ -190,7 +198,7 @@ module Delayed
|
|
190
198
|
|
191
199
|
self.attempts += 1 unless return_code == :unlock
|
192
200
|
|
193
|
-
if self.attempts >=
|
201
|
+
if self.attempts >= self.inferred_max_attempts
|
194
202
|
permanent_failure error || "max attempts reached"
|
195
203
|
elsif expired?
|
196
204
|
permanent_failure error || "job has expired"
|
data/lib/delayed/lifecycle.rb
CHANGED
data/lib/delayed/periodic.rb
CHANGED
@@ -56,10 +56,7 @@ class Periodic
|
|
56
56
|
inferred_args = {
|
57
57
|
max_attempts: 1,
|
58
58
|
run_at: @cron.next_time(Delayed::Periodic.now).utc.to_time,
|
59
|
-
singleton:
|
60
|
-
# yes, checking for whether it is actually the boolean literal false,
|
61
|
-
# which means the consuming code really does not want this job to be
|
62
|
-
# a singleton at all.
|
59
|
+
singleton: tag,
|
63
60
|
on_conflict: :patient
|
64
61
|
}
|
65
62
|
@job_args.merge(inferred_args)
|
data/lib/delayed/pool.rb
CHANGED
@@ -39,6 +39,7 @@ class Pool
|
|
39
39
|
Process.wait unlock_pid
|
40
40
|
|
41
41
|
spawn_periodic_auditor
|
42
|
+
spawn_abandoned_job_cleanup
|
42
43
|
spawn_all_workers
|
43
44
|
say "Workers spawned"
|
44
45
|
join
|
@@ -111,6 +112,34 @@ class Pool
|
|
111
112
|
end
|
112
113
|
end
|
113
114
|
|
115
|
+
def spawn_abandoned_job_cleanup
|
116
|
+
return if Settings.disable_abandoned_job_cleanup
|
117
|
+
cleanup_interval_in_minutes = 60
|
118
|
+
@abandoned_cleanup_thread = Thread.new do
|
119
|
+
# every hour (staggered by process)
|
120
|
+
# check for dead jobs and cull them.
|
121
|
+
# Will actually be more often based on the
|
122
|
+
# number of worker nodes in the pool. This will actually
|
123
|
+
# be a max of N times per hour where N is the number of workers,
|
124
|
+
# but they won't overrun each other because the health check
|
125
|
+
# takes an advisory lock internally
|
126
|
+
sleep(rand(cleanup_interval_in_minutes * 60))
|
127
|
+
loop do
|
128
|
+
schedule_abandoned_job_cleanup
|
129
|
+
sleep(cleanup_interval_in_minutes * 60)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
def schedule_abandoned_job_cleanup
|
135
|
+
pid = fork_with_reconnects do
|
136
|
+
# we want to avoid db connections in the main pool process
|
137
|
+
$0 = "delayed_abandoned_job_cleanup"
|
138
|
+
Delayed::Worker::HealthCheck.reschedule_abandoned_jobs
|
139
|
+
end
|
140
|
+
workers[pid] = :abandoned_job_cleanup
|
141
|
+
end
|
142
|
+
|
114
143
|
def spawn_periodic_auditor
|
115
144
|
return if Settings.disable_periodic_jobs
|
116
145
|
|
@@ -217,6 +246,8 @@ class Pool
|
|
217
246
|
case worker
|
218
247
|
when :periodic_audit
|
219
248
|
say "ran auditor: #{worker}"
|
249
|
+
when :abandoned_job_cleanup
|
250
|
+
say "ran cleanup: #{worker}"
|
220
251
|
when :work_queue
|
221
252
|
say "work queue exited, restarting", :info
|
222
253
|
spawn_work_queue
|
data/lib/delayed/settings.rb
CHANGED
data/lib/delayed/version.rb
CHANGED
data/lib/delayed/worker.rb
CHANGED
@@ -3,6 +3,17 @@
|
|
3
3
|
module Delayed
|
4
4
|
|
5
5
|
class TimeoutError < RuntimeError; end
|
6
|
+
class RetriableError < RuntimeError
|
7
|
+
# this error is a special case. You _should_ raise
|
8
|
+
# it from inside the rescue block for another error,
|
9
|
+
# because it indicates: "something made this job fail
|
10
|
+
# but we're pretty sure it's transient and it's safe to try again".
|
11
|
+
# the workflow is still the same (retry will happen unless
|
12
|
+
# retries are exhausted), but it won't call the :error
|
13
|
+
# callback unless it can't retry anymore. It WILL call the
|
14
|
+
# separate ":retry" callback, which is ONLY activated
|
15
|
+
# for this kind of error.
|
16
|
+
end
|
6
17
|
|
7
18
|
require 'tmpdir'
|
8
19
|
require 'set'
|
@@ -94,7 +105,11 @@ class Worker
|
|
94
105
|
end
|
95
106
|
|
96
107
|
def exit?
|
97
|
-
|
108
|
+
!!@exit || parent_exited?
|
109
|
+
end
|
110
|
+
|
111
|
+
def parent_exited?
|
112
|
+
@parent_pid && @parent_pid != Process.ppid
|
98
113
|
end
|
99
114
|
|
100
115
|
def wake_up
|
@@ -198,32 +213,38 @@ class Worker
|
|
198
213
|
end
|
199
214
|
|
200
215
|
def perform(job)
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
216
|
+
begin
|
217
|
+
count = 1
|
218
|
+
raise Delayed::Backend::JobExpired, "job expired at #{job.expires_at}" if job.expired?
|
219
|
+
self.class.lifecycle.run_callbacks(:perform, self, job) do
|
220
|
+
set_process_name("run:#{Settings.worker_procname_prefix}#{job.id}:#{job.name}")
|
221
|
+
logger.info("Processing #{log_job(job, :long)}")
|
222
|
+
runtime = Benchmark.realtime do
|
223
|
+
if job.batch?
|
224
|
+
# each job in the batch will have perform called on it, so we don't
|
225
|
+
# need a timeout around this
|
226
|
+
count = perform_batch(job)
|
227
|
+
else
|
228
|
+
job.invoke_job
|
229
|
+
end
|
230
|
+
job.destroy
|
213
231
|
end
|
214
|
-
job
|
232
|
+
logger.info("Completed #{log_job(job)} #{"%.0fms" % (runtime * 1000)}")
|
233
|
+
end
|
234
|
+
rescue ::Delayed::RetriableError => re
|
235
|
+
can_retry = job.attempts + 1 < job.inferred_max_attempts
|
236
|
+
callback_type = can_retry ? :retry : :error
|
237
|
+
self.class.lifecycle.run_callbacks(callback_type, self, job, re) do
|
238
|
+
handle_failed_job(job, re)
|
239
|
+
end
|
240
|
+
rescue SystemExit => se
|
241
|
+
# There wasn't really a failure here so no callbacks and whatnot needed,
|
242
|
+
# still reschedule the job though.
|
243
|
+
job.reschedule(se)
|
244
|
+
rescue Exception => e
|
245
|
+
self.class.lifecycle.run_callbacks(:error, self, job, e) do
|
246
|
+
handle_failed_job(job, e)
|
215
247
|
end
|
216
|
-
logger.info("Completed #{log_job(job)} #{"%.0fms" % (runtime * 1000)}")
|
217
|
-
end
|
218
|
-
count
|
219
|
-
rescue SystemExit => se
|
220
|
-
# There wasn't really a failure here so no callbacks and whatnot needed,
|
221
|
-
# still reschedule the job though.
|
222
|
-
job.reschedule(se)
|
223
|
-
count
|
224
|
-
rescue Exception => e
|
225
|
-
self.class.lifecycle.run_callbacks(:error, self, job, e) do
|
226
|
-
handle_failed_job(job, e)
|
227
248
|
end
|
228
249
|
count
|
229
250
|
end
|
@@ -23,12 +23,13 @@ module Delayed
|
|
23
23
|
def reschedule_abandoned_jobs
|
24
24
|
return if Settings.worker_health_check_type == :none
|
25
25
|
Delayed::Job.transaction do
|
26
|
-
# this
|
26
|
+
# this action is a special case, and SHOULD NOT be a periodic job
|
27
27
|
# because if it gets wiped out suddenly during execution
|
28
28
|
# it can't go clean up its abandoned self. Therefore,
|
29
|
-
# we
|
30
|
-
#
|
31
|
-
#
|
29
|
+
# we expect it to get run from its own process forked from the job pool
|
30
|
+
# and we try to get an advisory lock when it runs. If we succeed,
|
31
|
+
# no other worker is trying to do this right now (and if we abandon the
|
32
|
+
# operation, the transaction will end, releasing the advisory lock).
|
32
33
|
result = attempt_advisory_lock
|
33
34
|
return unless result
|
34
35
|
checker = Worker::HealthCheck.build(
|
@@ -59,8 +60,8 @@ module Delayed
|
|
59
60
|
|
60
61
|
def attempt_advisory_lock
|
61
62
|
lock_name = "Delayed::Worker::HealthCheck#reschedule_abandoned_jobs"
|
62
|
-
|
63
|
-
|
63
|
+
conn = ActiveRecord::Base.connection
|
64
|
+
conn.select_value("SELECT pg_try_advisory_xact_lock(#{conn.quote_table_name('half_md5_as_bigint')}('#{lock_name}'));")
|
64
65
|
end
|
65
66
|
end
|
66
67
|
|
@@ -14,6 +14,7 @@ RSpec.describe Delayed::Periodic do
|
|
14
14
|
ensure
|
15
15
|
Delayed::Periodic.scheduled = prev_sched
|
16
16
|
Delayed::Periodic.overrides = prev_ovr
|
17
|
+
Delayed::Job.delete_all
|
17
18
|
end
|
18
19
|
|
19
20
|
describe ".cron" do
|
@@ -26,14 +27,5 @@ RSpec.describe Delayed::Periodic do
|
|
26
27
|
expect(instance).to_not be_nil
|
27
28
|
expect(instance.enqueue_args[:singleton]).to eq("periodic: just a test")
|
28
29
|
end
|
29
|
-
|
30
|
-
it "uses no singleton if told to skip" do
|
31
|
-
Delayed::Periodic.cron job_name, '*/10 * * * *', {singleton: false} do
|
32
|
-
# no-op
|
33
|
-
end
|
34
|
-
instance = Delayed::Periodic.scheduled[job_name]
|
35
|
-
expect(instance).to_not be_nil
|
36
|
-
expect(instance.enqueue_args[:singleton]).to be_nil
|
37
|
-
end
|
38
30
|
end
|
39
31
|
end
|
data/spec/delayed/worker_spec.rb
CHANGED
@@ -6,6 +6,11 @@ describe Delayed::Worker do
|
|
6
6
|
let(:worker_config) { {
|
7
7
|
queue: "test", min_priority: 1, max_priority: 2, stuff: "stuff",
|
8
8
|
}.freeze }
|
9
|
+
let(:job_attrs) { {
|
10
|
+
id: 42, name: "testjob", full_name: "testfullname", :last_error= => nil,
|
11
|
+
attempts: 1, reschedule: nil, :expired? => false,
|
12
|
+
payload_object: {}, priority: 25
|
13
|
+
}.freeze }
|
9
14
|
subject { described_class.new(worker_config.dup) }
|
10
15
|
|
11
16
|
after { Delayed::Worker.lifecycle.reset! }
|
@@ -14,9 +19,24 @@ describe Delayed::Worker do
|
|
14
19
|
it "fires off an error callback when a job raises an exception" do
|
15
20
|
fired = false
|
16
21
|
Delayed::Worker.lifecycle.before(:error) {|worker, exception| fired = true}
|
17
|
-
job = double(
|
18
|
-
subject.perform(job)
|
22
|
+
job = double(job_attrs)
|
23
|
+
output_count = subject.perform(job)
|
19
24
|
expect(fired).to be_truthy
|
25
|
+
expect(output_count).to eq(1)
|
26
|
+
end
|
27
|
+
|
28
|
+
it "uses the retry callback for a retriable exception" do
|
29
|
+
error_fired = retry_fired = false
|
30
|
+
Delayed::Worker.lifecycle.before(:error) {|worker, exception| error_fired = true }
|
31
|
+
Delayed::Worker.lifecycle.before(:retry) {|worker, exception| retry_fired = true}
|
32
|
+
job = Delayed::Job.new(payload_object: {}, priority: 25, strand: "test_jobs", max_attempts: 3)
|
33
|
+
expect(job).to receive(:invoke_job) do
|
34
|
+
raise Delayed::RetriableError, "that's all this job does"
|
35
|
+
end
|
36
|
+
output_count = subject.perform(job)
|
37
|
+
expect(error_fired).to be_falsey
|
38
|
+
expect(retry_fired).to be_truthy
|
39
|
+
expect(output_count).to eq(1)
|
20
40
|
end
|
21
41
|
|
22
42
|
it "reloads" do
|
@@ -35,7 +55,7 @@ describe Delayed::Worker do
|
|
35
55
|
expect(ActionDispatch::Reloader).to receive(:prepare!).once
|
36
56
|
expect(ActionDispatch::Reloader).to receive(:cleanup!).once
|
37
57
|
end
|
38
|
-
job = double(
|
58
|
+
job = double(job_attrs)
|
39
59
|
subject.perform(job)
|
40
60
|
end
|
41
61
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: inst-jobs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tobias Luetke
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2021-01-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activerecord
|