inst-jobs 2.0.0 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/delayed/backend/base.rb +9 -1
- data/lib/delayed/lifecycle.rb +1 -0
- data/lib/delayed/periodic.rb +1 -4
- data/lib/delayed/pool.rb +31 -0
- data/lib/delayed/settings.rb +1 -0
- data/lib/delayed/version.rb +1 -1
- data/lib/delayed/worker.rb +46 -25
- data/lib/delayed/worker/health_check.rb +7 -6
- data/spec/delayed/periodic_spec.rb +1 -9
- data/spec/delayed/worker_spec.rb +23 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c062c222e731bd490efe572108a508bf78faabee4479f7fe6927a89688d9ef0b
|
4
|
+
data.tar.gz: 3b1678fc017230e990bc7e8d4e652c23ab59413953ce72a312b13adfa7626193
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8c1a722c17c9abc8f5c8a44cb28f6584dc9fb16c1edcccc8df566ad21a5f81af7a54fb70282e2689aee11947dcd96f44ca01dfe542d71c8d3d6b7f145a572ce7
|
7
|
+
data.tar.gz: 9a7a65c71820d4b04f1e1ac2bf498cf030490a597d075d87d4399a392a7da1bbf50cfb3d5eeb1dea9c357d11e00aabf5e469f062c1fe9cc4b02cc8ed08e1a192
|
data/lib/delayed/backend/base.rb
CHANGED
@@ -87,6 +87,10 @@ module Delayed
|
|
87
87
|
batches[batch_enqueue_args] << kwargs
|
88
88
|
return true
|
89
89
|
else
|
90
|
+
if kwargs[:on_conflict].present?
|
91
|
+
Delayed::Logging.logger.warn("[DELAYED_JOB] WARNING: providing 'on_conflict' as an option to a non-singleton job will have no effect. Discarding.")
|
92
|
+
kwargs.delete(:on_conflict)
|
93
|
+
end
|
90
94
|
job = self.create(**kwargs)
|
91
95
|
end
|
92
96
|
|
@@ -178,6 +182,10 @@ module Delayed
|
|
178
182
|
expires_at && (self.class.db_time_now >= expires_at)
|
179
183
|
end
|
180
184
|
|
185
|
+
def inferred_max_attempts
|
186
|
+
self.max_attempts || Delayed::Settings.max_attempts
|
187
|
+
end
|
188
|
+
|
181
189
|
# Reschedule the job in the future (when a job fails).
|
182
190
|
# Uses an exponential scale depending on the number of failed attempts.
|
183
191
|
def reschedule(error = nil, time = nil)
|
@@ -190,7 +198,7 @@ module Delayed
|
|
190
198
|
|
191
199
|
self.attempts += 1 unless return_code == :unlock
|
192
200
|
|
193
|
-
if self.attempts >=
|
201
|
+
if self.attempts >= self.inferred_max_attempts
|
194
202
|
permanent_failure error || "max attempts reached"
|
195
203
|
elsif expired?
|
196
204
|
permanent_failure error || "job has expired"
|
data/lib/delayed/lifecycle.rb
CHANGED
data/lib/delayed/periodic.rb
CHANGED
@@ -56,10 +56,7 @@ class Periodic
|
|
56
56
|
inferred_args = {
|
57
57
|
max_attempts: 1,
|
58
58
|
run_at: @cron.next_time(Delayed::Periodic.now).utc.to_time,
|
59
|
-
singleton:
|
60
|
-
# yes, checking for whether it is actually the boolean literal false,
|
61
|
-
# which means the consuming code really does not want this job to be
|
62
|
-
# a singleton at all.
|
59
|
+
singleton: tag,
|
63
60
|
on_conflict: :patient
|
64
61
|
}
|
65
62
|
@job_args.merge(inferred_args)
|
data/lib/delayed/pool.rb
CHANGED
@@ -39,6 +39,7 @@ class Pool
|
|
39
39
|
Process.wait unlock_pid
|
40
40
|
|
41
41
|
spawn_periodic_auditor
|
42
|
+
spawn_abandoned_job_cleanup
|
42
43
|
spawn_all_workers
|
43
44
|
say "Workers spawned"
|
44
45
|
join
|
@@ -111,6 +112,34 @@ class Pool
|
|
111
112
|
end
|
112
113
|
end
|
113
114
|
|
115
|
+
def spawn_abandoned_job_cleanup
|
116
|
+
return if Settings.disable_abandoned_job_cleanup
|
117
|
+
cleanup_interval_in_minutes = 60
|
118
|
+
@abandoned_cleanup_thread = Thread.new do
|
119
|
+
# every hour (staggered by process)
|
120
|
+
# check for dead jobs and cull them.
|
121
|
+
# Will actually be more often based on the
|
122
|
+
# number of worker nodes in the pool. This will actually
|
123
|
+
# be a max of N times per hour where N is the number of workers,
|
124
|
+
# but they won't overrun each other because the health check
|
125
|
+
# takes an advisory lock internally
|
126
|
+
sleep(rand(cleanup_interval_in_minutes * 60))
|
127
|
+
loop do
|
128
|
+
schedule_abandoned_job_cleanup
|
129
|
+
sleep(cleanup_interval_in_minutes * 60)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
def schedule_abandoned_job_cleanup
|
135
|
+
pid = fork_with_reconnects do
|
136
|
+
# we want to avoid db connections in the main pool process
|
137
|
+
$0 = "delayed_abandoned_job_cleanup"
|
138
|
+
Delayed::Worker::HealthCheck.reschedule_abandoned_jobs
|
139
|
+
end
|
140
|
+
workers[pid] = :abandoned_job_cleanup
|
141
|
+
end
|
142
|
+
|
114
143
|
def spawn_periodic_auditor
|
115
144
|
return if Settings.disable_periodic_jobs
|
116
145
|
|
@@ -217,6 +246,8 @@ class Pool
|
|
217
246
|
case worker
|
218
247
|
when :periodic_audit
|
219
248
|
say "ran auditor: #{worker}"
|
249
|
+
when :abandoned_job_cleanup
|
250
|
+
say "ran cleanup: #{worker}"
|
220
251
|
when :work_queue
|
221
252
|
say "work queue exited, restarting", :info
|
222
253
|
spawn_work_queue
|
data/lib/delayed/settings.rb
CHANGED
data/lib/delayed/version.rb
CHANGED
data/lib/delayed/worker.rb
CHANGED
@@ -3,6 +3,17 @@
|
|
3
3
|
module Delayed
|
4
4
|
|
5
5
|
class TimeoutError < RuntimeError; end
|
6
|
+
class RetriableError < RuntimeError
|
7
|
+
# this error is a special case. You _should_ raise
|
8
|
+
# it from inside the rescue block for another error,
|
9
|
+
# because it indicates: "something made this job fail
|
10
|
+
# but we're pretty sure it's transient and it's safe to try again".
|
11
|
+
# the workflow is still the same (retry will happen unless
|
12
|
+
# retries are exhausted), but it won't call the :error
|
13
|
+
# callback unless it can't retry anymore. It WILL call the
|
14
|
+
# separate ":retry" callback, which is ONLY activated
|
15
|
+
# for this kind of error.
|
16
|
+
end
|
6
17
|
|
7
18
|
require 'tmpdir'
|
8
19
|
require 'set'
|
@@ -94,7 +105,11 @@ class Worker
|
|
94
105
|
end
|
95
106
|
|
96
107
|
def exit?
|
97
|
-
|
108
|
+
!!@exit || parent_exited?
|
109
|
+
end
|
110
|
+
|
111
|
+
def parent_exited?
|
112
|
+
@parent_pid && @parent_pid != Process.ppid
|
98
113
|
end
|
99
114
|
|
100
115
|
def wake_up
|
@@ -198,32 +213,38 @@ class Worker
|
|
198
213
|
end
|
199
214
|
|
200
215
|
def perform(job)
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
216
|
+
begin
|
217
|
+
count = 1
|
218
|
+
raise Delayed::Backend::JobExpired, "job expired at #{job.expires_at}" if job.expired?
|
219
|
+
self.class.lifecycle.run_callbacks(:perform, self, job) do
|
220
|
+
set_process_name("run:#{Settings.worker_procname_prefix}#{job.id}:#{job.name}")
|
221
|
+
logger.info("Processing #{log_job(job, :long)}")
|
222
|
+
runtime = Benchmark.realtime do
|
223
|
+
if job.batch?
|
224
|
+
# each job in the batch will have perform called on it, so we don't
|
225
|
+
# need a timeout around this
|
226
|
+
count = perform_batch(job)
|
227
|
+
else
|
228
|
+
job.invoke_job
|
229
|
+
end
|
230
|
+
job.destroy
|
213
231
|
end
|
214
|
-
job
|
232
|
+
logger.info("Completed #{log_job(job)} #{"%.0fms" % (runtime * 1000)}")
|
233
|
+
end
|
234
|
+
rescue ::Delayed::RetriableError => re
|
235
|
+
can_retry = job.attempts + 1 < job.inferred_max_attempts
|
236
|
+
callback_type = can_retry ? :retry : :error
|
237
|
+
self.class.lifecycle.run_callbacks(callback_type, self, job, re) do
|
238
|
+
handle_failed_job(job, re)
|
239
|
+
end
|
240
|
+
rescue SystemExit => se
|
241
|
+
# There wasn't really a failure here so no callbacks and whatnot needed,
|
242
|
+
# still reschedule the job though.
|
243
|
+
job.reschedule(se)
|
244
|
+
rescue Exception => e
|
245
|
+
self.class.lifecycle.run_callbacks(:error, self, job, e) do
|
246
|
+
handle_failed_job(job, e)
|
215
247
|
end
|
216
|
-
logger.info("Completed #{log_job(job)} #{"%.0fms" % (runtime * 1000)}")
|
217
|
-
end
|
218
|
-
count
|
219
|
-
rescue SystemExit => se
|
220
|
-
# There wasn't really a failure here so no callbacks and whatnot needed,
|
221
|
-
# still reschedule the job though.
|
222
|
-
job.reschedule(se)
|
223
|
-
count
|
224
|
-
rescue Exception => e
|
225
|
-
self.class.lifecycle.run_callbacks(:error, self, job, e) do
|
226
|
-
handle_failed_job(job, e)
|
227
248
|
end
|
228
249
|
count
|
229
250
|
end
|
data/lib/delayed/worker/health_check.rb
CHANGED
@@ -23,12 +23,13 @@ module Delayed
|
|
23
23
|
def reschedule_abandoned_jobs
|
24
24
|
return if Settings.worker_health_check_type == :none
|
25
25
|
Delayed::Job.transaction do
|
26
|
-
# this
|
26
|
+
# this action is a special case, and SHOULD NOT be a periodic job
|
27
27
|
# because if it gets wiped out suddenly during execution
|
28
28
|
# it can't go clean up its abandoned self. Therefore,
|
29
|
-
# we
|
30
|
-
#
|
31
|
-
#
|
29
|
+
# we expect it to get run from its own process forked from the job pool
|
30
|
+
# and we try to get an advisory lock when it runs. If we succeed,
|
31
|
+
# no other worker is trying to do this right now (and if we abandon the
|
32
|
+
# operation, the transaction will end, releasing the advisory lock).
|
32
33
|
result = attempt_advisory_lock
|
33
34
|
return unless result
|
34
35
|
checker = Worker::HealthCheck.build(
|
@@ -59,8 +60,8 @@ module Delayed
|
|
59
60
|
|
60
61
|
def attempt_advisory_lock
|
61
62
|
lock_name = "Delayed::Worker::HealthCheck#reschedule_abandoned_jobs"
|
62
|
-
|
63
|
-
|
63
|
+
conn = ActiveRecord::Base.connection
|
64
|
+
conn.select_value("SELECT pg_try_advisory_xact_lock(#{conn.quote_table_name('half_md5_as_bigint')}('#{lock_name}'));")
|
64
65
|
end
|
65
66
|
end
|
66
67
|
|
data/spec/delayed/periodic_spec.rb
CHANGED
@@ -14,6 +14,7 @@ RSpec.describe Delayed::Periodic do
|
|
14
14
|
ensure
|
15
15
|
Delayed::Periodic.scheduled = prev_sched
|
16
16
|
Delayed::Periodic.overrides = prev_ovr
|
17
|
+
Delayed::Job.delete_all
|
17
18
|
end
|
18
19
|
|
19
20
|
describe ".cron" do
|
@@ -26,14 +27,5 @@ RSpec.describe Delayed::Periodic do
|
|
26
27
|
expect(instance).to_not be_nil
|
27
28
|
expect(instance.enqueue_args[:singleton]).to eq("periodic: just a test")
|
28
29
|
end
|
29
|
-
|
30
|
-
it "uses no singleton if told to skip" do
|
31
|
-
Delayed::Periodic.cron job_name, '*/10 * * * *', {singleton: false} do
|
32
|
-
# no-op
|
33
|
-
end
|
34
|
-
instance = Delayed::Periodic.scheduled[job_name]
|
35
|
-
expect(instance).to_not be_nil
|
36
|
-
expect(instance.enqueue_args[:singleton]).to be_nil
|
37
|
-
end
|
38
30
|
end
|
39
31
|
end
|
data/spec/delayed/worker_spec.rb
CHANGED
@@ -6,6 +6,11 @@ describe Delayed::Worker do
|
|
6
6
|
let(:worker_config) { {
|
7
7
|
queue: "test", min_priority: 1, max_priority: 2, stuff: "stuff",
|
8
8
|
}.freeze }
|
9
|
+
let(:job_attrs) { {
|
10
|
+
id: 42, name: "testjob", full_name: "testfullname", :last_error= => nil,
|
11
|
+
attempts: 1, reschedule: nil, :expired? => false,
|
12
|
+
payload_object: {}, priority: 25
|
13
|
+
}.freeze }
|
9
14
|
subject { described_class.new(worker_config.dup) }
|
10
15
|
|
11
16
|
after { Delayed::Worker.lifecycle.reset! }
|
@@ -14,9 +19,24 @@ describe Delayed::Worker do
|
|
14
19
|
it "fires off an error callback when a job raises an exception" do
|
15
20
|
fired = false
|
16
21
|
Delayed::Worker.lifecycle.before(:error) {|worker, exception| fired = true}
|
17
|
-
job = double(
|
18
|
-
subject.perform(job)
|
22
|
+
job = double(job_attrs)
|
23
|
+
output_count = subject.perform(job)
|
19
24
|
expect(fired).to be_truthy
|
25
|
+
expect(output_count).to eq(1)
|
26
|
+
end
|
27
|
+
|
28
|
+
it "uses the retry callback for a retriable exception" do
|
29
|
+
error_fired = retry_fired = false
|
30
|
+
Delayed::Worker.lifecycle.before(:error) {|worker, exception| error_fired = true }
|
31
|
+
Delayed::Worker.lifecycle.before(:retry) {|worker, exception| retry_fired = true}
|
32
|
+
job = Delayed::Job.new(payload_object: {}, priority: 25, strand: "test_jobs", max_attempts: 3)
|
33
|
+
expect(job).to receive(:invoke_job) do
|
34
|
+
raise Delayed::RetriableError, "that's all this job does"
|
35
|
+
end
|
36
|
+
output_count = subject.perform(job)
|
37
|
+
expect(error_fired).to be_falsey
|
38
|
+
expect(retry_fired).to be_truthy
|
39
|
+
expect(output_count).to eq(1)
|
20
40
|
end
|
21
41
|
|
22
42
|
it "reloads" do
|
@@ -35,7 +55,7 @@ describe Delayed::Worker do
|
|
35
55
|
expect(ActionDispatch::Reloader).to receive(:prepare!).once
|
36
56
|
expect(ActionDispatch::Reloader).to receive(:cleanup!).once
|
37
57
|
end
|
38
|
-
job = double(
|
58
|
+
job = double(job_attrs)
|
39
59
|
subject.perform(job)
|
40
60
|
end
|
41
61
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: inst-jobs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tobias Luetke
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2021-01-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: activerecord
|