inst-jobs 2.4.7 → 2.4.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/delayed/backend/base.rb +29 -10
- data/lib/delayed/version.rb +1 -1
- data/lib/delayed/worker/health_check.rb +14 -9
- data/spec/delayed/worker/health_check_spec.rb +9 -3
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b810ed7504a4de6c0338c2b6f0b2303e72e172225ca57d8ed3ebf8a9f05c6111
|
4
|
+
data.tar.gz: 7fff2151aa908f846af19401a39390beee21ca2fdd9b9317425fdcf0345970fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a98557f9c875df6e9961b849dd8d6fec4564e247eed2d68a717dbaef94ed5af4b2a61642c0c17a4bf7f1eaac102b9607a2da5b4fb9b2c40ef3cca2b6bafcef5
|
7
|
+
data.tar.gz: 1fcaa3bc4d1191d2a8d56156e40755d032f635dcaef7f05fbaf19d197b6456d552578ae7fcf376d4fe90ab70c3406a81d8d959ff87b714b669defccb8d9b04d0
|
data/lib/delayed/backend/base.rb
CHANGED
@@ -166,17 +166,36 @@ module Delayed
|
|
166
166
|
pid_regex = pid || '(\d+)'
|
167
167
|
regex = Regexp.new("^#{Regexp.escape(name)}:#{pid_regex}$")
|
168
168
|
unlocked_jobs = 0
|
169
|
+
escaped_name = name.gsub("\\", "\\\\")
|
170
|
+
.gsub("%", "\\%")
|
171
|
+
.gsub("_", "\\_")
|
172
|
+
locked_by_like = "#{escaped_name}:%"
|
169
173
|
running = false if pid
|
170
|
-
running_jobs.
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
job.
|
174
|
+
jobs = running_jobs.limit(100)
|
175
|
+
jobs = pid ? jobs.where(locked_by: "#{name}:#{pid}") : jobs.where("locked_by LIKE ?", locked_by_like)
|
176
|
+
ignores = []
|
177
|
+
loop do
|
178
|
+
batch_scope = ignores.empty? ? jobs : jobs.where.not(id: ignores)
|
179
|
+
batch = batch_scope.to_a
|
180
|
+
break if batch.empty?
|
181
|
+
|
182
|
+
batch.each do |job|
|
183
|
+
unless job.locked_by =~ regex
|
184
|
+
ignores << job.id
|
185
|
+
next
|
186
|
+
end
|
187
|
+
|
188
|
+
unless pid
|
189
|
+
job_pid = $1.to_i
|
190
|
+
running = Process.kill(0, job_pid) rescue false
|
191
|
+
end
|
192
|
+
|
193
|
+
if running
|
194
|
+
ignores << job.id
|
195
|
+
else
|
196
|
+
unlocked_jobs += 1
|
197
|
+
job.reschedule("process died")
|
198
|
+
end
|
180
199
|
end
|
181
200
|
end
|
182
201
|
unlocked_jobs
|
data/lib/delayed/version.rb
CHANGED
data/lib/delayed/worker/health_check.rb
CHANGED
@@ -28,7 +28,7 @@ module Delayed
|
|
28
28
|
Delayed::Job.transaction do
|
29
29
|
# this action is a special case, and SHOULD NOT be a periodic job
|
30
30
|
# because if it gets wiped out suddenly during execution
|
31
|
-
# it can't go clean up
|
31
|
+
# it can't go clean up its abandoned self. Therefore,
|
32
32
|
# we expect it to get run from it's own process forked from the job pool
|
33
33
|
# and we try to get an advisory lock when it runs. If we succeed,
|
34
34
|
# no other worker is trying to do this right now (and if we abandon the
|
@@ -36,6 +36,8 @@ module Delayed
|
|
36
36
|
result = Delayed::Job.attempt_advisory_lock("Delayed::Worker::HealthCheck#reschedule_abandoned_jobs")
|
37
37
|
return unless result
|
38
38
|
|
39
|
+
horizon = 5.minutes.ago
|
40
|
+
|
39
41
|
checker = Worker::HealthCheck.build(
|
40
42
|
type: Settings.worker_health_check_type,
|
41
43
|
config: Settings.worker_health_check_config,
|
@@ -43,13 +45,16 @@ module Delayed
|
|
43
45
|
)
|
44
46
|
live_workers = checker.live_workers
|
45
47
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
48
|
+
loop do
|
49
|
+
batch = Delayed::Job.running_jobs
|
50
|
+
.where("locked_at<?", horizon)
|
51
|
+
.where.not("locked_by LIKE 'prefetch:%'")
|
52
|
+
.where.not(locked_by: live_workers)
|
53
|
+
.limit(100)
|
54
|
+
.to_a
|
55
|
+
break if batch.empty?
|
51
56
|
|
52
|
-
|
57
|
+
batch.each do |job|
|
53
58
|
Delayed::Job.transaction do
|
54
59
|
# double check that the job is still there. locked_by will immediately be reset
|
55
60
|
# to nil in this transaction by Job#reschedule
|
@@ -59,9 +64,9 @@ module Delayed
|
|
59
64
|
|
60
65
|
job.reschedule
|
61
66
|
end
|
62
|
-
rescue
|
63
|
-
::Rails.logger.error "Failure rescheduling abandoned job #{job.id} #{$!.inspect}"
|
64
67
|
end
|
68
|
+
rescue
|
69
|
+
::Rails.logger.error "Failure rescheduling abandoned job #{job.id} #{$!.inspect}"
|
65
70
|
end
|
66
71
|
end
|
67
72
|
end
|
data/spec/delayed/worker/health_check_spec.rb
CHANGED
@@ -51,7 +51,7 @@ RSpec.describe Delayed::Worker::HealthCheck do
|
|
51
51
|
end
|
52
52
|
end
|
53
53
|
|
54
|
-
let(:initial_run_at) {
|
54
|
+
let(:initial_run_at) { 10.minutes.ago }
|
55
55
|
|
56
56
|
before do
|
57
57
|
klass.live_workers = %w[alive]
|
@@ -96,7 +96,13 @@ RSpec.describe Delayed::Worker::HealthCheck do
|
|
96
96
|
|
97
97
|
it "ignores jobs that are re-locked after fetching from db" do
|
98
98
|
Delayed::Job.where(id: @dead_job).update_all(locked_by: "someone_else")
|
99
|
-
|
99
|
+
# we need to return @dead_job itself, which doesn't match the database
|
100
|
+
jobs_scope = double
|
101
|
+
allow(jobs_scope).to receive(:where).and_return(jobs_scope)
|
102
|
+
allow(jobs_scope).to receive(:not).and_return(jobs_scope)
|
103
|
+
allow(jobs_scope).to receive(:limit).and_return(jobs_scope)
|
104
|
+
allow(jobs_scope).to receive(:to_a).and_return([@dead_job], [])
|
105
|
+
allow(Delayed::Job).to receive(:running_jobs).and_return(jobs_scope)
|
100
106
|
described_class.reschedule_abandoned_jobs
|
101
107
|
@dead_job.reload
|
102
108
|
expect(@dead_job.locked_by).to eq "someone_else"
|
@@ -104,7 +110,7 @@ RSpec.describe Delayed::Worker::HealthCheck do
|
|
104
110
|
|
105
111
|
it "ignores jobs that are prefetched" do
|
106
112
|
Delayed::Job.where(id: @dead_job).update_all(locked_by: "prefetch:some_node")
|
107
|
-
allow(Delayed::Job).to receive(:running_jobs).and_return(
|
113
|
+
allow(Delayed::Job).to receive(:running_jobs).and_return(Delayed::Job.where(id: @dead_job.id))
|
108
114
|
described_class.reschedule_abandoned_jobs
|
109
115
|
@dead_job.reload
|
110
116
|
expect(@dead_job.locked_by).to eq "prefetch:some_node"
|