inst-jobs 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: d674b7da21caf04eb87ff9823ed549c93a901219669316090d088f0699564e59
- data.tar.gz: 021456d34f12eff8cc988db866018d701fc77ffdbb57e9fb308fc1bd25a91ecb
+ metadata.gz: c062c222e731bd490efe572108a508bf78faabee4479f7fe6927a89688d9ef0b
+ data.tar.gz: 3b1678fc017230e990bc7e8d4e652c23ab59413953ce72a312b13adfa7626193
  SHA512:
- metadata.gz: ad78cfdd9026db24b714c532c8ee837a875e443afc375909f0c130e3cfbf87d1f872344f982d931838bfa6649a2f1edc59430f6444a2baee08f8afb568015cfc
- data.tar.gz: e2b127477f0687958178505628b9544aa5c49e7aa1d0ceef32892250aa26aeb1c77f12bcacd6682e17c2bc379f987b154a0f982e029852432c39f7b3a5335df8
+ metadata.gz: 8c1a722c17c9abc8f5c8a44cb28f6584dc9fb16c1edcccc8df566ad21a5f81af7a54fb70282e2689aee11947dcd96f44ca01dfe542d71c8d3d6b7f145a572ce7
+ data.tar.gz: 9a7a65c71820d4b04f1e1ac2bf498cf030490a597d075d87d4399a392a7da1bbf50cfb3d5eeb1dea9c357d11e00aabf5e469f062c1fe9cc4b02cc8ed08e1a192
@@ -87,6 +87,10 @@ module Delayed
  batches[batch_enqueue_args] << kwargs
  return true
  else
+ if kwargs[:on_conflict].present?
+ Delayed::Logging.logger.warn("[DELAYED_JOB] WARNING: providing 'on_conflict' as an option to a non-singleton job will have no effect. Discarding.")
+ kwargs.delete(:on_conflict)
+ end
  job = self.create(**kwargs)
  end
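Note on the new guard: on_conflict only has meaning for singleton jobs, so 2.2 warns and discards it rather than passing it through to Job.create. A hedged usage sketch (the delay helper, class name, and course_id are illustrative assumptions; :patient is the strategy Periodic uses further down):

    # effective: on_conflict paired with a singleton tag
    GradeCalculator.delay(singleton: "grades:#{course_id}", on_conflict: :patient).recompute
    # without singleton, 2.2 logs the warning above and drops the option
    GradeCalculator.delay(on_conflict: :patient).recompute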
@@ -178,6 +182,10 @@ module Delayed
  expires_at && (self.class.db_time_now >= expires_at)
  end

+ def inferred_max_attempts
+ self.max_attempts || Delayed::Settings.max_attempts
+ end
+
  # Reschedule the job in the future (when a job fails).
  # Uses an exponential scale depending on the number of failed attempts.
  def reschedule(error = nil, time = nil)
@@ -190,7 +198,7 @@ module Delayed

  self.attempts += 1 unless return_code == :unlock

- if self.attempts >= (self.max_attempts || Delayed::Settings.max_attempts)
+ if self.attempts >= self.inferred_max_attempts
  permanent_failure error || "max attempts reached"
  elsif expired?
  permanent_failure error || "job has expired"
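The retry threshold now goes through the new inferred_max_attempts helper instead of inlining the fallback. A minimal sketch of that fallback (assumes Delayed::Settings exposes a writer for max_attempts; values are illustrative):

    Delayed::Settings.max_attempts = 15
    job = Delayed::Job.new          # no per-job max_attempts set
    job.inferred_max_attempts       # => 15
    job.max_attempts = 3
    job.inferred_max_attempts       # => 3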
@@ -12,6 +12,7 @@ module Delayed
  :loop => [:worker],
  :perform => [:worker, :job],
  :pop => [:worker],
+ :retry => [:worker, :job, :exception],
  :work_queue_pop => [:work_queue, :worker_config],
  :check_for_work => [:work_queue],
  }
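The new :retry event is registered like any other lifecycle hook and receives the worker, the job, and the exception. A short sketch mirroring the :error registration used in the worker spec at the end of this diff (the log line itself is illustrative):

    Delayed::Worker.lifecycle.before(:retry) do |worker, job, exception|
      Delayed::Logging.logger.info("retrying job #{job.id} after #{exception.class}")
    end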
@@ -56,10 +56,7 @@ class Periodic
  inferred_args = {
  max_attempts: 1,
  run_at: @cron.next_time(Delayed::Periodic.now).utc.to_time,
- singleton: (@job_args[:singleton] == false ? nil : tag),
- # yes, checking for whether it is actually the boolean literal false,
- # which means the consuming code really does not want this job to be
- # a singleton at all.
+ singleton: tag,
  on_conflict: :patient
  }
  @job_args.merge(inferred_args)
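Every periodic job is now enqueued as a singleton keyed by its tag; the singleton: false escape hatch (and its spec, removed near the end of this diff) is gone. A sketch based on the surviving spec (the cron name is hypothetical):

    Delayed::Periodic.cron 'Audit:stale_jobs', '*/10 * * * *' do
      # work
    end
    Delayed::Periodic.scheduled['Audit:stale_jobs'].enqueue_args[:singleton]
    # => "periodic: Audit:stale_jobs"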
@@ -39,6 +39,7 @@ class Pool
  Process.wait unlock_pid

  spawn_periodic_auditor
+ spawn_abandoned_job_cleanup
  spawn_all_workers
  say "Workers spawned"
  join
@@ -111,6 +112,34 @@ class Pool
  end
  end

+ def spawn_abandoned_job_cleanup
+ return if Settings.disable_abandoned_job_cleanup
+ cleanup_interval_in_minutes = 60
+ @abandoned_cleanup_thread = Thread.new do
+ # every hour (staggered by process)
+ # check for dead jobs and cull them.
+ # Will actually be more often based on the
+ # number of worker nodes in the pool. This will actually
+ # be a max of N times per hour where N is the number of workers,
+ # but they won't overrun each other because the health check
+ # takes an advisory lock internally
+ sleep(rand(cleanup_interval_in_minutes * 60))
+ loop do
+ schedule_abandoned_job_cleanup
+ sleep(cleanup_interval_in_minutes * 60)
+ end
+ end
+ end
+
+ def schedule_abandoned_job_cleanup
+ pid = fork_with_reconnects do
+ # we want to avoid db connections in the main pool process
+ $0 = "delayed_abandoned_job_cleanup"
+ Delayed::Worker::HealthCheck.reschedule_abandoned_jobs
+ end
+ workers[pid] = :abandoned_job_cleanup
+ end
+
  def spawn_periodic_auditor
  return if Settings.disable_periodic_jobs

@@ -217,6 +246,8 @@ class Pool
  case worker
  when :periodic_audit
  say "ran auditor: #{worker}"
+ when :abandoned_job_cleanup
+ say "ran cleanup: #{worker}"
  when :work_queue
  say "work queue exited, restarting", :info
  spawn_work_queue
@@ -8,6 +8,7 @@ module Delayed
  module Settings
  SETTINGS = [
  :default_job_options,
+ :disable_abandoned_job_cleanup,
  :disable_periodic_jobs,
  :disable_automatic_orphan_unlocking,
  :fetch_batch_size,
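A minimal opt-out sketch for the new cleanup process, assuming the setting is assigned the same way as the existing disable_periodic_jobs knob (e.g. from an initializer):

    Delayed::Settings.disable_abandoned_job_cleanup = true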
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module Delayed
- VERSION = "2.0.0"
+ VERSION = "2.2.0"
  end
@@ -3,6 +3,17 @@
  module Delayed

  class TimeoutError < RuntimeError; end
+ class RetriableError < RuntimeError
+ # this error is a special case. You _should_ raise
+ # it from inside the rescue block for another error,
+ # because it indicates: "something made this job fail
+ # but we're pretty sure it's transient and it's safe to try again".
+ # the workflow is still the same (retry will happen unless
+ # retries are exhausted), but it won't call the :error
+ # callback unless it can't retry anymore. It WILL call the
+ # separate ":retry" callback, which is ONLY activated
+ # for this kind of error.
+ end

  require 'tmpdir'
  require 'set'
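A hedged sketch of how a job might raise this (the job class, API call, and rescued error below are hypothetical):

    class SyncUserJob
      def perform
        ExternalApi.sync!(user_id)
      rescue Net::ReadTimeout => e
        # transient failure: the job is rescheduled as usual, but the :retry
        # lifecycle callback fires instead of :error until attempts run out
        raise Delayed::RetriableError, e.message
      end
    end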
@@ -94,7 +105,11 @@ class Worker
  end

  def exit?
- @exit
+ !!@exit || parent_exited?
+ end
+
+ def parent_exited?
+ @parent_pid && @parent_pid != Process.ppid
  end

  def wake_up
@@ -198,32 +213,38 @@ class Worker
  end

  def perform(job)
- count = 1
- raise Delayed::Backend::JobExpired, "job expired at #{job.expires_at}" if job.expired?
- self.class.lifecycle.run_callbacks(:perform, self, job) do
- set_process_name("run:#{Settings.worker_procname_prefix}#{job.id}:#{job.name}")
- logger.info("Processing #{log_job(job, :long)}")
- runtime = Benchmark.realtime do
- if job.batch?
- # each job in the batch will have perform called on it, so we don't
- # need a timeout around this
- count = perform_batch(job)
- else
- job.invoke_job
+ begin
+ count = 1
+ raise Delayed::Backend::JobExpired, "job expired at #{job.expires_at}" if job.expired?
+ self.class.lifecycle.run_callbacks(:perform, self, job) do
+ set_process_name("run:#{Settings.worker_procname_prefix}#{job.id}:#{job.name}")
+ logger.info("Processing #{log_job(job, :long)}")
+ runtime = Benchmark.realtime do
+ if job.batch?
+ # each job in the batch will have perform called on it, so we don't
+ # need a timeout around this
+ count = perform_batch(job)
+ else
+ job.invoke_job
+ end
+ job.destroy
  end
- job.destroy
+ logger.info("Completed #{log_job(job)} #{"%.0fms" % (runtime * 1000)}")
+ end
+ rescue ::Delayed::RetriableError => re
+ can_retry = job.attempts + 1 < job.inferred_max_attempts
+ callback_type = can_retry ? :retry : :error
+ self.class.lifecycle.run_callbacks(callback_type, self, job, re) do
+ handle_failed_job(job, re)
+ end
+ rescue SystemExit => se
+ # There wasn't really a failure here so no callbacks and whatnot needed,
+ # still reschedule the job though.
+ job.reschedule(se)
+ rescue Exception => e
+ self.class.lifecycle.run_callbacks(:error, self, job, e) do
+ handle_failed_job(job, e)
  end
- logger.info("Completed #{log_job(job)} #{"%.0fms" % (runtime * 1000)}")
- end
- count
- rescue SystemExit => se
- # There wasn't really a failure here so no callbacks and whatnot needed,
- # still reschedule the job though.
- job.reschedule(se)
- count
- rescue Exception => e
- self.class.lifecycle.run_callbacks(:error, self, job, e) do
- handle_failed_job(job, e)
  end
  count
  end
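A worked example of the callback selection above, for a job whose inferred_max_attempts is 3:

    # attempts before this run: 0 -> 0 + 1 < 3 -> :retry callbacks, job rescheduled
    # attempts before this run: 1 -> 1 + 1 < 3 -> :retry callbacks, job rescheduled
    # attempts before this run: 2 -> 2 + 1 < 3 is false -> :error callbacks fire,
    #   and reschedule then marks the job as a permanent failure ("max attempts reached")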
@@ -23,12 +23,13 @@ module Delayed
  def reschedule_abandoned_jobs
  return if Settings.worker_health_check_type == :none
  Delayed::Job.transaction do
- # this job is a special case, and is not a singleton
+ # this action is a special case, and SHOULD NOT be a periodic job
  # because if it gets wiped out suddenly during execution
  # it can't go clean up it's abandoned self. Therefore,
- # we try to get an advisory lock when it runs. If we succeed,
- # no other job is trying to do this right now (and if we abandon the
- # job, the transaction will end, releasing the advisory lock).
+ # we expect it to get run from it's own process forked from the job pool
+ # and we try to get an advisory lock when it runs. If we succeed,
+ # no other worker is trying to do this right now (and if we abandon the
+ # operation, the transaction will end, releasing the advisory lock).
  result = attempt_advisory_lock
  return unless result
  checker = Worker::HealthCheck.build(
@@ -59,8 +60,8 @@ module Delayed

  def attempt_advisory_lock
  lock_name = "Delayed::Worker::HealthCheck#reschedule_abandoned_jobs"
- output = ActiveRecord::Base.connection.execute("SELECT pg_try_advisory_xact_lock(half_md5_as_bigint('#{lock_name}'));")
- output.getvalue(0, 0)
+ conn = ActiveRecord::Base.connection
+ conn.select_value("SELECT pg_try_advisory_xact_lock(#{conn.quote_table_name('half_md5_as_bigint')}('#{lock_name}'));")
  end
  end
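Because pg_try_advisory_xact_lock is transaction-scoped, the lock is released automatically at COMMIT or ROLLBACK, so a cleanup run that dies midway cannot wedge it. A condensed sketch of the control flow this gives reschedule_abandoned_jobs above:

    Delayed::Job.transaction do
      next unless attempt_advisory_lock   # another process is already cleaning up
      # ... find dead workers and reschedule their locked jobs ...
    end                                   # lock released here, even if the run is abandoned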
@@ -14,6 +14,7 @@ RSpec.describe Delayed::Periodic do
  ensure
  Delayed::Periodic.scheduled = prev_sched
  Delayed::Periodic.overrides = prev_ovr
+ Delayed::Job.delete_all
  end

  describe ".cron" do
@@ -26,14 +27,5 @@ RSpec.describe Delayed::Periodic do
  expect(instance).to_not be_nil
  expect(instance.enqueue_args[:singleton]).to eq("periodic: just a test")
  end
-
- it "uses no singleton if told to skip" do
- Delayed::Periodic.cron job_name, '*/10 * * * *', {singleton: false} do
- # no-op
- end
- instance = Delayed::Periodic.scheduled[job_name]
- expect(instance).to_not be_nil
- expect(instance.enqueue_args[:singleton]).to be_nil
- end
  end
  end
@@ -6,6 +6,11 @@ describe Delayed::Worker do
  let(:worker_config) { {
  queue: "test", min_priority: 1, max_priority: 2, stuff: "stuff",
  }.freeze }
+ let(:job_attrs) { {
+ id: 42, name: "testjob", full_name: "testfullname", :last_error= => nil,
+ attempts: 1, reschedule: nil, :expired? => false,
+ payload_object: {}, priority: 25
+ }.freeze }
  subject { described_class.new(worker_config.dup) }

  after { Delayed::Worker.lifecycle.reset! }
@@ -14,9 +19,24 @@ describe Delayed::Worker do
  it "fires off an error callback when a job raises an exception" do
  fired = false
  Delayed::Worker.lifecycle.before(:error) {|worker, exception| fired = true}
- job = double(:last_error= => nil, attempts: 1, reschedule: nil)
- subject.perform(job)
+ job = double(job_attrs)
+ output_count = subject.perform(job)
  expect(fired).to be_truthy
+ expect(output_count).to eq(1)
+ end
+
+ it "uses the retry callback for a retriable exception" do
+ error_fired = retry_fired = false
+ Delayed::Worker.lifecycle.before(:error) {|worker, exception| error_fired = true }
+ Delayed::Worker.lifecycle.before(:retry) {|worker, exception| retry_fired = true}
+ job = Delayed::Job.new(payload_object: {}, priority: 25, strand: "test_jobs", max_attempts: 3)
+ expect(job).to receive(:invoke_job) do
+ raise Delayed::RetriableError, "that's all this job does"
+ end
+ output_count = subject.perform(job)
+ expect(error_fired).to be_falsey
+ expect(retry_fired).to be_truthy
+ expect(output_count).to eq(1)
  end

  it "reloads" do
@@ -35,7 +55,7 @@ describe Delayed::Worker do
  expect(ActionDispatch::Reloader).to receive(:prepare!).once
  expect(ActionDispatch::Reloader).to receive(:cleanup!).once
  end
- job = double(:last_error= => nil, attempts: 0, reschedule: nil, expired?: false)
+ job = double(job_attrs)
  subject.perform(job)
  end
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: inst-jobs
  version: !ruby/object:Gem::Version
- version: 2.0.0
+ version: 2.2.0
  platform: ruby
  authors:
  - Tobias Luetke
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2020-12-07 00:00:00.000000000 Z
+ date: 2021-01-06 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: activerecord