RubyGems - inst-jobs - Versions diffs - 2.0.0 → 2.2.0 - Mend

inst-jobs 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +4 -4
data/lib/delayed/backend/base.rb +9 -1
data/lib/delayed/lifecycle.rb +1 -0
data/lib/delayed/periodic.rb +1 -4
data/lib/delayed/pool.rb +31 -0
data/lib/delayed/settings.rb +1 -0
data/lib/delayed/version.rb +1 -1
data/lib/delayed/worker.rb +46 -25
data/lib/delayed/worker/health_check.rb +7 -6
data/spec/delayed/periodic_spec.rb +1 -9
data/spec/delayed/worker_spec.rb +23 -3
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d674b7da21caf04eb87ff9823ed549c93a901219669316090d088f0699564e59
-  data.tar.gz: 021456d34f12eff8cc988db866018d701fc77ffdbb57e9fb308fc1bd25a91ecb
+  metadata.gz: c062c222e731bd490efe572108a508bf78faabee4479f7fe6927a89688d9ef0b
+  data.tar.gz: 3b1678fc017230e990bc7e8d4e652c23ab59413953ce72a312b13adfa7626193
 SHA512:
-  metadata.gz: ad78cfdd9026db24b714c532c8ee837a875e443afc375909f0c130e3cfbf87d1f872344f982d931838bfa6649a2f1edc59430f6444a2baee08f8afb568015cfc
-  data.tar.gz: e2b127477f0687958178505628b9544aa5c49e7aa1d0ceef32892250aa26aeb1c77f12bcacd6682e17c2bc379f987b154a0f982e029852432c39f7b3a5335df8
+  metadata.gz: 8c1a722c17c9abc8f5c8a44cb28f6584dc9fb16c1edcccc8df566ad21a5f81af7a54fb70282e2689aee11947dcd96f44ca01dfe542d71c8d3d6b7f145a572ce7
+  data.tar.gz: 9a7a65c71820d4b04f1e1ac2bf498cf030490a597d075d87d4399a392a7da1bbf50cfb3d5eeb1dea9c357d11e00aabf5e469f062c1fe9cc4b02cc8ed08e1a192

data/lib/delayed/backend/base.rb CHANGED

@@ -87,6 +87,10 @@ module Delayed
             batches[batch_enqueue_args] << kwargs
             return true
           else
+            if kwargs[:on_conflict].present?
+              Delayed::Logging.logger.warn("[DELAYED_JOB] WARNING: providing 'on_conflict' as an option to a non-singleton job will have no effect.  Discarding.")
+              kwargs.delete(:on_conflict)
+            end
             job = self.create(**kwargs)
           end
@@ -178,6 +182,10 @@ module Delayed
         expires_at && (self.class.db_time_now >= expires_at)
       end
+      def inferred_max_attempts
+        self.max_attempts || Delayed::Settings.max_attempts
+      end
       # Reschedule the job in the future (when a job fails).
       # Uses an exponential scale depending on the number of failed attempts.
       def reschedule(error = nil, time = nil)
@@ -190,7 +198,7 @@ module Delayed
         self.attempts += 1 unless return_code == :unlock
-        if self.attempts >= (self.max_attempts || Delayed::Settings.max_attempts)
+        if self.attempts >= self.inferred_max_attempts
           permanent_failure error || "max attempts reached"
         elsif expired?
           permanent_failure error || "job has expired"

data/lib/delayed/lifecycle.rb CHANGED

@@ -12,6 +12,7 @@ module Delayed
       :loop             => [:worker],
       :perform          => [:worker, :job],
       :pop              => [:worker],
+      :retry            => [:worker, :job, :exception],
       :work_queue_pop   => [:work_queue, :worker_config],
       :check_for_work   => [:work_queue],
     }

data/lib/delayed/periodic.rb CHANGED

@@ -56,10 +56,7 @@ class Periodic
     inferred_args = {
       max_attempts: 1,
       run_at: @cron.next_time(Delayed::Periodic.now).utc.to_time,
-      singleton: (@job_args[:singleton] == false ? nil : tag),
-      # yes, checking for whether it is actually the boolean literal false,
-      # which means the consuming code really does not want this job to be
-      # a singleton at all.
+      singleton: tag,
       on_conflict: :patient
     }
     @job_args.merge(inferred_args)

data/lib/delayed/pool.rb CHANGED

@@ -39,6 +39,7 @@ class Pool
     Process.wait unlock_pid
     spawn_periodic_auditor
+    spawn_abandoned_job_cleanup
     spawn_all_workers
     say "Workers spawned"
     join
@@ -111,6 +112,34 @@ class Pool
     end
   end
+  def spawn_abandoned_job_cleanup
+    return if Settings.disable_abandoned_job_cleanup
+    cleanup_interval_in_minutes = 60
+    @abandoned_cleanup_thread = Thread.new do
+      # every hour (staggered by process)
+      # check for dead jobs and cull them.
+      # Will actually be more often based on the
+      # number of worker nodes in the pool.  This will actually
+      # be a max of N times per hour where N is the number of workers,
+      # but they won't overrun each other because the health check
+      # takes an advisory lock internally
+      sleep(rand(cleanup_interval_in_minutes * 60))
+      loop do
+        schedule_abandoned_job_cleanup
+        sleep(cleanup_interval_in_minutes * 60)
+      end
+    end
+  end
+  def schedule_abandoned_job_cleanup
+    pid = fork_with_reconnects do
+      # we want to avoid db connections in the main pool process
+      $0 = "delayed_abandoned_job_cleanup"
+      Delayed::Worker::HealthCheck.reschedule_abandoned_jobs
+    end
+    workers[pid] = :abandoned_job_cleanup
+  end
   def spawn_periodic_auditor
     return if Settings.disable_periodic_jobs
@@ -217,6 +246,8 @@ class Pool
       case worker
       when :periodic_audit
         say "ran auditor: #{worker}"
+      when :abandoned_job_cleanup
+        say "ran cleanup: #{worker}"
       when :work_queue
         say "work queue exited, restarting", :info
         spawn_work_queue

data/lib/delayed/settings.rb CHANGED

@@ -8,6 +8,7 @@ module Delayed
   module Settings
     SETTINGS = [
       :default_job_options,
+      :disable_abandoned_job_cleanup,
       :disable_periodic_jobs,
       :disable_automatic_orphan_unlocking,
       :fetch_batch_size,

data/lib/delayed/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Delayed
-  VERSION = "2.0.0"
+  VERSION = "2.2.0"
 end

data/lib/delayed/worker.rb CHANGED

@@ -3,6 +3,17 @@
 module Delayed
 class TimeoutError < RuntimeError; end
+class RetriableError < RuntimeError
+  # this error is a special case.  You _should_ raise
+  # it from inside the rescue block for another error,
+  # because it indicates: "something made this job fail
+  # but we're pretty sure it's transient and it's safe to try again".
+  # the workflow is still the same (retry will happen unless
+  # retries are exhausted), but it won't call the :error
+  # callback unless it can't retry anymore.  It WILL call the
+  # separate ":retry" callback, which is ONLY activated
+  # for this kind of error.
+end
 require 'tmpdir'
 require 'set'
@@ -94,7 +105,11 @@ class Worker
   end
   def exit?
-    @exit
+    !!@exit || parent_exited?
+  end
+  def parent_exited?
+    @parent_pid && @parent_pid != Process.ppid
   end
   def wake_up
@@ -198,32 +213,38 @@ class Worker
   end
   def perform(job)
-    count = 1
-    raise Delayed::Backend::JobExpired, "job expired at #{job.expires_at}" if job.expired?
-    self.class.lifecycle.run_callbacks(:perform, self, job) do
-      set_process_name("run:#{Settings.worker_procname_prefix}#{job.id}:#{job.name}")
-      logger.info("Processing #{log_job(job, :long)}")
-      runtime = Benchmark.realtime do
-        if job.batch?
-          # each job in the batch will have perform called on it, so we don't
-          # need a timeout around this
-          count = perform_batch(job)
-        else
-          job.invoke_job
+    begin
+      count = 1
+      raise Delayed::Backend::JobExpired, "job expired at #{job.expires_at}" if job.expired?
+      self.class.lifecycle.run_callbacks(:perform, self, job) do
+        set_process_name("run:#{Settings.worker_procname_prefix}#{job.id}:#{job.name}")
+        logger.info("Processing #{log_job(job, :long)}")
+        runtime = Benchmark.realtime do
+          if job.batch?
+            # each job in the batch will have perform called on it, so we don't
+            # need a timeout around this
+            count = perform_batch(job)
+          else
+            job.invoke_job
+          end
+          job.destroy
         end
-        job.destroy
+        logger.info("Completed #{log_job(job)} #{"%.0fms" % (runtime * 1000)}")
+      end
+    rescue ::Delayed::RetriableError => re
+      can_retry = job.attempts + 1 < job.inferred_max_attempts
+      callback_type = can_retry ? :retry : :error
+      self.class.lifecycle.run_callbacks(callback_type, self, job, re) do
+        handle_failed_job(job, re)
+      end
+    rescue SystemExit => se
+      # There wasn't really a failure here so no callbacks and whatnot needed,
+      # still reschedule the job though.
+      job.reschedule(se)
+    rescue Exception => e
+      self.class.lifecycle.run_callbacks(:error, self, job, e) do
+        handle_failed_job(job, e)
       end
-      logger.info("Completed #{log_job(job)} #{"%.0fms" % (runtime * 1000)}")
-    end
-    count
-  rescue SystemExit => se
-    # There wasn't really a failure here so no callbacks and whatnot needed,
-    # still reschedule the job though.
-    job.reschedule(se)
-    count
-  rescue Exception => e
-    self.class.lifecycle.run_callbacks(:error, self, job, e) do
-      handle_failed_job(job, e)
     end
     count
   end

data/lib/delayed/worker/health_check.rb CHANGED

@@ -23,12 +23,13 @@ module Delayed
         def reschedule_abandoned_jobs
           return if Settings.worker_health_check_type == :none
           Delayed::Job.transaction do
-            # this job is a special case, and is not a singleton
+            # this action is a special case, and SHOULD NOT be a periodic job
             # because if it gets wiped out suddenly during execution
             # it can't go clean up its abandoned self.  Therefore,
-            # we try to get an advisory lock when it runs.  If we succeed,
-            # no other job is trying to do this right now (and if we abandon the
-            # job, the transaction will end, releasing the advisory lock).
+            # we expect it to get run from its own process forked from the job pool
+            # and we try to get an advisory lock when it runs.  If we succeed,
+            # no other worker is trying to do this right now (and if we abandon the
+            # operation, the transaction will end, releasing the advisory lock).
             result = attempt_advisory_lock
             return unless result
             checker = Worker::HealthCheck.build(
@@ -59,8 +60,8 @@ module Delayed
         def attempt_advisory_lock
           lock_name = "Delayed::Worker::HealthCheck#reschedule_abandoned_jobs"
-          output = ActiveRecord::Base.connection.execute("SELECT pg_try_advisory_xact_lock(half_md5_as_bigint('#{lock_name}'));")
-          output.getvalue(0, 0)
+          conn = ActiveRecord::Base.connection
+          conn.select_value("SELECT pg_try_advisory_xact_lock(#{conn.quote_table_name('half_md5_as_bigint')}('#{lock_name}'));")
         end
       end

data/spec/delayed/periodic_spec.rb CHANGED

@@ -14,6 +14,7 @@ RSpec.describe Delayed::Periodic do
   ensure
     Delayed::Periodic.scheduled = prev_sched
     Delayed::Periodic.overrides = prev_ovr
+    Delayed::Job.delete_all
   end
   describe ".cron" do
@@ -26,14 +27,5 @@ RSpec.describe Delayed::Periodic do
       expect(instance).to_not be_nil
       expect(instance.enqueue_args[:singleton]).to eq("periodic: just a test")
     end
-    it "uses no singleton if told to skip" do
-      Delayed::Periodic.cron job_name, '*/10 * * * *', {singleton: false} do
-        # no-op
-      end
-      instance = Delayed::Periodic.scheduled[job_name]
-      expect(instance).to_not be_nil
-      expect(instance.enqueue_args[:singleton]).to be_nil
-    end
   end
 end

data/spec/delayed/worker_spec.rb CHANGED

@@ -6,6 +6,11 @@ describe Delayed::Worker do
   let(:worker_config) { {
       queue: "test", min_priority: 1, max_priority: 2, stuff: "stuff",
   }.freeze }
+  let(:job_attrs) { {
+    id: 42, name: "testjob", full_name: "testfullname", :last_error= => nil,
+    attempts: 1, reschedule: nil, :expired? => false,
+    payload_object: {}, priority: 25
+  }.freeze }
   subject { described_class.new(worker_config.dup) }
   after { Delayed::Worker.lifecycle.reset! }
@@ -14,9 +19,24 @@ describe Delayed::Worker do
     it "fires off an error callback when a job raises an exception" do
       fired = false
       Delayed::Worker.lifecycle.before(:error) {|worker, exception| fired = true}
-      job = double(:last_error= => nil, attempts: 1, reschedule: nil)
-      subject.perform(job)
+      job = double(job_attrs)
+      output_count = subject.perform(job)
       expect(fired).to be_truthy
+      expect(output_count).to eq(1)
+    end
+    it "uses the retry callback for a retriable exception" do
+      error_fired = retry_fired = false
+      Delayed::Worker.lifecycle.before(:error) {|worker, exception| error_fired = true }
+      Delayed::Worker.lifecycle.before(:retry) {|worker, exception| retry_fired = true}
+      job = Delayed::Job.new(payload_object: {}, priority: 25, strand: "test_jobs", max_attempts: 3)
+      expect(job).to receive(:invoke_job) do
+        raise Delayed::RetriableError, "that's all this job does"
+      end
+      output_count = subject.perform(job)
+      expect(error_fired).to be_falsey
+      expect(retry_fired).to be_truthy
+      expect(output_count).to eq(1)
     end
     it "reloads" do
@@ -35,7 +55,7 @@ describe Delayed::Worker do
         expect(ActionDispatch::Reloader).to receive(:prepare!).once
         expect(ActionDispatch::Reloader).to receive(:cleanup!).once
       end
-      job = double(:last_error= => nil, attempts: 0, reschedule: nil, expired?: false)
+      job = double(job_attrs)
       subject.perform(job)
     end
   end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: inst-jobs
 version: !ruby/object:Gem::Version
-  version: 2.0.0
+  version: 2.2.0
 platform: ruby
 authors:
 - Tobias Luetke
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-12-07 00:00:00.000000000 Z
+date: 2021-01-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activerecord