RubyGems - inst-jobs - Versions diffs - 3.0.3 → 3.0.5 - Mend

inst-jobs 3.0.3 → 3.0.5

Files changed (6)

checksums.yaml +4 -4
data/db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb +171 -0
data/lib/delayed/version.rb +1 -1
data/spec/shared/shared_backend.rb +146 -0
data/spec/spec_helper.rb +2 -1
metadata +4 -3

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: fb38d77d225501a3f9ca85561266adf7f2b873d187d0ee9e820e30f773fc5846
-  data.tar.gz: 0bdbb7e6609d0c228de6906b68ff41b7a5218b89832dd9f8958ed261d7a39fd2
+  metadata.gz: 11059ce779a0ff644edcae25335e23b7ad2a4448dfb48e8cbc95295f3e6c468c
+  data.tar.gz: 84dfa8a1185219823013e1363190b0469e397dd423105eb29a205fea1e7207fa
 SHA512:
-  metadata.gz: 76c59051987d523e8465c98a0aa03edfa7ee6df1fc090ebeb8238f827d3f5487dccd61f92c20acf87df9b53153de03bb866d577f44eb3d0db18bd3cd9fa44f66
-  data.tar.gz: 0262e34514a1919ff9715b1707d7c99dab2ddf4f3ef0c98e06441b61709ddb2bb36c763b73cc5a5512a03e2b2a253b507818da313213a621c40879d086fa9333
+  metadata.gz: 9e4c5291673edccd5760cd11d087102e0d8829d25793a6ca4f9f3255336b3abb1ce2bcf7426a79a76d0f373b44ec2b0152c395974f78a4552942f7a858d9d499
+  data.tar.gz: 77aea9b18c3492e2b98f26a23c0591c8da9a31a34c0736868f00d54b3eadff77f7b2bbeb00b91b0819f93bffcefb7a26d1f2c829ebd82bc6b58a5c5a7bb01b16

data/db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb ADDED

@@ -0,0 +1,171 @@
+# frozen_string_literal: true
+class UpdateAfterDeleteTriggerForSingletonTransitionCases < ActiveRecord::Migration[6.0]
+  def up
+    execute(<<~SQL)
+      CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
+      DECLARE
+        next_strand varchar;
+        running_count integer;
+        should_lock boolean;
+        should_be_precise boolean;
+        update_query varchar;
+        skip_locked varchar;
+        transition boolean;
+      BEGIN
+        IF OLD.strand IS NOT NULL THEN
+          should_lock := true;
+          should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+          IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) subquery_for_count);
+            should_lock := running_count < OLD.max_concurrent;
+          END IF;
+          IF should_lock THEN
+            PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+          END IF;
+          -- note that we don't really care if the row we're deleting has a singleton, or if it even
+          -- matches the row(s) we're going to update. we just need to make sure that whatever
+          -- singleton we grab isn't already running (which is a simple existence check, since
+          -- the unique indexes ensure there is at most one singleton running, and one queued)
+          update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.strand=$1.strand AND
+                (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
+              ORDER BY j2.strand_order_override ASC, j2.id ASC
+              LIMIT ';
+          IF should_be_precise THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) s);
+            IF running_count < OLD.max_concurrent THEN
+              update_query := update_query || '($1.max_concurrent - $2)';
+            ELSE
+              -- we have too many running already; just bail
+              RETURN OLD;
+            END IF;
+          ELSE
+            update_query := update_query || '1';
+            -- n-strands don't require precise ordering; we can make this query more performant
+            IF OLD.max_concurrent > 1 THEN
+              skip_locked := ' SKIP LOCKED';
+            END IF;
+          END IF;
+          update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+          EXECUTE update_query USING OLD, running_count;
+        END IF;
+        IF OLD.singleton IS NOT NULL THEN
+          transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
+          IF transition THEN
+            next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
+            IF next_strand IS NOT NULL THEN
+              -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
+              IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
+                -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
+                RETURN OLD;
+              END IF;
+            END IF;
+          ELSIF OLD.strand IS NOT NULL THEN
+            -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
+            RETURN OLD;
+          END IF;
+          -- handles transitioning a singleton from stranded to not stranded --
+          -- handles transitioning a singleton from unstranded to stranded --
+          -- handles transitioning a singleton from strand A to strand B --
+          -- these transitions are a relatively rare case, so we take a shortcut and --
+          -- only start the next singleton if its strand does not currently have any running jobs --
+          -- if it does, the next stranded job that finishes will start this singleton if it can --
+          UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.singleton=OLD.singleton AND
+                j2.locked_by IS NULL AND
+                (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
+              FOR UPDATE
+            );
+        END IF;
+        RETURN OLD;
+      END;
+      $$ LANGUAGE plpgsql;
+    SQL
+  end
+  def down
+    execute(<<~SQL)
+      CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
+      DECLARE
+        running_count integer;
+        should_lock boolean;
+        should_be_precise boolean;
+        update_query varchar;
+        skip_locked varchar;
+      BEGIN
+        IF OLD.strand IS NOT NULL THEN
+          should_lock := true;
+          should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+          IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) subquery_for_count);
+            should_lock := running_count < OLD.max_concurrent;
+          END IF;
+          IF should_lock THEN
+            PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+          END IF;
+          -- note that we don't really care if the row we're deleting has a singleton, or if it even
+          -- matches the row(s) we're going to update. we just need to make sure that whatever
+          -- singleton we grab isn't already running (which is a simple existence check, since
+          -- the unique indexes ensure there is at most one singleton running, and one queued)
+          update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.strand=$1.strand AND
+                (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
+              ORDER BY j2.strand_order_override ASC, j2.id ASC
+              LIMIT ';
+          IF should_be_precise THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) s);
+            IF running_count < OLD.max_concurrent THEN
+              update_query := update_query || '($1.max_concurrent - $2)';
+            ELSE
+              -- we have too many running already; just bail
+              RETURN OLD;
+            END IF;
+          ELSE
+            update_query := update_query || '1';
+            -- n-strands don't require precise ordering; we can make this query more performant
+            IF OLD.max_concurrent > 1 THEN
+              skip_locked := ' SKIP LOCKED';
+            END IF;
+          END IF;
+          update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+          EXECUTE update_query USING OLD, running_count;
+        ELSIF OLD.singleton IS NOT NULL THEN
+          UPDATE delayed_jobs SET next_in_strand = 't' WHERE singleton=OLD.singleton AND next_in_strand=false AND locked_by IS NULL;
+        END IF;
+        RETURN OLD;
+      END;
+      $$ LANGUAGE plpgsql;
+    SQL
+  end
+end

data/lib/delayed/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Delayed
-  VERSION = "3.0.3"
+  VERSION = "3.0.5"
 end

data/spec/shared/shared_backend.rb CHANGED

@@ -389,7 +389,153 @@ shared_examples_for "a backend" do
         expect(job1.reload.handler).to include("ErrorJob")
       end
+      context "next_in_strand management - deadlocks", non_transactional: true do
+        # The following unit tests are fairly slow and non-deterministic. It may be
+        # easier to make them fail quicker and more consistently by adding a random
+        # sleep into the appropriate trigger(s).
+        #
+        # Example:
+        #   PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+        #   PERFORM pg_sleep(random() * 2);
+        def loop_secs(val)
+          loop_start = Time.now.utc
+          loop do
+            break if Time.now.utc >= loop_start + val
+            yield
+          end
+        end
+        it "doesn't deadlock when transitioning from strand_a to strand_b" do
+          threads = []
+          def thread_body(j1_params, j2_params)
+            loop do
+              j1 = create_job(**j1_params)
+              j2 = create_job(**j2_params)
+              expect(j1.reload.next_in_strand).to eq(true)
+              expect(j2.reload.next_in_strand).to eq(false)
+              j1.delete
+              # In case we couldn't acquire a lock, we actually need to wait for
+              # the other thread to set this to true.
+              loop_secs(10.seconds) do
+                break if j2.reload.next_in_strand
+              end
+              expect(j2.reload.next_in_strand).to eq(true)
+              j2.delete
+            end
+          rescue
+            Thread.current.thread_variable_set(:fail, true)
+            raise
+          end
+          threads << Thread.new do
+            thread_body(
+              { singleton: "myjobs", strand: "myjobs2", locked_by: "w1" },
+              { singleton: "myjobs", strand: "myjobs" }
+            )
+          end
+          threads << Thread.new do
+            thread_body(
+              { singleton: "myjobs2", strand: "myjobs", locked_by: "w1" },
+              { singleton: "myjobs2", strand: "myjobs2" }
+            )
+          end
+          begin
+            loop_secs(60.seconds) do
+              if threads.any? { |x| x.thread_variable_get(:fail) }
+                raise "at least one thread hit a deadlock or other error"
+              end
+            end
+          ensure
+            threads.each(&:kill)
+            threads.each(&:join)
+          end
+        end
+      end
       context "next_in_strand management" do
+        it "handles transitions correctly when going from stranded to not stranded" do
+          @job1 = create_job(singleton: "myjobs", strand: "myjobs")
+          Delayed::Job.get_and_lock_next_available("w1")
+          @job2 = create_job(singleton: "myjobs")
+          expect(@job1.reload.next_in_strand).to eq true
+          expect(@job2.reload.next_in_strand).to eq false
+          @job1.destroy
+          expect(@job2.reload.next_in_strand).to eq true
+        end
+        it "handles transitions correctly when going from not stranded to stranded" do
+          @job1 = create_job(singleton: "myjobs2", strand: "myjobs")
+          @job2 = create_job(singleton: "myjobs")
+          Delayed::Job.get_and_lock_next_available("w1")
+          Delayed::Job.get_and_lock_next_available("w1")
+          @job3 = create_job(singleton: "myjobs", strand: "myjobs2")
+          expect(@job1.reload.next_in_strand).to eq true
+          expect(@job2.reload.next_in_strand).to eq true
+          expect(@job3.reload.next_in_strand).to eq false
+          @job2.destroy
+          expect(@job1.reload.next_in_strand).to eq true
+          expect(@job3.reload.next_in_strand).to eq true
+        end
+        it "does not violate n_strand=1 constraints when going from not stranded to stranded" do
+          @job1 = create_job(singleton: "myjobs2", strand: "myjobs")
+          @job2 = create_job(singleton: "myjobs")
+          Delayed::Job.get_and_lock_next_available("w1")
+          Delayed::Job.get_and_lock_next_available("w1")
+          @job3 = create_job(singleton: "myjobs", strand: "myjobs")
+          expect(@job1.reload.next_in_strand).to eq true
+          expect(@job2.reload.next_in_strand).to eq true
+          expect(@job3.reload.next_in_strand).to eq false
+          @job2.destroy
+          expect(@job1.reload.next_in_strand).to eq true
+          expect(@job3.reload.next_in_strand).to eq false
+        end
+        it "handles transitions correctly when going from stranded to another strand" do
+          @job1 = create_job(singleton: "myjobs", strand: "myjobs")
+          Delayed::Job.get_and_lock_next_available("w1")
+          @job2 = create_job(singleton: "myjobs", strand: "myjobs2")
+          expect(@job1.reload.next_in_strand).to eq true
+          expect(@job2.reload.next_in_strand).to eq false
+          @job1.destroy
+          expect(@job2.reload.next_in_strand).to eq true
+        end
+        it "does not violate n_strand=1 constraints when going from stranded to another strand" do
+          @job1 = create_job(singleton: "myjobs2", strand: "myjobs2")
+          @job2 = create_job(singleton: "myjobs", strand: "myjobs")
+          Delayed::Job.get_and_lock_next_available("w1")
+          Delayed::Job.get_and_lock_next_available("w1")
+          @job3 = create_job(singleton: "myjobs", strand: "myjobs2")
+          expect(@job1.reload.next_in_strand).to eq true
+          expect(@job2.reload.next_in_strand).to eq true
+          expect(@job3.reload.next_in_strand).to eq false
+          @job2.destroy
+          expect(@job1.reload.next_in_strand).to eq true
+          expect(@job3.reload.next_in_strand).to eq false
+        end
         it "creates first as true, and second as false, then transitions to second when deleted" do
           @job1 = create_job(singleton: "myjobs")
           Delayed::Job.get_and_lock_next_available("w1")

data/spec/spec_helper.rb CHANGED

@@ -54,7 +54,8 @@ connection_config = {
   host: ENV["TEST_DB_HOST"].presence,
   encoding: "utf8",
   username: ENV["TEST_DB_USERNAME"],
-  database: ENV["TEST_DB_DATABASE"]
+  database: ENV["TEST_DB_DATABASE"],
+  min_messages: "notice"
 }
 def migrate(file)

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: inst-jobs
 version: !ruby/object:Gem::Version
-  version: 3.0.3
+  version: 3.0.5
 platform: ruby
 authors:
 - Cody Cutrer
@@ -10,7 +10,7 @@ authors:
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2021-11-30 00:00:00.000000000 Z
+date: 2021-12-09 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activerecord
@@ -466,6 +466,7 @@ files:
 - db/migrate/20210928174754_fix_singleton_condition_in_before_insert.rb
 - db/migrate/20210929204903_update_conflicting_singleton_function_to_use_index.rb
 - db/migrate/20211101190934_update_after_delete_trigger_for_singleton_index.rb
+- db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb
 - exe/inst_jobs
 - lib/delayed/backend/active_record.rb
 - lib/delayed/backend/base.rb
@@ -547,7 +548,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.2.15
+rubygems_version: 3.1.4
 signing_key:
 specification_version: 4
 summary: Instructure-maintained fork of delayed_job