RubyGems - inst-jobs - Versions diffs - 2.0.0 → 3.1.0 - Mend

inst-jobs 2.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108) hide show

checksums.yaml +4 -4
data/db/migrate/20101216224513_create_delayed_jobs.rb +9 -7
data/db/migrate/20110531144916_cleanup_delayed_jobs_indexes.rb +8 -13
data/db/migrate/20110610213249_optimize_delayed_jobs.rb +8 -8
data/db/migrate/20110831210257_add_delayed_jobs_next_in_strand.rb +25 -25
data/db/migrate/20120510004759_delayed_jobs_delete_trigger_lock_for_update.rb +4 -8
data/db/migrate/20120531150712_drop_psql_jobs_pop_fn.rb +1 -3
data/db/migrate/20120607164022_delayed_jobs_use_advisory_locks.rb +11 -15
data/db/migrate/20120607181141_index_jobs_on_locked_by.rb +1 -1
data/db/migrate/20120608191051_add_jobs_run_at_index.rb +2 -2
data/db/migrate/20120927184213_change_delayed_jobs_handler_to_text.rb +1 -1
data/db/migrate/20140505215510_copy_failed_jobs_original_id.rb +2 -3
data/db/migrate/20150807133223_add_max_concurrent_to_jobs.rb +9 -13
data/db/migrate/20151210162949_improve_max_concurrent.rb +4 -8
data/db/migrate/20161206323555_add_back_default_string_limits_jobs.rb +3 -2
data/db/migrate/20181217155351_speed_up_max_concurrent_triggers.rb +13 -17
data/db/migrate/20200330230722_add_id_to_get_delayed_jobs_index.rb +8 -8
data/db/migrate/20200824222232_speed_up_max_concurrent_delete_trigger.rb +72 -77
data/db/migrate/20200825011002_add_strand_order_override.rb +93 -97
data/db/migrate/20210809145804_add_n_strand_index.rb +12 -0
data/db/migrate/20210812210128_add_singleton_column.rb +200 -0
data/db/migrate/20210917232626_add_delete_conflicting_singletons_before_unlock_trigger.rb +27 -0
data/db/migrate/20210928174754_fix_singleton_condition_in_before_insert.rb +56 -0
data/db/migrate/20210929204903_update_conflicting_singleton_function_to_use_index.rb +27 -0
data/db/migrate/20211101190934_update_after_delete_trigger_for_singleton_index.rb +137 -0
data/db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb +171 -0
data/db/migrate/20211220112800_fix_singleton_race_condition_insert.rb +59 -0
data/db/migrate/20211220113000_fix_singleton_race_condition_delete.rb +207 -0
data/db/migrate/20220127091200_fix_singleton_unique_constraint.rb +31 -0
data/db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb +60 -0
data/db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb +209 -0
data/db/migrate/20220203063200_remove_old_singleton_index.rb +31 -0
data/db/migrate/20220328152900_add_failed_jobs_indicies.rb +12 -0
data/exe/inst_jobs +3 -2
data/lib/delayed/backend/active_record.rb +226 -168
data/lib/delayed/backend/base.rb +119 -72
data/lib/delayed/batch.rb +11 -9
data/lib/delayed/cli.rb +98 -84
data/lib/delayed/core_ext/kernel.rb +4 -2
data/lib/delayed/daemon.rb +70 -74
data/lib/delayed/job_tracking.rb +26 -25
data/lib/delayed/lifecycle.rb +28 -23
data/lib/delayed/log_tailer.rb +17 -17
data/lib/delayed/logging.rb +13 -16
data/lib/delayed/message_sending.rb +43 -52
data/lib/delayed/performable_method.rb +6 -8
data/lib/delayed/periodic.rb +72 -68
data/lib/delayed/plugin.rb +2 -4
data/lib/delayed/pool.rb +205 -168
data/lib/delayed/rails_reloader_plugin.rb +30 -0
data/lib/delayed/server/helpers.rb +6 -6
data/lib/delayed/server.rb +51 -54
data/lib/delayed/settings.rb +96 -81
data/lib/delayed/testing.rb +21 -22
data/lib/delayed/version.rb +1 -1
data/lib/delayed/work_queue/in_process.rb +21 -17
data/lib/delayed/work_queue/parent_process/client.rb +55 -53
data/lib/delayed/work_queue/parent_process/server.rb +245 -207
data/lib/delayed/work_queue/parent_process.rb +52 -53
data/lib/delayed/worker/consul_health_check.rb +32 -33
data/lib/delayed/worker/health_check.rb +35 -27
data/lib/delayed/worker/null_health_check.rb +3 -1
data/lib/delayed/worker/process_helper.rb +11 -12
data/lib/delayed/worker.rb +257 -244
data/lib/delayed/yaml_extensions.rb +12 -10
data/lib/delayed_job.rb +37 -37
data/lib/inst-jobs.rb +1 -1
data/spec/active_record_job_spec.rb +152 -139
data/spec/delayed/cli_spec.rb +7 -7
data/spec/delayed/daemon_spec.rb +10 -9
data/spec/delayed/message_sending_spec.rb +16 -9
data/spec/delayed/periodic_spec.rb +14 -21
data/spec/delayed/server_spec.rb +38 -38
data/spec/delayed/settings_spec.rb +26 -25
data/spec/delayed/work_queue/in_process_spec.rb +8 -9
data/spec/delayed/work_queue/parent_process/client_spec.rb +17 -12
data/spec/delayed/work_queue/parent_process/server_spec.rb +118 -42
data/spec/delayed/work_queue/parent_process_spec.rb +21 -23
data/spec/delayed/worker/consul_health_check_spec.rb +37 -50
data/spec/delayed/worker/health_check_spec.rb +60 -52
data/spec/delayed/worker_spec.rb +53 -24
data/spec/sample_jobs.rb +45 -15
data/spec/shared/delayed_batch.rb +74 -67
data/spec/shared/delayed_method.rb +143 -102
data/spec/shared/performable_method.rb +39 -38
data/spec/shared/shared_backend.rb +801 -440
data/spec/shared/testing.rb +14 -14
data/spec/shared/worker.rb +157 -149
data/spec/shared_jobs_specs.rb +13 -13
data/spec/spec_helper.rb +57 -56
metadata +183 -103
data/lib/delayed/backend/redis/bulk_update.lua +0 -50
data/lib/delayed/backend/redis/destroy_job.lua +0 -2
data/lib/delayed/backend/redis/enqueue.lua +0 -29
data/lib/delayed/backend/redis/fail_job.lua +0 -5
data/lib/delayed/backend/redis/find_available.lua +0 -3
data/lib/delayed/backend/redis/functions.rb +0 -59
data/lib/delayed/backend/redis/get_and_lock_next_available.lua +0 -17
data/lib/delayed/backend/redis/includes/jobs_common.lua +0 -203
data/lib/delayed/backend/redis/job.rb +0 -535
data/lib/delayed/backend/redis/set_running.lua +0 -5
data/lib/delayed/backend/redis/tickle_strand.lua +0 -2
data/spec/gemfiles/42.gemfile +0 -7
data/spec/gemfiles/50.gemfile +0 -7
data/spec/gemfiles/51.gemfile +0 -7
data/spec/gemfiles/52.gemfile +0 -7
data/spec/gemfiles/60.gemfile +0 -7
data/spec/redis_job_spec.rb +0 -148

data/db/migrate/20210928174754_fix_singleton_condition_in_before_insert.rb ADDED Viewed

@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+class FixSingletonConditionInBeforeInsert < ActiveRecord::Migration[5.2] # Redefines delayed_jobs_before_insert_row_tr_fn so its singleton lookup can use the partial indexes.
+  def change # Reversible migration: each direction replaces the whole trigger function body.
+    reversible do |direction|
+      direction.up do # Installs the variant whose singleton check adds "(locked_by IS NULL OR locked_by IS NOT NULL)".
+        execute(<<~SQL)
+          CREATE OR REPLACE FUNCTION delayed_jobs_before_insert_row_tr_fn () RETURNS trigger AS $$
+          BEGIN
+            IF NEW.strand IS NOT NULL THEN
+              PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
+              IF (SELECT COUNT(*) FROM (
+                  SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
+                ) s) = NEW.max_concurrent THEN
+                NEW.next_in_strand := false;
+              END IF;
+            END IF;
+            IF NEW.singleton IS NOT NULL THEN
+              -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
+              -- rather than doing a seq scan
+              PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
+              IF FOUND THEN
+                NEW.next_in_strand := false;
+              END IF;
+            END IF;
+            RETURN NEW;
+          END;
+          $$ LANGUAGE plpgsql;
+        SQL
+      end
+      direction.down do # Installs the variant whose singleton check filters on singleton alone (no locked_by predicate).
+        execute(<<~SQL)
+          CREATE OR REPLACE FUNCTION delayed_jobs_before_insert_row_tr_fn () RETURNS trigger AS $$
+          BEGIN
+            IF NEW.strand IS NOT NULL THEN
+              PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
+              IF (SELECT COUNT(*) FROM (
+                  SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
+                ) s) = NEW.max_concurrent THEN
+                NEW.next_in_strand := false;
+              END IF;
+            END IF;
+            IF NEW.singleton IS NOT NULL THEN
+              PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton;
+              IF FOUND THEN
+                NEW.next_in_strand := false;
+              END IF;
+            END IF;
+            RETURN NEW;
+          END;
+          $$ LANGUAGE plpgsql;
+        SQL
+      end
+    end
+  end
+end

data/db/migrate/20210929204903_update_conflicting_singleton_function_to_use_index.rb ADDED Viewed

@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+class UpdateConflictingSingletonFunctionToUseIndex < ActiveRecord::Migration[5.2] # Redefines delayed_jobs_before_unlock_delete_conflicting_singletons_row_fn.
+  def up # New body: one DELETE of other rows sharing OLD.singleton, limited to locked_by IS NULL (presumably so a partial index applies; confirm).
+    execute(<<~SQL)
+      CREATE OR REPLACE FUNCTION delayed_jobs_before_unlock_delete_conflicting_singletons_row_fn () RETURNS trigger AS $$
+      BEGIN
+        DELETE FROM delayed_jobs WHERE id<>OLD.id AND singleton=OLD.singleton AND locked_by IS NULL;
+        RETURN NEW;
+      END;
+      $$ LANGUAGE plpgsql;
+    SQL
+  end
+  def down # Rollback body: EXISTS guard, then DELETE of other rows sharing OLD.singleton regardless of locked_by.
+    execute(<<~SQL)
+      CREATE OR REPLACE FUNCTION delayed_jobs_before_unlock_delete_conflicting_singletons_row_fn () RETURNS trigger AS $$
+      BEGIN
+        IF EXISTS (SELECT 1 FROM delayed_jobs j2 WHERE j2.singleton=OLD.singleton) THEN
+          DELETE FROM delayed_jobs WHERE id<>OLD.id AND singleton=OLD.singleton;
+        END IF;
+        RETURN NEW;
+      END;
+      $$ LANGUAGE plpgsql;
+    SQL
+  end
+end

data/db/migrate/20211101190934_update_after_delete_trigger_for_singleton_index.rb ADDED Viewed

@@ -0,0 +1,137 @@
+# frozen_string_literal: true
+class UpdateAfterDeleteTriggerForSingletonIndex < ActiveRecord::Migration[6.0] # Redefines delayed_jobs_after_delete_row_tr_fn, which flips next_in_strand to true on waiting jobs after a delete.
+  def up # New body: the j3 singleton check gains a locked_by predicate and the singleton-only UPDATE is limited to locked_by IS NULL.
+    execute(<<~SQL)
+      CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
+      DECLARE
+        running_count integer;
+        should_lock boolean;
+        should_be_precise boolean;
+        update_query varchar;
+        skip_locked varchar;
+      BEGIN
+        IF OLD.strand IS NOT NULL THEN
+          should_lock := true;
+          should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+          IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) subquery_for_count);
+            should_lock := running_count < OLD.max_concurrent;
+          END IF;
+          IF should_lock THEN
+            PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+          END IF;
+          -- note that we don't really care if the row we're deleting has a singleton, or if it even
+          -- matches the row(s) we're going to update. we just need to make sure that whatever
+          -- singleton we grab isn't already running (which is a simple existence check, since
+          -- the unique indexes ensure there is at most one singleton running, and one queued)
+          update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.strand=$1.strand AND
+                (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
+              ORDER BY j2.strand_order_override ASC, j2.id ASC
+              LIMIT ';
+          IF should_be_precise THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) s);
+            IF running_count < OLD.max_concurrent THEN
+              update_query := update_query || '($1.max_concurrent - $2)';
+            ELSE
+              -- we have too many running already; just bail
+              RETURN OLD;
+            END IF;
+          ELSE
+            update_query := update_query || '1';
+            -- n-strands don't require precise ordering; we can make this query more performant
+            IF OLD.max_concurrent > 1 THEN
+              skip_locked := ' SKIP LOCKED';
+            END IF;
+          END IF;
+          update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+          EXECUTE update_query USING OLD, running_count;
+        ELSIF OLD.singleton IS NOT NULL THEN
+          UPDATE delayed_jobs SET next_in_strand = 't' WHERE singleton=OLD.singleton AND next_in_strand=false AND locked_by IS NULL;
+        END IF;
+        RETURN OLD;
+      END;
+      $$ LANGUAGE plpgsql;
+    SQL
+  end
+  def down # Rollback body: same function without the locked_by conditions on the j3 check and on the singleton-only UPDATE.
+    execute(<<~SQL)
+      CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
+      DECLARE
+        running_count integer;
+        should_lock boolean;
+        should_be_precise boolean;
+        update_query varchar;
+        skip_locked varchar;
+      BEGIN
+        IF OLD.strand IS NOT NULL THEN
+          should_lock := true;
+          should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+          IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) subquery_for_count);
+            should_lock := running_count < OLD.max_concurrent;
+          END IF;
+          IF should_lock THEN
+            PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+          END IF;
+          -- note that we don't really care if the row we're deleting has a singleton, or if it even
+          -- matches the row(s) we're going to update. we just need to make sure that whatever
+          -- singleton we grab isn't already running (which is a simple existence check, since
+          -- the unique indexes ensure there is at most one singleton running, and one queued)
+          update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.strand=$1.strand AND
+                (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id))
+              ORDER BY j2.strand_order_override ASC, j2.id ASC
+              LIMIT ';
+          IF should_be_precise THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) s);
+            IF running_count < OLD.max_concurrent THEN
+              update_query := update_query || '($1.max_concurrent - $2)';
+            ELSE
+              -- we have too many running already; just bail
+              RETURN OLD;
+            END IF;
+          ELSE
+            update_query := update_query || '1';
+            -- n-strands don't require precise ordering; we can make this query more performant
+            IF OLD.max_concurrent > 1 THEN
+              skip_locked := ' SKIP LOCKED';
+            END IF;
+          END IF;
+          update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+          EXECUTE update_query USING OLD, running_count;
+        ELSIF OLD.singleton IS NOT NULL THEN
+          UPDATE delayed_jobs SET next_in_strand = 't' WHERE singleton=OLD.singleton AND next_in_strand=false;
+        END IF;
+        RETURN OLD;
+      END;
+      $$ LANGUAGE plpgsql;
+    SQL
+  end
+end

data/db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb ADDED Viewed

@@ -0,0 +1,171 @@
+# frozen_string_literal: true
+class UpdateAfterDeleteTriggerForSingletonTransitionCases < ActiveRecord::Migration[6.0] # Redefines delayed_jobs_after_delete_row_tr_fn to cover singletons whose queued job sits on a different strand than the deleted row.
+  def up # New body: singleton handling becomes its own IF block that detects a strand "transition", try-locks the new strand, then sets next_in_strand on the waiting singleton.
+    execute(<<~SQL)
+      CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
+      DECLARE
+        next_strand varchar;
+        running_count integer;
+        should_lock boolean;
+        should_be_precise boolean;
+        update_query varchar;
+        skip_locked varchar;
+        transition boolean;
+      BEGIN
+        IF OLD.strand IS NOT NULL THEN
+          should_lock := true;
+          should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+          IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) subquery_for_count);
+            should_lock := running_count < OLD.max_concurrent;
+          END IF;
+          IF should_lock THEN
+            PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+          END IF;
+          -- note that we don't really care if the row we're deleting has a singleton, or if it even
+          -- matches the row(s) we're going to update. we just need to make sure that whatever
+          -- singleton we grab isn't already running (which is a simple existence check, since
+          -- the unique indexes ensure there is at most one singleton running, and one queued)
+          update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.strand=$1.strand AND
+                (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
+              ORDER BY j2.strand_order_override ASC, j2.id ASC
+              LIMIT ';
+          IF should_be_precise THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) s);
+            IF running_count < OLD.max_concurrent THEN
+              update_query := update_query || '($1.max_concurrent - $2)';
+            ELSE
+              -- we have too many running already; just bail
+              RETURN OLD;
+            END IF;
+          ELSE
+            update_query := update_query || '1';
+            -- n-strands don't require precise ordering; we can make this query more performant
+            IF OLD.max_concurrent > 1 THEN
+              skip_locked := ' SKIP LOCKED';
+            END IF;
+          END IF;
+          update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+          EXECUTE update_query USING OLD, running_count;
+        END IF;
+        IF OLD.singleton IS NOT NULL THEN
+          transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
+          IF transition THEN
+            next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
+            IF next_strand IS NOT NULL THEN
+              -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
+              IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
+                -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
+                RETURN OLD;
+              END IF;
+            END IF;
+          ELSIF OLD.strand IS NOT NULL THEN
+            -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
+            RETURN OLD;
+          END IF;
+          -- handles transitioning a singleton from stranded to not stranded --
+          -- handles transitioning a singleton from unstranded to stranded --
+          -- handles transitioning a singleton from strand A to strand B --
+          -- these transitions are a relatively rare case, so we take a shortcut and --
+          -- only start the next singleton if its strand does not currently have any running jobs --
+          -- if it does, the next stranded job that finishes will start this singleton if it can --
+          UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.singleton=OLD.singleton AND
+                j2.locked_by IS NULL AND
+                (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
+              FOR UPDATE
+            );
+        END IF;
+        RETURN OLD;
+      END;
+      $$ LANGUAGE plpgsql;
+    SQL
+  end
+  def down # Rollback body: singletons are handled only in the ELSIF branch taken when OLD.strand IS NULL; no transition or try-lock logic.
+    execute(<<~SQL)
+      CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
+      DECLARE
+        running_count integer;
+        should_lock boolean;
+        should_be_precise boolean;
+        update_query varchar;
+        skip_locked varchar;
+      BEGIN
+        IF OLD.strand IS NOT NULL THEN
+          should_lock := true;
+          should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+          IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) subquery_for_count);
+            should_lock := running_count < OLD.max_concurrent;
+          END IF;
+          IF should_lock THEN
+            PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+          END IF;
+          -- note that we don't really care if the row we're deleting has a singleton, or if it even
+          -- matches the row(s) we're going to update. we just need to make sure that whatever
+          -- singleton we grab isn't already running (which is a simple existence check, since
+          -- the unique indexes ensure there is at most one singleton running, and one queued)
+          update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.strand=$1.strand AND
+                (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
+              ORDER BY j2.strand_order_override ASC, j2.id ASC
+              LIMIT ';
+          IF should_be_precise THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) s);
+            IF running_count < OLD.max_concurrent THEN
+              update_query := update_query || '($1.max_concurrent - $2)';
+            ELSE
+              -- we have too many running already; just bail
+              RETURN OLD;
+            END IF;
+          ELSE
+            update_query := update_query || '1';
+            -- n-strands don't require precise ordering; we can make this query more performant
+            IF OLD.max_concurrent > 1 THEN
+              skip_locked := ' SKIP LOCKED';
+            END IF;
+          END IF;
+          update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+          EXECUTE update_query USING OLD, running_count;
+        ELSIF OLD.singleton IS NOT NULL THEN
+          UPDATE delayed_jobs SET next_in_strand = 't' WHERE singleton=OLD.singleton AND next_in_strand=false AND locked_by IS NULL;
+        END IF;
+        RETURN OLD;
+      END;
+      $$ LANGUAGE plpgsql;
+    SQL
+  end
+end

data/db/migrate/20211220112800_fix_singleton_race_condition_insert.rb ADDED Viewed

@@ -0,0 +1,59 @@
+# frozen_string_literal: true
+class FixSingletonRaceConditionInsert < ActiveRecord::Migration[5.2] # Redefines delayed_jobs_before_insert_row_tr_fn to take a per-singleton advisory lock before checking for an existing singleton.
+  def change # Reversible migration: each direction replaces the whole trigger function body.
+    reversible do |direction|
+      direction.up do # Installs the variant that locks on half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)) for the transaction.
+        execute(<<~SQL)
+          CREATE OR REPLACE FUNCTION delayed_jobs_before_insert_row_tr_fn () RETURNS trigger AS $$
+          BEGIN
+            IF NEW.strand IS NOT NULL THEN
+              PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
+              IF (SELECT COUNT(*) FROM (
+                  SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
+                ) s) = NEW.max_concurrent THEN
+                NEW.next_in_strand := false;
+              END IF;
+            END IF;
+            IF NEW.singleton IS NOT NULL THEN
+              PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
+              -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
+              -- rather than doing a seq scan
+              PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
+              IF FOUND THEN
+                NEW.next_in_strand := false;
+              END IF;
+            END IF;
+            RETURN NEW;
+          END;
+          $$ LANGUAGE plpgsql;
+        SQL
+      end
+      direction.down do # Installs the same function without the singleton advisory lock.
+        execute(<<~SQL)
+          CREATE OR REPLACE FUNCTION delayed_jobs_before_insert_row_tr_fn () RETURNS trigger AS $$
+          BEGIN
+            IF NEW.strand IS NOT NULL THEN
+              PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
+              IF (SELECT COUNT(*) FROM (
+                  SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
+                ) s) = NEW.max_concurrent THEN
+                NEW.next_in_strand := false;
+              END IF;
+            END IF;
+            IF NEW.singleton IS NOT NULL THEN
+              -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
+              -- rather than doing a seq scan
+              PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
+              IF FOUND THEN
+                NEW.next_in_strand := false;
+              END IF;
+            END IF;
+            RETURN NEW;
+          END;
+          $$ LANGUAGE plpgsql;
+        SQL
+      end
+    end
+  end
+end

data/db/migrate/20211220113000_fix_singleton_race_condition_delete.rb ADDED Viewed

@@ -0,0 +1,207 @@
+# frozen_string_literal: true
+# Migration that redefines the PL/pgSQL trigger function
+# `delayed_jobs_after_delete_row_tr_fn` with CREATE OR REPLACE FUNCTION.
+# Only the function body is replaced here; no trigger is created or dropped
+# (presumably an existing row-level delete trigger on delayed_jobs already
+# calls this function -- confirm against the earlier migrations).
+#
+# What the function does, for the deleted row OLD:
+# * If OLD.strand is set: optionally takes an advisory lock keyed on the
+#   strand, then builds and EXECUTEs an UPDATE that sets next_in_strand=true
+#   on waiting rows of the same strand (ordered by strand_order_override, id).
+#   The LIMIT is either (max_concurrent - running_count) or 1, and rows whose
+#   singleton matches another existing row are not picked.
+# * If OLD.singleton is set: looks for a not-locked row with the same
+#   singleton on a different strand (a "transition"). If that row has a strand
+#   it tries to lock it and returns early when the lock is unavailable; with
+#   no transition and a stranded OLD it returns early as well. Otherwise it
+#   sets next_in_strand=true on not-locked waiting rows with that singleton
+#   whose strand is NULL or has no other rows.
+#
+# `up` and `down` install the same function except for one statement: see the
+# comments on each method.
+class FixSingletonRaceConditionDelete < ActiveRecord::Migration[6.0]
+  # Installs the version of the function that, at the start of the singleton
+  # branch, takes pg_advisory_xact_lock on the hash of
+  # CONCAT('singleton:', OLD.singleton) before checking for other rows with
+  # the same singleton. Presumably this is the fix for the race condition
+  # named in the class -- the migration itself does not describe the race.
+  def up
+    execute(<<~SQL)
+      CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
+      DECLARE
+        next_strand varchar;
+        running_count integer;
+        should_lock boolean;
+        should_be_precise boolean;
+        update_query varchar;
+        skip_locked varchar;
+        transition boolean;
+      BEGIN
+        IF OLD.strand IS NOT NULL THEN
+          should_lock := true;
+          should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+          IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) subquery_for_count);
+            should_lock := running_count < OLD.max_concurrent;
+          END IF;
+          IF should_lock THEN
+            PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+          END IF;
+          -- note that we don't really care if the row we're deleting has a singleton, or if it even
+          -- matches the row(s) we're going to update. we just need to make sure that whatever
+          -- singleton we grab isn't already running (which is a simple existence check, since
+          -- the unique indexes ensure there is at most one singleton running, and one queued)
+          update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.strand=$1.strand AND
+                (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
+              ORDER BY j2.strand_order_override ASC, j2.id ASC
+              LIMIT ';
+          IF should_be_precise THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) s);
+            IF running_count < OLD.max_concurrent THEN
+              update_query := update_query || '($1.max_concurrent - $2)';
+            ELSE
+              -- we have too many running already; just bail
+              RETURN OLD;
+            END IF;
+          ELSE
+            update_query := update_query || '1';
+            -- n-strands don't require precise ordering; we can make this query more performant
+            IF OLD.max_concurrent > 1 THEN
+              skip_locked := ' SKIP LOCKED';
+            END IF;
+          END IF;
+          update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+          EXECUTE update_query USING OLD, running_count;
+        END IF;
+        IF OLD.singleton IS NOT NULL THEN
+          PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
+          transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
+          IF transition THEN
+            next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
+            IF next_strand IS NOT NULL THEN
+              -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
+              IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
+                -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
+                RETURN OLD;
+              END IF;
+            END IF;
+          ELSIF OLD.strand IS NOT NULL THEN
+            -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
+            RETURN OLD;
+          END IF;
+          -- handles transitioning a singleton from stranded to not stranded --
+          -- handles transitioning a singleton from unstranded to stranded --
+          -- handles transitioning a singleton from strand A to strand B --
+          -- these transitions are a relatively rare case, so we take a shortcut and --
+          -- only start the next singleton if its strand does not currently have any running jobs --
+          -- if it does, the next stranded job that finishes will start this singleton if it can --
+          UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.singleton=OLD.singleton AND
+                j2.locked_by IS NULL AND
+                (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
+              FOR UPDATE
+            );
+        END IF;
+        RETURN OLD;
+      END;
+      $$ LANGUAGE plpgsql;
+    SQL
+  end
+  # Reinstalls the function without the singleton advisory lock: the SQL below
+  # matches the `up` version statement for statement, minus the
+  # `PERFORM pg_advisory_xact_lock(... CONCAT('singleton:', OLD.singleton) ...)`
+  # line at the top of the singleton branch. Presumably this is the definition
+  # that was in place before this migration -- verify against the preceding
+  # migration if the two must match exactly.
+  def down
+    execute(<<~SQL)
+      CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
+      DECLARE
+        next_strand varchar;
+        running_count integer;
+        should_lock boolean;
+        should_be_precise boolean;
+        update_query varchar;
+        skip_locked varchar;
+        transition boolean;
+      BEGIN
+        IF OLD.strand IS NOT NULL THEN
+          should_lock := true;
+          should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+          IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) subquery_for_count);
+            should_lock := running_count < OLD.max_concurrent;
+          END IF;
+          IF should_lock THEN
+            PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+          END IF;
+          -- note that we don't really care if the row we're deleting has a singleton, or if it even
+          -- matches the row(s) we're going to update. we just need to make sure that whatever
+          -- singleton we grab isn't already running (which is a simple existence check, since
+          -- the unique indexes ensure there is at most one singleton running, and one queued)
+          update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.strand=$1.strand AND
+                (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
+              ORDER BY j2.strand_order_override ASC, j2.id ASC
+              LIMIT ';
+          IF should_be_precise THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) s);
+            IF running_count < OLD.max_concurrent THEN
+              update_query := update_query || '($1.max_concurrent - $2)';
+            ELSE
+              -- we have too many running already; just bail
+              RETURN OLD;
+            END IF;
+          ELSE
+            update_query := update_query || '1';
+            -- n-strands don't require precise ordering; we can make this query more performant
+            IF OLD.max_concurrent > 1 THEN
+              skip_locked := ' SKIP LOCKED';
+            END IF;
+          END IF;
+          update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+          EXECUTE update_query USING OLD, running_count;
+        END IF;
+        IF OLD.singleton IS NOT NULL THEN
+          transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
+          IF transition THEN
+            next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
+            IF next_strand IS NOT NULL THEN
+              -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
+              IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
+                -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
+                RETURN OLD;
+              END IF;
+            END IF;
+          ELSIF OLD.strand IS NOT NULL THEN
+            -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
+            RETURN OLD;
+          END IF;
+          -- handles transitioning a singleton from stranded to not stranded --
+          -- handles transitioning a singleton from unstranded to stranded --
+          -- handles transitioning a singleton from strand A to strand B --
+          -- these transitions are a relatively rare case, so we take a shortcut and --
+          -- only start the next singleton if its strand does not currently have any running jobs --
+          -- if it does, the next stranded job that finishes will start this singleton if it can --
+          UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.singleton=OLD.singleton AND
+                j2.locked_by IS NULL AND
+                (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
+              FOR UPDATE
+            );
+        END IF;
+        RETURN OLD;
+      END;
+      $$ LANGUAGE plpgsql;
+    SQL
+  end
+end