switchman-inst-jobs 4.0.3 → 4.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: f07c44f5a58d897c8dbcc46a40906cde8c6a99b39723286a405669cf159019e4
- data.tar.gz: cf730bfc3d3ad6d2c7da7712c3c097322421820b60f2d6af025ef610f0f3aa09
+ metadata.gz: ee1540b8c0c200ee917338953c9ea377dc0f32f354175e0e6953435861333c5f
+ data.tar.gz: 4e3912c490226c6f73c39d5923199e8d9cd661c8caeb6cef2fd6701b0dc5c08d
  SHA512:
- metadata.gz: 9189978ce61d257fb25bbf57c23a36998437b3c4aa6adb93798af07ea878caf244a1514c5711c5190b39836169607e869f65b89ed71cc08d44fabfd9d6fe0e8e
- data.tar.gz: d4a76e521ee4ba38bb2172a1ffabcc4b139a204d24690a2662ca7e8587d367ba88d98db5912c716421c96940f309296f6b35e6b292025326d61e32e03217d001
+ metadata.gz: 6ee661bcf6f5335a5c6d4d7651936de85aedc6b01221700353b97c5bde61dc9a6c351948daaea70689e401962c4249efe56d359e377636dafafecb8d09ee4a49
+ data.tar.gz: 91b155c2f00a89298a7c9de0b1ec854119534e92ae6e7d89b3823adf666ba0fc9ca045c2a4e10e2e0a337c3eb3a8f1d775a6718ae9adde7d396ee787164e0596
db/migrate/20220127091200_fix_singleton_unique_constraint.rb ADDED
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ class FixSingletonUniqueConstraint < ActiveRecord::Migration[5.2]
+   disable_ddl_transaction!
+
+   def up
+     rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_not_running', 'index_delayed_jobs_on_singleton_not_running_old'
+     rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_running', 'index_delayed_jobs_on_singleton_running_old'
+
+     # only one job can be queued in a singleton
+     add_index :delayed_jobs,
+               :singleton,
+               where: "singleton IS NOT NULL AND (locked_by IS NULL OR locked_by = '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}')",
+               unique: true,
+               name: 'index_delayed_jobs_on_singleton_not_running',
+               algorithm: :concurrently
+
+     # only one job can be running for a singleton
+     add_index :delayed_jobs,
+               :singleton,
+               where: "singleton IS NOT NULL AND locked_by IS NOT NULL AND locked_by <> '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'",
+               unique: true,
+               name: 'index_delayed_jobs_on_singleton_running',
+               algorithm: :concurrently
+   end
+
+   def down
+     remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_not_running_old'
+     remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_running_old'
+   end
+ end
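
For reference, the two add_index calls above amount to roughly the following DDL on a stock PostgreSQL adapter; this is a sketch only, with 'on hold' standing in for ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY (exact value assumed):

  -- approximate DDL issued by the up method
  CREATE UNIQUE INDEX CONCURRENTLY index_delayed_jobs_on_singleton_not_running
    ON delayed_jobs (singleton)
    WHERE singleton IS NOT NULL AND (locked_by IS NULL OR locked_by = 'on hold');

  CREATE UNIQUE INDEX CONCURRENTLY index_delayed_jobs_on_singleton_running
    ON delayed_jobs (singleton)
    WHERE singleton IS NOT NULL AND locked_by IS NOT NULL AND locked_by <> 'on hold';

Together they allow at most one queued (or held) copy and at most one running copy of any given singleton; compared with the previous index pair (recreated by the RemoveOldSingletonIndex migration's down method later in this diff), an on-hold job now occupies the queued slot rather than the running slot.
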
db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb ADDED
@@ -0,0 +1,60 @@
+ # frozen_string_literal: true
+
+ class UpdateInsertTriggerForSingletonUniqueConstraintChange < ActiveRecord::Migration[5.2]
+   def change
+     reversible do |direction|
+       direction.up do
+         execute(<<~SQL)
+           CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
+           BEGIN
+             IF NEW.strand IS NOT NULL THEN
+               PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
+               IF (SELECT COUNT(*) FROM (
+                     SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
+                   ) s) = NEW.max_concurrent THEN
+                 NEW.next_in_strand := false;
+               END IF;
+             END IF;
+             IF NEW.singleton IS NOT NULL THEN
+               PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
+               -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
+               -- rather than doing a seq scan
+               PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by = '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}' OR locked_by <> '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}');
+               IF FOUND THEN
+                 NEW.next_in_strand := false;
+               END IF;
+             END IF;
+             RETURN NEW;
+           END;
+           $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+         SQL
+       end
+       direction.down do
+         execute(<<~SQL)
+           CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
+           BEGIN
+             IF NEW.strand IS NOT NULL THEN
+               PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
+               IF (SELECT COUNT(*) FROM (
+                     SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
+                   ) s) = NEW.max_concurrent THEN
+                 NEW.next_in_strand := false;
+               END IF;
+             END IF;
+             IF NEW.singleton IS NOT NULL THEN
+               PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
+               -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
+               -- rather than doing a seq scan
+               PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
+               IF FOUND THEN
+                 NEW.next_in_strand := false;
+               END IF;
+             END IF;
+             RETURN NEW;
+           END;
+           $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+         SQL
+       end
+     end
+   end
+ end
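
The only functional difference between the up and down trigger bodies is the singleton existence check. The up version spells out the union of the two new partial-index predicates, which looks redundant but, as the comment notes, lets the planner answer the check from those partial indexes instead of a sequential scan; the down version restores the always-true locked_by check that matches the old index pair. Standalone, the up-version check is roughly (a sketch, with 'on hold' and the singleton key assumed):

  SELECT 1
    FROM delayed_jobs
   WHERE singleton = 'some-singleton'    -- hypothetical singleton key
     AND (locked_by IS NULL              -- queued slot (..._not_running index)
          OR locked_by = 'on hold'       -- held job, also the queued slot
          OR locked_by <> 'on hold');    -- running slot (..._running index)
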
db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb ADDED
@@ -0,0 +1,209 @@
+ # frozen_string_literal: true
+
+ class UpdateDeleteTriggerForSingletonUniqueConstraintChange < ActiveRecord::Migration[5.2]
+   def up
+     execute(<<~SQL)
+       CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
+       DECLARE
+         next_strand varchar;
+         running_count integer;
+         should_lock boolean;
+         should_be_precise boolean;
+         update_query varchar;
+         skip_locked varchar;
+         transition boolean;
+       BEGIN
+         IF OLD.strand IS NOT NULL THEN
+           should_lock := true;
+           should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+
+           IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+             running_count := (SELECT COUNT(*) FROM (
+               SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+             ) subquery_for_count);
+             should_lock := running_count < OLD.max_concurrent;
+           END IF;
+
+           IF should_lock THEN
+             PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+           END IF;
+
+           -- note that we don't really care if the row we're deleting has a singleton, or if it even
+           -- matches the row(s) we're going to update. we just need to make sure that whatever
+           -- singleton we grab isn't already running (which is a simple existence check, since
+           -- the unique indexes ensure there is at most one singleton running, and one queued)
+           update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+             SELECT id FROM delayed_jobs j2
+               WHERE next_in_strand=false AND
+                 j2.strand=$1.strand AND
+                 (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by = ''#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'' OR j3.locked_by <> ''#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'')))
+               ORDER BY j2.strand_order_override ASC, j2.id ASC
+               LIMIT ';
+
+           IF should_be_precise THEN
+             running_count := (SELECT COUNT(*) FROM (
+               SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+             ) s);
+             IF running_count < OLD.max_concurrent THEN
+               update_query := update_query || '($1.max_concurrent - $2)';
+             ELSE
+               -- we have too many running already; just bail
+               RETURN OLD;
+             END IF;
+           ELSE
+             update_query := update_query || '1';
+
+             -- n-strands don't require precise ordering; we can make this query more performant
+             IF OLD.max_concurrent > 1 THEN
+               skip_locked := ' SKIP LOCKED';
+             END IF;
+           END IF;
+
+           update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+           EXECUTE update_query USING OLD, running_count;
+         END IF;
+
+         IF OLD.singleton IS NOT NULL THEN
+           PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
+
+           transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
+
+           IF transition THEN
+             next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
+
+             IF next_strand IS NOT NULL THEN
+               -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
+               IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
+                 -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
+                 RETURN OLD;
+               END IF;
+             END IF;
+           ELSIF OLD.strand IS NOT NULL THEN
+             -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
+             RETURN OLD;
+           END IF;
+
+           -- handles transitioning a singleton from stranded to not stranded --
+           -- handles transitioning a singleton from unstranded to stranded --
+           -- handles transitioning a singleton from strand A to strand B --
+           -- these transitions are a relatively rare case, so we take a shortcut and --
+           -- only start the next singleton if its strand does not currently have any running jobs --
+           -- if it does, the next stranded job that finishes will start this singleton if it can --
+           UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+             SELECT id FROM delayed_jobs j2
+               WHERE next_in_strand=false AND
+                 j2.singleton=OLD.singleton AND
+                 j2.locked_by IS NULL AND
+                 (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
+             FOR UPDATE
+           );
+         END IF;
+         RETURN OLD;
+       END;
+       $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+     SQL
+   end
+
+   def down
+     execute(<<~SQL)
+       CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
+       DECLARE
+         next_strand varchar;
+         running_count integer;
+         should_lock boolean;
+         should_be_precise boolean;
+         update_query varchar;
+         skip_locked varchar;
+         transition boolean;
+       BEGIN
+         IF OLD.strand IS NOT NULL THEN
+           should_lock := true;
+           should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+
+           IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+             running_count := (SELECT COUNT(*) FROM (
+               SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+             ) subquery_for_count);
+             should_lock := running_count < OLD.max_concurrent;
+           END IF;
+
+           IF should_lock THEN
+             PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+           END IF;
+
+           -- note that we don't really care if the row we're deleting has a singleton, or if it even
+           -- matches the row(s) we're going to update. we just need to make sure that whatever
+           -- singleton we grab isn't already running (which is a simple existence check, since
+           -- the unique indexes ensure there is at most one singleton running, and one queued)
+           update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+             SELECT id FROM delayed_jobs j2
+               WHERE next_in_strand=false AND
+                 j2.strand=$1.strand AND
+                 (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
+               ORDER BY j2.strand_order_override ASC, j2.id ASC
+               LIMIT ';
+
+           IF should_be_precise THEN
+             running_count := (SELECT COUNT(*) FROM (
+               SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+             ) s);
+             IF running_count < OLD.max_concurrent THEN
+               update_query := update_query || '($1.max_concurrent - $2)';
+             ELSE
+               -- we have too many running already; just bail
+               RETURN OLD;
+             END IF;
+           ELSE
+             update_query := update_query || '1';
+
+             -- n-strands don't require precise ordering; we can make this query more performant
+             IF OLD.max_concurrent > 1 THEN
+               skip_locked := ' SKIP LOCKED';
+             END IF;
+           END IF;
+
+           update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+           EXECUTE update_query USING OLD, running_count;
+         END IF;
+
+         IF OLD.singleton IS NOT NULL THEN
+           PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
+
+           transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
+
+           IF transition THEN
+             next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
+
+             IF next_strand IS NOT NULL THEN
+               -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
+               IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
+                 -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
+                 RETURN OLD;
+               END IF;
+             END IF;
+           ELSIF OLD.strand IS NOT NULL THEN
+             -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
+             RETURN OLD;
+           END IF;
+
+           -- handles transitioning a singleton from stranded to not stranded --
+           -- handles transitioning a singleton from unstranded to stranded --
+           -- handles transitioning a singleton from strand A to strand B --
+           -- these transitions are a relatively rare case, so we take a shortcut and --
+           -- only start the next singleton if its strand does not currently have any running jobs --
+           -- if it does, the next stranded job that finishes will start this singleton if it can --
+           UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+             SELECT id FROM delayed_jobs j2
+               WHERE next_in_strand=false AND
+                 j2.singleton=OLD.singleton AND
+                 j2.locked_by IS NULL AND
+                 (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
+             FOR UPDATE
+           );
+         END IF;
+         RETURN OLD;
+       END;
+       $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+     SQL
+   end
+ end
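
Both trigger bodies serialize concurrent deletes on the same strand or singleton with PostgreSQL advisory locks keyed by a hash of the name (half_md5_as_bigint is a helper inst-jobs defines elsewhere to turn a string into a bigint lock key). The locks are transaction-scoped, which is why the function can simply RETURN OLD when pg_try_advisory_xact_lock fails: whichever transaction does hold the lock will finish the bookkeeping, and every lock is released automatically at commit or rollback. A minimal sketch of that behaviour in plain SQL (key 42 is arbitrary):

  BEGIN;
  SELECT pg_advisory_xact_lock(42);      -- blocks until key 42 is free, then holds it
  SELECT pg_try_advisory_xact_lock(42);  -- true here: a session may re-acquire its own lock
  COMMIT;                                -- both acquisitions are released automatically
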
db/migrate/20220203063200_remove_old_singleton_index.rb ADDED
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ class RemoveOldSingletonIndex < ActiveRecord::Migration[5.2]
+   disable_ddl_transaction!
+
+   def up
+     remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_not_running_old'
+     remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_running_old'
+   end
+
+   def down
+     rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_not_running', 'index_delayed_jobs_on_singleton_not_running_old'
+     rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_running', 'index_delayed_jobs_on_singleton_running_old'
+
+     # only one job can be queued in a singleton
+     add_index :delayed_jobs,
+               :singleton,
+               where: 'singleton IS NOT NULL AND locked_by IS NULL',
+               unique: true,
+               name: 'index_delayed_jobs_on_singleton_not_running',
+               algorithm: :concurrently
+
+     # only one job can be running for a singleton
+     add_index :delayed_jobs,
+               :singleton,
+               where: 'singleton IS NOT NULL AND locked_by IS NOT NULL',
+               unique: true,
+               name: 'index_delayed_jobs_on_singleton_running',
+               algorithm: :concurrently
+   end
+ end
@@ -21,13 +21,17 @@ module SwitchmanInstJobs

  # Ensure jobs get unblocked on the new shard if they exist
  ::Delayed::Worker.lifecycle.after(:perform) do |_worker, job|
-   if job.strand
+   if job.strand || job.singleton
+     column = job.strand ? :strand : :singleton
+
      ::Switchman::Shard.clear_cache
      ::Switchman::Shard.default.activate do
        current_job_shard = ::Switchman::Shard.lookup(job.shard_id).delayed_jobs_shard
        if current_job_shard != ::Switchman::Shard.current(::Delayed::Backend::ActiveRecord::AbstractJob)
          current_job_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
-           j = ::Delayed::Job.where(strand: job.strand).next_in_strand_order.first
+           ::Delayed::Job.where(source: 'JobsMigrator::StrandBlocker', **{ column => job.try(column) }).delete_all
+
+           j = ::Delayed::Job.where(**{ column => job.try(column) }).next_in_strand_order.first
            j.update_column(:next_in_strand, true) if j && !j.next_in_strand
          end
        end
@@ -89,7 +89,9 @@ module SwitchmanInstJobs
    migrate_everything
  end

- def migrate_strands
+ def migrate_strands(batch_size: 1_000)
+   source_shard = ::Switchman::Shard.current(::Delayed::Backend::ActiveRecord::AbstractJob)
+
    # there are 4 scenarios to deal with here
    # 1) no running job, no jobs moved: do nothing
    # 2) running job, no jobs moved; create blocker with next_in_strand=false
@@ -98,60 +100,64 @@
    # those (= do nothing since it should already be false)
    # 4) no running job, jobs moved: set next_in_strand=true on the first of
    # those (= do nothing since it should already be true)
+   handler = lambda { |scope, column, blocker_job_kwargs = {}|
+     shard_map = build_shard_map(scope, source_shard)
+     shard_map.each do |(target_shard, source_shard_ids)|
+       shard_scope = scope.where(shard_id: source_shard_ids)

-   source_shard = ::Switchman::Shard.current(::Delayed::Backend::ActiveRecord::AbstractJob)
-   strand_scope = ::Delayed::Job.shard(source_shard).where.not(strand: nil)
-   shard_map = build_shard_map(strand_scope, source_shard)
-   shard_map.each do |(target_shard, source_shard_ids)|
-     shard_scope = strand_scope.where(shard_id: source_shard_ids)
-
-     # 1) is taken care of because it should not show up here in strands
-     strands = shard_scope.distinct.order(:strand).pluck(:strand)
-
-     target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
-       strands.each do |strand|
-         transaction_on([source_shard, target_shard]) do
-           this_strand_scope = shard_scope.where(strand: strand)
-           # we want to copy all the jobs except the one that is still running.
-           jobs_scope = this_strand_scope.where(locked_by: nil)
-
-           # 2) and part of 3) are taken care of here by creating a blocker
-           # job with next_in_strand = false. as soon as the current
-           # running job is finished it should set next_in_strand
-           # We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
-           # Since we only unlock it on the new jobs queue *after* deleting from the original
-           # the lock ensures the blocker always gets unlocked
-           first = this_strand_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
-           if first
-             first_job = ::Delayed::Job.create!(strand: strand, next_in_strand: false)
-             first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
-             first_job.queue = first.queue
-             first_job.tag = 'Kernel.sleep'
-             first_job.source = 'JobsMigrator::StrandBlocker'
-             first_job.max_attempts = 1
-             # If we ever have jobs left over from 9999 jobs moves of a single shard,
-             # something has gone terribly wrong
-             first_job.strand_order_override = -9999
-             first_job.save!
-             # the rest of 3) is taken care of here
-             # make sure that all the jobs moved over are NOT next in strand
-             ::Delayed::Job.where(next_in_strand: true, strand: strand, locked_by: nil).
-               update_all(next_in_strand: false)
-           end
+       # 1) is taken care of because it should not show up here in strands
+       values = shard_scope.distinct.order(column).pluck(column)

-           # 4) is taken care of here, by leaving next_in_strand alone and
-           # it should execute on the new shard
-           batch_move_jobs(
-             target_shard: target_shard,
-             source_shard: source_shard,
-             scope: jobs_scope
-           ) do |job, new_job|
-             # This ensures jobs enqueued on the old jobs shard run before jobs on the new jobs queue
-             new_job.strand_order_override = job.strand_order_override - 1
+       target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
+         values.each do |value|
+           transaction_on([source_shard, target_shard]) do
+             value_scope = shard_scope.where(**{ column => value })
+             # we want to copy all the jobs except the one that is still running.
+             jobs_scope = value_scope.where(locked_by: nil)
+
+             # 2) and part of 3) are taken care of here by creating a blocker
+             # job with next_in_strand = false. as soon as the current
+             # running job is finished it should set next_in_strand
+             # We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
+             # Since we only unlock it on the new jobs queue *after* deleting from the original
+             # the lock ensures the blocker always gets unlocked
+             first = value_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
+             if first
+               create_blocker_job(queue: first.queue, **{ column => value }, **blocker_job_kwargs)
+               # the rest of 3) is taken care of here
+               # make sure that all the jobs moved over are NOT next in strand
+               ::Delayed::Job.where(next_in_strand: true, locked_by: nil, **{ column => value }).
+                 update_all(next_in_strand: false)
+             end
+
+             # 4) is taken care of here, by leaving next_in_strand alone and
+             # it should execute on the new shard
+             batch_move_jobs(
+               target_shard: target_shard,
+               source_shard: source_shard,
+               scope: jobs_scope,
+               batch_size: batch_size
+             ) do |job, new_job|
+               # This ensures jobs enqueued on the old jobs shard run before jobs on the new jobs queue
+               new_job.strand_order_override = job.strand_order_override - 1
+             end
            end
          end
        end
+     end
+   }
+
+   strand_scope = ::Delayed::Job.shard(source_shard).where.not(strand: nil)
+   singleton_scope = ::Delayed::Job.shard(source_shard).where('strand IS NULL AND singleton IS NOT NULL')
+   all_scope = ::Delayed::Job.shard(source_shard).where('strand IS NOT NULL OR singleton IS NOT NULL')
+
+   handler.call(strand_scope, :strand)
+   handler.call(singleton_scope, :singleton,
+                { locked_at: DateTime.now, locked_by: ::Delayed::Backend::Base::ON_HOLD_BLOCKER })

+   shard_map = build_shard_map(all_scope, source_shard)
+   shard_map.each do |(target_shard, source_shard_ids)|
+     target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
        updated = ::Switchman::Shard.where(id: source_shard_ids, block_stranded: true).
          update_all(block_stranded: false)
        # If this is being manually re-run for some reason to clean something up, don't wait for nothing to happen
@@ -166,26 +172,40 @@ module SwitchmanInstJobs
    end
  end

- def unblock_strands(target_shard)
-   target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
-     loop do
-       # We only want to unlock stranded jobs where they don't belong to a blocked shard (if they *do* belong)
-       # to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
-       # this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
-       # logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
-       # batches
-       break if ::Delayed::Job.where(id: ::Delayed::Job.select('DISTINCT ON (strand) id').
-         where.not(strand: nil).
-         where.not(shard_id: ::Switchman::Shard.where(block_stranded: true).pluck(:id)).where(
+ def unblock_strands(target_shard, batch_size: 10_000)
+   block_stranded_ids = ::Switchman::Shard.where(block_stranded: true).pluck(:id)
+   query = lambda { |column, scope|
+     ::Delayed::Job.
+       where(id: ::Delayed::Job.select("DISTINCT ON (#{column}) id").
+         where(scope).
+         where.not(shard_id: block_stranded_ids).
+         where(
            ::Delayed::Job.select(1).from("#{::Delayed::Job.quoted_table_name} dj2").
            where("dj2.next_in_strand = true OR dj2.source = 'JobsMigrator::StrandBlocker'").
-           where('dj2.strand = delayed_jobs.strand').arel.exists.not
-         ).order(:strand, :strand_order_override, :id)).limit(500).update_all(next_in_strand: true).zero?
+           where("dj2.#{column} = delayed_jobs.#{column}").arel.exists.not
+         ).
+         order(column, :strand_order_override, :id)).limit(batch_size)
+   }
+
+   target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
+     # We only want to unlock stranded jobs where they don't belong to a blocked shard (if they *do* belong)
+     # to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
+     # this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
+     # logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
+     # batches
+
+     loop do
+       break if query.call(:strand, 'strand IS NOT NULL').update_all(next_in_strand: true).zero?
+     end
+
+     loop do
+       break if query.call(:singleton,
+                           'strand IS NULL AND singleton IS NOT NULL').update_all(next_in_strand: true).zero?
      end
    end
  end

- def migrate_everything
+ def migrate_everything(batch_size: 1_000)
    source_shard = ::Switchman::Shard.current(::Delayed::Backend::ActiveRecord::AbstractJob)
    scope = ::Delayed::Job.shard(source_shard).where(strand: nil)
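
In the rewritten unblock_strands above, the query lambda builds, per column (:strand or :singleton), roughly the following selection: DISTINCT ON keeps only the first blocked job per strand or singleton, and the NOT EXISTS clause skips groups that still contain an unblocked job or a JobsMigrator::StrandBlocker. A sketch of the strand variant as plain SQL (the real statement is generated by ActiveRecord, so exact spelling is assumed):

  SELECT DISTINCT ON (strand) id
    FROM delayed_jobs
   WHERE strand IS NOT NULL
     AND shard_id NOT IN (/* shards still marked block_stranded */)
     AND NOT EXISTS (SELECT 1 FROM delayed_jobs dj2
                      WHERE (dj2.next_in_strand = true OR dj2.source = 'JobsMigrator::StrandBlocker')
                        AND dj2.strand = delayed_jobs.strand)
   ORDER BY strand, strand_order_override, id;

The surrounding loop then flips next_in_strand to true for those ids, batch_size rows at a time, until an update touches no rows.
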
 
@@ -194,13 +214,26 @@ module SwitchmanInstJobs
      batch_move_jobs(
        target_shard: target_shard,
        source_shard: source_shard,
-       scope: scope.where(shard_id: source_shard_ids).where(locked_by: nil)
+       scope: scope.where(shard_id: source_shard_ids).where(locked_by: nil),
+       batch_size: batch_size
      )
    end
  end

  private

+ def create_blocker_job(**kwargs)
+   first_job = ::Delayed::Job.create!(**kwargs, next_in_strand: false)
+   first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
+   first_job.tag = 'Kernel.sleep'
+   first_job.source = 'JobsMigrator::StrandBlocker'
+   first_job.max_attempts = 1
+   # If we ever have jobs left over from 9999 jobs moves of a single shard,
+   # something has gone terribly wrong
+   first_job.strand_order_override = -9999
+   first_job.save!
+ end
+
  def build_shard_map(scope, source_shard)
    shard_ids = scope.distinct.pluck(:shard_id)

@@ -215,10 +248,10 @@
    shard_map
  end

- def batch_move_jobs(target_shard:, source_shard:, scope:)
+ def batch_move_jobs(target_shard:, source_shard:, scope:, batch_size:)
    while scope.exists?
      # Adapted from get_and_lock_next_available in delayed/backend/active_record.rb
-     target_jobs = scope.limit(1000).lock('FOR UPDATE SKIP LOCKED')
+     target_jobs = scope.limit(batch_size).lock('FOR UPDATE SKIP LOCKED')

      query = source_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
        <<~SQL
@@ -1,3 +1,3 @@
  module SwitchmanInstJobs
-   VERSION = '4.0.3'.freeze
+   VERSION = '4.0.4'.freeze
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: switchman-inst-jobs
  version: !ruby/object:Gem::Version
-   version: 4.0.3
+   version: 4.0.4
  platform: ruby
  authors:
  - Bryan Petty
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-12-20 00:00:00.000000000 Z
+ date: 2022-02-04 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: inst-jobs
@@ -340,6 +340,10 @@ files:
  - db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb
  - db/migrate/20211220112800_fix_singleton_race_condition_insert.rb
  - db/migrate/20211220113000_fix_singleton_race_condition_delete.rb
+ - db/migrate/20220127091200_fix_singleton_unique_constraint.rb
+ - db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb
+ - db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb
+ - db/migrate/20220203063200_remove_old_singleton_index.rb
  - lib/switchman-inst-jobs.rb
  - lib/switchman_inst_jobs.rb
  - lib/switchman_inst_jobs/active_record/connection_adapters/connection_pool.rb