switchman-inst-jobs 3.2.7 → 3.2.8

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: ac66ce458b7356068a5f576593f9692b5aa58d608584033a3aca941b31dc4a2d
-   data.tar.gz: c790f410b55117257f9b447987f68617b61222b10bf621385736b096affa25f8
+   metadata.gz: e4023807a9b569aff8d1751ded026466f0e135b8eae0c6ad9ff7bd80a1a7f818
+   data.tar.gz: 8a61cde9444723e15b3ec0c6034791e1887972a572755fea1e72b87fe004ec7d
  SHA512:
-   metadata.gz: 5d21bcfec5e444aeaf4bf839c13ab40453ea7b27c780e9153aeea23d5692800ff0e36dac604c7b7dec7c7723aea6a1baba675748a543240a69b37a3486665110
-   data.tar.gz: ea0797fc7294cf1be7b641ad874e4560a69a6a7bc04228ee85f7c284f5fcc1288139b6824c72ae5db1c521b19fa3aec55d36c7831dc819046a8af7f2d623886b
+   metadata.gz: 0a947509044626f1ccebbd8c1ec34db74e0086dd3df8f8ef7a6b3dfbc6c616bb76a3832cf3a168191b954d1241586bacf8a7909bbeb3a7be1eec21eb3426865e
+   data.tar.gz: fb103f3ec6eff9c2b356f592f133c8eaae99b08487623b3d0aee257f1cb1215620d66824eb221ec745abc96e9c19fdcb5c254bd5bffec78bfcd283d9cd2b9745
db/migrate/20220127091200_fix_singleton_unique_constraint.rb ADDED
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ class FixSingletonUniqueConstraint < ActiveRecord::Migration[5.2]
+   disable_ddl_transaction!
+
+   def up
+     rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_not_running', 'index_delayed_jobs_on_singleton_not_running_old'
+     rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_running', 'index_delayed_jobs_on_singleton_running_old'
+
+     # only one job can be queued in a singleton
+     add_index :delayed_jobs,
+               :singleton,
+               where: "singleton IS NOT NULL AND (locked_by IS NULL OR locked_by = '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}')",
+               unique: true,
+               name: 'index_delayed_jobs_on_singleton_not_running',
+               algorithm: :concurrently
+
+     # only one job can be running for a singleton
+     add_index :delayed_jobs,
+               :singleton,
+               where: "singleton IS NOT NULL AND locked_by IS NOT NULL AND locked_by <> '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'",
+               unique: true,
+               name: 'index_delayed_jobs_on_singleton_running',
+               algorithm: :concurrently
+   end
+
+   def down
+     remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_not_running_old'
+     remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_running_old'
+   end
+ end
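
Note on the two indexes above: together they partition singleton rows into "queued" (locked_by is NULL or the on-hold marker) and "running" (locked by a worker), so each singleton can have at most one row of each kind. The console sketch below is not part of the gem; it only illustrates one way to spot-check that invariant on an existing delayed_jobs table (the query shape is an assumption, not documented API):

    # Sketch only: count singletons that would violate the new partial unique indexes.
    on_hold = ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY
    queued_dupes = ::Delayed::Job.where.not(singleton: nil).
      where('locked_by IS NULL OR locked_by = ?', on_hold).
      group(:singleton).having('COUNT(*) > 1').count
    running_dupes = ::Delayed::Job.where.not(singleton: nil).
      where('locked_by IS NOT NULL AND locked_by <> ?', on_hold).
      group(:singleton).having('COUNT(*) > 1').count
    puts({ queued_duplicates: queued_dupes.size, running_duplicates: running_dupes.size })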

db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb ADDED
@@ -0,0 +1,60 @@
+ # frozen_string_literal: true
+
+ class UpdateInsertTriggerForSingletonUniqueConstraintChange < ActiveRecord::Migration[5.2]
+   def change
+     reversible do |direction|
+       direction.up do
+         execute(<<~SQL)
+           CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
+           BEGIN
+             IF NEW.strand IS NOT NULL THEN
+               PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
+               IF (SELECT COUNT(*) FROM (
+                     SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
+                   ) s) = NEW.max_concurrent THEN
+                 NEW.next_in_strand := false;
+               END IF;
+             END IF;
+             IF NEW.singleton IS NOT NULL THEN
+               PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
+               -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
+               -- rather than doing a seq scan
+               PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by = '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}' OR locked_by <> '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}');
+               IF FOUND THEN
+                 NEW.next_in_strand := false;
+               END IF;
+             END IF;
+             RETURN NEW;
+           END;
+           $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+         SQL
+       end
+       direction.down do
+         execute(<<~SQL)
+           CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
+           BEGIN
+             IF NEW.strand IS NOT NULL THEN
+               PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
+               IF (SELECT COUNT(*) FROM (
+                     SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
+                   ) s) = NEW.max_concurrent THEN
+                 NEW.next_in_strand := false;
+               END IF;
+             END IF;
+             IF NEW.singleton IS NOT NULL THEN
+               PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
+               -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
+               -- rather than doing a seq scan
+               PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
+               IF FOUND THEN
+                 NEW.next_in_strand := false;
+               END IF;
+             END IF;
+             RETURN NEW;
+           END;
+           $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+         SQL
+       end
+     end
+   end
+ end
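
The locked_by predicate in the PERFORM above is always true for any row matching the singleton; it is spelled out that way only so the planner can answer the existence check from the two partial singleton indexes instead of a sequential scan, as the inline comment notes. A hedged way to verify the plan from a console, using a made-up singleton value:

    # Sketch only: inspect the plan for the existence check used by the insert trigger.
    on_hold = ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY
    plan = ::Delayed::Job.connection.select_values(<<~SQL)
      EXPLAIN SELECT 1 FROM delayed_jobs
      WHERE singleton = 'example-singleton'
        AND (locked_by IS NULL OR locked_by = '#{on_hold}' OR locked_by <> '#{on_hold}')
    SQL
    puts plan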

db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb ADDED
@@ -0,0 +1,209 @@
+ # frozen_string_literal: true
+
+ class UpdateDeleteTriggerForSingletonUniqueConstraintChange < ActiveRecord::Migration[5.2]
+   def up
+     execute(<<~SQL)
+       CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
+       DECLARE
+         next_strand varchar;
+         running_count integer;
+         should_lock boolean;
+         should_be_precise boolean;
+         update_query varchar;
+         skip_locked varchar;
+         transition boolean;
+       BEGIN
+         IF OLD.strand IS NOT NULL THEN
+           should_lock := true;
+           should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+
+           IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+             running_count := (SELECT COUNT(*) FROM (
+               SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+             ) subquery_for_count);
+             should_lock := running_count < OLD.max_concurrent;
+           END IF;
+
+           IF should_lock THEN
+             PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+           END IF;
+
+           -- note that we don't really care if the row we're deleting has a singleton, or if it even
+           -- matches the row(s) we're going to update. we just need to make sure that whatever
+           -- singleton we grab isn't already running (which is a simple existence check, since
+           -- the unique indexes ensure there is at most one singleton running, and one queued)
+           update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+             SELECT id FROM delayed_jobs j2
+               WHERE next_in_strand=false AND
+                 j2.strand=$1.strand AND
+                 (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by = ''#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'' OR j3.locked_by <> ''#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'')))
+               ORDER BY j2.strand_order_override ASC, j2.id ASC
+               LIMIT ';
+
+           IF should_be_precise THEN
+             running_count := (SELECT COUNT(*) FROM (
+               SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+             ) s);
+             IF running_count < OLD.max_concurrent THEN
+               update_query := update_query || '($1.max_concurrent - $2)';
+             ELSE
+               -- we have too many running already; just bail
+               RETURN OLD;
+             END IF;
+           ELSE
+             update_query := update_query || '1';
+
+             -- n-strands don't require precise ordering; we can make this query more performant
+             IF OLD.max_concurrent > 1 THEN
+               skip_locked := ' SKIP LOCKED';
+             END IF;
+           END IF;
+
+           update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+           EXECUTE update_query USING OLD, running_count;
+         END IF;
+
+         IF OLD.singleton IS NOT NULL THEN
+           PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
+
+           transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
+
+           IF transition THEN
+             next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
+
+             IF next_strand IS NOT NULL THEN
+               -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
+               IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
+                 -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
+                 RETURN OLD;
+               END IF;
+             END IF;
+           ELSIF OLD.strand IS NOT NULL THEN
+             -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
+             RETURN OLD;
+           END IF;
+
+           -- handles transitioning a singleton from stranded to not stranded --
+           -- handles transitioning a singleton from unstranded to stranded --
+           -- handles transitioning a singleton from strand A to strand B --
+           -- these transitions are a relatively rare case, so we take a shortcut and --
+           -- only start the next singleton if its strand does not currently have any running jobs --
+           -- if it does, the next stranded job that finishes will start this singleton if it can --
+           UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+             SELECT id FROM delayed_jobs j2
+               WHERE next_in_strand=false AND
+                 j2.singleton=OLD.singleton AND
+                 j2.locked_by IS NULL AND
+                 (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
+             FOR UPDATE
+           );
+         END IF;
+         RETURN OLD;
+       END;
+       $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+     SQL
+   end
+
+   def down
+     execute(<<~SQL)
+       CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
+       DECLARE
+         next_strand varchar;
+         running_count integer;
+         should_lock boolean;
+         should_be_precise boolean;
+         update_query varchar;
+         skip_locked varchar;
+         transition boolean;
+       BEGIN
+         IF OLD.strand IS NOT NULL THEN
+           should_lock := true;
+           should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+
+           IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+             running_count := (SELECT COUNT(*) FROM (
+               SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+             ) subquery_for_count);
+             should_lock := running_count < OLD.max_concurrent;
+           END IF;
+
+           IF should_lock THEN
+             PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+           END IF;
+
+           -- note that we don't really care if the row we're deleting has a singleton, or if it even
+           -- matches the row(s) we're going to update. we just need to make sure that whatever
+           -- singleton we grab isn't already running (which is a simple existence check, since
+           -- the unique indexes ensure there is at most one singleton running, and one queued)
+           update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+             SELECT id FROM delayed_jobs j2
+               WHERE next_in_strand=false AND
+                 j2.strand=$1.strand AND
+                 (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
+               ORDER BY j2.strand_order_override ASC, j2.id ASC
+               LIMIT ';
+
+           IF should_be_precise THEN
+             running_count := (SELECT COUNT(*) FROM (
+               SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+             ) s);
+             IF running_count < OLD.max_concurrent THEN
+               update_query := update_query || '($1.max_concurrent - $2)';
+             ELSE
+               -- we have too many running already; just bail
+               RETURN OLD;
+             END IF;
+           ELSE
+             update_query := update_query || '1';
+
+             -- n-strands don't require precise ordering; we can make this query more performant
+             IF OLD.max_concurrent > 1 THEN
+               skip_locked := ' SKIP LOCKED';
+             END IF;
+           END IF;
+
+           update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+           EXECUTE update_query USING OLD, running_count;
+         END IF;
+
+         IF OLD.singleton IS NOT NULL THEN
+           PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
+
+           transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
+
+           IF transition THEN
+             next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
+
+             IF next_strand IS NOT NULL THEN
+               -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
+               IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
+                 -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
+                 RETURN OLD;
+               END IF;
+             END IF;
+           ELSIF OLD.strand IS NOT NULL THEN
+             -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
+             RETURN OLD;
+           END IF;
+
+           -- handles transitioning a singleton from stranded to not stranded --
+           -- handles transitioning a singleton from unstranded to stranded --
+           -- handles transitioning a singleton from strand A to strand B --
+           -- these transitions are a relatively rare case, so we take a shortcut and --
+           -- only start the next singleton if its strand does not currently have any running jobs --
+           -- if it does, the next stranded job that finishes will start this singleton if it can --
+           UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+             SELECT id FROM delayed_jobs j2
+               WHERE next_in_strand=false AND
+                 j2.singleton=OLD.singleton AND
+                 j2.locked_by IS NULL AND
+                 (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
+             FOR UPDATE
+           );
+         END IF;
+         RETURN OLD;
+       END;
+       $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+     SQL
+   end
+ end
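
If you want to confirm the replacement function actually landed on a given jobs shard after migrating, one option is to read it back from the PostgreSQL catalogs (sketch only; assumes the current connection points at that shard's database):

    # Sketch only: dump the installed delete-trigger function body for manual inspection.
    puts ::Delayed::Job.connection.select_value(<<~SQL)
      SELECT prosrc FROM pg_proc WHERE proname = 'delayed_jobs_after_delete_row_tr_fn'
    SQL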

db/migrate/20220203063200_remove_old_singleton_index.rb ADDED
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ class RemoveOldSingletonIndex < ActiveRecord::Migration[5.2]
+   disable_ddl_transaction!
+
+   def up
+     remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_not_running_old'
+     remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_running_old'
+   end
+
+   def down
+     rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_not_running', 'index_delayed_jobs_on_singleton_not_running_old'
+     rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_running', 'index_delayed_jobs_on_singleton_running_old'
+
+     # only one job can be queued in a singleton
+     add_index :delayed_jobs,
+               :singleton,
+               where: 'singleton IS NOT NULL AND locked_by IS NULL',
+               unique: true,
+               name: 'index_delayed_jobs_on_singleton_not_running',
+               algorithm: :concurrently
+
+     # only one job can be running for a singleton
+     add_index :delayed_jobs,
+               :singleton,
+               where: 'singleton IS NOT NULL AND locked_by IS NOT NULL',
+               unique: true,
+               name: 'index_delayed_jobs_on_singleton_running',
+               algorithm: :concurrently
+   end
+ end
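
After this cleanup migration only the new singleton indexes should remain on delayed_jobs. A quick hedged console check (plain ActiveRecord calls, nothing gem-specific):

    # Sketch only: list the singleton-related indexes that are left on the table.
    puts ::Delayed::Job.connection.indexes(:delayed_jobs).map(&:name).grep(/singleton/).sort
    # expected: index_delayed_jobs_on_singleton_not_running, index_delayed_jobs_on_singleton_running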
@@ -19,13 +19,17 @@ module SwitchmanInstJobs
 
    # Ensure jobs get unblocked on the new shard if they exist
    ::Delayed::Worker.lifecycle.after(:perform) do |_worker, job|
-     if job.strand
+     if job.strand || job.singleton
+       column = job.strand ? :strand : :singleton
+
        ::Switchman::Shard.clear_cache
        ::Switchman::Shard.default.activate do
          current_job_shard = ::Switchman::Shard.lookup(job.shard_id).delayed_jobs_shard
          if current_job_shard != ::Switchman::Shard.current(:delayed_jobs)
            current_job_shard.activate(:delayed_jobs) do
-             j = ::Delayed::Job.where(strand: job.strand).next_in_strand_order.first
+             ::Delayed::Job.where(source: 'JobsMigrator::StrandBlocker', **{ column => job.try(column) }).delete_all
+
+             j = ::Delayed::Job.where(**{ column => job.try(column) }).next_in_strand_order.first
              j.update_column(:next_in_strand, true) if j && !j.next_in_strand
            end
          end
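
With the hook change above, any blocker row the migrator left behind (source 'JobsMigrator::StrandBlocker') for the finished job's strand or singleton is deleted before the next job is unblocked on the new jobs shard. A hedged one-liner to check for stragglers on a jobs shard (sketch only):

    # Sketch only: migration blocker jobs that have not been cleaned up yet.
    ::Delayed::Job.where(source: 'JobsMigrator::StrandBlocker').count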

lib/switchman_inst_jobs/jobs_migrator.rb CHANGED
@@ -89,7 +89,9 @@ module SwitchmanInstJobs
        migrate_everything
      end
 
-     def migrate_strands
+     def migrate_strands(batch_size: 1_000)
+       source_shard = ::Switchman::Shard.current(:delayed_jobs)
+
       # there are 4 scenarios to deal with here
       # 1) no running job, no jobs moved: do nothing
       # 2) running job, no jobs moved; create blocker with next_in_strand=false
@@ -98,60 +100,64 @@ module SwitchmanInstJobs
      #    those (= do nothing since it should already be false)
      # 4) no running job, jobs moved: set next_in_strand=true on the first of
      #    those (= do nothing since it should already be true)
+     handler = lambda { |scope, column, blocker_job_kwargs = {}|
+       shard_map = build_shard_map(scope, source_shard)
+       shard_map.each do |(target_shard, source_shard_ids)|
+         shard_scope = scope.where(shard_id: source_shard_ids)
 
-     source_shard = ::Switchman::Shard.current(:delayed_jobs)
-     strand_scope = ::Delayed::Job.shard(source_shard).where.not(strand: nil)
-     shard_map = build_shard_map(strand_scope, source_shard)
-     shard_map.each do |(target_shard, source_shard_ids)|
-       shard_scope = strand_scope.where(shard_id: source_shard_ids)
-
-       # 1) is taken care of because it should not show up here in strands
-       strands = shard_scope.distinct.order(:strand).pluck(:strand)
-
-       target_shard.activate(:delayed_jobs) do
-         strands.each do |strand|
-           transaction_on([source_shard, target_shard]) do
-             this_strand_scope = shard_scope.where(strand: strand)
-             # we want to copy all the jobs except the one that is still running.
-             jobs_scope = this_strand_scope.where(locked_by: nil)
-
-             # 2) and part of 3) are taken care of here by creating a blocker
-             # job with next_in_strand = false. as soon as the current
-             # running job is finished it should set next_in_strand
-             # We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
-             # Since we only unlock it on the new jobs queue *after* deleting from the original
-             # the lock ensures the blocker always gets unlocked
-             first = this_strand_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
-             if first
-               first_job = ::Delayed::Job.create!(strand: strand, next_in_strand: false)
-               first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
-               first_job.queue = first.queue
-               first_job.tag = 'Kernel.sleep'
-               first_job.source = 'JobsMigrator::StrandBlocker'
-               first_job.max_attempts = 1
-               # If we ever have jobs left over from 9999 jobs moves of a single shard,
-               # something has gone terribly wrong
-               first_job.strand_order_override = -9999
-               first_job.save!
-               # the rest of 3) is taken care of here
-               # make sure that all the jobs moved over are NOT next in strand
-               ::Delayed::Job.where(next_in_strand: true, strand: strand, locked_by: nil).
-                 update_all(next_in_strand: false)
-             end
+         # 1) is taken care of because it should not show up here in strands
+         values = shard_scope.distinct.order(column).pluck(column)
 
-             # 4) is taken care of here, by leaving next_in_strand alone and
-             # it should execute on the new shard
-             batch_move_jobs(
-               target_shard: target_shard,
-               source_shard: source_shard,
-               scope: jobs_scope
-             ) do |job, new_job|
-               # This ensures jobs enqueued on the old jobs shard run before jobs on the new jobs queue
-               new_job.strand_order_override = job.strand_order_override - 1
+         target_shard.activate(:delayed_jobs) do
+           values.each do |value|
+             transaction_on([source_shard, target_shard]) do
+               value_scope = shard_scope.where(**{ column => value })
+               # we want to copy all the jobs except the one that is still running.
+               jobs_scope = value_scope.where(locked_by: nil)
+
+               # 2) and part of 3) are taken care of here by creating a blocker
+               # job with next_in_strand = false. as soon as the current
+               # running job is finished it should set next_in_strand
+               # We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
+               # Since we only unlock it on the new jobs queue *after* deleting from the original
+               # the lock ensures the blocker always gets unlocked
+               first = value_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
+               if first
+                 create_blocker_job(queue: first.queue, **{ column => value }, **blocker_job_kwargs)
+                 # the rest of 3) is taken care of here
+                 # make sure that all the jobs moved over are NOT next in strand
+                 ::Delayed::Job.where(next_in_strand: true, locked_by: nil, **{ column => value }).
+                   update_all(next_in_strand: false)
+               end
+
+               # 4) is taken care of here, by leaving next_in_strand alone and
+               # it should execute on the new shard
+               batch_move_jobs(
+                 target_shard: target_shard,
+                 source_shard: source_shard,
+                 scope: jobs_scope,
+                 batch_size: batch_size
+               ) do |job, new_job|
+                 # This ensures jobs enqueued on the old jobs shard run before jobs on the new jobs queue
+                 new_job.strand_order_override = job.strand_order_override - 1
+               end
              end
            end
          end
+       end
+     }
+
+     strand_scope = ::Delayed::Job.shard(source_shard).where.not(strand: nil)
+     singleton_scope = ::Delayed::Job.shard(source_shard).where('strand IS NULL AND singleton IS NOT NULL')
+     all_scope = ::Delayed::Job.shard(source_shard).where('strand IS NOT NULL OR singleton IS NOT NULL')
+
+     handler.call(strand_scope, :strand)
+     handler.call(singleton_scope, :singleton,
+                  { locked_at: DateTime.now, locked_by: ::Delayed::Backend::Base::ON_HOLD_BLOCKER })
 
+     shard_map = build_shard_map(all_scope, source_shard)
+     shard_map.each do |(target_shard, source_shard_ids)|
+       target_shard.activate(:delayed_jobs) do
          updated = ::Switchman::Shard.where(id: source_shard_ids, block_stranded: true).
            update_all(block_stranded: false)
          # If this is being manually re-run for some reason to clean something up, don't wait for nothing to happen
@@ -166,26 +172,40 @@ module SwitchmanInstJobs
      end
    end
 
-   def unblock_strands(target_shard)
-     target_shard.activate(:delayed_jobs) do
-       loop do
-         # We only want to unlock stranded jobs where they don't belong to a blocked shard (if they *do* belong)
-         # to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
-         # this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
-         # logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
-         # batches
-         break if ::Delayed::Job.where(id: ::Delayed::Job.select('DISTINCT ON (strand) id').
-           where.not(strand: nil).
-           where.not(shard_id: ::Switchman::Shard.where(block_stranded: true).pluck(:id)).where(
+   def unblock_strands(target_shard, batch_size: 10_000)
+     block_stranded_ids = ::Switchman::Shard.where(block_stranded: true).pluck(:id)
+     query = lambda { |column, scope|
+       ::Delayed::Job.
+         where(id: ::Delayed::Job.select("DISTINCT ON (#{column}) id").
+           where(scope).
+           where.not(shard_id: block_stranded_ids).
+           where(
             ::Delayed::Job.select(1).from("#{::Delayed::Job.quoted_table_name} dj2").
               where("dj2.next_in_strand = true OR dj2.source = 'JobsMigrator::StrandBlocker'").
-             where('dj2.strand = delayed_jobs.strand').arel.exists.not
-         ).order(:strand, :strand_order_override, :id)).limit(500).update_all(next_in_strand: true).zero?
+             where("dj2.#{column} = delayed_jobs.#{column}").arel.exists.not
+           ).
+           order(column, :strand_order_override, :id)).limit(batch_size)
+     }
+
+     target_shard.activate(:delayed_jobs) do
+       # We only want to unlock stranded jobs where they don't belong to a blocked shard (if they *do* belong)
+       # to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
+       # this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
+       # logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
+       # batches
+
+       loop do
+         break if query.call(:strand, 'strand IS NOT NULL').update_all(next_in_strand: true).zero?
+       end
+
+       loop do
+         break if query.call(:singleton,
+                             'strand IS NULL AND singleton IS NOT NULL').update_all(next_in_strand: true).zero?
        end
      end
    end
 
-   def migrate_everything
+   def migrate_everything(batch_size: 1_000)
      source_shard = ::Switchman::Shard.current(:delayed_jobs)
      scope = ::Delayed::Job.shard(source_shard).where('strand IS NULL')
 
@@ -194,13 +214,26 @@ module SwitchmanInstJobs
        batch_move_jobs(
          target_shard: target_shard,
          source_shard: source_shard,
-         scope: scope.where(shard_id: source_shard_ids).where(locked_by: nil)
+         scope: scope.where(shard_id: source_shard_ids).where(locked_by: nil),
+         batch_size: batch_size
        )
      end
    end
 
    private
 
+   def create_blocker_job(**kwargs)
+     first_job = ::Delayed::Job.create!(**kwargs, next_in_strand: false)
+     first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
+     first_job.tag = 'Kernel.sleep'
+     first_job.source = 'JobsMigrator::StrandBlocker'
+     first_job.max_attempts = 1
+     # If we ever have jobs left over from 9999 jobs moves of a single shard,
+     # something has gone terribly wrong
+     first_job.strand_order_override = -9999
+     first_job.save!
+   end
+
    def build_shard_map(scope, source_shard)
      shard_ids = scope.distinct.pluck(:shard_id)
 
@@ -215,10 +248,10 @@ module SwitchmanInstJobs
      shard_map
    end
 
-   def batch_move_jobs(target_shard:, source_shard:, scope:)
+   def batch_move_jobs(target_shard:, source_shard:, scope:, batch_size:)
      while scope.exists?
        # Adapted from get_and_lock_next_available in delayed/backend/active_record.rb
-       target_jobs = scope.limit(1000).lock('FOR UPDATE SKIP LOCKED')
+       target_jobs = scope.limit(batch_size).lock('FOR UPDATE SKIP LOCKED')
 
        query = source_shard.activate(:delayed_jobs) do
          "WITH limited_jobs AS (#{target_jobs.to_sql}) " \

lib/switchman_inst_jobs/version.rb CHANGED
@@ -1,3 +1,3 @@
  module SwitchmanInstJobs
-   VERSION = '3.2.7'.freeze
+   VERSION = '3.2.8'.freeze
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: switchman-inst-jobs
  version: !ruby/object:Gem::Version
-   version: 3.2.7
+   version: 3.2.8
  platform: ruby
  authors:
  - Bryan Petty
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-12-20 00:00:00.000000000 Z
+ date: 2022-02-04 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: inst-jobs
@@ -305,6 +305,10 @@ files:
  - db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb
  - db/migrate/20211220112800_fix_singleton_race_condition_insert.rb
  - db/migrate/20211220113000_fix_singleton_race_condition_delete.rb
+ - db/migrate/20220127091200_fix_singleton_unique_constraint.rb
+ - db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb
+ - db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb
+ - db/migrate/20220203063200_remove_old_singleton_index.rb
  - lib/switchman-inst-jobs.rb
  - lib/switchman_inst_jobs.rb
  - lib/switchman_inst_jobs/active_record/connection_adapters/postgresql_adapter.rb