switchman-inst-jobs 4.0.3 → 4.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: f07c44f5a58d897c8dbcc46a40906cde8c6a99b39723286a405669cf159019e4
- data.tar.gz: cf730bfc3d3ad6d2c7da7712c3c097322421820b60f2d6af025ef610f0f3aa09
+ metadata.gz: ee1540b8c0c200ee917338953c9ea377dc0f32f354175e0e6953435861333c5f
+ data.tar.gz: 4e3912c490226c6f73c39d5923199e8d9cd661c8caeb6cef2fd6701b0dc5c08d
  SHA512:
- metadata.gz: 9189978ce61d257fb25bbf57c23a36998437b3c4aa6adb93798af07ea878caf244a1514c5711c5190b39836169607e869f65b89ed71cc08d44fabfd9d6fe0e8e
- data.tar.gz: d4a76e521ee4ba38bb2172a1ffabcc4b139a204d24690a2662ca7e8587d367ba88d98db5912c716421c96940f309296f6b35e6b292025326d61e32e03217d001
+ metadata.gz: 6ee661bcf6f5335a5c6d4d7651936de85aedc6b01221700353b97c5bde61dc9a6c351948daaea70689e401962c4249efe56d359e377636dafafecb8d09ee4a49
+ data.tar.gz: 91b155c2f00a89298a7c9de0b1ec854119534e92ae6e7d89b3823adf666ba0fc9ca045c2a4e10e2e0a337c3eb3a8f1d775a6718ae9adde7d396ee787164e0596
db/migrate/20220127091200_fix_singleton_unique_constraint.rb ADDED
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ class FixSingletonUniqueConstraint < ActiveRecord::Migration[5.2]
+   disable_ddl_transaction!
+
+   def up
+     rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_not_running', 'index_delayed_jobs_on_singleton_not_running_old'
+     rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_running', 'index_delayed_jobs_on_singleton_running_old'
+
+     # only one job can be queued in a singleton
+     add_index :delayed_jobs,
+               :singleton,
+               where: "singleton IS NOT NULL AND (locked_by IS NULL OR locked_by = '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}')",
+               unique: true,
+               name: 'index_delayed_jobs_on_singleton_not_running',
+               algorithm: :concurrently
+
+     # only one job can be running for a singleton
+     add_index :delayed_jobs,
+               :singleton,
+               where: "singleton IS NOT NULL AND locked_by IS NOT NULL AND locked_by <> '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'",
+               unique: true,
+               name: 'index_delayed_jobs_on_singleton_running',
+               algorithm: :concurrently
+   end
+
+   def down
+     remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_not_running_old'
+     remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_running_old'
+   end
+ end
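
For reference, the two add_index calls above amount to roughly the following DDL on a stock PostgreSQL adapter; this is a sketch only, with 'on hold' standing in for ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY (exact value assumed):

  -- approximate DDL issued by the up method
  CREATE UNIQUE INDEX CONCURRENTLY index_delayed_jobs_on_singleton_not_running
    ON delayed_jobs (singleton)
    WHERE singleton IS NOT NULL AND (locked_by IS NULL OR locked_by = 'on hold');

  CREATE UNIQUE INDEX CONCURRENTLY index_delayed_jobs_on_singleton_running
    ON delayed_jobs (singleton)
    WHERE singleton IS NOT NULL AND locked_by IS NOT NULL AND locked_by <> 'on hold';

Together they allow at most one queued (or held) copy and at most one running copy of any given singleton; compared with the previous index pair (recreated by the RemoveOldSingletonIndex migration's down method later in this diff), an on-hold job now occupies the queued slot rather than the running slot.
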
db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb ADDED
@@ -0,0 +1,60 @@
+ # frozen_string_literal: true
+
+ class UpdateInsertTriggerForSingletonUniqueConstraintChange < ActiveRecord::Migration[5.2]
+   def change
+     reversible do |direction|
+       direction.up do
+         execute(<<~SQL)
+           CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
+           BEGIN
+             IF NEW.strand IS NOT NULL THEN
+               PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
+               IF (SELECT COUNT(*) FROM (
+                     SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
+                   ) s) = NEW.max_concurrent THEN
+                 NEW.next_in_strand := false;
+               END IF;
+             END IF;
+             IF NEW.singleton IS NOT NULL THEN
+               PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
+               -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
+               -- rather than doing a seq scan
+               PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by = '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}' OR locked_by <> '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}');
+               IF FOUND THEN
+                 NEW.next_in_strand := false;
+               END IF;
+             END IF;
+             RETURN NEW;
+           END;
+           $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+         SQL
+       end
+       direction.down do
+         execute(<<~SQL)
+           CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
+           BEGIN
+             IF NEW.strand IS NOT NULL THEN
+               PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
+               IF (SELECT COUNT(*) FROM (
+                     SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
+                   ) s) = NEW.max_concurrent THEN
+                 NEW.next_in_strand := false;
+               END IF;
+             END IF;
+             IF NEW.singleton IS NOT NULL THEN
+               PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
+               -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
+               -- rather than doing a seq scan
+               PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
+               IF FOUND THEN
+                 NEW.next_in_strand := false;
+               END IF;
+             END IF;
+             RETURN NEW;
+           END;
+           $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+         SQL
+       end
+     end
+   end
+ end
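
The only functional difference between the up and down trigger bodies is the singleton existence check. The up version spells out the union of the two new partial-index predicates, which looks redundant but, as the comment notes, lets the planner answer the check from those partial indexes instead of a sequential scan; the down version restores the always-true locked_by check that matches the old index pair. Standalone, the up-version check is roughly (a sketch, with 'on hold' and the singleton key assumed):

  SELECT 1
    FROM delayed_jobs
   WHERE singleton = 'some-singleton'    -- hypothetical singleton key
     AND (locked_by IS NULL              -- queued slot (..._not_running index)
          OR locked_by = 'on hold'       -- held job, also the queued slot
          OR locked_by <> 'on hold');    -- running slot (..._running index)
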
db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb ADDED
@@ -0,0 +1,209 @@
+ # frozen_string_literal: true
+
+ class UpdateDeleteTriggerForSingletonUniqueConstraintChange < ActiveRecord::Migration[5.2]
+   def up
+     execute(<<~SQL)
+       CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
+       DECLARE
+         next_strand varchar;
+         running_count integer;
+         should_lock boolean;
+         should_be_precise boolean;
+         update_query varchar;
+         skip_locked varchar;
+         transition boolean;
+       BEGIN
+         IF OLD.strand IS NOT NULL THEN
+           should_lock := true;
+           should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+
+           IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+             running_count := (SELECT COUNT(*) FROM (
+               SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+             ) subquery_for_count);
+             should_lock := running_count < OLD.max_concurrent;
+           END IF;
+
+           IF should_lock THEN
+             PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+           END IF;
+
+           -- note that we don't really care if the row we're deleting has a singleton, or if it even
+           -- matches the row(s) we're going to update. we just need to make sure that whatever
+           -- singleton we grab isn't already running (which is a simple existence check, since
+           -- the unique indexes ensure there is at most one singleton running, and one queued)
+           update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+             SELECT id FROM delayed_jobs j2
+               WHERE next_in_strand=false AND
+                 j2.strand=$1.strand AND
+                 (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by = ''#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'' OR j3.locked_by <> ''#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'')))
+               ORDER BY j2.strand_order_override ASC, j2.id ASC
+               LIMIT ';
+
+           IF should_be_precise THEN
+             running_count := (SELECT COUNT(*) FROM (
+               SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+             ) s);
+             IF running_count < OLD.max_concurrent THEN
+               update_query := update_query || '($1.max_concurrent - $2)';
+             ELSE
+               -- we have too many running already; just bail
+               RETURN OLD;
+             END IF;
+           ELSE
+             update_query := update_query || '1';
+
+             -- n-strands don't require precise ordering; we can make this query more performant
+             IF OLD.max_concurrent > 1 THEN
+               skip_locked := ' SKIP LOCKED';
+             END IF;
+           END IF;
+
+           update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+           EXECUTE update_query USING OLD, running_count;
+         END IF;
+
+         IF OLD.singleton IS NOT NULL THEN
+           PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
+
+           transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
+
+           IF transition THEN
+             next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
+
+             IF next_strand IS NOT NULL THEN
+               -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
+               IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
+                 -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
+                 RETURN OLD;
+               END IF;
+             END IF;
+           ELSIF OLD.strand IS NOT NULL THEN
+             -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
+             RETURN OLD;
+           END IF;
+
+           -- handles transitioning a singleton from stranded to not stranded --
+           -- handles transitioning a singleton from unstranded to stranded --
+           -- handles transitioning a singleton from strand A to strand B --
+           -- these transitions are a relatively rare case, so we take a shortcut and --
+           -- only start the next singleton if its strand does not currently have any running jobs --
+           -- if it does, the next stranded job that finishes will start this singleton if it can --
+           UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+             SELECT id FROM delayed_jobs j2
+               WHERE next_in_strand=false AND
+                 j2.singleton=OLD.singleton AND
+                 j2.locked_by IS NULL AND
+                 (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
+             FOR UPDATE
+           );
+         END IF;
+         RETURN OLD;
+       END;
+       $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+     SQL
+   end
+
+   def down
+     execute(<<~SQL)
+       CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
+       DECLARE
+         next_strand varchar;
+         running_count integer;
+         should_lock boolean;
+         should_be_precise boolean;
+         update_query varchar;
+         skip_locked varchar;
+         transition boolean;
+       BEGIN
+         IF OLD.strand IS NOT NULL THEN
+           should_lock := true;
+           should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+
+           IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+             running_count := (SELECT COUNT(*) FROM (
+               SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+             ) subquery_for_count);
+             should_lock := running_count < OLD.max_concurrent;
+           END IF;
+
+           IF should_lock THEN
+             PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+           END IF;
+
+           -- note that we don't really care if the row we're deleting has a singleton, or if it even
+           -- matches the row(s) we're going to update. we just need to make sure that whatever
+           -- singleton we grab isn't already running (which is a simple existence check, since
+           -- the unique indexes ensure there is at most one singleton running, and one queued)
+           update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+             SELECT id FROM delayed_jobs j2
+               WHERE next_in_strand=false AND
+                 j2.strand=$1.strand AND
+                 (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
+               ORDER BY j2.strand_order_override ASC, j2.id ASC
+               LIMIT ';
+
+           IF should_be_precise THEN
+             running_count := (SELECT COUNT(*) FROM (
+               SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+             ) s);
+             IF running_count < OLD.max_concurrent THEN
+               update_query := update_query || '($1.max_concurrent - $2)';
+             ELSE
+               -- we have too many running already; just bail
+               RETURN OLD;
+             END IF;
+           ELSE
+             update_query := update_query || '1';
+
+             -- n-strands don't require precise ordering; we can make this query more performant
+             IF OLD.max_concurrent > 1 THEN
+               skip_locked := ' SKIP LOCKED';
+             END IF;
+           END IF;
+
+           update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+           EXECUTE update_query USING OLD, running_count;
+         END IF;
+
+         IF OLD.singleton IS NOT NULL THEN
+           PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
+
+           transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
+
+           IF transition THEN
+             next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
+
+             IF next_strand IS NOT NULL THEN
+               -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
+               IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
+                 -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
+                 RETURN OLD;
+               END IF;
+             END IF;
+           ELSIF OLD.strand IS NOT NULL THEN
+             -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
+             RETURN OLD;
+           END IF;
+
+           -- handles transitioning a singleton from stranded to not stranded --
+           -- handles transitioning a singleton from unstranded to stranded --
+           -- handles transitioning a singleton from strand A to strand B --
+           -- these transitions are a relatively rare case, so we take a shortcut and --
+           -- only start the next singleton if its strand does not currently have any running jobs --
+           -- if it does, the next stranded job that finishes will start this singleton if it can --
+           UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+             SELECT id FROM delayed_jobs j2
+               WHERE next_in_strand=false AND
+                 j2.singleton=OLD.singleton AND
+                 j2.locked_by IS NULL AND
+                 (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
+             FOR UPDATE
+           );
+         END IF;
+         RETURN OLD;
+       END;
+       $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+     SQL
+   end
+ end
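
Both trigger bodies serialize concurrent deletes on the same strand or singleton with PostgreSQL advisory locks keyed by a hash of the name (half_md5_as_bigint is a helper inst-jobs defines elsewhere to turn a string into a bigint lock key). The locks are transaction-scoped, which is why the function can simply RETURN OLD when pg_try_advisory_xact_lock fails: whichever transaction does hold the lock will finish the bookkeeping, and every lock is released automatically at commit or rollback. A minimal sketch of that behaviour in plain SQL (key 42 is arbitrary):

  BEGIN;
  SELECT pg_advisory_xact_lock(42);      -- blocks until key 42 is free, then holds it
  SELECT pg_try_advisory_xact_lock(42);  -- true here: a session may re-acquire its own lock
  COMMIT;                                -- both acquisitions are released automatically
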
db/migrate/20220203063200_remove_old_singleton_index.rb ADDED
@@ -0,0 +1,31 @@
+ # frozen_string_literal: true
+
+ class RemoveOldSingletonIndex < ActiveRecord::Migration[5.2]
+   disable_ddl_transaction!
+
+   def up
+     remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_not_running_old'
+     remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_running_old'
+   end
+
+   def down
+     rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_not_running', 'index_delayed_jobs_on_singleton_not_running_old'
+     rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_running', 'index_delayed_jobs_on_singleton_running_old'
+
+     # only one job can be queued in a singleton
+     add_index :delayed_jobs,
+               :singleton,
+               where: 'singleton IS NOT NULL AND locked_by IS NULL',
+               unique: true,
+               name: 'index_delayed_jobs_on_singleton_not_running',
+               algorithm: :concurrently
+
+     # only one job can be running for a singleton
+     add_index :delayed_jobs,
+               :singleton,
+               where: 'singleton IS NOT NULL AND locked_by IS NOT NULL',
+               unique: true,
+               name: 'index_delayed_jobs_on_singleton_running',
+               algorithm: :concurrently
+   end
+ end
@@ -21,13 +21,17 @@ module SwitchmanInstJobs

  # Ensure jobs get unblocked on the new shard if they exist
  ::Delayed::Worker.lifecycle.after(:perform) do |_worker, job|
-   if job.strand
+   if job.strand || job.singleton
+     column = job.strand ? :strand : :singleton
+
      ::Switchman::Shard.clear_cache
      ::Switchman::Shard.default.activate do
        current_job_shard = ::Switchman::Shard.lookup(job.shard_id).delayed_jobs_shard
        if current_job_shard != ::Switchman::Shard.current(::Delayed::Backend::ActiveRecord::AbstractJob)
          current_job_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
-           j = ::Delayed::Job.where(strand: job.strand).next_in_strand_order.first
+           ::Delayed::Job.where(source: 'JobsMigrator::StrandBlocker', **{ column => job.try(column) }).delete_all
+
+           j = ::Delayed::Job.where(**{ column => job.try(column) }).next_in_strand_order.first
            j.update_column(:next_in_strand, true) if j && !j.next_in_strand
          end
        end
@@ -89,7 +89,9 @@ module SwitchmanInstJobs
    migrate_everything
  end

- def migrate_strands
+ def migrate_strands(batch_size: 1_000)
+   source_shard = ::Switchman::Shard.current(::Delayed::Backend::ActiveRecord::AbstractJob)
+
    # there are 4 scenarios to deal with here
    # 1) no running job, no jobs moved: do nothing
    # 2) running job, no jobs moved; create blocker with next_in_strand=false
@@ -98,60 +100,64 @@
    # those (= do nothing since it should already be false)
    # 4) no running job, jobs moved: set next_in_strand=true on the first of
    # those (= do nothing since it should already be true)
+   handler = lambda { |scope, column, blocker_job_kwargs = {}|
+     shard_map = build_shard_map(scope, source_shard)
+     shard_map.each do |(target_shard, source_shard_ids)|
+       shard_scope = scope.where(shard_id: source_shard_ids)

-   source_shard = ::Switchman::Shard.current(::Delayed::Backend::ActiveRecord::AbstractJob)
-   strand_scope = ::Delayed::Job.shard(source_shard).where.not(strand: nil)
-   shard_map = build_shard_map(strand_scope, source_shard)
-   shard_map.each do |(target_shard, source_shard_ids)|
-     shard_scope = strand_scope.where(shard_id: source_shard_ids)
-
-     # 1) is taken care of because it should not show up here in strands
-     strands = shard_scope.distinct.order(:strand).pluck(:strand)
-
-     target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
-       strands.each do |strand|
-         transaction_on([source_shard, target_shard]) do
-           this_strand_scope = shard_scope.where(strand: strand)
-           # we want to copy all the jobs except the one that is still running.
-           jobs_scope = this_strand_scope.where(locked_by: nil)
-
-           # 2) and part of 3) are taken care of here by creating a blocker
-           # job with next_in_strand = false. as soon as the current
-           # running job is finished it should set next_in_strand
-           # We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
-           # Since we only unlock it on the new jobs queue *after* deleting from the original
-           # the lock ensures the blocker always gets unlocked
-           first = this_strand_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
-           if first
-             first_job = ::Delayed::Job.create!(strand: strand, next_in_strand: false)
-             first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
-             first_job.queue = first.queue
-             first_job.tag = 'Kernel.sleep'
-             first_job.source = 'JobsMigrator::StrandBlocker'
-             first_job.max_attempts = 1
-             # If we ever have jobs left over from 9999 jobs moves of a single shard,
-             # something has gone terribly wrong
-             first_job.strand_order_override = -9999
-             first_job.save!
-             # the rest of 3) is taken care of here
-             # make sure that all the jobs moved over are NOT next in strand
-             ::Delayed::Job.where(next_in_strand: true, strand: strand, locked_by: nil).
-               update_all(next_in_strand: false)
-           end
+       # 1) is taken care of because it should not show up here in strands
+       values = shard_scope.distinct.order(column).pluck(column)

-           # 4) is taken care of here, by leaving next_in_strand alone and
-           # it should execute on the new shard
-           batch_move_jobs(
-             target_shard: target_shard,
-             source_shard: source_shard,
-             scope: jobs_scope
-           ) do |job, new_job|
-             # This ensures jobs enqueued on the old jobs shard run before jobs on the new jobs queue
-             new_job.strand_order_override = job.strand_order_override - 1
+       target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
+         values.each do |value|
+           transaction_on([source_shard, target_shard]) do
+             value_scope = shard_scope.where(**{ column => value })
+             # we want to copy all the jobs except the one that is still running.
+             jobs_scope = value_scope.where(locked_by: nil)
+
+             # 2) and part of 3) are taken care of here by creating a blocker
+             # job with next_in_strand = false. as soon as the current
+             # running job is finished it should set next_in_strand
+             # We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
+             # Since we only unlock it on the new jobs queue *after* deleting from the original
+             # the lock ensures the blocker always gets unlocked
+             first = value_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
+             if first
+               create_blocker_job(queue: first.queue, **{ column => value }, **blocker_job_kwargs)
+               # the rest of 3) is taken care of here
+               # make sure that all the jobs moved over are NOT next in strand
+               ::Delayed::Job.where(next_in_strand: true, locked_by: nil, **{ column => value }).
+                 update_all(next_in_strand: false)
+             end
+
+             # 4) is taken care of here, by leaving next_in_strand alone and
+             # it should execute on the new shard
+             batch_move_jobs(
+               target_shard: target_shard,
+               source_shard: source_shard,
+               scope: jobs_scope,
+               batch_size: batch_size
+             ) do |job, new_job|
+               # This ensures jobs enqueued on the old jobs shard run before jobs on the new jobs queue
+               new_job.strand_order_override = job.strand_order_override - 1
+             end
            end
          end
        end
+     end
+   }
+
+   strand_scope = ::Delayed::Job.shard(source_shard).where.not(strand: nil)
+   singleton_scope = ::Delayed::Job.shard(source_shard).where('strand IS NULL AND singleton IS NOT NULL')
+   all_scope = ::Delayed::Job.shard(source_shard).where('strand IS NOT NULL OR singleton IS NOT NULL')
+
+   handler.call(strand_scope, :strand)
+   handler.call(singleton_scope, :singleton,
+                { locked_at: DateTime.now, locked_by: ::Delayed::Backend::Base::ON_HOLD_BLOCKER })

+   shard_map = build_shard_map(all_scope, source_shard)
+   shard_map.each do |(target_shard, source_shard_ids)|
+     target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
        updated = ::Switchman::Shard.where(id: source_shard_ids, block_stranded: true).
          update_all(block_stranded: false)
        # If this is being manually re-run for some reason to clean something up, don't wait for nothing to happen
@@ -166,26 +172,40 @@ module SwitchmanInstJobs
    end
  end

- def unblock_strands(target_shard)
-   target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
-     loop do
-       # We only want to unlock stranded jobs where they don't belong to a blocked shard (if they *do* belong)
-       # to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
-       # this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
-       # logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
-       # batches
-       break if ::Delayed::Job.where(id: ::Delayed::Job.select('DISTINCT ON (strand) id').
-         where.not(strand: nil).
-         where.not(shard_id: ::Switchman::Shard.where(block_stranded: true).pluck(:id)).where(
+ def unblock_strands(target_shard, batch_size: 10_000)
+   block_stranded_ids = ::Switchman::Shard.where(block_stranded: true).pluck(:id)
+   query = lambda { |column, scope|
+     ::Delayed::Job.
+       where(id: ::Delayed::Job.select("DISTINCT ON (#{column}) id").
+         where(scope).
+         where.not(shard_id: block_stranded_ids).
+         where(
            ::Delayed::Job.select(1).from("#{::Delayed::Job.quoted_table_name} dj2").
            where("dj2.next_in_strand = true OR dj2.source = 'JobsMigrator::StrandBlocker'").
-           where('dj2.strand = delayed_jobs.strand').arel.exists.not
-         ).order(:strand, :strand_order_override, :id)).limit(500).update_all(next_in_strand: true).zero?
+           where("dj2.#{column} = delayed_jobs.#{column}").arel.exists.not
+         ).
+         order(column, :strand_order_override, :id)).limit(batch_size)
+   }
+
+   target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
+     # We only want to unlock stranded jobs where they don't belong to a blocked shard (if they *do* belong)
+     # to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
+     # this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
+     # logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
+     # batches
+
+     loop do
+       break if query.call(:strand, 'strand IS NOT NULL').update_all(next_in_strand: true).zero?
+     end
+
+     loop do
+       break if query.call(:singleton,
+                           'strand IS NULL AND singleton IS NOT NULL').update_all(next_in_strand: true).zero?
      end
    end
  end

- def migrate_everything
+ def migrate_everything(batch_size: 1_000)
    source_shard = ::Switchman::Shard.current(::Delayed::Backend::ActiveRecord::AbstractJob)
    scope = ::Delayed::Job.shard(source_shard).where(strand: nil)
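
In the rewritten unblock_strands above, the query lambda builds, per column (:strand or :singleton), roughly the following selection: DISTINCT ON keeps only the first blocked job per strand or singleton, and the NOT EXISTS clause skips groups that still contain an unblocked job or a JobsMigrator::StrandBlocker. A sketch of the strand variant as plain SQL (the real statement is generated by ActiveRecord, so exact spelling is assumed):

  SELECT DISTINCT ON (strand) id
    FROM delayed_jobs
   WHERE strand IS NOT NULL
     AND shard_id NOT IN (/* shards still marked block_stranded */)
     AND NOT EXISTS (SELECT 1 FROM delayed_jobs dj2
                      WHERE (dj2.next_in_strand = true OR dj2.source = 'JobsMigrator::StrandBlocker')
                        AND dj2.strand = delayed_jobs.strand)
   ORDER BY strand, strand_order_override, id;

The surrounding loop then flips next_in_strand to true for those ids, batch_size rows at a time, until an update touches no rows.
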
 
@@ -194,13 +214,26 @@ module SwitchmanInstJobs
      batch_move_jobs(
        target_shard: target_shard,
        source_shard: source_shard,
-       scope: scope.where(shard_id: source_shard_ids).where(locked_by: nil)
+       scope: scope.where(shard_id: source_shard_ids).where(locked_by: nil),
+       batch_size: batch_size
      )
    end
  end

  private

+ def create_blocker_job(**kwargs)
+   first_job = ::Delayed::Job.create!(**kwargs, next_in_strand: false)
+   first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
+   first_job.tag = 'Kernel.sleep'
+   first_job.source = 'JobsMigrator::StrandBlocker'
+   first_job.max_attempts = 1
+   # If we ever have jobs left over from 9999 jobs moves of a single shard,
+   # something has gone terribly wrong
+   first_job.strand_order_override = -9999
+   first_job.save!
+ end
+
  def build_shard_map(scope, source_shard)
    shard_ids = scope.distinct.pluck(:shard_id)

@@ -215,10 +248,10 @@
    shard_map
  end

- def batch_move_jobs(target_shard:, source_shard:, scope:)
+ def batch_move_jobs(target_shard:, source_shard:, scope:, batch_size:)
    while scope.exists?
      # Adapted from get_and_lock_next_available in delayed/backend/active_record.rb
-     target_jobs = scope.limit(1000).lock('FOR UPDATE SKIP LOCKED')
+     target_jobs = scope.limit(batch_size).lock('FOR UPDATE SKIP LOCKED')

      query = source_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
        <<~SQL
@@ -1,3 +1,3 @@
  module SwitchmanInstJobs
-   VERSION = '4.0.3'.freeze
+   VERSION = '4.0.4'.freeze
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: switchman-inst-jobs
  version: !ruby/object:Gem::Version
-   version: 4.0.3
+   version: 4.0.4
  platform: ruby
  authors:
  - Bryan Petty
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-12-20 00:00:00.000000000 Z
+ date: 2022-02-04 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: inst-jobs
@@ -340,6 +340,10 @@ files:
  - db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb
  - db/migrate/20211220112800_fix_singleton_race_condition_insert.rb
  - db/migrate/20211220113000_fix_singleton_race_condition_delete.rb
+ - db/migrate/20220127091200_fix_singleton_unique_constraint.rb
+ - db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb
+ - db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb
+ - db/migrate/20220203063200_remove_old_singleton_index.rb
  - lib/switchman-inst-jobs.rb
  - lib/switchman_inst_jobs.rb
  - lib/switchman_inst_jobs/active_record/connection_adapters/connection_pool.rb