switchman-inst-jobs 4.0.3 → 4.0.4
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/db/migrate/20220127091200_fix_singleton_unique_constraint.rb +31 -0
- data/db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb +60 -0
- data/db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb +209 -0
- data/db/migrate/20220203063200_remove_old_singleton_index.rb +31 -0
- data/lib/switchman_inst_jobs/engine.rb +6 -2
- data/lib/switchman_inst_jobs/jobs_migrator.rb +99 -66
- data/lib/switchman_inst_jobs/version.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ee1540b8c0c200ee917338953c9ea377dc0f32f354175e0e6953435861333c5f
+  data.tar.gz: 4e3912c490226c6f73c39d5923199e8d9cd661c8caeb6cef2fd6701b0dc5c08d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6ee661bcf6f5335a5c6d4d7651936de85aedc6b01221700353b97c5bde61dc9a6c351948daaea70689e401962c4249efe56d359e377636dafafecb8d09ee4a49
+  data.tar.gz: 91b155c2f00a89298a7c9de0b1ec854119534e92ae6e7d89b3823adf666ba0fc9ca045c2a4e10e2e0a337c3eb3a8f1d775a6718ae9adde7d396ee787164e0596
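
The pairs of hashes above are the SHA256 and SHA512 digests published for the gem's metadata.gz and data.tar.gz archives (the 4.0.3 values were not captured in this view). A minimal sketch of recomputing them locally, assuming the 4.0.4 gem has been unpacked into ./switchman-inst-jobs-4.0.4 (the path is an assumption for illustration, not something this diff specifies):

# Hedged sketch: recompute the published digests from a locally unpacked gem.
require 'digest'

%w[metadata.gz data.tar.gz].each do |name|
  path = File.join('switchman-inst-jobs-4.0.4', name) # assumed unpack location
  puts "#{name} SHA256: #{Digest::SHA256.file(path).hexdigest}"
  puts "#{name} SHA512: #{Digest::SHA512.file(path).hexdigest}"
end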
data/db/migrate/20220127091200_fix_singleton_unique_constraint.rb
ADDED
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+class FixSingletonUniqueConstraint < ActiveRecord::Migration[5.2]
+  disable_ddl_transaction!
+
+  def up
+    rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_not_running', 'index_delayed_jobs_on_singleton_not_running_old'
+    rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_running', 'index_delayed_jobs_on_singleton_running_old'
+
+    # only one job can be queued in a singleton
+    add_index :delayed_jobs,
+              :singleton,
+              where: "singleton IS NOT NULL AND (locked_by IS NULL OR locked_by = '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}')",
+              unique: true,
+              name: 'index_delayed_jobs_on_singleton_not_running',
+              algorithm: :concurrently
+
+    # only one job can be running for a singleton
+    add_index :delayed_jobs,
+              :singleton,
+              where: "singleton IS NOT NULL AND locked_by IS NOT NULL AND locked_by <> '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'",
+              unique: true,
+              name: 'index_delayed_jobs_on_singleton_running',
+              algorithm: :concurrently
+  end
+
+  def down
+    remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_not_running_old'
+    remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_running_old'
+  end
+end
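
Compared with the old index pair (recreated in the down method of 20220203063200 below), the net effect is that a singleton job held with inst-jobs' ON_HOLD_LOCKED_BY sentinel now counts toward the "not running" (queued) slot instead of the "running" slot. A minimal sketch of the two index predicates expressed as ActiveRecord relations, assuming ::Delayed::Job is the inst-jobs ActiveRecord backend model (an illustration, not code from the gem):

on_hold = ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY

# Matches index_delayed_jobs_on_singleton_not_running: queued or on-hold singletons.
not_running = ::Delayed::Job.where.not(singleton: nil)
                            .where('locked_by IS NULL OR locked_by = ?', on_hold)

# Matches index_delayed_jobs_on_singleton_running: singletons locked by a real worker.
running = ::Delayed::Job.where.not(singleton: nil)
                        .where('locked_by IS NOT NULL AND locked_by <> ?', on_hold)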
data/db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb
ADDED
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+class UpdateInsertTriggerForSingletonUniqueConstraintChange < ActiveRecord::Migration[5.2]
+  def change
+    reversible do |direction|
+      direction.up do
+        execute(<<~SQL)
+          CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
+          BEGIN
+            IF NEW.strand IS NOT NULL THEN
+              PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
+              IF (SELECT COUNT(*) FROM (
+                  SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
+                ) s) = NEW.max_concurrent THEN
+                NEW.next_in_strand := false;
+              END IF;
+            END IF;
+            IF NEW.singleton IS NOT NULL THEN
+              PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
+              -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
+              -- rather than doing a seq scan
+              PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by = '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}' OR locked_by <> '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}');
+              IF FOUND THEN
+                NEW.next_in_strand := false;
+              END IF;
+            END IF;
+            RETURN NEW;
+          END;
+          $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+        SQL
+      end
+      direction.down do
+        execute(<<~SQL)
+          CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
+          BEGIN
+            IF NEW.strand IS NOT NULL THEN
+              PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
+              IF (SELECT COUNT(*) FROM (
+                  SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
+                ) s) = NEW.max_concurrent THEN
+                NEW.next_in_strand := false;
+              END IF;
+            END IF;
+            IF NEW.singleton IS NOT NULL THEN
+              PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
+              -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
+              -- rather than doing a seq scan
+              PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
+              IF FOUND THEN
+                NEW.next_in_strand := false;
+              END IF;
+            END IF;
+            RETURN NEW;
+          END;
+          $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+        SQL
+      end
+    end
+  end
+end
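
The otherwise redundant locked_by condition in the singleton branch exists, per the inline comment, to steer the planner onto the two partial singleton indexes created above. A hedged way to spot-check that from a console connected to a jobs database (an illustration, not gem code):

# Hedged sketch: inspect the plan for the trigger's existence check to confirm it
# uses the two partial singleton indexes rather than a sequential scan.
on_hold = ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY
plan = ActiveRecord::Base.connection.select_values(<<~SQL)
  EXPLAIN
  SELECT 1 FROM delayed_jobs
  WHERE singleton = 'example_singleton'
    AND (locked_by IS NULL OR locked_by = '#{on_hold}' OR locked_by <> '#{on_hold}')
SQL
puts plan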
data/db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb
ADDED
@@ -0,0 +1,209 @@
+# frozen_string_literal: true
+
+class UpdateDeleteTriggerForSingletonUniqueConstraintChange < ActiveRecord::Migration[5.2]
+  def up
+    execute(<<~SQL)
+      CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
+      DECLARE
+        next_strand varchar;
+        running_count integer;
+        should_lock boolean;
+        should_be_precise boolean;
+        update_query varchar;
+        skip_locked varchar;
+        transition boolean;
+      BEGIN
+        IF OLD.strand IS NOT NULL THEN
+          should_lock := true;
+          should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+
+          IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) subquery_for_count);
+            should_lock := running_count < OLD.max_concurrent;
+          END IF;
+
+          IF should_lock THEN
+            PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+          END IF;
+
+          -- note that we don't really care if the row we're deleting has a singleton, or if it even
+          -- matches the row(s) we're going to update. we just need to make sure that whatever
+          -- singleton we grab isn't already running (which is a simple existence check, since
+          -- the unique indexes ensure there is at most one singleton running, and one queued)
+          update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.strand=$1.strand AND
+                (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by = ''#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'' OR j3.locked_by <> ''#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'')))
+              ORDER BY j2.strand_order_override ASC, j2.id ASC
+              LIMIT ';
+
+          IF should_be_precise THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) s);
+            IF running_count < OLD.max_concurrent THEN
+              update_query := update_query || '($1.max_concurrent - $2)';
+            ELSE
+              -- we have too many running already; just bail
+              RETURN OLD;
+            END IF;
+          ELSE
+            update_query := update_query || '1';
+
+            -- n-strands don't require precise ordering; we can make this query more performant
+            IF OLD.max_concurrent > 1 THEN
+              skip_locked := ' SKIP LOCKED';
+            END IF;
+          END IF;
+
+          update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+          EXECUTE update_query USING OLD, running_count;
+        END IF;
+
+        IF OLD.singleton IS NOT NULL THEN
+          PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
+
+          transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
+
+          IF transition THEN
+            next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
+
+            IF next_strand IS NOT NULL THEN
+              -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
+              IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
+                -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
+                RETURN OLD;
+              END IF;
+            END IF;
+          ELSIF OLD.strand IS NOT NULL THEN
+            -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
+            RETURN OLD;
+          END IF;
+
+          -- handles transitioning a singleton from stranded to not stranded --
+          -- handles transitioning a singleton from unstranded to stranded --
+          -- handles transitioning a singleton from strand A to strand B --
+          -- these transitions are a relatively rare case, so we take a shortcut and --
+          -- only start the next singleton if its strand does not currently have any running jobs --
+          -- if it does, the next stranded job that finishes will start this singleton if it can --
+          UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.singleton=OLD.singleton AND
+                j2.locked_by IS NULL AND
+                (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
+            FOR UPDATE
+          );
+        END IF;
+        RETURN OLD;
+      END;
+      $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+    SQL
+  end
+
+  def down
+    execute(<<~SQL)
+      CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
+      DECLARE
+        next_strand varchar;
+        running_count integer;
+        should_lock boolean;
+        should_be_precise boolean;
+        update_query varchar;
+        skip_locked varchar;
+        transition boolean;
+      BEGIN
+        IF OLD.strand IS NOT NULL THEN
+          should_lock := true;
+          should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
+
+          IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) subquery_for_count);
+            should_lock := running_count < OLD.max_concurrent;
+          END IF;
+
+          IF should_lock THEN
+            PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
+          END IF;
+
+          -- note that we don't really care if the row we're deleting has a singleton, or if it even
+          -- matches the row(s) we're going to update. we just need to make sure that whatever
+          -- singleton we grab isn't already running (which is a simple existence check, since
+          -- the unique indexes ensure there is at most one singleton running, and one queued)
+          update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.strand=$1.strand AND
+                (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
+              ORDER BY j2.strand_order_override ASC, j2.id ASC
+              LIMIT ';
+
+          IF should_be_precise THEN
+            running_count := (SELECT COUNT(*) FROM (
+              SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
+            ) s);
+            IF running_count < OLD.max_concurrent THEN
+              update_query := update_query || '($1.max_concurrent - $2)';
+            ELSE
+              -- we have too many running already; just bail
+              RETURN OLD;
+            END IF;
+          ELSE
+            update_query := update_query || '1';
+
+            -- n-strands don't require precise ordering; we can make this query more performant
+            IF OLD.max_concurrent > 1 THEN
+              skip_locked := ' SKIP LOCKED';
+            END IF;
+          END IF;
+
+          update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
+          EXECUTE update_query USING OLD, running_count;
+        END IF;
+
+        IF OLD.singleton IS NOT NULL THEN
+          PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
+
+          transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
+
+          IF transition THEN
+            next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
+
+            IF next_strand IS NOT NULL THEN
+              -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
+              IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
+                -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
+                RETURN OLD;
+              END IF;
+            END IF;
+          ELSIF OLD.strand IS NOT NULL THEN
+            -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
+            RETURN OLD;
+          END IF;
+
+          -- handles transitioning a singleton from stranded to not stranded --
+          -- handles transitioning a singleton from unstranded to stranded --
+          -- handles transitioning a singleton from strand A to strand B --
+          -- these transitions are a relatively rare case, so we take a shortcut and --
+          -- only start the next singleton if its strand does not currently have any running jobs --
+          -- if it does, the next stranded job that finishes will start this singleton if it can --
+          UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
+            SELECT id FROM delayed_jobs j2
+              WHERE next_in_strand=false AND
+                j2.singleton=OLD.singleton AND
+                j2.locked_by IS NULL AND
+                (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
+            FOR UPDATE
+          );
+        END IF;
+        RETURN OLD;
+      END;
+      $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
+    SQL
+  end
+end
data/db/migrate/20220203063200_remove_old_singleton_index.rb
ADDED
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+class RemoveOldSingletonIndex < ActiveRecord::Migration[5.2]
+  disable_ddl_transaction!
+
+  def up
+    remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_not_running_old'
+    remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_running_old'
+  end
+
+  def down
+    rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_not_running', 'index_delayed_jobs_on_singleton_not_running_old'
+    rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_running', 'index_delayed_jobs_on_singleton_running_old'
+
+    # only one job can be queued in a singleton
+    add_index :delayed_jobs,
+              :singleton,
+              where: 'singleton IS NOT NULL AND locked_by IS NULL',
+              unique: true,
+              name: 'index_delayed_jobs_on_singleton_not_running',
+              algorithm: :concurrently
+
+    # only one job can be running for a singleton
+    add_index :delayed_jobs,
+              :singleton,
+              where: 'singleton IS NOT NULL AND locked_by IS NOT NULL',
+              unique: true,
+              name: 'index_delayed_jobs_on_singleton_running',
+              algorithm: :concurrently
+  end
+end
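
Taken together, the four migrations stage the index swap so it can run with algorithm: :concurrently and without a window in which no singleton constraint exists: the old pair is first renamed aside while the new pair is built, the triggers are then updated to match the new predicates, and only afterwards is the old pair dropped. A hedged console check that only the new pair remains once this last step has run, using standard Rails schema introspection (illustration only):

# Hedged sketch: list the singleton indexes on delayed_jobs after migrating.
names = ActiveRecord::Base.connection.indexes(:delayed_jobs).map(&:name)
puts names.grep(/singleton/)
# Expected once 20220203063200 has run: the two index names without the "_old" suffix.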
data/lib/switchman_inst_jobs/engine.rb
CHANGED
@@ -21,13 +21,17 @@ module SwitchmanInstJobs
 
       # Ensure jobs get unblocked on the new shard if they exist
       ::Delayed::Worker.lifecycle.after(:perform) do |_worker, job|
-        if job.strand
+        if job.strand || job.singleton
+          column = job.strand ? :strand : :singleton
+
           ::Switchman::Shard.clear_cache
           ::Switchman::Shard.default.activate do
             current_job_shard = ::Switchman::Shard.lookup(job.shard_id).delayed_jobs_shard
             if current_job_shard != ::Switchman::Shard.current(::Delayed::Backend::ActiveRecord::AbstractJob)
               current_job_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
-
+                ::Delayed::Job.where(source: 'JobsMigrator::StrandBlocker', **{ column => job.try(column) }).delete_all
+
+                j = ::Delayed::Job.where(**{ column => job.try(column) }).next_in_strand_order.first
                 j.update_column(:next_in_strand, true) if j && !j.next_in_strand
               end
             end
data/lib/switchman_inst_jobs/jobs_migrator.rb
CHANGED
@@ -89,7 +89,9 @@ module SwitchmanInstJobs
       migrate_everything
     end
 
-    def migrate_strands
+    def migrate_strands(batch_size: 1_000)
+      source_shard = ::Switchman::Shard.current(::Delayed::Backend::ActiveRecord::AbstractJob)
+
       # there are 4 scenarios to deal with here
       # 1) no running job, no jobs moved: do nothing
       # 2) running job, no jobs moved; create blocker with next_in_strand=false
@@ -98,60 +100,64 @@ module SwitchmanInstJobs
       # those (= do nothing since it should already be false)
       # 4) no running job, jobs moved: set next_in_strand=true on the first of
       # those (= do nothing since it should already be true)
+      handler = lambda { |scope, column, blocker_job_kwargs = {}|
+        shard_map = build_shard_map(scope, source_shard)
+        shard_map.each do |(target_shard, source_shard_ids)|
+          shard_scope = scope.where(shard_id: source_shard_ids)
 
-
-
-      shard_map = build_shard_map(strand_scope, source_shard)
-      shard_map.each do |(target_shard, source_shard_ids)|
-        shard_scope = strand_scope.where(shard_id: source_shard_ids)
-
-        # 1) is taken care of because it should not show up here in strands
-        strands = shard_scope.distinct.order(:strand).pluck(:strand)
-
-        target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
-          strands.each do |strand|
-            transaction_on([source_shard, target_shard]) do
-              this_strand_scope = shard_scope.where(strand: strand)
-              # we want to copy all the jobs except the one that is still running.
-              jobs_scope = this_strand_scope.where(locked_by: nil)
-
-              # 2) and part of 3) are taken care of here by creating a blocker
-              # job with next_in_strand = false. as soon as the current
-              # running job is finished it should set next_in_strand
-              # We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
-              # Since we only unlock it on the new jobs queue *after* deleting from the original
-              # the lock ensures the blocker always gets unlocked
-              first = this_strand_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
-              if first
-                first_job = ::Delayed::Job.create!(strand: strand, next_in_strand: false)
-                first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
-                first_job.queue = first.queue
-                first_job.tag = 'Kernel.sleep'
-                first_job.source = 'JobsMigrator::StrandBlocker'
-                first_job.max_attempts = 1
-                # If we ever have jobs left over from 9999 jobs moves of a single shard,
-                # something has gone terribly wrong
-                first_job.strand_order_override = -9999
-                first_job.save!
-                # the rest of 3) is taken care of here
-                # make sure that all the jobs moved over are NOT next in strand
-                ::Delayed::Job.where(next_in_strand: true, strand: strand, locked_by: nil).
-                  update_all(next_in_strand: false)
-              end
+          # 1) is taken care of because it should not show up here in strands
+          values = shard_scope.distinct.order(column).pluck(column)
 
-
-
-
-
-
-
-
-      #
-
+          target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
+            values.each do |value|
+              transaction_on([source_shard, target_shard]) do
+                value_scope = shard_scope.where(**{ column => value })
+                # we want to copy all the jobs except the one that is still running.
+                jobs_scope = value_scope.where(locked_by: nil)
+
+                # 2) and part of 3) are taken care of here by creating a blocker
+                # job with next_in_strand = false. as soon as the current
+                # running job is finished it should set next_in_strand
+                # We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
+                # Since we only unlock it on the new jobs queue *after* deleting from the original
+                # the lock ensures the blocker always gets unlocked
+                first = value_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
+                if first
+                  create_blocker_job(queue: first.queue, **{ column => value }, **blocker_job_kwargs)
+                  # the rest of 3) is taken care of here
+                  # make sure that all the jobs moved over are NOT next in strand
+                  ::Delayed::Job.where(next_in_strand: true, locked_by: nil, **{ column => value }).
+                    update_all(next_in_strand: false)
+                end
+
+                # 4) is taken care of here, by leaving next_in_strand alone and
+                # it should execute on the new shard
+                batch_move_jobs(
+                  target_shard: target_shard,
+                  source_shard: source_shard,
+                  scope: jobs_scope,
+                  batch_size: batch_size
+                ) do |job, new_job|
+                  # This ensures jobs enqueued on the old jobs shard run before jobs on the new jobs queue
+                  new_job.strand_order_override = job.strand_order_override - 1
+                end
              end
            end
          end
+        end
+      }
+
+      strand_scope = ::Delayed::Job.shard(source_shard).where.not(strand: nil)
+      singleton_scope = ::Delayed::Job.shard(source_shard).where('strand IS NULL AND singleton IS NOT NULL')
+      all_scope = ::Delayed::Job.shard(source_shard).where('strand IS NOT NULL OR singleton IS NOT NULL')
+
+      handler.call(strand_scope, :strand)
+      handler.call(singleton_scope, :singleton,
+                   { locked_at: DateTime.now, locked_by: ::Delayed::Backend::Base::ON_HOLD_BLOCKER })
 
+      shard_map = build_shard_map(all_scope, source_shard)
+      shard_map.each do |(target_shard, source_shard_ids)|
+        target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
          updated = ::Switchman::Shard.where(id: source_shard_ids, block_stranded: true).
            update_all(block_stranded: false)
          # If this is being manually re-run for some reason to clean something up, don't wait for nothing to happen
@@ -166,26 +172,40 @@ module SwitchmanInstJobs
       end
     end
 
-    def unblock_strands(target_shard)
-
-
-
-
-
-
-
-      break if ::Delayed::Job.where(id: ::Delayed::Job.select('DISTINCT ON (strand) id').
-        where.not(strand: nil).
-        where.not(shard_id: ::Switchman::Shard.where(block_stranded: true).pluck(:id)).where(
+    def unblock_strands(target_shard, batch_size: 10_000)
+      block_stranded_ids = ::Switchman::Shard.where(block_stranded: true).pluck(:id)
+      query = lambda { |column, scope|
+        ::Delayed::Job.
+          where(id: ::Delayed::Job.select("DISTINCT ON (#{column}) id").
+            where(scope).
+            where.not(shard_id: block_stranded_ids).
+            where(
              ::Delayed::Job.select(1).from("#{::Delayed::Job.quoted_table_name} dj2").
                where("dj2.next_in_strand = true OR dj2.source = 'JobsMigrator::StrandBlocker'").
-          where(
-        ).
+               where("dj2.#{column} = delayed_jobs.#{column}").arel.exists.not
+            ).
+            order(column, :strand_order_override, :id)).limit(batch_size)
+      }
+
+      target_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
+        # We only want to unlock stranded jobs where they don't belong to a blocked shard (if they *do* belong)
+        # to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
+        # this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
+        # logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
+        # batches
+
+        loop do
+          break if query.call(:strand, 'strand IS NOT NULL').update_all(next_in_strand: true).zero?
+        end
+
+        loop do
+          break if query.call(:singleton,
+                              'strand IS NULL AND singleton IS NOT NULL').update_all(next_in_strand: true).zero?
        end
      end
    end
 
-    def migrate_everything
+    def migrate_everything(batch_size: 1_000)
       source_shard = ::Switchman::Shard.current(::Delayed::Backend::ActiveRecord::AbstractJob)
       scope = ::Delayed::Job.shard(source_shard).where(strand: nil)
 
@@ -194,13 +214,26 @@ module SwitchmanInstJobs
         batch_move_jobs(
           target_shard: target_shard,
           source_shard: source_shard,
-          scope: scope.where(shard_id: source_shard_ids).where(locked_by: nil)
+          scope: scope.where(shard_id: source_shard_ids).where(locked_by: nil),
+          batch_size: batch_size
         )
       end
     end
 
    private
 
+    def create_blocker_job(**kwargs)
+      first_job = ::Delayed::Job.create!(**kwargs, next_in_strand: false)
+      first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
+      first_job.tag = 'Kernel.sleep'
+      first_job.source = 'JobsMigrator::StrandBlocker'
+      first_job.max_attempts = 1
+      # If we ever have jobs left over from 9999 jobs moves of a single shard,
+      # something has gone terribly wrong
+      first_job.strand_order_override = -9999
+      first_job.save!
+    end
+
    def build_shard_map(scope, source_shard)
      shard_ids = scope.distinct.pluck(:shard_id)
 
@@ -215,10 +248,10 @@ module SwitchmanInstJobs
      shard_map
    end
 
-    def batch_move_jobs(target_shard:, source_shard:, scope:)
+    def batch_move_jobs(target_shard:, source_shard:, scope:, batch_size:)
      while scope.exists?
        # Adapted from get_and_lock_next_available in delayed/backend/active_record.rb
-        target_jobs = scope.limit(
+        target_jobs = scope.limit(batch_size).lock('FOR UPDATE SKIP LOCKED')
 
        query = source_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
          <<~SQL
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: switchman-inst-jobs
 version: !ruby/object:Gem::Version
-  version: 4.0.3
+  version: 4.0.4
 platform: ruby
 authors:
 - Bryan Petty
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2022-02-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: inst-jobs
@@ -340,6 +340,10 @@ files:
 - db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb
 - db/migrate/20211220112800_fix_singleton_race_condition_insert.rb
 - db/migrate/20211220113000_fix_singleton_race_condition_delete.rb
+- db/migrate/20220127091200_fix_singleton_unique_constraint.rb
+- db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb
+- db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb
+- db/migrate/20220203063200_remove_old_singleton_index.rb
 - lib/switchman-inst-jobs.rb
 - lib/switchman_inst_jobs.rb
 - lib/switchman_inst_jobs/active_record/connection_adapters/connection_pool.rb