switchman-inst-jobs 4.0.2 → 4.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/db/migrate/20101216224513_create_delayed_jobs.rb +0 -4
  3. data/db/migrate/20110208031356_add_delayed_jobs_tag.rb +0 -4
  4. data/db/migrate/20110426161613_add_delayed_jobs_max_attempts.rb +0 -4
  5. data/db/migrate/20110516225834_add_delayed_jobs_strand.rb +0 -4
  6. data/db/migrate/20110531144916_cleanup_delayed_jobs_indexes.rb +1 -5
  7. data/db/migrate/20110610213249_optimize_delayed_jobs.rb +0 -9
  8. data/db/migrate/20110831210257_add_delayed_jobs_next_in_strand.rb +6 -10
  9. data/db/migrate/20120510004759_delayed_jobs_delete_trigger_lock_for_update.rb +0 -4
  10. data/db/migrate/20120531150712_drop_psql_jobs_pop_fn.rb +0 -4
  11. data/db/migrate/20120607164022_delayed_jobs_use_advisory_locks.rb +0 -4
  12. data/db/migrate/20120607181141_index_jobs_on_locked_by.rb +0 -4
  13. data/db/migrate/20120608191051_add_jobs_run_at_index.rb +0 -4
  14. data/db/migrate/20120927184213_change_delayed_jobs_handler_to_text.rb +0 -4
  15. data/db/migrate/20140505215131_add_failed_jobs_original_job_id.rb +0 -4
  16. data/db/migrate/20140505215510_copy_failed_jobs_original_id.rb +2 -6
  17. data/db/migrate/20140505223637_drop_failed_jobs_original_id.rb +0 -4
  18. data/db/migrate/20140512213941_add_source_to_jobs.rb +0 -4
  19. data/db/migrate/20150807133223_add_max_concurrent_to_jobs.rb +0 -4
  20. data/db/migrate/20151123210429_add_expires_at_to_jobs.rb +0 -4
  21. data/db/migrate/20151210162949_improve_max_concurrent.rb +0 -4
  22. data/db/migrate/20161206323555_add_back_default_string_limits_jobs.rb +4 -8
  23. data/db/migrate/20170308045400_add_shard_id_to_delayed_jobs.rb +0 -4
  24. data/db/migrate/20181217155351_speed_up_max_concurrent_triggers.rb +0 -4
  25. data/db/migrate/20190726154743_make_critical_columns_not_null.rb +0 -4
  26. data/db/migrate/20200330230722_add_id_to_get_delayed_jobs_index.rb +0 -4
  27. data/db/migrate/20200824222232_speed_up_max_concurrent_delete_trigger.rb +0 -4
  28. data/db/migrate/20200825011002_add_strand_order_override.rb +0 -4
  29. data/db/migrate/20210812210128_add_singleton_column.rb +6 -6
  30. data/db/migrate/20210917232626_add_delete_conflicting_singletons_before_unlock_trigger.rb +1 -1
  31. data/db/migrate/20211220112800_fix_singleton_race_condition_insert.rb +59 -0
  32. data/db/migrate/20211220113000_fix_singleton_race_condition_delete.rb +207 -0
  33. data/db/migrate/20220127091200_fix_singleton_unique_constraint.rb +31 -0
  34. data/db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb +60 -0
  35. data/db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb +209 -0
  36. data/db/migrate/20220203063200_remove_old_singleton_index.rb +31 -0
  37. data/lib/switchman_inst_jobs/delayed/backend/active_record/abstract_job.rb +25 -0
  38. data/lib/switchman_inst_jobs/engine.rb +6 -2
  39. data/lib/switchman_inst_jobs/jobs_migrator.rb +115 -66
  40. data/lib/switchman_inst_jobs/version.rb +1 -1
  41. data/lib/switchman_inst_jobs.rb +4 -4
  42. metadata +11 -5
  43. data/lib/switchman_inst_jobs/active_record/connection_adapters/connection_pool.rb +0 -15
@@ -0,0 +1,207 @@
1
+ # frozen_string_literal: true
2
+
3
+ class FixSingletonRaceConditionDelete < ActiveRecord::Migration[5.2] # Redefines the delayed_jobs_after_delete_row_tr_fn trigger function; the `up` body takes an advisory lock keyed on 'singleton:' + OLD.singleton, the `down` body does not.
4
+ def up # installs the function version that locks the singleton before looking for a singleton job to mark next_in_strand
5
+ execute(<<~SQL)
6
+ CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
7
+ DECLARE
8
+ next_strand varchar;
9
+ running_count integer;
10
+ should_lock boolean;
11
+ should_be_precise boolean;
12
+ update_query varchar;
13
+ skip_locked varchar;
14
+ transition boolean;
15
+ BEGIN
16
+ IF OLD.strand IS NOT NULL THEN
17
+ should_lock := true;
18
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
19
+
20
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
21
+ running_count := (SELECT COUNT(*) FROM (
22
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
23
+ ) subquery_for_count);
24
+ should_lock := running_count < OLD.max_concurrent;
25
+ END IF;
26
+
27
+ IF should_lock THEN
28
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
29
+ END IF;
30
+
31
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
32
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
33
+ -- singleton we grab isn't already running (which is a simple existence check, since
34
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
35
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
36
+ SELECT id FROM delayed_jobs j2
37
+ WHERE next_in_strand=false AND
38
+ j2.strand=$1.strand AND
39
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
40
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
41
+ LIMIT ';
42
+
43
+ IF should_be_precise THEN
44
+ running_count := (SELECT COUNT(*) FROM (
45
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
46
+ ) s);
47
+ IF running_count < OLD.max_concurrent THEN
48
+ update_query := update_query || '($1.max_concurrent - $2)';
49
+ ELSE
50
+ -- we have too many running already; just bail
51
+ RETURN OLD;
52
+ END IF;
53
+ ELSE
54
+ update_query := update_query || '1';
55
+
56
+ -- n-strands don't require precise ordering; we can make this query more performant
57
+ IF OLD.max_concurrent > 1 THEN
58
+ skip_locked := ' SKIP LOCKED';
59
+ END IF;
60
+ END IF;
61
+
62
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
63
+ EXECUTE update_query USING OLD, running_count;
64
+ END IF;
65
+
66
+ IF OLD.singleton IS NOT NULL THEN
67
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
68
+
69
+ transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
70
+
71
+ IF transition THEN
72
+ next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
73
+
74
+ IF next_strand IS NOT NULL THEN
75
+ -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
76
+ IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
77
+ -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
78
+ RETURN OLD;
79
+ END IF;
80
+ END IF;
81
+ ELSIF OLD.strand IS NOT NULL THEN
82
+ -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
83
+ RETURN OLD;
84
+ END IF;
85
+
86
+ -- handles transitioning a singleton from stranded to not stranded --
87
+ -- handles transitioning a singleton from unstranded to stranded --
88
+ -- handles transitioning a singleton from strand A to strand B --
89
+ -- these transitions are a relatively rare case, so we take a shortcut and --
90
+ -- only start the next singleton if its strand does not currently have any running jobs --
91
+ -- if it does, the next stranded job that finishes will start this singleton if it can --
92
+ UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
93
+ SELECT id FROM delayed_jobs j2
94
+ WHERE next_in_strand=false AND
95
+ j2.singleton=OLD.singleton AND
96
+ j2.locked_by IS NULL AND
97
+ (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
98
+ FOR UPDATE
99
+ );
100
+ END IF;
101
+ RETURN OLD;
102
+ END;
103
+ $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
104
+ SQL
105
+ end
106
+
107
+ def down
108
+ execute(<<~SQL)
109
+ CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
110
+ DECLARE
111
+ next_strand varchar;
112
+ running_count integer;
113
+ should_lock boolean;
114
+ should_be_precise boolean;
115
+ update_query varchar;
116
+ skip_locked varchar;
117
+ transition boolean;
118
+ BEGIN
119
+ IF OLD.strand IS NOT NULL THEN
120
+ should_lock := true;
121
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
122
+
123
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
124
+ running_count := (SELECT COUNT(*) FROM (
125
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
126
+ ) subquery_for_count);
127
+ should_lock := running_count < OLD.max_concurrent;
128
+ END IF;
129
+
130
+ IF should_lock THEN
131
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
132
+ END IF;
133
+
134
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
135
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
136
+ -- singleton we grab isn't already running (which is a simple existence check, since
137
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
138
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
139
+ SELECT id FROM delayed_jobs j2
140
+ WHERE next_in_strand=false AND
141
+ j2.strand=$1.strand AND
142
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
143
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
144
+ LIMIT ';
145
+
146
+ IF should_be_precise THEN
147
+ running_count := (SELECT COUNT(*) FROM (
148
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
149
+ ) s);
150
+ IF running_count < OLD.max_concurrent THEN
151
+ update_query := update_query || '($1.max_concurrent - $2)';
152
+ ELSE
153
+ -- we have too many running already; just bail
154
+ RETURN OLD;
155
+ END IF;
156
+ ELSE
157
+ update_query := update_query || '1';
158
+
159
+ -- n-strands don't require precise ordering; we can make this query more performant
160
+ IF OLD.max_concurrent > 1 THEN
161
+ skip_locked := ' SKIP LOCKED';
162
+ END IF;
163
+ END IF;
164
+
165
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
166
+ EXECUTE update_query USING OLD, running_count;
167
+ END IF;
168
+
169
+ IF OLD.singleton IS NOT NULL THEN
170
+ transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
171
+
172
+ IF transition THEN
173
+ next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
174
+
175
+ IF next_strand IS NOT NULL THEN
176
+ -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
177
+ IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
178
+ -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
179
+ RETURN OLD;
180
+ END IF;
181
+ END IF;
182
+ ELSIF OLD.strand IS NOT NULL THEN
183
+ -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
184
+ RETURN OLD;
185
+ END IF;
186
+
187
+ -- handles transitioning a singleton from stranded to not stranded --
188
+ -- handles transitioning a singleton from unstranded to stranded --
189
+ -- handles transitioning a singleton from strand A to strand B --
190
+ -- these transitions are a relatively rare case, so we take a shortcut and --
191
+ -- only start the next singleton if its strand does not currently have any running jobs --
192
+ -- if it does, the next stranded job that finishes will start this singleton if it can --
193
+ UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
194
+ SELECT id FROM delayed_jobs j2
195
+ WHERE next_in_strand=false AND
196
+ j2.singleton=OLD.singleton AND
197
+ j2.locked_by IS NULL AND
198
+ (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
199
+ FOR UPDATE
200
+ );
201
+ END IF;
202
+ RETURN OLD;
203
+ END;
204
+ $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
205
+ SQL
206
+ end
207
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ class FixSingletonUniqueConstraint < ActiveRecord::Migration[5.2] # Rebuilds the two unique partial indexes on delayed_jobs.singleton so their predicates account for ON_HOLD_LOCKED_BY.
4
+ disable_ddl_transaction! # the indexes in `up` are added with algorithm: :concurrently
5
+
6
+ def up # keeps the existing indexes under *_old names, then adds replacements under the original names
7
+ rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_not_running', 'index_delayed_jobs_on_singleton_not_running_old'
8
+ rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_running', 'index_delayed_jobs_on_singleton_running_old'
9
+
10
+ # only one job can be queued for a singleton; "queued" here means locked_by is NULL or equals ON_HOLD_LOCKED_BY
11
+ add_index :delayed_jobs,
12
+ :singleton,
13
+ where: "singleton IS NOT NULL AND (locked_by IS NULL OR locked_by = '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}')",
14
+ unique: true,
15
+ name: 'index_delayed_jobs_on_singleton_not_running',
16
+ algorithm: :concurrently
17
+
18
+ # only one job can be running for a singleton; "running" here means locked_by is set and is not ON_HOLD_LOCKED_BY
19
+ add_index :delayed_jobs,
20
+ :singleton,
21
+ where: "singleton IS NOT NULL AND locked_by IS NOT NULL AND locked_by <> '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'",
22
+ unique: true,
23
+ name: 'index_delayed_jobs_on_singleton_running',
24
+ algorithm: :concurrently
25
+ end
26
+
27
+ def down # drops only the *_old indexes; NOTE(review): does not remove the indexes added in `up` - presumably relies on a later migration's `down` having swapped the names first; confirm
28
+ remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_not_running_old'
29
+ remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_running_old'
30
+ end
31
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ class UpdateInsertTriggerForSingletonUniqueConstraintChange < ActiveRecord::Migration[5.2] # Redefines the delayed_jobs_before_insert_row_tr_fn trigger function in both directions.
4
+ def change
5
+ reversible do |direction|
6
+ direction.up do # singleton lookup tests locked_by against ON_HOLD_LOCKED_BY (IS NULL OR = OR <>); the `down` body uses IS NULL OR IS NOT NULL instead
7
+ execute(<<~SQL)
8
+ CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
9
+ BEGIN
10
+ IF NEW.strand IS NOT NULL THEN
11
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
12
+ IF (SELECT COUNT(*) FROM (
13
+ SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
14
+ ) s) = NEW.max_concurrent THEN
15
+ NEW.next_in_strand := false;
16
+ END IF;
17
+ END IF;
18
+ IF NEW.singleton IS NOT NULL THEN
19
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
20
+ -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
21
+ -- rather than doing a seq scan
22
+ PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by = '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}' OR locked_by <> '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}');
23
+ IF FOUND THEN
24
+ NEW.next_in_strand := false;
25
+ END IF;
26
+ END IF;
27
+ RETURN NEW;
28
+ END;
29
+ $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
30
+ SQL
31
+ end
32
+ direction.down do
33
+ execute(<<~SQL)
34
+ CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
35
+ BEGIN
36
+ IF NEW.strand IS NOT NULL THEN
37
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
38
+ IF (SELECT COUNT(*) FROM (
39
+ SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
40
+ ) s) = NEW.max_concurrent THEN
41
+ NEW.next_in_strand := false;
42
+ END IF;
43
+ END IF;
44
+ IF NEW.singleton IS NOT NULL THEN
45
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
46
+ -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
47
+ -- rather than doing a seq scan
48
+ PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
49
+ IF FOUND THEN
50
+ NEW.next_in_strand := false;
51
+ END IF;
52
+ END IF;
53
+ RETURN NEW;
54
+ END;
55
+ $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
56
+ SQL
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,209 @@
1
+ # frozen_string_literal: true
2
+
3
+ class UpdateDeleteTriggerForSingletonUniqueConstraintChange < ActiveRecord::Migration[5.2] # Redefines the delayed_jobs_after_delete_row_tr_fn trigger function in both directions.
4
+ def up # the j3.locked_by predicate in the dynamic UPDATE query is written against ON_HOLD_LOCKED_BY; the `down` body uses IS NULL OR IS NOT NULL there
5
+ execute(<<~SQL)
6
+ CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
7
+ DECLARE
8
+ next_strand varchar;
9
+ running_count integer;
10
+ should_lock boolean;
11
+ should_be_precise boolean;
12
+ update_query varchar;
13
+ skip_locked varchar;
14
+ transition boolean;
15
+ BEGIN
16
+ IF OLD.strand IS NOT NULL THEN
17
+ should_lock := true;
18
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
19
+
20
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
21
+ running_count := (SELECT COUNT(*) FROM (
22
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
23
+ ) subquery_for_count);
24
+ should_lock := running_count < OLD.max_concurrent;
25
+ END IF;
26
+
27
+ IF should_lock THEN
28
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
29
+ END IF;
30
+
31
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
32
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
33
+ -- singleton we grab isn't already running (which is a simple existence check, since
34
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
35
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
36
+ SELECT id FROM delayed_jobs j2
37
+ WHERE next_in_strand=false AND
38
+ j2.strand=$1.strand AND
39
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by = ''#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'' OR j3.locked_by <> ''#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'')))
40
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
41
+ LIMIT ';
42
+
43
+ IF should_be_precise THEN
44
+ running_count := (SELECT COUNT(*) FROM (
45
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
46
+ ) s);
47
+ IF running_count < OLD.max_concurrent THEN
48
+ update_query := update_query || '($1.max_concurrent - $2)';
49
+ ELSE
50
+ -- we have too many running already; just bail
51
+ RETURN OLD;
52
+ END IF;
53
+ ELSE
54
+ update_query := update_query || '1';
55
+
56
+ -- n-strands don't require precise ordering; we can make this query more performant
57
+ IF OLD.max_concurrent > 1 THEN
58
+ skip_locked := ' SKIP LOCKED';
59
+ END IF;
60
+ END IF;
61
+
62
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
63
+ EXECUTE update_query USING OLD, running_count;
64
+ END IF;
65
+
66
+ IF OLD.singleton IS NOT NULL THEN
67
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
68
+
69
+ transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
70
+
71
+ IF transition THEN
72
+ next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
73
+
74
+ IF next_strand IS NOT NULL THEN
75
+ -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
76
+ IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
77
+ -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
78
+ RETURN OLD;
79
+ END IF;
80
+ END IF;
81
+ ELSIF OLD.strand IS NOT NULL THEN
82
+ -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
83
+ RETURN OLD;
84
+ END IF;
85
+
86
+ -- handles transitioning a singleton from stranded to not stranded --
87
+ -- handles transitioning a singleton from unstranded to stranded --
88
+ -- handles transitioning a singleton from strand A to strand B --
89
+ -- these transitions are a relatively rare case, so we take a shortcut and --
90
+ -- only start the next singleton if its strand does not currently have any running jobs --
91
+ -- if it does, the next stranded job that finishes will start this singleton if it can --
92
+ UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
93
+ SELECT id FROM delayed_jobs j2
94
+ WHERE next_in_strand=false AND
95
+ j2.singleton=OLD.singleton AND
96
+ j2.locked_by IS NULL AND
97
+ (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
98
+ FOR UPDATE
99
+ );
100
+ END IF;
101
+ RETURN OLD;
102
+ END;
103
+ $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
104
+ SQL
105
+ end
106
+
107
+ def down
108
+ execute(<<~SQL)
109
+ CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
110
+ DECLARE
111
+ next_strand varchar;
112
+ running_count integer;
113
+ should_lock boolean;
114
+ should_be_precise boolean;
115
+ update_query varchar;
116
+ skip_locked varchar;
117
+ transition boolean;
118
+ BEGIN
119
+ IF OLD.strand IS NOT NULL THEN
120
+ should_lock := true;
121
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
122
+
123
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
124
+ running_count := (SELECT COUNT(*) FROM (
125
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
126
+ ) subquery_for_count);
127
+ should_lock := running_count < OLD.max_concurrent;
128
+ END IF;
129
+
130
+ IF should_lock THEN
131
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
132
+ END IF;
133
+
134
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
135
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
136
+ -- singleton we grab isn't already running (which is a simple existence check, since
137
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
138
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
139
+ SELECT id FROM delayed_jobs j2
140
+ WHERE next_in_strand=false AND
141
+ j2.strand=$1.strand AND
142
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
143
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
144
+ LIMIT ';
145
+
146
+ IF should_be_precise THEN
147
+ running_count := (SELECT COUNT(*) FROM (
148
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
149
+ ) s);
150
+ IF running_count < OLD.max_concurrent THEN
151
+ update_query := update_query || '($1.max_concurrent - $2)';
152
+ ELSE
153
+ -- we have too many running already; just bail
154
+ RETURN OLD;
155
+ END IF;
156
+ ELSE
157
+ update_query := update_query || '1';
158
+
159
+ -- n-strands don't require precise ordering; we can make this query more performant
160
+ IF OLD.max_concurrent > 1 THEN
161
+ skip_locked := ' SKIP LOCKED';
162
+ END IF;
163
+ END IF;
164
+
165
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
166
+ EXECUTE update_query USING OLD, running_count;
167
+ END IF;
168
+
169
+ IF OLD.singleton IS NOT NULL THEN
170
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
171
+
172
+ transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
173
+
174
+ IF transition THEN
175
+ next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
176
+
177
+ IF next_strand IS NOT NULL THEN
178
+ -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
179
+ IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
180
+ -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
181
+ RETURN OLD;
182
+ END IF;
183
+ END IF;
184
+ ELSIF OLD.strand IS NOT NULL THEN
185
+ -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
186
+ RETURN OLD;
187
+ END IF;
188
+
189
+ -- handles transitioning a singleton from stranded to not stranded --
190
+ -- handles transitioning a singleton from unstranded to stranded --
191
+ -- handles transitioning a singleton from strand A to strand B --
192
+ -- these transitions are a relatively rare case, so we take a shortcut and --
193
+ -- only start the next singleton if its strand does not currently have any running jobs --
194
+ -- if it does, the next stranded job that finishes will start this singleton if it can --
195
+ UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
196
+ SELECT id FROM delayed_jobs j2
197
+ WHERE next_in_strand=false AND
198
+ j2.singleton=OLD.singleton AND
199
+ j2.locked_by IS NULL AND
200
+ (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
201
+ FOR UPDATE
202
+ );
203
+ END IF;
204
+ RETURN OLD;
205
+ END;
206
+ $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
207
+ SQL
208
+ end
209
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ class RemoveOldSingletonIndex < ActiveRecord::Migration[5.2] # Drops the two *_old unique indexes on delayed_jobs.singleton by name.
4
+ disable_ddl_transaction! # the indexes in `down` are added with algorithm: :concurrently
5
+
6
+ def up
7
+ remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_not_running_old'
8
+ remove_index :delayed_jobs, name: 'index_delayed_jobs_on_singleton_running_old'
9
+ end
10
+
11
+ def down # renames the current indexes to *_old, then adds indexes under the original names whose predicates only test locked_by for NULL / NOT NULL
12
+ rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_not_running', 'index_delayed_jobs_on_singleton_not_running_old'
13
+ rename_index :delayed_jobs, 'index_delayed_jobs_on_singleton_running', 'index_delayed_jobs_on_singleton_running_old'
14
+
15
+ # only one job can be queued for a singleton; "queued" here means locked_by IS NULL
16
+ add_index :delayed_jobs,
17
+ :singleton,
18
+ where: 'singleton IS NOT NULL AND locked_by IS NULL',
19
+ unique: true,
20
+ name: 'index_delayed_jobs_on_singleton_not_running',
21
+ algorithm: :concurrently
22
+
23
+ # only one job can be running for a singleton; "running" here means locked_by IS NOT NULL
24
+ add_index :delayed_jobs,
25
+ :singleton,
26
+ where: 'singleton IS NOT NULL AND locked_by IS NOT NULL',
27
+ unique: true,
28
+ name: 'index_delayed_jobs_on_singleton_running',
29
+ algorithm: :concurrently
30
+ end
31
+ end
@@ -0,0 +1,25 @@
1
+ module SwitchmanInstJobs
2
+ module Delayed
3
+ module Backend
4
+ module ActiveRecord
5
+ module AbstractJob # Prepend-style extension: prepends ClassMethods onto the base's singleton class and calls base.sharded_model (see self.prepended).
6
+ module ClassMethods
7
+ def current_switchman_shard # returns the :switchman_shard of a matching connected_to_stack entry, else the fallback on the last line
8
+ connected_to_stack.reverse_each do |hash| # scans from the last entry backwards (presumably the innermost connected_to block - confirm)
9
+ return hash[:switchman_shard] if hash[:switchman_shard] && hash[:klasses].include?(connection_classes)
10
+ end
11
+
12
+ ::ActiveRecord::Base.current_switchman_shard.delayed_jobs_shard # fallback when no stack entry matched
13
+ end
14
+ end
15
+
16
+ def self.prepended(base) # hook invoked when this module is prepended to `base`
17
+ base.singleton_class.prepend(ClassMethods)
18
+
19
+ base.sharded_model
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
@@ -21,13 +21,17 @@ module SwitchmanInstJobs
21
21
 
22
22
  # Ensure jobs get unblocked on the new shard if they exist
23
23
  ::Delayed::Worker.lifecycle.after(:perform) do |_worker, job|
24
- if job.strand
24
+ if job.strand || job.singleton
25
+ column = job.strand ? :strand : :singleton
26
+
25
27
  ::Switchman::Shard.clear_cache
26
28
  ::Switchman::Shard.default.activate do
27
29
  current_job_shard = ::Switchman::Shard.lookup(job.shard_id).delayed_jobs_shard
28
30
  if current_job_shard != ::Switchman::Shard.current(::Delayed::Backend::ActiveRecord::AbstractJob)
29
31
  current_job_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
30
- j = ::Delayed::Job.where(strand: job.strand).next_in_strand_order.first
32
+ ::Delayed::Job.where(source: 'JobsMigrator::StrandBlocker', **{ column => job.try(column) }).delete_all
33
+
34
+ j = ::Delayed::Job.where(**{ column => job.try(column) }).next_in_strand_order.first
31
35
  j.update_column(:next_in_strand, true) if j && !j.next_in_strand
32
36
  end
33
37
  end