inst-jobs 2.4.10 → 3.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/db/migrate/20210929204903_update_conflicting_singleton_function_to_use_index.rb +27 -0
  3. data/db/migrate/20211101190934_update_after_delete_trigger_for_singleton_index.rb +137 -0
  4. data/db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb +171 -0
  5. data/db/migrate/20211220112800_fix_singleton_race_condition_insert.rb +59 -0
  6. data/db/migrate/20211220113000_fix_singleton_race_condition_delete.rb +207 -0
  7. data/db/migrate/20220127091200_fix_singleton_unique_constraint.rb +31 -0
  8. data/db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb +60 -0
  9. data/db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb +209 -0
  10. data/db/migrate/20220203063200_remove_old_singleton_index.rb +31 -0
  11. data/db/migrate/20220328152900_add_failed_jobs_indicies.rb +12 -0
  12. data/db/migrate/20220519204546_add_requeued_job_id_to_failed_jobs.rb +7 -0
  13. data/exe/inst_jobs +1 -1
  14. data/lib/delayed/backend/active_record.rb +62 -15
  15. data/lib/delayed/backend/base.rb +20 -5
  16. data/lib/delayed/batch.rb +1 -1
  17. data/lib/delayed/lifecycle.rb +8 -1
  18. data/lib/delayed/message_sending.rb +1 -1
  19. data/lib/delayed/periodic.rb +1 -1
  20. data/lib/delayed/pool.rb +12 -2
  21. data/lib/delayed/rails_reloader_plugin.rb +30 -0
  22. data/lib/delayed/server.rb +8 -2
  23. data/lib/delayed/settings.rb +3 -1
  24. data/lib/delayed/version.rb +1 -1
  25. data/lib/delayed/work_queue/parent_process/server.rb +43 -12
  26. data/lib/delayed/worker/health_check.rb +1 -1
  27. data/lib/delayed/worker/process_helper.rb +3 -3
  28. data/lib/delayed/worker.rb +6 -24
  29. metadata +65 -83
  30. data/spec/active_record_job_spec.rb +0 -294
  31. data/spec/delayed/cli_spec.rb +0 -25
  32. data/spec/delayed/daemon_spec.rb +0 -38
  33. data/spec/delayed/message_sending_spec.rb +0 -108
  34. data/spec/delayed/periodic_spec.rb +0 -32
  35. data/spec/delayed/server_spec.rb +0 -103
  36. data/spec/delayed/settings_spec.rb +0 -48
  37. data/spec/delayed/work_queue/in_process_spec.rb +0 -31
  38. data/spec/delayed/work_queue/parent_process/client_spec.rb +0 -87
  39. data/spec/delayed/work_queue/parent_process/server_spec.rb +0 -233
  40. data/spec/delayed/work_queue/parent_process_spec.rb +0 -60
  41. data/spec/delayed/worker/consul_health_check_spec.rb +0 -63
  42. data/spec/delayed/worker/health_check_spec.rb +0 -134
  43. data/spec/delayed/worker_spec.rb +0 -100
  44. data/spec/migrate/20140924140513_add_story_table.rb +0 -9
  45. data/spec/sample_jobs.rb +0 -79
  46. data/spec/shared/delayed_batch.rb +0 -105
  47. data/spec/shared/delayed_method.rb +0 -287
  48. data/spec/shared/performable_method.rb +0 -75
  49. data/spec/shared/shared_backend.rb +0 -989
  50. data/spec/shared/testing.rb +0 -50
  51. data/spec/shared/worker.rb +0 -413
  52. data/spec/shared_jobs_specs.rb +0 -17
  53. data/spec/spec_helper.rb +0 -134
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 65052c093825ca46de07a054fc2b62925cf2d1cb0a3fcb877b408e5c88edc865
4
- data.tar.gz: '08d1c103f2c41f11f5989a0b5e2bb857e7d8ea250ba995dcd1ddae62510426cf'
3
+ metadata.gz: 0475c3b076cb2d06a380ab5439899071d7b242f0b984efc656c07cdac42a30ef
4
+ data.tar.gz: 6b327fe5fa9e90ff3e0a9cc6fbbf3bf141fac324999c7b3fe4b86c60196ddab0
5
5
  SHA512:
6
- metadata.gz: 3e28511d5c23fcdccd976e9d8f9f09ae318c50154d01223b84b3f60de42e34dbb55ed6735618224865b8c8050e15eed78bd9c8505b85abb56515c41c4266c985
7
- data.tar.gz: bc60ce604e1b5077251c1c5855a3bd0582950c03c0ed0423b0daf25e8750d6e2f6dfbb32b18e99ace156a1a5232ac8e827377997185bd8c4889abe20e6e7249b
6
+ metadata.gz: fc255ab122c9744ba5222125898d43d12e668d091c5ad7f034693e5ce6d224be21c107caf67261d3bdef03b55f481baa4d0d3912e3c9bff8e1039d8ff12f3f35
7
+ data.tar.gz: 6a7d73a994637c0241d54da38e98a8c25008345622b9410a867da603740c6eb8133e7bb1d36e8dd62d479148e7bf43bc5e0ea0f1a59f63dd1f0b8a7763467e19
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
# Migration that redefines the PL/pgSQL trigger function
# delayed_jobs_before_unlock_delete_conflicting_singletons_row_fn with
# CREATE OR REPLACE FUNCTION. Only the function body changes; no trigger is
# created or dropped here.
+ class UpdateConflictingSingletonFunctionToUseIndex < ActiveRecord::Migration[5.2]
4
# New body: deletes every other delayed_jobs row that has the same singleton as
# OLD and locked_by IS NULL, then returns NEW.
# NOTE(review): judging by the migration name, the added locked_by IS NULL
# predicate is presumably what lets the planner use a partial index on
# singleton - confirm against the index definitions.
+ def up
5
+ execute(<<~SQL)
6
+ CREATE OR REPLACE FUNCTION delayed_jobs_before_unlock_delete_conflicting_singletons_row_fn () RETURNS trigger AS $$
7
+ BEGIN
8
+ DELETE FROM delayed_jobs WHERE id<>OLD.id AND singleton=OLD.singleton AND locked_by IS NULL;
9
+ RETURN NEW;
10
+ END;
11
+ $$ LANGUAGE plpgsql;
12
+ SQL
13
+ end
14
+
15
# Rollback body: first checks that any row with OLD.singleton exists, then
# deletes every other row with that singleton regardless of locked_by.
+ def down
16
+ execute(<<~SQL)
17
+ CREATE OR REPLACE FUNCTION delayed_jobs_before_unlock_delete_conflicting_singletons_row_fn () RETURNS trigger AS $$
18
+ BEGIN
19
+ IF EXISTS (SELECT 1 FROM delayed_jobs j2 WHERE j2.singleton=OLD.singleton) THEN
20
+ DELETE FROM delayed_jobs WHERE id<>OLD.id AND singleton=OLD.singleton;
21
+ END IF;
22
+ RETURN NEW;
23
+ END;
24
+ $$ LANGUAGE plpgsql;
25
+ SQL
26
+ end
27
+ end
@@ -0,0 +1,137 @@
1
+ # frozen_string_literal: true
2
+
3
# Migration that redefines the PL/pgSQL trigger function
# delayed_jobs_after_delete_row_tr_fn. For a deleted row with a strand, the
# function sets next_in_strand=true on queued rows of the same strand (taking a
# transaction-level advisory lock keyed on the strand when should_lock is true);
# for a deleted row with only a singleton, it releases queued rows of that
# singleton.
+ class UpdateAfterDeleteTriggerForSingletonIndex < ActiveRecord::Migration[6.0]
4
# Differences from the body restored by `down`:
#  * the j3 existence check gains the always-true predicate
#    (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL);
#  * the singleton-only UPDATE is restricted to rows with locked_by IS NULL.
# NOTE(review): going by the migration name these predicates are presumably
# there so the planner can use the partial indexes on singleton - confirm.
+ def up
5
+ execute(<<~SQL)
6
+ CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
7
+ DECLARE
8
+ running_count integer;
9
+ should_lock boolean;
10
+ should_be_precise boolean;
11
+ update_query varchar;
12
+ skip_locked varchar;
13
+ BEGIN
14
+ IF OLD.strand IS NOT NULL THEN
15
+ should_lock := true;
16
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
17
+
18
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
19
+ running_count := (SELECT COUNT(*) FROM (
20
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
21
+ ) subquery_for_count);
22
+ should_lock := running_count < OLD.max_concurrent;
23
+ END IF;
24
+
25
+ IF should_lock THEN
26
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
27
+ END IF;
28
+
29
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
30
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
31
+ -- singleton we grab isn't already running (which is a simple existence check, since
32
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
33
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
34
+ SELECT id FROM delayed_jobs j2
35
+ WHERE next_in_strand=false AND
36
+ j2.strand=$1.strand AND
37
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
38
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
39
+ LIMIT ';
40
+
41
+ IF should_be_precise THEN
42
+ running_count := (SELECT COUNT(*) FROM (
43
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
44
+ ) s);
45
+ IF running_count < OLD.max_concurrent THEN
46
+ update_query := update_query || '($1.max_concurrent - $2)';
47
+ ELSE
48
+ -- we have too many running already; just bail
49
+ RETURN OLD;
50
+ END IF;
51
+ ELSE
52
+ update_query := update_query || '1';
53
+
54
+ -- n-strands don't require precise ordering; we can make this query more performant
55
+ IF OLD.max_concurrent > 1 THEN
56
+ skip_locked := ' SKIP LOCKED';
57
+ END IF;
58
+ END IF;
59
+
60
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
61
+ EXECUTE update_query USING OLD, running_count;
62
+ ELSIF OLD.singleton IS NOT NULL THEN
63
+ UPDATE delayed_jobs SET next_in_strand = 't' WHERE singleton=OLD.singleton AND next_in_strand=false AND locked_by IS NULL;
64
+ END IF;
65
+ RETURN OLD;
66
+ END;
67
+ $$ LANGUAGE plpgsql;
68
+ SQL
69
+ end
70
+
71
# Rollback body: the same function without the locked_by predicates described
# on `up` (plain j3 existence check, singleton UPDATE not filtered by locked_by).
+ def down
72
+ execute(<<~SQL)
73
+ CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
74
+ DECLARE
75
+ running_count integer;
76
+ should_lock boolean;
77
+ should_be_precise boolean;
78
+ update_query varchar;
79
+ skip_locked varchar;
80
+ BEGIN
81
+ IF OLD.strand IS NOT NULL THEN
82
+ should_lock := true;
83
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
84
+
85
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
86
+ running_count := (SELECT COUNT(*) FROM (
87
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
88
+ ) subquery_for_count);
89
+ should_lock := running_count < OLD.max_concurrent;
90
+ END IF;
91
+
92
+ IF should_lock THEN
93
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
94
+ END IF;
95
+
96
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
97
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
98
+ -- singleton we grab isn't already running (which is a simple existence check, since
99
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
100
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
101
+ SELECT id FROM delayed_jobs j2
102
+ WHERE next_in_strand=false AND
103
+ j2.strand=$1.strand AND
104
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id))
105
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
106
+ LIMIT ';
107
+
108
+ IF should_be_precise THEN
109
+ running_count := (SELECT COUNT(*) FROM (
110
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
111
+ ) s);
112
+ IF running_count < OLD.max_concurrent THEN
113
+ update_query := update_query || '($1.max_concurrent - $2)';
114
+ ELSE
115
+ -- we have too many running already; just bail
116
+ RETURN OLD;
117
+ END IF;
118
+ ELSE
119
+ update_query := update_query || '1';
120
+
121
+ -- n-strands don't require precise ordering; we can make this query more performant
122
+ IF OLD.max_concurrent > 1 THEN
123
+ skip_locked := ' SKIP LOCKED';
124
+ END IF;
125
+ END IF;
126
+
127
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
128
+ EXECUTE update_query USING OLD, running_count;
129
+ ELSIF OLD.singleton IS NOT NULL THEN
130
+ UPDATE delayed_jobs SET next_in_strand = 't' WHERE singleton=OLD.singleton AND next_in_strand=false;
131
+ END IF;
132
+ RETURN OLD;
133
+ END;
134
+ $$ LANGUAGE plpgsql;
135
+ SQL
136
+ end
137
+ end
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
# Migration that redefines the PL/pgSQL trigger function
# delayed_jobs_after_delete_row_tr_fn so that a deleted singleton row is also
# handled when the queued row of the same singleton sits on a different strand
# (or on no strand) than the deleted row.
+ class UpdateAfterDeleteTriggerForSingletonTransitionCases < ActiveRecord::Migration[6.0]
4
# Differences from the body restored by `down`:
#  * the singleton handling is a separate IF block instead of an ELSIF, so it
#    is reached for rows that have a strand as well (unless the strand branch
#    already returned early);
#  * `transition` is true when another unlocked row with the same singleton has
#    a strand that IS DISTINCT FROM OLD.strand;
#  * on a transition to a non-NULL strand the function tries
#    pg_try_advisory_xact_lock on that strand and returns early if it fails;
#  * with no transition and a non-NULL OLD.strand it returns early;
#  * otherwise it sets next_in_strand=true on unlocked queued rows of the
#    singleton whose strand is NULL or contains no other row.
+ def up
5
+ execute(<<~SQL)
6
+ CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
7
+ DECLARE
8
+ next_strand varchar;
9
+ running_count integer;
10
+ should_lock boolean;
11
+ should_be_precise boolean;
12
+ update_query varchar;
13
+ skip_locked varchar;
14
+ transition boolean;
15
+ BEGIN
16
+ IF OLD.strand IS NOT NULL THEN
17
+ should_lock := true;
18
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
19
+
20
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
21
+ running_count := (SELECT COUNT(*) FROM (
22
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
23
+ ) subquery_for_count);
24
+ should_lock := running_count < OLD.max_concurrent;
25
+ END IF;
26
+
27
+ IF should_lock THEN
28
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
29
+ END IF;
30
+
31
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
32
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
33
+ -- singleton we grab isn't already running (which is a simple existence check, since
34
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
35
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
36
+ SELECT id FROM delayed_jobs j2
37
+ WHERE next_in_strand=false AND
38
+ j2.strand=$1.strand AND
39
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
40
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
41
+ LIMIT ';
42
+
43
+ IF should_be_precise THEN
44
+ running_count := (SELECT COUNT(*) FROM (
45
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
46
+ ) s);
47
+ IF running_count < OLD.max_concurrent THEN
48
+ update_query := update_query || '($1.max_concurrent - $2)';
49
+ ELSE
50
+ -- we have too many running already; just bail
51
+ RETURN OLD;
52
+ END IF;
53
+ ELSE
54
+ update_query := update_query || '1';
55
+
56
+ -- n-strands don't require precise ordering; we can make this query more performant
57
+ IF OLD.max_concurrent > 1 THEN
58
+ skip_locked := ' SKIP LOCKED';
59
+ END IF;
60
+ END IF;
61
+
62
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
63
+ EXECUTE update_query USING OLD, running_count;
64
+ END IF;
65
+
66
+ IF OLD.singleton IS NOT NULL THEN
67
+ transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
68
+
69
+ IF transition THEN
70
+ next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
71
+
72
+ IF next_strand IS NOT NULL THEN
73
+ -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
74
+ IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
75
+ -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
76
+ RETURN OLD;
77
+ END IF;
78
+ END IF;
79
+ ELSIF OLD.strand IS NOT NULL THEN
80
+ -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
81
+ RETURN OLD;
82
+ END IF;
83
+
84
+ -- handles transitioning a singleton from stranded to not stranded --
85
+ -- handles transitioning a singleton from unstranded to stranded --
86
+ -- handles transitioning a singleton from strand A to strand B --
87
+ -- these transitions are a relatively rare case, so we take a shortcut and --
88
+ -- only start the next singleton if its strand does not currently have any running jobs --
89
+ -- if it does, the next stranded job that finishes will start this singleton if it can --
90
+ UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
91
+ SELECT id FROM delayed_jobs j2
92
+ WHERE next_in_strand=false AND
93
+ j2.singleton=OLD.singleton AND
94
+ j2.locked_by IS NULL AND
95
+ (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
96
+ FOR UPDATE
97
+ );
98
+ END IF;
99
+ RETURN OLD;
100
+ END;
101
+ $$ LANGUAGE plpgsql;
102
+ SQL
103
+ end
104
+
105
# Rollback body: the function without transition handling; a deleted row is
# handled either by the strand branch or, via ELSIF, by the singleton-only
# UPDATE - never both.
+ def down
106
+ execute(<<~SQL)
107
+ CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
108
+ DECLARE
109
+ running_count integer;
110
+ should_lock boolean;
111
+ should_be_precise boolean;
112
+ update_query varchar;
113
+ skip_locked varchar;
114
+ BEGIN
115
+ IF OLD.strand IS NOT NULL THEN
116
+ should_lock := true;
117
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
118
+
119
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
120
+ running_count := (SELECT COUNT(*) FROM (
121
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
122
+ ) subquery_for_count);
123
+ should_lock := running_count < OLD.max_concurrent;
124
+ END IF;
125
+
126
+ IF should_lock THEN
127
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
128
+ END IF;
129
+
130
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
131
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
132
+ -- singleton we grab isn't already running (which is a simple existence check, since
133
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
134
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
135
+ SELECT id FROM delayed_jobs j2
136
+ WHERE next_in_strand=false AND
137
+ j2.strand=$1.strand AND
138
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
139
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
140
+ LIMIT ';
141
+
142
+ IF should_be_precise THEN
143
+ running_count := (SELECT COUNT(*) FROM (
144
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
145
+ ) s);
146
+ IF running_count < OLD.max_concurrent THEN
147
+ update_query := update_query || '($1.max_concurrent - $2)';
148
+ ELSE
149
+ -- we have too many running already; just bail
150
+ RETURN OLD;
151
+ END IF;
152
+ ELSE
153
+ update_query := update_query || '1';
154
+
155
+ -- n-strands don't require precise ordering; we can make this query more performant
156
+ IF OLD.max_concurrent > 1 THEN
157
+ skip_locked := ' SKIP LOCKED';
158
+ END IF;
159
+ END IF;
160
+
161
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
162
+ EXECUTE update_query USING OLD, running_count;
163
+ ELSIF OLD.singleton IS NOT NULL THEN
164
+ UPDATE delayed_jobs SET next_in_strand = 't' WHERE singleton=OLD.singleton AND next_in_strand=false AND locked_by IS NULL;
165
+ END IF;
166
+ RETURN OLD;
167
+ END;
168
+ $$ LANGUAGE plpgsql;
169
+ SQL
170
+ end
171
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
# Migration that redefines the PL/pgSQL trigger function
# delayed_jobs_before_insert_row_tr_fn. In both directions the function sets
# NEW.next_in_strand to false when the row's strand already has max_concurrent
# rows with next_in_strand=true, or when any row with the same singleton exists.
+ class FixSingletonRaceConditionInsert < ActiveRecord::Migration[5.2]
4
# Uses `reversible` so one `change` method carries both function bodies.
+ def change
5
+ reversible do |direction|
6
# Forward body: additionally takes a transaction-level advisory lock keyed on
# 'singleton:' || NEW.singleton before checking for existing singleton rows.
# NOTE(review): per the migration name this presumably serializes concurrent
# inserts of the same singleton - confirm.
+ direction.up do
7
+ execute(<<~SQL)
8
+ CREATE OR REPLACE FUNCTION delayed_jobs_before_insert_row_tr_fn () RETURNS trigger AS $$
9
+ BEGIN
10
+ IF NEW.strand IS NOT NULL THEN
11
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
12
+ IF (SELECT COUNT(*) FROM (
13
+ SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
14
+ ) s) = NEW.max_concurrent THEN
15
+ NEW.next_in_strand := false;
16
+ END IF;
17
+ END IF;
18
+ IF NEW.singleton IS NOT NULL THEN
19
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
20
+ -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
21
+ -- rather than doing a seq scan
22
+ PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
23
+ IF FOUND THEN
24
+ NEW.next_in_strand := false;
25
+ END IF;
26
+ END IF;
27
+ RETURN NEW;
28
+ END;
29
+ $$ LANGUAGE plpgsql;
30
+ SQL
31
+ end
32
# Rollback body: identical except that no singleton advisory lock is taken.
+ direction.down do
33
+ execute(<<~SQL)
34
+ CREATE OR REPLACE FUNCTION delayed_jobs_before_insert_row_tr_fn () RETURNS trigger AS $$
35
+ BEGIN
36
+ IF NEW.strand IS NOT NULL THEN
37
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
38
+ IF (SELECT COUNT(*) FROM (
39
+ SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
40
+ ) s) = NEW.max_concurrent THEN
41
+ NEW.next_in_strand := false;
42
+ END IF;
43
+ END IF;
44
+ IF NEW.singleton IS NOT NULL THEN
45
+ -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
46
+ -- rather than doing a seq scan
47
+ PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
48
+ IF FOUND THEN
49
+ NEW.next_in_strand := false;
50
+ END IF;
51
+ END IF;
52
+ RETURN NEW;
53
+ END;
54
+ $$ LANGUAGE plpgsql;
55
+ SQL
56
+ end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,207 @@
1
+ # frozen_string_literal: true
2
+
3
# Migration that redefines the PL/pgSQL trigger function
# delayed_jobs_after_delete_row_tr_fn. The two bodies differ only in whether
# the singleton branch takes an advisory lock first.
+ class FixSingletonRaceConditionDelete < ActiveRecord::Migration[6.0]
4
# Forward body: at the start of the singleton branch, takes a transaction-level
# advisory lock keyed on 'singleton:' || OLD.singleton before evaluating the
# transition check and releasing the queued singleton row.
# NOTE(review): per the migration name this presumably closes a race with
# concurrent inserts/deletes of the same singleton - confirm.
+ def up
5
+ execute(<<~SQL)
6
+ CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
7
+ DECLARE
8
+ next_strand varchar;
9
+ running_count integer;
10
+ should_lock boolean;
11
+ should_be_precise boolean;
12
+ update_query varchar;
13
+ skip_locked varchar;
14
+ transition boolean;
15
+ BEGIN
16
+ IF OLD.strand IS NOT NULL THEN
17
+ should_lock := true;
18
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
19
+
20
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
21
+ running_count := (SELECT COUNT(*) FROM (
22
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
23
+ ) subquery_for_count);
24
+ should_lock := running_count < OLD.max_concurrent;
25
+ END IF;
26
+
27
+ IF should_lock THEN
28
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
29
+ END IF;
30
+
31
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
32
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
33
+ -- singleton we grab isn't already running (which is a simple existence check, since
34
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
35
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
36
+ SELECT id FROM delayed_jobs j2
37
+ WHERE next_in_strand=false AND
38
+ j2.strand=$1.strand AND
39
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
40
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
41
+ LIMIT ';
42
+
43
+ IF should_be_precise THEN
44
+ running_count := (SELECT COUNT(*) FROM (
45
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
46
+ ) s);
47
+ IF running_count < OLD.max_concurrent THEN
48
+ update_query := update_query || '($1.max_concurrent - $2)';
49
+ ELSE
50
+ -- we have too many running already; just bail
51
+ RETURN OLD;
52
+ END IF;
53
+ ELSE
54
+ update_query := update_query || '1';
55
+
56
+ -- n-strands don't require precise ordering; we can make this query more performant
57
+ IF OLD.max_concurrent > 1 THEN
58
+ skip_locked := ' SKIP LOCKED';
59
+ END IF;
60
+ END IF;
61
+
62
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
63
+ EXECUTE update_query USING OLD, running_count;
64
+ END IF;
65
+
66
+ IF OLD.singleton IS NOT NULL THEN
67
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
68
+
69
+ transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
70
+
71
+ IF transition THEN
72
+ next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
73
+
74
+ IF next_strand IS NOT NULL THEN
75
+ -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
76
+ IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
77
+ -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
78
+ RETURN OLD;
79
+ END IF;
80
+ END IF;
81
+ ELSIF OLD.strand IS NOT NULL THEN
82
+ -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
83
+ RETURN OLD;
84
+ END IF;
85
+
86
+ -- handles transitioning a singleton from stranded to not stranded --
87
+ -- handles transitioning a singleton from unstranded to stranded --
88
+ -- handles transitioning a singleton from strand A to strand B --
89
+ -- these transitions are a relatively rare case, so we take a shortcut and --
90
+ -- only start the next singleton if its strand does not currently have any running jobs --
91
+ -- if it does, the next stranded job that finishes will start this singleton if it can --
92
+ UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
93
+ SELECT id FROM delayed_jobs j2
94
+ WHERE next_in_strand=false AND
95
+ j2.singleton=OLD.singleton AND
96
+ j2.locked_by IS NULL AND
97
+ (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
98
+ FOR UPDATE
99
+ );
100
+ END IF;
101
+ RETURN OLD;
102
+ END;
103
+ $$ LANGUAGE plpgsql;
104
+ SQL
105
+ end
106
+
107
# Rollback body: the same function without the 'singleton:' advisory lock.
+ def down
108
+ execute(<<~SQL)
109
+ CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
110
+ DECLARE
111
+ next_strand varchar;
112
+ running_count integer;
113
+ should_lock boolean;
114
+ should_be_precise boolean;
115
+ update_query varchar;
116
+ skip_locked varchar;
117
+ transition boolean;
118
+ BEGIN
119
+ IF OLD.strand IS NOT NULL THEN
120
+ should_lock := true;
121
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
122
+
123
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
124
+ running_count := (SELECT COUNT(*) FROM (
125
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
126
+ ) subquery_for_count);
127
+ should_lock := running_count < OLD.max_concurrent;
128
+ END IF;
129
+
130
+ IF should_lock THEN
131
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
132
+ END IF;
133
+
134
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
135
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
136
+ -- singleton we grab isn't already running (which is a simple existence check, since
137
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
138
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
139
+ SELECT id FROM delayed_jobs j2
140
+ WHERE next_in_strand=false AND
141
+ j2.strand=$1.strand AND
142
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
143
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
144
+ LIMIT ';
145
+
146
+ IF should_be_precise THEN
147
+ running_count := (SELECT COUNT(*) FROM (
148
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
149
+ ) s);
150
+ IF running_count < OLD.max_concurrent THEN
151
+ update_query := update_query || '($1.max_concurrent - $2)';
152
+ ELSE
153
+ -- we have too many running already; just bail
154
+ RETURN OLD;
155
+ END IF;
156
+ ELSE
157
+ update_query := update_query || '1';
158
+
159
+ -- n-strands don't require precise ordering; we can make this query more performant
160
+ IF OLD.max_concurrent > 1 THEN
161
+ skip_locked := ' SKIP LOCKED';
162
+ END IF;
163
+ END IF;
164
+
165
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
166
+ EXECUTE update_query USING OLD, running_count;
167
+ END IF;
168
+
169
+ IF OLD.singleton IS NOT NULL THEN
170
+ transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
171
+
172
+ IF transition THEN
173
+ next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
174
+
175
+ IF next_strand IS NOT NULL THEN
176
+ -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
177
+ IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
178
+ -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
179
+ RETURN OLD;
180
+ END IF;
181
+ END IF;
182
+ ELSIF OLD.strand IS NOT NULL THEN
183
+ -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
184
+ RETURN OLD;
185
+ END IF;
186
+
187
+ -- handles transitioning a singleton from stranded to not stranded --
188
+ -- handles transitioning a singleton from unstranded to stranded --
189
+ -- handles transitioning a singleton from strand A to strand B --
190
+ -- these transitions are a relatively rare case, so we take a shortcut and --
191
+ -- only start the next singleton if its strand does not currently have any running jobs --
192
+ -- if it does, the next stranded job that finishes will start this singleton if it can --
193
+ UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
194
+ SELECT id FROM delayed_jobs j2
195
+ WHERE next_in_strand=false AND
196
+ j2.singleton=OLD.singleton AND
197
+ j2.locked_by IS NULL AND
198
+ (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
199
+ FOR UPDATE
200
+ );
201
+ END IF;
202
+ RETURN OLD;
203
+ END;
204
+ $$ LANGUAGE plpgsql;
205
+ SQL
206
+ end
207
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
# Rebuilds the two partial unique indexes on delayed_jobs.singleton so that a
# row whose locked_by equals Delayed::Backend::Base::ON_HOLD_LOCKED_BY counts as
# "not running" rather than "running".
#
# The original indexes are kept under an "_old" suffix rather than dropped, so
# the rollback can restore them by renaming.
class FixSingletonUniqueConstraint < ActiveRecord::Migration[5.2]
  # CREATE INDEX CONCURRENTLY cannot run inside a transaction block.
  disable_ddl_transaction!

  # Moves the existing indexes aside, then builds replacements under the
  # original names with the new predicates.
  def up
    rename_index :delayed_jobs, "index_delayed_jobs_on_singleton_not_running", "index_delayed_jobs_on_singleton_not_running_old"
    rename_index :delayed_jobs, "index_delayed_jobs_on_singleton_running", "index_delayed_jobs_on_singleton_running_old"

    # only one job can be queued in a singleton
    add_index :delayed_jobs,
              :singleton,
              where: "singleton IS NOT NULL AND (locked_by IS NULL OR locked_by = '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}')",
              unique: true,
              name: "index_delayed_jobs_on_singleton_not_running",
              algorithm: :concurrently

    # only one job can be running for a singleton
    add_index :delayed_jobs,
              :singleton,
              where: "singleton IS NOT NULL AND locked_by IS NOT NULL AND locked_by <> '#{::Delayed::Backend::Base::ON_HOLD_LOCKED_BY}'",
              unique: true,
              name: "index_delayed_jobs_on_singleton_running",
              algorithm: :concurrently
  end

  # Reverses `up`: drops the indexes it created and renames the preserved
  # "_old" indexes back to their original names.
  #
  # The previous implementation dropped the "_old" indexes instead, which left
  # the new-predicate indexes in place and so did not restore the
  # pre-migration schema.
  def down
    # The new indexes must go first because they occupy the original names.
    remove_index :delayed_jobs, name: "index_delayed_jobs_on_singleton_not_running"
    remove_index :delayed_jobs, name: "index_delayed_jobs_on_singleton_running"

    rename_index :delayed_jobs, "index_delayed_jobs_on_singleton_not_running_old", "index_delayed_jobs_on_singleton_not_running"
    rename_index :delayed_jobs, "index_delayed_jobs_on_singleton_running_old", "index_delayed_jobs_on_singleton_running"
  end
end