switchman-inst-jobs 3.2.6 → 3.2.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fc596dd957a48e77a57106b08a3474fa39b2127f4c2a929fd363b3208e2a2e59
4
- data.tar.gz: 39157783192cf59506d320ec36e7426e2239dd6de248a32c60cfcbf8eedf667e
3
+ metadata.gz: ac66ce458b7356068a5f576593f9692b5aa58d608584033a3aca941b31dc4a2d
4
+ data.tar.gz: c790f410b55117257f9b447987f68617b61222b10bf621385736b096affa25f8
5
5
  SHA512:
6
- metadata.gz: 5b256e4ac3fe9a779faada8b57df4725f1a227af4fddff925277ca5b68fa476ec5fd51bbbf96ff996e393089d162f41c043a3225587a79e4df4096d9ecdc4168
7
- data.tar.gz: 5b43c7df21c598d1e9838e62e982ebe1ec31ab7a3daafa7b48bfafb882f3b716ced376eca1b2b80c27895e58a04bbd684fcfbed437712efcb20aa5d5ef780d55
6
+ metadata.gz: 5d21bcfec5e444aeaf4bf839c13ab40453ea7b27c780e9153aeea23d5692800ff0e36dac604c7b7dec7c7723aea6a1baba675748a543240a69b37a3486665110
7
+ data.tar.gz: ea0797fc7294cf1be7b641ad874e4560a69a6a7bc04228ee85f7c284f5fcc1288139b6824c72ae5db1c521b19fa3aec55d36c7831dc819046a8af7f2d623886b
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ class FixSingletonRaceConditionInsert < ActiveRecord::Migration[5.2]
4
+ def change
5
+ reversible do |direction|
6
+ direction.up do
7
+ execute(<<~SQL)
8
+ CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
9
+ BEGIN
10
+ IF NEW.strand IS NOT NULL THEN
11
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
12
+ IF (SELECT COUNT(*) FROM (
13
+ SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
14
+ ) s) = NEW.max_concurrent THEN
15
+ NEW.next_in_strand := false;
16
+ END IF;
17
+ END IF;
18
+ IF NEW.singleton IS NOT NULL THEN
19
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
20
+ -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
21
+ -- rather than doing a seq scan
22
+ PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
23
+ IF FOUND THEN
24
+ NEW.next_in_strand := false;
25
+ END IF;
26
+ END IF;
27
+ RETURN NEW;
28
+ END;
29
+ $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
30
+ SQL
31
+ end
32
+ direction.down do
33
+ execute(<<~SQL)
34
+ CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
35
+ BEGIN
36
+ IF NEW.strand IS NOT NULL THEN
37
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
38
+ IF (SELECT COUNT(*) FROM (
39
+ SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
40
+ ) s) = NEW.max_concurrent THEN
41
+ NEW.next_in_strand := false;
42
+ END IF;
43
+ END IF;
44
+ IF NEW.singleton IS NOT NULL THEN
45
+ -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
46
+ -- rather than doing a seq scan
47
+ PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
48
+ IF FOUND THEN
49
+ NEW.next_in_strand := false;
50
+ END IF;
51
+ END IF;
52
+ RETURN NEW;
53
+ END;
54
+ $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
55
+ SQL
56
+ end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,207 @@
1
+ # frozen_string_literal: true
2
+
3
+ class FixSingletonRaceConditionDelete < ActiveRecord::Migration[5.2]
4
+ def up
5
+ execute(<<~SQL)
6
+ CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
7
+ DECLARE
8
+ next_strand varchar;
9
+ running_count integer;
10
+ should_lock boolean;
11
+ should_be_precise boolean;
12
+ update_query varchar;
13
+ skip_locked varchar;
14
+ transition boolean;
15
+ BEGIN
16
+ IF OLD.strand IS NOT NULL THEN
17
+ should_lock := true;
18
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
19
+
20
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
21
+ running_count := (SELECT COUNT(*) FROM (
22
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
23
+ ) subquery_for_count);
24
+ should_lock := running_count < OLD.max_concurrent;
25
+ END IF;
26
+
27
+ IF should_lock THEN
28
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
29
+ END IF;
30
+
31
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
32
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
33
+ -- singleton we grab isn't already running (which is a simple existence check, since
34
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
35
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
36
+ SELECT id FROM delayed_jobs j2
37
+ WHERE next_in_strand=false AND
38
+ j2.strand=$1.strand AND
39
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
40
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
41
+ LIMIT ';
42
+
43
+ IF should_be_precise THEN
44
+ running_count := (SELECT COUNT(*) FROM (
45
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
46
+ ) s);
47
+ IF running_count < OLD.max_concurrent THEN
48
+ update_query := update_query || '($1.max_concurrent - $2)';
49
+ ELSE
50
+ -- we have too many running already; just bail
51
+ RETURN OLD;
52
+ END IF;
53
+ ELSE
54
+ update_query := update_query || '1';
55
+
56
+ -- n-strands don't require precise ordering; we can make this query more performant
57
+ IF OLD.max_concurrent > 1 THEN
58
+ skip_locked := ' SKIP LOCKED';
59
+ END IF;
60
+ END IF;
61
+
62
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
63
+ EXECUTE update_query USING OLD, running_count;
64
+ END IF;
65
+
66
+ IF OLD.singleton IS NOT NULL THEN
67
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
68
+
69
+ transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
70
+
71
+ IF transition THEN
72
+ next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
73
+
74
+ IF next_strand IS NOT NULL THEN
75
+ -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
76
+ IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
77
+ -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
78
+ RETURN OLD;
79
+ END IF;
80
+ END IF;
81
+ ELSIF OLD.strand IS NOT NULL THEN
82
+ -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
83
+ RETURN OLD;
84
+ END IF;
85
+
86
+ -- handles transitioning a singleton from stranded to not stranded --
87
+ -- handles transitioning a singleton from unstranded to stranded --
88
+ -- handles transitioning a singleton from strand A to strand B --
89
+ -- these transitions are a relatively rare case, so we take a shortcut and --
90
+ -- only start the next singleton if its strand does not currently have any running jobs --
91
+ -- if it does, the next stranded job that finishes will start this singleton if it can --
92
+ UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
93
+ SELECT id FROM delayed_jobs j2
94
+ WHERE next_in_strand=false AND
95
+ j2.singleton=OLD.singleton AND
96
+ j2.locked_by IS NULL AND
97
+ (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
98
+ FOR UPDATE
99
+ );
100
+ END IF;
101
+ RETURN OLD;
102
+ END;
103
+ $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
104
+ SQL
105
+ end
106
+
107
+ def down
108
+ execute(<<~SQL)
109
+ CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
110
+ DECLARE
111
+ next_strand varchar;
112
+ running_count integer;
113
+ should_lock boolean;
114
+ should_be_precise boolean;
115
+ update_query varchar;
116
+ skip_locked varchar;
117
+ transition boolean;
118
+ BEGIN
119
+ IF OLD.strand IS NOT NULL THEN
120
+ should_lock := true;
121
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
122
+
123
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
124
+ running_count := (SELECT COUNT(*) FROM (
125
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
126
+ ) subquery_for_count);
127
+ should_lock := running_count < OLD.max_concurrent;
128
+ END IF;
129
+
130
+ IF should_lock THEN
131
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
132
+ END IF;
133
+
134
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
135
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
136
+ -- singleton we grab isn't already running (which is a simple existence check, since
137
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
138
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
139
+ SELECT id FROM delayed_jobs j2
140
+ WHERE next_in_strand=false AND
141
+ j2.strand=$1.strand AND
142
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
143
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
144
+ LIMIT ';
145
+
146
+ IF should_be_precise THEN
147
+ running_count := (SELECT COUNT(*) FROM (
148
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
149
+ ) s);
150
+ IF running_count < OLD.max_concurrent THEN
151
+ update_query := update_query || '($1.max_concurrent - $2)';
152
+ ELSE
153
+ -- we have too many running already; just bail
154
+ RETURN OLD;
155
+ END IF;
156
+ ELSE
157
+ update_query := update_query || '1';
158
+
159
+ -- n-strands don't require precise ordering; we can make this query more performant
160
+ IF OLD.max_concurrent > 1 THEN
161
+ skip_locked := ' SKIP LOCKED';
162
+ END IF;
163
+ END IF;
164
+
165
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
166
+ EXECUTE update_query USING OLD, running_count;
167
+ END IF;
168
+
169
+ IF OLD.singleton IS NOT NULL THEN
170
+ transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
171
+
172
+ IF transition THEN
173
+ next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
174
+
175
+ IF next_strand IS NOT NULL THEN
176
+ -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
177
+ IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
178
+ -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
179
+ RETURN OLD;
180
+ END IF;
181
+ END IF;
182
+ ELSIF OLD.strand IS NOT NULL THEN
183
+ -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
184
+ RETURN OLD;
185
+ END IF;
186
+
187
+ -- handles transitioning a singleton from stranded to not stranded --
188
+ -- handles transitioning a singleton from unstranded to stranded --
189
+ -- handles transitioning a singleton from strand A to strand B --
190
+ -- these transitions are a relatively rare case, so we take a shortcut and --
191
+ -- only start the next singleton if its strand does not currently have any running jobs --
192
+ -- if it does, the next stranded job that finishes will start this singleton if it can --
193
+ UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
194
+ SELECT id FROM delayed_jobs j2
195
+ WHERE next_in_strand=false AND
196
+ j2.singleton=OLD.singleton AND
197
+ j2.locked_by IS NULL AND
198
+ (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
199
+ FOR UPDATE
200
+ );
201
+ END IF;
202
+ RETURN OLD;
203
+ END;
204
+ $$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
205
+ SQL
206
+ end
207
+ end
@@ -1,3 +1,3 @@
1
1
  module SwitchmanInstJobs
2
- VERSION = '3.2.6'.freeze
2
+ VERSION = '3.2.7'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: switchman-inst-jobs
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.6
4
+ version: 3.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bryan Petty
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-09 00:00:00.000000000 Z
11
+ date: 2021-12-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inst-jobs
@@ -303,6 +303,8 @@ files:
303
303
  - db/migrate/20210929204903_update_conflicting_singleton_function_to_use_index.rb
304
304
  - db/migrate/20211101190934_update_after_delete_trigger_for_singleton_index.rb
305
305
  - db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb
306
+ - db/migrate/20211220112800_fix_singleton_race_condition_insert.rb
307
+ - db/migrate/20211220113000_fix_singleton_race_condition_delete.rb
306
308
  - lib/switchman-inst-jobs.rb
307
309
  - lib/switchman_inst_jobs.rb
308
310
  - lib/switchman_inst_jobs/active_record/connection_adapters/postgresql_adapter.rb