switchman-inst-jobs 3.2.2 → 3.2.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/db/migrate/20210929204903_update_conflicting_singleton_function_to_use_index.rb +27 -0
- data/db/migrate/20211101190934_update_after_delete_trigger_for_singleton_index.rb +137 -0
- data/db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb +171 -0
- data/db/migrate/20211220112800_fix_singleton_race_condition_insert.rb +59 -0
- data/db/migrate/20211220113000_fix_singleton_race_condition_delete.rb +207 -0
- data/lib/switchman_inst_jobs/jobs_migrator.rb +7 -1
- data/lib/switchman_inst_jobs/version.rb +1 -1
- metadata +17 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ac66ce458b7356068a5f576593f9692b5aa58d608584033a3aca941b31dc4a2d
|
4
|
+
data.tar.gz: c790f410b55117257f9b447987f68617b61222b10bf621385736b096affa25f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5d21bcfec5e444aeaf4bf839c13ab40453ea7b27c780e9153aeea23d5692800ff0e36dac604c7b7dec7c7723aea6a1baba675748a543240a69b37a3486665110
|
7
|
+
data.tar.gz: ea0797fc7294cf1be7b641ad874e4560a69a6a7bc04228ee85f7c284f5fcc1288139b6824c72ae5db1c521b19fa3aec55d36c7831dc819046a8af7f2d623886b
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class UpdateConflictingSingletonFunctionToUseIndex < ActiveRecord::Migration[5.2]
|
4
|
+
def up
|
5
|
+
execute(<<~SQL)
|
6
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_unlock_delete_conflicting_singletons_row_fn')} () RETURNS trigger AS $$
|
7
|
+
BEGIN
|
8
|
+
DELETE FROM delayed_jobs WHERE id<>OLD.id AND singleton=OLD.singleton AND locked_by IS NULL;
|
9
|
+
RETURN NEW;
|
10
|
+
END;
|
11
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
12
|
+
SQL
|
13
|
+
end
|
14
|
+
|
15
|
+
def down
|
16
|
+
execute(<<~SQL)
|
17
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_unlock_delete_conflicting_singletons_row_fn')} () RETURNS trigger AS $$
|
18
|
+
BEGIN
|
19
|
+
IF EXISTS (SELECT 1 FROM delayed_jobs j2 WHERE j2.singleton=OLD.singleton) THEN
|
20
|
+
DELETE FROM delayed_jobs WHERE id<>OLD.id AND singleton=OLD.singleton;
|
21
|
+
END IF;
|
22
|
+
RETURN NEW;
|
23
|
+
END;
|
24
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
25
|
+
SQL
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class UpdateAfterDeleteTriggerForSingletonIndex < ActiveRecord::Migration[5.2]
|
4
|
+
def up
|
5
|
+
execute(<<~SQL)
|
6
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
|
7
|
+
DECLARE
|
8
|
+
running_count integer;
|
9
|
+
should_lock boolean;
|
10
|
+
should_be_precise boolean;
|
11
|
+
update_query varchar;
|
12
|
+
skip_locked varchar;
|
13
|
+
BEGIN
|
14
|
+
IF OLD.strand IS NOT NULL THEN
|
15
|
+
should_lock := true;
|
16
|
+
should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
|
17
|
+
|
18
|
+
IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
|
19
|
+
running_count := (SELECT COUNT(*) FROM (
|
20
|
+
SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
21
|
+
) subquery_for_count);
|
22
|
+
should_lock := running_count < OLD.max_concurrent;
|
23
|
+
END IF;
|
24
|
+
|
25
|
+
IF should_lock THEN
|
26
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
|
27
|
+
END IF;
|
28
|
+
|
29
|
+
-- note that we don't really care if the row we're deleting has a singleton, or if it even
|
30
|
+
-- matches the row(s) we're going to update. we just need to make sure that whatever
|
31
|
+
-- singleton we grab isn't already running (which is a simple existence check, since
|
32
|
+
-- the unique indexes ensure there is at most one singleton running, and one queued)
|
33
|
+
update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
|
34
|
+
SELECT id FROM delayed_jobs j2
|
35
|
+
WHERE next_in_strand=false AND
|
36
|
+
j2.strand=$1.strand AND
|
37
|
+
(j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
|
38
|
+
ORDER BY j2.strand_order_override ASC, j2.id ASC
|
39
|
+
LIMIT ';
|
40
|
+
|
41
|
+
IF should_be_precise THEN
|
42
|
+
running_count := (SELECT COUNT(*) FROM (
|
43
|
+
SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
44
|
+
) s);
|
45
|
+
IF running_count < OLD.max_concurrent THEN
|
46
|
+
update_query := update_query || '($1.max_concurrent - $2)';
|
47
|
+
ELSE
|
48
|
+
-- we have too many running already; just bail
|
49
|
+
RETURN OLD;
|
50
|
+
END IF;
|
51
|
+
ELSE
|
52
|
+
update_query := update_query || '1';
|
53
|
+
|
54
|
+
-- n-strands don't require precise ordering; we can make this query more performant
|
55
|
+
IF OLD.max_concurrent > 1 THEN
|
56
|
+
skip_locked := ' SKIP LOCKED';
|
57
|
+
END IF;
|
58
|
+
END IF;
|
59
|
+
|
60
|
+
update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
|
61
|
+
EXECUTE update_query USING OLD, running_count;
|
62
|
+
ELSIF OLD.singleton IS NOT NULL THEN
|
63
|
+
UPDATE delayed_jobs SET next_in_strand = 't' WHERE singleton=OLD.singleton AND next_in_strand=false AND locked_by IS NULL;
|
64
|
+
END IF;
|
65
|
+
RETURN OLD;
|
66
|
+
END;
|
67
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
68
|
+
SQL
|
69
|
+
end
|
70
|
+
|
71
|
+
def down
|
72
|
+
execute(<<~SQL)
|
73
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
|
74
|
+
DECLARE
|
75
|
+
running_count integer;
|
76
|
+
should_lock boolean;
|
77
|
+
should_be_precise boolean;
|
78
|
+
update_query varchar;
|
79
|
+
skip_locked varchar;
|
80
|
+
BEGIN
|
81
|
+
IF OLD.strand IS NOT NULL THEN
|
82
|
+
should_lock := true;
|
83
|
+
should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
|
84
|
+
|
85
|
+
IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
|
86
|
+
running_count := (SELECT COUNT(*) FROM (
|
87
|
+
SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
88
|
+
) subquery_for_count);
|
89
|
+
should_lock := running_count < OLD.max_concurrent;
|
90
|
+
END IF;
|
91
|
+
|
92
|
+
IF should_lock THEN
|
93
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
|
94
|
+
END IF;
|
95
|
+
|
96
|
+
-- note that we don't really care if the row we're deleting has a singleton, or if it even
|
97
|
+
-- matches the row(s) we're going to update. we just need to make sure that whatever
|
98
|
+
-- singleton we grab isn't already running (which is a simple existence check, since
|
99
|
+
-- the unique indexes ensure there is at most one singleton running, and one queued)
|
100
|
+
update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
|
101
|
+
SELECT id FROM delayed_jobs j2
|
102
|
+
WHERE next_in_strand=false AND
|
103
|
+
j2.strand=$1.strand AND
|
104
|
+
(j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id))
|
105
|
+
ORDER BY j2.strand_order_override ASC, j2.id ASC
|
106
|
+
LIMIT ';
|
107
|
+
|
108
|
+
IF should_be_precise THEN
|
109
|
+
running_count := (SELECT COUNT(*) FROM (
|
110
|
+
SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
111
|
+
) s);
|
112
|
+
IF running_count < OLD.max_concurrent THEN
|
113
|
+
update_query := update_query || '($1.max_concurrent - $2)';
|
114
|
+
ELSE
|
115
|
+
-- we have too many running already; just bail
|
116
|
+
RETURN OLD;
|
117
|
+
END IF;
|
118
|
+
ELSE
|
119
|
+
update_query := update_query || '1';
|
120
|
+
|
121
|
+
-- n-strands don't require precise ordering; we can make this query more performant
|
122
|
+
IF OLD.max_concurrent > 1 THEN
|
123
|
+
skip_locked := ' SKIP LOCKED';
|
124
|
+
END IF;
|
125
|
+
END IF;
|
126
|
+
|
127
|
+
update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
|
128
|
+
EXECUTE update_query USING OLD, running_count;
|
129
|
+
ELSIF OLD.singleton IS NOT NULL THEN
|
130
|
+
UPDATE delayed_jobs SET next_in_strand = 't' WHERE singleton=OLD.singleton AND next_in_strand=false;
|
131
|
+
END IF;
|
132
|
+
RETURN OLD;
|
133
|
+
END;
|
134
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
135
|
+
SQL
|
136
|
+
end
|
137
|
+
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class UpdateAfterDeleteTriggerForSingletonTransitionCases < ActiveRecord::Migration[5.2]
|
4
|
+
def up
|
5
|
+
execute(<<~SQL)
|
6
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
|
7
|
+
DECLARE
|
8
|
+
next_strand varchar;
|
9
|
+
running_count integer;
|
10
|
+
should_lock boolean;
|
11
|
+
should_be_precise boolean;
|
12
|
+
update_query varchar;
|
13
|
+
skip_locked varchar;
|
14
|
+
transition boolean;
|
15
|
+
BEGIN
|
16
|
+
IF OLD.strand IS NOT NULL THEN
|
17
|
+
should_lock := true;
|
18
|
+
should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
|
19
|
+
|
20
|
+
IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
|
21
|
+
running_count := (SELECT COUNT(*) FROM (
|
22
|
+
SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
23
|
+
) subquery_for_count);
|
24
|
+
should_lock := running_count < OLD.max_concurrent;
|
25
|
+
END IF;
|
26
|
+
|
27
|
+
IF should_lock THEN
|
28
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
|
29
|
+
END IF;
|
30
|
+
|
31
|
+
-- note that we don't really care if the row we're deleting has a singleton, or if it even
|
32
|
+
-- matches the row(s) we're going to update. we just need to make sure that whatever
|
33
|
+
-- singleton we grab isn't already running (which is a simple existence check, since
|
34
|
+
-- the unique indexes ensure there is at most one singleton running, and one queued)
|
35
|
+
update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
|
36
|
+
SELECT id FROM delayed_jobs j2
|
37
|
+
WHERE next_in_strand=false AND
|
38
|
+
j2.strand=$1.strand AND
|
39
|
+
(j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
|
40
|
+
ORDER BY j2.strand_order_override ASC, j2.id ASC
|
41
|
+
LIMIT ';
|
42
|
+
|
43
|
+
IF should_be_precise THEN
|
44
|
+
running_count := (SELECT COUNT(*) FROM (
|
45
|
+
SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
46
|
+
) s);
|
47
|
+
IF running_count < OLD.max_concurrent THEN
|
48
|
+
update_query := update_query || '($1.max_concurrent - $2)';
|
49
|
+
ELSE
|
50
|
+
-- we have too many running already; just bail
|
51
|
+
RETURN OLD;
|
52
|
+
END IF;
|
53
|
+
ELSE
|
54
|
+
update_query := update_query || '1';
|
55
|
+
|
56
|
+
-- n-strands don't require precise ordering; we can make this query more performant
|
57
|
+
IF OLD.max_concurrent > 1 THEN
|
58
|
+
skip_locked := ' SKIP LOCKED';
|
59
|
+
END IF;
|
60
|
+
END IF;
|
61
|
+
|
62
|
+
update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
|
63
|
+
EXECUTE update_query USING OLD, running_count;
|
64
|
+
END IF;
|
65
|
+
|
66
|
+
IF OLD.singleton IS NOT NULL THEN
|
67
|
+
transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
|
68
|
+
|
69
|
+
IF transition THEN
|
70
|
+
next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
|
71
|
+
|
72
|
+
IF next_strand IS NOT NULL THEN
|
73
|
+
-- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
|
74
|
+
IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
|
75
|
+
-- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
|
76
|
+
RETURN OLD;
|
77
|
+
END IF;
|
78
|
+
END IF;
|
79
|
+
ELSIF OLD.strand IS NOT NULL THEN
|
80
|
+
-- if there is no transition and there is a strand then we have already handled this singleton in the case above --
|
81
|
+
RETURN OLD;
|
82
|
+
END IF;
|
83
|
+
|
84
|
+
-- handles transitioning a singleton from stranded to not stranded --
|
85
|
+
-- handles transitioning a singleton from unstranded to stranded --
|
86
|
+
-- handles transitioning a singleton from strand A to strand B --
|
87
|
+
-- these transitions are a relatively rare case, so we take a shortcut and --
|
88
|
+
-- only start the next singleton if its strand does not currently have any running jobs --
|
89
|
+
-- if it does, the next stranded job that finishes will start this singleton if it can --
|
90
|
+
UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
|
91
|
+
SELECT id FROM delayed_jobs j2
|
92
|
+
WHERE next_in_strand=false AND
|
93
|
+
j2.singleton=OLD.singleton AND
|
94
|
+
j2.locked_by IS NULL AND
|
95
|
+
(j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
|
96
|
+
FOR UPDATE
|
97
|
+
);
|
98
|
+
END IF;
|
99
|
+
RETURN OLD;
|
100
|
+
END;
|
101
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
102
|
+
SQL
|
103
|
+
end
|
104
|
+
|
105
|
+
def down
|
106
|
+
execute(<<~SQL)
|
107
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
|
108
|
+
DECLARE
|
109
|
+
running_count integer;
|
110
|
+
should_lock boolean;
|
111
|
+
should_be_precise boolean;
|
112
|
+
update_query varchar;
|
113
|
+
skip_locked varchar;
|
114
|
+
BEGIN
|
115
|
+
IF OLD.strand IS NOT NULL THEN
|
116
|
+
should_lock := true;
|
117
|
+
should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
|
118
|
+
|
119
|
+
IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
|
120
|
+
running_count := (SELECT COUNT(*) FROM (
|
121
|
+
SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
122
|
+
) subquery_for_count);
|
123
|
+
should_lock := running_count < OLD.max_concurrent;
|
124
|
+
END IF;
|
125
|
+
|
126
|
+
IF should_lock THEN
|
127
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
|
128
|
+
END IF;
|
129
|
+
|
130
|
+
-- note that we don't really care if the row we're deleting has a singleton, or if it even
|
131
|
+
-- matches the row(s) we're going to update. we just need to make sure that whatever
|
132
|
+
-- singleton we grab isn't already running (which is a simple existence check, since
|
133
|
+
-- the unique indexes ensure there is at most one singleton running, and one queued)
|
134
|
+
update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
|
135
|
+
SELECT id FROM delayed_jobs j2
|
136
|
+
WHERE next_in_strand=false AND
|
137
|
+
j2.strand=$1.strand AND
|
138
|
+
(j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
|
139
|
+
ORDER BY j2.strand_order_override ASC, j2.id ASC
|
140
|
+
LIMIT ';
|
141
|
+
|
142
|
+
IF should_be_precise THEN
|
143
|
+
running_count := (SELECT COUNT(*) FROM (
|
144
|
+
SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
145
|
+
) s);
|
146
|
+
IF running_count < OLD.max_concurrent THEN
|
147
|
+
update_query := update_query || '($1.max_concurrent - $2)';
|
148
|
+
ELSE
|
149
|
+
-- we have too many running already; just bail
|
150
|
+
RETURN OLD;
|
151
|
+
END IF;
|
152
|
+
ELSE
|
153
|
+
update_query := update_query || '1';
|
154
|
+
|
155
|
+
-- n-strands don't require precise ordering; we can make this query more performant
|
156
|
+
IF OLD.max_concurrent > 1 THEN
|
157
|
+
skip_locked := ' SKIP LOCKED';
|
158
|
+
END IF;
|
159
|
+
END IF;
|
160
|
+
|
161
|
+
update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
|
162
|
+
EXECUTE update_query USING OLD, running_count;
|
163
|
+
ELSIF OLD.singleton IS NOT NULL THEN
|
164
|
+
UPDATE delayed_jobs SET next_in_strand = 't' WHERE singleton=OLD.singleton AND next_in_strand=false AND locked_by IS NULL;
|
165
|
+
END IF;
|
166
|
+
RETURN OLD;
|
167
|
+
END;
|
168
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
169
|
+
SQL
|
170
|
+
end
|
171
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class FixSingletonRaceConditionInsert < ActiveRecord::Migration[5.2]
|
4
|
+
def change
|
5
|
+
reversible do |direction|
|
6
|
+
direction.up do
|
7
|
+
execute(<<~SQL)
|
8
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
|
9
|
+
BEGIN
|
10
|
+
IF NEW.strand IS NOT NULL THEN
|
11
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
|
12
|
+
IF (SELECT COUNT(*) FROM (
|
13
|
+
SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
|
14
|
+
) s) = NEW.max_concurrent THEN
|
15
|
+
NEW.next_in_strand := false;
|
16
|
+
END IF;
|
17
|
+
END IF;
|
18
|
+
IF NEW.singleton IS NOT NULL THEN
|
19
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
|
20
|
+
-- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
|
21
|
+
-- rather than doing a seq scan
|
22
|
+
PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
|
23
|
+
IF FOUND THEN
|
24
|
+
NEW.next_in_strand := false;
|
25
|
+
END IF;
|
26
|
+
END IF;
|
27
|
+
RETURN NEW;
|
28
|
+
END;
|
29
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
30
|
+
SQL
|
31
|
+
end
|
32
|
+
direction.down do
|
33
|
+
execute(<<~SQL)
|
34
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
|
35
|
+
BEGIN
|
36
|
+
IF NEW.strand IS NOT NULL THEN
|
37
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
|
38
|
+
IF (SELECT COUNT(*) FROM (
|
39
|
+
SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
|
40
|
+
) s) = NEW.max_concurrent THEN
|
41
|
+
NEW.next_in_strand := false;
|
42
|
+
END IF;
|
43
|
+
END IF;
|
44
|
+
IF NEW.singleton IS NOT NULL THEN
|
45
|
+
-- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
|
46
|
+
-- rather than doing a seq scan
|
47
|
+
PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
|
48
|
+
IF FOUND THEN
|
49
|
+
NEW.next_in_strand := false;
|
50
|
+
END IF;
|
51
|
+
END IF;
|
52
|
+
RETURN NEW;
|
53
|
+
END;
|
54
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
55
|
+
SQL
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,207 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class FixSingletonRaceConditionDelete < ActiveRecord::Migration[5.2]
|
4
|
+
def up
|
5
|
+
execute(<<~SQL)
|
6
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
|
7
|
+
DECLARE
|
8
|
+
next_strand varchar;
|
9
|
+
running_count integer;
|
10
|
+
should_lock boolean;
|
11
|
+
should_be_precise boolean;
|
12
|
+
update_query varchar;
|
13
|
+
skip_locked varchar;
|
14
|
+
transition boolean;
|
15
|
+
BEGIN
|
16
|
+
IF OLD.strand IS NOT NULL THEN
|
17
|
+
should_lock := true;
|
18
|
+
should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
|
19
|
+
|
20
|
+
IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
|
21
|
+
running_count := (SELECT COUNT(*) FROM (
|
22
|
+
SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
23
|
+
) subquery_for_count);
|
24
|
+
should_lock := running_count < OLD.max_concurrent;
|
25
|
+
END IF;
|
26
|
+
|
27
|
+
IF should_lock THEN
|
28
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
|
29
|
+
END IF;
|
30
|
+
|
31
|
+
-- note that we don't really care if the row we're deleting has a singleton, or if it even
|
32
|
+
-- matches the row(s) we're going to update. we just need to make sure that whatever
|
33
|
+
-- singleton we grab isn't already running (which is a simple existence check, since
|
34
|
+
-- the unique indexes ensure there is at most one singleton running, and one queued)
|
35
|
+
update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
|
36
|
+
SELECT id FROM delayed_jobs j2
|
37
|
+
WHERE next_in_strand=false AND
|
38
|
+
j2.strand=$1.strand AND
|
39
|
+
(j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
|
40
|
+
ORDER BY j2.strand_order_override ASC, j2.id ASC
|
41
|
+
LIMIT ';
|
42
|
+
|
43
|
+
IF should_be_precise THEN
|
44
|
+
running_count := (SELECT COUNT(*) FROM (
|
45
|
+
SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
46
|
+
) s);
|
47
|
+
IF running_count < OLD.max_concurrent THEN
|
48
|
+
update_query := update_query || '($1.max_concurrent - $2)';
|
49
|
+
ELSE
|
50
|
+
-- we have too many running already; just bail
|
51
|
+
RETURN OLD;
|
52
|
+
END IF;
|
53
|
+
ELSE
|
54
|
+
update_query := update_query || '1';
|
55
|
+
|
56
|
+
-- n-strands don't require precise ordering; we can make this query more performant
|
57
|
+
IF OLD.max_concurrent > 1 THEN
|
58
|
+
skip_locked := ' SKIP LOCKED';
|
59
|
+
END IF;
|
60
|
+
END IF;
|
61
|
+
|
62
|
+
update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
|
63
|
+
EXECUTE update_query USING OLD, running_count;
|
64
|
+
END IF;
|
65
|
+
|
66
|
+
IF OLD.singleton IS NOT NULL THEN
|
67
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
|
68
|
+
|
69
|
+
transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
|
70
|
+
|
71
|
+
IF transition THEN
|
72
|
+
next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
|
73
|
+
|
74
|
+
IF next_strand IS NOT NULL THEN
|
75
|
+
-- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
|
76
|
+
IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
|
77
|
+
-- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
|
78
|
+
RETURN OLD;
|
79
|
+
END IF;
|
80
|
+
END IF;
|
81
|
+
ELSIF OLD.strand IS NOT NULL THEN
|
82
|
+
-- if there is no transition and there is a strand then we have already handled this singleton in the case above --
|
83
|
+
RETURN OLD;
|
84
|
+
END IF;
|
85
|
+
|
86
|
+
-- handles transitioning a singleton from stranded to not stranded --
|
87
|
+
-- handles transitioning a singleton from unstranded to stranded --
|
88
|
+
-- handles transitioning a singleton from strand A to strand B --
|
89
|
+
-- these transitions are a relatively rare case, so we take a shortcut and --
|
90
|
+
-- only start the next singleton if its strand does not currently have any running jobs --
|
91
|
+
-- if it does, the next stranded job that finishes will start this singleton if it can --
|
92
|
+
UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
|
93
|
+
SELECT id FROM delayed_jobs j2
|
94
|
+
WHERE next_in_strand=false AND
|
95
|
+
j2.singleton=OLD.singleton AND
|
96
|
+
j2.locked_by IS NULL AND
|
97
|
+
(j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
|
98
|
+
FOR UPDATE
|
99
|
+
);
|
100
|
+
END IF;
|
101
|
+
RETURN OLD;
|
102
|
+
END;
|
103
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
104
|
+
SQL
|
105
|
+
end
|
106
|
+
|
107
|
+
def down
|
108
|
+
execute(<<~SQL)
|
109
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
|
110
|
+
DECLARE
|
111
|
+
next_strand varchar;
|
112
|
+
running_count integer;
|
113
|
+
should_lock boolean;
|
114
|
+
should_be_precise boolean;
|
115
|
+
update_query varchar;
|
116
|
+
skip_locked varchar;
|
117
|
+
transition boolean;
|
118
|
+
BEGIN
|
119
|
+
IF OLD.strand IS NOT NULL THEN
|
120
|
+
should_lock := true;
|
121
|
+
should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
|
122
|
+
|
123
|
+
IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
|
124
|
+
running_count := (SELECT COUNT(*) FROM (
|
125
|
+
SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
126
|
+
) subquery_for_count);
|
127
|
+
should_lock := running_count < OLD.max_concurrent;
|
128
|
+
END IF;
|
129
|
+
|
130
|
+
IF should_lock THEN
|
131
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
|
132
|
+
END IF;
|
133
|
+
|
134
|
+
-- note that we don't really care if the row we're deleting has a singleton, or if it even
|
135
|
+
-- matches the row(s) we're going to update. we just need to make sure that whatever
|
136
|
+
-- singleton we grab isn't already running (which is a simple existence check, since
|
137
|
+
-- the unique indexes ensure there is at most one singleton running, and one queued)
|
138
|
+
update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
|
139
|
+
SELECT id FROM delayed_jobs j2
|
140
|
+
WHERE next_in_strand=false AND
|
141
|
+
j2.strand=$1.strand AND
|
142
|
+
(j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
|
143
|
+
ORDER BY j2.strand_order_override ASC, j2.id ASC
|
144
|
+
LIMIT ';
|
145
|
+
|
146
|
+
IF should_be_precise THEN
|
147
|
+
running_count := (SELECT COUNT(*) FROM (
|
148
|
+
SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
149
|
+
) s);
|
150
|
+
IF running_count < OLD.max_concurrent THEN
|
151
|
+
update_query := update_query || '($1.max_concurrent - $2)';
|
152
|
+
ELSE
|
153
|
+
-- we have too many running already; just bail
|
154
|
+
RETURN OLD;
|
155
|
+
END IF;
|
156
|
+
ELSE
|
157
|
+
update_query := update_query || '1';
|
158
|
+
|
159
|
+
-- n-strands don't require precise ordering; we can make this query more performant
|
160
|
+
IF OLD.max_concurrent > 1 THEN
|
161
|
+
skip_locked := ' SKIP LOCKED';
|
162
|
+
END IF;
|
163
|
+
END IF;
|
164
|
+
|
165
|
+
update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
|
166
|
+
EXECUTE update_query USING OLD, running_count;
|
167
|
+
END IF;
|
168
|
+
|
169
|
+
IF OLD.singleton IS NOT NULL THEN
|
170
|
+
transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
|
171
|
+
|
172
|
+
IF transition THEN
|
173
|
+
next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
|
174
|
+
|
175
|
+
IF next_strand IS NOT NULL THEN
|
176
|
+
-- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
|
177
|
+
IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
|
178
|
+
-- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
|
179
|
+
RETURN OLD;
|
180
|
+
END IF;
|
181
|
+
END IF;
|
182
|
+
ELSIF OLD.strand IS NOT NULL THEN
|
183
|
+
-- if there is no transition and there is a strand then we have already handled this singleton in the case above --
|
184
|
+
RETURN OLD;
|
185
|
+
END IF;
|
186
|
+
|
187
|
+
-- handles transitioning a singleton from stranded to not stranded --
|
188
|
+
-- handles transitioning a singleton from unstranded to stranded --
|
189
|
+
-- handles transitioning a singleton from strand A to strand B --
|
190
|
+
-- these transitions are a relatively rare case, so we take a shortcut and --
|
191
|
+
-- only start the next singleton if its strand does not currently have any running jobs --
|
192
|
+
-- if it does, the next stranded job that finishes will start this singleton if it can --
|
193
|
+
UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
|
194
|
+
SELECT id FROM delayed_jobs j2
|
195
|
+
WHERE next_in_strand=false AND
|
196
|
+
j2.singleton=OLD.singleton AND
|
197
|
+
j2.locked_by IS NULL AND
|
198
|
+
(j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
|
199
|
+
FOR UPDATE
|
200
|
+
);
|
201
|
+
END IF;
|
202
|
+
RETURN OLD;
|
203
|
+
END;
|
204
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
205
|
+
SQL
|
206
|
+
end
|
207
|
+
end
|
@@ -277,7 +277,10 @@ module SwitchmanInstJobs
|
|
277
277
|
connection = ::Delayed::Job.connection
|
278
278
|
quoted_keys = keys.map { |k| connection.quote_column_name(k) }.join(', ')
|
279
279
|
|
280
|
-
connection.execute
|
280
|
+
connection.execute 'DROP TABLE IF EXISTS delayed_jobs_bulk_copy'
|
281
|
+
connection.execute "CREATE TEMPORARY TABLE delayed_jobs_bulk_copy
|
282
|
+
(LIKE #{::Delayed::Job.quoted_table_name} INCLUDING DEFAULTS)"
|
283
|
+
connection.execute "COPY delayed_jobs_bulk_copy (#{quoted_keys}) FROM STDIN"
|
281
284
|
records.map do |record|
|
282
285
|
connection.raw_connection.put_copy_data("#{keys.map { |k| quote_text(record[k]) }.join("\t")}\n")
|
283
286
|
end
|
@@ -289,6 +292,9 @@ module SwitchmanInstJobs
|
|
289
292
|
rescue StandardError => e
|
290
293
|
raise connection.send(:translate_exception, e, 'COPY FROM STDIN')
|
291
294
|
end
|
295
|
+
connection.execute "INSERT INTO #{::Delayed::Job.quoted_table_name} (#{quoted_keys})
|
296
|
+
SELECT #{quoted_keys} FROM delayed_jobs_bulk_copy
|
297
|
+
ON CONFLICT (singleton) WHERE singleton IS NOT NULL AND locked_by IS NULL DO NOTHING"
|
292
298
|
result.cmd_tuples
|
293
299
|
end
|
294
300
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: switchman-inst-jobs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.
|
4
|
+
version: 3.2.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bryan Petty
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-12-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: inst-jobs
|
@@ -19,7 +19,7 @@ dependencies:
|
|
19
19
|
version: 2.4.9
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
|
-
version: '
|
22
|
+
version: '4.0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -29,7 +29,7 @@ dependencies:
|
|
29
29
|
version: 2.4.9
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
|
-
version: '
|
32
|
+
version: '4.0'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: parallel
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -260,7 +260,7 @@ dependencies:
|
|
260
260
|
- - "~>"
|
261
261
|
- !ruby/object:Gem::Version
|
262
262
|
version: '1.4'
|
263
|
-
description:
|
263
|
+
description:
|
264
264
|
email:
|
265
265
|
- bpetty@instructure.com
|
266
266
|
executables: []
|
@@ -300,6 +300,11 @@ files:
|
|
300
300
|
- db/migrate/20210812210128_add_singleton_column.rb
|
301
301
|
- db/migrate/20210917232626_add_delete_conflicting_singletons_before_unlock_trigger.rb
|
302
302
|
- db/migrate/20210928174754_fix_singleton_condition_in_before_insert.rb
|
303
|
+
- db/migrate/20210929204903_update_conflicting_singleton_function_to_use_index.rb
|
304
|
+
- db/migrate/20211101190934_update_after_delete_trigger_for_singleton_index.rb
|
305
|
+
- db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb
|
306
|
+
- db/migrate/20211220112800_fix_singleton_race_condition_insert.rb
|
307
|
+
- db/migrate/20211220113000_fix_singleton_race_condition_delete.rb
|
303
308
|
- lib/switchman-inst-jobs.rb
|
304
309
|
- lib/switchman_inst_jobs.rb
|
305
310
|
- lib/switchman_inst_jobs/active_record/connection_adapters/postgresql_adapter.rb
|
@@ -323,8 +328,10 @@ files:
|
|
323
328
|
homepage: https://github.com/instructure/switchman-inst-jobs
|
324
329
|
licenses:
|
325
330
|
- MIT
|
326
|
-
metadata:
|
327
|
-
|
331
|
+
metadata:
|
332
|
+
allowed_push_host: https://rubygems.org
|
333
|
+
rubygems_mfa_required: 'true'
|
334
|
+
post_install_message:
|
328
335
|
rdoc_options: []
|
329
336
|
require_paths:
|
330
337
|
- lib
|
@@ -339,8 +346,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
339
346
|
- !ruby/object:Gem::Version
|
340
347
|
version: '0'
|
341
348
|
requirements: []
|
342
|
-
rubygems_version: 3.
|
343
|
-
signing_key:
|
349
|
+
rubygems_version: 3.1.4
|
350
|
+
signing_key:
|
344
351
|
specification_version: 4
|
345
352
|
summary: Switchman and Instructure Jobs compatibility gem.
|
346
353
|
test_files: []
|