inst-jobs 3.0.5 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 11059ce779a0ff644edcae25335e23b7ad2a4448dfb48e8cbc95295f3e6c468c
4
- data.tar.gz: 84dfa8a1185219823013e1363190b0469e397dd423105eb29a205fea1e7207fa
3
+ metadata.gz: 245e01e85640f50351b2bca5ae7b7391c5373c0f8bc5128f1294d9e7566b5346
4
+ data.tar.gz: 38622bfe41a62682e198119a6d86e10210e06341d1a045f30cdabd6281600566
5
5
  SHA512:
6
- metadata.gz: 9e4c5291673edccd5760cd11d087102e0d8829d25793a6ca4f9f3255336b3abb1ce2bcf7426a79a76d0f373b44ec2b0152c395974f78a4552942f7a858d9d499
7
- data.tar.gz: 77aea9b18c3492e2b98f26a23c0591c8da9a31a34c0736868f00d54b3eadff77f7b2bbeb00b91b0819f93bffcefb7a26d1f2c829ebd82bc6b58a5c5a7bb01b16
6
+ metadata.gz: ced7dd5a9cfe21b545d1ade7d05efabd53de6eff97589626dd38ce650b1da641d93811b48848339bed3057f3e873c77f6c6b4d5d3d29640a68694d40f20402b2
7
+ data.tar.gz: 82f0bfad222bef95dec0c154e6a649c184ba4f09c122db0a52e6f5d17f30ce836987def104d4b49fab4b293838e0ba37034d1cad3fdb4e2df2ebe6c7a0556ec3
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ class FixSingletonRaceConditionInsert < ActiveRecord::Migration[5.2]
4
+ def change
5
+ reversible do |direction|
6
+ direction.up do
7
+ execute(<<~SQL)
8
+ CREATE OR REPLACE FUNCTION delayed_jobs_before_insert_row_tr_fn () RETURNS trigger AS $$
9
+ BEGIN
10
+ IF NEW.strand IS NOT NULL THEN
11
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
12
+ IF (SELECT COUNT(*) FROM (
13
+ SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
14
+ ) s) = NEW.max_concurrent THEN
15
+ NEW.next_in_strand := false;
16
+ END IF;
17
+ END IF;
18
+ IF NEW.singleton IS NOT NULL THEN
19
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', NEW.singleton)));
20
+ -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
21
+ -- rather than doing a seq scan
22
+ PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
23
+ IF FOUND THEN
24
+ NEW.next_in_strand := false;
25
+ END IF;
26
+ END IF;
27
+ RETURN NEW;
28
+ END;
29
+ $$ LANGUAGE plpgsql;
30
+ SQL
31
+ end
32
+ direction.down do
33
+ execute(<<~SQL)
34
+ CREATE OR REPLACE FUNCTION delayed_jobs_before_insert_row_tr_fn () RETURNS trigger AS $$
35
+ BEGIN
36
+ IF NEW.strand IS NOT NULL THEN
37
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
38
+ IF (SELECT COUNT(*) FROM (
39
+ SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
40
+ ) s) = NEW.max_concurrent THEN
41
+ NEW.next_in_strand := false;
42
+ END IF;
43
+ END IF;
44
+ IF NEW.singleton IS NOT NULL THEN
45
+ -- this condition seems silly, but it forces postgres to use the two partial indexes on singleton,
46
+ -- rather than doing a seq scan
47
+ PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton AND (locked_by IS NULL OR locked_by IS NOT NULL);
48
+ IF FOUND THEN
49
+ NEW.next_in_strand := false;
50
+ END IF;
51
+ END IF;
52
+ RETURN NEW;
53
+ END;
54
+ $$ LANGUAGE plpgsql;
55
+ SQL
56
+ end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,207 @@
1
+ # frozen_string_literal: true
2
+
3
+ class FixSingletonRaceConditionDelete < ActiveRecord::Migration[6.0]
4
+ def up
5
+ execute(<<~SQL)
6
+ CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
7
+ DECLARE
8
+ next_strand varchar;
9
+ running_count integer;
10
+ should_lock boolean;
11
+ should_be_precise boolean;
12
+ update_query varchar;
13
+ skip_locked varchar;
14
+ transition boolean;
15
+ BEGIN
16
+ IF OLD.strand IS NOT NULL THEN
17
+ should_lock := true;
18
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
19
+
20
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
21
+ running_count := (SELECT COUNT(*) FROM (
22
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
23
+ ) subquery_for_count);
24
+ should_lock := running_count < OLD.max_concurrent;
25
+ END IF;
26
+
27
+ IF should_lock THEN
28
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
29
+ END IF;
30
+
31
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
32
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
33
+ -- singleton we grab isn't already running (which is a simple existence check, since
34
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
35
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
36
+ SELECT id FROM delayed_jobs j2
37
+ WHERE next_in_strand=false AND
38
+ j2.strand=$1.strand AND
39
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
40
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
41
+ LIMIT ';
42
+
43
+ IF should_be_precise THEN
44
+ running_count := (SELECT COUNT(*) FROM (
45
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
46
+ ) s);
47
+ IF running_count < OLD.max_concurrent THEN
48
+ update_query := update_query || '($1.max_concurrent - $2)';
49
+ ELSE
50
+ -- we have too many running already; just bail
51
+ RETURN OLD;
52
+ END IF;
53
+ ELSE
54
+ update_query := update_query || '1';
55
+
56
+ -- n-strands don't require precise ordering; we can make this query more performant
57
+ IF OLD.max_concurrent > 1 THEN
58
+ skip_locked := ' SKIP LOCKED';
59
+ END IF;
60
+ END IF;
61
+
62
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
63
+ EXECUTE update_query USING OLD, running_count;
64
+ END IF;
65
+
66
+ IF OLD.singleton IS NOT NULL THEN
67
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(CONCAT('singleton:', OLD.singleton)));
68
+
69
+ transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
70
+
71
+ IF transition THEN
72
+ next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
73
+
74
+ IF next_strand IS NOT NULL THEN
75
+ -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
76
+ IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
77
+ -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
78
+ RETURN OLD;
79
+ END IF;
80
+ END IF;
81
+ ELSIF OLD.strand IS NOT NULL THEN
82
+ -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
83
+ RETURN OLD;
84
+ END IF;
85
+
86
+ -- handles transitioning a singleton from stranded to not stranded --
87
+ -- handles transitioning a singleton from unstranded to stranded --
88
+ -- handles transitioning a singleton from strand A to strand B --
89
+ -- these transitions are a relatively rare case, so we take a shortcut and --
90
+ -- only start the next singleton if its strand does not currently have any running jobs --
91
+ -- if it does, the next stranded job that finishes will start this singleton if it can --
92
+ UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
93
+ SELECT id FROM delayed_jobs j2
94
+ WHERE next_in_strand=false AND
95
+ j2.singleton=OLD.singleton AND
96
+ j2.locked_by IS NULL AND
97
+ (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
98
+ FOR UPDATE
99
+ );
100
+ END IF;
101
+ RETURN OLD;
102
+ END;
103
+ $$ LANGUAGE plpgsql;
104
+ SQL
105
+ end
106
+
107
+ def down
108
+ execute(<<~SQL)
109
+ CREATE OR REPLACE FUNCTION delayed_jobs_after_delete_row_tr_fn () RETURNS trigger AS $$
110
+ DECLARE
111
+ next_strand varchar;
112
+ running_count integer;
113
+ should_lock boolean;
114
+ should_be_precise boolean;
115
+ update_query varchar;
116
+ skip_locked varchar;
117
+ transition boolean;
118
+ BEGIN
119
+ IF OLD.strand IS NOT NULL THEN
120
+ should_lock := true;
121
+ should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
122
+
123
+ IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
124
+ running_count := (SELECT COUNT(*) FROM (
125
+ SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
126
+ ) subquery_for_count);
127
+ should_lock := running_count < OLD.max_concurrent;
128
+ END IF;
129
+
130
+ IF should_lock THEN
131
+ PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
132
+ END IF;
133
+
134
+ -- note that we don't really care if the row we're deleting has a singleton, or if it even
135
+ -- matches the row(s) we're going to update. we just need to make sure that whatever
136
+ -- singleton we grab isn't already running (which is a simple existence check, since
137
+ -- the unique indexes ensure there is at most one singleton running, and one queued)
138
+ update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
139
+ SELECT id FROM delayed_jobs j2
140
+ WHERE next_in_strand=false AND
141
+ j2.strand=$1.strand AND
142
+ (j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id AND (j3.locked_by IS NULL OR j3.locked_by IS NOT NULL)))
143
+ ORDER BY j2.strand_order_override ASC, j2.id ASC
144
+ LIMIT ';
145
+
146
+ IF should_be_precise THEN
147
+ running_count := (SELECT COUNT(*) FROM (
148
+ SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
149
+ ) s);
150
+ IF running_count < OLD.max_concurrent THEN
151
+ update_query := update_query || '($1.max_concurrent - $2)';
152
+ ELSE
153
+ -- we have too many running already; just bail
154
+ RETURN OLD;
155
+ END IF;
156
+ ELSE
157
+ update_query := update_query || '1';
158
+
159
+ -- n-strands don't require precise ordering; we can make this query more performant
160
+ IF OLD.max_concurrent > 1 THEN
161
+ skip_locked := ' SKIP LOCKED';
162
+ END IF;
163
+ END IF;
164
+
165
+ update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
166
+ EXECUTE update_query USING OLD, running_count;
167
+ END IF;
168
+
169
+ IF OLD.singleton IS NOT NULL THEN
170
+ transition := EXISTS (SELECT 1 FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL);
171
+
172
+ IF transition THEN
173
+ next_strand := (SELECT j1.strand FROM delayed_jobs AS j1 WHERE j1.singleton = OLD.singleton AND j1.strand IS DISTINCT FROM OLD.strand AND locked_by IS NULL AND j1.strand IS NOT NULL LIMIT 1);
174
+
175
+ IF next_strand IS NOT NULL THEN
176
+ -- if the singleton has a new strand defined, we need to lock it to ensure we obey n_strand constraints --
177
+ IF NOT pg_try_advisory_xact_lock(half_md5_as_bigint(next_strand)) THEN
178
+ -- a failure to acquire the lock means that another process already has it and will thus handle this singleton --
179
+ RETURN OLD;
180
+ END IF;
181
+ END IF;
182
+ ELSIF OLD.strand IS NOT NULL THEN
183
+ -- if there is no transition and there is a strand then we have already handled this singleton in the case above --
184
+ RETURN OLD;
185
+ END IF;
186
+
187
+ -- handles transitioning a singleton from stranded to not stranded --
188
+ -- handles transitioning a singleton from unstranded to stranded --
189
+ -- handles transitioning a singleton from strand A to strand B --
190
+ -- these transitions are a relatively rare case, so we take a shortcut and --
191
+ -- only start the next singleton if its strand does not currently have any running jobs --
192
+ -- if it does, the next stranded job that finishes will start this singleton if it can --
193
+ UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
194
+ SELECT id FROM delayed_jobs j2
195
+ WHERE next_in_strand=false AND
196
+ j2.singleton=OLD.singleton AND
197
+ j2.locked_by IS NULL AND
198
+ (j2.strand IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.strand=j2.strand AND j3.id<>j2.id))
199
+ FOR UPDATE
200
+ );
201
+ END IF;
202
+ RETURN OLD;
203
+ END;
204
+ $$ LANGUAGE plpgsql;
205
+ SQL
206
+ end
207
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Delayed
4
- VERSION = "3.0.5"
4
+ VERSION = "3.0.6"
5
5
  end
@@ -389,14 +389,10 @@ shared_examples_for "a backend" do
389
389
  expect(job1.reload.handler).to include("ErrorJob")
390
390
  end
391
391
 
392
- context "next_in_strand management - deadlocks", non_transactional: true do
392
+ context "next_in_strand management - deadlocks and race conditions", non_transactional: true do
393
393
  # The following unit tests are fairly slow and non-deterministic. It may be
394
394
  # easier to make them fail quicker and more consistently by adding a random
395
395
  # sleep into the appropriate trigger(s).
396
- #
397
- # Example:
398
- # PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
399
- # PERFORM pg_sleep(random() * 2);
400
396
 
401
397
  def loop_secs(val)
402
398
  loop_start = Time.now.utc
@@ -408,46 +404,119 @@ shared_examples_for "a backend" do
408
404
  end
409
405
  end
410
406
 
411
- it "doesn't deadlock when transitioning from strand_a to strand_b" do
407
+ def loop_until_found(params)
408
+ found = false
409
+
410
+ loop_secs(10.seconds) do
411
+ if Delayed::Job.exists?(**params)
412
+ found = true
413
+ break
414
+ end
415
+ end
416
+
417
+ raise "timed out waiting for condition" unless found
418
+ end
419
+
420
+ def thread_body
421
+ yield
422
+ rescue
423
+ Thread.current.thread_variable_set(:fail, true)
424
+ raise
425
+ end
426
+
427
+ it "doesn't orphan the singleton when two are queued consecutively" do
428
+ # In order to reproduce this one efficiently, you'll probably want to add
429
+ # a sleep within delayed_jobs_before_insert_row_tr_fn.
430
+ # IF NEW.singleton IS NOT NULL THEN
431
+ # ...
432
+ # PERFORM pg_sleep(random() * 2);
433
+ # END IF;
434
+
412
435
  threads = []
413
436
 
414
- def thread_body(j1_params, j2_params)
415
- loop do
416
- j1 = create_job(**j1_params)
417
- j2 = create_job(**j2_params)
437
+ threads << Thread.new do
438
+ thread_body do
439
+ loop do
440
+ create_job(singleton: "singleton_job")
441
+ create_job(singleton: "singleton_job")
442
+ end
443
+ end
444
+ end
418
445
 
419
- expect(j1.reload.next_in_strand).to eq(true)
420
- expect(j2.reload.next_in_strand).to eq(false)
446
+ threads << Thread.new do
447
+ thread_body do
448
+ loop do
449
+ Delayed::Job.get_and_lock_next_available("w1")&.destroy
450
+ end
451
+ end
452
+ end
421
453
 
422
- j1.delete
454
+ threads << Thread.new do
455
+ thread_body do
456
+ loop do
457
+ loop_until_found(singleton: "singleton_job", next_in_strand: true)
458
+ end
459
+ end
460
+ end
423
461
 
424
- # In case we couldn't acquire a lock, we actually need to wait for
425
- # the other thread to set this to true.
426
- loop_secs(10.seconds) do
427
- break if j2.reload.next_in_strand
462
+ begin
463
+ loop_secs(60.seconds) do
464
+ if threads.any? { |x| x.thread_variable_get(:fail) }
465
+ raise "at least one job became orphaned or other error"
428
466
  end
467
+ end
468
+ ensure
469
+ threads.each(&:kill)
470
+ threads.each(&:join)
471
+ end
472
+ end
473
+
474
+ it "doesn't deadlock when transitioning from strand_a to strand_b" do
475
+ # In order to reproduce this one efficiently, you'll probably want to add
476
+ # a sleep within delayed_jobs_after_delete_row_tr_fn.
477
+ # PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
478
+ # PERFORM pg_sleep(random() * 2);
429
479
 
430
- expect(j2.reload.next_in_strand).to eq(true)
480
+ threads = []
481
+
482
+ threads << Thread.new do
483
+ thread_body do
484
+ loop do
485
+ j1 = create_job(singleton: "myjobs", strand: "myjobs2", locked_by: "w1")
486
+ j2 = create_job(singleton: "myjobs", strand: "myjobs")
431
487
 
432
- j2.delete
488
+ j1.delete
489
+ j2.delete
490
+ end
433
491
  end
434
- rescue
435
- Thread.current.thread_variable_set(:fail, true)
436
- raise
437
492
  end
438
493
 
439
494
  threads << Thread.new do
440
- thread_body(
441
- { singleton: "myjobs", strand: "myjobs2", locked_by: "w1" },
442
- { singleton: "myjobs", strand: "myjobs" }
443
- )
495
+ thread_body do
496
+ loop do
497
+ j1 = create_job(singleton: "myjobs2", strand: "myjobs", locked_by: "w1")
498
+ j2 = create_job(singleton: "myjobs2", strand: "myjobs2")
499
+
500
+ j1.delete
501
+ j2.delete
502
+ end
503
+ end
444
504
  end
445
505
 
446
506
  threads << Thread.new do
447
- thread_body(
448
- { singleton: "myjobs2", strand: "myjobs", locked_by: "w1" },
449
- { singleton: "myjobs2", strand: "myjobs2" }
450
- )
507
+ thread_body do
508
+ loop do
509
+ loop_until_found(singleton: "myjobs", next_in_strand: true)
510
+ end
511
+ end
512
+ end
513
+
514
+ threads << Thread.new do
515
+ thread_body do
516
+ loop do
517
+ loop_until_found(singleton: "myjobs2", next_in_strand: true)
518
+ end
519
+ end
451
520
  end
452
521
 
453
522
  begin
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: inst-jobs
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.5
4
+ version: 3.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Cody Cutrer
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2021-12-09 00:00:00.000000000 Z
13
+ date: 2021-12-20 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activerecord
@@ -467,6 +467,8 @@ files:
467
467
  - db/migrate/20210929204903_update_conflicting_singleton_function_to_use_index.rb
468
468
  - db/migrate/20211101190934_update_after_delete_trigger_for_singleton_index.rb
469
469
  - db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb
470
+ - db/migrate/20211220112800_fix_singleton_race_condition_insert.rb
471
+ - db/migrate/20211220113000_fix_singleton_race_condition_delete.rb
470
472
  - exe/inst_jobs
471
473
  - lib/delayed/backend/active_record.rb
472
474
  - lib/delayed/backend/base.rb