switchman-inst-jobs 3.1.0 → 3.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/db/migrate/20210809145804_add_n_strand_index.rb +12 -0
- data/db/migrate/20210812210128_add_singleton_column.rb +203 -0
- data/lib/switchman_inst_jobs/delayed/backend/base.rb +5 -4
- data/lib/switchman_inst_jobs/delayed/settings.rb +9 -0
- data/lib/switchman_inst_jobs/delayed/worker/health_check.rb +15 -14
- data/lib/switchman_inst_jobs/jobs_migrator.rb +49 -40
- data/lib/switchman_inst_jobs/version.rb +1 -1
- data/lib/switchman_inst_jobs.rb +1 -4
- metadata +14 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9eece1d5d1e3c141c242aa24a81d5582a34639f09ea55e9826ed95504e442a22
|
4
|
+
data.tar.gz: 3e45e49e403dc6b0d60c7803c84b23d48aaf05ce675b3d40e275d9a83b562b28
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b5f209f5e754d2ec9d31af0afe618894c45b10f0be2c8d04e8fdbf4565c4f70ec4a3546fb3c86520832e5b5f102fd8fb2572ccaacaa8a2f332422d8b4fc93278
|
7
|
+
data.tar.gz: c59a09f62c4c2247dd3b8a62520f94490d9c1b84988f131f4c19abb759eb5f20397553b68617c2fe00d5a15c1f169b2e1ceff4ad2fa3882487d53ec297d71032
|
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class AddNStrandIndex < ActiveRecord::Migration[5.2]
|
4
|
+
disable_ddl_transaction!
|
5
|
+
|
6
|
+
def change
|
7
|
+
add_index :delayed_jobs, %i[strand next_in_strand id],
|
8
|
+
name: 'n_strand_index',
|
9
|
+
where: 'strand IS NOT NULL',
|
10
|
+
algorithm: :concurrently
|
11
|
+
end
|
12
|
+
end
|
@@ -0,0 +1,203 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class AddSingletonColumn < ActiveRecord::Migration[5.2]
|
4
|
+
disable_ddl_transaction!
|
5
|
+
|
6
|
+
def change
|
7
|
+
add_column :delayed_jobs, :singleton, :string, if_not_exists: true
|
8
|
+
add_column :failed_jobs, :singleton, :string, if_not_exists: true
|
9
|
+
# only one job can be queued in a singleton
|
10
|
+
add_index :delayed_jobs,
|
11
|
+
:singleton,
|
12
|
+
where: 'singleton IS NOT NULL AND locked_by IS NULL',
|
13
|
+
unique: true,
|
14
|
+
name: 'index_delayed_jobs_on_singleton_not_running',
|
15
|
+
algorithm: :concurrently
|
16
|
+
# only one job can be running for a singleton
|
17
|
+
add_index :delayed_jobs,
|
18
|
+
:singleton,
|
19
|
+
where: 'singleton IS NOT NULL AND locked_by IS NOT NULL',
|
20
|
+
unique: true,
|
21
|
+
name: 'index_delayed_jobs_on_singleton_running',
|
22
|
+
algorithm: :concurrently
|
23
|
+
|
24
|
+
reversible do |direction|
|
25
|
+
direction.up do
|
26
|
+
execute(<<~SQL)
|
27
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
|
28
|
+
DECLARE
|
29
|
+
running_count integer;
|
30
|
+
should_lock boolean;
|
31
|
+
should_be_precise boolean;
|
32
|
+
update_query varchar;
|
33
|
+
skip_locked varchar;
|
34
|
+
BEGIN
|
35
|
+
IF OLD.strand IS NOT NULL THEN
|
36
|
+
should_lock := true;
|
37
|
+
should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
|
38
|
+
|
39
|
+
IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
|
40
|
+
running_count := (SELECT COUNT(*) FROM (
|
41
|
+
SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
42
|
+
) subquery_for_count);
|
43
|
+
should_lock := running_count < OLD.max_concurrent;
|
44
|
+
END IF;
|
45
|
+
|
46
|
+
IF should_lock THEN
|
47
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
|
48
|
+
END IF;
|
49
|
+
|
50
|
+
-- note that we don't really care if the row we're deleting has a singleton, or if it even
|
51
|
+
-- matches the row(s) we're going to update. we just need to make sure that whatever
|
52
|
+
-- singleton we grab isn't already running (which is a simple existence check, since
|
53
|
+
-- the unique indexes ensure there is at most one singleton running, and one queued)
|
54
|
+
update_query := 'UPDATE delayed_jobs SET next_in_strand=true WHERE id IN (
|
55
|
+
SELECT id FROM delayed_jobs j2
|
56
|
+
WHERE next_in_strand=false AND
|
57
|
+
j2.strand=$1.strand AND
|
58
|
+
(j2.singleton IS NULL OR NOT EXISTS (SELECT 1 FROM delayed_jobs j3 WHERE j3.singleton=j2.singleton AND j3.id<>j2.id))
|
59
|
+
ORDER BY j2.strand_order_override ASC, j2.id ASC
|
60
|
+
LIMIT ';
|
61
|
+
|
62
|
+
IF should_be_precise THEN
|
63
|
+
running_count := (SELECT COUNT(*) FROM (
|
64
|
+
SELECT 1 FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
65
|
+
) s);
|
66
|
+
IF running_count < OLD.max_concurrent THEN
|
67
|
+
update_query := update_query || '($1.max_concurrent - $2)';
|
68
|
+
ELSE
|
69
|
+
-- we have too many running already; just bail
|
70
|
+
RETURN OLD;
|
71
|
+
END IF;
|
72
|
+
ELSE
|
73
|
+
update_query := update_query || '1';
|
74
|
+
|
75
|
+
-- n-strands don't require precise ordering; we can make this query more performant
|
76
|
+
IF OLD.max_concurrent > 1 THEN
|
77
|
+
skip_locked := ' SKIP LOCKED';
|
78
|
+
END IF;
|
79
|
+
END IF;
|
80
|
+
|
81
|
+
update_query := update_query || ' FOR UPDATE' || COALESCE(skip_locked, '') || ')';
|
82
|
+
EXECUTE update_query USING OLD, running_count;
|
83
|
+
ELSIF OLD.singleton IS NOT NULL THEN
|
84
|
+
UPDATE delayed_jobs SET next_in_strand = 't' WHERE singleton=OLD.singleton AND next_in_strand=false;
|
85
|
+
END IF;
|
86
|
+
RETURN OLD;
|
87
|
+
END;
|
88
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
89
|
+
SQL
|
90
|
+
execute(<<~SQL)
|
91
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
|
92
|
+
BEGIN
|
93
|
+
RAISE NOTICE 'inserting job';
|
94
|
+
IF NEW.strand IS NOT NULL THEN
|
95
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
|
96
|
+
IF (SELECT COUNT(*) FROM (
|
97
|
+
SELECT 1 FROM delayed_jobs WHERE strand = NEW.strand AND next_in_strand=true LIMIT NEW.max_concurrent
|
98
|
+
) s) = NEW.max_concurrent THEN
|
99
|
+
NEW.next_in_strand := false;
|
100
|
+
END IF;
|
101
|
+
END IF;
|
102
|
+
IF NEW.singleton IS NOT NULL THEN
|
103
|
+
RAISE NOTICE 'inserting job that is a singleton';
|
104
|
+
PERFORM 1 FROM delayed_jobs WHERE singleton = NEW.singleton;
|
105
|
+
IF FOUND THEN
|
106
|
+
RAISE NOTICE 'and not first';
|
107
|
+
NEW.next_in_strand := false;
|
108
|
+
END IF;
|
109
|
+
END IF;
|
110
|
+
RETURN NEW;
|
111
|
+
END;
|
112
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
113
|
+
SQL
|
114
|
+
end
|
115
|
+
direction.down do
|
116
|
+
execute(<<~SQL)
|
117
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')} () RETURNS trigger AS $$
|
118
|
+
DECLARE
|
119
|
+
running_count integer;
|
120
|
+
should_lock boolean;
|
121
|
+
should_be_precise boolean;
|
122
|
+
BEGIN
|
123
|
+
IF OLD.strand IS NOT NULL THEN
|
124
|
+
should_lock := true;
|
125
|
+
should_be_precise := OLD.id % (OLD.max_concurrent * 4) = 0;
|
126
|
+
|
127
|
+
IF NOT should_be_precise AND OLD.max_concurrent > 16 THEN
|
128
|
+
running_count := (SELECT COUNT(*) FROM (
|
129
|
+
SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
130
|
+
) subquery_for_count);
|
131
|
+
should_lock := running_count < OLD.max_concurrent;
|
132
|
+
END IF;
|
133
|
+
|
134
|
+
IF should_lock THEN
|
135
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(OLD.strand));
|
136
|
+
END IF;
|
137
|
+
|
138
|
+
IF should_be_precise THEN
|
139
|
+
running_count := (SELECT COUNT(*) FROM (
|
140
|
+
SELECT 1 as one FROM delayed_jobs WHERE strand = OLD.strand AND next_in_strand = 't' LIMIT OLD.max_concurrent
|
141
|
+
) subquery_for_count);
|
142
|
+
IF running_count < OLD.max_concurrent THEN
|
143
|
+
UPDATE delayed_jobs SET next_in_strand = 't' WHERE id IN (
|
144
|
+
SELECT id FROM delayed_jobs j2 WHERE next_in_strand = 'f' AND
|
145
|
+
j2.strand = OLD.strand ORDER BY j2.strand_order_override ASC, j2.id ASC LIMIT (OLD.max_concurrent - running_count) FOR UPDATE
|
146
|
+
);
|
147
|
+
END IF;
|
148
|
+
ELSE
|
149
|
+
-- n-strands don't require precise ordering; we can make this query more performant
|
150
|
+
IF OLD.max_concurrent > 1 THEN
|
151
|
+
UPDATE delayed_jobs SET next_in_strand = 't' WHERE id =
|
152
|
+
(SELECT id FROM delayed_jobs j2 WHERE next_in_strand = 'f' AND
|
153
|
+
j2.strand = OLD.strand ORDER BY j2.strand_order_override ASC, j2.id ASC LIMIT 1 FOR UPDATE SKIP LOCKED);
|
154
|
+
ELSE
|
155
|
+
UPDATE delayed_jobs SET next_in_strand = 't' WHERE id =
|
156
|
+
(SELECT id FROM delayed_jobs j2 WHERE next_in_strand = 'f' AND
|
157
|
+
j2.strand = OLD.strand ORDER BY j2.strand_order_override ASC, j2.id ASC LIMIT 1 FOR UPDATE);
|
158
|
+
END IF;
|
159
|
+
END IF;
|
160
|
+
END IF;
|
161
|
+
RETURN OLD;
|
162
|
+
END;
|
163
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
164
|
+
SQL
|
165
|
+
execute(<<~SQL)
|
166
|
+
CREATE OR REPLACE FUNCTION #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')} () RETURNS trigger AS $$
|
167
|
+
BEGIN
|
168
|
+
IF NEW.strand IS NOT NULL THEN
|
169
|
+
PERFORM pg_advisory_xact_lock(half_md5_as_bigint(NEW.strand));
|
170
|
+
IF (SELECT COUNT(*) FROM (
|
171
|
+
SELECT 1 AS one FROM delayed_jobs WHERE strand = NEW.strand LIMIT NEW.max_concurrent
|
172
|
+
) subquery_for_count) = NEW.max_concurrent THEN
|
173
|
+
NEW.next_in_strand := 'f';
|
174
|
+
END IF;
|
175
|
+
END IF;
|
176
|
+
RETURN NEW;
|
177
|
+
END;
|
178
|
+
$$ LANGUAGE plpgsql SET search_path TO #{::Switchman::Shard.current.name};
|
179
|
+
SQL
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
connection.transaction do
|
184
|
+
reversible do |direction|
|
185
|
+
direction.up do
|
186
|
+
drop_triggers
|
187
|
+
execute("CREATE TRIGGER delayed_jobs_before_insert_row_tr BEFORE INSERT ON #{::Delayed::Job.quoted_table_name} FOR EACH ROW WHEN (NEW.strand IS NOT NULL OR NEW.singleton IS NOT NULL) EXECUTE PROCEDURE #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')}()")
|
188
|
+
execute("CREATE TRIGGER delayed_jobs_after_delete_row_tr AFTER DELETE ON #{::Delayed::Job.quoted_table_name} FOR EACH ROW WHEN ((OLD.strand IS NOT NULL OR OLD.singleton IS NOT NULL) AND OLD.next_in_strand=true) EXECUTE PROCEDURE #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')}()")
|
189
|
+
end
|
190
|
+
direction.down do
|
191
|
+
drop_triggers
|
192
|
+
execute("CREATE TRIGGER delayed_jobs_before_insert_row_tr BEFORE INSERT ON #{::Delayed::Job.quoted_table_name} FOR EACH ROW WHEN (NEW.strand IS NOT NULL) EXECUTE PROCEDURE #{connection.quote_table_name('delayed_jobs_before_insert_row_tr_fn')}()")
|
193
|
+
# The call parentheses must sit outside quote_table_name; quoting 'fn()' would make them part of the identifier.
execute("CREATE TRIGGER delayed_jobs_after_delete_row_tr AFTER DELETE ON #{::Delayed::Job.quoted_table_name} FOR EACH ROW WHEN (OLD.strand IS NOT NULL AND OLD.next_in_strand = 't') EXECUTE PROCEDURE #{connection.quote_table_name('delayed_jobs_after_delete_row_tr_fn')}()")
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|
198
|
+
|
199
|
+
def drop_triggers
|
200
|
+
execute("DROP TRIGGER delayed_jobs_before_insert_row_tr ON #{::Delayed::Job.quoted_table_name}")
|
201
|
+
execute("DROP TRIGGER delayed_jobs_after_delete_row_tr ON #{::Delayed::Job.quoted_table_name}")
|
202
|
+
end
|
203
|
+
end
|
@@ -18,7 +18,7 @@ module SwitchmanInstJobs
|
|
18
18
|
enqueue_options = options.merge(
|
19
19
|
current_shard: current_shard
|
20
20
|
)
|
21
|
-
enqueue_job = -> { ::GuardRail.activate(:
|
21
|
+
enqueue_job = -> { ::GuardRail.activate(:primary) { super(object, **enqueue_options) } }
|
22
22
|
|
23
23
|
# Another dj shard must be currently manually activated, so just use that
|
24
24
|
# In general this will only happen in unusual circumstances like tests
|
@@ -43,8 +43,7 @@ module SwitchmanInstJobs
|
|
43
43
|
end
|
44
44
|
|
45
45
|
def configured_shard_ids
|
46
|
-
|
47
|
-
map { |w| w['shard'] }.compact.uniq
|
46
|
+
::SwitchmanInstJobs::Delayed::Settings.configured_shard_ids
|
48
47
|
end
|
49
48
|
|
50
49
|
def processes_locked_locally
|
@@ -74,7 +73,7 @@ module SwitchmanInstJobs
|
|
74
73
|
self.shard_id = shard.id
|
75
74
|
self.shard_id = nil if shard.is_a?(::Switchman::DefaultShard)
|
76
75
|
# If jobs are held for a shard, enqueue new ones as held as well
|
77
|
-
return unless shard.jobs_held
|
76
|
+
return unless ::Switchman::Shard.columns_hash.key?('jobs_held') && shard.jobs_held
|
78
77
|
|
79
78
|
self.locked_by = ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY
|
80
79
|
self.locked_at = ::Delayed::Job.db_time_now
|
@@ -82,6 +81,8 @@ module SwitchmanInstJobs
|
|
82
81
|
end
|
83
82
|
|
84
83
|
def invoke_job
|
84
|
+
raise ShardNotFoundError, shard_id unless current_shard
|
85
|
+
|
85
86
|
current_shard.activate { super }
|
86
87
|
end
|
87
88
|
|
@@ -18,22 +18,23 @@ module SwitchmanInstJobs
|
|
18
18
|
::Delayed::Settings.worker_health_check_config['service_name'] = original_service_name
|
19
19
|
end
|
20
20
|
|
21
|
-
def reschedule_abandoned_jobs
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
21
|
+
def reschedule_abandoned_jobs
|
22
|
+
shard_ids = ::SwitchmanInstJobs::Delayed::Settings.configured_shard_ids
|
23
|
+
shards = shard_ids.map { |shard_id| ::Delayed::Worker.shard(shard_id) }
|
24
|
+
::Switchman::Shard.with_each_shard(shards, [:delayed_jobs]) do
|
25
|
+
dj_shard = ::Switchman::Shard.current(:delayed_jobs)
|
26
|
+
dj_shard.activate do
|
27
|
+
munge_service_name(dj_shard) do
|
28
|
+
# because this rescheduling process is running on every host, we need
|
29
|
+
# to make sure that it's functioning for each shard the current
|
30
|
+
# host is programmed to interact with, but ONLY for those shards.
|
31
|
+
# reading the config lets us iterate over any shards this host should
|
32
|
+
# work with and lets us pick the correct service name to identify which
|
33
|
+
# hosts are currently alive and valid via the health checks
|
34
|
+
super()
|
35
|
+
end
|
27
36
|
end
|
28
37
|
end
|
29
|
-
|
30
|
-
::Switchman::Shard.with_each_shard(shards, [:delayed_jobs], exception: :ignore) do
|
31
|
-
shard = ::Switchman::Shard.current(:delayed_jobs)
|
32
|
-
singleton = <<~SINGLETON
|
33
|
-
periodic: Delayed::Worker::HealthCheck.reschedule_abandoned_jobs:#{shard.id}
|
34
|
-
SINGLETON
|
35
|
-
delay(singleton: singleton).reschedule_abandoned_jobs(call_super: shard)
|
36
|
-
end
|
37
38
|
end
|
38
39
|
end
|
39
40
|
end
|
@@ -1,7 +1,3 @@
|
|
1
|
-
# Just disabling all the rubocop metrics for this file for now,
|
2
|
-
# as it is a direct port-in of existing code
|
3
|
-
|
4
|
-
# rubocop:disable Metrics/BlockLength, Metrics/MethodLength, Metrics/AbcSize, Metrics/ClassLength
|
5
1
|
require 'set'
|
6
2
|
require 'parallel'
|
7
3
|
|
@@ -39,16 +35,12 @@ module SwitchmanInstJobs
|
|
39
35
|
|
40
36
|
# Do the updates in batches and then just clear redis instead of clearing them one at a time
|
41
37
|
target_shards.each do |target_shard, shards|
|
42
|
-
|
38
|
+
updates = { delayed_jobs_shard_id: target_shard, block_stranded: true }
|
39
|
+
updates[:updated_at] = Time.zone.now if ::Switchman::Shard.column_names.include?('updated_at')
|
40
|
+
::Switchman::Shard.where(id: shards).update_all(updates)
|
43
41
|
end
|
44
42
|
clear_shard_cache
|
45
43
|
|
46
|
-
# Wait a little over the 60 second in-process shard cache clearing
|
47
|
-
# threshold to ensure that all new stranded jobs are now being
|
48
|
-
# enqueued with next_in_strand: false
|
49
|
-
Rails.logger.debug('Waiting for caches to clear')
|
50
|
-
sleep(65) unless @skip_cache_wait
|
51
|
-
|
52
44
|
::Switchman::Shard.clear_cache
|
53
45
|
# rubocop:disable Style/CombinableLoops
|
54
46
|
# We first migrate strands so that we can stop blocking strands before we migrate unstranded jobs
|
@@ -59,11 +51,33 @@ module SwitchmanInstJobs
|
|
59
51
|
source_shards.each do |s|
|
60
52
|
::Switchman::Shard.lookup(s).activate(:delayed_jobs) { migrate_everything }
|
61
53
|
end
|
54
|
+
ensure_unblock_stranded_for(shard_map.map(&:first))
|
62
55
|
# rubocop:enable Style/CombinableLoops
|
63
56
|
end
|
64
57
|
|
65
|
-
|
58
|
+
# if :migrate_strands ran on any shards that fell into scenario 1, then
|
59
|
+
# block_stranded never got flipped, so do that now.
|
60
|
+
def ensure_unblock_stranded_for(shards)
|
61
|
+
shards = ::Switchman::Shard.where(id: shards, block_stranded: true).to_a
|
62
|
+
return unless shards.any?
|
63
|
+
|
64
|
+
::Switchman::Shard.where(id: shards).update_all(block_stranded: false)
|
65
|
+
clear_shard_cache
|
66
|
+
|
67
|
+
# shards is an array of shard objects that is now stale because block_stranded has been updated.
|
68
|
+
shards.map(&:delayed_jobs_shard).uniq.each do |dj_shard|
|
69
|
+
unblock_strands(dj_shard)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
def clear_shard_cache(debug_message = nil)
|
66
74
|
::Switchman.cache.clear
|
75
|
+
Rails.logger.debug("Waiting for caches to clear #{debug_message}")
|
76
|
+
# Wait a little over the 60 second in-process shard cache clearing
|
77
|
+
# threshold to ensure that all new stranded jobs are now being
|
78
|
+
# enqueued with next_in_strand: false
|
79
|
+
# @skip_cache_wait is for spec usage only
|
80
|
+
sleep(65) unless @skip_cache_wait
|
67
81
|
end
|
68
82
|
|
69
83
|
# This method expects that all relevant shards already have block_stranded: true
|
@@ -125,7 +139,7 @@ module SwitchmanInstJobs
|
|
125
139
|
update_all(next_in_strand: false)
|
126
140
|
end
|
127
141
|
|
128
|
-
# 4) is taken care of here, by
|
142
|
+
# 4) is taken care of here, by leaving next_in_strand alone and
|
129
143
|
# it should execute on the new shard
|
130
144
|
batch_move_jobs(
|
131
145
|
target_shard: target_shard,
|
@@ -141,35 +155,32 @@ module SwitchmanInstJobs
|
|
141
155
|
updated = ::Switchman::Shard.where(id: source_shard_ids, block_stranded: true).
|
142
156
|
update_all(block_stranded: false)
|
143
157
|
# If this is being manually re-run for some reason to clean something up, don't wait for nothing to happen
|
144
|
-
unless updated.zero?
|
145
|
-
|
146
|
-
# Wait a little over the 60 second in-process shard cache clearing
|
147
|
-
# threshold to ensure that all new stranded jobs are now being
|
148
|
-
# enqueued with next_in_strand: false
|
149
|
-
Rails.logger.debug("Waiting for caches to clear (#{source_shard.id} -> #{target_shard.id})")
|
150
|
-
# for spec usage only
|
151
|
-
sleep(65) unless @skip_cache_wait
|
152
|
-
end
|
158
|
+
clear_shard_cache("(#{source_shard.id} -> #{target_shard.id})") unless updated.zero?
|
159
|
+
|
153
160
|
::Switchman::Shard.clear_cache
|
154
161
|
# At this time, let's unblock all the strands on the target shard that aren't being held by a blocker
|
155
162
|
# but actually could have run and we just didn't know it because we didn't know if they had jobs
|
156
163
|
# on the source shard
|
157
|
-
target_shard
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
164
|
+
unblock_strands(target_shard)
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
def unblock_strands(target_shard)
|
170
|
+
target_shard.activate(:delayed_jobs) do
|
171
|
+
loop do
|
172
|
+
# We only want to unlock stranded jobs where they don't belong to a blocked shard. If they *do* belong
|
173
|
+
# to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
|
174
|
+
# this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
|
175
|
+
# logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
|
176
|
+
# batches
|
177
|
+
break if ::Delayed::Job.where(id: ::Delayed::Job.select('DISTINCT ON (strand) id').
|
178
|
+
where.not(strand: nil).
|
179
|
+
where.not(shard_id: ::Switchman::Shard.where(block_stranded: true).pluck(:id)).where(
|
180
|
+
::Delayed::Job.select(1).from("#{::Delayed::Job.quoted_table_name} dj2").
|
181
|
+
where("dj2.next_in_strand = true OR dj2.source = 'JobsMigrator::StrandBlocker'").
|
182
|
+
where('dj2.strand = delayed_jobs.strand').arel.exists.not
|
183
|
+
).order(:strand, :strand_order_override, :id)).limit(500).update_all(next_in_strand: true).zero?
|
173
184
|
end
|
174
185
|
end
|
175
186
|
end
|
@@ -295,5 +306,3 @@ module SwitchmanInstJobs
|
|
295
306
|
end
|
296
307
|
end
|
297
308
|
end
|
298
|
-
|
299
|
-
# rubocop:enable Metrics/BlockLength, Metrics/MethodLength, Metrics/AbcSize, Metrics/ClassLength
|
data/lib/switchman_inst_jobs.rb
CHANGED
@@ -14,10 +14,6 @@ module SwitchmanInstJobs
|
|
14
14
|
::Delayed::Backend::ActiveRecord::Job.prepend(
|
15
15
|
Delayed::Backend::Base
|
16
16
|
)
|
17
|
-
::Delayed::Backend::Redis::Job.prepend(
|
18
|
-
Delayed::Backend::Base
|
19
|
-
)
|
20
|
-
::Delayed::Backend::Redis::Job.column :shard_id, :integer
|
21
17
|
::Delayed::Pool.prepend Delayed::Pool
|
22
18
|
::Delayed::Worker.prepend Delayed::Worker
|
23
19
|
::Delayed::Worker::HealthCheck.prepend Delayed::Worker::HealthCheck
|
@@ -38,6 +34,7 @@ end
|
|
38
34
|
|
39
35
|
require 'switchman_inst_jobs/active_record/connection_adapters/postgresql_adapter'
|
40
36
|
require 'switchman_inst_jobs/active_record/migration'
|
37
|
+
require 'switchman_inst_jobs/delayed/settings'
|
41
38
|
require 'switchman_inst_jobs/delayed/backend/base'
|
42
39
|
require 'switchman_inst_jobs/delayed/message_sending'
|
43
40
|
require 'switchman_inst_jobs/delayed/pool'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: switchman-inst-jobs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1
|
4
|
+
version: 3.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bryan Petty
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-09-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: inst-jobs
|
@@ -16,7 +16,7 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
19
|
+
version: 2.4.0
|
20
20
|
- - "<"
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: '3.0'
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version:
|
29
|
+
version: 2.4.0
|
30
30
|
- - "<"
|
31
31
|
- !ruby/object:Gem::Version
|
32
32
|
version: '3.0'
|
@@ -107,7 +107,7 @@ dependencies:
|
|
107
107
|
- !ruby/object:Gem::Version
|
108
108
|
version: '0'
|
109
109
|
- !ruby/object:Gem::Dependency
|
110
|
-
name:
|
110
|
+
name: diplomat
|
111
111
|
requirement: !ruby/object:Gem::Requirement
|
112
112
|
requirements:
|
113
113
|
- - ">="
|
@@ -260,7 +260,7 @@ dependencies:
|
|
260
260
|
- - "~>"
|
261
261
|
- !ruby/object:Gem::Version
|
262
262
|
version: '1.4'
|
263
|
-
description:
|
263
|
+
description:
|
264
264
|
email:
|
265
265
|
- bpetty@instructure.com
|
266
266
|
executables: []
|
@@ -296,6 +296,8 @@ files:
|
|
296
296
|
- db/migrate/20200822014259_add_block_stranded_to_switchman_shards.rb
|
297
297
|
- db/migrate/20200824222232_speed_up_max_concurrent_delete_trigger.rb
|
298
298
|
- db/migrate/20200825011002_add_strand_order_override.rb
|
299
|
+
- db/migrate/20210809145804_add_n_strand_index.rb
|
300
|
+
- db/migrate/20210812210128_add_singleton_column.rb
|
299
301
|
- lib/switchman-inst-jobs.rb
|
300
302
|
- lib/switchman_inst_jobs.rb
|
301
303
|
- lib/switchman_inst_jobs/active_record/connection_adapters/postgresql_adapter.rb
|
@@ -303,6 +305,7 @@ files:
|
|
303
305
|
- lib/switchman_inst_jobs/delayed/backend/base.rb
|
304
306
|
- lib/switchman_inst_jobs/delayed/message_sending.rb
|
305
307
|
- lib/switchman_inst_jobs/delayed/pool.rb
|
308
|
+
- lib/switchman_inst_jobs/delayed/settings.rb
|
306
309
|
- lib/switchman_inst_jobs/delayed/worker.rb
|
307
310
|
- lib/switchman_inst_jobs/delayed/worker/health_check.rb
|
308
311
|
- lib/switchman_inst_jobs/engine.rb
|
@@ -319,7 +322,7 @@ homepage: https://github.com/instructure/switchman-inst-jobs
|
|
319
322
|
licenses:
|
320
323
|
- MIT
|
321
324
|
metadata: {}
|
322
|
-
post_install_message:
|
325
|
+
post_install_message:
|
323
326
|
rdoc_options: []
|
324
327
|
require_paths:
|
325
328
|
- lib
|
@@ -327,15 +330,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
327
330
|
requirements:
|
328
331
|
- - ">="
|
329
332
|
- !ruby/object:Gem::Version
|
330
|
-
version: '2.
|
333
|
+
version: '2.6'
|
331
334
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
332
335
|
requirements:
|
333
336
|
- - ">="
|
334
337
|
- !ruby/object:Gem::Version
|
335
338
|
version: '0'
|
336
339
|
requirements: []
|
337
|
-
rubygems_version: 3.
|
338
|
-
signing_key:
|
340
|
+
rubygems_version: 3.2.24
|
341
|
+
signing_key:
|
339
342
|
specification_version: 4
|
340
343
|
summary: Switchman and Instructure Jobs compatibility gem.
|
341
344
|
test_files: []
|