switchman-inst-jobs 3.0.3 → 3.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/db/migrate/20110610213249_optimize_delayed_jobs.rb +1 -1
- data/lib/switchman_inst_jobs.rb +1 -0
- data/lib/switchman_inst_jobs/delayed/backend/base.rb +2 -3
- data/lib/switchman_inst_jobs/delayed/pool.rb +1 -1
- data/lib/switchman_inst_jobs/delayed/settings.rb +9 -0
- data/lib/switchman_inst_jobs/delayed/worker/health_check.rb +15 -14
- data/lib/switchman_inst_jobs/jobs_migrator.rb +74 -33
- data/lib/switchman_inst_jobs/switchman/shard.rb +9 -2
- data/lib/switchman_inst_jobs/timed_cache.rb +1 -1
- data/lib/switchman_inst_jobs/version.rb +1 -1
- metadata +18 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4efbe05e879c9162df06f945af688c64876b9c56cc870509687160549359fd39
|
4
|
+
data.tar.gz: e32c8a6dfecd0b6c760d07a4d45eeaf6651d6cc7e70e7b6418d82c5a112f70cd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4fe0103d1b4bc305f0c85ba2135366968eb7c33bb6482b6d302dde9b9532db1d3d09fac64d90dad16fe6c6a438236ede5732d16a22d8538053d814010c9d2c87
|
7
|
+
data.tar.gz: feb2e546ee06bc38f964f58e63cfb59fdc25757952762ef8f8faf0521050d264e9f018ea8e6bb0131b277997d63035f68562cf45a7af13886fbfd80f481f5515
|
@@ -29,7 +29,7 @@ class OptimizeDelayedJobs < ActiveRecord::Migration[4.2]
|
|
29
29
|
add_index :delayed_jobs, %w[strand id], name: 'index_delayed_jobs_on_strand'
|
30
30
|
|
31
31
|
# move all failed jobs to the new failed table
|
32
|
-
Delayed::Backend::ActiveRecord::Job.where(
|
32
|
+
Delayed::Backend::ActiveRecord::Job.where.not(failed_at: nil).find_each do |job|
|
33
33
|
job.fail! unless job.on_hold?
|
34
34
|
end
|
35
35
|
end
|
data/lib/switchman_inst_jobs.rb
CHANGED
@@ -38,6 +38,7 @@ end
|
|
38
38
|
|
39
39
|
require 'switchman_inst_jobs/active_record/connection_adapters/postgresql_adapter'
|
40
40
|
require 'switchman_inst_jobs/active_record/migration'
|
41
|
+
require 'switchman_inst_jobs/delayed/settings'
|
41
42
|
require 'switchman_inst_jobs/delayed/backend/base'
|
42
43
|
require 'switchman_inst_jobs/delayed/message_sending'
|
43
44
|
require 'switchman_inst_jobs/delayed/pool'
|
@@ -43,8 +43,7 @@ module SwitchmanInstJobs
|
|
43
43
|
end
|
44
44
|
|
45
45
|
def configured_shard_ids
|
46
|
-
|
47
|
-
map { |w| w['shard'] }.compact.uniq
|
46
|
+
::SwitchmanInstJobs::Delayed::Settings.configured_shard_ids
|
48
47
|
end
|
49
48
|
|
50
49
|
def processes_locked_locally
|
@@ -93,7 +92,7 @@ module SwitchmanInstJobs
|
|
93
92
|
# likely a missing shard with a stale cache
|
94
93
|
current_shard.send(:clear_cache)
|
95
94
|
::Switchman::Shard.clear_cache
|
96
|
-
raise ShardNotFoundError, shard_id unless ::Switchman::Shard.
|
95
|
+
raise ShardNotFoundError, shard_id unless ::Switchman::Shard.exists?(id: shard_id)
|
97
96
|
|
98
97
|
raise
|
99
98
|
end
|
@@ -17,7 +17,7 @@ module SwitchmanInstJobs
|
|
17
17
|
# We purposely don't .compact to remove nils here, since if any
|
18
18
|
# workers are on the default jobs shard we want to unlock against
|
19
19
|
# that shard too.
|
20
|
-
shard_ids = @config[:workers].
|
20
|
+
shard_ids = @config[:workers].pluck(:shard).uniq
|
21
21
|
shards = shard_ids.map { |shard_id| ::Delayed::Worker.shard(shard_id) }
|
22
22
|
end
|
23
23
|
::Switchman::Shard.with_each_shard(shards, [:delayed_jobs]) do
|
@@ -18,22 +18,23 @@ module SwitchmanInstJobs
|
|
18
18
|
::Delayed::Settings.worker_health_check_config['service_name'] = original_service_name
|
19
19
|
end
|
20
20
|
|
21
|
-
def reschedule_abandoned_jobs
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
21
|
+
def reschedule_abandoned_jobs
|
22
|
+
shard_ids = ::SwitchmanInstJobs::Delayed::Settings.configured_shard_ids
|
23
|
+
shards = shard_ids.map { |shard_id| ::Delayed::Worker.shard(shard_id) }
|
24
|
+
::Switchman::Shard.with_each_shard(shards, [:delayed_jobs]) do
|
25
|
+
dj_shard = ::Switchman::Shard.current(:delayed_jobs)
|
26
|
+
dj_shard.activate do
|
27
|
+
munge_service_name(dj_shard) do
|
28
|
+
# because this rescheduling process is running on every host, we need
|
29
|
+
# to make sure that it's functioning for each shard the current
|
30
|
+
# host is programmed to interact with, but ONLY for those shards.
|
31
|
+
# reading the config lets us iterate over any shards this host should
|
32
|
+
# work with and lets us pick the correct service name to identify which
|
33
|
+
# hosts are currently alive and valid via the health checks
|
34
|
+
super()
|
35
|
+
end
|
27
36
|
end
|
28
37
|
end
|
29
|
-
|
30
|
-
::Switchman::Shard.with_each_shard(shards, [:delayed_jobs], exception: :ignore) do
|
31
|
-
shard = ::Switchman::Shard.current(:delayed_jobs)
|
32
|
-
singleton = <<~SINGLETON
|
33
|
-
periodic: Delayed::Worker::HealthCheck.reschedule_abandoned_jobs:#{shard.id}
|
34
|
-
SINGLETON
|
35
|
-
delay(singleton: singleton).reschedule_abandoned_jobs(call_super: shard)
|
36
|
-
end
|
37
38
|
end
|
38
39
|
end
|
39
40
|
end
|
@@ -29,26 +29,59 @@ module SwitchmanInstJobs
|
|
29
29
|
|
30
30
|
def migrate_shards(shard_map)
|
31
31
|
source_shards = Set[]
|
32
|
+
target_shards = Hash.new([])
|
32
33
|
shard_map.each do |(shard, target_shard)|
|
33
34
|
shard = ::Switchman::Shard.find(shard) unless shard.is_a?(::Switchman::Shard)
|
34
35
|
source_shards << shard.delayed_jobs_shard.id
|
35
|
-
|
36
|
-
shard.
|
36
|
+
target_shard = target_shard.try(:id) || target_shard
|
37
|
+
target_shards[target_shard] += [shard.id]
|
37
38
|
end
|
38
39
|
|
40
|
+
# Do the updates in batches and then just clear redis instead of clearing them one at a time
|
41
|
+
target_shards.each do |target_shard, shards|
|
42
|
+
updates = { delayed_jobs_shard_id: target_shard, block_stranded: true }
|
43
|
+
updates[:updated_at] = Time.zone.now if ::Switchman::Shard.column_names.include?('updated_at')
|
44
|
+
::Switchman::Shard.where(id: shards).update_all(updates)
|
45
|
+
end
|
46
|
+
clear_shard_cache
|
47
|
+
|
48
|
+
::Switchman::Shard.clear_cache
|
49
|
+
# rubocop:disable Style/CombinableLoops
|
50
|
+
# We first migrate strands so that we can stop blocking strands before we migrate unstranded jobs
|
51
|
+
source_shards.each do |s|
|
52
|
+
::Switchman::Shard.lookup(s).activate(:delayed_jobs) { migrate_strands }
|
53
|
+
end
|
54
|
+
|
55
|
+
source_shards.each do |s|
|
56
|
+
::Switchman::Shard.lookup(s).activate(:delayed_jobs) { migrate_everything }
|
57
|
+
end
|
58
|
+
ensure_unblock_stranded_for(shard_map.map(&:first))
|
59
|
+
# rubocop:enable Style/CombinableLoops
|
60
|
+
end
|
61
|
+
|
62
|
+
# if :migrate_strands ran on any shards that fell into scenario 1, then
|
63
|
+
# block_stranded never got flipped, so do that now.
|
64
|
+
def ensure_unblock_stranded_for(shards)
|
65
|
+
shards = ::Switchman::Shard.where(id: shards, block_stranded: true).to_a
|
66
|
+
return unless shards.any?
|
67
|
+
|
68
|
+
::Switchman::Shard.where(id: shards).update_all(block_stranded: false)
|
69
|
+
clear_shard_cache
|
70
|
+
|
71
|
+
# shards is an array of shard objects that is now stale because block_stranded has been updated.
|
72
|
+
shards.map(&:delayed_jobs_shard).uniq.each do |dj_shard|
|
73
|
+
unblock_strands(dj_shard)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def clear_shard_cache(debug_message = nil)
|
78
|
+
::Switchman.cache.clear
|
79
|
+
Rails.logger.debug("Waiting for caches to clear #{debug_message}")
|
39
80
|
# Wait a little over the 60 second in-process shard cache clearing
|
40
81
|
# threshold to ensure that all new stranded jobs are now being
|
41
82
|
# enqueued with next_in_strand: false
|
42
|
-
|
83
|
+
# @skip_cache_wait is for spec usage only
|
43
84
|
sleep(65) unless @skip_cache_wait
|
44
|
-
|
45
|
-
# TODO: 4 has been picked completely out of a hat. We should make it configurable or something
|
46
|
-
Parallel.each(source_shards, in_processes: 4) do |s|
|
47
|
-
# Ensure the child processes don't share connections with the parent
|
48
|
-
Delayed::Pool.on_fork.call
|
49
|
-
ActiveRecord::Base.clear_all_connections!
|
50
|
-
s.activate(:delayed_jobs) { run }
|
51
|
-
end
|
52
85
|
end
|
53
86
|
|
54
87
|
# This method expects that all relevant shards already have block_stranded: true
|
@@ -71,7 +104,7 @@ module SwitchmanInstJobs
|
|
71
104
|
# those (= do nothing since it should already be true)
|
72
105
|
|
73
106
|
source_shard = ::Switchman::Shard.current(:delayed_jobs)
|
74
|
-
strand_scope = ::Delayed::Job.shard(source_shard).where(
|
107
|
+
strand_scope = ::Delayed::Job.shard(source_shard).where.not(strand: nil)
|
75
108
|
shard_map = build_shard_map(strand_scope, source_shard)
|
76
109
|
shard_map.each do |(target_shard, source_shard_ids)|
|
77
110
|
shard_scope = strand_scope.where(shard_id: source_shard_ids)
|
@@ -92,7 +125,7 @@ module SwitchmanInstJobs
|
|
92
125
|
# We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
|
93
126
|
# Since we only unlock it on the new jobs queue *after* deleting from the original
|
94
127
|
# the lock ensures the blocker always gets unlocked
|
95
|
-
first = this_strand_scope.where(
|
128
|
+
first = this_strand_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
|
96
129
|
if first
|
97
130
|
first_job = ::Delayed::Job.create!(strand: strand, next_in_strand: false)
|
98
131
|
first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
|
@@ -110,7 +143,7 @@ module SwitchmanInstJobs
|
|
110
143
|
update_all(next_in_strand: false)
|
111
144
|
end
|
112
145
|
|
113
|
-
# 4) is taken care of here, by
|
146
|
+
# 4) is taken care of here, by leaving next_in_strand alone and
|
114
147
|
# it should execute on the new shard
|
115
148
|
batch_move_jobs(
|
116
149
|
target_shard: target_shard,
|
@@ -123,27 +156,35 @@ module SwitchmanInstJobs
|
|
123
156
|
end
|
124
157
|
end
|
125
158
|
|
126
|
-
::Switchman::Shard.
|
127
|
-
|
128
|
-
|
129
|
-
#
|
130
|
-
|
131
|
-
|
132
|
-
Rails.logger.debug("Waiting for caches to clear (#{source_shard.id} -> #{target_shard.id})")
|
133
|
-
# for spec usage only
|
134
|
-
sleep(65) unless @skip_cache_wait
|
159
|
+
updated = ::Switchman::Shard.where(id: source_shard_ids, block_stranded: true).
|
160
|
+
update_all(block_stranded: false)
|
161
|
+
# If this is being manually re-run for some reason to clean something up, don't wait for nothing to happen
|
162
|
+
clear_shard_cache("(#{source_shard.id} -> #{target_shard.id})") unless updated.zero?
|
163
|
+
|
164
|
+
::Switchman::Shard.clear_cache
|
135
165
|
# At this time, let's unblock all the strands on the target shard that aren't being held by a blocker
|
136
166
|
# but actually could have run and we just didn't know it because we didn't know if they had jobs
|
137
167
|
# on the source shard
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
168
|
+
unblock_strands(target_shard)
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def unblock_strands(target_shard)
|
174
|
+
target_shard.activate(:delayed_jobs) do
|
175
|
+
loop do
|
176
|
+
# We only want to unlock stranded jobs where they don't belong to a blocked shard. If they *do* belong
|
177
|
+
# to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
|
178
|
+
# this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
|
179
|
+
# logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
|
180
|
+
# batches
|
181
|
+
break if ::Delayed::Job.where(id: ::Delayed::Job.select('DISTINCT ON (strand) id').
|
182
|
+
where.not(strand: nil).
|
183
|
+
where.not(shard_id: ::Switchman::Shard.where(block_stranded: true).pluck(:id)).where(
|
184
|
+
::Delayed::Job.select(1).from("#{::Delayed::Job.quoted_table_name} dj2").
|
185
|
+
where("dj2.next_in_strand = true OR dj2.source = 'JobsMigrator::StrandBlocker'").
|
186
|
+
where('dj2.strand = delayed_jobs.strand').arel.exists.not
|
187
|
+
).order(:strand, :strand_order_override, :id)).limit(500).update_all(next_in_strand: true).zero?
|
147
188
|
end
|
148
189
|
end
|
149
190
|
end
|
@@ -242,7 +283,7 @@ module SwitchmanInstJobs
|
|
242
283
|
|
243
284
|
connection.execute "COPY #{::Delayed::Job.quoted_table_name} (#{quoted_keys}) FROM STDIN"
|
244
285
|
records.map do |record|
|
245
|
-
connection.raw_connection.put_copy_data(keys.map { |k| quote_text(record[k]) }.join("\t")
|
286
|
+
connection.raw_connection.put_copy_data("#{keys.map { |k| quote_text(record[k]) }.join("\t")}\n")
|
246
287
|
end
|
247
288
|
connection.clear_query_cache
|
248
289
|
connection.raw_connection.put_copy_end
|
@@ -14,7 +14,7 @@ module SwitchmanInstJobs
|
|
14
14
|
shard = ::Switchman::Shard.lookup(delayed_jobs_shard_id)
|
15
15
|
return shard if shard
|
16
16
|
end
|
17
|
-
database_server&.delayed_jobs_shard(self)
|
17
|
+
@delayed_jobs_shard ||= database_server&.delayed_jobs_shard(self)
|
18
18
|
end
|
19
19
|
|
20
20
|
# Adapted from hold/unhold methods in base delayed jobs base
|
@@ -39,7 +39,14 @@ module SwitchmanInstJobs
|
|
39
39
|
|
40
40
|
def unhold_jobs!
|
41
41
|
self.jobs_held = false
|
42
|
-
|
42
|
+
if changed?
|
43
|
+
save!
|
44
|
+
# Wait a little over the 60 second in-process shard cache clearing
|
45
|
+
# threshold to ensure that all new jobs are now being enqueued
|
46
|
+
# unlocked
|
47
|
+
Rails.logger.debug('Waiting for caches to clear')
|
48
|
+
sleep(65)
|
49
|
+
end
|
43
50
|
delayed_jobs_shard.activate(:delayed_jobs) do
|
44
51
|
::Delayed::Job.where(locked_by: ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY, shard_id: id).
|
45
52
|
in_batches(of: 10_000).
|
metadata
CHANGED
@@ -1,35 +1,35 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: switchman-inst-jobs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bryan Petty
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: inst-jobs
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.0'
|
20
17
|
- - ">="
|
21
18
|
- !ruby/object:Gem::Version
|
22
19
|
version: 1.0.3
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '3.0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
27
|
-
- - "~>"
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: '1.0'
|
30
27
|
- - ">="
|
31
28
|
- !ruby/object:Gem::Version
|
32
29
|
version: 1.0.3
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '3.0'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: parallel
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -210,28 +210,28 @@ dependencies:
|
|
210
210
|
requirements:
|
211
211
|
- - "~>"
|
212
212
|
- !ruby/object:Gem::Version
|
213
|
-
version:
|
213
|
+
version: 1.3.1
|
214
214
|
type: :development
|
215
215
|
prerelease: false
|
216
216
|
version_requirements: !ruby/object:Gem::Requirement
|
217
217
|
requirements:
|
218
218
|
- - "~>"
|
219
219
|
- !ruby/object:Gem::Version
|
220
|
-
version:
|
220
|
+
version: 1.3.1
|
221
221
|
- !ruby/object:Gem::Dependency
|
222
222
|
name: rubocop-rails
|
223
223
|
requirement: !ruby/object:Gem::Requirement
|
224
224
|
requirements:
|
225
225
|
- - "~>"
|
226
226
|
- !ruby/object:Gem::Version
|
227
|
-
version: 2.
|
227
|
+
version: 2.8.1
|
228
228
|
type: :development
|
229
229
|
prerelease: false
|
230
230
|
version_requirements: !ruby/object:Gem::Requirement
|
231
231
|
requirements:
|
232
232
|
- - "~>"
|
233
233
|
- !ruby/object:Gem::Version
|
234
|
-
version: 2.
|
234
|
+
version: 2.8.1
|
235
235
|
- !ruby/object:Gem::Dependency
|
236
236
|
name: simplecov
|
237
237
|
requirement: !ruby/object:Gem::Requirement
|
@@ -260,7 +260,7 @@ dependencies:
|
|
260
260
|
- - "~>"
|
261
261
|
- !ruby/object:Gem::Version
|
262
262
|
version: '1.4'
|
263
|
-
description:
|
263
|
+
description:
|
264
264
|
email:
|
265
265
|
- bpetty@instructure.com
|
266
266
|
executables: []
|
@@ -303,6 +303,7 @@ files:
|
|
303
303
|
- lib/switchman_inst_jobs/delayed/backend/base.rb
|
304
304
|
- lib/switchman_inst_jobs/delayed/message_sending.rb
|
305
305
|
- lib/switchman_inst_jobs/delayed/pool.rb
|
306
|
+
- lib/switchman_inst_jobs/delayed/settings.rb
|
306
307
|
- lib/switchman_inst_jobs/delayed/worker.rb
|
307
308
|
- lib/switchman_inst_jobs/delayed/worker/health_check.rb
|
308
309
|
- lib/switchman_inst_jobs/engine.rb
|
@@ -319,7 +320,7 @@ homepage: https://github.com/instructure/switchman-inst-jobs
|
|
319
320
|
licenses:
|
320
321
|
- MIT
|
321
322
|
metadata: {}
|
322
|
-
post_install_message:
|
323
|
+
post_install_message:
|
323
324
|
rdoc_options: []
|
324
325
|
require_paths:
|
325
326
|
- lib
|
@@ -334,8 +335,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
334
335
|
- !ruby/object:Gem::Version
|
335
336
|
version: '0'
|
336
337
|
requirements: []
|
337
|
-
rubygems_version: 3.
|
338
|
-
signing_key:
|
338
|
+
rubygems_version: 3.0.3
|
339
|
+
signing_key:
|
339
340
|
specification_version: 4
|
340
341
|
summary: Switchman and Instructure Jobs compatibility gem.
|
341
342
|
test_files: []
|