switchman-inst-jobs 3.0.2 → 3.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/db/migrate/20110610213249_optimize_delayed_jobs.rb +1 -1
- data/lib/switchman_inst_jobs/delayed/backend/base.rb +3 -3
- data/lib/switchman_inst_jobs/delayed/pool.rb +1 -1
- data/lib/switchman_inst_jobs/delayed/worker/health_check.rb +1 -1
- data/lib/switchman_inst_jobs/jobs_migrator.rb +76 -35
- data/lib/switchman_inst_jobs/switchman/database_server.rb +1 -1
- data/lib/switchman_inst_jobs/switchman/shard.rb +35 -33
- data/lib/switchman_inst_jobs/timed_cache.rb +1 -1
- data/lib/switchman_inst_jobs/version.rb +1 -1
- metadata +12 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2a489d2a35d787be5492b4a5d710f1cc6560ad5c3d9f9ad965e0860338562d62
|
4
|
+
data.tar.gz: 88d1ff219ce44f9cb50846c631261ed792f4ce78a8ebde00ea5b08b3a172bf2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d0c0574bcb3b5e006072f67fcd6c6a4d892e8db919da73cf4edf4d8ac790c1481fd9777a6b6e6950934951efb647445ff756f59ee9ca663a5f5303588fa5a52b
|
7
|
+
data.tar.gz: b2ffe5cc363ab95d407d21a6c76e486dc9e40dc18d52ceabff9639e6408a33f392c33260a0d243720832e9c760ea8f9c55a941b6b0c31ba8fdae1328308bc5ee
|
@@ -29,7 +29,7 @@ class OptimizeDelayedJobs < ActiveRecord::Migration[4.2]
|
|
29
29
|
add_index :delayed_jobs, %w[strand id], name: 'index_delayed_jobs_on_strand'
|
30
30
|
|
31
31
|
# move all failed jobs to the new failed table
|
32
|
-
Delayed::Backend::ActiveRecord::Job.where(
|
32
|
+
Delayed::Backend::ActiveRecord::Job.where.not(failed_at: nil).find_each do |job|
|
33
33
|
job.fail! unless job.on_hold?
|
34
34
|
end
|
35
35
|
end
|
@@ -43,8 +43,8 @@ module SwitchmanInstJobs
|
|
43
43
|
end
|
44
44
|
|
45
45
|
def configured_shard_ids
|
46
|
-
(::Delayed::Settings.worker_config.try(:[], 'workers') || [])
|
47
|
-
|
46
|
+
(::Delayed::Settings.worker_config.try(:[], 'workers') || []).
|
47
|
+
map { |w| w['shard'] }.compact.uniq
|
48
48
|
end
|
49
49
|
|
50
50
|
def processes_locked_locally
|
@@ -93,7 +93,7 @@ module SwitchmanInstJobs
|
|
93
93
|
# likely a missing shard with a stale cache
|
94
94
|
current_shard.send(:clear_cache)
|
95
95
|
::Switchman::Shard.clear_cache
|
96
|
-
raise ShardNotFoundError, shard_id unless ::Switchman::Shard.
|
96
|
+
raise ShardNotFoundError, shard_id unless ::Switchman::Shard.exists?(id: shard_id)
|
97
97
|
|
98
98
|
raise
|
99
99
|
end
|
@@ -17,7 +17,7 @@ module SwitchmanInstJobs
|
|
17
17
|
# We purposely don't .compact to remove nils here, since if any
|
18
18
|
# workers are on the default jobs shard we want to unlock against
|
19
19
|
# that shard too.
|
20
|
-
shard_ids = @config[:workers].
|
20
|
+
shard_ids = @config[:workers].pluck(:shard).uniq
|
21
21
|
shards = shard_ids.map { |shard_id| ::Delayed::Worker.shard(shard_id) }
|
22
22
|
end
|
23
23
|
::Switchman::Shard.with_each_shard(shards, [:delayed_jobs]) do
|
@@ -19,7 +19,7 @@ module SwitchmanInstJobs
|
|
19
19
|
end
|
20
20
|
|
21
21
|
def reschedule_abandoned_jobs(call_super: false)
|
22
|
-
shards = ::Switchman::Shard.delayed_jobs_shards
|
22
|
+
shards = ::Switchman::Shard.delayed_jobs_shards.to_a
|
23
23
|
call_super = shards.first if shards.length == 1
|
24
24
|
unless call_super == false
|
25
25
|
call_super.activate(:delayed_jobs) do
|
@@ -29,26 +29,59 @@ module SwitchmanInstJobs
|
|
29
29
|
|
30
30
|
def migrate_shards(shard_map)
|
31
31
|
source_shards = Set[]
|
32
|
+
target_shards = Hash.new([])
|
32
33
|
shard_map.each do |(shard, target_shard)|
|
33
34
|
shard = ::Switchman::Shard.find(shard) unless shard.is_a?(::Switchman::Shard)
|
34
35
|
source_shards << shard.delayed_jobs_shard.id
|
35
|
-
|
36
|
-
shard.
|
36
|
+
target_shard = target_shard.try(:id) || target_shard
|
37
|
+
target_shards[target_shard] += [shard.id]
|
37
38
|
end
|
38
39
|
|
40
|
+
# Do the updates in batches and then just clear redis instead of clearing them one at a time
|
41
|
+
target_shards.each do |target_shard, shards|
|
42
|
+
updates = { delayed_jobs_shard_id: target_shard, block_stranded: true }
|
43
|
+
updates[:updated_at] = Time.zone.now if ::Switchman::Shard.column_names.include?('updated_at')
|
44
|
+
::Switchman::Shard.where(id: shards).update_all(updates)
|
45
|
+
end
|
46
|
+
clear_shard_cache
|
47
|
+
|
48
|
+
::Switchman::Shard.clear_cache
|
49
|
+
# rubocop:disable Style/CombinableLoops
|
50
|
+
# We first migrate strands so that we can stop blocking strands before we migrate unstranded jobs
|
51
|
+
source_shards.each do |s|
|
52
|
+
::Switchman::Shard.lookup(s).activate(:delayed_jobs) { migrate_strands }
|
53
|
+
end
|
54
|
+
|
55
|
+
source_shards.each do |s|
|
56
|
+
::Switchman::Shard.lookup(s).activate(:delayed_jobs) { migrate_everything }
|
57
|
+
end
|
58
|
+
ensure_unblock_stranded_for(shard_map.map(&:first))
|
59
|
+
# rubocop:enable Style/CombinableLoops
|
60
|
+
end
|
61
|
+
|
62
|
+
# if :migrate_strands ran on any shards that fell into scenario 1, then
|
63
|
+
# block_stranded never got flipped, so do that now.
|
64
|
+
def ensure_unblock_stranded_for(shards)
|
65
|
+
shards = ::Switchman::Shard.where(id: shards, block_stranded: true).to_a
|
66
|
+
return unless shards.any?
|
67
|
+
|
68
|
+
::Switchman::Shard.where(id: shards).update_all(block_stranded: false)
|
69
|
+
clear_shard_cache
|
70
|
+
|
71
|
+
# shards is an array of shard objects that is now stale cause block_stranded has been updated.
|
72
|
+
shards.map(&:delayed_jobs_shard).uniq.each do |dj_shard|
|
73
|
+
unblock_strands(dj_shard)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
def clear_shard_cache(debug_message = nil)
|
78
|
+
::Switchman.cache.clear
|
79
|
+
Rails.logger.debug("Waiting for caches to clear #{debug_message}")
|
39
80
|
# Wait a little over the 60 second in-process shard cache clearing
|
40
81
|
# threshold to ensure that all new stranded jobs are now being
|
41
82
|
# enqueued with next_in_strand: false
|
42
|
-
|
83
|
+
# @skip_cache_wait is for spec usage only
|
43
84
|
sleep(65) unless @skip_cache_wait
|
44
|
-
|
45
|
-
# TODO: 4 has been picked completely out of a hat. We should make it configurable or something
|
46
|
-
Parallel.each(source_shards, in_processes: 4) do |s|
|
47
|
-
# Ensure the child processes don't share connections with the parent
|
48
|
-
Delayed::Pool.on_fork.call
|
49
|
-
ActiveRecord::Base.clear_all_connections!
|
50
|
-
s.activate(:delayed_jobs) { run }
|
51
|
-
end
|
52
85
|
end
|
53
86
|
|
54
87
|
# This method expects that all relevant shards already have block_stranded: true
|
@@ -71,7 +104,7 @@ module SwitchmanInstJobs
|
|
71
104
|
# those (= do nothing since it should already be true)
|
72
105
|
|
73
106
|
source_shard = ::Switchman::Shard.current(:delayed_jobs)
|
74
|
-
strand_scope = ::Delayed::Job.shard(source_shard).where(
|
107
|
+
strand_scope = ::Delayed::Job.shard(source_shard).where.not(strand: nil)
|
75
108
|
shard_map = build_shard_map(strand_scope, source_shard)
|
76
109
|
shard_map.each do |(target_shard, source_shard_ids)|
|
77
110
|
shard_scope = strand_scope.where(shard_id: source_shard_ids)
|
@@ -92,7 +125,7 @@ module SwitchmanInstJobs
|
|
92
125
|
# We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
|
93
126
|
# Since we only unlock it on the new jobs queue *after* deleting from the original
|
94
127
|
# the lock ensures the blocker always gets unlocked
|
95
|
-
first = this_strand_scope.where(
|
128
|
+
first = this_strand_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
|
96
129
|
if first
|
97
130
|
first_job = ::Delayed::Job.create!(strand: strand, next_in_strand: false)
|
98
131
|
first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
|
@@ -106,11 +139,11 @@ module SwitchmanInstJobs
|
|
106
139
|
first_job.save!
|
107
140
|
# the rest of 3) is taken care of here
|
108
141
|
# make sure that all the jobs moved over are NOT next in strand
|
109
|
-
::Delayed::Job.where(next_in_strand: true, strand: strand, locked_by: nil)
|
110
|
-
|
142
|
+
::Delayed::Job.where(next_in_strand: true, strand: strand, locked_by: nil).
|
143
|
+
update_all(next_in_strand: false)
|
111
144
|
end
|
112
145
|
|
113
|
-
# 4) is taken care of here, by
|
146
|
+
# 4) is taken care of here, by leaving next_in_strand alone and
|
114
147
|
# it should execute on the new shard
|
115
148
|
batch_move_jobs(
|
116
149
|
target_shard: target_shard,
|
@@ -123,27 +156,35 @@ module SwitchmanInstJobs
|
|
123
156
|
end
|
124
157
|
end
|
125
158
|
|
126
|
-
::Switchman::Shard.
|
127
|
-
|
128
|
-
|
129
|
-
#
|
130
|
-
|
131
|
-
|
132
|
-
Rails.logger.debug("Waiting for caches to clear (#{source_shard.id} -> #{target_shard.id})")
|
133
|
-
# for spec usage only
|
134
|
-
sleep(65) unless @skip_cache_wait
|
159
|
+
updated = ::Switchman::Shard.where(id: source_shard_ids, block_stranded: true).
|
160
|
+
update_all(block_stranded: false)
|
161
|
+
# If this is being manually re-run for some reason to clean something up, don't wait for nothing to happen
|
162
|
+
clear_shard_cache("(#{source_shard.id} -> #{target_shard.id})") unless updated.zero?
|
163
|
+
|
164
|
+
::Switchman::Shard.clear_cache
|
135
165
|
# At this time, let's unblock all the strands on the target shard that aren't being held by a blocker
|
136
166
|
# but actually could have run and we just didn't know it because we didn't know if they had jobs
|
137
167
|
# on the source shard
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
168
|
+
unblock_strands(target_shard)
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def unblock_strands(target_shard)
|
174
|
+
target_shard.activate(:delayed_jobs) do
|
175
|
+
loop do
|
176
|
+
# We only want to unlock stranded jobs where they don't belong to a blocked shard (if they *do* belong)
|
177
|
+
# to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
|
178
|
+
# this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
|
179
|
+
# logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
|
180
|
+
# batches
|
181
|
+
break if ::Delayed::Job.where(id: ::Delayed::Job.select('DISTINCT ON (strand) id').
|
182
|
+
where.not(strand: nil).
|
183
|
+
where.not(shard_id: ::Switchman::Shard.where(block_stranded: true).pluck(:id)).where(
|
184
|
+
::Delayed::Job.select(1).from("#{::Delayed::Job.quoted_table_name} dj2").
|
185
|
+
where("dj2.next_in_strand = true OR dj2.source = 'JobsMigrator::StrandBlocker'").
|
186
|
+
where('dj2.strand = delayed_jobs.strand').arel.exists.not
|
187
|
+
).order(:strand, :strand_order_override, :id)).limit(500).update_all(next_in_strand: true).zero?
|
147
188
|
end
|
148
189
|
end
|
149
190
|
end
|
@@ -242,7 +283,7 @@ module SwitchmanInstJobs
|
|
242
283
|
|
243
284
|
connection.execute "COPY #{::Delayed::Job.quoted_table_name} (#{quoted_keys}) FROM STDIN"
|
244
285
|
records.map do |record|
|
245
|
-
connection.raw_connection.put_copy_data(keys.map { |k| quote_text(record[k]) }.join("\t")
|
286
|
+
connection.raw_connection.put_copy_data("#{keys.map { |k| quote_text(record[k]) }.join("\t")}\n")
|
246
287
|
end
|
247
288
|
connection.clear_query_cache
|
248
289
|
connection.raw_connection.put_copy_end
|
@@ -11,7 +11,7 @@ module SwitchmanInstJobs
|
|
11
11
|
# shard's delayed_jobs_shard
|
12
12
|
if shard&.default?
|
13
13
|
# first look for any shard that behaves like a jobs shard
|
14
|
-
dj_shard ||= ::Switchman::Shard.delayed_jobs_shards.
|
14
|
+
dj_shard ||= ::Switchman::Shard.delayed_jobs_shards.find(&:database_server)
|
15
15
|
# we're really truly out of options, use the default shard itself
|
16
16
|
dj_shard ||= shard
|
17
17
|
end
|
@@ -14,7 +14,7 @@ module SwitchmanInstJobs
|
|
14
14
|
shard = ::Switchman::Shard.lookup(delayed_jobs_shard_id)
|
15
15
|
return shard if shard
|
16
16
|
end
|
17
|
-
database_server&.delayed_jobs_shard(self)
|
17
|
+
@delayed_jobs_shard ||= database_server&.delayed_jobs_shard(self)
|
18
18
|
end
|
19
19
|
|
20
20
|
# Adapted from hold/unhold methods in base delayed jobs base
|
@@ -28,9 +28,9 @@ module SwitchmanInstJobs
|
|
28
28
|
return unless wait
|
29
29
|
|
30
30
|
delayed_jobs_shard.activate(:delayed_jobs) do
|
31
|
-
while ::Delayed::Job.where(shard_id: id)
|
32
|
-
|
33
|
-
|
31
|
+
while ::Delayed::Job.where(shard_id: id).
|
32
|
+
where.not(locked_at: nil).
|
33
|
+
where.not(locked_by: ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY).exists?
|
34
34
|
sleep 10
|
35
35
|
lock_jobs_for_hold
|
36
36
|
end
|
@@ -39,11 +39,18 @@ module SwitchmanInstJobs
|
|
39
39
|
|
40
40
|
def unhold_jobs!
|
41
41
|
self.jobs_held = false
|
42
|
-
|
42
|
+
if changed?
|
43
|
+
save!
|
44
|
+
# Wait a little over the 60 second in-process shard cache clearing
|
45
|
+
# threshold to ensure that all new jobs are now being enqueued
|
46
|
+
# unlocked
|
47
|
+
Rails.logger.debug('Waiting for caches to clear')
|
48
|
+
sleep(65)
|
49
|
+
end
|
43
50
|
delayed_jobs_shard.activate(:delayed_jobs) do
|
44
|
-
::Delayed::Job.where(locked_by: ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY, shard_id: id)
|
45
|
-
|
46
|
-
|
51
|
+
::Delayed::Job.where(locked_by: ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY, shard_id: id).
|
52
|
+
in_batches(of: 10_000).
|
53
|
+
update_all(
|
47
54
|
locked_by: nil,
|
48
55
|
locked_at: nil,
|
49
56
|
attempts: 0,
|
@@ -111,32 +118,27 @@ module SwitchmanInstJobs
|
|
111
118
|
end
|
112
119
|
|
113
120
|
def delayed_jobs_shards
|
114
|
-
unless
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
db_dj_shards = ::Switchman::DatabaseServer.all.map do |db|
|
131
|
-
next db.shards.to_a if db.config[:delayed_jobs_shard] == 'self'
|
132
|
-
|
133
|
-
db.delayed_jobs_shard
|
134
|
-
end.compact.flatten.uniq # yes, all three
|
135
|
-
|
136
|
-
(db_dj_shards + shard_dj_shards).uniq.sort
|
137
|
-
end
|
121
|
+
return none unless ::Switchman::Shard.columns_hash.key?('delayed_jobs_shard_id')
|
122
|
+
|
123
|
+
scope = ::Switchman::Shard.unscoped.
|
124
|
+
where(id: ::Switchman::Shard.unscoped.distinct.where.not(delayed_jobs_shard_id: nil).
|
125
|
+
select(:delayed_jobs_shard_id))
|
126
|
+
db_jobs_shards = ::Switchman::DatabaseServer.all.map { |db| db.config[:delayed_jobs_shard] }.uniq
|
127
|
+
db_jobs_shards.delete(nil)
|
128
|
+
has_self = db_jobs_shards.delete('self')
|
129
|
+
scope = scope.or(::Switchman::Shard.unscoped.where(id: db_jobs_shards)) unless db_jobs_shards.empty?
|
130
|
+
|
131
|
+
if has_self
|
132
|
+
self_dbs = ::Switchman::DatabaseServer.all.
|
133
|
+
select { |db| db.config[:delayed_jobs_shard] == 'self' }.map(&:id)
|
134
|
+
scope = scope.or(::Switchman::Shard.unscoped.
|
135
|
+
where(id: ::Switchman::Shard.unscoped.where(delayed_jobs_shard_id: nil, database_server_id: self_dbs).
|
136
|
+
select(:id)))
|
138
137
|
end
|
139
|
-
@
|
138
|
+
@jobs_scope_empty = !scope.exists? unless instance_variable_defined?(:@jobs_scope_empty)
|
139
|
+
return [::Switchman::Shard.default] if @jobs_scope_empty
|
140
|
+
|
141
|
+
::Switchman::Shard.merge(scope)
|
140
142
|
end
|
141
143
|
end
|
142
144
|
end
|
metadata
CHANGED
@@ -1,35 +1,35 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: switchman-inst-jobs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bryan Petty
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: inst-jobs
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "~>"
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '1.0'
|
20
17
|
- - ">="
|
21
18
|
- !ruby/object:Gem::Version
|
22
19
|
version: 1.0.3
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '3.0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
25
|
version_requirements: !ruby/object:Gem::Requirement
|
26
26
|
requirements:
|
27
|
-
- - "~>"
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: '1.0'
|
30
27
|
- - ">="
|
31
28
|
- !ruby/object:Gem::Version
|
32
29
|
version: 1.0.3
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '3.0'
|
33
33
|
- !ruby/object:Gem::Dependency
|
34
34
|
name: parallel
|
35
35
|
requirement: !ruby/object:Gem::Requirement
|
@@ -210,28 +210,28 @@ dependencies:
|
|
210
210
|
requirements:
|
211
211
|
- - "~>"
|
212
212
|
- !ruby/object:Gem::Version
|
213
|
-
version:
|
213
|
+
version: 1.3.1
|
214
214
|
type: :development
|
215
215
|
prerelease: false
|
216
216
|
version_requirements: !ruby/object:Gem::Requirement
|
217
217
|
requirements:
|
218
218
|
- - "~>"
|
219
219
|
- !ruby/object:Gem::Version
|
220
|
-
version:
|
220
|
+
version: 1.3.1
|
221
221
|
- !ruby/object:Gem::Dependency
|
222
222
|
name: rubocop-rails
|
223
223
|
requirement: !ruby/object:Gem::Requirement
|
224
224
|
requirements:
|
225
225
|
- - "~>"
|
226
226
|
- !ruby/object:Gem::Version
|
227
|
-
version: 2.
|
227
|
+
version: 2.8.1
|
228
228
|
type: :development
|
229
229
|
prerelease: false
|
230
230
|
version_requirements: !ruby/object:Gem::Requirement
|
231
231
|
requirements:
|
232
232
|
- - "~>"
|
233
233
|
- !ruby/object:Gem::Version
|
234
|
-
version: 2.
|
234
|
+
version: 2.8.1
|
235
235
|
- !ruby/object:Gem::Dependency
|
236
236
|
name: simplecov
|
237
237
|
requirement: !ruby/object:Gem::Requirement
|