switchman-inst-jobs 3.0.2 → 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 711bf6fd7dfbea7baa7bf9f663264fdcfc37c3aa137d7b01f4c64985274dd045
4
- data.tar.gz: 24955492cd97b95fbf3a8ab38c989fc40e0355751ddb43474c3c77b4839cdc43
3
+ metadata.gz: 2a489d2a35d787be5492b4a5d710f1cc6560ad5c3d9f9ad965e0860338562d62
4
+ data.tar.gz: 88d1ff219ce44f9cb50846c631261ed792f4ce78a8ebde00ea5b08b3a172bf2c
5
5
  SHA512:
6
- metadata.gz: 8e498902747984003acc07838e7cc596cd06c1bc0e9d9f88d24fb01c2eeef8d952e47f1da8913b4bea759cb19dde0e5ee0b0cb1651cb2c07e9708a34cef4ecaf
7
- data.tar.gz: 547333f01e1ec2aad62221c76702c33ce07b67009f2b584e0187d152ded1c468637514c3e401c83389898a24386c6c72d74ffb1c7b2e7e5e6a3b50591a3ebfa6
6
+ metadata.gz: d0c0574bcb3b5e006072f67fcd6c6a4d892e8db919da73cf4edf4d8ac790c1481fd9777a6b6e6950934951efb647445ff756f59ee9ca663a5f5303588fa5a52b
7
+ data.tar.gz: b2ffe5cc363ab95d407d21a6c76e486dc9e40dc18d52ceabff9639e6408a33f392c33260a0d243720832e9c760ea8f9c55a941b6b0c31ba8fdae1328308bc5ee
@@ -29,7 +29,7 @@ class OptimizeDelayedJobs < ActiveRecord::Migration[4.2]
29
29
  add_index :delayed_jobs, %w[strand id], name: 'index_delayed_jobs_on_strand'
30
30
 
31
31
  # move all failed jobs to the new failed table
32
- Delayed::Backend::ActiveRecord::Job.where('failed_at IS NOT NULL').find_each do |job|
32
+ Delayed::Backend::ActiveRecord::Job.where.not(failed_at: nil).find_each do |job|
33
33
  job.fail! unless job.on_hold?
34
34
  end
35
35
  end
@@ -43,8 +43,8 @@ module SwitchmanInstJobs
43
43
  end
44
44
 
45
45
  def configured_shard_ids
46
- (::Delayed::Settings.worker_config.try(:[], 'workers') || [])
47
- .map { |w| w['shard'] }.compact.uniq
46
+ (::Delayed::Settings.worker_config.try(:[], 'workers') || []).
47
+ map { |w| w['shard'] }.compact.uniq
48
48
  end
49
49
 
50
50
  def processes_locked_locally
@@ -93,7 +93,7 @@ module SwitchmanInstJobs
93
93
  # likely a missing shard with a stale cache
94
94
  current_shard.send(:clear_cache)
95
95
  ::Switchman::Shard.clear_cache
96
- raise ShardNotFoundError, shard_id unless ::Switchman::Shard.where(id: shard_id).exists?
96
+ raise ShardNotFoundError, shard_id unless ::Switchman::Shard.exists?(id: shard_id)
97
97
 
98
98
  raise
99
99
  end
@@ -17,7 +17,7 @@ module SwitchmanInstJobs
17
17
  # We purposely don't .compact to remove nils here, since if any
18
18
  # workers are on the default jobs shard we want to unlock against
19
19
  # that shard too.
20
- shard_ids = @config[:workers].map { |c| c[:shard] }.uniq
20
+ shard_ids = @config[:workers].pluck(:shard).uniq
21
21
  shards = shard_ids.map { |shard_id| ::Delayed::Worker.shard(shard_id) }
22
22
  end
23
23
  ::Switchman::Shard.with_each_shard(shards, [:delayed_jobs]) do
@@ -19,7 +19,7 @@ module SwitchmanInstJobs
19
19
  end
20
20
 
21
21
  def reschedule_abandoned_jobs(call_super: false)
22
- shards = ::Switchman::Shard.delayed_jobs_shards
22
+ shards = ::Switchman::Shard.delayed_jobs_shards.to_a
23
23
  call_super = shards.first if shards.length == 1
24
24
  unless call_super == false
25
25
  call_super.activate(:delayed_jobs) do
@@ -29,26 +29,59 @@ module SwitchmanInstJobs
29
29
 
30
30
  def migrate_shards(shard_map)
31
31
  source_shards = Set[]
32
+ target_shards = Hash.new([])
32
33
  shard_map.each do |(shard, target_shard)|
33
34
  shard = ::Switchman::Shard.find(shard) unless shard.is_a?(::Switchman::Shard)
34
35
  source_shards << shard.delayed_jobs_shard.id
35
- # If target_shard is an int, it won't have an id, but we can just use it as is
36
- shard.update(delayed_jobs_shard_id: target_shard.try(:id) || target_shard, block_stranded: true)
36
+ target_shard = target_shard.try(:id) || target_shard
37
+ target_shards[target_shard] += [shard.id]
37
38
  end
38
39
 
40
+ # Do the updates in batches and then just clear redis instead of clearing them one at a time
41
+ target_shards.each do |target_shard, shards|
42
+ updates = { delayed_jobs_shard_id: target_shard, block_stranded: true }
43
+ updates[:updated_at] = Time.zone.now if ::Switchman::Shard.column_names.include?('updated_at')
44
+ ::Switchman::Shard.where(id: shards).update_all(updates)
45
+ end
46
+ clear_shard_cache
47
+
48
+ ::Switchman::Shard.clear_cache
49
+ # rubocop:disable Style/CombinableLoops
50
+ # We first migrate strands so that we can stop blocking strands before we migrate unstranded jobs
51
+ source_shards.each do |s|
52
+ ::Switchman::Shard.lookup(s).activate(:delayed_jobs) { migrate_strands }
53
+ end
54
+
55
+ source_shards.each do |s|
56
+ ::Switchman::Shard.lookup(s).activate(:delayed_jobs) { migrate_everything }
57
+ end
58
+ ensure_unblock_stranded_for(shard_map.map(&:first))
59
+ # rubocop:enable Style/CombinableLoops
60
+ end
61
+
62
+ # if :migrate_strands ran on any shards that fell into scenario 1, then
63
+ # block_stranded never got flipped, so do that now.
64
+ def ensure_unblock_stranded_for(shards)
65
+ shards = ::Switchman::Shard.where(id: shards, block_stranded: true).to_a
66
+ return unless shards.any?
67
+
68
+ ::Switchman::Shard.where(id: shards).update_all(block_stranded: false)
69
+ clear_shard_cache
70
+
71
+ # shards is an array of shard objects that is now stale cause block_stranded has been updated.
72
+ shards.map(&:delayed_jobs_shard).uniq.each do |dj_shard|
73
+ unblock_strands(dj_shard)
74
+ end
75
+ end
76
+
77
+ def clear_shard_cache(debug_message = nil)
78
+ ::Switchman.cache.clear
79
+ Rails.logger.debug("Waiting for caches to clear #{debug_message}")
39
80
  # Wait a little over the 60 second in-process shard cache clearing
40
81
  # threshold to ensure that all new stranded jobs are now being
41
82
  # enqueued with next_in_strand: false
42
- Rails.logger.debug("Waiting for caches to clear (#{source_shard.id} -> #{target_shard.id})")
83
+ # @skip_cache_wait is for spec usage only
43
84
  sleep(65) unless @skip_cache_wait
44
-
45
- # TODO: 4 has been picked completely out of a hat. We should make it configurable or something
46
- Parallel.each(source_shards, in_processes: 4) do |s|
47
- # Ensure the child processes don't share connections with the parent
48
- Delayed::Pool.on_fork.call
49
- ActiveRecord::Base.clear_all_connections!
50
- s.activate(:delayed_jobs) { run }
51
- end
52
85
  end
53
86
 
54
87
  # This method expects that all relevant shards already have block_stranded: true
@@ -71,7 +104,7 @@ module SwitchmanInstJobs
71
104
  # those (= do nothing since it should already be true)
72
105
 
73
106
  source_shard = ::Switchman::Shard.current(:delayed_jobs)
74
- strand_scope = ::Delayed::Job.shard(source_shard).where('strand IS NOT NULL')
107
+ strand_scope = ::Delayed::Job.shard(source_shard).where.not(strand: nil)
75
108
  shard_map = build_shard_map(strand_scope, source_shard)
76
109
  shard_map.each do |(target_shard, source_shard_ids)|
77
110
  shard_scope = strand_scope.where(shard_id: source_shard_ids)
@@ -92,7 +125,7 @@ module SwitchmanInstJobs
92
125
  # We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
93
126
  # Since we only unlock it on the new jobs queue *after* deleting from the original
94
127
  # the lock ensures the blocker always gets unlocked
95
- first = this_strand_scope.where('locked_by IS NOT NULL').next_in_strand_order.lock.first
128
+ first = this_strand_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
96
129
  if first
97
130
  first_job = ::Delayed::Job.create!(strand: strand, next_in_strand: false)
98
131
  first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
@@ -106,11 +139,11 @@ module SwitchmanInstJobs
106
139
  first_job.save!
107
140
  # the rest of 3) is taken care of here
108
141
  # make sure that all the jobs moved over are NOT next in strand
109
- ::Delayed::Job.where(next_in_strand: true, strand: strand, locked_by: nil)
110
- .update_all(next_in_strand: false)
142
+ ::Delayed::Job.where(next_in_strand: true, strand: strand, locked_by: nil).
143
+ update_all(next_in_strand: false)
111
144
  end
112
145
 
113
- # 4) is taken care of here, by leaveing next_in_strand alone and
146
+ # 4) is taken care of here, by leaving next_in_strand alone and
114
147
  # it should execute on the new shard
115
148
  batch_move_jobs(
116
149
  target_shard: target_shard,
@@ -123,27 +156,35 @@ module SwitchmanInstJobs
123
156
  end
124
157
  end
125
158
 
126
- ::Switchman::Shard.find(source_shard_ids).each do |shard|
127
- shard.update(block_stranded: false)
128
- end
129
- # Wait a little over the 60 second in-process shard cache clearing
130
- # threshold to ensure that all new stranded jobs are now being
131
- # enqueued with next_in_strand: false
132
- Rails.logger.debug("Waiting for caches to clear (#{source_shard.id} -> #{target_shard.id})")
133
- # for spec usage only
134
- sleep(65) unless @skip_cache_wait
159
+ updated = ::Switchman::Shard.where(id: source_shard_ids, block_stranded: true).
160
+ update_all(block_stranded: false)
161
+ # If this is being manually re-run for some reason to clean something up, don't wait for nothing to happen
162
+ clear_shard_cache("(#{source_shard.id} -> #{target_shard.id})") unless updated.zero?
163
+
164
+ ::Switchman::Shard.clear_cache
135
165
  # At this time, let's unblock all the strands on the target shard that aren't being held by a blocker
136
166
  # but actually could have run and we just didn't know it because we didn't know if they had jobs
137
167
  # on the source shard
138
- # rubocop:disable Layout/LineLength
139
- strands_to_unblock = shard_scope.where.not(source: 'JobsMigrator::StrandBlocker')
140
- .distinct
141
- .where("NOT EXISTS (SELECT 1 FROM #{::Delayed::Job.quoted_table_name} dj2 WHERE delayed_jobs.strand=dj2.strand AND next_in_strand)")
142
- .pluck(:strand)
143
- # rubocop:enable Layout/LineLength
144
- strands_to_unblock.each do |strand|
145
- Delayed::Job.where(strand: strand).next_in_strand_order.first.update_attribute(:next_in_strand, true)
146
- end
168
+ unblock_strands(target_shard)
169
+ end
170
+ end
171
+ end
172
+
173
+ def unblock_strands(target_shard)
174
+ target_shard.activate(:delayed_jobs) do
175
+ loop do
176
+ # We only want to unlock stranded jobs where they don't belong to a blocked shard (if they *do* belong)
177
+ # to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
178
+ # this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
179
+ # logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
180
+ # batches
181
+ break if ::Delayed::Job.where(id: ::Delayed::Job.select('DISTINCT ON (strand) id').
182
+ where.not(strand: nil).
183
+ where.not(shard_id: ::Switchman::Shard.where(block_stranded: true).pluck(:id)).where(
184
+ ::Delayed::Job.select(1).from("#{::Delayed::Job.quoted_table_name} dj2").
185
+ where("dj2.next_in_strand = true OR dj2.source = 'JobsMigrator::StrandBlocker'").
186
+ where('dj2.strand = delayed_jobs.strand').arel.exists.not
187
+ ).order(:strand, :strand_order_override, :id)).limit(500).update_all(next_in_strand: true).zero?
147
188
  end
148
189
  end
149
190
  end
@@ -242,7 +283,7 @@ module SwitchmanInstJobs
242
283
 
243
284
  connection.execute "COPY #{::Delayed::Job.quoted_table_name} (#{quoted_keys}) FROM STDIN"
244
285
  records.map do |record|
245
- connection.raw_connection.put_copy_data(keys.map { |k| quote_text(record[k]) }.join("\t") + "\n")
286
+ connection.raw_connection.put_copy_data("#{keys.map { |k| quote_text(record[k]) }.join("\t")}\n")
246
287
  end
247
288
  connection.clear_query_cache
248
289
  connection.raw_connection.put_copy_end
@@ -11,7 +11,7 @@ module SwitchmanInstJobs
11
11
  # shard's delayed_jobs_shard
12
12
  if shard&.default?
13
13
  # first look for any shard that behaves like a jobs shard
14
- dj_shard ||= ::Switchman::Shard.delayed_jobs_shards.first
14
+ dj_shard ||= ::Switchman::Shard.delayed_jobs_shards.find(&:database_server)
15
15
  # we're really truly out of options, use the default shard itself
16
16
  dj_shard ||= shard
17
17
  end
@@ -14,7 +14,7 @@ module SwitchmanInstJobs
14
14
  shard = ::Switchman::Shard.lookup(delayed_jobs_shard_id)
15
15
  return shard if shard
16
16
  end
17
- database_server&.delayed_jobs_shard(self)
17
+ @delayed_jobs_shard ||= database_server&.delayed_jobs_shard(self)
18
18
  end
19
19
 
20
20
  # Adapted from hold/unhold methods in base delayed jobs base
@@ -28,9 +28,9 @@ module SwitchmanInstJobs
28
28
  return unless wait
29
29
 
30
30
  delayed_jobs_shard.activate(:delayed_jobs) do
31
- while ::Delayed::Job.where(shard_id: id)
32
- .where.not(locked_at: nil)
33
- .where.not(locked_by: ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY).exists?
31
+ while ::Delayed::Job.where(shard_id: id).
32
+ where.not(locked_at: nil).
33
+ where.not(locked_by: ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY).exists?
34
34
  sleep 10
35
35
  lock_jobs_for_hold
36
36
  end
@@ -39,11 +39,18 @@ module SwitchmanInstJobs
39
39
 
40
40
  def unhold_jobs!
41
41
  self.jobs_held = false
42
- save! if changed?
42
+ if changed?
43
+ save!
44
+ # Wait a little over the 60 second in-process shard cache clearing
45
+ # threshold to ensure that all new jobs are now being enqueued
46
+ # unlocked
47
+ Rails.logger.debug('Waiting for caches to clear')
48
+ sleep(65)
49
+ end
43
50
  delayed_jobs_shard.activate(:delayed_jobs) do
44
- ::Delayed::Job.where(locked_by: ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY, shard_id: id)
45
- .in_batches(of: 10_000)
46
- .update_all(
51
+ ::Delayed::Job.where(locked_by: ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY, shard_id: id).
52
+ in_batches(of: 10_000).
53
+ update_all(
47
54
  locked_by: nil,
48
55
  locked_at: nil,
49
56
  attempts: 0,
@@ -111,32 +118,27 @@ module SwitchmanInstJobs
111
118
  end
112
119
 
113
120
  def delayed_jobs_shards
114
- unless instance_variable_defined?(:@delayed_jobs_shards)
115
- # re-entrancy protection
116
- @delayed_jobs_shards = begin
117
- shard_dj_shards = [] unless ::Switchman::Shard.columns_hash.key?('delayed_jobs_shard_id')
118
- shard_dj_shards ||= begin
119
- ::Switchman::Shard
120
- .where.not(delayed_jobs_shard_id: nil)
121
- .distinct
122
- .pluck(:delayed_jobs_shard_id)
123
- .map { |id| ::Switchman::Shard.lookup(id) }
124
- .compact
125
- end
126
- # set it temporarily, to avoid the default shard falling back to itself
127
- # if other shards are usable
128
- @delayed_jobs_shards = shard_dj_shards.uniq.sort
129
-
130
- db_dj_shards = ::Switchman::DatabaseServer.all.map do |db|
131
- next db.shards.to_a if db.config[:delayed_jobs_shard] == 'self'
132
-
133
- db.delayed_jobs_shard
134
- end.compact.flatten.uniq # yes, all three
135
-
136
- (db_dj_shards + shard_dj_shards).uniq.sort
137
- end
121
+ return none unless ::Switchman::Shard.columns_hash.key?('delayed_jobs_shard_id')
122
+
123
+ scope = ::Switchman::Shard.unscoped.
124
+ where(id: ::Switchman::Shard.unscoped.distinct.where.not(delayed_jobs_shard_id: nil).
125
+ select(:delayed_jobs_shard_id))
126
+ db_jobs_shards = ::Switchman::DatabaseServer.all.map { |db| db.config[:delayed_jobs_shard] }.uniq
127
+ db_jobs_shards.delete(nil)
128
+ has_self = db_jobs_shards.delete('self')
129
+ scope = scope.or(::Switchman::Shard.unscoped.where(id: db_jobs_shards)) unless db_jobs_shards.empty?
130
+
131
+ if has_self
132
+ self_dbs = ::Switchman::DatabaseServer.all.
133
+ select { |db| db.config[:delayed_jobs_shard] == 'self' }.map(&:id)
134
+ scope = scope.or(::Switchman::Shard.unscoped.
135
+ where(id: ::Switchman::Shard.unscoped.where(delayed_jobs_shard_id: nil, database_server_id: self_dbs).
136
+ select(:id)))
138
137
  end
139
- @delayed_jobs_shards
138
+ @jobs_scope_empty = !scope.exists? unless instance_variable_defined?(:@jobs_scope_empty)
139
+ return [::Switchman::Shard.default] if @jobs_scope_empty
140
+
141
+ ::Switchman::Shard.merge(scope)
140
142
  end
141
143
  end
142
144
  end
@@ -6,7 +6,7 @@ module SwitchmanInstJobs
6
6
  @cached_at = Time.zone.now
7
7
  end
8
8
 
9
- def clear(force = false)
9
+ def clear(force: false)
10
10
  if force || @cached_at < @timeout.call
11
11
  @block.call
12
12
  @cached_at = Time.zone.now
@@ -1,3 +1,3 @@
1
1
  module SwitchmanInstJobs
2
- VERSION = '3.0.2'.freeze
2
+ VERSION = '3.1.1'.freeze
3
3
  end
metadata CHANGED
@@ -1,35 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: switchman-inst-jobs
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.2
4
+ version: 3.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bryan Petty
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-10-30 00:00:00.000000000 Z
11
+ date: 2021-01-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inst-jobs
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '1.0'
20
17
  - - ">="
21
18
  - !ruby/object:Gem::Version
22
19
  version: 1.0.3
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '3.0'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
- - - "~>"
28
- - !ruby/object:Gem::Version
29
- version: '1.0'
30
27
  - - ">="
31
28
  - !ruby/object:Gem::Version
32
29
  version: 1.0.3
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '3.0'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: parallel
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -210,28 +210,28 @@ dependencies:
210
210
  requirements:
211
211
  - - "~>"
212
212
  - !ruby/object:Gem::Version
213
- version: 0.79.0
213
+ version: 1.3.1
214
214
  type: :development
215
215
  prerelease: false
216
216
  version_requirements: !ruby/object:Gem::Requirement
217
217
  requirements:
218
218
  - - "~>"
219
219
  - !ruby/object:Gem::Version
220
- version: 0.79.0
220
+ version: 1.3.1
221
221
  - !ruby/object:Gem::Dependency
222
222
  name: rubocop-rails
223
223
  requirement: !ruby/object:Gem::Requirement
224
224
  requirements:
225
225
  - - "~>"
226
226
  - !ruby/object:Gem::Version
227
- version: 2.4.2
227
+ version: 2.8.1
228
228
  type: :development
229
229
  prerelease: false
230
230
  version_requirements: !ruby/object:Gem::Requirement
231
231
  requirements:
232
232
  - - "~>"
233
233
  - !ruby/object:Gem::Version
234
- version: 2.4.2
234
+ version: 2.8.1
235
235
  - !ruby/object:Gem::Dependency
236
236
  name: simplecov
237
237
  requirement: !ruby/object:Gem::Requirement