switchman-inst-jobs 3.0.3 → 3.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '08102eee5c1da6d032170565b624d61f90fcbc1c6232e1214babda846c9674c9'
4
- data.tar.gz: 5f9c75d442f40b706dff2a45bfab8dc99ff8599470c096bbea594f1a763d7640
3
+ metadata.gz: 4efbe05e879c9162df06f945af688c64876b9c56cc870509687160549359fd39
4
+ data.tar.gz: e32c8a6dfecd0b6c760d07a4d45eeaf6651d6cc7e70e7b6418d82c5a112f70cd
5
5
  SHA512:
6
- metadata.gz: 9118a73e2a47f1e6083737f967cc298674b8884c8cc23c9d16efff9e087951d63e54129f51a1c5595f2ec4cadb4332208a2a0262e7992b5777c39ac08ae8efa1
7
- data.tar.gz: 791c382a9fbb141dc0e2f763b936d953bbd575ac01fc869a652628992c93b47c3cf93b590d95bd7867698c9410cbb278932ed1404e1e6d9f582cf00825cda8b0
6
+ metadata.gz: 4fe0103d1b4bc305f0c85ba2135366968eb7c33bb6482b6d302dde9b9532db1d3d09fac64d90dad16fe6c6a438236ede5732d16a22d8538053d814010c9d2c87
7
+ data.tar.gz: feb2e546ee06bc38f964f58e63cfb59fdc25757952762ef8f8faf0521050d264e9f018ea8e6bb0131b277997d63035f68562cf45a7af13886fbfd80f481f5515
@@ -29,7 +29,7 @@ class OptimizeDelayedJobs < ActiveRecord::Migration[4.2]
29
29
  add_index :delayed_jobs, %w[strand id], name: 'index_delayed_jobs_on_strand'
30
30
 
31
31
  # move all failed jobs to the new failed table
32
- Delayed::Backend::ActiveRecord::Job.where('failed_at IS NOT NULL').find_each do |job|
32
+ Delayed::Backend::ActiveRecord::Job.where.not(failed_at: nil).find_each do |job|
33
33
  job.fail! unless job.on_hold?
34
34
  end
35
35
  end
@@ -38,6 +38,7 @@ end
38
38
 
39
39
  require 'switchman_inst_jobs/active_record/connection_adapters/postgresql_adapter'
40
40
  require 'switchman_inst_jobs/active_record/migration'
41
+ require 'switchman_inst_jobs/delayed/settings'
41
42
  require 'switchman_inst_jobs/delayed/backend/base'
42
43
  require 'switchman_inst_jobs/delayed/message_sending'
43
44
  require 'switchman_inst_jobs/delayed/pool'
@@ -43,8 +43,7 @@ module SwitchmanInstJobs
43
43
  end
44
44
 
45
45
  def configured_shard_ids
46
- (::Delayed::Settings.worker_config.try(:[], 'workers') || []).
47
- map { |w| w['shard'] }.compact.uniq
46
+ ::SwitchmanInstJobs::Delayed::Settings.configured_shard_ids
48
47
  end
49
48
 
50
49
  def processes_locked_locally
@@ -93,7 +92,7 @@ module SwitchmanInstJobs
93
92
  # likely a missing shard with a stale cache
94
93
  current_shard.send(:clear_cache)
95
94
  ::Switchman::Shard.clear_cache
96
- raise ShardNotFoundError, shard_id unless ::Switchman::Shard.where(id: shard_id).exists?
95
+ raise ShardNotFoundError, shard_id unless ::Switchman::Shard.exists?(id: shard_id)
97
96
 
98
97
  raise
99
98
  end
@@ -17,7 +17,7 @@ module SwitchmanInstJobs
17
17
  # We purposely don't .compact to remove nils here, since if any
18
18
  # workers are on the default jobs shard we want to unlock against
19
19
  # that shard too.
20
- shard_ids = @config[:workers].map { |c| c[:shard] }.uniq
20
+ shard_ids = @config[:workers].pluck(:shard).uniq
21
21
  shards = shard_ids.map { |shard_id| ::Delayed::Worker.shard(shard_id) }
22
22
  end
23
23
  ::Switchman::Shard.with_each_shard(shards, [:delayed_jobs]) do
@@ -0,0 +1,9 @@
1
+ module SwitchmanInstJobs
2
+ module Delayed
3
+ module Settings
4
+ def self.configured_shard_ids
5
+ (::Delayed::Settings.worker_config.try(:[], 'workers') || []).map { |w| w['shard'] }.compact.uniq
6
+ end
7
+ end
8
+ end
9
+ end
@@ -18,22 +18,23 @@ module SwitchmanInstJobs
18
18
  ::Delayed::Settings.worker_health_check_config['service_name'] = original_service_name
19
19
  end
20
20
 
21
- def reschedule_abandoned_jobs(call_super: false)
22
- shards = ::Switchman::Shard.delayed_jobs_shards.to_a
23
- call_super = shards.first if shards.length == 1
24
- unless call_super == false
25
- call_super.activate(:delayed_jobs) do
26
- return munge_service_name(call_super) { super() }
21
+ def reschedule_abandoned_jobs
22
+ shard_ids = ::SwitchmanInstJobs::Delayed::Settings.configured_shard_ids
23
+ shards = shard_ids.map { |shard_id| ::Delayed::Worker.shard(shard_id) }
24
+ ::Switchman::Shard.with_each_shard(shards, [:delayed_jobs]) do
25
+ dj_shard = ::Switchman::Shard.current(:delayed_jobs)
26
+ dj_shard.activate do
27
+ munge_service_name(dj_shard) do
28
+ # because this rescheduling process is running on every host, we need
29
+ # to make sure that it's functioning for each shard the current
30
+ # host is programmed to interact with, but ONLY for those shards.
31
+ # reading the config lets us iterate over any shards this host should
32
+ # work with and lets us pick the correct service name to identify which
33
+ # hosts are currently alive and valid via the health checks
34
+ super()
35
+ end
27
36
  end
28
37
  end
29
-
30
- ::Switchman::Shard.with_each_shard(shards, [:delayed_jobs], exception: :ignore) do
31
- shard = ::Switchman::Shard.current(:delayed_jobs)
32
- singleton = <<~SINGLETON
33
- periodic: Delayed::Worker::HealthCheck.reschedule_abandoned_jobs:#{shard.id}
34
- SINGLETON
35
- delay(singleton: singleton).reschedule_abandoned_jobs(call_super: shard)
36
- end
37
38
  end
38
39
  end
39
40
  end
@@ -29,26 +29,59 @@ module SwitchmanInstJobs
29
29
 
30
30
  def migrate_shards(shard_map)
31
31
  source_shards = Set[]
32
+ target_shards = Hash.new([])
32
33
  shard_map.each do |(shard, target_shard)|
33
34
  shard = ::Switchman::Shard.find(shard) unless shard.is_a?(::Switchman::Shard)
34
35
  source_shards << shard.delayed_jobs_shard.id
35
- # If target_shard is an int, it won't have an id, but we can just use it as is
36
- shard.update(delayed_jobs_shard_id: target_shard.try(:id) || target_shard, block_stranded: true)
36
+ target_shard = target_shard.try(:id) || target_shard
37
+ target_shards[target_shard] += [shard.id]
37
38
  end
38
39
 
40
+ # Do the updates in batches and then just clear redis instead of clearing them one at a time
41
+ target_shards.each do |target_shard, shards|
42
+ updates = { delayed_jobs_shard_id: target_shard, block_stranded: true }
43
+ updates[:updated_at] = Time.zone.now if ::Switchman::Shard.column_names.include?('updated_at')
44
+ ::Switchman::Shard.where(id: shards).update_all(updates)
45
+ end
46
+ clear_shard_cache
47
+
48
+ ::Switchman::Shard.clear_cache
49
+ # rubocop:disable Style/CombinableLoops
50
+ # We first migrate strands so that we can stop blocking strands before we migrate unstranded jobs
51
+ source_shards.each do |s|
52
+ ::Switchman::Shard.lookup(s).activate(:delayed_jobs) { migrate_strands }
53
+ end
54
+
55
+ source_shards.each do |s|
56
+ ::Switchman::Shard.lookup(s).activate(:delayed_jobs) { migrate_everything }
57
+ end
58
+ ensure_unblock_stranded_for(shard_map.map(&:first))
59
+ # rubocop:enable Style/CombinableLoops
60
+ end
61
+
62
+ # if :migrate_strands ran on any shards that fell into scenario 1, then
63
+ # block_stranded never got flipped, so do that now.
64
+ def ensure_unblock_stranded_for(shards)
65
+ shards = ::Switchman::Shard.where(id: shards, block_stranded: true).to_a
66
+ return unless shards.any?
67
+
68
+ ::Switchman::Shard.where(id: shards).update_all(block_stranded: false)
69
+ clear_shard_cache
70
+
71
+ # shards is an array of shard objects that is now stale cause block_stranded has been updated.
72
+ shards.map(&:delayed_jobs_shard).uniq.each do |dj_shard|
73
+ unblock_strands(dj_shard)
74
+ end
75
+ end
76
+
77
+ def clear_shard_cache(debug_message = nil)
78
+ ::Switchman.cache.clear
79
+ Rails.logger.debug("Waiting for caches to clear #{debug_message}")
39
80
  # Wait a little over the 60 second in-process shard cache clearing
40
81
  # threshold to ensure that all new stranded jobs are now being
41
82
  # enqueued with next_in_strand: false
42
- Rails.logger.debug("Waiting for caches to clear (#{source_shard.id} -> #{target_shard.id})")
83
+ # @skip_cache_wait is for spec usage only
43
84
  sleep(65) unless @skip_cache_wait
44
-
45
- # TODO: 4 has been picked completely out of a hat. We should make it configurable or something
46
- Parallel.each(source_shards, in_processes: 4) do |s|
47
- # Ensure the child processes don't share connections with the parent
48
- Delayed::Pool.on_fork.call
49
- ActiveRecord::Base.clear_all_connections!
50
- s.activate(:delayed_jobs) { run }
51
- end
52
85
  end
53
86
 
54
87
  # This method expects that all relevant shards already have block_stranded: true
@@ -71,7 +104,7 @@ module SwitchmanInstJobs
71
104
  # those (= do nothing since it should already be true)
72
105
 
73
106
  source_shard = ::Switchman::Shard.current(:delayed_jobs)
74
- strand_scope = ::Delayed::Job.shard(source_shard).where('strand IS NOT NULL')
107
+ strand_scope = ::Delayed::Job.shard(source_shard).where.not(strand: nil)
75
108
  shard_map = build_shard_map(strand_scope, source_shard)
76
109
  shard_map.each do |(target_shard, source_shard_ids)|
77
110
  shard_scope = strand_scope.where(shard_id: source_shard_ids)
@@ -92,7 +125,7 @@ module SwitchmanInstJobs
92
125
  # We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
93
126
  # Since we only unlock it on the new jobs queue *after* deleting from the original
94
127
  # the lock ensures the blocker always gets unlocked
95
- first = this_strand_scope.where('locked_by IS NOT NULL').next_in_strand_order.lock.first
128
+ first = this_strand_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
96
129
  if first
97
130
  first_job = ::Delayed::Job.create!(strand: strand, next_in_strand: false)
98
131
  first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
@@ -110,7 +143,7 @@ module SwitchmanInstJobs
110
143
  update_all(next_in_strand: false)
111
144
  end
112
145
 
113
- # 4) is taken care of here, by leaveing next_in_strand alone and
146
+ # 4) is taken care of here, by leaving next_in_strand alone and
114
147
  # it should execute on the new shard
115
148
  batch_move_jobs(
116
149
  target_shard: target_shard,
@@ -123,27 +156,35 @@ module SwitchmanInstJobs
123
156
  end
124
157
  end
125
158
 
126
- ::Switchman::Shard.find(source_shard_ids).each do |shard|
127
- shard.update(block_stranded: false)
128
- end
129
- # Wait a little over the 60 second in-process shard cache clearing
130
- # threshold to ensure that all new stranded jobs are now being
131
- # enqueued with next_in_strand: false
132
- Rails.logger.debug("Waiting for caches to clear (#{source_shard.id} -> #{target_shard.id})")
133
- # for spec usage only
134
- sleep(65) unless @skip_cache_wait
159
+ updated = ::Switchman::Shard.where(id: source_shard_ids, block_stranded: true).
160
+ update_all(block_stranded: false)
161
+ # If this is being manually re-run for some reason to clean something up, don't wait for nothing to happen
162
+ clear_shard_cache("(#{source_shard.id} -> #{target_shard.id})") unless updated.zero?
163
+
164
+ ::Switchman::Shard.clear_cache
135
165
  # At this time, let's unblock all the strands on the target shard that aren't being held by a blocker
136
166
  # but actually could have run and we just didn't know it because we didn't know if they had jobs
137
167
  # on the source shard
138
- # rubocop:disable Layout/LineLength
139
- strands_to_unblock = shard_scope.where.not(source: 'JobsMigrator::StrandBlocker').
140
- distinct.
141
- where("NOT EXISTS (SELECT 1 FROM #{::Delayed::Job.quoted_table_name} dj2 WHERE delayed_jobs.strand=dj2.strand AND next_in_strand)").
142
- pluck(:strand)
143
- # rubocop:enable Layout/LineLength
144
- strands_to_unblock.each do |strand|
145
- Delayed::Job.where(strand: strand).next_in_strand_order.first.update_attribute(:next_in_strand, true)
146
- end
168
+ unblock_strands(target_shard)
169
+ end
170
+ end
171
+ end
172
+
173
+ def unblock_strands(target_shard)
174
+ target_shard.activate(:delayed_jobs) do
175
+ loop do
176
+ # We only want to unlock stranded jobs where they don't belong to a blocked shard (if they *do* belong)
177
+ # to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
178
+ # this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
179
+ # logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
180
+ # batches
181
+ break if ::Delayed::Job.where(id: ::Delayed::Job.select('DISTINCT ON (strand) id').
182
+ where.not(strand: nil).
183
+ where.not(shard_id: ::Switchman::Shard.where(block_stranded: true).pluck(:id)).where(
184
+ ::Delayed::Job.select(1).from("#{::Delayed::Job.quoted_table_name} dj2").
185
+ where("dj2.next_in_strand = true OR dj2.source = 'JobsMigrator::StrandBlocker'").
186
+ where('dj2.strand = delayed_jobs.strand').arel.exists.not
187
+ ).order(:strand, :strand_order_override, :id)).limit(500).update_all(next_in_strand: true).zero?
147
188
  end
148
189
  end
149
190
  end
@@ -242,7 +283,7 @@ module SwitchmanInstJobs
242
283
 
243
284
  connection.execute "COPY #{::Delayed::Job.quoted_table_name} (#{quoted_keys}) FROM STDIN"
244
285
  records.map do |record|
245
- connection.raw_connection.put_copy_data(keys.map { |k| quote_text(record[k]) }.join("\t") + "\n")
286
+ connection.raw_connection.put_copy_data("#{keys.map { |k| quote_text(record[k]) }.join("\t")}\n")
246
287
  end
247
288
  connection.clear_query_cache
248
289
  connection.raw_connection.put_copy_end
@@ -14,7 +14,7 @@ module SwitchmanInstJobs
14
14
  shard = ::Switchman::Shard.lookup(delayed_jobs_shard_id)
15
15
  return shard if shard
16
16
  end
17
- database_server&.delayed_jobs_shard(self)
17
+ @delayed_jobs_shard ||= database_server&.delayed_jobs_shard(self)
18
18
  end
19
19
 
20
20
  # Adapted from hold/unhold methods in base delayed jobs base
@@ -39,7 +39,14 @@ module SwitchmanInstJobs
39
39
 
40
40
  def unhold_jobs!
41
41
  self.jobs_held = false
42
- save! if changed?
42
+ if changed?
43
+ save!
44
+ # Wait a little over the 60 second in-process shard cache clearing
45
+ # threshold to ensure that all new jobs are now being enqueued
46
+ # unlocked
47
+ Rails.logger.debug('Waiting for caches to clear')
48
+ sleep(65)
49
+ end
43
50
  delayed_jobs_shard.activate(:delayed_jobs) do
44
51
  ::Delayed::Job.where(locked_by: ::Delayed::Backend::Base::ON_HOLD_LOCKED_BY, shard_id: id).
45
52
  in_batches(of: 10_000).
@@ -6,7 +6,7 @@ module SwitchmanInstJobs
6
6
  @cached_at = Time.zone.now
7
7
  end
8
8
 
9
- def clear(force = false)
9
+ def clear(force: false)
10
10
  if force || @cached_at < @timeout.call
11
11
  @block.call
12
12
  @cached_at = Time.zone.now
@@ -1,3 +1,3 @@
1
1
  module SwitchmanInstJobs
2
- VERSION = '3.0.3'.freeze
2
+ VERSION = '3.1.2'.freeze
3
3
  end
metadata CHANGED
@@ -1,35 +1,35 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: switchman-inst-jobs
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.3
4
+ version: 3.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bryan Petty
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-11 00:00:00.000000000 Z
11
+ date: 2021-01-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inst-jobs
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '1.0'
20
17
  - - ">="
21
18
  - !ruby/object:Gem::Version
22
19
  version: 1.0.3
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '3.0'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
- - - "~>"
28
- - !ruby/object:Gem::Version
29
- version: '1.0'
30
27
  - - ">="
31
28
  - !ruby/object:Gem::Version
32
29
  version: 1.0.3
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '3.0'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: parallel
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -210,28 +210,28 @@ dependencies:
210
210
  requirements:
211
211
  - - "~>"
212
212
  - !ruby/object:Gem::Version
213
- version: 0.79.0
213
+ version: 1.3.1
214
214
  type: :development
215
215
  prerelease: false
216
216
  version_requirements: !ruby/object:Gem::Requirement
217
217
  requirements:
218
218
  - - "~>"
219
219
  - !ruby/object:Gem::Version
220
- version: 0.79.0
220
+ version: 1.3.1
221
221
  - !ruby/object:Gem::Dependency
222
222
  name: rubocop-rails
223
223
  requirement: !ruby/object:Gem::Requirement
224
224
  requirements:
225
225
  - - "~>"
226
226
  - !ruby/object:Gem::Version
227
- version: 2.4.2
227
+ version: 2.8.1
228
228
  type: :development
229
229
  prerelease: false
230
230
  version_requirements: !ruby/object:Gem::Requirement
231
231
  requirements:
232
232
  - - "~>"
233
233
  - !ruby/object:Gem::Version
234
- version: 2.4.2
234
+ version: 2.8.1
235
235
  - !ruby/object:Gem::Dependency
236
236
  name: simplecov
237
237
  requirement: !ruby/object:Gem::Requirement
@@ -260,7 +260,7 @@ dependencies:
260
260
  - - "~>"
261
261
  - !ruby/object:Gem::Version
262
262
  version: '1.4'
263
- description:
263
+ description:
264
264
  email:
265
265
  - bpetty@instructure.com
266
266
  executables: []
@@ -303,6 +303,7 @@ files:
303
303
  - lib/switchman_inst_jobs/delayed/backend/base.rb
304
304
  - lib/switchman_inst_jobs/delayed/message_sending.rb
305
305
  - lib/switchman_inst_jobs/delayed/pool.rb
306
+ - lib/switchman_inst_jobs/delayed/settings.rb
306
307
  - lib/switchman_inst_jobs/delayed/worker.rb
307
308
  - lib/switchman_inst_jobs/delayed/worker/health_check.rb
308
309
  - lib/switchman_inst_jobs/engine.rb
@@ -319,7 +320,7 @@ homepage: https://github.com/instructure/switchman-inst-jobs
319
320
  licenses:
320
321
  - MIT
321
322
  metadata: {}
322
- post_install_message:
323
+ post_install_message:
323
324
  rdoc_options: []
324
325
  require_paths:
325
326
  - lib
@@ -334,8 +335,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
334
335
  - !ruby/object:Gem::Version
335
336
  version: '0'
336
337
  requirements: []
337
- rubygems_version: 3.1.4
338
- signing_key:
338
+ rubygems_version: 3.0.3
339
+ signing_key:
339
340
  specification_version: 4
340
341
  summary: Switchman and Instructure Jobs compatibility gem.
341
342
  test_files: []