switchman-inst-jobs 3.1.2 → 3.2.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,3 @@
1
- # Just disabling all the rubocop metrics for this file for now,
2
- # as it is a direct port-in of existing code
3
-
4
- # rubocop:disable Metrics/BlockLength, Metrics/MethodLength, Metrics/AbcSize, Metrics/ClassLength
5
1
  require 'set'
6
2
  require 'parallel'
7
3
 
@@ -93,7 +89,9 @@ module SwitchmanInstJobs
93
89
  migrate_everything
94
90
  end
95
91
 
96
- def migrate_strands
92
+ def migrate_strands(batch_size: 1_000)
93
+ source_shard = ::Switchman::Shard.current(:delayed_jobs)
94
+
97
95
  # there are 4 scenarios to deal with here
98
96
  # 1) no running job, no jobs moved: do nothing
99
97
  # 2) running job, no jobs moved; create blocker with next_in_strand=false
@@ -102,60 +100,89 @@ module SwitchmanInstJobs
102
100
  # those (= do nothing since it should already be false)
103
101
  # 4) no running job, jobs moved: set next_in_strand=true on the first of
104
102
  # those (= do nothing since it should already be true)
103
+ handler = lambda { |scope, column, blocker_job_kwargs = {}, advisory_lock_cb = nil|
104
+ shard_map = build_shard_map(scope, source_shard)
105
+ shard_map.each do |(target_shard, source_shard_ids)|
106
+ shard_scope = scope.where(shard_id: source_shard_ids)
105
107
 
106
- source_shard = ::Switchman::Shard.current(:delayed_jobs)
107
- strand_scope = ::Delayed::Job.shard(source_shard).where.not(strand: nil)
108
- shard_map = build_shard_map(strand_scope, source_shard)
109
- shard_map.each do |(target_shard, source_shard_ids)|
110
- shard_scope = strand_scope.where(shard_id: source_shard_ids)
111
-
112
- # 1) is taken care of because it should not show up here in strands
113
- strands = shard_scope.distinct.order(:strand).pluck(:strand)
114
-
115
- target_shard.activate(:delayed_jobs) do
116
- strands.each do |strand|
117
- transaction_on([source_shard, target_shard]) do
118
- this_strand_scope = shard_scope.where(strand: strand)
119
- # we want to copy all the jobs except the one that is still running.
120
- jobs_scope = this_strand_scope.where(locked_by: nil)
121
-
122
- # 2) and part of 3) are taken care of here by creating a blocker
123
- # job with next_in_strand = false. as soon as the current
124
- # running job is finished it should set next_in_strand
125
- # We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
126
- # Since we only unlock it on the new jobs queue *after* deleting from the original
127
- # the lock ensures the blocker always gets unlocked
128
- first = this_strand_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
129
- if first
130
- first_job = ::Delayed::Job.create!(strand: strand, next_in_strand: false)
131
- first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
132
- first_job.queue = first.queue
133
- first_job.tag = 'Kernel.sleep'
134
- first_job.source = 'JobsMigrator::StrandBlocker'
135
- first_job.max_attempts = 1
136
- # If we ever have jobs left over from 9999 jobs moves of a single shard,
137
- # something has gone terribly wrong
138
- first_job.strand_order_override = -9999
139
- first_job.save!
140
- # the rest of 3) is taken care of here
141
- # make sure that all the jobs moved over are NOT next in strand
142
- ::Delayed::Job.where(next_in_strand: true, strand: strand, locked_by: nil).
143
- update_all(next_in_strand: false)
144
- end
108
+ # 1) is taken care of because it should not show up here in strands
109
+ values = shard_scope.distinct.order(column).pluck(column)
145
110
 
146
- # 4) is taken care of here, by leaving next_in_strand alone and
147
- # it should execute on the new shard
148
- batch_move_jobs(
149
- target_shard: target_shard,
150
- source_shard: source_shard,
151
- scope: jobs_scope
152
- ) do |job, new_job|
153
- # This ensures jobs enqueued on the old jobs shard run before jobs on the new jobs queue
154
- new_job.strand_order_override = job.strand_order_override - 1
111
+ target_shard.activate(:delayed_jobs) do
112
+ values.each do |value|
113
+ transaction_on([source_shard, target_shard]) do
114
+ source_shard.activate(::Delayed::Backend::ActiveRecord::AbstractJob) do
115
+ advisory_lock_cb&.call(value)
116
+ end
117
+
118
+ value_scope = shard_scope.where(**{ column => value })
119
+ # we want to copy all the jobs except the one that is still running.
120
+ jobs_scope = value_scope.where(locked_by: nil)
121
+
122
+ # 2) and part of 3) are taken care of here by creating a blocker
123
+ # job with next_in_strand = false. as soon as the current
124
+ # running job is finished it should set next_in_strand
125
+ # We lock it to ensure that the jobs worker can't delete it until we are done moving the strand
126
+ # Since we only unlock it on the new jobs queue *after* deleting from the original
127
+ # the lock ensures the blocker always gets unlocked
128
+ first = value_scope.where.not(locked_by: nil).next_in_strand_order.lock.first
129
+ if first
130
+ create_blocker_job(
131
+ queue: first.queue,
132
+ shard_id: first.shard_id,
133
+ **{ column => value },
134
+ **blocker_job_kwargs
135
+ )
136
+
137
+ # the rest of 3) is taken care of here
138
+ # make sure that all the jobs moved over are NOT next in strand
139
+ ::Delayed::Job.where(next_in_strand: true, locked_by: nil, **{ column => value }).
140
+ update_all(next_in_strand: false)
141
+ end
142
+
143
+ # 4) is taken care of here, by leaving next_in_strand alone and
144
+ # it should execute on the new shard
145
+ batch_move_jobs(
146
+ target_shard: target_shard,
147
+ source_shard: source_shard,
148
+ scope: jobs_scope,
149
+ batch_size: batch_size
150
+ ) do |job, new_job|
151
+ # This ensures jobs enqueued on the old jobs shard run before jobs on the new jobs queue
152
+ new_job.strand_order_override = job.strand_order_override - 1
153
+ end
155
154
  end
156
155
  end
157
156
  end
157
+ end
158
+ }
159
+
160
+ strand_scope = ::Delayed::Job.shard(source_shard).where.not(strand: nil)
161
+ singleton_scope = ::Delayed::Job.shard(source_shard).where('strand IS NULL AND singleton IS NOT NULL')
162
+ all_scope = ::Delayed::Job.shard(source_shard).where('strand IS NOT NULL OR singleton IS NOT NULL')
158
163
 
164
+ singleton_blocker_additional_kwargs = {
165
+ locked_at: DateTime.now,
166
+ locked_by: ::Delayed::Backend::Base::ON_HOLD_BLOCKER
167
+ }
168
+
169
+ quoted_function_name = ::Delayed::Job.connection.quote_table_name('half_md5_as_bigint')
170
+ strand_advisory_lock_fn = lambda do |value|
171
+ ::Delayed::Job.connection.execute("SELECT pg_advisory_xact_lock(#{quoted_function_name}('#{value}'))")
172
+ end
173
+
174
+ singleton_advisory_lock_fn = lambda do |value|
175
+ ::Delayed::Job.connection.execute(
176
+ "SELECT pg_advisory_xact_lock(#{quoted_function_name}('singleton:#{value}'))"
177
+ )
178
+ end
179
+
180
+ handler.call(strand_scope, :strand, {}, strand_advisory_lock_fn)
181
+ handler.call(singleton_scope, :singleton, singleton_blocker_additional_kwargs, singleton_advisory_lock_fn)
182
+
183
+ shard_map = build_shard_map(all_scope, source_shard)
184
+ shard_map.each do |(target_shard, source_shard_ids)|
185
+ target_shard.activate(:delayed_jobs) do
159
186
  updated = ::Switchman::Shard.where(id: source_shard_ids, block_stranded: true).
160
187
  update_all(block_stranded: false)
161
188
  # If this is being manually re-run for some reason to clean something up, don't wait for nothing to happen
@@ -170,26 +197,40 @@ module SwitchmanInstJobs
170
197
  end
171
198
  end
172
199
 
173
- def unblock_strands(target_shard)
174
- target_shard.activate(:delayed_jobs) do
175
- loop do
176
- # We only want to unlock stranded jobs where they don't belong to a blocked shard (if they *do* belong)
177
- # to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
178
- # this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
179
- # logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
180
- # batches
181
- break if ::Delayed::Job.where(id: ::Delayed::Job.select('DISTINCT ON (strand) id').
182
- where.not(strand: nil).
183
- where.not(shard_id: ::Switchman::Shard.where(block_stranded: true).pluck(:id)).where(
200
+ def unblock_strands(target_shard, batch_size: 10_000)
201
+ block_stranded_ids = ::Switchman::Shard.where(block_stranded: true).pluck(:id)
202
+ query = lambda { |column, scope|
203
+ ::Delayed::Job.
204
+ where(id: ::Delayed::Job.select("DISTINCT ON (#{column}) id").
205
+ where(scope).
206
+ where.not(shard_id: block_stranded_ids).
207
+ where(
184
208
  ::Delayed::Job.select(1).from("#{::Delayed::Job.quoted_table_name} dj2").
185
209
  where("dj2.next_in_strand = true OR dj2.source = 'JobsMigrator::StrandBlocker'").
186
- where('dj2.strand = delayed_jobs.strand').arel.exists.not
187
- ).order(:strand, :strand_order_override, :id)).limit(500).update_all(next_in_strand: true).zero?
210
+ where("dj2.#{column} = delayed_jobs.#{column}").arel.exists.not
211
+ ).
212
+ order(column, :strand_order_override, :id)).limit(batch_size)
213
+ }
214
+
215
+ target_shard.activate(:delayed_jobs) do
216
+ # We only want to unlock stranded jobs where they don't belong to a blocked shard (if they *do* belong)
217
+ # to a blocked shard, they must be part of a concurrent jobs migration from a different source shard to
218
+ # this target shard, so we shouldn't unlock them yet. We only ever unlock one job here to keep the
219
+ # logic cleaner; if the job is n-stranded, after the first one runs, the trigger will unlock larger
220
+ # batches
221
+
222
+ loop do
223
+ break if query.call(:strand, 'strand IS NOT NULL').update_all(next_in_strand: true).zero?
224
+ end
225
+
226
+ loop do
227
+ break if query.call(:singleton,
228
+ 'strand IS NULL AND singleton IS NOT NULL').update_all(next_in_strand: true).zero?
188
229
  end
189
230
  end
190
231
  end
191
232
 
192
- def migrate_everything
233
+ def migrate_everything(batch_size: 1_000)
193
234
  source_shard = ::Switchman::Shard.current(:delayed_jobs)
194
235
  scope = ::Delayed::Job.shard(source_shard).where('strand IS NULL')
195
236
 
@@ -198,13 +239,26 @@ module SwitchmanInstJobs
198
239
  batch_move_jobs(
199
240
  target_shard: target_shard,
200
241
  source_shard: source_shard,
201
- scope: scope.where(shard_id: source_shard_ids).where(locked_by: nil)
242
+ scope: scope.where(shard_id: source_shard_ids).where(locked_by: nil),
243
+ batch_size: batch_size
202
244
  )
203
245
  end
204
246
  end
205
247
 
206
248
  private
207
249
 
250
+ def create_blocker_job(**kwargs)
251
+ first_job = ::Delayed::Job.create!(**kwargs, next_in_strand: false)
252
+ first_job.payload_object = ::Delayed::PerformableMethod.new(Kernel, :sleep, args: [0])
253
+ first_job.tag = 'Kernel.sleep'
254
+ first_job.source = 'JobsMigrator::StrandBlocker'
255
+ first_job.max_attempts = 1
256
+ # If we ever have jobs left over from 9999 jobs moves of a single shard,
257
+ # something has gone terribly wrong
258
+ first_job.strand_order_override = -9999
259
+ first_job.save!
260
+ end
261
+
208
262
  def build_shard_map(scope, source_shard)
209
263
  shard_ids = scope.distinct.pluck(:shard_id)
210
264
 
@@ -219,10 +273,10 @@ module SwitchmanInstJobs
219
273
  shard_map
220
274
  end
221
275
 
222
- def batch_move_jobs(target_shard:, source_shard:, scope:)
276
+ def batch_move_jobs(target_shard:, source_shard:, scope:, batch_size:)
223
277
  while scope.exists?
224
278
  # Adapted from get_and_lock_next_available in delayed/backend/active_record.rb
225
- target_jobs = scope.limit(1000).lock('FOR UPDATE SKIP LOCKED')
279
+ target_jobs = scope.limit(batch_size).lock('FOR UPDATE SKIP LOCKED')
226
280
 
227
281
  query = source_shard.activate(:delayed_jobs) do
228
282
  "WITH limited_jobs AS (#{target_jobs.to_sql}) " \
@@ -281,7 +335,10 @@ module SwitchmanInstJobs
281
335
  connection = ::Delayed::Job.connection
282
336
  quoted_keys = keys.map { |k| connection.quote_column_name(k) }.join(', ')
283
337
 
284
- connection.execute "COPY #{::Delayed::Job.quoted_table_name} (#{quoted_keys}) FROM STDIN"
338
+ connection.execute 'DROP TABLE IF EXISTS delayed_jobs_bulk_copy'
339
+ connection.execute "CREATE TEMPORARY TABLE delayed_jobs_bulk_copy
340
+ (LIKE #{::Delayed::Job.quoted_table_name} INCLUDING DEFAULTS)"
341
+ connection.execute "COPY delayed_jobs_bulk_copy (#{quoted_keys}) FROM STDIN"
285
342
  records.map do |record|
286
343
  connection.raw_connection.put_copy_data("#{keys.map { |k| quote_text(record[k]) }.join("\t")}\n")
287
344
  end
@@ -293,6 +350,9 @@ module SwitchmanInstJobs
293
350
  rescue StandardError => e
294
351
  raise connection.send(:translate_exception, e, 'COPY FROM STDIN')
295
352
  end
353
+ connection.execute "INSERT INTO #{::Delayed::Job.quoted_table_name} (#{quoted_keys})
354
+ SELECT #{quoted_keys} FROM delayed_jobs_bulk_copy
355
+ ON CONFLICT (singleton) WHERE singleton IS NOT NULL AND locked_by IS NULL DO NOTHING"
296
356
  result.cmd_tuples
297
357
  end
298
358
 
@@ -310,5 +370,3 @@ module SwitchmanInstJobs
310
370
  end
311
371
  end
312
372
  end
313
-
314
- # rubocop:enable Metrics/BlockLength, Metrics/MethodLength, Metrics/AbcSize, Metrics/ClassLength
@@ -1,3 +1,3 @@
1
1
  module SwitchmanInstJobs
2
- VERSION = '3.1.2'.freeze
2
+ VERSION = '3.2.10'.freeze
3
3
  end
@@ -14,10 +14,6 @@ module SwitchmanInstJobs
14
14
  ::Delayed::Backend::ActiveRecord::Job.prepend(
15
15
  Delayed::Backend::Base
16
16
  )
17
- ::Delayed::Backend::Redis::Job.prepend(
18
- Delayed::Backend::Base
19
- )
20
- ::Delayed::Backend::Redis::Job.column :shard_id, :integer
21
17
  ::Delayed::Pool.prepend Delayed::Pool
22
18
  ::Delayed::Worker.prepend Delayed::Worker
23
19
  ::Delayed::Worker::HealthCheck.prepend Delayed::Worker::HealthCheck
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: switchman-inst-jobs
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.2
4
+ version: 3.2.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bryan Petty
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-26 00:00:00.000000000 Z
11
+ date: 2022-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: inst-jobs
@@ -16,20 +16,20 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 1.0.3
19
+ version: 2.4.9
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
- version: '3.0'
22
+ version: '4.0'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
- version: 1.0.3
29
+ version: 2.4.9
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
- version: '3.0'
32
+ version: '4.0'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: parallel
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -107,7 +107,7 @@ dependencies:
107
107
  - !ruby/object:Gem::Version
108
108
  version: '0'
109
109
  - !ruby/object:Gem::Dependency
110
- name: imperium
110
+ name: diplomat
111
111
  requirement: !ruby/object:Gem::Requirement
112
112
  requirements:
113
113
  - - ">="
@@ -296,6 +296,19 @@ files:
296
296
  - db/migrate/20200822014259_add_block_stranded_to_switchman_shards.rb
297
297
  - db/migrate/20200824222232_speed_up_max_concurrent_delete_trigger.rb
298
298
  - db/migrate/20200825011002_add_strand_order_override.rb
299
+ - db/migrate/20210809145804_add_n_strand_index.rb
300
+ - db/migrate/20210812210128_add_singleton_column.rb
301
+ - db/migrate/20210917232626_add_delete_conflicting_singletons_before_unlock_trigger.rb
302
+ - db/migrate/20210928174754_fix_singleton_condition_in_before_insert.rb
303
+ - db/migrate/20210929204903_update_conflicting_singleton_function_to_use_index.rb
304
+ - db/migrate/20211101190934_update_after_delete_trigger_for_singleton_index.rb
305
+ - db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb
306
+ - db/migrate/20211220112800_fix_singleton_race_condition_insert.rb
307
+ - db/migrate/20211220113000_fix_singleton_race_condition_delete.rb
308
+ - db/migrate/20220127091200_fix_singleton_unique_constraint.rb
309
+ - db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb
310
+ - db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb
311
+ - db/migrate/20220203063200_remove_old_singleton_index.rb
299
312
  - lib/switchman-inst-jobs.rb
300
313
  - lib/switchman_inst_jobs.rb
301
314
  - lib/switchman_inst_jobs/active_record/connection_adapters/postgresql_adapter.rb
@@ -319,7 +332,9 @@ files:
319
332
  homepage: https://github.com/instructure/switchman-inst-jobs
320
333
  licenses:
321
334
  - MIT
322
- metadata: {}
335
+ metadata:
336
+ allowed_push_host: https://rubygems.org
337
+ rubygems_mfa_required: 'true'
323
338
  post_install_message:
324
339
  rdoc_options: []
325
340
  require_paths:
@@ -328,14 +343,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
328
343
  requirements:
329
344
  - - ">="
330
345
  - !ruby/object:Gem::Version
331
- version: '2.5'
346
+ version: '2.6'
332
347
  required_rubygems_version: !ruby/object:Gem::Requirement
333
348
  requirements:
334
349
  - - ">="
335
350
  - !ruby/object:Gem::Version
336
351
  version: '0'
337
352
  requirements: []
338
- rubygems_version: 3.0.3
353
+ rubygems_version: 3.1.4
339
354
  signing_key:
340
355
  specification_version: 4
341
356
  summary: Switchman and Instructure Jobs compatibility gem.