canvas_sync 0.16.5 → 0.17.0.beta5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +49 -137
  3. data/app/models/canvas_sync/sync_batch.rb +5 -0
  4. data/db/migrate/20201018210836_create_canvas_sync_sync_batches.rb +11 -0
  5. data/lib/canvas_sync.rb +35 -97
  6. data/lib/canvas_sync/importers/bulk_importer.rb +4 -7
  7. data/lib/canvas_sync/job.rb +4 -10
  8. data/lib/canvas_sync/job_batches/batch.rb +403 -0
  9. data/lib/canvas_sync/job_batches/batch_aware_job.rb +62 -0
  10. data/lib/canvas_sync/job_batches/callback.rb +152 -0
  11. data/lib/canvas_sync/job_batches/chain_builder.rb +220 -0
  12. data/lib/canvas_sync/job_batches/context_hash.rb +147 -0
  13. data/lib/canvas_sync/job_batches/jobs/base_job.rb +7 -0
  14. data/lib/canvas_sync/job_batches/jobs/concurrent_batch_job.rb +19 -0
  15. data/lib/canvas_sync/job_batches/jobs/serial_batch_job.rb +75 -0
  16. data/lib/canvas_sync/job_batches/sidekiq.rb +93 -0
  17. data/lib/canvas_sync/job_batches/status.rb +83 -0
  18. data/lib/canvas_sync/jobs/begin_sync_chain_job.rb +35 -0
  19. data/lib/canvas_sync/jobs/report_checker.rb +3 -6
  20. data/lib/canvas_sync/jobs/report_processor_job.rb +2 -5
  21. data/lib/canvas_sync/jobs/report_starter.rb +28 -20
  22. data/lib/canvas_sync/jobs/sync_accounts_job.rb +3 -5
  23. data/lib/canvas_sync/jobs/sync_admins_job.rb +2 -4
  24. data/lib/canvas_sync/jobs/sync_assignment_groups_job.rb +2 -4
  25. data/lib/canvas_sync/jobs/sync_assignments_job.rb +2 -4
  26. data/lib/canvas_sync/jobs/sync_context_module_items_job.rb +2 -4
  27. data/lib/canvas_sync/jobs/sync_context_modules_job.rb +2 -4
  28. data/lib/canvas_sync/jobs/sync_provisioning_report_job.rb +4 -34
  29. data/lib/canvas_sync/jobs/sync_roles_job.rb +2 -5
  30. data/lib/canvas_sync/jobs/sync_simple_table_job.rb +11 -32
  31. data/lib/canvas_sync/jobs/sync_submissions_job.rb +2 -4
  32. data/lib/canvas_sync/jobs/sync_terms_job.rb +25 -8
  33. data/lib/canvas_sync/processors/assignment_groups_processor.rb +2 -3
  34. data/lib/canvas_sync/processors/assignments_processor.rb +2 -3
  35. data/lib/canvas_sync/processors/context_module_items_processor.rb +2 -3
  36. data/lib/canvas_sync/processors/context_modules_processor.rb +2 -3
  37. data/lib/canvas_sync/processors/normal_processor.rb +1 -2
  38. data/lib/canvas_sync/processors/provisioning_report_processor.rb +2 -10
  39. data/lib/canvas_sync/processors/submissions_processor.rb +2 -3
  40. data/lib/canvas_sync/version.rb +1 -1
  41. data/spec/canvas_sync/canvas_sync_spec.rb +136 -153
  42. data/spec/canvas_sync/jobs/job_spec.rb +9 -17
  43. data/spec/canvas_sync/jobs/report_checker_spec.rb +1 -3
  44. data/spec/canvas_sync/jobs/report_processor_job_spec.rb +0 -3
  45. data/spec/canvas_sync/jobs/report_starter_spec.rb +19 -28
  46. data/spec/canvas_sync/jobs/sync_admins_job_spec.rb +1 -4
  47. data/spec/canvas_sync/jobs/sync_assignment_groups_job_spec.rb +2 -1
  48. data/spec/canvas_sync/jobs/sync_assignments_job_spec.rb +3 -2
  49. data/spec/canvas_sync/jobs/sync_context_module_items_job_spec.rb +3 -2
  50. data/spec/canvas_sync/jobs/sync_context_modules_job_spec.rb +3 -2
  51. data/spec/canvas_sync/jobs/sync_provisioning_report_job_spec.rb +3 -35
  52. data/spec/canvas_sync/jobs/sync_roles_job_spec.rb +1 -4
  53. data/spec/canvas_sync/jobs/sync_simple_table_job_spec.rb +5 -12
  54. data/spec/canvas_sync/jobs/sync_submissions_job_spec.rb +2 -1
  55. data/spec/canvas_sync/jobs/sync_terms_job_spec.rb +1 -4
  56. data/spec/dummy/config/environments/test.rb +2 -0
  57. data/spec/dummy/db/schema.rb +9 -1
  58. data/spec/job_batching/batch_aware_job_spec.rb +100 -0
  59. data/spec/job_batching/batch_spec.rb +372 -0
  60. data/spec/job_batching/callback_spec.rb +38 -0
  61. data/spec/job_batching/flow_spec.rb +88 -0
  62. data/spec/job_batching/integration/integration.rb +57 -0
  63. data/spec/job_batching/integration/nested.rb +88 -0
  64. data/spec/job_batching/integration/simple.rb +47 -0
  65. data/spec/job_batching/integration/workflow.rb +134 -0
  66. data/spec/job_batching/integration_helper.rb +48 -0
  67. data/spec/job_batching/sidekiq_spec.rb +124 -0
  68. data/spec/job_batching/status_spec.rb +92 -0
  69. data/spec/job_batching/support/base_job.rb +14 -0
  70. data/spec/job_batching/support/sample_callback.rb +2 -0
  71. data/spec/spec_helper.rb +17 -0
  72. metadata +85 -8
  73. data/lib/canvas_sync/job_chain.rb +0 -102
  74. data/lib/canvas_sync/jobs/fork_gather.rb +0 -74
  75. data/spec/canvas_sync/jobs/fork_gather_spec.rb +0 -73
@@ -64,12 +64,13 @@ module CanvasSync
64
64
  columns = columns.dup
65
65
 
66
66
  update_conditions = {
67
- condition: condition_sql(klass, columns, import_args[:sync_start_time]),
67
+ condition: condition_sql(klass, columns),
68
68
  columns: columns
69
69
  }
70
70
  update_conditions[:conflict_target] = conflict_target if conflict_target
71
71
 
72
72
  options = { validate: false, on_duplicate_key_update: update_conditions }.merge(import_args)
73
+
73
74
  options.delete(:on_duplicate_key_update) if options.key?(:on_duplicate_key_ignore)
74
75
  klass.import(columns, rows, options)
75
76
  end
@@ -84,14 +85,10 @@ module CanvasSync
84
85
  # started_at = Time.now
85
86
  # run_the_users_sync!
86
87
  # changed = User.where("updated_at >= ?", started_at)
87
- def self.condition_sql(klass, columns, report_start)
88
+ def self.condition_sql(klass, columns)
88
89
  columns_str = columns.map { |c| "#{klass.quoted_table_name}.#{c}" }.join(", ")
89
90
  excluded_str = columns.map { |c| "EXCLUDED.#{c}" }.join(", ")
90
- condition_sql = "(#{columns_str}) IS DISTINCT FROM (#{excluded_str})"
91
- if klass.column_names.include?("updated_at") && report_start
92
- condition_sql += " AND #{klass.quoted_table_name}.updated_at < '#{report_start}'"
93
- end
94
- condition_sql
91
+ "(#{columns_str}) IS DISTINCT FROM (#{excluded_str})"
95
92
  end
96
93
 
97
94
  def self.batch_size
@@ -3,7 +3,7 @@ require "active_job"
3
3
  module CanvasSync
4
4
  # Inherit from this class to build a Job that will log to the canvas_sync_job_logs table
5
5
  class Job < ActiveJob::Base
6
- attr_reader :job_chain, :job_log
6
+ attr_reader :job_log
7
7
 
8
8
  before_enqueue do |job|
9
9
  create_job_log(job)
@@ -15,12 +15,6 @@ module CanvasSync
15
15
  @job_log.started_at = Time.now
16
16
  @job_log.save
17
17
 
18
- if job.arguments[0].is_a?(Hash) && job.arguments[0].include?(:jobs)
19
- # @job_chain = JobChain.new(job.arguments[0])
20
- @job_chain = job.arguments[0]
21
- job.arguments[0] = @job_chain
22
- end
23
-
24
18
  begin
25
19
  block.call
26
20
  @job_log.status = JobLog::SUCCESS_STATUS
@@ -28,11 +22,11 @@ module CanvasSync
28
22
  @job_log.exception = "#{e.class}: #{e.message}"
29
23
  @job_log.backtrace = e.backtrace.join('\n')
30
24
  @job_log.status = JobLog::ERROR_STATUS
31
- if @job_chain&.dig(:global_options, :on_failure)&.present?
25
+ if batch_context&.[](:on_failure)&.present?
32
26
  begin
33
- class_name, method = @job_chain[:global_options][:on_failure].split('.')
27
+ class_name, method = batch_context[:on_failure].split('.')
34
28
  klass = class_name.constantize
35
- klass.send(method.to_sym, e, job_chain: @job_chain, job_log: @job_log)
29
+ klass.send(method.to_sym, e, batch_context: batch_context, job_log: @job_log)
36
30
  rescue => e2
37
31
  @job_log.backtrace += "\n\nError Occurred while handling an Error: #{e2.class}: #{e2.message}"
38
32
  @job_log.backtrace += "\n" + e2.backtrace.join('\n')
@@ -0,0 +1,403 @@
1
+
2
+ begin
3
+ require 'sidekiq'
4
+ rescue LoadError
5
+ end
6
+
7
+ require_relative './batch_aware_job'
8
+ require_relative "./callback"
9
+ require_relative "./context_hash"
10
+ require_relative "./status"
11
+ Dir[File.dirname(__FILE__) + "/jobs/*.rb"].each { |file| require file }
12
+ require_relative "./chain_builder"
13
+
14
+ # Implement Job Batching similar to Sidekiq::Batch. Supports ActiveJob and Sidekiq, or a mix thereof.
15
+ # Much of this code is modified/extended from https://github.com/breamware/sidekiq-batch
16
+
17
+ module CanvasSync
18
+ module JobBatches
19
+ class Batch
20
+ class NoBlockGivenError < StandardError; end
21
+
22
+ def self.batch_attr(key, read_only: true)
23
+ class_eval <<-RUBY, __FILE__, __LINE__ + 1
24
+ def #{key}=(value)
25
+ raise "#{key} is read-only once the batch has been started" if #{read_only.to_s} && (@initialized || @existing)
26
+ @#{key} = value
27
+ persist_bid_attr('#{key}', value)
28
+ end
29
+
30
+ def #{key}
31
+ return @#{key} if defined?(@#{key})
32
+ if (@initialized || @existing)
33
+ @#{key} = read_bid_attr('#{key}')
34
+ end
35
+ end
36
+ RUBY
37
+ end
38
+
39
+ delegate :redis, to: :class
40
+
41
+ BID_EXPIRE_TTL = 2_592_000
42
+
43
+ attr_reader :bid
44
+
45
+ def initialize(existing_bid = nil)
46
+ @bid = existing_bid || SecureRandom.urlsafe_base64(10)
47
+ @existing = !(!existing_bid || existing_bid.empty?) # Basically existing_bid.present?
48
+ @initialized = false
49
+ @bidkey = "BID-" + @bid.to_s
50
+ @pending_attrs = {}
51
+ @ready_to_queue = []
52
+ self.created_at = Time.now.utc.to_f unless @existing
53
+ end
54
+
55
+ batch_attr :description
56
+ batch_attr :created_at
57
+ batch_attr :callback_queue, read_only: false
58
+ batch_attr :callback_batch, read_only: false
59
+ batch_attr :allow_context_changes
60
+
61
+ def context
62
+ return @context if defined?(@context)
63
+
64
+ if (@initialized || @existing)
65
+ @context = ContextHash.new(bid)
66
+ else
67
+ @context = ContextHash.new(bid, {})
68
+ end
69
+ end
70
+
71
+ def context=(value)
72
+ raise "context is read-only once the batch has been started" if (@initialized || @existing) # && !allow_context_changes
73
+ raise "context must be a Hash" unless value.is_a?(Hash) || value.nil?
74
+ return nil if value.nil? && @context.nil?
75
+
76
+ value = {} if value.nil?
77
+ value = value.local if value.is_a?(ContextHash)
78
+
79
+ @context ||= ContextHash.new(bid, {})
80
+ @context.set_local(value)
81
+ # persist_bid_attr('context', JSON.unparse(@context.local))
82
+ end
83
+
84
+ def save_context_changes
85
+ @context&.save!
86
+ end
87
+
88
+ def on(event, callback, options = {})
89
+ return unless Callback::VALID_CALLBACKS.include?(event.to_s)
90
+ callback_key = "#{@bidkey}-callbacks-#{event}"
91
+ redis do |r|
92
+ r.multi do
93
+ r.sadd(callback_key, JSON.unparse({
94
+ callback: callback,
95
+ opts: options
96
+ }))
97
+ r.expire(callback_key, BID_EXPIRE_TTL)
98
+ end
99
+ end
100
+ end
101
+
102
+ def jobs
103
+ raise NoBlockGivenError unless block_given?
104
+
105
+ if !@existing && !@initialized
106
+ parent_bid = Thread.current[:batch]&.bid
107
+
108
+ redis do |r|
109
+ r.multi do
110
+ r.hset(@bidkey, "parent_bid", parent_bid.to_s) if parent_bid
111
+ r.expire(@bidkey, BID_EXPIRE_TTL)
112
+ end
113
+ end
114
+
115
+ flush_pending_attrs
116
+ @context&.save!
117
+
118
+ @initialized = true
119
+ end
120
+
121
+ @ready_to_queue = []
122
+
123
+ Batch.logger.debug("Beginning Batch #{@bidkey}")
124
+
125
+ begin
126
+ parent = Thread.current[:batch]
127
+ Thread.current[:batch] = self
128
+ yield
129
+ ensure
130
+ Thread.current[:batch] = parent
131
+ end
132
+
133
+ redis do |r|
134
+ r.multi do
135
+ if parent_bid
136
+ r.hincrby("BID-#{parent_bid}", "children", 1)
137
+ r.hincrby("BID-#{parent_bid}", "total", @ready_to_queue.size)
138
+ r.expire("BID-#{parent_bid}", BID_EXPIRE_TTL)
139
+ end
140
+
141
+ r.hincrby(@bidkey, "pending", @ready_to_queue.size)
142
+ r.hincrby(@bidkey, "total", @ready_to_queue.size)
143
+ r.expire(@bidkey, BID_EXPIRE_TTL)
144
+
145
+ if @ready_to_queue.size > 0
146
+ r.sadd(@bidkey + "-jids", @ready_to_queue)
147
+ r.expire(@bidkey + "-jids", BID_EXPIRE_TTL)
148
+ end
149
+ end
150
+ end
151
+
152
+ @ready_to_queue
153
+ end
154
+
155
+ def increment_job_queue(jid)
156
+ @ready_to_queue << jid
157
+ end
158
+
159
+ def invalidate_all
160
+ redis do |r|
161
+ r.setex("invalidated-bid-#{bid}", BID_EXPIRE_TTL, 1)
162
+ end
163
+ end
164
+
165
+ def parent_bid
166
+ redis do |r|
167
+ r.hget(@bidkey, "parent_bid")
168
+ end
169
+ end
170
+
171
+ def parent
172
+ if parent_bid
173
+ Batch.new(parent_bid)
174
+ end
175
+ end
176
+
177
+ def valid?(batch = self)
178
+ valid = !redis { |r| r.exists?("invalidated-bid-#{batch.bid}") }
179
+ batch.parent ? valid && valid?(batch.parent) : valid
180
+ end
181
+
182
+ # Any Batches or Jobs created in the given block won't be associated with the current batch
183
+ def self.without_batch
184
+ parent = Thread.current[:batch]
185
+ Thread.current[:batch] = nil
186
+ yield
187
+ ensure
188
+ Thread.current[:batch] = parent
189
+ end
190
+
191
+ private
192
+
193
+ def persist_bid_attr(attribute, value)
194
+ if @initialized || @existing
195
+ redis do |r|
196
+ r.multi do
197
+ r.hset(@bidkey, attribute, value)
198
+ r.expire(@bidkey, BID_EXPIRE_TTL)
199
+ end
200
+ end
201
+ else
202
+ @pending_attrs[attribute] = value
203
+ end
204
+ end
205
+
206
+ def read_bid_attr(attribute)
207
+ redis do |r|
208
+ r.hget(@bidkey, attribute)
209
+ end
210
+ end
211
+
212
+ def flush_pending_attrs
213
+ redis do |r|
214
+ r.mapped_hmset(@bidkey, @pending_attrs)
215
+ end
216
+ @pending_attrs = {}
217
+ end
218
+
219
+ class << self
220
+ def process_failed_job(bid, jid)
221
+ _, pending, failed, children, complete, parent_bid = redis do |r|
222
+ r.multi do
223
+ r.sadd("BID-#{bid}-failed", jid)
224
+
225
+ r.hincrby("BID-#{bid}", "pending", 0)
226
+ r.scard("BID-#{bid}-failed")
227
+ r.hincrby("BID-#{bid}", "children", 0)
228
+ r.scard("BID-#{bid}-batches-complete")
229
+ r.hget("BID-#{bid}", "parent_bid")
230
+
231
+ r.expire("BID-#{bid}-failed", BID_EXPIRE_TTL)
232
+ end
233
+ end
234
+
235
+ # if the batch failed, and has a parent, update the parent to show one pending and failed job
236
+ if parent_bid
237
+ redis do |r|
238
+ r.multi do
239
+ r.hincrby("BID-#{parent_bid}", "pending", 1)
240
+ r.sadd("BID-#{parent_bid}-failed", jid)
241
+ r.expire("BID-#{parent_bid}-failed", BID_EXPIRE_TTL)
242
+ end
243
+ end
244
+ end
245
+
246
+ if pending.to_i == failed.to_i && children == complete
247
+ enqueue_callbacks(:complete, bid)
248
+ end
249
+ end
250
+
251
+ def process_dead_job(bid, jid)
252
+ _, failed, children, complete, parent_bid = redis do |r|
253
+ r.multi do
254
+ r.sadd("BID-#{bid}-dead", jid)
255
+
256
+ r.scard("BID-#{bid}-dead")
257
+ r.hincrby("BID-#{bid}", "children", 0)
258
+ r.scard("BID-#{bid}-batches-complete")
259
+ r.hget("BID-#{bid}", "parent_bid")
260
+
261
+ r.expire("BID-#{bid}-dead", BID_EXPIRE_TTL)
262
+ end
263
+ end
264
+
265
+ if parent_bid
266
+ redis do |r|
267
+ r.multi do
268
+ r.sadd("BID-#{parent_bid}-dead", jid)
269
+ r.expire("BID-#{parent_bid}-dead", BID_EXPIRE_TTL)
270
+ end
271
+ end
272
+ end
273
+
274
+ enqueue_callbacks(:dead, bid)
275
+ end
276
+
277
+ def process_successful_job(bid, jid)
278
+ _, failed, pending, children, complete, success, total, parent_bid = redis do |r|
279
+ r.multi do
280
+ r.srem("BID-#{bid}-failed", jid)
281
+
282
+ r.scard("BID-#{bid}-failed")
283
+ r.hincrby("BID-#{bid}", "pending", -1)
284
+ r.hincrby("BID-#{bid}", "children", 0)
285
+ r.scard("BID-#{bid}-batches-complete")
286
+ r.scard("BID-#{bid}-batches-success")
287
+ r.hget("BID-#{bid}", "total")
288
+ r.hget("BID-#{bid}", "parent_bid")
289
+
290
+ r.srem("BID-#{bid}-jids", jid)
291
+ r.expire("BID-#{bid}", BID_EXPIRE_TTL)
292
+ end
293
+ end
294
+
295
+ all_success = pending.to_i.zero? && children == success
296
+ # if complete or successful call complete callback (the complete callback may then call successful)
297
+ if (pending.to_i == failed.to_i && children == complete) || all_success
298
+ enqueue_callbacks(:complete, bid)
299
+ enqueue_callbacks(:success, bid) if all_success
300
+ end
301
+ end
302
+
303
+ def enqueue_callbacks(event, bid)
304
+ batch_key = "BID-#{bid}"
305
+ callback_key = "#{batch_key}-callbacks-#{event}"
306
+ already_processed, _, callbacks, queue, parent_bid, callback_batch = redis do |r|
307
+ r.multi do
308
+ r.hget(batch_key, event)
309
+ r.hset(batch_key, event, true)
310
+ r.smembers(callback_key)
311
+ r.hget(batch_key, "callback_queue")
312
+ r.hget(batch_key, "parent_bid")
313
+ r.hget(batch_key, "callback_batch")
314
+ end
315
+ end
316
+
317
+ return if already_processed == 'true'
318
+
319
+ queue ||= "default"
320
+ parent_bid = !parent_bid || parent_bid.empty? ? nil : parent_bid # Basically parent_bid.blank?
321
+ callback_args = callbacks.reduce([]) do |memo, jcb|
322
+ cb = JSON.load(jcb)
323
+ memo << [cb['callback'], event.to_s, cb['opts'], bid, parent_bid]
324
+ end
325
+
326
+ opts = {"bid" => bid, "event" => event}
327
+
328
+ # Run callback batch finalize synchronously
329
+ if callback_batch
330
+ # Extract opts from cb_args or use current
331
+ # Pass in stored event as callback finalize is processed on complete event
332
+ cb_opts = callback_args.first&.at(2) || opts
333
+
334
+ logger.debug {"Run callback batch bid: #{bid} event: #{event} args: #{callback_args.inspect}"}
335
+ # Finalize now
336
+ finalizer = Batch::Callback::Finalize.new
337
+ status = Status.new bid
338
+ finalizer.dispatch(status, cb_opts)
339
+
340
+ return
341
+ end
342
+
343
+ logger.debug {"Enqueue callback bid: #{bid} event: #{event} args: #{callback_args.inspect}"}
344
+
345
+ if callback_args.empty?
346
+ # Finalize now
347
+ finalizer = Batch::Callback::Finalize.new
348
+ status = Status.new bid
349
+ finalizer.dispatch(status, opts)
350
+ else
351
+ # Otherwise finalize in sub batch complete callback
352
+ cb_batch = self.new
353
+ cb_batch.callback_batch = true
354
+ logger.debug {"Adding callback batch: #{cb_batch.bid} for batch: #{bid}"}
355
+ cb_batch.on(:complete, "#{Batch::Callback::Finalize.to_s}#dispatch", opts)
356
+ cb_batch.jobs do
357
+ push_callbacks callback_args, queue
358
+ end
359
+ end
360
+ end
361
+
362
+ def cleanup_redis(bid)
363
+ logger.debug {"Cleaning redis of batch #{bid}"}
364
+ redis do |r|
365
+ r.del(
366
+ "BID-#{bid}",
367
+ "BID-#{bid}-callbacks-complete",
368
+ "BID-#{bid}-callbacks-success",
369
+ "BID-#{bid}-failed",
370
+
371
+ "BID-#{bid}-batches-success",
372
+ "BID-#{bid}-batches-complete",
373
+ "BID-#{bid}-batches-failed",
374
+ "BID-#{bid}-jids",
375
+ )
376
+ end
377
+ end
378
+
379
+ def redis(*args, &blk)
380
+ defined?(::Sidekiq) ? ::Sidekiq.redis(*args, &blk) : nil # TODO
381
+ end
382
+
383
+ def logger
384
+ defined?(::Sidekiq) ? ::Sidekiq.logger : Rails.logger
385
+ end
386
+
387
+ private
388
+
389
+ def push_callbacks(args, queue)
390
+ Batch::Callback::Worker.enqueue_all(args, queue)
391
+ end
392
+ end
393
+ end
394
+
395
+ ActiveJob::Base.include BatchAwareJob
396
+ end
397
+ end
398
+
399
+ # Automatically integrate with Sidekiq if it is present.
400
+ if defined?(::Sidekiq)
401
+ require_relative './sidekiq'
402
+ CanvasSync::JobBatches::Sidekiq.configure
403
+ end