canvas_sync 0.16.5 → 0.17.0.beta5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +49 -137
  3. data/app/models/canvas_sync/sync_batch.rb +5 -0
  4. data/db/migrate/20201018210836_create_canvas_sync_sync_batches.rb +11 -0
  5. data/lib/canvas_sync.rb +35 -97
  6. data/lib/canvas_sync/importers/bulk_importer.rb +4 -7
  7. data/lib/canvas_sync/job.rb +4 -10
  8. data/lib/canvas_sync/job_batches/batch.rb +403 -0
  9. data/lib/canvas_sync/job_batches/batch_aware_job.rb +62 -0
  10. data/lib/canvas_sync/job_batches/callback.rb +152 -0
  11. data/lib/canvas_sync/job_batches/chain_builder.rb +220 -0
  12. data/lib/canvas_sync/job_batches/context_hash.rb +147 -0
  13. data/lib/canvas_sync/job_batches/jobs/base_job.rb +7 -0
  14. data/lib/canvas_sync/job_batches/jobs/concurrent_batch_job.rb +19 -0
  15. data/lib/canvas_sync/job_batches/jobs/serial_batch_job.rb +75 -0
  16. data/lib/canvas_sync/job_batches/sidekiq.rb +93 -0
  17. data/lib/canvas_sync/job_batches/status.rb +83 -0
  18. data/lib/canvas_sync/jobs/begin_sync_chain_job.rb +35 -0
  19. data/lib/canvas_sync/jobs/report_checker.rb +3 -6
  20. data/lib/canvas_sync/jobs/report_processor_job.rb +2 -5
  21. data/lib/canvas_sync/jobs/report_starter.rb +28 -20
  22. data/lib/canvas_sync/jobs/sync_accounts_job.rb +3 -5
  23. data/lib/canvas_sync/jobs/sync_admins_job.rb +2 -4
  24. data/lib/canvas_sync/jobs/sync_assignment_groups_job.rb +2 -4
  25. data/lib/canvas_sync/jobs/sync_assignments_job.rb +2 -4
  26. data/lib/canvas_sync/jobs/sync_context_module_items_job.rb +2 -4
  27. data/lib/canvas_sync/jobs/sync_context_modules_job.rb +2 -4
  28. data/lib/canvas_sync/jobs/sync_provisioning_report_job.rb +4 -34
  29. data/lib/canvas_sync/jobs/sync_roles_job.rb +2 -5
  30. data/lib/canvas_sync/jobs/sync_simple_table_job.rb +11 -32
  31. data/lib/canvas_sync/jobs/sync_submissions_job.rb +2 -4
  32. data/lib/canvas_sync/jobs/sync_terms_job.rb +25 -8
  33. data/lib/canvas_sync/processors/assignment_groups_processor.rb +2 -3
  34. data/lib/canvas_sync/processors/assignments_processor.rb +2 -3
  35. data/lib/canvas_sync/processors/context_module_items_processor.rb +2 -3
  36. data/lib/canvas_sync/processors/context_modules_processor.rb +2 -3
  37. data/lib/canvas_sync/processors/normal_processor.rb +1 -2
  38. data/lib/canvas_sync/processors/provisioning_report_processor.rb +2 -10
  39. data/lib/canvas_sync/processors/submissions_processor.rb +2 -3
  40. data/lib/canvas_sync/version.rb +1 -1
  41. data/spec/canvas_sync/canvas_sync_spec.rb +136 -153
  42. data/spec/canvas_sync/jobs/job_spec.rb +9 -17
  43. data/spec/canvas_sync/jobs/report_checker_spec.rb +1 -3
  44. data/spec/canvas_sync/jobs/report_processor_job_spec.rb +0 -3
  45. data/spec/canvas_sync/jobs/report_starter_spec.rb +19 -28
  46. data/spec/canvas_sync/jobs/sync_admins_job_spec.rb +1 -4
  47. data/spec/canvas_sync/jobs/sync_assignment_groups_job_spec.rb +2 -1
  48. data/spec/canvas_sync/jobs/sync_assignments_job_spec.rb +3 -2
  49. data/spec/canvas_sync/jobs/sync_context_module_items_job_spec.rb +3 -2
  50. data/spec/canvas_sync/jobs/sync_context_modules_job_spec.rb +3 -2
  51. data/spec/canvas_sync/jobs/sync_provisioning_report_job_spec.rb +3 -35
  52. data/spec/canvas_sync/jobs/sync_roles_job_spec.rb +1 -4
  53. data/spec/canvas_sync/jobs/sync_simple_table_job_spec.rb +5 -12
  54. data/spec/canvas_sync/jobs/sync_submissions_job_spec.rb +2 -1
  55. data/spec/canvas_sync/jobs/sync_terms_job_spec.rb +1 -4
  56. data/spec/dummy/config/environments/test.rb +2 -0
  57. data/spec/dummy/db/schema.rb +9 -1
  58. data/spec/job_batching/batch_aware_job_spec.rb +100 -0
  59. data/spec/job_batching/batch_spec.rb +372 -0
  60. data/spec/job_batching/callback_spec.rb +38 -0
  61. data/spec/job_batching/flow_spec.rb +88 -0
  62. data/spec/job_batching/integration/integration.rb +57 -0
  63. data/spec/job_batching/integration/nested.rb +88 -0
  64. data/spec/job_batching/integration/simple.rb +47 -0
  65. data/spec/job_batching/integration/workflow.rb +134 -0
  66. data/spec/job_batching/integration_helper.rb +48 -0
  67. data/spec/job_batching/sidekiq_spec.rb +124 -0
  68. data/spec/job_batching/status_spec.rb +92 -0
  69. data/spec/job_batching/support/base_job.rb +14 -0
  70. data/spec/job_batching/support/sample_callback.rb +2 -0
  71. data/spec/spec_helper.rb +17 -0
  72. metadata +85 -8
  73. data/lib/canvas_sync/job_chain.rb +0 -102
  74. data/lib/canvas_sync/jobs/fork_gather.rb +0 -74
  75. data/spec/canvas_sync/jobs/fork_gather_spec.rb +0 -73
@@ -64,12 +64,13 @@ module CanvasSync
64
64
  columns = columns.dup
65
65
 
66
66
  update_conditions = {
67
- condition: condition_sql(klass, columns, import_args[:sync_start_time]),
67
+ condition: condition_sql(klass, columns),
68
68
  columns: columns
69
69
  }
70
70
  update_conditions[:conflict_target] = conflict_target if conflict_target
71
71
 
72
72
  options = { validate: false, on_duplicate_key_update: update_conditions }.merge(import_args)
73
+
73
74
  options.delete(:on_duplicate_key_update) if options.key?(:on_duplicate_key_ignore)
74
75
  klass.import(columns, rows, options)
75
76
  end
@@ -84,14 +85,10 @@ module CanvasSync
84
85
  # started_at = Time.now
85
86
  # run_the_users_sync!
86
87
  # changed = User.where("updated_at >= ?", started_at)
87
- def self.condition_sql(klass, columns, report_start)
88
+ def self.condition_sql(klass, columns)
88
89
  columns_str = columns.map { |c| "#{klass.quoted_table_name}.#{c}" }.join(", ")
89
90
  excluded_str = columns.map { |c| "EXCLUDED.#{c}" }.join(", ")
90
- condition_sql = "(#{columns_str}) IS DISTINCT FROM (#{excluded_str})"
91
- if klass.column_names.include?("updated_at") && report_start
92
- condition_sql += " AND #{klass.quoted_table_name}.updated_at < '#{report_start}'"
93
- end
94
- condition_sql
91
+ "(#{columns_str}) IS DISTINCT FROM (#{excluded_str})"
95
92
  end
96
93
 
97
94
  def self.batch_size
@@ -3,7 +3,7 @@ require "active_job"
3
3
  module CanvasSync
4
4
  # Inherit from this class to build a Job that will log to the canvas_sync_job_logs table
5
5
  class Job < ActiveJob::Base
6
- attr_reader :job_chain, :job_log
6
+ attr_reader :job_log
7
7
 
8
8
  before_enqueue do |job|
9
9
  create_job_log(job)
@@ -15,12 +15,6 @@ module CanvasSync
15
15
  @job_log.started_at = Time.now
16
16
  @job_log.save
17
17
 
18
- if job.arguments[0].is_a?(Hash) && job.arguments[0].include?(:jobs)
19
- # @job_chain = JobChain.new(job.arguments[0])
20
- @job_chain = job.arguments[0]
21
- job.arguments[0] = @job_chain
22
- end
23
-
24
18
  begin
25
19
  block.call
26
20
  @job_log.status = JobLog::SUCCESS_STATUS
@@ -28,11 +22,11 @@ module CanvasSync
28
22
  @job_log.exception = "#{e.class}: #{e.message}"
29
23
  @job_log.backtrace = e.backtrace.join('\n')
30
24
  @job_log.status = JobLog::ERROR_STATUS
31
- if @job_chain&.dig(:global_options, :on_failure)&.present?
25
+ if batch_context&.[](:on_failure)&.present?
32
26
  begin
33
- class_name, method = @job_chain[:global_options][:on_failure].split('.')
27
+ class_name, method = batch_context[:on_failure].split('.')
34
28
  klass = class_name.constantize
35
- klass.send(method.to_sym, e, job_chain: @job_chain, job_log: @job_log)
29
+ klass.send(method.to_sym, e, batch_context: batch_context, job_log: @job_log)
36
30
  rescue => e2
37
31
  @job_log.backtrace += "\n\nError Occurred while handling an Error: #{e2.class}: #{e2.message}"
38
32
  @job_log.backtrace += "\n" + e2.backtrace.join('\n')
@@ -0,0 +1,403 @@
1
+
2
+ begin
3
+ require 'sidekiq'
4
+ rescue LoadError
5
+ end
6
+
7
+ require_relative './batch_aware_job'
8
+ require_relative "./callback"
9
+ require_relative "./context_hash"
10
+ require_relative "./status"
11
+ Dir[File.dirname(__FILE__) + "/jobs/*.rb"].each { |file| require file }
12
+ require_relative "./chain_builder"
13
+
14
+ # Implement Job Batching similar to Sidekiq::Batch. Supports ActiveJob and Sidekiq, or a mix thereof.
15
+ # Much of this code is modified/extended from https://github.com/breamware/sidekiq-batch
16
+
17
+ module CanvasSync
18
+ module JobBatches
19
+ class Batch
20
+ class NoBlockGivenError < StandardError; end
21
+
22
# Class-level macro that generates a writer and a lazy reader for a batch attribute.
#
# The generated writer stores the value in an instance variable and hands it to
# persist_bid_attr. When read_only is true it raises once the batch is
# initialized or was loaded from an existing bid.
# The generated reader returns the cached instance variable if it is defined;
# otherwise, for an initialized/existing batch, it loads the value through
# read_bid_attr and caches it. For a batch that has not started it returns nil.
#
# @param key [Symbol, String] attribute name
# @param read_only [Boolean] whether writes are rejected after the batch has started
def self.batch_attr(key, read_only: true)
  class_eval(<<~RUBY, __FILE__, __LINE__ + 1)
    def #{key}=(value)
      raise "#{key} is read-only once the batch has been started" if #{read_only} && (@initialized || @existing)
      @#{key} = value
      persist_bid_attr('#{key}', value)
    end

    def #{key}
      return @#{key} if defined?(@#{key})
      @#{key} = read_bid_attr('#{key}') if @initialized || @existing
    end
  RUBY
end
38
+
39
+ delegate :redis, to: :class
40
+
41
+ BID_EXPIRE_TTL = 2_592_000
42
+
43
+ attr_reader :bid
44
+
45
# Creates a handle for a batch.
#
# @param existing_bid [String, nil] bid of a batch to re-open; when nil a fresh
#   random bid is generated. A non-empty bid marks the batch as existing.
def initialize(existing_bid = nil)
  @bid = existing_bid || SecureRandom.urlsafe_base64(10)
  @bidkey = "BID-#{@bid}"
  # Basically existing_bid.present?
  @existing = existing_bid ? !existing_bid.empty? : false
  @initialized = false
  @pending_attrs = {}
  @ready_to_queue = []
  return if @existing

  # New batches record their creation time (UTC epoch seconds as a Float).
  self.created_at = Time.now.utc.to_f
end
54
+
55
+ batch_attr :description
56
+ batch_attr :created_at
57
+ batch_attr :callback_queue, read_only: false
58
+ batch_attr :callback_batch, read_only: false
59
+ batch_attr :allow_context_changes
60
+
61
# Returns the memoized ContextHash for this batch.
#
# A started (initialized or existing) batch builds the ContextHash from the bid
# alone; a batch that has not started yet is given an empty local hash.
def context
  return @context if defined?(@context)

  started = @initialized || @existing
  @context = started ? ContextHash.new(bid) : ContextHash.new(bid, {})
end
70
+
71
# Replaces the local context values of a batch that has not started yet.
#
# @param value [Hash, nil] new context; nil is treated as an empty hash
# @raise [RuntimeError] if the batch has already started, or value is neither a Hash nor nil
def context=(value)
  if @initialized || @existing # && !allow_context_changes
    raise "context is read-only once the batch has been started"
  end
  raise "context must be a Hash" unless value.nil? || value.is_a?(Hash)
  # Nothing to clear when no context has been created yet.
  return nil if value.nil? && @context.nil?

  replacement = value.nil? ? {} : value
  replacement = replacement.local if replacement.is_a?(ContextHash)

  @context ||= ContextHash.new(bid, {})
  @context.set_local(replacement)
  # persist_bid_attr('context', JSON.unparse(@context.local))
end
83
+
84
# Saves the context via ContextHash#save!, if a context object has been created.
def save_context_changes
  @context.save! unless @context.nil?
end
87
+
88
# Registers a callback for a batch event.
#
# The callback and its options are serialized to JSON and added to the Redis
# set "<bidkey>-callbacks-<event>", whose TTL is refreshed to BID_EXPIRE_TTL.
# Events not listed in Callback::VALID_CALLBACKS are silently ignored.
#
# @param event [Symbol, String] event name
# @param callback [Object] JSON-serializable callback reference
# @param options [Hash] options stored alongside the callback
def on(event, callback, options = {})
  return unless Callback::VALID_CALLBACKS.include?(event.to_s)

  callback_key = "#{@bidkey}-callbacks-#{event}"
  payload = JSON.unparse({ callback: callback, opts: options })

  redis do |r|
    r.multi do
      r.sadd(callback_key, payload)
      r.expire(callback_key, BID_EXPIRE_TTL)
    end
  end
end
101
+
102
# Runs the given block with this batch as Thread.current[:batch], then records
# the jobs that were registered (via increment_job_queue) in Redis.
#
# On the first call for a new batch, the enclosing batch's bid (if any) is
# stored as "parent_bid", pending attributes are flushed and the context saved.
#
# @yield block that enqueues the jobs belonging to this batch
# @return [Array] the job ids registered during the block
# @raise [NoBlockGivenError] when called without a block
def jobs
  raise NoBlockGivenError unless block_given?

  # Only set on the call that starts the batch. On later calls it stays nil, so
  # the parent's counters below are only touched once.
  enclosing_bid = nil

  unless @existing || @initialized
    enclosing_bid = Thread.current[:batch]&.bid

    redis do |r|
      r.multi do
        r.hset(@bidkey, "parent_bid", enclosing_bid.to_s) if enclosing_bid
        r.expire(@bidkey, BID_EXPIRE_TTL)
      end
    end

    flush_pending_attrs
    @context&.save!

    @initialized = true
  end

  @ready_to_queue = []

  Batch.logger.debug("Beginning Batch #{@bidkey}")

  begin
    outer_batch = Thread.current[:batch]
    Thread.current[:batch] = self
    yield
  ensure
    # Always restore whichever batch was active before the block ran.
    Thread.current[:batch] = outer_batch
  end

  queued = @ready_to_queue.size
  jids_key = @bidkey + "-jids"

  redis do |r|
    r.multi do
      if enclosing_bid
        parent_key = "BID-#{enclosing_bid}"
        r.hincrby(parent_key, "children", 1)
        r.hincrby(parent_key, "total", queued)
        r.expire(parent_key, BID_EXPIRE_TTL)
      end

      r.hincrby(@bidkey, "pending", queued)
      r.hincrby(@bidkey, "total", queued)
      r.expire(@bidkey, BID_EXPIRE_TTL)

      unless @ready_to_queue.empty?
        r.sadd(jids_key, @ready_to_queue)
        r.expire(jids_key, BID_EXPIRE_TTL)
      end
    end
  end

  @ready_to_queue
end
154
+
155
# Appends a job id to the list of jobs collected for the current `jobs` call.
#
# @param jid [String] job id
# @return [Array] the updated list of collected job ids
def increment_job_queue(jid)
  @ready_to_queue.push(jid)
end
158
+
159
# Marks this batch as invalidated by writing the key "invalidated-bid-<bid>"
# with a TTL of BID_EXPIRE_TTL. `valid?` checks for this key.
def invalidate_all
  marker_key = "invalidated-bid-#{bid}"
  redis { |r| r.setex(marker_key, BID_EXPIRE_TTL, 1) }
end
164
+
165
# Reads the "parent_bid" field of this batch's Redis hash.
#
# @return [String, nil] the stored parent bid, or nil when none is stored
def parent_bid
  redis { |r| r.hget(@bidkey, "parent_bid") }
end
170
+
171
# Returns a Batch handle for this batch's parent.
#
# The parent bid is looked up once (a single Redis read). A blank bid is
# treated as "no parent", matching how enqueue_callbacks normalizes parent_bid;
# previously an empty string would have produced a brand-new, unrelated batch.
#
# @return [Batch, nil] the parent batch, or nil when there is none
def parent
  pbid = parent_bid
  return nil if pbid.nil? || pbid.empty?

  Batch.new(pbid)
end
176
+
177
# Reports whether a batch and all of its ancestors are still valid.
#
# A batch is invalid when the key "invalidated-bid-<bid>" exists (written by
# invalidate_all). The parent is looked up once per level; previously
# `batch.parent` was evaluated twice, doubling the Redis reads.
#
# @param batch [Batch] batch to check (defaults to self)
# @return [Boolean]
def valid?(batch = self)
  return false if redis { |r| r.exists?("invalidated-bid-#{batch.bid}") }

  ancestor = batch.parent
  ancestor ? valid?(ancestor) : true
end
181
+
182
+ # Any Batches or Jobs created in the given block won't be associated with the current batch
183
# Runs the block with no current batch (Thread.current[:batch] set to nil),
# then restores the previously active batch even if the block raises.
#
# @return the block's return value
def self.without_batch
  outer_batch = Thread.current[:batch]
  begin
    Thread.current[:batch] = nil
    yield
  ensure
    Thread.current[:batch] = outer_batch
  end
end
190
+
191
+ private
192
+
193
# Stores a batch attribute.
#
# Before the batch has started the value is only buffered in @pending_attrs
# (written later by flush_pending_attrs). Afterwards it is written straight to
# the batch's Redis hash and the hash TTL is refreshed.
#
# @param attribute [String] hash field name
# @param value [Object] value to store
def persist_bid_attr(attribute, value)
  return @pending_attrs[attribute] = value unless @initialized || @existing

  redis do |r|
    r.multi do
      r.hset(@bidkey, attribute, value)
      r.expire(@bidkey, BID_EXPIRE_TTL)
    end
  end
end
205
+
206
# Reads a single field from this batch's Redis hash.
#
# @param attribute [String] hash field name
# @return [String, nil] the stored value, or nil when the field is absent
def read_bid_attr(attribute)
  redis { |r| r.hget(@bidkey, attribute) }
end
211
+
212
# Writes all attributes buffered by persist_bid_attr to the batch's Redis hash
# and clears the buffer.
#
# Nothing is sent when the buffer is empty: HMSET without any field/value
# pairs is rejected by Redis as a malformed command.
#
# @return [Hash] the (now empty) pending-attribute buffer
def flush_pending_attrs
  return @pending_attrs if @pending_attrs.empty?

  redis do |r|
    r.mapped_hmset(@bidkey, @pending_attrs)
  end
  @pending_attrs = {}
end
218
+
219
+ class << self
220
# Records a failed job for a batch.
#
# Adds jid to the batch's "-failed" set and reads the batch counters in one
# MULTI. If the batch has a parent, the parent's "pending" counter is bumped
# and jid is added to the parent's "-failed" set as well. The :complete
# callbacks are enqueued when the pending count equals the failed count and
# the child count equals the number of completed child batches.
#
# @param bid [String] batch id
# @param jid [String] job id
def process_failed_job(bid, jid)
  batch_key = "BID-#{bid}"
  failed_key = "BID-#{bid}-failed"

  _added, pending_count, failed_count, child_count, completed_children, parent_bid = redis do |r|
    r.multi do
      r.sadd(failed_key, jid)

      # hincrby with 0 reads the counter as an Integer.
      r.hincrby(batch_key, "pending", 0)
      r.scard(failed_key)
      r.hincrby(batch_key, "children", 0)
      r.scard("BID-#{bid}-batches-complete")
      r.hget(batch_key, "parent_bid")

      r.expire(failed_key, BID_EXPIRE_TTL)
    end
  end

  # if the batch failed, and has a parent, update the parent to show one pending and failed job
  if parent_bid
    parent_failed_key = "BID-#{parent_bid}-failed"
    redis do |r|
      r.multi do
        r.hincrby("BID-#{parent_bid}", "pending", 1)
        r.sadd(parent_failed_key, jid)
        r.expire(parent_failed_key, BID_EXPIRE_TTL)
      end
    end
  end

  only_failures_left = pending_count.to_i == failed_count.to_i
  enqueue_callbacks(:complete, bid) if only_failures_left && child_count == completed_children
end
250
+
251
# Records a dead job for a batch.
#
# Adds jid to the batch's "-dead" set (and to the parent's "-dead" set when a
# parent bid is stored), then enqueues the :dead callbacks for the batch.
#
# @param bid [String] batch id
# @param jid [String] job id
def process_dead_job(bid, jid)
  dead_key = "BID-#{bid}-dead"

  # Only the parent bid is used; the other values are read but not acted on here.
  _added, _dead_count, _child_count, _completed_children, parent_bid = redis do |r|
    r.multi do
      r.sadd(dead_key, jid)

      r.scard(dead_key)
      r.hincrby("BID-#{bid}", "children", 0)
      r.scard("BID-#{bid}-batches-complete")
      r.hget("BID-#{bid}", "parent_bid")

      r.expire(dead_key, BID_EXPIRE_TTL)
    end
  end

  if parent_bid
    parent_dead_key = "BID-#{parent_bid}-dead"
    redis do |r|
      r.multi do
        r.sadd(parent_dead_key, jid)
        r.expire(parent_dead_key, BID_EXPIRE_TTL)
      end
    end
  end

  enqueue_callbacks(:dead, bid)
end
276
+
277
# Records a successful job for a batch.
#
# Removes jid from the "-failed" and "-jids" sets, decrements "pending" and
# reads the batch counters in one MULTI. Enqueues :complete when either only
# failed jobs remain pending and all child batches completed, or everything
# succeeded; enqueues :success as well in the latter case.
#
# @param bid [String] batch id
# @param jid [String] job id
def process_successful_job(bid, jid)
  batch_key = "BID-#{bid}"

  _removed, failed_count, remaining, child_count, completed_children, successful_children, _total, _parent_bid = redis do |r|
    r.multi do
      r.srem("BID-#{bid}-failed", jid)

      r.scard("BID-#{bid}-failed")
      r.hincrby(batch_key, "pending", -1)
      r.hincrby(batch_key, "children", 0)
      r.scard("BID-#{bid}-batches-complete")
      r.scard("BID-#{bid}-batches-success")
      r.hget(batch_key, "total")
      r.hget(batch_key, "parent_bid")

      r.srem("BID-#{bid}-jids", jid)
      r.expire(batch_key, BID_EXPIRE_TTL)
    end
  end

  all_success = remaining.to_i.zero? && child_count == successful_children
  all_complete = remaining.to_i == failed_count.to_i && child_count == completed_children

  # If complete or successful, call the complete callback (the complete callback may then call success).
  return unless all_complete || all_success

  enqueue_callbacks(:complete, bid)
  enqueue_callbacks(:success, bid) if all_success
end
302
+
303
# Fires the callbacks registered for an event on a batch, at most once per event.
#
# The event is flagged as processed in the batch hash in the same MULTI that
# reads the registered callbacks. Then:
# * for a callback batch (one whose "callback_batch" field is set) the
#   Finalize step is dispatched synchronously;
# * when no callbacks are registered, Finalize is dispatched immediately;
# * otherwise the callbacks are pushed as jobs inside a new callback batch
#   whose :complete callback runs Finalize.
#
# @param event [Symbol] event name
# @param bid [String] batch id
def enqueue_callbacks(event, bid)
  batch_key = "BID-#{bid}"
  callback_key = "#{batch_key}-callbacks-#{event}"

  already_processed, _flagged, callbacks, queue, parent_bid, callback_batch = redis do |r|
    r.multi do
      r.hget(batch_key, event)
      r.hset(batch_key, event, true)
      r.smembers(callback_key)
      r.hget(batch_key, "callback_queue")
      r.hget(batch_key, "parent_bid")
      r.hget(batch_key, "callback_batch")
    end
  end

  return if already_processed == 'true'

  queue ||= "default"
  parent_bid = nil if !parent_bid || parent_bid.empty? # Basically parent_bid.blank?

  callback_args = callbacks.map do |serialized|
    cb = JSON.load(serialized)
    [cb['callback'], event.to_s, cb['opts'], bid, parent_bid]
  end

  opts = {"bid" => bid, "event" => event}

  # Shared "finalize now" step used by the synchronous paths below.
  finalize_now = lambda do |finalize_opts|
    Batch::Callback::Finalize.new.dispatch(Status.new(bid), finalize_opts)
  end

  # Run callback batch finalize synchronously
  if callback_batch
    # Extract opts from cb_args or use current
    # Pass in stored event as callback finalize is processed on complete event
    cb_opts = callback_args.first&.at(2) || opts

    logger.debug {"Run callback batch bid: #{bid} event: #{event} args: #{callback_args.inspect}"}
    finalize_now.call(cb_opts)

    return
  end

  logger.debug {"Enqueue callback bid: #{bid} event: #{event} args: #{callback_args.inspect}"}

  # Nothing registered: finalize now.
  return finalize_now.call(opts) if callback_args.empty?

  # Otherwise finalize in sub batch complete callback
  cb_batch = new
  cb_batch.callback_batch = true
  logger.debug {"Adding callback batch: #{cb_batch.bid} for batch: #{bid}"}
  cb_batch.on(:complete, "#{Batch::Callback::Finalize}#dispatch", opts)
  cb_batch.jobs do
    push_callbacks callback_args, queue
  end
end
361
+
362
# Suffixes of every Redis key deleted for a batch; "" is the batch hash itself.
# "-dead" (written by process_dead_job) and "-callbacks-dead" (read by
# enqueue_callbacks for the :dead event) are included so they do not linger
# until their TTL expires.
CLEANUP_KEY_SUFFIXES = [
  "",
  "-callbacks-complete",
  "-callbacks-success",
  "-callbacks-dead",
  "-failed",
  "-dead",

  "-batches-success",
  "-batches-complete",
  "-batches-failed",
  "-jids",
].freeze

# Deletes the Redis keys belonging to a batch in a single DEL call.
#
# @param bid [String] batch id
def cleanup_redis(bid)
  logger.debug {"Cleaning redis of batch #{bid}"}
  keys = CLEANUP_KEY_SUFFIXES.map { |suffix| "BID-#{bid}#{suffix}" }
  redis do |r|
    r.del(*keys)
  end
end
378
+
379
# Forwards to Sidekiq.redis when Sidekiq is loaded; otherwise returns nil
# without running the block.
def redis(*args, &blk)
  return nil unless defined?(::Sidekiq) # TODO

  ::Sidekiq.redis(*args, &blk)
end
382
+
383
# Returns Sidekiq's logger when Sidekiq is loaded, otherwise Rails.logger.
def logger
  return ::Sidekiq.logger if defined?(::Sidekiq)

  Rails.logger
end
386
+
387
+ private
388
+
389
# Hands the prepared callback argument lists to the callback worker.
#
# @param args [Array<Array>] one argument list per callback
# @param queue [String] queue name to enqueue on
def push_callbacks(args, queue)
  worker = Batch::Callback::Worker
  worker.enqueue_all(args, queue)
end
392
+ end
393
+ end
394
+
395
+ ActiveJob::Base.include BatchAwareJob
396
+ end
397
+ end
398
+
399
+ # Automatically integrate with Sidekiq if it is present.
400
+ if defined?(::Sidekiq)
401
+ require_relative './sidekiq'
402
+ CanvasSync::JobBatches::Sidekiq.configure
403
+ end