canvas_sync 0.16.2 → 0.17.0.beta3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +49 -137
  3. data/app/models/canvas_sync/sync_batch.rb +5 -0
  4. data/db/migrate/20170915210836_create_canvas_sync_job_log.rb +12 -31
  5. data/db/migrate/20180725155729_add_job_id_to_canvas_sync_job_logs.rb +4 -13
  6. data/db/migrate/20190916154829_add_fork_count_to_canvas_sync_job_logs.rb +3 -11
  7. data/db/migrate/20201018210836_create_canvas_sync_sync_batches.rb +11 -0
  8. data/lib/canvas_sync.rb +36 -118
  9. data/lib/canvas_sync/concerns/api_syncable.rb +27 -0
  10. data/lib/canvas_sync/job.rb +5 -5
  11. data/lib/canvas_sync/job_batches/batch.rb +399 -0
  12. data/lib/canvas_sync/job_batches/batch_aware_job.rb +62 -0
  13. data/lib/canvas_sync/job_batches/callback.rb +153 -0
  14. data/lib/canvas_sync/job_batches/chain_builder.rb +210 -0
  15. data/lib/canvas_sync/job_batches/context_hash.rb +147 -0
  16. data/lib/canvas_sync/job_batches/jobs/base_job.rb +7 -0
  17. data/lib/canvas_sync/job_batches/jobs/concurrent_batch_job.rb +18 -0
  18. data/lib/canvas_sync/job_batches/jobs/serial_batch_job.rb +73 -0
  19. data/lib/canvas_sync/job_batches/sidekiq.rb +93 -0
  20. data/lib/canvas_sync/job_batches/status.rb +63 -0
  21. data/lib/canvas_sync/jobs/begin_sync_chain_job.rb +34 -0
  22. data/lib/canvas_sync/jobs/report_checker.rb +3 -6
  23. data/lib/canvas_sync/jobs/report_processor_job.rb +2 -5
  24. data/lib/canvas_sync/jobs/report_starter.rb +27 -19
  25. data/lib/canvas_sync/jobs/sync_accounts_job.rb +3 -5
  26. data/lib/canvas_sync/jobs/sync_admins_job.rb +2 -4
  27. data/lib/canvas_sync/jobs/sync_assignment_groups_job.rb +2 -4
  28. data/lib/canvas_sync/jobs/sync_assignments_job.rb +2 -4
  29. data/lib/canvas_sync/jobs/sync_context_module_items_job.rb +2 -4
  30. data/lib/canvas_sync/jobs/sync_context_modules_job.rb +2 -4
  31. data/lib/canvas_sync/jobs/sync_provisioning_report_job.rb +5 -35
  32. data/lib/canvas_sync/jobs/sync_roles_job.rb +2 -5
  33. data/lib/canvas_sync/jobs/sync_simple_table_job.rb +11 -32
  34. data/lib/canvas_sync/jobs/sync_submissions_job.rb +2 -4
  35. data/lib/canvas_sync/jobs/sync_terms_job.rb +25 -8
  36. data/lib/canvas_sync/misc_helper.rb +15 -0
  37. data/lib/canvas_sync/version.rb +1 -1
  38. data/spec/canvas_sync/canvas_sync_spec.rb +136 -153
  39. data/spec/canvas_sync/jobs/job_spec.rb +9 -17
  40. data/spec/canvas_sync/jobs/report_checker_spec.rb +1 -3
  41. data/spec/canvas_sync/jobs/report_processor_job_spec.rb +0 -3
  42. data/spec/canvas_sync/jobs/report_starter_spec.rb +19 -28
  43. data/spec/canvas_sync/jobs/sync_admins_job_spec.rb +1 -4
  44. data/spec/canvas_sync/jobs/sync_assignment_groups_job_spec.rb +2 -1
  45. data/spec/canvas_sync/jobs/sync_assignments_job_spec.rb +3 -2
  46. data/spec/canvas_sync/jobs/sync_context_module_items_job_spec.rb +3 -2
  47. data/spec/canvas_sync/jobs/sync_context_modules_job_spec.rb +3 -2
  48. data/spec/canvas_sync/jobs/sync_provisioning_report_job_spec.rb +3 -35
  49. data/spec/canvas_sync/jobs/sync_roles_job_spec.rb +1 -4
  50. data/spec/canvas_sync/jobs/sync_simple_table_job_spec.rb +5 -12
  51. data/spec/canvas_sync/jobs/sync_submissions_job_spec.rb +2 -1
  52. data/spec/canvas_sync/jobs/sync_terms_job_spec.rb +1 -4
  53. data/spec/dummy/app/models/account.rb +3 -0
  54. data/spec/dummy/app/models/pseudonym.rb +14 -0
  55. data/spec/dummy/app/models/submission.rb +1 -0
  56. data/spec/dummy/app/models/user.rb +1 -0
  57. data/spec/dummy/config/environments/test.rb +2 -0
  58. data/spec/dummy/db/migrate/20201016181346_create_pseudonyms.rb +24 -0
  59. data/spec/dummy/db/schema.rb +24 -4
  60. data/spec/job_batching/batch_aware_job_spec.rb +100 -0
  61. data/spec/job_batching/batch_spec.rb +363 -0
  62. data/spec/job_batching/callback_spec.rb +38 -0
  63. data/spec/job_batching/flow_spec.rb +91 -0
  64. data/spec/job_batching/integration/integration.rb +57 -0
  65. data/spec/job_batching/integration/nested.rb +88 -0
  66. data/spec/job_batching/integration/simple.rb +47 -0
  67. data/spec/job_batching/integration/workflow.rb +134 -0
  68. data/spec/job_batching/integration_helper.rb +48 -0
  69. data/spec/job_batching/sidekiq_spec.rb +124 -0
  70. data/spec/job_batching/status_spec.rb +92 -0
  71. data/spec/job_batching/support/base_job.rb +14 -0
  72. data/spec/job_batching/support/sample_callback.rb +2 -0
  73. data/spec/spec_helper.rb +17 -0
  74. metadata +90 -8
  75. data/lib/canvas_sync/job_chain.rb +0 -57
  76. data/lib/canvas_sync/jobs/fork_gather.rb +0 -59
  77. data/spec/canvas_sync/jobs/fork_gather_spec.rb +0 -73
@@ -0,0 +1,7 @@
1
module CanvasSync
  module JobBatches
    # Shared ActiveJob superclass for the jobs defined under JobBatches.
    # It adds no behaviour of its own.
    class BaseJob < ActiveJob::Base; end
  end
end
@@ -0,0 +1,18 @@
1
+ require_relative './base_job'
2
+
3
module CanvasSync
  module JobBatches
    # Enqueues every given sub-job inside one freshly created Batch, so the
    # sub-jobs are all queued together rather than one after another.
    class ConcurrentBatchJob < BaseJob
      # @param sub_jobs [Enumerable] job definitions, each handed as-is to
      #   ChainBuilder.enqueue_job
      # @param context [Object, nil] value assigned to the new batch's context
      # @return [Batch] the batch that was created
      def perform(sub_jobs, context: nil)
        batch = Batch.new
        batch.context = context
        batch.jobs do
          sub_jobs.each { |job| ChainBuilder.enqueue_job(job) }
        end
        batch
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
+ require_relative './base_job'
2
+
3
module CanvasSync
  module JobBatches
    # Runs a list of sub-jobs strictly one after another.
    #
    # The serialized sub-jobs are parked in a Redis list keyed by a random
    # serial id. One job at a time is popped and enqueued inside its own Batch;
    # that batch's :success callback pops and enqueues the next one. All of the
    # per-job batches are opened inside a single root batch, whose :success
    # callback removes the Redis keys again.
    class SerialBatchJob < BaseJob
      # @param sub_jobs [Array] job definitions, each in whatever form
      #   ChainBuilder.enqueue_job accepts (not visible from this file)
      # @param context [Object, nil] value assigned to the root batch's context
      def perform(sub_jobs, context: nil)
        serial_id = SecureRandom.urlsafe_base64(10)
        meta_key = "SERBID-#{serial_id}"
        jobs_key = "SERBID-#{serial_id}-jobs"

        root_batch = Batch.new

        # Serialize before opening the transaction: this is pure computation,
        # and a serialization error should not occur in the middle of a MULTI.
        serialized_jobs = sub_jobs.map do |job|
          JSON.unparse(ActiveJob::Arguments.serialize([job]))
        end

        Batch.redis do |r|
          r.multi do
            r.hset(meta_key, "root_bid", root_batch.bid)
            r.expire(meta_key, Batch::BID_EXPIRE_TTL)
            r.rpush(jobs_key, serialized_jobs)
            r.expire(jobs_key, Batch::BID_EXPIRE_TTL)
          end
        end

        root_batch.allow_context_changes = true
        root_batch.context = context
        root_batch.on(:success, "#{self.class}.cleanup_redis", serial_batch_id: serial_id)
        root_batch.jobs do
          self.class.perform_next_sequence_job(serial_id)
        end
      end

      # :success callback of the root batch: deletes the bookkeeping keys that
      # #perform created for this serial run.
      #
      # @param status [Object] batch status passed by the callback machinery (unused)
      # @param options [Hash] callback options; reads 'serial_batch_id'
      def self.cleanup_redis(status, options)
        serial_id = options['serial_batch_id']
        Batch.redis do |r|
          r.del(
            "SERBID-#{serial_id}",
            "SERBID-#{serial_id}-jobs",
          )
        end
      end

      # :success callback of each per-job batch: advances the sequence.
      #
      # @param status [Object] batch status passed by the callback machinery (unused)
      # @param options [Hash] callback options; reads 'serial_batch_id'
      def self.job_succeeded_callback(status, options)
        serial_id = options['serial_batch_id']
        perform_next_sequence_job(serial_id)
      end

      # Pops the next serialized job for +serial_id+ and enqueues it inside a
      # new batch nested in the root batch. Does nothing once the list is empty.
      #
      # This method is intentionally public. The original file placed a bare
      # `protected` above it, but visibility keywords do not apply to methods
      # defined with `def self.`, and #perform calls it with an explicit
      # receiver (`self.class.perform_next_sequence_job`), which a non-public
      # method would reject.
      #
      # @param serial_id [String] id generated by #perform
      def self.perform_next_sequence_job(serial_id)
        root_bid, next_job_json = Batch.redis do |r|
          r.multi do
            r.hget("SERBID-#{serial_id}", "root_bid")
            r.lpop("SERBID-#{serial_id}-jobs")
          end
        end

        return unless next_job_json.present?

        next_job = ActiveJob::Arguments.deserialize(JSON.parse(next_job_json))[0]

        Batch.new(root_bid).jobs do
          Batch.new.tap do |batch|
            batch.on(:success, "#{self}.job_succeeded_callback", serial_batch_id: serial_id)
            batch.jobs do
              ChainBuilder.enqueue_job(next_job)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,93 @@
1
+ begin
2
+ require 'sidekiq/batch'
3
+ rescue LoadError
4
+ end
5
+
6
module CanvasSync
  module JobBatches
    # Sidekiq integration for JobBatches: worker helpers, client/server
    # middleware that track batch membership of jobs, and a .configure hook
    # that installs all of it into the real ::Sidekiq.
    #
    # NOTE: inside this module the bare constant `Sidekiq` refers to this
    # module, not to the Sidekiq gem. Always write `::Sidekiq` for the gem.
    module Sidekiq
      # Mixed into ::Sidekiq::Worker by .configure; exposes the batch stored in
      # Thread.current[:batch] (set by ServerMiddleware while a job runs).
      module WorkerExtension
        # @return [Object] the current batch's bid; raises NoMethodError when
        #   no batch is set on the current thread
        def bid
          Thread.current[:batch].bid
        end

        # @return [Batch, nil] the batch set on the current thread, if any
        def batch
          Thread.current[:batch]
        end

        # @return [Boolean] delegates to the current batch's #valid?
        def valid_within_batch?
          batch.valid?
        end
      end

      # Client-side middleware: when a job is pushed while a batch is set on the
      # current thread, tags the job with the batch's bid and registers its jid
      # with the batch.
      class ClientMiddleware
        def call(_worker, msg, _queue, _redis_pool = nil)
          if (batch = Thread.current[:batch])
            # NOTE(review): the bid is written under a Symbol key while
            # ServerMiddleware reads msg['bid']; presumably this relies on the
            # job hash being JSON-serialized in between -- confirm.
            batch.increment_job_queue(msg['jid']) if (msg[:bid] = batch.bid)
          end
          yield
        end
      end

      # Server-side middleware: runs a batched job with its Batch exposed via
      # Thread.current[:batch], then reports success or failure to Batch.
      class ServerMiddleware
        def call(_worker, msg, _queue)
          if (bid = msg['bid'])
            begin
              Thread.current[:batch] = Batch.new(bid)
              yield
              # Clear the thread-local before reporting success.
              Thread.current[:batch] = nil
              Batch.process_successful_job(bid, msg['jid'])
            rescue
              Batch.process_failed_job(bid, msg['jid'])
              raise
            ensure
              Thread.current[:batch] = nil
            end
          else
            yield
          end
        end
      end

      # Installs the middleware and the death handler into ::Sidekiq, aliases
      # ::Sidekiq::Batch to JobBatches::Batch, and mixes WorkerExtension into
      # ::Sidekiq::Worker.
      #
      # @return [void]
      def self.configure
        if defined?(::Sidekiq::Batch) && ::Sidekiq::Batch != JobBatches::Batch
          print "WARNING: Detected Sidekiq Pro or sidekiq-batch. CanvasSync JobBatches may not be fully compatible!"
        end

        ::Sidekiq.configure_client do |config|
          config.client_middleware do |chain|
            chain.remove ::Sidekiq::Batch::Middleware::ClientMiddleware if defined?(::Sidekiq::Batch::Middleware::ClientMiddleware)
            chain.add JobBatches::Sidekiq::ClientMiddleware
          end
        end
        ::Sidekiq.configure_server do |config|
          # The client middleware is registered in the server block too.
          config.client_middleware do |chain|
            chain.remove ::Sidekiq::Batch::Middleware::ClientMiddleware if defined?(::Sidekiq::Batch::Middleware::ClientMiddleware)
            chain.add JobBatches::Sidekiq::ClientMiddleware
          end

          config.server_middleware do |chain|
            chain.remove ::Sidekiq::Batch::Middleware::ServerMiddleware if defined?(::Sidekiq::Batch::Middleware::ServerMiddleware)
            chain.add JobBatches::Sidekiq::ServerMiddleware
          end

          config.death_handlers << ->(job, _ex) do
            return unless job['bid'].present?

            # Must be JobBatches::Batch (or ::Sidekiq::Batch). A bare
            # `Sidekiq::Batch` here resolves to
            # CanvasSync::JobBatches::Sidekiq::Batch, which does not exist, so
            # the handler would raise NameError instead of recording the death.
            if defined?(::Apartment)
              ::Apartment::Tenant.switch(job['apartment'] || 'public') do
                JobBatches::Batch.process_dead_job(job['bid'], job['jid'])
              end
            else
              JobBatches::Batch.process_dead_job(job['bid'], job['jid'])
            end
          end
        end
        ::Sidekiq.const_set(:Batch, CanvasSync::JobBatches::Batch)
        # This alias helps apartment-sidekiq set itself up correctly
        ::Sidekiq::Batch.const_set(:Server, CanvasSync::JobBatches::Sidekiq::ServerMiddleware)
        ::Sidekiq::Worker.send(:include, JobBatches::Sidekiq::WorkerExtension)
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
module CanvasSync
  module JobBatches
    class Batch
      # Read-only view of a batch's bookkeeping in Redis. Every reader issues
      # its own Redis call against the hash "BID-<bid>" or the set
      # "BID-<bid>-failed"; nothing is cached.
      class Status
        attr_reader :bid

        # @param bid [String] id of the batch to inspect
        def initialize(bid)
          @bid = bid
        end

        # Waiting for a batch to finish is not implemented.
        #
        # @raise [RuntimeError] always
        def join
          raise "Not supported"
        end

        # @return [Integer] the hash's 'pending' field (0 when absent)
        def pending
          hash_field('pending').to_i
        end

        # @return [Integer] size of the failed set
        def failures
          Batch.redis { |conn| conn.scard(failed_key) }.to_i
        end

        # @return [String, nil] the hash's 'created_at' field, as stored
        def created_at
          hash_field('created_at')
        end

        # @return [Integer] the hash's 'total' field (0 when absent)
        def total
          hash_field('total').to_i
        end

        # @return [String, nil] the hash's "parent_bid" field
        def parent_bid
          hash_field("parent_bid")
        end

        # @return [Array] members of the failed set ([] when Redis returns nil)
        def failure_info
          members = Batch.redis { |conn| conn.smembers(failed_key) }
          members || []
        end

        # @return [Boolean] true only when the 'complete' field is the string 'true'
        def complete?
          hash_field('complete') == 'true'
        end

        # @return [Integer] the hash's 'children' field (0 when absent)
        def child_count
          hash_field('children').to_i
        end

        # @return [Hash] snapshot of all of the readers above, keyed by Symbol
        def data
          {
            bid: bid,
            total: total,
            failures: failures,
            pending: pending,
            created_at: created_at,
            complete: complete?,
            failure_info: failure_info,
            parent_bid: parent_bid,
            child_count: child_count
          }
        end

        private

        # Redis key of the batch's hash.
        def batch_key
          "BID-#{bid}"
        end

        # Redis key of the batch's failed set.
        def failed_key
          "BID-#{bid}-failed"
        end

        # Reads a single field of the batch's hash.
        def hash_field(field)
          Batch.redis { |conn| conn.hget(batch_key, field) }
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
module CanvasSync
  module Jobs
    # Entry point of a sync chain: records a SyncBatch row, then runs the chain
    # definition through a SerialBatchJob inside a root Batch whose :complete
    # callback marks the SyncBatch row as finished.
    class BeginSyncChainJob < CanvasSync::Job
      # @param chain_definition [Object] passed untouched to SerialBatchJob.perform_now
      # @param globals [Hash] becomes the root batch's context; its
      #   :updated_after entry is filled in here when blank or literally true
      # @return [JobBatches::Batch] the root batch
      def perform(chain_definition, globals = {})
        requested = globals[:updated_after]
        if requested.blank? || requested == true
          # Fall back to the start time of the last SyncBatch marked
          # 'completed' (nil when there is none).
          previous = SyncBatch.where(status: 'completed').last
          globals[:updated_after] = previous&.started_at&.iso8601
        end

        sync_batch = SyncBatch.create!(started_at: DateTime.now, status: 'pending')

        root = JobBatches::Batch.new
        root.description = "CanvasSync Root Batch"
        root.on(:complete, "#{self.class}.batch_completed", sync_batch_id: sync_batch.id)
        root.context = globals
        root.jobs do
          JobBatches::SerialBatchJob.perform_now(chain_definition)
        end
        root
      end

      # :complete callback of the root batch. Stamps the SyncBatch row with its
      # completion time and a status of 'failed' when the batch reports any
      # failures, 'completed' otherwise.
      #
      # @param status [#failures] batch status object
      # @param options [Hash] callback options; reads 'sync_batch_id'
      def self.batch_completed(status, options)
        record = SyncBatch.find(options['sync_batch_id'])
        outcome = status.failures.positive? ? 'failed' : 'completed'
        record.update!(status: outcome, completed_at: DateTime.now)
      end
    end
  end
end
@@ -4,21 +4,19 @@ module CanvasSync
4
4
  # Re-enqueues itself if the report is still processing on Canvas.
5
5
  # Enqueues the ReportProcessor when the report has completed.
6
6
  class ReportChecker < CanvasSync::Job
7
- # @param job_chain [Hash]
8
7
  # @param report_name [String] e.g., 'provisioning_csv'
9
8
  # @param report_id [Integer]
10
9
  # @param processor [String] a stringified report processor class name
11
10
  # @param options [Hash] hash of options that will be passed to the job processor
12
11
  # @return [nil]
13
- def perform(job_chain, report_name, report_id, processor, options) # rubocop:disable Metrics/AbcSize
14
- account_id = options[:account_id] || job_chain[:global_options][:account_id] || "self"
15
- report_status = CanvasSync.get_canvas_sync_client(job_chain[:global_options])
12
+ def perform(report_name, report_id, processor, options) # rubocop:disable Metrics/AbcSize
13
+ account_id = options[:account_id] || batch_context[:account_id] || "self"
14
+ report_status = CanvasSync.get_canvas_sync_client(batch_context)
16
15
  .report_status(account_id, report_name, report_id)
17
16
 
18
17
  case report_status["status"].downcase
19
18
  when "complete"
20
19
  CanvasSync::Jobs::ReportProcessorJob.perform_later(
21
- job_chain,
22
20
  report_name,
23
21
  report_status["attachment"]["url"],
24
22
  processor,
@@ -33,7 +31,6 @@ module CanvasSync
33
31
  CanvasSync::Jobs::ReportChecker
34
32
  .set(wait: report_checker_wait_time)
35
33
  .perform_later(
36
- job_chain,
37
34
  report_name,
38
35
  report_id,
39
36
  processor,
@@ -6,22 +6,19 @@ module CanvasSync
6
6
  # download the report, and then pass the file path and options into the
7
7
  # process method on the processor.
8
8
  class ReportProcessorJob < CanvasSync::Job
9
- # @param job_chain [Hash]
10
9
  # @param report_name [String] e.g., 'provisioning_csv'
11
10
  # @param report_url [String]
12
11
  # @param processor [String] a stringified report processor class name
13
12
  # @param options [Hash] hash of options that will be passed to the job processor
14
13
  # @return [nil]
15
- def perform(job_chain, report_name, report_url, processor, options, report_id)
14
+ def perform(report_name, report_url, processor, options, report_id)
16
15
  @job_log.update_attributes(job_class: processor)
17
16
  download(report_name, report_url) do |file_path|
18
- options = job_chain[:global_options].merge(options).merge({
17
+ options = batch_context.merge(options).merge({
19
18
  report_processor_job_id: @job_log.job_id
20
19
  })
21
20
  processor.constantize.process(file_path, options, report_id)
22
21
  end
23
-
24
- CanvasSync.invoke_next(job_chain)
25
22
  end
26
23
 
27
24
  private
@@ -2,7 +2,6 @@ module CanvasSync
2
2
  module Jobs
3
3
  # Starts a Canvas report and enqueues a ReportChecker
4
4
  class ReportStarter < CanvasSync::Job
5
- # @param job_chain [Hash]
6
5
  # @param report_name [String] e.g., 'provisioning_csv'
7
6
  # @param report_params [Hash] The Canvas report parameters
8
7
  # @param processor [String] a stringified report processor class name
@@ -10,31 +9,39 @@ module CanvasSync
10
9
  # @param allow_redownloads [Boolean] whether you want the job_chain to cache this report,
11
10
  # so that any later jobs in the chain will use the same generated report
12
11
  # @return [nil]
13
- def perform(job_chain, report_name, report_params, processor, options, allow_redownloads: false)
14
- account_id = options[:account_id] || job_chain[:global_options][:account_id] || "self"
12
+ def perform(report_name, report_params, processor, options, allow_redownloads: false)
13
+ account_id = options[:account_id] || batch_context[:account_id] || "self"
15
14
 
16
- report_id = if allow_redownloads
17
- get_cached_report(job_chain, account_id, report_name, report_params)
18
- else
19
- start_report(job_chain, account_id, report_name, report_params)
20
- end
15
+ report_id = start_report(account_id, report_name, report_params)
16
+ # TODO: Restore report caching support (does anyone actually use it?)
17
+ # report_id = if allow_redownloads
18
+ # get_cached_report(account_id, report_name, report_params)
19
+ # else
20
+ # start_report(account_id, report_name, report_params)
21
+ # end
21
22
 
22
- CanvasSync::Jobs::ReportChecker.set(wait: report_checker_wait_time).perform_later(
23
- job_chain,
24
- report_name,
25
- report_id,
26
- processor,
27
- options,
28
- )
23
+ batch = JobBatches::Batch.new
24
+ batch.description = "CanvasSync #{report_name} Fiber"
25
+ batch.jobs do
26
+ CanvasSync::Jobs::ReportChecker.set(wait: report_checker_wait_time).perform_later(
27
+ report_name,
28
+ report_id,
29
+ processor,
30
+ options,
31
+ )
32
+ end
29
33
  end
30
34
 
31
35
  protected
32
36
 
33
- def merge_report_params(job_chain, options={}, params={}, term_scope: true)
34
- term_scope = job_chain[:global_options][:canvas_term_id] if term_scope == true
37
+ def merge_report_params(options={}, params={}, term_scope: true)
38
+ term_scope = batch_context[:canvas_term_id] if term_scope == true
35
39
  if term_scope.present?
36
40
  params[:enrollment_term_id] = term_scope
37
41
  end
42
+ if (updated_after = batch_context[:updated_after]).present?
43
+ params[:updated_after] = updated_after
44
+ end
38
45
  params.merge!(options[:report_params]) if options[:report_params].present?
39
46
  { parameters: params }
40
47
  end
@@ -42,6 +49,7 @@ module CanvasSync
42
49
  private
43
50
 
44
51
  def get_cached_report(job_chain, account_id, report_name, report_params)
52
+ # TODO: job_chain[:global_options] is no longer available and batch_context won't work for this
45
53
  if job_chain[:global_options][report_name].present?
46
54
  job_chain[:global_options][report_name]
47
55
  else
@@ -51,8 +59,8 @@ module CanvasSync
51
59
  end
52
60
  end
53
61
 
54
- def start_report(job_chain, account_id, report_name, report_params)
55
- report = CanvasSync.get_canvas_sync_client(job_chain[:global_options])
62
+ def start_report(account_id, report_name, report_params)
63
+ report = CanvasSync.get_canvas_sync_client(batch_context)
56
64
  .start_report(account_id, report_name, report_params)
57
65
  report["id"]
58
66
  end
@@ -7,18 +7,16 @@ module CanvasSync
7
7
  # running provisioning by term we sync users first so we don't duplicate
8
8
  # the work of syncing all accounts for each term.
9
9
  #
10
- # @param job_chain [Hash]
11
10
  # @param options [Hash]
12
- def perform(job_chain, options)
11
+ def perform(options)
13
12
  unless options[:root_account] == false
14
- acc_params = CanvasSync.get_canvas_sync_client(job_chain[:global_options]).account("self")
13
+ acc_params = CanvasSync.get_canvas_sync_client(batch_context).account("self")
15
14
  update_or_create_model(Account, acc_params)
16
15
  end
17
16
 
18
17
  super(
19
- job_chain,
20
18
  "proservices_provisioning_csv",
21
- merge_report_params(job_chain, options, {
19
+ merge_report_params(options, {
22
20
  accounts: true,
23
21
  include_deleted: true,
24
22
  }, term_scope: false),