canvas_sync 0.16.5 → 0.17.0.beta1

Files changed (80)
  1. checksums.yaml +5 -5
  2. data/README.md +49 -137
  3. data/app/models/canvas_sync/sync_batch.rb +5 -0
  4. data/db/migrate/20201018210836_create_canvas_sync_sync_batches.rb +11 -0
  5. data/lib/canvas_sync/importers/bulk_importer.rb +4 -7
  6. data/lib/canvas_sync/job.rb +4 -10
  7. data/lib/canvas_sync/job_batches/batch.rb +399 -0
  8. data/lib/canvas_sync/job_batches/batch_aware_job.rb +62 -0
  9. data/lib/canvas_sync/job_batches/callback.rb +153 -0
  10. data/lib/canvas_sync/job_batches/chain_builder.rb +203 -0
  11. data/lib/canvas_sync/job_batches/context_hash.rb +147 -0
  12. data/lib/canvas_sync/job_batches/jobs/base_job.rb +7 -0
  13. data/lib/canvas_sync/job_batches/jobs/concurrent_batch_job.rb +18 -0
  14. data/lib/canvas_sync/job_batches/jobs/serial_batch_job.rb +73 -0
  15. data/lib/canvas_sync/job_batches/sidekiq.rb +91 -0
  16. data/lib/canvas_sync/job_batches/status.rb +63 -0
  17. data/lib/canvas_sync/jobs/begin_sync_chain_job.rb +34 -0
  18. data/lib/canvas_sync/jobs/report_checker.rb +3 -6
  19. data/lib/canvas_sync/jobs/report_processor_job.rb +2 -5
  20. data/lib/canvas_sync/jobs/report_starter.rb +28 -20
  21. data/lib/canvas_sync/jobs/sync_accounts_job.rb +3 -5
  22. data/lib/canvas_sync/jobs/sync_admins_job.rb +2 -4
  23. data/lib/canvas_sync/jobs/sync_assignment_groups_job.rb +2 -4
  24. data/lib/canvas_sync/jobs/sync_assignments_job.rb +2 -4
  25. data/lib/canvas_sync/jobs/sync_context_module_items_job.rb +2 -4
  26. data/lib/canvas_sync/jobs/sync_context_modules_job.rb +2 -4
  27. data/lib/canvas_sync/jobs/sync_provisioning_report_job.rb +4 -31
  28. data/lib/canvas_sync/jobs/sync_roles_job.rb +2 -5
  29. data/lib/canvas_sync/jobs/sync_simple_table_job.rb +11 -32
  30. data/lib/canvas_sync/jobs/sync_submissions_job.rb +2 -4
  31. data/lib/canvas_sync/jobs/sync_terms_job.rb +22 -7
  32. data/lib/canvas_sync/processors/assignment_groups_processor.rb +2 -3
  33. data/lib/canvas_sync/processors/assignments_processor.rb +2 -3
  34. data/lib/canvas_sync/processors/context_module_items_processor.rb +2 -3
  35. data/lib/canvas_sync/processors/context_modules_processor.rb +2 -3
  36. data/lib/canvas_sync/processors/normal_processor.rb +1 -2
  37. data/lib/canvas_sync/processors/provisioning_report_processor.rb +2 -10
  38. data/lib/canvas_sync/processors/submissions_processor.rb +2 -3
  39. data/lib/canvas_sync/version.rb +1 -1
  40. data/lib/canvas_sync.rb +34 -97
  41. data/spec/canvas_sync/canvas_sync_spec.rb +126 -153
  42. data/spec/canvas_sync/jobs/job_spec.rb +9 -17
  43. data/spec/canvas_sync/jobs/report_checker_spec.rb +1 -3
  44. data/spec/canvas_sync/jobs/report_processor_job_spec.rb +0 -3
  45. data/spec/canvas_sync/jobs/report_starter_spec.rb +19 -28
  46. data/spec/canvas_sync/jobs/sync_admins_job_spec.rb +1 -4
  47. data/spec/canvas_sync/jobs/sync_assignment_groups_job_spec.rb +2 -1
  48. data/spec/canvas_sync/jobs/sync_assignments_job_spec.rb +3 -2
  49. data/spec/canvas_sync/jobs/sync_context_module_items_job_spec.rb +3 -2
  50. data/spec/canvas_sync/jobs/sync_context_modules_job_spec.rb +3 -2
  51. data/spec/canvas_sync/jobs/sync_provisioning_report_job_spec.rb +3 -35
  52. data/spec/canvas_sync/jobs/sync_roles_job_spec.rb +1 -4
  53. data/spec/canvas_sync/jobs/sync_simple_table_job_spec.rb +5 -12
  54. data/spec/canvas_sync/jobs/sync_submissions_job_spec.rb +2 -1
  55. data/spec/canvas_sync/jobs/sync_terms_job_spec.rb +1 -4
  56. data/spec/dummy/config/environments/test.rb +2 -0
  57. data/spec/dummy/db/schema.rb +9 -1
  58. data/spec/job_batching/batch_aware_job_spec.rb +100 -0
  59. data/spec/job_batching/batch_spec.rb +363 -0
  60. data/spec/job_batching/callback_spec.rb +38 -0
  61. data/spec/job_batching/flow_spec.rb +91 -0
  62. data/spec/job_batching/integration/integration.rb +57 -0
  63. data/spec/job_batching/integration/nested.rb +88 -0
  64. data/spec/job_batching/integration/simple.rb +47 -0
  65. data/spec/job_batching/integration/workflow.rb +134 -0
  66. data/spec/job_batching/integration_helper.rb +48 -0
  67. data/spec/job_batching/sidekiq_spec.rb +124 -0
  68. data/spec/job_batching/status_spec.rb +92 -0
  69. data/spec/job_batching/support/base_job.rb +14 -0
  70. data/spec/job_batching/support/sample_callback.rb +2 -0
  71. data/spec/spec_helper.rb +10 -0
  72. metadata +91 -23
  73. data/lib/canvas_sync/job_chain.rb +0 -102
  74. data/lib/canvas_sync/jobs/fork_gather.rb +0 -74
  75. data/spec/canvas_sync/jobs/fork_gather_spec.rb +0 -73
  76. data/spec/dummy/db/test.sqlite3 +0 -0
  77. data/spec/dummy/log/development.log +0 -1248
  78. data/spec/dummy/log/test.log +0 -43258
  79. data/spec/support/fixtures/reports/provisioning_csv_unzipped/courses.csv +0 -3
  80. data/spec/support/fixtures/reports/provisioning_csv_unzipped/users.csv +0 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
- SHA256:
- metadata.gz: 9be3b81c22d5b45a02d88fa29b5d57afa152ee7dec6ce0ecf3d81c041507c619
- data.tar.gz: 0c42577064cbf018f8fd8d5f665e1adfbc542e38e704a675e735491543b3ab40
+ SHA1:
+ metadata.gz: b4ffe8d3a42b647f8c30f1d831c4a86998bc7c1b
+ data.tar.gz: 2ba762a479d2df35e22a69b7930d051a2e43bd30
  SHA512:
- metadata.gz: 9ea295b8cc43aa6b27bf6067cf684d024430a9fd85608bdc0a243113920b1b65b72c257e6a99644afb5d20455ecf2265949ce941a9ae1d9bea160ab1bee803ba
- data.tar.gz: e63b7f557e792cb850a5799753f9effe436ad391bc6e4442768683b8c4d7c8eea1d1e9bbd6419127f2f4401f6cce6129019fb73192496f7efc1d731d7ef48847
+ metadata.gz: 6702f6754b217edfe2c7e4ff51c586940e7073993b5e3a79464e49308edd274af9bc8165f38aa919ef3c288ec86f1623065a91f42121f68c4da1389f27a57cb2
+ data.tar.gz: 71365002bcf14b762b00b73ae805862d8691a2b78ecd0254c0ef7fcf56eea6caeae838652e7ca4a685834eb380a08e782cf78e591b5af2c8950b4d4fd763e704
data/README.md CHANGED
@@ -91,27 +91,45 @@ This gem also helps with syncing and processing other reports if needed. In orde
  - Integrate your reports with the `ReportStarter`
  - Tell the gem what jobs to run
 
+ ### `updated_after`
+ An `updated_after` param may be passed when triggering a provision or making a chain:
+ ```ruby
+ CanvasSync.default_provisioning_report_chain(
+ %i[list of models to sync], updated_after: false
+ )
+ ```
+ It may be one of the following values:
+ * `false` - Will not apply any `updated_after` filtering to the requested reports
+ * An ISO-8601 Date - Will pass the supplied date as the `updated_after` param for the requested reports
+ * `true` (Default) - Will use the start date of the last successful sync
+
  ### Extensible chain
  It is sometimes desired to extend or customize the chain of jobs that are run with CanvasSync.
  This can be achieved with the following pattern:
 
  ```ruby
- job_chain = CanvasSync.default_provisioning_report_chain(
+ chain = CanvasSync.default_provisioning_report_chain(
  %i[list of models to sync]
  )
 
- # CanvasSync forks the chain for each term within a term scope. The ForkGather Job can be used to unfork the chain.
- # For example multiple Terms are in the term_scope. CanvasSync syncs Accounts, Terms, and Users (if enabled) and then
- # forks the chain to sync other models (eg Course, Sections, etc.) per-Term.
- # ForkGather will wait until all the forked chains are complete before continuing.
- # TL;DR: Jobs placed after SyncProvisioningReportJob and before ForkGather will run once per Term per Sync;
- # Jobs placed before SyncProvisioningReportJob or after ForkGather will run once per Sync
- job_chain[:jobs] << { job: CanvasSync::Jobs::ForkGather, options: {} }
+ # Add a custom job to the end of the chain.
+ chain << { job: CanvasSyncCompleteWorker, parameters: [{ job_id: job.id }] }
+ chain << { job: CanvasSyncCompleteWorker, options: { job_id: job.id } } # If an options key is provided, it will be automatically appended to the end of the :parameters array
+
+ chain.process!
 
- # Add a custom job to the end of the chain. Custom jobs must accept 2 arguments (job_chain, options) and call CanvasSync.invoke_next(job_chain) when complete
- job_chain[:jobs] << { job: CanvasSyncCompleteWorker, options: { job_id: job.id } }
+ # The chain object provides a fairly extensive API:
+ chain.insert({ job: SomeOtherJob }) # Adds the job to the end of the chain
+ chain.insert_at(0, { job: SomeOtherJob }) # Adds the job to the beginning of the chain
+ chain.insert({ job: SomeOtherJob }, after: 'CanvasSync::Jobs::SyncTermsJob') # Adds the job right after the SyncTermsJob
+ chain.insert({ job: SomeOtherJob }, before: 'CanvasSync::Jobs::SyncTermsJob') # Adds the job right before the SyncTermsJob
+ chain.insert({ job: SomeOtherJob }, with: 'CanvasSync::Jobs::SyncTermsJob') # Adds the job to be performed concurrently with the SyncTermsJob
 
- CanvasSync.invoke_next(job_chain)
+ # Some Jobs (such as the SyncTermsJob) have a sub-chain for, eg, Courses.
+ # chain.insert is aware of these sub-chains and will recurse into them when looking for a before:/after:/with: reference
+ chain.insert({ job: SomeOtherJob }, after: 'CanvasSync::Jobs::SyncCoursesJob') # Adds the job to be performed after SyncCoursesJob (which is a sub-job of the terms job and is duplicated for each term in the term_scope:)
+ # You can also retrieve the sub-chain like so:
+ chain.get_sub_chain('CanvasSync::Jobs::SyncTermsJob')
  ```
 
  ### Processor
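For reference, a custom job appended to the Extensible chain snippet above can be a plain job class. The following is a minimal sketch: `CanvasSyncCompleteWorker` is the name used in the snippet, its body here is illustrative, and under the new chain API it simply receives the parameters given in its chain entry rather than a `job_chain`.

```ruby
# Minimal sketch of a custom job appended to the chain above.
# Inheriting from CanvasSync::Job is optional but keeps job-log support;
# the body is purely illustrative.
class CanvasSyncCompleteWorker < CanvasSync::Job
  def perform(options)
    # options is the hash passed via parameters: [{ job_id: ... }] in the chain entry
    Rails.logger.info "CanvasSync chain finished for job ##{options[:job_id]}"
  end
end
```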
@@ -134,9 +152,8 @@ Let's say we have a custom Canvas report called "my_really_cool_report_csv". Fir
 
  ```ruby
  class MyReallyCoolReportJob < CanvasSync::Jobs::ReportStarter
- def perform(job_chain, options)
+ def perform(options)
  super(
- job_chain,
  'my_really_cool_report_csv', # Report name
  { "parameters[param1]" => true }, # Report parameters
  MyCoolProcessor.to_s, # Your processor class as a string
@@ -148,57 +165,6 @@ end
 
  You can also see examples in `lib/canvas_sync/jobs/sync_users_job.rb` and `lib/canvas_sync/jobs/sync_provisioning_report.rb`.
 
- ### Start the jobs
-
- The `CanvasSync.process_jobs` method allows you to pass in a chain of jobs to run. The job chain must be formatted like:
-
- ```ruby
- {
- jobs: [
- { job: JobClass, options: {} },
- { job: JobClass2, options: {} }
- ],
- global_options: {}
- }
- ```
-
- Here is an example that runs our new report job first followed by the builtin provisioning job:
-
- ```ruby
- job_chain = {
- jobs: [
- { job: MyReallyCoolReportJob, options: {} },
- { job: CanvasSync::Jobs::SyncProvisioningReportJob, options: { models: ['users', 'courses'] } }
- ],
- global_options: {}
- }
-
- CanvasSync.process_jobs(job_chain)
- ```
-
- What if you've got some other job that you want run that doesn't deal with a report? No problem! Just make sure you call `CanvasSync.invoke_next` at the end of your job. Example:
-
-
- ```ruby
- class SomeRandomJob < CanvasSync::Job
- def perform(job_chain, options)
- i_dunno_do_something!
-
- CanvasSync.invoke_next(job_chain)
- end
- end
-
- job_chain = {
- jobs: [
- { job: SomeRandomJob, options: {} },
- { job: CanvasSync::Jobs::SyncProvisioningReportJob, options: { models: ['users', 'courses'] } }
- ],
- global_options: {}
- }
-
- CanvasSync.process_jobs(job_chain)
- ```
-
  ### Batching
 
  The provisioning report uses the `CanvasSync::Importers::BulkImporter` class to bulk import rows with the activerecord-import gem. It inserts rows in batches of 10,000 by default. This can be customized by setting the `BULK_IMPORTER_BATCH_SIZE` environment variable.
@@ -257,6 +223,14 @@ class CanvasSyncModel < ApplicationRecord
  end
  ```
 
+ ### Job Batching
+ CanvasSync adds a `CanvasSync::JobBatches` module. It adds Sidekiq/sidekiq-batch like support for Job Batches.
+ It integrates automatically with both Sidekiq and ActiveJob. The API is highly similar to the Sidekiq-batch implementation,
+ documentation for which can be found at https://github.com/mperham/sidekiq/wiki/Batches
+
+ A batch can be created using `Sidekiq::Batch` or `CanvasSync::JobBatching::Batch`.
+
+ Also see `canvas_sync/jobs/begin_sync_chain_job`, `canvas_sync/job_batches/jobs/serial_batch_job`, or `canvas_sync/job_batches/jobs/concurrent_batch_job` for example usage.
 
  ## Legacy Support
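To illustrate the Job Batching section added above: the sketch below follows the sidekiq-batch style API that the README points to, using the `CanvasSync::JobBatches` module named there. `SyncCallbacks` and `SomeSyncJob` are illustrative names, not classes shipped with the gem.

```ruby
# Sketch of sidekiq-batch style usage via the new CanvasSync::JobBatches module.
# The callback and job classes here are illustrative placeholders.
class SyncCallbacks
  def on_success(_status, options)
    Rails.logger.info "Sync batch finished for account #{options['account_id']}"
  end
end

batch = CanvasSync::JobBatches::Batch.new
batch.description = "Nightly provisioning sync"
batch.on(:success, SyncCallbacks, 'account_id' => 123)
batch.jobs do
  SomeSyncJob.perform_later # jobs enqueued inside this block become part of the batch
end
```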
 
@@ -308,80 +282,19 @@ end
  ## Syncronize different reports
  CanvasSync provides the functionality to import data from other reports into an specific table.
 
- This can be achived by using the followin method
-
- ```ruby
- CanvasSync.provisioning_sync(<array of models to sync>, term_scope: <optional term scope>)
- CanvasSync
- .simple_report_sync(
- [
- {
- report_name: <report name>,
- model: <model to sync>,
- params: <hash with the require parameters the report needs to sync>
- },
- {
- report_name: <report name>,
- model: <model to sync>,
- params: <hash with the require parameters the report needs to sync>
- },
- ...
- ],
- term_scope: <optional term scope>
- )
- ```
-
- Example:
+ This can be achieved by using the following method
 
  ```ruby
- CanvasSync
- .simple_report_sync(
- [
- {
- report_name: 'proservices_provisioning_csv',
- model: 'users',
- params: {
- "parameters[include_deleted]" => true,
- "parameters[users]" => true
- }
- },
- {
- report_name: 'proservices_provisioning_csv',
- model: 'accounts',
- params: {
- "parameters[include_deleted]" => true,
- "parameters[accounts]" => true
- }
- }
- ]
- )
- ```
-
- Example with the term_scope active:
-
- ```ruby
- CanvasSync
- .simple_report_sync(
- [
- {
- report_name: 'proservices_provisioning_csv',
- model: 'sections',
- params: {
- "parameters[include_deleted]" => true,
- "parameters[sections]" => true
- }
- },
- {
- report_name: 'proservices_provisioning_csv',
- model: 'courses',
- params: {
- "parameters[include_deleted]" => true,
- "parameters[courses]" => true
- }
- }
- ],
- term_scope: 'active'
- )
+ chain = CanvasSync.default_provisioning_report_chain
+ chain << {
+ job: CanvasSync::Jobs::SyncSimpleTableJob,
+ options: {
+ report_name: <report name>,
+ model: <model to sync>,
+ params: <hash with the required parameters the report needs to sync>
+ },
+ }
+ chain.process!
  ```
 
  ## Configuration
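A filled-in version of the `SyncSimpleTableJob` snippet above, reusing the report name and parameters from the `simple_report_sync` example this release removes (shown for illustration only):

```ruby
# Placeholders from the new snippet filled in with the report name and
# parameters used by the removed simple_report_sync example; illustration only.
chain = CanvasSync.default_provisioning_report_chain
chain << {
  job: CanvasSync::Jobs::SyncSimpleTableJob,
  options: {
    report_name: 'proservices_provisioning_csv',
    model: 'users',
    params: {
      "parameters[include_deleted]" => true,
      "parameters[users]" => true
    },
  },
}
chain.process!
```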
@@ -421,7 +334,6 @@ class CanvasSyncStarterWorker
  }
  }
  )
- CanvasSync.invoke_next(job_chain)
  end
 
  def self.handle_canvas_sync_error(error, **options)
data/app/models/canvas_sync/sync_batch.rb CHANGED
@@ -0,0 +1,5 @@
+ module CanvasSync
+ class SyncBatch < ApplicationRecord
+ serialize :job_arguments, Array
+ end
+ end
data/db/migrate/20201018210836_create_canvas_sync_sync_batches.rb CHANGED
@@ -0,0 +1,11 @@
+ class CreateCanvasSyncSyncBatches < CanvasSync::MiscHelper::MigrationClass
+ def change
+ create_table :canvas_sync_sync_batches do |t|
+ t.datetime :started_at
+ t.datetime :completed_at
+ t.string :status
+
+ t.timestamps
+ end
+ end
+ end
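The new `SyncBatch` model and `canvas_sync_sync_batches` table track sync runs, presumably what the `updated_after: true` default draws on. An illustrative query using the columns from the migration above; the `'completed'` status value is an assumption, not taken from the gem:

```ruby
# Illustrative only: the columns come from the migration above, but the
# "completed" status string is an assumed value.
last_sync = CanvasSync::SyncBatch.where(status: "completed").order(:started_at).last
puts "Last sync ran from #{last_sync&.started_at} to #{last_sync&.completed_at}"
```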
data/lib/canvas_sync/importers/bulk_importer.rb CHANGED
@@ -64,12 +64,13 @@ module CanvasSync
  columns = columns.dup
 
  update_conditions = {
- condition: condition_sql(klass, columns, import_args[:sync_start_time]),
+ condition: condition_sql(klass, columns),
  columns: columns
  }
  update_conditions[:conflict_target] = conflict_target if conflict_target
 
  options = { validate: false, on_duplicate_key_update: update_conditions }.merge(import_args)
+
  options.delete(:on_duplicate_key_update) if options.key?(:on_duplicate_key_ignore)
  klass.import(columns, rows, options)
  end
@@ -84,14 +85,10 @@ module CanvasSync
  # started_at = Time.now
  # run_the_users_sync!
  # changed = User.where("updated_at >= ?", started_at)
- def self.condition_sql(klass, columns, report_start)
+ def self.condition_sql(klass, columns)
  columns_str = columns.map { |c| "#{klass.quoted_table_name}.#{c}" }.join(", ")
  excluded_str = columns.map { |c| "EXCLUDED.#{c}" }.join(", ")
- condition_sql = "(#{columns_str}) IS DISTINCT FROM (#{excluded_str})"
- if klass.column_names.include?("updated_at") && report_start
- condition_sql += " AND #{klass.quoted_table_name}.updated_at < '#{report_start}'"
- end
- condition_sql
+ "(#{columns_str}) IS DISTINCT FROM (#{excluded_str})"
  end
 
  def self.batch_size
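To make the change above concrete: `condition_sql` now guards the upsert only on value differences, with no `updated_at` cutoff. For a hypothetical `User` model with `canvas_id` and `name` columns, the generated fragment looks roughly like this (quoting depends on the database adapter):

```ruby
# Rough sketch of the fragment the new condition_sql builds for a hypothetical
# User model with canvas_id and name columns (PostgreSQL-style quoting assumed):
CanvasSync::Importers::BulkImporter.condition_sql(User, [:canvas_id, :name])
# => "(\"users\".canvas_id, \"users\".name) IS DISTINCT FROM (EXCLUDED.canvas_id, EXCLUDED.name)"
# The importer passes this as the condition: of on_duplicate_key_update (see the hunk above),
# so rows whose incoming values already match are left untouched.
```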
data/lib/canvas_sync/job.rb CHANGED
@@ -3,7 +3,7 @@ require "active_job"
  module CanvasSync
  # Inherit from this class to build a Job that will log to the canvas_sync_job_logs table
  class Job < ActiveJob::Base
- attr_reader :job_chain, :job_log
+ attr_reader :job_log
 
  before_enqueue do |job|
  create_job_log(job)
@@ -15,12 +15,6 @@ module CanvasSync
  @job_log.started_at = Time.now
  @job_log.save
 
- if job.arguments[0].is_a?(Hash) && job.arguments[0].include?(:jobs)
- # @job_chain = JobChain.new(job.arguments[0])
- @job_chain = job.arguments[0]
- job.arguments[0] = @job_chain
- end
-
  begin
  block.call
  @job_log.status = JobLog::SUCCESS_STATUS
@@ -28,11 +22,11 @@ module CanvasSync
  @job_log.exception = "#{e.class}: #{e.message}"
  @job_log.backtrace = e.backtrace.join('\n')
  @job_log.status = JobLog::ERROR_STATUS
- if @job_chain&.dig(:global_options, :on_failure)&.present?
+ if batch_context&.[](:on_failure)&.present?
  begin
- class_name, method = @job_chain[:global_options][:on_failure].split('.')
+ class_name, method = batch_context[:on_failure].split('.')
  klass = class_name.constantize
- klass.send(method.to_sym, e, job_chain: @job_chain, job_log: @job_log)
+ klass.send(method.to_sym, e, batch_context: batch_context, job_log: @job_log)
  rescue => e2
  @job_log.backtrace += "\n\nError Occurred while handling an Error: #{e2.class}: #{e2.message}"
  @job_log.backtrace += "\n" + e2.backtrace.join('\n')
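The failure hook above now reads `batch_context[:on_failure]` as a `"ClassName.method"` string and invokes it with the error, the batch context, and the job log. A handler compatible with that calling convention might look like this (`SyncErrorHandler` is an illustrative name):

```ruby
# Sketch of an error handler matching the call in the hunk above:
#   klass.send(method.to_sym, e, batch_context: batch_context, job_log: @job_log)
# Register it by putting "SyncErrorHandler.handle_canvas_sync_error" under
# :on_failure in the batch context.
class SyncErrorHandler
  def self.handle_canvas_sync_error(error, batch_context:, job_log:)
    Rails.logger.error "[CanvasSync] #{error.class}: #{error.message} (job_log ##{job_log.id})"
    # notify an error tracker, mark the sync as failed, etc.
  end
end
```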