inst_data_shipper 0.1.0.beta2 → 0.2.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 146f5b93819d7950f9bd256a99eb690a63d453b86be4ac6ac7cf4c5901724cdd
- data.tar.gz: 2410298ebb3b1ddc565ca70d49a274a129e83087d461dcfae4d4981979795ea5
+ metadata.gz: f7909aa44e9dabd1d43d58a5a3c2c081891104d64336294dce287c06804804df
+ data.tar.gz: 5da874689ac1de3e016a7feefce5866b211e6f7595021b565564f796685ed104
  SHA512:
- metadata.gz: c6dc93902e0ef7a114d2434d3901c677021d57603b07c1122d72bd2f953184b207d0e57f5441fafb7035a00bf28f2e5035d34cd0edfb73da6d2f93d93874f344
- data.tar.gz: 2e7babf6a2ed86f9a2e5769bfb393549fb07b771b700513efd640ba2b98ebc3eeadec99578e4c828738903ef121f8e3bebe8490f6f75de0ce9afac43ac28b8fa
+ metadata.gz: cd81e6c26e2416ce1a32de588e04f560496cfb7cfdac3f4c837828a1c65798bec405d98197032b0d8935a1ba2b24a291aa25f1b73a469ac7a9c6ef8d2286103f
+ data.tar.gz: 66c5ccfd82128e8c5dc39c7c937ee7f4f9412743b7202e221e53c575d4b0e572f0b014b4f41ae5924b1d2d119a05cd5de2acbae4eb81022df844a1fea181faec
data/README.md CHANGED
@@ -1,6 +1,6 @@
  # InstDataShipper

- This gem is intended to facilitate fast and easy syncing of Canvas data.
+ This gem is intended to facilitate easy upload of LTI datasets to Instructure Hosted Data.

  ## Installation

@@ -16,6 +16,144 @@ Then run the migrations:
  bundle exec rake db:migrate
  ```

+ ## Usage
+
+ ### Dumper
+
+ The main tool provided by this Gem is the `InstDataShipper::Dumper` class. It is used to define a "Dump", which is a combination of tasks and schema.
+
+ Here is an example `Dumper` implementation, wrapped in an ActiveJob job:
+ ```ruby
+ class HostedDataPushJob < ApplicationJob
+   # The schema serves two purposes: defining the schema and mapping data
+   SCHEMA = InstDataShipper::SchemaBuilder.build do
+     # You can augment the Table-builder DSL with custom methods like so:
+     extend_table_builder do
+       # It may be useful to define custom column definition helpers:
+       def custom_column(*args, from: nil, **kwargs, &blk)
+         # In this example, the helper reads the value from a `data` jsonb column - without it, you'd need
+         # to define `from: ->(row) { row.data["<KEY>"] }` on each column that needs to read from the jsonb
+         from ||= args[0].to_s
+         from = ->(row) { row.data[from] } if from.is_a?(String)
+         column(*args, **kwargs, from: from, &blk)
+       end
+
+       # `extend_table_builder` uses `class_eval`, so you could alternatively write your helpers in a Concern or Module and include them like normal:
+       include SomeConcern
+     end
+
+     table(ALocalModel, "<TABLE DESCRIPTION>") do
+       # If you define a table as incremental, it'll only export changes made since the start of the last successful Dumper run.
+       # The first argument "scope" can be interpreted in different ways:
+       #   If exporting a local model it may be a: (default: `updated_at`)
+       #     Proc that will receive a Relation and return a Relation (use `incremental_since`)
+       #     String of a column to compare with `incremental_since`
+       #   If exporting a Canvas report it may be a: (default: `updated_after`)
+       #     Proc that will receive report params and return modified report params (use `incremental_since`)
+       #     String of a report param to set to `incremental_since`
+       # `on:` is passed to Hosted Data and is used as the unique key. It may be an array to form a composite key.
+       # `if:` may be a Proc or a Symbol (of a method on the Dumper)
+       incremental "updated_at", on: [:id], if: ->() {}
+
+       column :name_in_destinations, :maybe_optional_sql_type, "Optional description of column"
+
+       # The type may usually be omitted if the `table()` is passed a Model class, but strings are an exception to this
+       custom_column :name, :"varchar(128)"
+
+       # `from:` may be...
+       # A Symbol of a method to be called on the record:
+       custom_column :sis_type, :"varchar(32)", from: :some_model_method
+       # A String of a column to read from the record:
+       custom_column :sis_type, :"varchar(32)", from: "sis_source_type"
+       # A Proc to be called with each record:
+       custom_column :sis_type, :"varchar(32)", from: ->(rec) { ... }
+       # Not specified: defaults to using the Schema Column Name as a String ("sis_type" in this case)
+       custom_column :sis_type, :"varchar(32)"
+     end
+
+     table("my_table", model: ALocalModel) do
+       # ...
+     end
+
+     table("proserv_student_submissions_csv") do
+       column :canvas_id, :bigint, from: "canvas user id"
+       column :sis_id, :"varchar(64)", from: "sis user id"
+       column :name, :"varchar(64)", from: "user name"
+       column :submission_id, :bigint, from: "submission id"
+     end
+   end
+
+   Dumper = InstDataShipper::Dumper.define(schema: SCHEMA, include: [
+     InstDataShipper::DataSources::LocalTables,
+     InstDataShipper::DataSources::CanvasReports,
+   ]) do
+     import_local_table(ALocalModel)
+     import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+     # If the report_name/Model don't directly match the Schema, a schema_name: parameter may be passed:
+     import_local_table(SomeModel, schema_name: "my_table")
+     import_canvas_report_by_terms("some_report", terms: Term.all.pluck(:canvas_id), schema_name: "my_table")
+   end
+
+   def perform
+     Dumper.perform_dump([
+       "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+       "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+     ])
+   end
+ end
+ ```
+
+ `Dumper`s may also be formed as a normal Ruby subclass:
+ ```ruby
+ class HostedDataPushJob < ApplicationJob
+   SCHEMA = InstDataShipper::SchemaBuilder.build do
+     # ...
+   end
+
+   class Dumper < InstDataShipper::Dumper
+     include InstDataShipper::DataSources::LocalTables
+     include InstDataShipper::DataSources::CanvasReports
+
+     def enqueue_tasks
+       import_local_table(ALocalModel)
+       import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+     end
+
+     def table_schemas
+       SCHEMA
+     end
+   end
+
+   def perform
+     Dumper.perform_dump([
+       "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+       "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+     ])
+   end
+ end
+ ```
+
+ ### Destinations
+
+ This Gem is mainly designed for use with Hosted Data, but it abstracts that somewhat to allow for other destinations/backends. Out of the box, support for Hosted Data and S3 is included.
+
+ Destinations are passed as URI-formatted strings. Passing Hashes is also supported, but the format/keys are destination specific.
+
+ Destinations blindly accept URI Fragments (the `#` chunk at the end of the URI). These options are not used internally, but they are made available to the destination as `dest.user_config`. Ideally the fragment is formatted like query parameters (`x=1&y=2`), in which case it will be parsed into a Hash, but it can be any string.
+
+ #### Hosted Data
+ `hosted-data://<JWT>@<HOSTED DATA SERVER>`
+
+ ##### Optional Parameters:
+ - `table_prefix`: An optional string to prefix onto each table name in the schema when declaring the schema in Hosted Data
+
+ #### S3
+ `s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<optional path>`
+
+ ##### Optional Parameters:
+ _None_
+
  ## Development

  When adding to or updating this gem, make sure you do the following:
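To make the fragment/`user_config` behavior from the Destinations section above concrete, here is a minimal sketch of a destination string. The host name and fragment keys are made-up placeholders; only the URI shape, `table_prefix`, and `dest.user_config` come from the README text.

```ruby
# Hypothetical destination string - "hosteddata.example.com", "owner", and "notify"
# are placeholders. The query part configures the destination (table_prefix), while
# the fragment is not interpreted by the gem itself.
destination = "hosted-data://<JWT>@hosteddata.example.com?table_prefix=example#owner=data-team&notify=true"

# The fragment is surfaced to the destination object as `dest.user_config`
# (parsed into a Hash when it is query-string shaped), so a custom destination
# could branch on these values.
Dumper.perform_dump([destination])
```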
@@ -7,11 +7,12 @@ class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass

  t.string :job_class
  t.string :genre
+ t.string :batch_id

  t.string :exception
  t.text :backtrace
- t.text :metadata
- t.text :job_arguments
+ # t.text :metadata
+ # t.text :job_arguments

  t.timestamps
  end
@@ -19,7 +19,7 @@ module InstDataShipper
  instance_exec(&@body_block)
  end

- def table_schemas
+ def schema
  pointer = @schema_pointer || batch_context[:schema_pointer]
  pointer.constantize
  end
@@ -22,7 +22,12 @@ module InstDataShipper
  query = _resolve_model_query(query, table_def[:query])

  if table_is_incremental?(table_def)
- query = _resolve_model_query(query, table_def.dig(:incremental, :scope), string: ->(r, c) { r.where("? > ?", c, incremental_since) })
+ query = _resolve_model_query(
+ query,
+ table_def.dig(:incremental, :scope),
+ string: ->(query, column) { query.where("#{column} > ?", incremental_since) },
+ default: "updated_at",
+ )
  end

  query.find_each do |m|
@@ -35,7 +40,9 @@ module InstDataShipper
  upload_data(table_def, &inner_block)
  end

- def _resolve_model_query(relation, query, string: nil)
+ def _resolve_model_query(relation, query, string: nil, default: nil)
+ return relation if query == false
+ query = default if query.nil?
  return relation if query.nil?

  if query.is_a?(Symbol)
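For readers skimming the diff, here is a standalone sketch of the scope-resolution rules this hunk introduces. The method name below is invented; only the `false`/`nil`/String behavior mirrors the changed code (Symbol and Proc scopes are handled by the unchanged remainder of `_resolve_model_query`).

```ruby
# Not the gem's code - a condensed illustration of the new incremental scope rules.
def resolve_incremental_scope(relation, scope, since, default: "updated_at")
  return relation if scope == false # `false` explicitly disables incremental scoping
  scope = default if scope.nil?     # nil now falls back to a default column name
  return relation if scope.nil?

  # A String scope becomes a column comparison, mirroring the new `string:` lambda.
  scope.is_a?(String) ? relation.where("#{scope} > ?", since) : relation
end
```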
@@ -3,7 +3,7 @@ module InstDataShipper
  class Base
  attr_reader :dumper

- delegate :tracker, :table_schemas, :working_dir, to: :dumper
+ delegate :tracker, :schema, :working_dir, to: :dumper

  def initialize(cache_key, config, dumper)
  @cache_key = cache_key
@@ -11,9 +11,13 @@ module InstDataShipper
  @dumper = dumper
  end

+ # This method is called before taking any actions.
+ # It should be used to make any necessary state assumptions (eg, the HostedData destination checks for a previous dump to determine if it can use incremental_since)
+ def preinitialize_dump(context); end
+
  # This method is called before processing any data.
  # It should be used to initialize any external resources needed for the dump.
- def initialize_dump; end
+ def initialize_dump(context); end

  # Yields an object (can be anything) that will be passed to `upload_data_chunk` as `chunk`.
  #
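A minimal sketch of a custom destination using the two hooks documented above. Only the hook names and signatures come from this diff; the class name, the `Destinations` namespace, and the method bodies are assumptions.

```ruby
module InstDataShipper
  module Destinations
    # Hypothetical destination subclass - illustrative only.
    class MyWarehouse < Base
      # Runs before any work is enqueued: a chance to adjust shared dump state,
      # e.g. clearing context[:incremental_since] (as the HostedData destination does).
      def preinitialize_dump(context)
        context[:incremental_since] = nil unless previous_dump_exists?
      end

      # Runs before any data is processed: a chance to create external resources.
      def initialize_dump(context)
        create_staging_area!
      end

      private

      def previous_dump_exists?
        false # placeholder check
      end

      def create_staging_area!
        # e.g. create a bucket prefix or open an upload session
      end
    end
  end
end
```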
@@ -5,11 +5,43 @@ module InstDataShipper
  class HostedData < Base
  include Concerns::Chunking

- def initialize_dump
+ def preinitialize_dump(context)
+ if context[:incremental_since].present?
+ begin
+ last_dump = hosted_data_client.get("api/v1/custom_dumps/last", {
+ status: 'imported',
+ # schema_version: convert_schema[:version],
+ tags: [
+ "ids-schema=#{dumper.schema_digest}",
+ "ids-genre=#{dumper.export_genre}",
+ ],
+ }).body.with_indifferent_access
+
+ if last_dump[:created_at] < context[:incremental_since]
+ InstDataShipper.logger.info("Last successful HostedData dump is older than incremental_since - bumping back incremental_since")
+ context[:incremental_since] = last_dump[:created_at]
+ end
+ rescue Faraday::ResourceNotFound
+ # TODO It'd be nice to make this per-table
+ InstDataShipper.logger.info("No Last successful HostedData dump of the same schema - not using incremental_since")
+ context[:incremental_since] = nil
+ end
+ end
+ end
+
+ def initialize_dump(context)
+ tags = [
+ "ids-schema=#{dumper.schema_digest}",
+ "ids-genre=#{dumper.export_genre}",
+ ]
+ tags << "ids-app=#{Rails.application.class.name.gsub(/::Application$/, '')}" if defined?(Rails) && Rails.application
+ tags << "ids-schema-version=#{schema[:version]}" if schema[:version].present?
+
  dump = hosted_data_client.post(
  'api/v1/custom_dumps/',
  reference_id: tracker.id,
  schema: convert_schema,
+ tags: tags,
  ).body.with_indifferent_access

  redis.hset(rk(:state), :dump_id, dump[:id])
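For reference, the tag formats above would produce a list roughly like the following. The values are invented; only the `ids-*` key formats come from the hunk.

```ruby
# Illustrative values only.
[
  "ids-schema=1a2b3c4d",                  # dumper.schema_digest
  "ids-genre=HostedDataPushJob::Dumper",  # dumper.export_genre (now the class name)
  "ids-app=MyLti",                        # Rails app class minus "::Application"
  "ids-schema-version=1.0.0",             # schema[:version], when set via the DSL
]
```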
@@ -62,6 +94,7 @@ module InstDataShipper

  def convert_schema
  definititions = {}
+ table_schemas = schema[:tables]
  table_schemas.each do |ts|
  ts = ts.dup
  tname = table_name(ts)
@@ -86,7 +119,7 @@ module InstDataShipper
  end

  {
- version: "#{dumper.export_genre.downcase}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
+ version: "#{dumper.schema_digest}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
  definition: definititions,
  }
  end
@@ -19,10 +19,20 @@ module InstDataShipper
  include(*include)

  define_method(:enqueue_tasks, &blk)
- define_method(:table_schemas) { schema }
+ define_method(:schema) { schema }
  end
  end

+ def self.current(executor: nil)
+ cur_batch = Thread.current[CanvasSync::JobBatches::CURRENT_BATCH_THREAD_KEY]
+ ctx = cur_batch&.context || {}
+ return nil unless ctx[:origin_class].present? && ctx[:tracker_id].present?
+
+ clazz = ctx[:origin_class]
+ clazz = clazz.constantize if clazz.is_a?(String)
+ clazz.new(executor: executor)
+ end
+
  public

  def begin_dump
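A hedged usage sketch for the new `Dumper.current` helper shown above: inside a job running under an active dump batch it re-instantiates the originating dumper from the batch context, and returns `nil` otherwise. The job class below is hypothetical.

```ruby
class AnnotateRowsJob < ApplicationJob
  def perform
    dumper = InstDataShipper::Dumper.current
    return if dumper.nil? # not running inside a dump's batch hierarchy

    # The instance is rebuilt from batch_context[:origin_class], so helpers like
    # schema_digest and tracker resolve the same way as in the originating dumper.
    Rails.logger.info("Active dump schema digest: #{dumper.schema_digest}")
  end
end
```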
@@ -31,15 +41,18 @@ module InstDataShipper
  @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')

  @batch_context = context = {
- # TODO Allow to be hooked by Destination, likely via initialize_dump_batch and batch_context, so that if an earlier destination fails we can resend data
  # TODO Consider behavior if last is still running
- incremental_since: DumpBatch.where(genre: export_genre, status: 'completed').order(created_at: :desc).first&.created_at,
+ incremental_since: last_successful_tracker&.created_at,
  }

+ destinations.each do |dest|
+ dest.preinitialize_dump(context)
+ end
+
  begin
  begin
  destinations.each do |dest|
- dest.initialize_dump()
+ dest.initialize_dump(context)
  end

  run_hook(:initialize_dump_batch, context)
@@ -52,6 +65,7 @@ module InstDataShipper

  Sidekiq::Batch.new.tap do |batch|
  context[:root_bid] = batch.bid
+ tracker.update(batch_id: batch.bid)

  batch.description = "HD #{export_genre} Export #{tracker.id} Root"
  batch.context = context
@@ -62,6 +76,7 @@ module InstDataShipper
  rescue => ex
  delayed :cleanup_fatal_error!
  InstDataShipper.handle_suppressed_error(ex)
+ tracker.update(status: 'failed', exception: ex.message, backtrace: ex.backtrace.join("\n"))
  end
  end
  rescue => ex
@@ -74,6 +89,7 @@ module InstDataShipper
  end
  end
  end
+ tracker.update(status: 'failed', exception: ex.message, backtrace: ex.backtrace.join("\n"))
  raise ex
  end
  end
@@ -82,15 +98,31 @@ module InstDataShipper
  @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
  end

+ def last_successful_tracker
+ @last_successful_tracker ||= DumpBatch.where(job_class: self.class.to_s, genre: export_genre, status: 'completed').order(created_at: :desc).first
+ end
+
  def export_genre
- self.class.to_s.gsub(/HD|ExportJob/, '')
+ self.class.to_s
  end

  def origin_class
  batch_context[:origin_class]&.constantize || self.class
  end

+ def schema
+ return origin_class::SCHEMA if defined?(origin_class::SCHEMA)
+ raise NotImplementedError
+ end
+
+ def schema_digest
+ Digest::MD5.hexdigest(schema.to_json)[0...8]
+ end
+
  def table_is_incremental?(table_def)
+ return false unless incremental_since.present?
+
+ # TODO Return false if table's schema changes
  if (inc = table_def[:incremental]).present?
  differ = inc[:if]
  return !!incremental_since if differ.nil?
@@ -119,7 +151,7 @@ module InstDataShipper

  value = Array(value).compact

- table_schemas.each do |ts|
+ schema[:tables].each do |ts|
  return ts if value.include?(ts[key])
  end
  end
@@ -207,11 +239,6 @@ module InstDataShipper

  # Helper Methods

- def table_schemas
- return origin_class::TABLE_SCHEMAS if defined?(origin_class::TABLE_SCHEMAS)
- raise NotImplementedError
- end
-
  def delayed(mthd, *args, **kwargs)
  Jobs::AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
  end
@@ -2,16 +2,22 @@ module InstDataShipper
  # This class ends up filling two roles - Schema and Mapping.
  # It makes for a clean API, but it's a little less canonical since (eg) the S3 destination doesn't need column type annotations.
  class SchemaBuilder
- attr_reader :tables
+ attr_reader :schema

  def initialize
- @tables = []
+ @schema = {
+ tables: [],
+ }
  end

  def self.build(&block)
  builder = new
  builder.instance_exec(&block)
- builder.tables
+ builder.schema
+ end
+
+ def version(version)
+ @schema[:version] = version
  end

  def extend_table_builder(&block)
@@ -45,7 +51,7 @@ module InstDataShipper

  @table_builder_class.build(tdef, &block)

- @tables << tdef
+ @schema[:tables] << tdef

  tdef
  end
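To make the shape change concrete: `SchemaBuilder.build` previously returned a bare array of table definitions and now returns a Hash with a `tables` list and an optional `version`. A rough sketch follows; the table and column names are placeholders and the table-definition internals are elided.

```ruby
SCHEMA = InstDataShipper::SchemaBuilder.build do
  version "1.0.0" # optional; stored as schema[:version]

  table("widgets") do
    column :id, :bigint
    column :name, :"varchar(64)"
  end
end

# 0.1.x returned:  [<table def>, ...]
# 0.2.0 returns:   { version: "1.0.0", tables: [<table def>, ...] }
SCHEMA[:tables].length # => 1
```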
@@ -68,7 +74,11 @@ module InstDataShipper
  # options[key] = value
  # end

- def incremental(scope="updated_at", **kwargs)
+ def version(version)
+ options[:version] = version
+ end
+
+ def incremental(scope=nil, **kwargs)
  if (extras = kwargs.keys - %i[on if]).present?
  raise ArgumentError, "Unsuppored options: #{extras.inspect}"
  end
@@ -1,3 +1,3 @@
  module InstDataShipper
- VERSION = "0.1.0.beta2".freeze
+ VERSION = "0.2.0".freeze
  end
@@ -39,6 +39,7 @@ module InstDataShipper

  def logger
  return @logger if defined? @logger
+ # TODO Annotate logs with DumpBatch ID
  @logger = Logger.new(STDOUT)
  @logger.level = Logger::DEBUG
  @logger
@@ -49,7 +50,7 @@ module InstDataShipper
  end

  def redis_prefix
- pfx = "hdd"
+ pfx = "ids"
  pfx = "#{Apartment::Tenant.current}:#{pfx}" if defined?(Apartment)
  pfx
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: inst_data_shipper
  version: !ruby/object:Gem::Version
- version: 0.1.0.beta2
+ version: 0.2.0
  platform: ruby
  authors:
  - Instructure CustomDev
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2024-03-08 00:00:00.000000000 Z
+ date: 2024-03-11 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rails
@@ -399,7 +399,6 @@ files:
  - lib/inst_data_shipper/engine.rb
  - lib/inst_data_shipper/jobs/async_caller.rb
  - lib/inst_data_shipper/jobs/base.rb
- - lib/inst_data_shipper/jobs/basic_dump_job.rb
  - lib/inst_data_shipper/record.rb
  - lib/inst_data_shipper/schema_builder.rb
  - lib/inst_data_shipper/version.rb
@@ -436,9 +435,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
  version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
- - - ">"
+ - - ">="
  - !ruby/object:Gem::Version
- version: 1.3.1
+ version: '0'
  requirements: []
  rubygems_version: 3.1.6
  signing_key:
@@ -1,15 +0,0 @@
- module InstDataShipper
- module Jobs
- class BasicDumpJob < InstDataShipper::Jobs::Base
- sidekiq_options retry: 3 if defined?(sidekiq_options)
-
- def perform(endpoints)
-
- end
-
- protected
-
-
- end
- end
- end