inst_data_shipper 0.1.0.beta2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 146f5b93819d7950f9bd256a99eb690a63d453b86be4ac6ac7cf4c5901724cdd
- data.tar.gz: 2410298ebb3b1ddc565ca70d49a274a129e83087d461dcfae4d4981979795ea5
+ metadata.gz: 4c84d0f1543bfcb42bce30e69ff12cc683b6f6bf51e9275f7529dd0b070bbb79
+ data.tar.gz: 22b5128af3f2d03b14565c656abca685c6e7196083576d1fd20a42a834690cba
  SHA512:
- metadata.gz: c6dc93902e0ef7a114d2434d3901c677021d57603b07c1122d72bd2f953184b207d0e57f5441fafb7035a00bf28f2e5035d34cd0edfb73da6d2f93d93874f344
- data.tar.gz: 2e7babf6a2ed86f9a2e5769bfb393549fb07b771b700513efd640ba2b98ebc3eeadec99578e4c828738903ef121f8e3bebe8490f6f75de0ce9afac43ac28b8fa
+ metadata.gz: 14e5eb97d9c5dbe5049eb94cd3690a2ab59df7bf6e5e0c62b21e8893f4cb75ca294cce6d28a4b0f7dccd1c53d34d5c7a34be7b2f4699f1b320f74df14d50f672
+ data.tar.gz: e7d22728140a7fc331abb5f608da98d44013d49027bf932ef183a4f75babe797632aca83955cb77a205d6004d29a7377dde87aaba0c245a3564541b7e1f55cbe
data/README.md CHANGED
@@ -1,6 +1,6 @@
  # InstDataShipper
 
- This gem is intended to facilitate fast and easy syncing of Canvas data.
+ This gem is intended to facilitate easy upload of LTI datasets to Instructure Hosted Data.
 
  ## Installation
 
@@ -16,6 +16,157 @@ Then run the migrations:
  bundle exec rake db:migrate
  ```
 
+ ## Usage
+
+ ### Dumper
+
+ The main tool provided by this Gem is the `InstDataShipper::Dumper` class. It is used to define a "Dump", which is a combination of tasks and schema.
+
+ Here is an example `Dumper` implementation, wrapped in an ActiveJob job:
+ ```ruby
+ class HostedDataPushJob < ApplicationJob
+   # The schema serves two purposes: defining the schema and mapping data
+   SCHEMA = InstDataShipper::SchemaBuilder.build do
+     # You can augment the Table-builder DSL with custom methods like so:
+     extend_table_builder do
+       # It may be useful to define custom column definition helpers:
+       def custom_column(*args, from: nil, **kwargs, &blk)
+         # In this example, the helper reads the value from a `data` jsonb column - without it, you'd need
+         # to define `from: ->(row) { row.data["<KEY>"] }` on each column that needs to read from the jsonb
+         from ||= args[0].to_s
+         from = ->(row) { row.data[from] } if from.is_a?(String)
+         column(*args, **kwargs, from: from, &blk)
+       end
+
+       # `extend_table_builder` uses `class_eval`, so you could alternatively write your helpers in a Concern or Module and include them like normal:
+       include SomeConcern
+     end
+
+     table(ALocalModel, "<TABLE DESCRIPTION>") do
+       # If you define a table as incremental, it'll only export changes made since the start of the last successful Dumper run.
+       # The first argument "scope" can be interpreted in different ways:
+       #   If exporting a local model it may be a: (default: `updated_at`)
+       #     Proc that will receive a Relation and return a Relation (use `incremental_since`)
+       #     String of a column to compare with `incremental_since`
+       #   If exporting a Canvas report it may be a: (default: `updated_after`)
+       #     Proc that will receive report params and return modified report params (use `incremental_since`)
+       #     String of a report param to set to `incremental_since`
+       # `on:` is passed to Hosted Data and is used as the unique key. It may be an array to form a composite key.
+       # `if:` may be a Proc or a Symbol (of a method on the Dumper)
+       incremental "updated_at", on: [:id], if: ->() {}
+
+       # Schemas may declaratively define the data source.
+       # This can be used for basic schemas where there's a 1:1 mapping between source table and destination table, and there is no conditional logic that needs to be performed.
+       # In order to apply these statements, your Dumper must call `auto_enqueue_from_schema`.
+       source :local_table
+       # A Proc can also be passed. The below is equivalent to the above
+       source ->(table_def) { import_local_table(table_def[:model] || table_def[:warehouse_name]) }
+
+       column :name_in_destinations, :maybe_optional_sql_type, "Optional description of column"
+
+       # The type may usually be omitted if the `table()` is passed a Model class, but strings are an exception to this
+       custom_column :name, :"varchar(128)"
+
+       # `from:` may be...
+       #   A Symbol of a method to be called on the record
+       custom_column :sis_type, :"varchar(32)", from: :some_model_method
+       #   A String of a column to read from the record
+       custom_column :sis_type, :"varchar(32)", from: "sis_source_type"
+       #   A Proc to be called with each record
+       custom_column :sis_type, :"varchar(32)", from: ->(rec) { ... }
+       #   Not specified. Will default to using the Schema Column Name as a String ("sis_type" in this case)
+       custom_column :sis_type, :"varchar(32)"
+     end
+
+     table("my_table", model: ALocalModel) do
+       # ...
+     end
+
+     table("proserv_student_submissions_csv") do
+       column :canvas_id, :bigint, from: "canvas user id"
+       column :sis_id, :"varchar(64)", from: "sis user id"
+       column :name, :"varchar(64)", from: "user name"
+       column :submission_id, :bigint, from: "submission id"
+     end
+   end
+
+   Dumper = InstDataShipper::Dumper.define(schema: SCHEMA, include: [
+     InstDataShipper::DataSources::LocalTables,
+     InstDataShipper::DataSources::CanvasReports,
+   ]) do
+     import_local_table(ALocalModel)
+     import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+     # If the report_name/Model don't directly match the Schema, a schema_name: parameter may be passed:
+     import_local_table(SomeModel, schema_name: "my_table")
+     import_canvas_report_by_terms("some_report", terms: Term.all.pluck(:canvas_id), schema_name: "my_table")
+
+     # Iterate through the Tables defined in the Schema and apply any defined `source` statements.
+     # This is the default behavior if `define()` is called w/o a block.
+     auto_enqueue_from_schema
+   end
+
+   def perform
+     Dumper.perform_dump([
+       "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+       "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+     ])
+   end
+ end
+ ```
+
+ `Dumper`s may also be defined as a normal Ruby subclass:
+ ```ruby
+ class HostedDataPushJob < ApplicationJob
+   SCHEMA = InstDataShipper::SchemaBuilder.build do
+     # ...
+   end
+
+   class Dumper < InstDataShipper::Dumper
+     include InstDataShipper::DataSources::LocalTables
+     include InstDataShipper::DataSources::CanvasReports
+
+     def enqueue_tasks
+       import_local_table(ALocalModel)
+       import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+       # auto_enqueue_from_schema
+     end
+
+     def table_schemas
+       SCHEMA
+     end
+   end
+
+   def perform
+     Dumper.perform_dump([
+       "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+       "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+     ])
+   end
+ end
+ ```
+
+ ### Destinations
+
+ This Gem is mainly designed for use with Hosted Data, but it tries to abstract that a little to allow for other destinations/backends. Out of the box, support for Hosted Data and S3 is included.
+
+ Destinations are passed as URI-formatted strings. Passing Hashes is also supported, but the format/keys are destination specific.
+
+ Destinations blindly accept URI Fragments (the `#` chunk at the end of the URI). These options are not used internally but will be made available as `dest.user_config`. Ideally these are in the same format as query parameters (`x=1&y=2`, which it will try to parse into a Hash), but the fragment can be any string.
+
+ #### Hosted Data
+ `hosted-data://<JWT>@<HOSTED DATA SERVER>`
+
+ ##### Optional Parameters:
+ - `table_prefix`: An optional string to prefix onto each table name in the schema when declaring the schema in Hosted Data
+
+ #### S3
+ `s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<optional path>`
+
+ ##### Optional Parameters:
+ _None_
+
  ## Development
 
  When adding to or updating this gem, make sure you do the following:
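As a quick illustration of the destination URIs described in the README section above, the sketch below passes both supported URI styles to the example job's `Dumper`, along with a URI fragment that the destination would expose as `dest.user_config`; the fragment keys (`notify`, `channel`) are hypothetical placeholders, as are the credential values.

```ruby
# Illustrative sketch based on the README above (placeholder credentials/host).
hosted_data_uri = "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example#notify=1&channel=ops"
s3_uri          = "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>"

# The part after `#` is not used internally; the destination exposes it as
# `user_config` (parsed into a Hash when it is query-string shaped).
HostedDataPushJob::Dumper.perform_dump([hosted_data_uri, s3_uri])
```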
@@ -7,11 +7,12 @@ class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
 
  t.string :job_class
  t.string :genre
+ t.string :batch_id
 
  t.string :exception
  t.text :backtrace
- t.text :metadata
- t.text :job_arguments
+ # t.text :metadata
+ # t.text :job_arguments
 
  t.timestamps
  end
@@ -19,7 +19,7 @@ module InstDataShipper
  instance_exec(&@body_block)
  end
 
- def table_schemas
+ def schema
  pointer = @schema_pointer || batch_context[:schema_pointer]
  pointer.constantize
  end
@@ -1,7 +1,31 @@
  module InstDataShipper
  module DataSources
  module Base
- extend ActiveSupport::Concern
+ # This could be a Concern, but we don't want Concern inheritance logic kicking in if it is included into another Concern.
+
+ def self.included(base)
+ base.extend ActiveSupport::Concern unless self.is_a?(Class) || base < ActiveSupport::Concern
+ base.extend ModuleHelperMethods
+ base.send(:include, Concern)
+ end
+
+ module Concern
+ extend ActiveSupport::Concern
+ end
+
+ module ModuleHelperMethods
+ def delayed(mthd_sym)
+ mthd = instance_method(mthd_sym)
+ pmthd_sym = :"_delayed_#{mthd_sym}"
+
+ alias_method pmthd_sym, mthd_sym
+ private pmthd_sym
+
+ define_method(mthd_sym) do |*args, **kwargs|
+ delayed(pmthd_sym, *args, **kwargs)
+ end
+ end
+ end
  end
  end
  end
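A minimal sketch of what the `delayed` class-level helper above does when mixed into a data source (the `MyDataSource` module and `import_something` method are hypothetical): the original method body is aliased to a private `_delayed_*` name, and the public name is redefined to enqueue that alias through the instance-level `delayed` dispatcher (which, per `dumper.rb` later in this diff, goes through `Jobs::AsyncCaller`).

```ruby
# Hypothetical usage sketch of the ModuleHelperMethods#delayed helper above.
module MyDataSource
  include InstDataShipper::DataSources::Base

  delayed def import_something(model, schema_name: nil)
    # Body runs later, inside a background job.
  end
  # Roughly equivalent to:
  #   alias_method :_delayed_import_something, :import_something
  #   private :_delayed_import_something
  #   def import_something(*args, **kwargs)
  #     delayed(:_delayed_import_something, *args, **kwargs)
  #   end
end
```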
@@ -2,7 +2,7 @@ module InstDataShipper
  module DataSources
  # This module contains the logic for processing Canvas reports
  module CanvasReports
- extend ActiveSupport::Concern
+ include Base
 
  included do
  hook :initialize_dump_batch do |context|
@@ -2,17 +2,11 @@ module InstDataShipper
  module DataSources
  # This module contains the logic for processing local AR tables
  module LocalTables
- extend ActiveSupport::Concern
+ include Base
 
  public
 
- def import_local_table(*args, **kwargs)
- delayed(:_import_local_table, *args, **kwargs)
- end
-
- private
-
- def _import_local_table(model, schema_name: nil)
+ delayed def import_local_table(model, schema_name: nil)
  model = model.safe_constantize if model.is_a?(String)
 
  table_def = lookup_table_schema!(schema_name, { model: model })
@@ -22,7 +16,12 @@ module InstDataShipper
  query = _resolve_model_query(query, table_def[:query])
 
  if table_is_incremental?(table_def)
- query = _resolve_model_query(query, table_def.dig(:incremental, :scope), string: ->(r, c) { r.where("? > ?", c, incremental_since) })
+ query = _resolve_model_query(
+ query,
+ table_def.dig(:incremental, :scope),
+ string: ->(query, column) { query.where("#{column} > ?", incremental_since) },
+ default: "updated_at",
+ )
  end
 
  query.find_each do |m|
@@ -35,7 +34,11 @@ module InstDataShipper
  upload_data(table_def, &inner_block)
  end
 
- def _resolve_model_query(relation, query, string: nil)
+ private
+
+ def _resolve_model_query(relation, query, string: nil, default: nil)
+ return relation if query == false
+ query = default if query.nil?
  return relation if query.nil?
 
  if query.is_a?(Symbol)
@@ -3,7 +3,7 @@ module InstDataShipper
  class Base
  attr_reader :dumper
 
- delegate :tracker, :table_schemas, :working_dir, to: :dumper
+ delegate :tracker, :schema, :working_dir, to: :dumper
 
  def initialize(cache_key, config, dumper)
  @cache_key = cache_key
@@ -11,9 +11,13 @@ module InstDataShipper
  @dumper = dumper
  end
 
+ # This method is called before taking any actions.
+ # It should be used to make any necessary state assumptions (eg, the HostedData destination checks for a previous dump to determine if it can use incremental_since)
+ def preinitialize_dump(context); end
+
  # This method is called before processing any data.
  # It should be used to initialize any external resources needed for the dump.
- def initialize_dump; end
+ def initialize_dump(context); end
 
  # Yields an object (can be anything) that will be passed to `upload_data_chunk` as `chunk`.
  #
@@ -7,7 +7,7 @@ module InstDataShipper
  DEFAULT_CHUNK_SIZE = 100_000
 
  def chunk_data(generator, chunk_size: nil, **kwargs)
- chunk_size ||= config.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
+ chunk_size ||= config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
  slice = 1
 
  btchr = CanvasSync::BatchProcessor.new(of: chunk_size) do |batch|
@@ -24,7 +24,7 @@ module InstDataShipper
 
  def group_key
  super.tap do |k|
- k[:chunk_size] = config.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
+ k[:chunk_size] = config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
  end
  end
 
@@ -5,11 +5,43 @@ module InstDataShipper
  class HostedData < Base
  include Concerns::Chunking
 
- def initialize_dump
+ def preinitialize_dump(context)
+ if context[:incremental_since].present?
+ begin
+ last_dump = hosted_data_client.get("api/v1/custom_dumps/last", {
+ status: 'imported',
+ # schema_version: convert_schema[:version],
+ tags: [
+ "ids-schema=#{dumper.schema_digest}",
+ "ids-genre=#{dumper.export_genre}",
+ ],
+ }).body.with_indifferent_access
+
+ if last_dump[:created_at] < context[:incremental_since]
+ InstDataShipper.logger.info("Last successful HostedData dump is older than incremental_since - bumping back incremental_since")
+ context[:incremental_since] = last_dump[:created_at]
+ end
+ rescue Faraday::ResourceNotFound
+ # TODO It'd be nice to make this per-table
+ InstDataShipper.logger.info("No Last successful HostedData dump of the same schema - not using incremental_since")
+ context[:incremental_since] = nil
+ end
+ end
+ end
+
+ def initialize_dump(context)
+ tags = [
+ "ids-schema=#{dumper.schema_digest}",
+ "ids-genre=#{dumper.export_genre}",
+ ]
+ tags << "ids-app=#{Rails.application.class.name.gsub(/::Application$/, '')}" if defined?(Rails) && Rails.application
+ tags << "ids-schema-version=#{schema[:version]}" if schema[:version].present?
+
  dump = hosted_data_client.post(
  'api/v1/custom_dumps/',
  reference_id: tracker.id,
  schema: convert_schema,
+ tags: tags,
  ).body.with_indifferent_access
 
  redis.hset(rk(:state), :dump_id, dump[:id])
@@ -62,6 +94,7 @@ module InstDataShipper
 
  def convert_schema
  definititions = {}
+ table_schemas = schema[:tables]
  table_schemas.each do |ts|
  ts = ts.dup
  tname = table_name(ts)
@@ -86,7 +119,7 @@ module InstDataShipper
  end
 
  {
- version: "#{dumper.export_genre.downcase}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
+ version: "#{dumper.schema_digest}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
  definition: definititions,
  }
  end
@@ -0,0 +1,17 @@
+ module InstDataShipper
+ module Destinations
+ class Speccable < Base
+ include Concerns::Chunking
+
+ def chunk_data(generator, table:, extra: nil)
+ super(generator) do |batch, idx|
+ yield batch
+ end
+ end
+
+ def upload_data_chunk(table_def, chunk); end
+
+ def parse_configuration(uri); end
+ end
+ end
+ end
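The new `Speccable` destination above doubles as a template for the destination interface visible in this diff (`preinitialize_dump(context)`, `initialize_dump(context)`, `chunk_data`, `upload_data_chunk`, `parse_configuration`). A hedged sketch of a hypothetical custom destination that only logs each chunk (the class name and behavior are illustrative, not part of the gem):

```ruby
module InstDataShipper
  module Destinations
    # Hypothetical example destination - logs chunks instead of uploading them.
    class LoggingSink < Base
      include Concerns::Chunking

      # Called before any work starts; may adjust the shared batch context.
      def preinitialize_dump(context); end

      # Called once per dump, before data begins flowing.
      def initialize_dump(context); end

      def chunk_data(generator, table:, extra: nil)
        super(generator) do |batch, idx|
          yield batch
        end
      end

      def upload_data_chunk(table_def, chunk)
        InstDataShipper.logger.info("Would ship a chunk of #{table_def[:warehouse_name]}")
      end

      def parse_configuration(uri); end
    end
  end
end
```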
@@ -18,8 +18,34 @@ module InstDataShipper
  Class.new(self) do
  include(*include)
 
+ if blk.nil? && schema[:tables].any? { |t| t[:sourcer].present? }
+ blk = -> { auto_enqueue_from_schema }
+ elsif blk.nil?
+ raise ArgumentError, "Must provide a block or a schema with source definitions"
+ end
+
  define_method(:enqueue_tasks, &blk)
- define_method(:table_schemas) { schema }
+ define_method(:schema) { schema }
+ end
+ end
+
+ def self.current(executor: nil)
+ cur_batch = Thread.current[CanvasSync::JobBatches::CURRENT_BATCH_THREAD_KEY]
+ ctx = cur_batch&.context || {}
+ return nil unless ctx[:origin_class].present? && ctx[:tracker_id].present?
+
+ clazz = ctx[:origin_class]
+ clazz = clazz.constantize if clazz.is_a?(String)
+ clazz.new(executor: executor)
+ end
+
+ if defined?(Rails) && Rails.env.test?
+ def for_specs!
+ @raw_destinations = ["speccable://nil"]
+ @executor = InstDataShipper::Jobs::AsyncCaller.new
+ @tracker = DumpBatch.new(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')
+ define_singleton_method(:spec_destination) { destinations.first }
+ self
  end
  end
 
@@ -31,15 +57,18 @@ module InstDataShipper
  @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')
 
  @batch_context = context = {
- # TODO Allow to be hooked by Destination, likely via initialize_dump_batch and batch_context, so that if an earlier destination fails we can resend data
  # TODO Consider behavior if last is still running
- incremental_since: DumpBatch.where(genre: export_genre, status: 'completed').order(created_at: :desc).first&.created_at,
+ incremental_since: last_successful_tracker&.created_at,
  }
 
+ destinations.each do |dest|
+ dest.preinitialize_dump(context)
+ end
+
  begin
  begin
  destinations.each do |dest|
- dest.initialize_dump()
+ dest.initialize_dump(context)
  end
 
  run_hook(:initialize_dump_batch, context)
@@ -52,6 +81,7 @@ module InstDataShipper
 
  Sidekiq::Batch.new.tap do |batch|
  context[:root_bid] = batch.bid
+ tracker.update(batch_id: batch.bid)
 
  batch.description = "HD #{export_genre} Export #{tracker.id} Root"
  batch.context = context
@@ -62,6 +92,7 @@ module InstDataShipper
  rescue => ex
  delayed :cleanup_fatal_error!
  InstDataShipper.handle_suppressed_error(ex)
+ tracker.update(status: 'failed', exception: ex.message, backtrace: ex.backtrace.join("\n"))
  end
  end
  rescue => ex
@@ -74,6 +105,7 @@ module InstDataShipper
  end
  end
  end
+ tracker.update(status: 'failed', exception: ex.message, backtrace: ex.backtrace.join("\n"))
  raise ex
  end
  end
@@ -82,15 +114,31 @@ module InstDataShipper
  @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
  end
 
+ def last_successful_tracker
+ @last_successful_tracker ||= DumpBatch.where(job_class: self.class.to_s, genre: export_genre, status: 'completed').order(created_at: :desc).first
+ end
+
  def export_genre
- self.class.to_s.gsub(/HD|ExportJob/, '')
+ self.class.to_s
  end
 
  def origin_class
  batch_context[:origin_class]&.constantize || self.class
  end
 
+ def schema
+ return origin_class::SCHEMA if defined?(origin_class::SCHEMA)
+ raise NotImplementedError
+ end
+
+ def schema_digest
+ Digest::MD5.hexdigest(schema.to_json)[0...8]
+ end
+
  def table_is_incremental?(table_def)
+ return false unless incremental_since.present?
+
+ # TODO Return false if table's schema changes
  if (inc = table_def[:incremental]).present?
  differ = inc[:if]
  return !!incremental_since if differ.nil?
@@ -119,7 +167,7 @@ module InstDataShipper
 
  value = Array(value).compact
 
- table_schemas.each do |ts|
+ schema[:tables].each do |ts|
  return ts if value.include?(ts[key])
  end
  end
@@ -144,6 +192,14 @@ module InstDataShipper
  raise NotImplementedError
  end
 
+ def auto_enqueue_from_schema
+ schema[:tables].each do |table_def|
+ src = table_def[:sourcer]
+ next unless src.present?
+ instance_exec(table_def, &src)
+ end
+ end
+
  def upload_data(table_def, extra: nil, &datagen)
  # Allow muxing, allowing a hook to prevent some files going to certain destinations
  dests = destinations_for_table(table_def)
@@ -207,11 +263,6 @@ module InstDataShipper
 
  # Helper Methods
 
- def table_schemas
- return origin_class::TABLE_SCHEMAS if defined?(origin_class::TABLE_SCHEMAS)
- raise NotImplementedError
- end
-
  def delayed(mthd, *args, **kwargs)
  Jobs::AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
  end
@@ -231,7 +282,7 @@ module InstDataShipper
  end
 
  def destinations
- @destinations ||= (@raw_destinations.presence || batch_context[:destinations]).map.with_index do |dest, i|
+ @destinations ||= (@raw_destinations.presence || batch_context[:destinations] || []).map.with_index do |dest, i|
  dcls = InstDataShipper.resolve_destination(dest)
  dcls.new("#{InstDataShipper.redis_prefix}:dump#{tracker.id}:dest#{i}", dest, self)
  end
@@ -2,21 +2,28 @@ module InstDataShipper
  # This class ends up filling two roles - Schema and Mapping.
  # It makes for a clean API, but it's a little less canonical since (eg) the S3 destination doesn't need column type annotations.
  class SchemaBuilder
- attr_reader :tables
+ attr_reader :schema
 
  def initialize
- @tables = []
+ @schema = {
+ tables: [],
+ }
  end
 
  def self.build(&block)
  builder = new
  builder.instance_exec(&block)
- builder.tables
+ builder.schema
+ end
+
+ def version(version)
+ @schema[:version] = version
  end
 
- def extend_table_builder(&block)
+ def extend_table_builder(modul = nil, &block)
  @table_builder_class ||= Class.new(TableSchemaBuilder)
- @table_builder_class.class_eval(&block)
+ @table_builder_class.class_eval(&block) if block.present?
+ @table_builder_class.extend(modul) if modul.present?
  end
 
  def table(model_or_name, description = nil, model: nil, query: nil, **extra, &block)
@@ -36,6 +43,7 @@ module InstDataShipper
 
  tdef[:query] = model_or_name
  tdef[:model] = model_or_name.model
+ tdef[:warehouse_name] = model_or_name.model.table_name
  elsif model_or_name.is_a?(Class) && model_or_name < ActiveRecord::Base
  tdef[:warehouse_name] = model_or_name.table_name
  tdef[:model] = model_or_name
@@ -43,9 +51,9 @@ module InstDataShipper
  tdef[:warehouse_name] = model_or_name
  end
 
- @table_builder_class.build(tdef, &block)
+ (@table_builder_class || TableSchemaBuilder).build(tdef, &block)
 
- @tables << tdef
+ @schema[:tables] << tdef
 
  tdef
  end
@@ -61,14 +69,18 @@ module InstDataShipper
  def self.build(tdef, &block)
  builder = new(tdef)
  builder.instance_exec(&block)
- builder.columns
+ builder.options
+ end
+
+ def annotate(key, value)
+ options[key] = value
  end
 
- # def annotate(key, value)
- # options[key] = value
- # end
+ def version(version)
+ options[:version] = version
+ end
 
- def incremental(scope="updated_at", **kwargs)
+ def incremental(scope=nil, **kwargs)
  if (extras = kwargs.keys - %i[on if]).present?
  raise ArgumentError, "Unsuppored options: #{extras.inspect}"
  end
@@ -80,6 +92,17 @@ module InstDataShipper
  }
  end
 
+ def source(source, override_model=nil, **kwargs)
+ raise "Source already set" if options[:sourcer].present?
+
+ if source.is_a?(Symbol)
+ mthd = :"import_#{source}"
+ options = self.options
+ source = ->(table_def) { send(mthd, override_model || options[:model] || options[:warehouse_name], **kwargs) }
+ end
+ options[:sourcer] = source
+ end
+
  def column(name, *args, refs: [], from: nil, **extra, &block)
  from ||= name.to_s
 
@@ -1,3 +1,3 @@
  module InstDataShipper
- VERSION = "0.1.0.beta2".freeze
+ VERSION = "0.2.1".freeze
  end
@@ -39,6 +39,7 @@ module InstDataShipper
 
  def logger
  return @logger if defined? @logger
+ # TODO Annotate logs with DumpBatch ID
  @logger = Logger.new(STDOUT)
  @logger.level = Logger::DEBUG
  @logger
@@ -49,7 +50,7 @@ module InstDataShipper
  end
 
  def redis_prefix
- pfx = "hdd"
+ pfx = "ids"
  pfx = "#{Apartment::Tenant.current}:#{pfx}" if defined?(Apartment)
  pfx
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: inst_data_shipper
  version: !ruby/object:Gem::Version
- version: 0.1.0.beta2
+ version: 0.2.1
  platform: ruby
  authors:
  - Instructure CustomDev
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2024-03-08 00:00:00.000000000 Z
+ date: 2024-03-19 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rails
@@ -395,11 +395,11 @@ files:
  - lib/inst_data_shipper/destinations/concerns/chunking.rb
  - lib/inst_data_shipper/destinations/hosted_data.rb
  - lib/inst_data_shipper/destinations/s3.rb
+ - lib/inst_data_shipper/destinations/speccable.rb
  - lib/inst_data_shipper/dumper.rb
  - lib/inst_data_shipper/engine.rb
  - lib/inst_data_shipper/jobs/async_caller.rb
  - lib/inst_data_shipper/jobs/base.rb
- - lib/inst_data_shipper/jobs/basic_dump_job.rb
  - lib/inst_data_shipper/record.rb
  - lib/inst_data_shipper/schema_builder.rb
  - lib/inst_data_shipper/version.rb
@@ -436,9 +436,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
  version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
- - - ">"
+ - - ">="
  - !ruby/object:Gem::Version
- version: 1.3.1
+ version: '0'
  requirements: []
  rubygems_version: 3.1.6
  signing_key:
@@ -1,15 +0,0 @@
- module InstDataShipper
- module Jobs
- class BasicDumpJob < InstDataShipper::Jobs::Base
- sidekiq_options retry: 3 if defined?(sidekiq_options)
-
- def perform(endpoints)
-
- end
-
- protected
-
-
- end
- end
- end