inst_data_shipper 0.1.0.beta2 → 0.2.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 146f5b93819d7950f9bd256a99eb690a63d453b86be4ac6ac7cf4c5901724cdd
- data.tar.gz: 2410298ebb3b1ddc565ca70d49a274a129e83087d461dcfae4d4981979795ea5
+ metadata.gz: 4c84d0f1543bfcb42bce30e69ff12cc683b6f6bf51e9275f7529dd0b070bbb79
+ data.tar.gz: 22b5128af3f2d03b14565c656abca685c6e7196083576d1fd20a42a834690cba
  SHA512:
- metadata.gz: c6dc93902e0ef7a114d2434d3901c677021d57603b07c1122d72bd2f953184b207d0e57f5441fafb7035a00bf28f2e5035d34cd0edfb73da6d2f93d93874f344
- data.tar.gz: 2e7babf6a2ed86f9a2e5769bfb393549fb07b771b700513efd640ba2b98ebc3eeadec99578e4c828738903ef121f8e3bebe8490f6f75de0ce9afac43ac28b8fa
+ metadata.gz: 14e5eb97d9c5dbe5049eb94cd3690a2ab59df7bf6e5e0c62b21e8893f4cb75ca294cce6d28a4b0f7dccd1c53d34d5c7a34be7b2f4699f1b320f74df14d50f672
+ data.tar.gz: e7d22728140a7fc331abb5f608da98d44013d49027bf932ef183a4f75babe797632aca83955cb77a205d6004d29a7377dde87aaba0c245a3564541b7e1f55cbe
data/README.md CHANGED
@@ -1,6 +1,6 @@
  # InstDataShipper
 
- This gem is intended to facilitate fast and easy syncing of Canvas data.
+ This gem is intended to facilitate easy upload of LTI datasets to Instructure Hosted Data.
 
  ## Installation
 
@@ -16,6 +16,157 @@ Then run the migrations:
  bundle exec rake db:migrate
  ```
 
+ ## Usage
+
+ ### Dumper
+
+ The main tool provided by this Gem is the `InstDataShipper::Dumper` class. It is used to define a "Dump", which is a combination of tasks and schema.
+
+ Here is an example `Dumper` implementation, wrapped in an ActiveJob job:
+ ```ruby
+ class HostedDataPushJob < ApplicationJob
+   # The SCHEMA serves two purposes: defining the destination schema and mapping data into it
+   SCHEMA = InstDataShipper::SchemaBuilder.build do
+     # You can augment the Table-builder DSL with custom methods like so:
+     extend_table_builder do
+       # It may be useful to define custom column-definition helpers:
+       def custom_column(*args, from: nil, **kwargs, &blk)
+         # In this example, the helper reads the value from a `data` jsonb column - without it, you'd need
+         # to define `from: ->(row) { row.data["<KEY>"] }` on each column that needs to read from the jsonb
+         from ||= args[0].to_s
+         # Capture the key in a separate local so the lambda doesn't close over the reassigned `from`
+         key = from
+         from = ->(row) { row.data[key] } if from.is_a?(String)
+         column(*args, **kwargs, from: from, &blk)
+       end
+
+       # `extend_table_builder` uses `class_eval`, so you could alternatively write your helpers in a Concern or Module and include them like normal:
+       include SomeConcern
+     end
+
+     table(ALocalModel, "<TABLE DESCRIPTION>") do
+       # If you define a table as incremental, it'll only export changes made since the start of the last successful Dumper run.
+       # The first argument ("scope") can be interpreted in different ways:
+       #   If exporting a local model it may be (default: `updated_at`):
+       #     a Proc that will receive a Relation and return a Relation (use `incremental_since`)
+       #     a String naming a column to compare with `incremental_since`
+       #   If exporting a Canvas report it may be (default: `updated_after`):
+       #     a Proc that will receive report params and return modified report params (use `incremental_since`)
+       #     a String naming a report param to set to `incremental_since`
+       # `on:` is passed to Hosted Data and is used as the unique key. It may be an array to form a composite key.
+       # `if:` may be a Proc or a Symbol (of a method on the Dumper).
+       incremental "updated_at", on: [:id], if: -> { ... }
+
+       # Schemas may declaratively define the data source.
+       # This can be used for basic schemas where there's a 1:1 mapping between source table and destination table and no conditional logic needs to be performed.
+       # In order to apply these statements, your Dumper must call `auto_enqueue_from_schema`.
+       source :local_table
+       # A Proc may also be passed. The below is equivalent to the above:
+       source ->(table_def) { import_local_table(table_def[:model] || table_def[:warehouse_name]) }
+
+       column :name_in_destinations, :maybe_optional_sql_type, "Optional description of column"
+
+       # The type may usually be omitted if `table()` is passed a Model class, but strings are an exception to this
+       custom_column :name, :"varchar(128)"
+
+       # `from:` may be...
+       # a Symbol of a method to be called on the record:
+       custom_column :sis_type, :"varchar(32)", from: :some_model_method
+       # a String naming a column to read from the record:
+       custom_column :sis_type, :"varchar(32)", from: "sis_source_type"
+       # a Proc to be called with each record:
+       custom_column :sis_type, :"varchar(32)", from: ->(rec) { ... }
+       # or omitted, defaulting to the schema column name as a String ("sis_type" in this case):
+       custom_column :sis_type, :"varchar(32)"
+     end
+
+     table("my_table", model: ALocalModel) do
+       # ...
+     end
+
+     table("proserv_student_submissions_csv") do
+       column :canvas_id, :bigint, from: "canvas user id"
+       column :sis_id, :"varchar(64)", from: "sis user id"
+       column :name, :"varchar(64)", from: "user name"
+       column :submission_id, :bigint, from: "submission id"
+     end
+   end
+
+   Dumper = InstDataShipper::Dumper.define(schema: SCHEMA, include: [
+     InstDataShipper::DataSources::LocalTables,
+     InstDataShipper::DataSources::CanvasReports,
+   ]) do
+     import_local_table(ALocalModel)
+     import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+     # If the report name/Model doesn't directly match the Schema, a schema_name: parameter may be passed:
+     import_local_table(SomeModel, schema_name: "my_table")
+     import_canvas_report_by_terms("some_report", terms: Term.all.pluck(:canvas_id), schema_name: "my_table")
+
+     # Iterate through the Tables defined in the Schema and apply any defined `source` statements.
+     # This is the default behavior if `define()` is called without a block.
+     auto_enqueue_from_schema
+   end
+
+   def perform
+     Dumper.perform_dump([
+       "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+       "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+     ])
+   end
+ end
+ ```
+
+ `Dumper`s may also be defined as normal Ruby subclasses:
+ ```ruby
+ class HostedDataPushJob < ApplicationJob
+   SCHEMA = InstDataShipper::SchemaBuilder.build do
+     # ...
+   end
+
+   class Dumper < InstDataShipper::Dumper
+     include InstDataShipper::DataSources::LocalTables
+     include InstDataShipper::DataSources::CanvasReports
+
+     def enqueue_tasks
+       import_local_table(ALocalModel)
+       import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+       # auto_enqueue_from_schema
+     end
+
+     def schema
+       SCHEMA
+     end
+   end
+
+   def perform
+     Dumper.perform_dump([
+       "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+       "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+     ])
+   end
+ end
+ ```
+
+ ### Destinations
+
+ This Gem is mainly designed for use with Hosted Data, but it tries to abstract that a little to allow for other destinations/backends. Out of the box, support for Hosted Data and S3 is included.
+
+ Destinations are passed as URI-formatted strings. Passing Hashes is also supported, but the format/keys are destination-specific.
+
+ Destinations blindly accept URI Fragments (the `#` chunk at the end of the URI). These options are not used internally, but are made available as `dest.user_config`. Ideally they are in the same format as query parameters (`x=1&y=2`, which the gem will try to parse into a Hash), but they can be any string.
+
+ #### Hosted Data
+ `hosted-data://<JWT>@<HOSTED DATA SERVER>`
+
+ ##### Optional Parameters:
+ - `table_prefix`: An optional string to prefix onto each table name in the schema when declaring the schema in Hosted Data
+
+ #### S3
+ `s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<optional path>`
+
+ ##### Optional Parameters:
+ _None_
+
  ## Development
 
  When adding to or updating this gem, make sure you do the following:
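
To make the `user_config` fragment behavior described in the new Destinations section concrete, here is a hedged example; the host and fragment keys are placeholders, not values from this release:

```ruby
# Everything before "#" is the normal Hosted Data destination URI; the fragment
# is ignored by the shipper itself and surfaces on the destination object as
# `dest.user_config` (parsed into a Hash when it is query-string shaped).
destination = "hosted-data://<JWT>@hosted-data.example.com?table_prefix=example#notify=1&label=nightly"

Dumper.perform_dump([destination])
```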
@@ -7,11 +7,12 @@ class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
 
  t.string :job_class
  t.string :genre
+ t.string :batch_id
 
  t.string :exception
  t.text :backtrace
- t.text :metadata
- t.text :job_arguments
+ # t.text :metadata
+ # t.text :job_arguments
 
  t.timestamps
  end
@@ -19,7 +19,7 @@ module InstDataShipper
  instance_exec(&@body_block)
  end
 
- def table_schemas
+ def schema
  pointer = @schema_pointer || batch_context[:schema_pointer]
  pointer.constantize
  end
@@ -1,7 +1,31 @@
  module InstDataShipper
  module DataSources
  module Base
- extend ActiveSupport::Concern
+ # This could be a Concern, but we don't want Concern inheritance logic kicking in if it is included into another Concern.
+
+ def self.included(base)
+ base.extend ActiveSupport::Concern unless self.is_a?(Class) || base < ActiveSupport::Concern
+ base.extend ModuleHelperMethods
+ base.send(:include, Concern)
+ end
+
+ module Concern
+ extend ActiveSupport::Concern
+ end
+
+ module ModuleHelperMethods
+ def delayed(mthd_sym)
+ mthd = instance_method(mthd_sym)
+ pmthd_sym = :"_delayed_#{mthd_sym}"
+
+ alias_method pmthd_sym, mthd_sym
+ private pmthd_sym
+
+ define_method(mthd_sym) do |*args, **kwargs|
+ delayed(pmthd_sym, *args, **kwargs)
+ end
+ end
+ end
  end
  end
  end
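
In effect, the new `delayed` helper turns an instance method into an asynchronous dispatch: the original body is preserved under a private `_delayed_*` alias and the public name is redefined to enqueue the call. A rough sketch of what an including module ends up with (the module and method names here are illustrative, not from the gem):

```ruby
module ExampleSource
  include InstDataShipper::DataSources::Base

  delayed def import_example(arg, option: nil)
    # original body; runs later, inside the enqueued job
  end

  # ...which is approximately equivalent to writing:
  #
  #   def _delayed_import_example(arg, option: nil)
  #     # original body
  #   end
  #   private :_delayed_import_example
  #
  #   def import_example(*args, **kwargs)
  #     delayed(:_delayed_import_example, *args, **kwargs) # enqueues via Jobs::AsyncCaller
  #   end
end
```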
@@ -2,7 +2,7 @@ module InstDataShipper
  module DataSources
  # This module contains the logic for processing Canvas reports
  module CanvasReports
- extend ActiveSupport::Concern
+ include Base
 
  included do
  hook :initialize_dump_batch do |context|
@@ -2,17 +2,11 @@ module InstDataShipper
  module DataSources
  # This module contains the logic for processing local AR tables
  module LocalTables
- extend ActiveSupport::Concern
+ include Base
 
  public
 
- def import_local_table(*args, **kwargs)
- delayed(:_import_local_table, *args, **kwargs)
- end
-
- private
-
- def _import_local_table(model, schema_name: nil)
+ delayed def import_local_table(model, schema_name: nil)
  model = model.safe_constantize if model.is_a?(String)
 
  table_def = lookup_table_schema!(schema_name, { model: model })
@@ -22,7 +16,12 @@ module InstDataShipper
  query = _resolve_model_query(query, table_def[:query])
 
  if table_is_incremental?(table_def)
- query = _resolve_model_query(query, table_def.dig(:incremental, :scope), string: ->(r, c) { r.where("? > ?", c, incremental_since) })
+ query = _resolve_model_query(
+ query,
+ table_def.dig(:incremental, :scope),
+ string: ->(query, column) { query.where("#{column} > ?", incremental_since) },
+ default: "updated_at",
+ )
  end
 
  query.find_each do |m|
@@ -35,7 +34,11 @@ module InstDataShipper
  upload_data(table_def, &inner_block)
  end
 
- def _resolve_model_query(relation, query, string: nil)
+ private
+
+ def _resolve_model_query(relation, query, string: nil, default: nil)
+ return relation if query == false
+ query = default if query.nil?
  return relation if query.nil?
 
  if query.is_a?(Symbol)
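
Tying this back to the schema DSL: `_resolve_model_query` now accepts `default:` and a `string:` resolver, so the incremental `scope` declared on a table can take several forms. A hedged illustration of declarations that would exercise the default, String, and Proc paths (based on the README comments above; the column name is a placeholder):

```ruby
table(ALocalModel, "Example table") do
  # No scope argument: the new default of "updated_at" is used,
  # i.e. rows where updated_at > incremental_since.
  incremental on: [:id]

  # A String scope names the column to compare against incremental_since:
  # incremental "touched_at", on: [:id]

  # A Proc scope receives the Relation and can filter it directly:
  # incremental ->(scope) { scope.where("updated_at > ?", incremental_since) }, on: [:id]
end
```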
@@ -3,7 +3,7 @@ module InstDataShipper
  class Base
  attr_reader :dumper
 
- delegate :tracker, :table_schemas, :working_dir, to: :dumper
+ delegate :tracker, :schema, :working_dir, to: :dumper
 
  def initialize(cache_key, config, dumper)
  @cache_key = cache_key
@@ -11,9 +11,13 @@ module InstDataShipper
  @dumper = dumper
  end
 
+ # This method is called before taking any actions.
+ # It should be used to make any necessary state assumptions (e.g., the HostedData destination checks for a previous dump to determine if it can use incremental_since).
+ def preinitialize_dump(context); end
+
  # This method is called before processing any data.
  # It should be used to initialize any external resources needed for the dump.
- def initialize_dump; end
+ def initialize_dump(context); end
 
  # Yields an object (can be anything) that will be passed to `upload_data_chunk` as `chunk`.
  #
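
The two-phase initialization added here (`preinitialize_dump` before any work, `initialize_dump` once the dump starts) is what the Hosted Data destination below builds on. A minimal sketch of a custom destination using these hooks; the class name and internals are illustrative, only the hook names and arities come from this diff:

```ruby
module InstDataShipper
  module Destinations
    # Hypothetical destination; only the overridden method signatures are from the gem.
    class AuditLogDestination < Base
      # Runs before any tasks are enqueued; may adjust the shared dump context,
      # e.g. clearing incremental_since when the remote side has no prior state.
      def preinitialize_dump(context)
        context[:incremental_since] = nil unless previously_shipped?
      end

      # Runs once per dump, before any data is processed (open sessions, create remote records, etc.).
      def initialize_dump(context)
        InstDataShipper.logger.info("Starting dump #{tracker.id} (incremental_since=#{context[:incremental_since].inspect})")
      end

      def upload_data_chunk(table_def, chunk)
        # Ship one chunk of rows for the given table definition.
      end

      private

      def previously_shipped?
        false # placeholder
      end
    end
  end
end
```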
@@ -7,7 +7,7 @@ module InstDataShipper
  DEFAULT_CHUNK_SIZE = 100_000
 
  def chunk_data(generator, chunk_size: nil, **kwargs)
- chunk_size ||= config.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
+ chunk_size ||= config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
  slice = 1
 
  btchr = CanvasSync::BatchProcessor.new(of: chunk_size) do |batch|
@@ -24,7 +24,7 @@ module InstDataShipper
 
  def group_key
  super.tap do |k|
- k[:chunk_size] = config.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
+ k[:chunk_size] = config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
  end
  end
 
@@ -5,11 +5,43 @@ module InstDataShipper
  class HostedData < Base
  include Concerns::Chunking
 
- def initialize_dump
+ def preinitialize_dump(context)
+ if context[:incremental_since].present?
+ begin
+ last_dump = hosted_data_client.get("api/v1/custom_dumps/last", {
+ status: 'imported',
+ # schema_version: convert_schema[:version],
+ tags: [
+ "ids-schema=#{dumper.schema_digest}",
+ "ids-genre=#{dumper.export_genre}",
+ ],
+ }).body.with_indifferent_access
+
+ if last_dump[:created_at] < context[:incremental_since]
+ InstDataShipper.logger.info("Last successful HostedData dump is older than incremental_since - bumping back incremental_since")
+ context[:incremental_since] = last_dump[:created_at]
+ end
+ rescue Faraday::ResourceNotFound
+ # TODO It'd be nice to make this per-table
+ InstDataShipper.logger.info("No last successful HostedData dump of the same schema - not using incremental_since")
+ context[:incremental_since] = nil
+ end
+ end
+ end
+
+ def initialize_dump(context)
+ tags = [
+ "ids-schema=#{dumper.schema_digest}",
+ "ids-genre=#{dumper.export_genre}",
+ ]
+ tags << "ids-app=#{Rails.application.class.name.gsub(/::Application$/, '')}" if defined?(Rails) && Rails.application
+ tags << "ids-schema-version=#{schema[:version]}" if schema[:version].present?
+
  dump = hosted_data_client.post(
  'api/v1/custom_dumps/',
  reference_id: tracker.id,
  schema: convert_schema,
+ tags: tags,
  ).body.with_indifferent_access
 
  redis.hset(rk(:state), :dump_id, dump[:id])
@@ -62,6 +94,7 @@ module InstDataShipper
 
  def convert_schema
  definititions = {}
+ table_schemas = schema[:tables]
  table_schemas.each do |ts|
  ts = ts.dup
  tname = table_name(ts)
@@ -86,7 +119,7 @@ module InstDataShipper
  end
 
  {
- version: "#{dumper.export_genre.downcase}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
+ version: "#{dumper.schema_digest}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
  definition: definititions,
  }
  end
@@ -0,0 +1,17 @@
+ module InstDataShipper
+ module Destinations
+ class Speccable < Base
+ include Concerns::Chunking
+
+ def chunk_data(generator, table:, extra: nil)
+ super(generator) do |batch, idx|
+ yield batch
+ end
+ end
+
+ def upload_data_chunk(table_def, chunk); end
+
+ def parse_configuration(uri); end
+ end
+ end
+ end
@@ -18,8 +18,34 @@ module InstDataShipper
  Class.new(self) do
  include(*include)
 
+ if blk.nil? && schema[:tables].any? { |t| t[:sourcer].present? }
+ blk = -> { auto_enqueue_from_schema }
+ elsif blk.nil?
+ raise ArgumentError, "Must provide a block or a schema with source definitions"
+ end
+
  define_method(:enqueue_tasks, &blk)
- define_method(:table_schemas) { schema }
+ define_method(:schema) { schema }
+ end
+ end
+
+ def self.current(executor: nil)
+ cur_batch = Thread.current[CanvasSync::JobBatches::CURRENT_BATCH_THREAD_KEY]
+ ctx = cur_batch&.context || {}
+ return nil unless ctx[:origin_class].present? && ctx[:tracker_id].present?
+
+ clazz = ctx[:origin_class]
+ clazz = clazz.constantize if clazz.is_a?(String)
+ clazz.new(executor: executor)
+ end
+
+ if defined?(Rails) && Rails.env.test?
+ def for_specs!
+ @raw_destinations = ["speccable://nil"]
+ @executor = InstDataShipper::Jobs::AsyncCaller.new
+ @tracker = DumpBatch.new(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')
+ define_singleton_method(:spec_destination) { destinations.first }
+ self
  end
  end
 
@@ -31,15 +57,18 @@ module InstDataShipper
  @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')
 
  @batch_context = context = {
- # TODO Allow to be hooked by Destination, likely via initialize_dump_batch and batch_context, so that if an earlier destination fails we can resend data
  # TODO Consider behavior if last is still running
- incremental_since: DumpBatch.where(genre: export_genre, status: 'completed').order(created_at: :desc).first&.created_at,
+ incremental_since: last_successful_tracker&.created_at,
  }
 
+ destinations.each do |dest|
+ dest.preinitialize_dump(context)
+ end
+
  begin
  begin
  destinations.each do |dest|
- dest.initialize_dump()
+ dest.initialize_dump(context)
  end
 
  run_hook(:initialize_dump_batch, context)
@@ -52,6 +81,7 @@ module InstDataShipper
 
  Sidekiq::Batch.new.tap do |batch|
  context[:root_bid] = batch.bid
+ tracker.update(batch_id: batch.bid)
 
  batch.description = "HD #{export_genre} Export #{tracker.id} Root"
  batch.context = context
@@ -62,6 +92,7 @@ module InstDataShipper
  rescue => ex
  delayed :cleanup_fatal_error!
  InstDataShipper.handle_suppressed_error(ex)
+ tracker.update(status: 'failed', exception: ex.message, backtrace: ex.backtrace.join("\n"))
  end
  end
  rescue => ex
@@ -74,6 +105,7 @@ module InstDataShipper
  end
  end
  end
+ tracker.update(status: 'failed', exception: ex.message, backtrace: ex.backtrace.join("\n"))
  raise ex
  end
  end
@@ -82,15 +114,31 @@ module InstDataShipper
  @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
  end
 
+ def last_successful_tracker
+ @last_successful_tracker ||= DumpBatch.where(job_class: self.class.to_s, genre: export_genre, status: 'completed').order(created_at: :desc).first
+ end
+
  def export_genre
- self.class.to_s.gsub(/HD|ExportJob/, '')
+ self.class.to_s
  end
 
  def origin_class
  batch_context[:origin_class]&.constantize || self.class
  end
 
+ def schema
+ return origin_class::SCHEMA if defined?(origin_class::SCHEMA)
+ raise NotImplementedError
+ end
+
+ def schema_digest
+ Digest::MD5.hexdigest(schema.to_json)[0...8]
+ end
+
  def table_is_incremental?(table_def)
+ return false unless incremental_since.present?
+
+ # TODO Return false if table's schema changes
  if (inc = table_def[:incremental]).present?
  differ = inc[:if]
  return !!incremental_since if differ.nil?
@@ -119,7 +167,7 @@ module InstDataShipper
 
  value = Array(value).compact
 
- table_schemas.each do |ts|
+ schema[:tables].each do |ts|
  return ts if value.include?(ts[key])
  end
  end
@@ -144,6 +192,14 @@ module InstDataShipper
  raise NotImplementedError
  end
 
+ def auto_enqueue_from_schema
+ schema[:tables].each do |table_def|
+ src = table_def[:sourcer]
+ next unless src.present?
+ instance_exec(table_def, &src)
+ end
+ end
+
  def upload_data(table_def, extra: nil, &datagen)
  # Allow muxing, allowing a hook to prevent some files going to certain destinations
  dests = destinations_for_table(table_def)
@@ -207,11 +263,6 @@ module InstDataShipper
 
  # Helper Methods
 
- def table_schemas
- return origin_class::TABLE_SCHEMAS if defined?(origin_class::TABLE_SCHEMAS)
- raise NotImplementedError
- end
-
  def delayed(mthd, *args, **kwargs)
  Jobs::AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
  end
@@ -231,7 +282,7 @@ module InstDataShipper
  end
 
  def destinations
- @destinations ||= (@raw_destinations.presence || batch_context[:destinations]).map.with_index do |dest, i|
+ @destinations ||= (@raw_destinations.presence || batch_context[:destinations] || []).map.with_index do |dest, i|
  dcls = InstDataShipper.resolve_destination(dest)
  dcls.new("#{InstDataShipper.redis_prefix}:dump#{tracker.id}:dest#{i}", dest, self)
  end
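
A short note on the new `Dumper.current` class method shown earlier in this file's diff: it reconstructs a dumper instance from the current CanvasSync job-batch context, so code running inside a dump's batch can reach the active dumper. A hedged usage sketch (it returns nil outside of a dump; everything here beyond the method itself is illustrative):

```ruby
# Inside a job or hook executing under the dump's Sidekiq batch:
dumper = InstDataShipper::Dumper.current
if dumper
  # e.g. inspect the schema digest or other dump-level state
  InstDataShipper.logger.info("Active dump schema digest: #{dumper.schema_digest}")
end
```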
@@ -2,21 +2,28 @@ module InstDataShipper
  # This class ends up filling two roles - Schema and Mapping.
  # It makes for a clean API, but it's a little less canonical since, e.g., the S3 destination doesn't need column type annotations.
  class SchemaBuilder
- attr_reader :tables
+ attr_reader :schema
 
  def initialize
- @tables = []
+ @schema = {
+ tables: [],
+ }
  end
 
  def self.build(&block)
  builder = new
  builder.instance_exec(&block)
- builder.tables
+ builder.schema
+ end
+
+ def version(version)
+ @schema[:version] = version
  end
 
- def extend_table_builder(&block)
+ def extend_table_builder(modul = nil, &block)
  @table_builder_class ||= Class.new(TableSchemaBuilder)
- @table_builder_class.class_eval(&block)
+ @table_builder_class.class_eval(&block) if block.present?
+ @table_builder_class.extend(modul) if modul.present?
  end
 
  def table(model_or_name, description = nil, model: nil, query: nil, **extra, &block)
@@ -36,6 +43,7 @@ module InstDataShipper
 
  tdef[:query] = model_or_name
  tdef[:model] = model_or_name.model
+ tdef[:warehouse_name] = model_or_name.model.table_name
  elsif model_or_name.is_a?(Class) && model_or_name < ActiveRecord::Base
  tdef[:warehouse_name] = model_or_name.table_name
  tdef[:model] = model_or_name
@@ -43,9 +51,9 @@ module InstDataShipper
  tdef[:warehouse_name] = model_or_name
  end
 
- @table_builder_class.build(tdef, &block)
+ (@table_builder_class || TableSchemaBuilder).build(tdef, &block)
 
- @tables << tdef
+ @schema[:tables] << tdef
 
  tdef
  end
@@ -61,14 +69,18 @@ module InstDataShipper
  def self.build(tdef, &block)
  builder = new(tdef)
  builder.instance_exec(&block)
- builder.columns
+ builder.options
+ end
+
+ def annotate(key, value)
+ options[key] = value
  end
 
- # def annotate(key, value)
- # options[key] = value
- # end
+ def version(version)
+ options[:version] = version
+ end
 
- def incremental(scope="updated_at", **kwargs)
+ def incremental(scope=nil, **kwargs)
  if (extras = kwargs.keys - %i[on if]).present?
  raise ArgumentError, "Unsupported options: #{extras.inspect}"
  end
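
With `version` now available on both builders (schema-level above, table-level here) plus the un-commented `annotate`, a schema can carry metadata that the Hosted Data destination picks up (see the `ids-schema-version` tag earlier in this diff). A hedged sketch; the values are placeholders:

```ruby
SCHEMA = InstDataShipper::SchemaBuilder.build do
  version "2024-03-19.1"            # stored as schema[:version]

  table(ALocalModel, "Example table") do
    version "2"                     # stored on the table definition as options[:version]
    annotate :owner, "custom-dev"   # arbitrary key/value stored on the table definition
    column :id, :bigint
  end
end
```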
@@ -80,6 +92,17 @@ module InstDataShipper
  }
  end
 
+ def source(source, override_model=nil, **kwargs)
+ raise "Source already set" if options[:sourcer].present?
+
+ if source.is_a?(Symbol)
+ mthd = :"import_#{source}"
+ options = self.options
+ source = ->(table_def) { send(mthd, override_model || options[:model] || options[:warehouse_name], **kwargs) }
+ end
+ options[:sourcer] = source
+ end
+
  def column(name, *args, refs: [], from: nil, **extra, &block)
  from ||= name.to_s
 
@@ -1,3 +1,3 @@
  module InstDataShipper
- VERSION = "0.1.0.beta2".freeze
+ VERSION = "0.2.1".freeze
  end
@@ -39,6 +39,7 @@ module InstDataShipper
 
  def logger
  return @logger if defined? @logger
+ # TODO Annotate logs with DumpBatch ID
  @logger = Logger.new(STDOUT)
  @logger.level = Logger::DEBUG
  @logger
@@ -49,7 +50,7 @@ module InstDataShipper
  end
 
  def redis_prefix
- pfx = "hdd"
+ pfx = "ids"
  pfx = "#{Apartment::Tenant.current}:#{pfx}" if defined?(Apartment)
  pfx
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: inst_data_shipper
  version: !ruby/object:Gem::Version
- version: 0.1.0.beta2
+ version: 0.2.1
  platform: ruby
  authors:
  - Instructure CustomDev
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2024-03-08 00:00:00.000000000 Z
+ date: 2024-03-19 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rails
@@ -395,11 +395,11 @@ files:
  - lib/inst_data_shipper/destinations/concerns/chunking.rb
  - lib/inst_data_shipper/destinations/hosted_data.rb
  - lib/inst_data_shipper/destinations/s3.rb
+ - lib/inst_data_shipper/destinations/speccable.rb
  - lib/inst_data_shipper/dumper.rb
  - lib/inst_data_shipper/engine.rb
  - lib/inst_data_shipper/jobs/async_caller.rb
  - lib/inst_data_shipper/jobs/base.rb
- - lib/inst_data_shipper/jobs/basic_dump_job.rb
  - lib/inst_data_shipper/record.rb
  - lib/inst_data_shipper/schema_builder.rb
  - lib/inst_data_shipper/version.rb
@@ -436,9 +436,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
  version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
- - - ">"
+ - - ">="
  - !ruby/object:Gem::Version
- version: 1.3.1
+ version: '0'
  requirements: []
  rubygems_version: 3.1.6
  signing_key:
@@ -1,15 +0,0 @@
- module InstDataShipper
- module Jobs
- class BasicDumpJob < InstDataShipper::Jobs::Base
- sidekiq_options retry: 3 if defined?(sidekiq_options)
-
- def perform(endpoints)
-
- end
-
- protected
-
-
- end
- end
- end