inst_data_shipper 0.1.0.beta2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +152 -1
- data/db/migrate/20240301090836_create_inst_data_shipper_dump_batches.rb +3 -2
- data/lib/inst_data_shipper/basic_dumper.rb +1 -1
- data/lib/inst_data_shipper/data_sources/base.rb +25 -1
- data/lib/inst_data_shipper/data_sources/canvas_reports.rb +1 -1
- data/lib/inst_data_shipper/data_sources/local_tables.rb +13 -10
- data/lib/inst_data_shipper/destinations/base.rb +6 -2
- data/lib/inst_data_shipper/destinations/concerns/chunking.rb +2 -2
- data/lib/inst_data_shipper/destinations/hosted_data.rb +35 -2
- data/lib/inst_data_shipper/destinations/speccable.rb +17 -0
- data/lib/inst_data_shipper/dumper.rb +63 -12
- data/lib/inst_data_shipper/schema_builder.rb +35 -12
- data/lib/inst_data_shipper/version.rb +1 -1
- data/lib/inst_data_shipper.rb +2 -1
- metadata +5 -5
- data/lib/inst_data_shipper/jobs/basic_dump_job.rb +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4c84d0f1543bfcb42bce30e69ff12cc683b6f6bf51e9275f7529dd0b070bbb79
+  data.tar.gz: 22b5128af3f2d03b14565c656abca685c6e7196083576d1fd20a42a834690cba
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 14e5eb97d9c5dbe5049eb94cd3690a2ab59df7bf6e5e0c62b21e8893f4cb75ca294cce6d28a4b0f7dccd1c53d34d5c7a34be7b2f4699f1b320f74df14d50f672
+  data.tar.gz: e7d22728140a7fc331abb5f608da98d44013d49027bf932ef183a4f75babe797632aca83955cb77a205d6004d29a7377dde87aaba0c245a3564541b7e1f55cbe
data/README.md
CHANGED
@@ -1,6 +1,6 @@
 # InstDataShipper
 
-This gem is intended to facilitate
+This gem is intended to facilitate easy upload of LTI datasets to Instructure Hosted Data.
 
 ## Installation
 
@@ -16,6 +16,157 @@ Then run the migrations:
 bundle exec rake db:migrate
 ```
 
+## Usage
+
+### Dumper
+
+The main tool provided by this Gem is the `InstDataDumper::Dumper` class. It is used to define a "Dump" which is a combination of tasks and schema.
+
+Here is an example `Dumper` implementation, wrapped in an ActiveJob job:
+```ruby
+class HostedDataPushJob < ApplicationJob
+  # The schema serves two purposes: defining the schema and mapping data
+  SCHEMA = InstDataShipper::SchemaBuilder.build do
+    # You can augment the Table-builder DSL with custom methods like so:
+    extend_table_builder do
+      # It may be useful to define a custom column definition helpers:
+      def custom_column(*args, from: nil, **kwargs, &blk)
+        # In this example, the helper reads the value from a `data` jsonb column - without it, you'd need
+        # to define `from: ->(row) { row.data["<KEY>"] }` on each column that needs to read from the jsonb
+        from ||= args[0].to_s
+        from = ->(row) { row.data[from] } if from.is_a?(String)
+        column(*args, **kwargs, from: from, &blk)
+      end
+
+      # `extend_table_builder` uses `class_eval`, so you could alternatively write your helpers in a Concern or Module and include them like normal:
+      include SomeConcern
+    end
+
+    table(ALocalModel, "<TABLE DESCRIPTION>") do
+      # If you define a table as incremental, it'll only export changes made since the start of the last successful Dumper run
+      # The first argument "scope" can be interpreted in different ways:
+      #   If exporting a local model it may be a: (default: `updated_at`)
+      #     Proc that will receive a Relation and return a Relation (use `incremental_since`)
+      #     String of a column to compare with `incremental_since`
+      #   If exporting a Canvas report it may be a: (default: `updated_after`)
+      #     Proc that will receive report params and return modified report params (use `incremental_since`)
+      #     String of a report param to set to `incremental_since`
+      # `on:` is passed to Hosted Data and is used as the unique key. It may be an array to form a composite-key
+      # `if:` may be a Proc or a Symbol (of a method on the Dumper)
+      incremental "updated_at", on: [:id], if: ->() {}
+
+      # Schema's may declaratively define the data source.
+      # This can be used for basic schemas where there's a 1:1 mapping between source table and destination table, and there is no conditional logic that needs to be performed.
+      # In order to apply these statements, your Dumper must call `auto_enqueue_from_schema`.
+      source :local_table
+      # A Proc can also be passed. The below is equivalent to the above
+      source ->(table_def) { import_local_table(table_def[:model] || table_def[:warehouse_name]) }
+
+      column :name_in_destinations, :maybe_optional_sql_type, "Optional description of column"
+
+      # The type may usually be omitted if the `table()` is passed a Model class, but strings are an exception to this
+      custom_column :name, :"varchar(128)"
+
+      # `from:` May be...
+      # A Symbol of a method to be called on the record
+      custom_column :sis_type, :"varchar(32)", from: :some_model_method
+      # A String of a column to read from the record
+      custom_column :sis_type, :"varchar(32)", from: "sis_source_type"
+      # A Proc to be called with each record
+      custom_column :sis_type, :"varchar(32)", from: ->(rec) { ... }
+      # Not specified. Will default to using the Schema Column Name as a String ("sis_type" in this case)
+      custom_column :sis_type, :"varchar(32)"
+    end
+
+    table("my_table", model: ALocalModel) do
+      # ...
+    end
+
+    table("proserv_student_submissions_csv") do
+      column :canvas_id, :bigint, from: "canvas user id"
+      column :sis_id, :"varchar(64)", from: "sis user id"
+      column :name, :"varchar(64)", from: "user name"
+      column :submission_id, :bigint, from: "submission id"
+    end
+  end
+
+  Dumper = InstDataShipper::Dumper.define(schema: SCHEMA, include: [
+    InstDataShipper::DataSources::LocalTables,
+    InstDataShipper::DataSources::CanvasReports,
+  ]) do
+    import_local_table(ALocalModel)
+    import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+    # If the report_name/Model don't directly match the Schema, a schema_name: parameter may be passed:
+    import_local_table(SomeModel, schema_name: "my_table")
+    import_canvas_report_by_terms("some_report", terms: Term.all.pluck(:canvas_id), schema_name: "my_table")
+
+    # Iterate through the Tables defined in the Schema and apply any defined `source` statements.
+    # This is the default behavior if `define()` is called w/o a block.
+    auto_enqueue_from_schema
+  end
+
+  def perform
+    Dumper.perform_dump([
+      "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+      "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+    ])
+  end
+end
+```
+
+`Dumper`s may also be formed as a normal Ruby subclass:
+```ruby
+class HostedDataPushJob < ApplicationJob
+  SCHEMA = InstDataShipper::SchemaBuilder.build do
+    # ...
+  end
+
+  class Dumper < InstDataShipper::Dumper
+    include InstDataShipper::DataSources::LocalTables
+    include InstDataShipper::DataSources::CanvasReports
+
+    def enqueue_tasks
+      import_local_table(ALocalModel)
+      import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+      # auto_enqueue_from_schema
+    end
+
+    def table_schemas
+      SCHEMA
+    end
+  end
+
+  def perform
+    Dumper.perform_dump([
+      "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+      "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+    ])
+  end
+end
+```
+
+### Destinations
+
+This Gem is mainly designed for use with Hosted Data, but it tries to abstract that a little to allow for other destinations/backends. Out of the box, support for Hosted Data and S3 are included.
+
+Destinations are passed as URI-formatted strings. Passing Hashes is also supported, but the format/keys are destination specific.
+
+Destinations blindly accept URI Fragments (the `#` chunk at the end of the URI). These options are not used internally but will be made available as `dest.user_config`. Ideally these are in the same format as query parameters (`x=1&y=2`, which it will try to parse into a Hash), but it can be any string.
+
+#### Hosted Data
+`hosted-data://<JWT>@<HOSTED DATA SERVER>`
+
+##### Optional Parameters:
+- `table_prefix`: An optional string to prefix onto each table name in the schema when declaring the schema in Hosted Data
+
+#### S3
+`s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<optional path>`
+
+##### Optional Parameters:
+_None_
+
 ## Development
 
 When adding to or updating this gem, make sure you do the following:
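A note on the URI Fragment behavior documented in the Destinations section above: the fragment is not interpreted by the gem, it is only exposed back to the destination as `dest.user_config`. The following is a hypothetical sketch (the fragment keys are made up for illustration):

```ruby
# Hypothetical fragment options appended to a destination URI; the gem only
# attempts to parse them like query parameters into dest.user_config.
Dumper.perform_dump([
  "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example#notify=ops&priority=low",
])

# Inside a custom Destination subclass (names illustrative), the parsed Hash
# would then be available roughly as:
#   user_config #=> { "notify" => "ops", "priority" => "low" }
```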
data/db/migrate/20240301090836_create_inst_data_shipper_dump_batches.rb
CHANGED
@@ -7,11 +7,12 @@ class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
 
       t.string :job_class
       t.string :genre
+      t.string :batch_id
 
       t.string :exception
      t.text :backtrace
-      t.text :metadata
-      t.text :job_arguments
+      # t.text :metadata
+      # t.text :job_arguments
 
      t.timestamps
    end
data/lib/inst_data_shipper/data_sources/base.rb
CHANGED
@@ -1,7 +1,31 @@
 module InstDataShipper
   module DataSources
     module Base
-
+      # This could be a Concern, but we don't want Concern inheritance logic kicking in if it is included into another Concern.
+
+      def self.included(base)
+        base.extend ActiveSupport::Concern unless self.is_a?(Class) || base < ActiveSupport::Concern
+        base.extend ModuleHelperMethods
+        base.send(:include, Concern)
+      end
+
+      module Concern
+        extend ActiveSupport::Concern
+      end
+
+      module ModuleHelperMethods
+        def delayed(mthd_sym)
+          mthd = instance_method(mthd_sym)
+          pmthd_sym = :"_delayed_#{mthd_sym}"
+
+          alias_method pmthd_sym, mthd_sym
+          private pmthd_sym
+
+          define_method(mthd_sym) do |*args, **kwargs|
+            delayed(pmthd_sym, *args, **kwargs)
+          end
+        end
+      end
     end
   end
 end
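For orientation (this is not part of the diff), the net effect of the new `delayed` class macro on an including data-source module is roughly the following; the method names are taken from the diff, but the expansion itself is a sketch:

```ruby
# Roughly what `delayed def import_local_table(...)` produces after the macro runs:
# the original body is kept under a private name, and the public method instead
# schedules that private method through the Dumper's async `delayed` call.
module LocalTablesExpanded # illustrative name, not in the gem
  def import_local_table(*args, **kwargs)
    delayed(:_delayed_import_local_table, *args, **kwargs)
  end

  private

  def _delayed_import_local_table(model, schema_name: nil)
    # original import logic, executed later inside the enqueued job
  end
end
```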
data/lib/inst_data_shipper/data_sources/local_tables.rb
CHANGED
@@ -2,17 +2,11 @@ module InstDataShipper
   module DataSources
     # This module contains the logic for processing local AR tables
     module LocalTables
-
+      include Base
 
       public
 
-      def import_local_table(
-        delayed(:_import_local_table, *args, **kwargs)
-      end
-
-      private
-
-      def _import_local_table(model, schema_name: nil)
+      delayed def import_local_table(model, schema_name: nil)
         model = model.safe_constantize if model.is_a?(String)
 
         table_def = lookup_table_schema!(schema_name, { model: model })
@@ -22,7 +16,12 @@ module InstDataShipper
         query = _resolve_model_query(query, table_def[:query])
 
         if table_is_incremental?(table_def)
-          query = _resolve_model_query(
+          query = _resolve_model_query(
+            query,
+            table_def.dig(:incremental, :scope),
+            string: ->(query, column) { query.where("#{column} > ?", incremental_since) },
+            default: "updated_at",
+          )
         end
 
         query.find_each do |m|
@@ -35,7 +34,11 @@ module InstDataShipper
         upload_data(table_def, &inner_block)
       end
 
-
+      private
+
+      def _resolve_model_query(relation, query, string: nil, default: nil)
+        return relation if query == false
+        query = default if query.nil?
         return relation if query.nil?
 
         if query.is_a?(Symbol)
data/lib/inst_data_shipper/destinations/base.rb
CHANGED
@@ -3,7 +3,7 @@ module InstDataShipper
     class Base
       attr_reader :dumper
 
-      delegate :tracker, :
+      delegate :tracker, :schema, :working_dir, to: :dumper
 
       def initialize(cache_key, config, dumper)
         @cache_key = cache_key
@@ -11,9 +11,13 @@ module InstDataShipper
         @dumper = dumper
       end
 
+      # This method is called before taking any actions.
+      # It should be used to make any necessarry state assumptions (eg, the HostedData destination checks for a previous dump to determine if it can use incremental_since)
+      def preinitialize_dump(context); end
+
       # This method is called before processing any data.
       # It should be used to initialize any external resources needed for the dump.
-      def initialize_dump; end
+      def initialize_dump(context); end
 
       # Yields an object (can be anything) that will be passed to `upload_data_chunk` as `chunk`.
       #
data/lib/inst_data_shipper/destinations/concerns/chunking.rb
CHANGED
@@ -7,7 +7,7 @@ module InstDataShipper
       DEFAULT_CHUNK_SIZE = 100_000
 
       def chunk_data(generator, chunk_size: nil, **kwargs)
-        chunk_size ||= config
+        chunk_size ||= config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
         slice = 1
 
         btchr = CanvasSync::BatchProcessor.new(of: chunk_size) do |batch|
@@ -24,7 +24,7 @@ module InstDataShipper
 
       def group_key
         super.tap do |k|
-          k[:chunk_size] = config
+          k[:chunk_size] = config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
         end
       end
 
data/lib/inst_data_shipper/destinations/hosted_data.rb
CHANGED
@@ -5,11 +5,43 @@ module InstDataShipper
     class HostedData < Base
       include Concerns::Chunking
 
-      def
+      def preinitialize_dump(context)
+        if context[:incremental_since].present?
+          begin
+            last_dump = hosted_data_client.get("api/v1/custom_dumps/last", {
+              status: 'imported',
+              # schema_version: convert_schema[:version],
+              tags: [
+                "ids-schema=#{dumper.schema_digest}",
+                "ids-genre=#{dumper.export_genre}",
+              ],
+            }).body.with_indifferent_access
+
+            if last_dump[:created_at] < context[:incremental_since]
+              InstDataShipper.logger.info("Last successful HostedData dump is older than incremental_since - bumping back incremental_since")
+              context[:incremental_since] = last_dump[:created_at]
+            end
+          rescue Faraday::ResourceNotFound
+            # TODO It'd be nice to make this per-table
+            InstDataShipper.logger.info("No Last successful HostedData dump of the same schema - not using incremental_since")
+            context[:incremental_since] = nil
+          end
+        end
+      end
+
+      def initialize_dump(context)
+        tags = [
+          "ids-schema=#{dumper.schema_digest}",
+          "ids-genre=#{dumper.export_genre}",
+        ]
+        tags << "ids-app=#{Rails.application.class.name.gsub(/::Application$/, '')}" if defined?(Rails) && Rails.application
+        tags << "ids-schema-version=#{schema[:version]}" if schema[:version].present?
+
         dump = hosted_data_client.post(
           'api/v1/custom_dumps/',
           reference_id: tracker.id,
           schema: convert_schema,
+          tags: tags,
         ).body.with_indifferent_access
 
         redis.hset(rk(:state), :dump_id, dump[:id])
@@ -62,6 +94,7 @@ module InstDataShipper
 
       def convert_schema
         definititions = {}
+        table_schemas = schema[:tables]
         table_schemas.each do |ts|
           ts = ts.dup
           tname = table_name(ts)
@@ -86,7 +119,7 @@ module InstDataShipper
         end
 
         {
-          version: "#{dumper.
+          version: "#{dumper.schema_digest}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
           definition: definititions,
         }
       end
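To make the new tagging concrete, the tag list built in `initialize_dump` would look roughly like this for a hypothetical Rails application named `MyLti` (digest and version values are invented for illustration):

```ruby
tags = [
  "ids-schema=a1b2c3d4",                 # dumper.schema_digest (first 8 hex chars of the schema MD5)
  "ids-genre=HostedDataPushJob::Dumper", # dumper.export_genre
  "ids-app=MyLti",                       # only added when running inside a Rails application
  "ids-schema-version=1.0.0",            # only added when the schema declares a version
]
```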
data/lib/inst_data_shipper/destinations/speccable.rb
ADDED
@@ -0,0 +1,17 @@
+module InstDataShipper
+  module Destinations
+    class Speccable < Base
+      include Concerns::Chunking
+
+      def chunk_data(generator, table:, extra: nil)
+        super(generator) do |batch, idx|
+          yield batch
+        end
+      end
+
+      def upload_data_chunk(table_def, chunk); end
+
+      def parse_configuration(uri); end
+    end
+  end
+end
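The new `Speccable` destination is a no-op sink intended for test runs; it pairs with the `for_specs!` helper added to `Dumper` in the next file. A rough spec-side sketch (the dumper class name is hypothetical, and it assumes `speccable://` URIs resolve to this destination class):

```ruby
# for_specs! swaps the destinations for "speccable://nil" and exposes the
# resulting destination instance via spec_destination for assertions.
dumper = HostedDataPushJob::Dumper.new.for_specs!
dumper.spec_destination # => an InstDataShipper::Destinations::Speccable instance
```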
data/lib/inst_data_shipper/dumper.rb
CHANGED
@@ -18,8 +18,34 @@ module InstDataShipper
       Class.new(self) do
         include(*include)
 
+        if blk.nil? && schema[:tables].any? { |t| t[:sourcer].present? }
+          blk = -> { auto_enqueue_from_schema }
+        elsif blk.nil?
+          raise ArgumentError, "Must provide a block or a schema with source definitions"
+        end
+
         define_method(:enqueue_tasks, &blk)
-        define_method(:
+        define_method(:schema) { schema }
+      end
+    end
+
+    def self.current(executor: nil)
+      cur_batch = Thread.current[CanvasSync::JobBatches::CURRENT_BATCH_THREAD_KEY]
+      ctx = cur_batch&.context || {}
+      return nil unless ctx[:origin_class].present? && ctx[:tracker_id].present?
+
+      clazz = ctx[:origin_class]
+      clazz = clazz.constantize if clazz.is_a?(String)
+      clazz.new(executor: executor)
+    end
+
+    if defined?(Rails) && Rails.env.test?
+      def for_specs!
+        @raw_destinations = ["speccable://nil"]
+        @executor = InstDataShipper::Jobs::AsyncCaller.new
+        @tracker = DumpBatch.new(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')
+        define_singleton_method(:spec_destination) { destinations.first }
+        self
       end
     end
 
@@ -31,15 +57,18 @@ module InstDataShipper
       @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')
 
       @batch_context = context = {
-        # TODO Allow to be hooked by Destination, likely via initialize_dump_batch and batch_context, so that if an earlier destination fails we can resend data
         # TODO Consider behavior if last is still running
-        incremental_since:
+        incremental_since: last_successful_tracker&.created_at,
       }
 
+      destinations.each do |dest|
+        dest.preinitialize_dump(context)
+      end
+
      begin
        begin
          destinations.each do |dest|
-            dest.initialize_dump()
+            dest.initialize_dump(context)
          end
 
          run_hook(:initialize_dump_batch, context)
@@ -52,6 +81,7 @@ module InstDataShipper
 
          Sidekiq::Batch.new.tap do |batch|
            context[:root_bid] = batch.bid
+            tracker.update(batch_id: batch.bid)
 
            batch.description = "HD #{export_genre} Export #{tracker.id} Root"
            batch.context = context
@@ -62,6 +92,7 @@ module InstDataShipper
        rescue => ex
          delayed :cleanup_fatal_error!
          InstDataShipper.handle_suppressed_error(ex)
+          tracker.update(status: 'failed', exception: ex.message, backtrace: ex.backtrace.join("\n"))
        end
      end
    rescue => ex
@@ -74,6 +105,7 @@ module InstDataShipper
        end
      end
    end
+    tracker.update(status: 'failed', exception: ex.message, backtrace: ex.backtrace.join("\n"))
    raise ex
  end
 end
@@ -82,15 +114,31 @@ module InstDataShipper
      @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
    end
 
+    def last_successful_tracker
+      @last_successful_tracker ||= DumpBatch.where(job_class: self.class.to_s, genre: export_genre, status: 'completed').order(created_at: :desc).first
+    end
+
    def export_genre
-      self.class.to_s
+      self.class.to_s
    end
 
    def origin_class
      batch_context[:origin_class]&.constantize || self.class
    end
 
+    def schema
+      return origin_class::SCHEMA if defined?(origin_class::SCHEMA)
+      raise NotImplementedError
+    end
+
+    def schema_digest
+      Digest::MD5.hexdigest(schema.to_json)[0...8]
+    end
+
    def table_is_incremental?(table_def)
+      return false unless incremental_since.present?
+
+      # TODO Return false if table's schema changes
      if (inc = table_def[:incremental]).present?
        differ = inc[:if]
        return !!incremental_since if differ.nil?
@@ -119,7 +167,7 @@ module InstDataShipper
 
      value = Array(value).compact
 
-
+      schema[:tables].each do |ts|
        return ts if value.include?(ts[key])
      end
    end
@@ -144,6 +192,14 @@ module InstDataShipper
      raise NotImplementedError
    end
 
+    def auto_enqueue_from_schema
+      schema[:tables].each do |table_def|
+        src = table_def[:sourcer]
+        next unless src.present?
+        instance_exec(table_def, &src)
+      end
+    end
+
    def upload_data(table_def, extra: nil, &datagen)
      # Allow muxing, allowing a hook to prevent some files going to certain destinations
      dests = destinations_for_table(table_def)
@@ -207,11 +263,6 @@ module InstDataShipper
 
    # Helper Methods
 
-    def table_schemas
-      return origin_class::TABLE_SCHEMAS if defined?(origin_class::TABLE_SCHEMAS)
-      raise NotImplementedError
-    end
-
    def delayed(mthd, *args, **kwargs)
      Jobs::AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
    end
@@ -231,7 +282,7 @@ module InstDataShipper
    end
 
    def destinations
-      @destinations ||= (@raw_destinations.presence || batch_context[:destinations]).map.with_index do |dest, i|
+      @destinations ||= (@raw_destinations.presence || batch_context[:destinations] || []).map.with_index do |dest, i|
        dcls = InstDataShipper.resolve_destination(dest)
        dcls.new("#{InstDataShipper.redis_prefix}:dump#{tracker.id}:dest#{i}", dest, self)
      end
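One of the additions above, `Dumper.current`, re-instantiates the originating dumper from the surrounding Sidekiq batch context. A brief usage sketch (call sites are not shown in this diff, so this is only illustrative):

```ruby
# Returns nil when not running under a dump's batch context.
if (dumper = InstDataShipper::Dumper.current)
  dumper.export_genre   # the originating dumper class name
  dumper.schema_digest  # first 8 hex chars of the schema's MD5 digest
end
```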
data/lib/inst_data_shipper/schema_builder.rb
CHANGED
@@ -2,21 +2,28 @@ module InstDataShipper
   # This class ends up fill two roles - Schema and Mapping.
   # It makes for a clean API, but it's a little less canonical since, (eg) the S3 destination doesn't need column type annotations.
   class SchemaBuilder
-    attr_reader :
+    attr_reader :schema
 
     def initialize
-      @
+      @schema = {
+        tables: [],
+      }
     end
 
     def self.build(&block)
       builder = new
       builder.instance_exec(&block)
-      builder.
+      builder.schema
+    end
+
+    def version(version)
+      @schema[:version] = version
     end
 
-    def extend_table_builder(&block)
+    def extend_table_builder(modul = nil, &block)
       @table_builder_class ||= Class.new(TableSchemaBuilder)
-      @table_builder_class.class_eval(&block)
+      @table_builder_class.class_eval(&block) if block.present?
+      @table_builder_class.extend(modul) if modul.present?
     end
 
     def table(model_or_name, description = nil, model: nil, query: nil, **extra, &block)
@@ -36,6 +43,7 @@ module InstDataShipper
 
        tdef[:query] = model_or_name
        tdef[:model] = model_or_name.model
+        tdef[:warehouse_name] = model_or_name.model.table_name
      elsif model_or_name.is_a?(Class) && model_or_name < ActiveRecord::Base
        tdef[:warehouse_name] = model_or_name.table_name
        tdef[:model] = model_or_name
@@ -43,9 +51,9 @@ module InstDataShipper
        tdef[:warehouse_name] = model_or_name
      end
 
-      @table_builder_class.build(tdef, &block)
+      (@table_builder_class || TableSchemaBuilder).build(tdef, &block)
 
-      @tables << tdef
+      @schema[:tables] << tdef
 
      tdef
    end
@@ -61,14 +69,18 @@ module InstDataShipper
    def self.build(tdef, &block)
      builder = new(tdef)
      builder.instance_exec(&block)
-      builder.
+      builder.options
+    end
+
+    def annotate(key, value)
+      options[key] = value
    end
 
-
-
-
+    def version(version)
+      options[:version] = version
+    end
 
-    def incremental(scope=
+    def incremental(scope=nil, **kwargs)
      if (extras = kwargs.keys - %i[on if]).present?
        raise ArgumentError, "Unsuppored options: #{extras.inspect}"
      end
@@ -80,6 +92,17 @@ module InstDataShipper
      }
    end
 
+    def source(source, override_model=nil, **kwargs)
+      raise "Source already set" if options[:sourcer].present?
+
+      if source.is_a?(Symbol)
+        mthd = :"import_#{source}"
+        options = self.options
+        source = ->(table_def) { send(mthd, override_model || options[:model] || options[:warehouse_name], **kwargs) }
+      end
+      options[:sourcer] = source
+    end
+
    def column(name, *args, refs: [], from: nil, **extra, &block)
      from ||= name.to_s
 
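Taken together, the SchemaBuilder changes above add `version`, `annotate`, and `source` to the schema DSL. A minimal sketch of a schema exercising them (model and values are placeholders, mirroring the README example earlier in this diff):

```ruby
SCHEMA = InstDataShipper::SchemaBuilder.build do
  version "1.0.0"                 # stored on the schema as schema[:version]

  table(ALocalModel, "Example table") do
    annotate :sla, "daily"        # arbitrary key/value stored on the table options
    source :local_table           # applied by auto_enqueue_from_schema as import_local_table(...)
    column :id, :bigint
  end
end
```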
data/lib/inst_data_shipper.rb
CHANGED
@@ -39,6 +39,7 @@ module InstDataShipper
 
     def logger
       return @logger if defined? @logger
+      # TODO Annotate logs with DumpBatch ID
       @logger = Logger.new(STDOUT)
       @logger.level = Logger::DEBUG
       @logger
@@ -49,7 +50,7 @@ module InstDataShipper
     end
 
     def redis_prefix
-      pfx = "
+      pfx = "ids"
       pfx = "#{Apartment::Tenant.current}:#{pfx}" if defined?(Apartment)
       pfx
     end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: inst_data_shipper
 version: !ruby/object:Gem::Version
-  version: 0.1
+  version: 0.2.1
 platform: ruby
 authors:
 - Instructure CustomDev
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-03-
+date: 2024-03-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rails
@@ -395,11 +395,11 @@ files:
 - lib/inst_data_shipper/destinations/concerns/chunking.rb
 - lib/inst_data_shipper/destinations/hosted_data.rb
 - lib/inst_data_shipper/destinations/s3.rb
+- lib/inst_data_shipper/destinations/speccable.rb
 - lib/inst_data_shipper/dumper.rb
 - lib/inst_data_shipper/engine.rb
 - lib/inst_data_shipper/jobs/async_caller.rb
 - lib/inst_data_shipper/jobs/base.rb
-- lib/inst_data_shipper/jobs/basic_dump_job.rb
 - lib/inst_data_shipper/record.rb
 - lib/inst_data_shipper/schema_builder.rb
 - lib/inst_data_shipper/version.rb
@@ -436,9 +436,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - "
+  - - ">="
     - !ruby/object:Gem::Version
-      version:
+      version: '0'
 requirements: []
 rubygems_version: 3.1.6
 signing_key: