inst_data_shipper 0.1.0.beta2 → 0.2.1
- checksums.yaml +4 -4
- data/README.md +152 -1
- data/db/migrate/20240301090836_create_inst_data_shipper_dump_batches.rb +3 -2
- data/lib/inst_data_shipper/basic_dumper.rb +1 -1
- data/lib/inst_data_shipper/data_sources/base.rb +25 -1
- data/lib/inst_data_shipper/data_sources/canvas_reports.rb +1 -1
- data/lib/inst_data_shipper/data_sources/local_tables.rb +13 -10
- data/lib/inst_data_shipper/destinations/base.rb +6 -2
- data/lib/inst_data_shipper/destinations/concerns/chunking.rb +2 -2
- data/lib/inst_data_shipper/destinations/hosted_data.rb +35 -2
- data/lib/inst_data_shipper/destinations/speccable.rb +17 -0
- data/lib/inst_data_shipper/dumper.rb +63 -12
- data/lib/inst_data_shipper/schema_builder.rb +35 -12
- data/lib/inst_data_shipper/version.rb +1 -1
- data/lib/inst_data_shipper.rb +2 -1
- metadata +5 -5
- data/lib/inst_data_shipper/jobs/basic_dump_job.rb +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4c84d0f1543bfcb42bce30e69ff12cc683b6f6bf51e9275f7529dd0b070bbb79
+  data.tar.gz: 22b5128af3f2d03b14565c656abca685c6e7196083576d1fd20a42a834690cba
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 14e5eb97d9c5dbe5049eb94cd3690a2ab59df7bf6e5e0c62b21e8893f4cb75ca294cce6d28a4b0f7dccd1c53d34d5c7a34be7b2f4699f1b320f74df14d50f672
+  data.tar.gz: e7d22728140a7fc331abb5f608da98d44013d49027bf932ef183a4f75babe797632aca83955cb77a205d6004d29a7377dde87aaba0c245a3564541b7e1f55cbe
data/README.md
CHANGED
@@ -1,6 +1,6 @@
 # InstDataShipper
 
-This gem is intended to facilitate
+This gem is intended to facilitate easy upload of LTI datasets to Instructure Hosted Data.
 
 ## Installation
 
@@ -16,6 +16,157 @@ Then run the migrations:
 bundle exec rake db:migrate
 ```
 
+## Usage
+
+### Dumper
+
+The main tool provided by this Gem is the `InstDataDumper::Dumper` class. It is used to define a "Dump", which is a combination of tasks and schema.
+
+Here is an example `Dumper` implementation, wrapped in an ActiveJob job:
+```ruby
+class HostedDataPushJob < ApplicationJob
+  # The schema serves two purposes: defining the schema and mapping data
+  SCHEMA = InstDataShipper::SchemaBuilder.build do
+    # You can augment the Table-builder DSL with custom methods like so:
+    extend_table_builder do
+      # It may be useful to define custom column-definition helpers:
+      def custom_column(*args, from: nil, **kwargs, &blk)
+        # In this example, the helper reads the value from a `data` jsonb column - without it, you'd need
+        # to define `from: ->(row) { row.data["<KEY>"] }` on each column that needs to read from the jsonb
+        from ||= args[0].to_s
+        from = ->(row) { row.data[from] } if from.is_a?(String)
+        column(*args, **kwargs, from: from, &blk)
+      end
+
+      # `extend_table_builder` uses `class_eval`, so you could alternatively write your helpers in a Concern or Module and include them like normal:
+      include SomeConcern
+    end
+
+    table(ALocalModel, "<TABLE DESCRIPTION>") do
+      # If you define a table as incremental, it'll only export changes made since the start of the last successful Dumper run.
+      # The first argument ("scope") can be interpreted in different ways:
+      #   If exporting a local model, it may be: (default: `updated_at`)
+      #     a Proc that will receive a Relation and return a Relation (use `incremental_since`)
+      #     a String of a column to compare with `incremental_since`
+      #   If exporting a Canvas report, it may be: (default: `updated_after`)
+      #     a Proc that will receive report params and return modified report params (use `incremental_since`)
+      #     a String of a report param to set to `incremental_since`
+      # `on:` is passed to Hosted Data and is used as the unique key. It may be an array to form a composite key.
+      # `if:` may be a Proc or a Symbol (of a method on the Dumper)
+      incremental "updated_at", on: [:id], if: ->() {}
+
+      # Schemas may declaratively define the data source.
+      # This can be used for basic schemas where there's a 1:1 mapping between source table and destination table, and there is no conditional logic that needs to be performed.
+      # In order to apply these statements, your Dumper must call `auto_enqueue_from_schema`.
+      source :local_table
+      # A Proc can also be passed. The below is equivalent to the above:
+      source ->(table_def) { import_local_table(table_def[:model] || table_def[:warehouse_name]) }
+
+      column :name_in_destinations, :maybe_optional_sql_type, "Optional description of column"
+
+      # The type may usually be omitted if the `table()` is passed a Model class, but strings are an exception to this
+      custom_column :name, :"varchar(128)"
+
+      # `from:` may be...
+      # a Symbol of a method to be called on the record:
+      custom_column :sis_type, :"varchar(32)", from: :some_model_method
+      # a String of a column to read from the record:
+      custom_column :sis_type, :"varchar(32)", from: "sis_source_type"
+      # a Proc to be called with each record:
+      custom_column :sis_type, :"varchar(32)", from: ->(rec) { ... }
+      # or not specified, defaulting to the Schema Column Name as a String ("sis_type" in this case):
+      custom_column :sis_type, :"varchar(32)"
+    end
+
+    table("my_table", model: ALocalModel) do
+      # ...
+    end
+
+    table("proserv_student_submissions_csv") do
+      column :canvas_id, :bigint, from: "canvas user id"
+      column :sis_id, :"varchar(64)", from: "sis user id"
+      column :name, :"varchar(64)", from: "user name"
+      column :submission_id, :bigint, from: "submission id"
+    end
+  end
+
+  Dumper = InstDataShipper::Dumper.define(schema: SCHEMA, include: [
+    InstDataShipper::DataSources::LocalTables,
+    InstDataShipper::DataSources::CanvasReports,
+  ]) do
+    import_local_table(ALocalModel)
+    import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+    # If the report_name/Model doesn't directly match the Schema, a schema_name: parameter may be passed:
+    import_local_table(SomeModel, schema_name: "my_table")
+    import_canvas_report_by_terms("some_report", terms: Term.all.pluck(:canvas_id), schema_name: "my_table")
+
+    # Iterate through the Tables defined in the Schema and apply any defined `source` statements.
+    # This is the default behavior if `define()` is called w/o a block.
+    auto_enqueue_from_schema
+  end
+
+  def perform
+    Dumper.perform_dump([
+      "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+      "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+    ])
+  end
+end
+```
+
+`Dumper`s may also be formed as a normal Ruby subclass:
+```ruby
+class HostedDataPushJob < ApplicationJob
+  SCHEMA = InstDataShipper::SchemaBuilder.build do
+    # ...
+  end
+
+  class Dumper < InstDataShipper::Dumper
+    include InstDataShipper::DataSources::LocalTables
+    include InstDataShipper::DataSources::CanvasReports
+
+    def enqueue_tasks
+      import_local_table(ALocalModel)
+      import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+      # auto_enqueue_from_schema
+    end
+
+    def table_schemas
+      SCHEMA
+    end
+  end
+
+  def perform
+    Dumper.perform_dump([
+      "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+      "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+    ])
+  end
+end
+```
+
+### Destinations
+
+This Gem is mainly designed for use with Hosted Data, but it tries to abstract that a little to allow for other destinations/backends. Out of the box, support for Hosted Data and S3 is included.
+
+Destinations are passed as URI-formatted strings. Passing Hashes is also supported, but the format/keys are destination-specific.
+
+Destinations blindly accept URI Fragments (the `#` chunk at the end of the URI). These options are not used internally but will be made available as `dest.user_config`. Ideally these are in the same format as query parameters (`x=1&y=2`, which it will try to parse into a Hash), but it can be any string.
+
+#### Hosted Data
+`hosted-data://<JWT>@<HOSTED DATA SERVER>`
+
+##### Optional Parameters:
+- `table_prefix`: An optional string to prefix onto each table name in the schema when declaring the schema in Hosted Data
+
+#### S3
+`s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<optional path>`
+
+##### Optional Parameters:
+_None_
+
 ## Development
 
 When adding to or updating this gem, make sure you do the following:
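Since fragment options surface via `dest.user_config`, a destination URI can carry app-specific options through to hook code; a sketch (the fragment keys here are hypothetical):

```ruby
Dumper.perform_dump([
  "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example#notify=ops&level=info",
])
# In hook code, dest.user_config would then be a Hash like
# { "notify" => "ops", "level" => "info" } (per the query-style parsing noted above).
```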
data/db/migrate/20240301090836_create_inst_data_shipper_dump_batches.rb
CHANGED
@@ -7,11 +7,12 @@ class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
 
       t.string :job_class
       t.string :genre
+      t.string :batch_id
 
       t.string :exception
      t.text :backtrace
-      t.text :metadata
-      t.text :job_arguments
+      # t.text :metadata
+      # t.text :job_arguments
 
       t.timestamps
     end
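The new `batch_id` column is filled in by `dumper.rb` below (`tracker.update(batch_id: batch.bid)`), tying each `DumpBatch` row to its root Sidekiq batch. A sketch of reading it back (constant namespace assumed):

```ruby
# Assumes the DumpBatch model is exposed as InstDataShipper::DumpBatch.
InstDataShipper::DumpBatch.find(tracker_id).batch_id # => the dump's root Sidekiq bid
```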
data/lib/inst_data_shipper/data_sources/base.rb
CHANGED
@@ -1,7 +1,31 @@
 module InstDataShipper
   module DataSources
     module Base
-
+      # This could be a Concern, but we don't want Concern inheritance logic kicking in if it is included into another Concern.
+
+      def self.included(base)
+        base.extend ActiveSupport::Concern unless self.is_a?(Class) || base < ActiveSupport::Concern
+        base.extend ModuleHelperMethods
+        base.send(:include, Concern)
+      end
+
+      module Concern
+        extend ActiveSupport::Concern
+      end
+
+      module ModuleHelperMethods
+        def delayed(mthd_sym)
+          mthd = instance_method(mthd_sym)
+          pmthd_sym = :"_delayed_#{mthd_sym}"
+
+          alias_method pmthd_sym, mthd_sym
+          private pmthd_sym
+
+          define_method(mthd_sym) do |*args, **kwargs|
+            delayed(pmthd_sym, *args, **kwargs)
+          end
+        end
+      end
     end
   end
 end
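The `delayed` helper above is what powers declarations like `delayed def import_local_table(...)` in `local_tables.rb` below: it aliases the method body to a private `_delayed_*` name and redefines the public name to enqueue an `AsyncCaller` job that later invokes the alias. A sketch of the resulting call flow (`MyDumper` and `SomeModel` are hypothetical):

```ruby
class MyDumper < InstDataShipper::Dumper
  include InstDataShipper::DataSources::LocalTables
end

# Calling the public name only enqueues; roughly:
#   Jobs::AsyncCaller.perform_later("MyDumper", "_delayed_import_local_table", SomeModel)
# The worker then invokes the private alias, which holds the original body:
#   dumper.send(:_delayed_import_local_table, SomeModel)
MyDumper.new.import_local_table(SomeModel)
```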
data/lib/inst_data_shipper/data_sources/local_tables.rb
CHANGED
@@ -2,17 +2,11 @@ module InstDataShipper
   module DataSources
     # This module contains the logic for processing local AR tables
     module LocalTables
-
+      include Base
 
       public
 
-      def import_local_table(*args, **kwargs)
-        delayed(:_import_local_table, *args, **kwargs)
-      end
-
-      private
-
-      def _import_local_table(model, schema_name: nil)
+      delayed def import_local_table(model, schema_name: nil)
         model = model.safe_constantize if model.is_a?(String)
 
         table_def = lookup_table_schema!(schema_name, { model: model })
@@ -22,7 +16,12 @@ module InstDataShipper
         query = _resolve_model_query(query, table_def[:query])
 
         if table_is_incremental?(table_def)
-          query = _resolve_model_query(
+          query = _resolve_model_query(
+            query,
+            table_def.dig(:incremental, :scope),
+            string: ->(query, column) { query.where("#{column} > ?", incremental_since) },
+            default: "updated_at",
+          )
         end
 
         query.find_each do |m|
@@ -35,7 +34,11 @@ module InstDataShipper
         upload_data(table_def, &inner_block)
       end
 
-
+      private
+
+      def _resolve_model_query(relation, query, string: nil, default: nil)
+        return relation if query == false
+        query = default if query.nil?
         return relation if query.nil?
 
         if query.is_a?(Symbol)
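Concretely: for a table declared `incremental "updated_at"` in the schema, the String scope is resolved through the `string:` handler above, so the import effectively runs (using the README's `ALocalModel` for illustration):

```ruby
query = ALocalModel.all
query = query.where("updated_at > ?", incremental_since) # String scope via the string: handler
# A scope of false skips the filter entirely; a nil scope falls back to default: "updated_at".
```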
data/lib/inst_data_shipper/destinations/base.rb
CHANGED
@@ -3,7 +3,7 @@ module InstDataShipper
     class Base
       attr_reader :dumper
 
-      delegate :tracker, :
+      delegate :tracker, :schema, :working_dir, to: :dumper
 
       def initialize(cache_key, config, dumper)
         @cache_key = cache_key
@@ -11,9 +11,13 @@ module InstDataShipper
         @dumper = dumper
       end
 
+      # This method is called before taking any actions.
+      # It should be used to make any necessary state assumptions (eg, the HostedData destination checks for a previous dump to determine if it can use incremental_since)
+      def preinitialize_dump(context); end
+
       # This method is called before processing any data.
       # It should be used to initialize any external resources needed for the dump.
-      def initialize_dump; end
+      def initialize_dump(context); end
 
       # Yields an object (can be anything) that will be passed to `upload_data_chunk` as `chunk`.
       #
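Destinations now get two lifecycle hooks before data flows: `preinitialize_dump(context)` may inspect or adjust the batch context, and `initialize_dump(context)` allocates external resources. A minimal sketch of a conforming subclass (name and no-op bodies hypothetical):

```ruby
class NullDestination < InstDataShipper::Destinations::Base
  # Called before any actions are taken; may adjust context (eg, incremental_since).
  def preinitialize_dump(context); end

  # Called before any data is processed; set up external resources here.
  def initialize_dump(context); end
end
```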
data/lib/inst_data_shipper/destinations/concerns/chunking.rb
CHANGED
@@ -7,7 +7,7 @@ module InstDataShipper
       DEFAULT_CHUNK_SIZE = 100_000
 
       def chunk_data(generator, chunk_size: nil, **kwargs)
-        chunk_size ||= config
+        chunk_size ||= config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
         slice = 1
 
         btchr = CanvasSync::BatchProcessor.new(of: chunk_size) do |batch|
@@ -24,7 +24,7 @@ module InstDataShipper
 
       def group_key
         super.tap do |k|
-          k[:chunk_size] = config
+          k[:chunk_size] = config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
         end
       end
 
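Both call sites now fall back to `DEFAULT_CHUNK_SIZE` (100,000 rows) when the destination config carries no `:params`/`:chunk_size`. Assuming destination query parameters are what populate `config[:params]` (an assumption, by analogy with `table_prefix` in the README), a per-destination override might look like:

```ruby
# Assumption: URI query params are parsed into config[:params] for the destination.
Dumper.perform_dump([
  "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example&chunk_size=50000",
])
```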
data/lib/inst_data_shipper/destinations/hosted_data.rb
CHANGED
@@ -5,11 +5,43 @@ module InstDataShipper
     class HostedData < Base
       include Concerns::Chunking
 
-      def initialize_dump
+      def preinitialize_dump(context)
+        if context[:incremental_since].present?
+          begin
+            last_dump = hosted_data_client.get("api/v1/custom_dumps/last", {
+              status: 'imported',
+              # schema_version: convert_schema[:version],
+              tags: [
+                "ids-schema=#{dumper.schema_digest}",
+                "ids-genre=#{dumper.export_genre}",
+              ],
+            }).body.with_indifferent_access
+
+            if last_dump[:created_at] < context[:incremental_since]
+              InstDataShipper.logger.info("Last successful HostedData dump is older than incremental_since - bumping back incremental_since")
+              context[:incremental_since] = last_dump[:created_at]
+            end
+          rescue Faraday::ResourceNotFound
+            # TODO It'd be nice to make this per-table
+            InstDataShipper.logger.info("No Last successful HostedData dump of the same schema - not using incremental_since")
+            context[:incremental_since] = nil
+          end
+        end
+      end
+
+      def initialize_dump(context)
+        tags = [
+          "ids-schema=#{dumper.schema_digest}",
+          "ids-genre=#{dumper.export_genre}",
+        ]
+        tags << "ids-app=#{Rails.application.class.name.gsub(/::Application$/, '')}" if defined?(Rails) && Rails.application
+        tags << "ids-schema-version=#{schema[:version]}" if schema[:version].present?
+
         dump = hosted_data_client.post(
           'api/v1/custom_dumps/',
           reference_id: tracker.id,
           schema: convert_schema,
+          tags: tags,
         ).body.with_indifferent_access
 
         redis.hset(rk(:state), :dump_id, dump[:id])
@@ -62,6 +94,7 @@ module InstDataShipper
 
       def convert_schema
         definititions = {}
+        table_schemas = schema[:tables]
         table_schemas.each do |ts|
           ts = ts.dup
           tname = table_name(ts)
@@ -86,7 +119,7 @@ module InstDataShipper
         end
 
         {
-          version: "#{dumper.
+          version: "#{dumper.schema_digest}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
           definition: definititions,
         }
       end
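The net effect of `preinitialize_dump` is that `incremental_since` can only widen (move backwards): it gets anchored to the `created_at` of the last dump Hosted Data actually imported with matching `ids-schema`/`ids-genre` tags, and is cleared when no such dump exists. A worked sketch with assumed timestamps:

```ruby
context[:incremental_since] # => 2024-03-18 (from last_successful_tracker; assumed)
last_dump[:created_at]      # => 2024-03-15 (last imported HostedData dump; assumed)
# 2024-03-15 < 2024-03-18, so incremental_since is bumped back to 2024-03-15;
# rows changed between those two runs are re-exported instead of silently skipped.
```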
data/lib/inst_data_shipper/destinations/speccable.rb
ADDED
@@ -0,0 +1,17 @@
+module InstDataShipper
+  module Destinations
+    class Speccable < Base
+      include Concerns::Chunking
+
+      def chunk_data(generator, table:, extra: nil)
+        super(generator) do |batch, idx|
+          yield batch
+        end
+      end
+
+      def upload_data_chunk(table_def, chunk); end
+
+      def parse_configuration(uri); end
+    end
+  end
+end
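`Speccable` is an inert destination: chunking still runs, but uploads and configuration parsing are no-ops. Together with `Dumper#for_specs!` (added in `dumper.rb` below), it lets specs drive a dump without external services; a sketch (RSpec and `MyDumper` are hypothetical):

```ruby
it "wires up the speccable destination" do
  dumper = MyDumper.new.for_specs!
  expect(dumper.spec_destination).to be_a(InstDataShipper::Destinations::Speccable)
end
```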
data/lib/inst_data_shipper/dumper.rb
CHANGED
@@ -18,8 +18,34 @@ module InstDataShipper
       Class.new(self) do
         include(*include)
 
+        if blk.nil? && schema[:tables].any? { |t| t[:sourcer].present? }
+          blk = -> { auto_enqueue_from_schema }
+        elsif blk.nil?
+          raise ArgumentError, "Must provide a block or a schema with source definitions"
+        end
+
         define_method(:enqueue_tasks, &blk)
-        define_method(:
+        define_method(:schema) { schema }
+      end
+    end
+
+    def self.current(executor: nil)
+      cur_batch = Thread.current[CanvasSync::JobBatches::CURRENT_BATCH_THREAD_KEY]
+      ctx = cur_batch&.context || {}
+      return nil unless ctx[:origin_class].present? && ctx[:tracker_id].present?
+
+      clazz = ctx[:origin_class]
+      clazz = clazz.constantize if clazz.is_a?(String)
+      clazz.new(executor: executor)
+    end
+
+    if defined?(Rails) && Rails.env.test?
+      def for_specs!
+        @raw_destinations = ["speccable://nil"]
+        @executor = InstDataShipper::Jobs::AsyncCaller.new
+        @tracker = DumpBatch.new(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')
+        define_singleton_method(:spec_destination) { destinations.first }
+        self
       end
     end
 
@@ -31,15 +57,18 @@ module InstDataShipper
       @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')
 
       @batch_context = context = {
-        # TODO Allow to be hooked by Destination, likely via initialize_dump_batch and batch_context, so that if an earlier destination fails we can resend data
         # TODO Consider behavior if last is still running
-        incremental_since:
+        incremental_since: last_successful_tracker&.created_at,
       }
 
+      destinations.each do |dest|
+        dest.preinitialize_dump(context)
+      end
+
       begin
         begin
           destinations.each do |dest|
-            dest.initialize_dump()
+            dest.initialize_dump(context)
           end
 
           run_hook(:initialize_dump_batch, context)
@@ -52,6 +81,7 @@ module InstDataShipper
 
           Sidekiq::Batch.new.tap do |batch|
             context[:root_bid] = batch.bid
+            tracker.update(batch_id: batch.bid)
 
             batch.description = "HD #{export_genre} Export #{tracker.id} Root"
             batch.context = context
@@ -62,6 +92,7 @@ module InstDataShipper
         rescue => ex
           delayed :cleanup_fatal_error!
           InstDataShipper.handle_suppressed_error(ex)
+          tracker.update(status: 'failed', exception: ex.message, backtrace: ex.backtrace.join("\n"))
         end
       end
     rescue => ex
@@ -74,6 +105,7 @@ module InstDataShipper
           end
         end
       end
+      tracker.update(status: 'failed', exception: ex.message, backtrace: ex.backtrace.join("\n"))
       raise ex
     end
   end
@@ -82,15 +114,31 @@ module InstDataShipper
       @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
     end
 
+    def last_successful_tracker
+      @last_successful_tracker ||= DumpBatch.where(job_class: self.class.to_s, genre: export_genre, status: 'completed').order(created_at: :desc).first
+    end
+
     def export_genre
-      self.class.to_s
+      self.class.to_s
     end
 
     def origin_class
       batch_context[:origin_class]&.constantize || self.class
     end
 
+    def schema
+      return origin_class::SCHEMA if defined?(origin_class::SCHEMA)
+      raise NotImplementedError
+    end
+
+    def schema_digest
+      Digest::MD5.hexdigest(schema.to_json)[0...8]
+    end
+
     def table_is_incremental?(table_def)
+      return false unless incremental_since.present?
+
+      # TODO Return false if table's schema changes
       if (inc = table_def[:incremental]).present?
         differ = inc[:if]
         return !!incremental_since if differ.nil?
@@ -119,7 +167,7 @@ module InstDataShipper
 
       value = Array(value).compact
 
-      table_schemas.each do |ts|
+      schema[:tables].each do |ts|
         return ts if value.include?(ts[key])
       end
     end
@@ -144,6 +192,14 @@ module InstDataShipper
       raise NotImplementedError
     end
 
+    def auto_enqueue_from_schema
+      schema[:tables].each do |table_def|
+        src = table_def[:sourcer]
+        next unless src.present?
+        instance_exec(table_def, &src)
+      end
+    end
+
     def upload_data(table_def, extra: nil, &datagen)
       # Allow muxing, allowing a hook to prevent some files going to certain destinations
       dests = destinations_for_table(table_def)
@@ -207,11 +263,6 @@ module InstDataShipper
 
     # Helper Methods
 
-    def table_schemas
-      return origin_class::TABLE_SCHEMAS if defined?(origin_class::TABLE_SCHEMAS)
-      raise NotImplementedError
-    end
-
     def delayed(mthd, *args, **kwargs)
       Jobs::AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
     end
@@ -231,7 +282,7 @@ module InstDataShipper
     end
 
     def destinations
-      @destinations ||= (@raw_destinations.presence || batch_context[:destinations]).map.with_index do |dest, i|
+      @destinations ||= (@raw_destinations.presence || batch_context[:destinations] || []).map.with_index do |dest, i|
        dcls = InstDataShipper.resolve_destination(dest)
        dcls.new("#{InstDataShipper.redis_prefix}:dump#{tracker.id}:dest#{i}", dest, self)
      end
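`Dumper.current` rehydrates the active Dumper from the ambient Sidekiq batch context (`origin_class` plus `tracker_id`), returning `nil` when not running under a dump. A sketch of using it from hook or job code:

```ruby
# Inside code executing under a dump's Sidekiq batch:
if (dumper = InstDataShipper::Dumper.current)
  dumper.schema_digest # the digest embedded in the ids-schema tag
  dumper.export_genre  # the genre recorded on the DumpBatch tracker
end
```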
data/lib/inst_data_shipper/schema_builder.rb
CHANGED
@@ -2,21 +2,28 @@ module InstDataShipper
   # This class ends up filling two roles - Schema and Mapping.
   # It makes for a clean API, but it's a little less canonical since, (eg) the S3 destination doesn't need column type annotations.
   class SchemaBuilder
-    attr_reader :
+    attr_reader :schema
 
     def initialize
-      @
+      @schema = {
+        tables: [],
+      }
     end
 
     def self.build(&block)
       builder = new
       builder.instance_exec(&block)
-      builder.
+      builder.schema
+    end
+
+    def version(version)
+      @schema[:version] = version
     end
 
-    def extend_table_builder(&block)
+    def extend_table_builder(modul = nil, &block)
       @table_builder_class ||= Class.new(TableSchemaBuilder)
-      @table_builder_class.class_eval(&block)
+      @table_builder_class.class_eval(&block) if block.present?
+      @table_builder_class.extend(modul) if modul.present?
     end
 
     def table(model_or_name, description = nil, model: nil, query: nil, **extra, &block)
@@ -36,6 +43,7 @@ module InstDataShipper
 
       tdef[:query] = model_or_name
       tdef[:model] = model_or_name.model
+      tdef[:warehouse_name] = model_or_name.model.table_name
     elsif model_or_name.is_a?(Class) && model_or_name < ActiveRecord::Base
       tdef[:warehouse_name] = model_or_name.table_name
       tdef[:model] = model_or_name
@@ -43,9 +51,9 @@ module InstDataShipper
       tdef[:warehouse_name] = model_or_name
     end
 
-    @table_builder_class.build(tdef, &block)
+    (@table_builder_class || TableSchemaBuilder).build(tdef, &block)
 
-    @tables << tdef
+    @schema[:tables] << tdef
 
     tdef
   end
@@ -61,14 +69,18 @@ module InstDataShipper
     def self.build(tdef, &block)
       builder = new(tdef)
       builder.instance_exec(&block)
-      builder.
+      builder.options
+    end
+
+    def annotate(key, value)
+      options[key] = value
     end
 
-
-
-
+    def version(version)
+      options[:version] = version
+    end
 
-    def incremental(scope=
+    def incremental(scope=nil, **kwargs)
       if (extras = kwargs.keys - %i[on if]).present?
         raise ArgumentError, "Unsuppored options: #{extras.inspect}"
       end
@@ -80,6 +92,17 @@ module InstDataShipper
       }
     end
 
+    def source(source, override_model=nil, **kwargs)
+      raise "Source already set" if options[:sourcer].present?
+
+      if source.is_a?(Symbol)
+        mthd = :"import_#{source}"
+        options = self.options
+        source = ->(table_def) { send(mthd, override_model || options[:model] || options[:warehouse_name], **kwargs) }
+      end
+      options[:sourcer] = source
+    end
+
     def column(name, *args, refs: [], from: nil, **extra, &block)
       from ||= name.to_s
 
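With the builder now accumulating into `@schema`, the schema-level `version` and per-table `source` DSL methods compose like so; a sketch (the model is hypothetical):

```ruby
SCHEMA = InstDataShipper::SchemaBuilder.build do
  version "1.0.0" # stored as schema[:version]; surfaced as an ids-schema-version tag by HostedData

  table(ALocalModel, "Example table") do
    # The Symbol form expands to import_local_table(model || warehouse_name)
    # when the Dumper calls auto_enqueue_from_schema.
    source :local_table
    column :id, :bigint
  end
end
```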
data/lib/inst_data_shipper.rb
CHANGED
@@ -39,6 +39,7 @@ module InstDataShipper
 
   def logger
     return @logger if defined? @logger
+    # TODO Annotate logs with DumpBatch ID
     @logger = Logger.new(STDOUT)
     @logger.level = Logger::DEBUG
     @logger
@@ -49,7 +50,7 @@ module InstDataShipper
   end
 
   def redis_prefix
-    pfx = "
+    pfx = "ids"
     pfx = "#{Apartment::Tenant.current}:#{pfx}" if defined?(Apartment)
     pfx
   end
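With the base prefix fixed as `ids`, Redis keys remain tenant-scoped when Apartment is loaded:

```ruby
InstDataShipper.redis_prefix # => "ids", or "<tenant>:ids" when Apartment is defined
```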
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: inst_data_shipper
 version: !ruby/object:Gem::Version
-  version: 0.1.0.beta2
+  version: 0.2.1
 platform: ruby
 authors:
 - Instructure CustomDev
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-03-
+date: 2024-03-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rails
@@ -395,11 +395,11 @@ files:
 - lib/inst_data_shipper/destinations/concerns/chunking.rb
 - lib/inst_data_shipper/destinations/hosted_data.rb
 - lib/inst_data_shipper/destinations/s3.rb
+- lib/inst_data_shipper/destinations/speccable.rb
 - lib/inst_data_shipper/dumper.rb
 - lib/inst_data_shipper/engine.rb
 - lib/inst_data_shipper/jobs/async_caller.rb
 - lib/inst_data_shipper/jobs/base.rb
-- lib/inst_data_shipper/jobs/basic_dump_job.rb
 - lib/inst_data_shipper/record.rb
 - lib/inst_data_shipper/schema_builder.rb
 - lib/inst_data_shipper/version.rb
@@ -436,9 +436,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ">"
+  - - ">="
   - !ruby/object:Gem::Version
-    version: 1.3.1
+    version: '0'
 requirements: []
 rubygems_version: 3.1.6
 signing_key: