inst_data_shipper 0.1.0.beta1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +139 -1
- data/db/migrate/{20240301090836_create_canvas_sync_sync_batches.rb → 20240301090836_create_inst_data_shipper_dump_batches.rb} +6 -3
- data/lib/inst_data_shipper/basic_dumper.rb +2 -2
- data/lib/inst_data_shipper/concerns/hooks.rb +18 -4
- data/lib/inst_data_shipper/data_sources/canvas_reports.rb +58 -21
- data/lib/inst_data_shipper/data_sources/local_tables.rb +35 -7
- data/lib/inst_data_shipper/destinations/base.rb +33 -6
- data/lib/inst_data_shipper/destinations/hosted_data.rb +63 -19
- data/lib/inst_data_shipper/destinations/s3.rb +1 -1
- data/lib/inst_data_shipper/dumper.rb +158 -50
- data/lib/inst_data_shipper/engine.rb +6 -0
- data/lib/inst_data_shipper/jobs/async_caller.rb +10 -2
- data/lib/inst_data_shipper/schema_builder.rb +99 -37
- data/lib/inst_data_shipper/version.rb +1 -1
- data/lib/inst_data_shipper.rb +13 -3
- data/spec/spec_helper.rb +2 -2
- metadata +22 -9
- data/lib/inst_data_shipper/jobs/basic_dump_job.rb +0 -11
- data/app/models/{hosted_data_dumper → inst_data_shipper}/dump_batch.rb +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f7909aa44e9dabd1d43d58a5a3c2c081891104d64336294dce287c06804804df
+  data.tar.gz: 5da874689ac1de3e016a7feefce5866b211e6f7595021b565564f796685ed104
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cd81e6c26e2416ce1a32de588e04f560496cfb7cfdac3f4c837828a1c65798bec405d98197032b0d8935a1ba2b24a291aa25f1b73a469ac7a9c6ef8d2286103f
+  data.tar.gz: 66c5ccfd82128e8c5dc39c7c937ee7f4f9412743b7202e221e53c575d4b0e572f0b014b4f41ae5924b1d2d119a05cd5de2acbae4eb81022df844a1fea181faec
data/README.md
CHANGED
@@ -1,6 +1,6 @@
 # InstDataShipper
 
-This gem is intended to facilitate
+This gem is intended to facilitate easy upload of LTI datasets to Instructure Hosted Data.
 
 ## Installation
 
@@ -16,6 +16,144 @@ Then run the migrations:
 bundle exec rake db:migrate
 ```
 
+## Usage
+
+### Dumper
+
+The main tool provided by this Gem is the `InstDataShipper::Dumper` class. It is used to define a "Dump", which is a combination of tasks and schema.
+
+Here is an example `Dumper` implementation, wrapped in an ActiveJob job:
+```ruby
+class HostedDataPushJob < ApplicationJob
+  # The schema serves two purposes: defining the schema and mapping data
+  SCHEMA = InstDataShipper::SchemaBuilder.build do
+    # You can augment the Table-builder DSL with custom methods like so:
+    extend_table_builder do
+      # It may be useful to define custom column definition helpers:
+      def custom_column(*args, from: nil, **kwargs, &blk)
+        # In this example, the helper reads the value from a `data` jsonb column - without it, you'd need
+        # to define `from: ->(row) { row.data["<KEY>"] }` on each column that needs to read from the jsonb
+        from ||= args[0].to_s
+        from = ->(row) { row.data[from] } if from.is_a?(String)
+        column(*args, **kwargs, from: from, &blk)
+      end
+
+      # `extend_table_builder` uses `class_eval`, so you could alternatively write your helpers in a Concern or Module and include them like normal:
+      include SomeConcern
+    end
+
+    table(ALocalModel, "<TABLE DESCRIPTION>") do
+      # If you define a table as incremental, it'll only export changes made since the start of the last successful Dumper run.
+      # The first argument ("scope") can be interpreted in different ways:
+      #   If exporting a local model, it may be a: (default: `updated_at`)
+      #     Proc that will receive a Relation and return a Relation (use `incremental_since`)
+      #     String of a column to compare with `incremental_since`
+      #   If exporting a Canvas report, it may be a: (default: `updated_after`)
+      #     Proc that will receive report params and return modified report params (use `incremental_since`)
+      #     String of a report param to set to `incremental_since`
+      # `on:` is passed to Hosted Data and is used as the unique key. It may be an array to form a composite key.
+      # `if:` may be a Proc or a Symbol (of a method on the Dumper)
+      incremental "updated_at", on: [:id], if: ->() {}
+
+      column :name_in_destinations, :maybe_optional_sql_type, "Optional description of column"
+
+      # The type may usually be omitted if the `table()` is passed a Model class, but strings are an exception to this
+      custom_column :name, :"varchar(128)"
+
+      # `from:` may be...
+      # A Symbol of a method to be called on the record
+      custom_column :sis_type, :"varchar(32)", from: :some_model_method
+      # A String of a column to read from the record
+      custom_column :sis_type, :"varchar(32)", from: "sis_source_type"
+      # A Proc to be called with each record
+      custom_column :sis_type, :"varchar(32)", from: ->(rec) { ... }
+      # Not specified. Will default to using the Schema Column Name as a String ("sis_type" in this case)
+      custom_column :sis_type, :"varchar(32)"
+    end
+
+    table("my_table", model: ALocalModel) do
+      # ...
+    end
+
+    table("proserv_student_submissions_csv") do
+      column :canvas_id, :bigint, from: "canvas user id"
+      column :sis_id, :"varchar(64)", from: "sis user id"
+      column :name, :"varchar(64)", from: "user name"
+      column :submission_id, :bigint, from: "submission id"
+    end
+  end
+
+  Dumper = InstDataShipper::Dumper.define(schema: SCHEMA, include: [
+    InstDataShipper::DataSources::LocalTables,
+    InstDataShipper::DataSources::CanvasReports,
+  ]) do
+    import_local_table(ALocalModel)
+    import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+    # If the report_name/Model don't directly match the Schema, a schema_name: parameter may be passed:
+    import_local_table(SomeModel, schema_name: "my_table")
+    import_canvas_report_by_terms("some_report", terms: Term.all.pluck(:canvas_id), schema_name: "my_table")
+  end
+
+  def perform
+    Dumper.perform_dump([
+      "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+      "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+    ])
+  end
+end
+```
+
+`Dumper`s may also be formed as a normal Ruby subclass:
+```ruby
+class HostedDataPushJob < ApplicationJob
+  SCHEMA = InstDataShipper::SchemaBuilder.build do
+    # ...
+  end
+
+  class Dumper < InstDataShipper::Dumper
+    include InstDataShipper::DataSources::LocalTables
+    include InstDataShipper::DataSources::CanvasReports
+
+    def enqueue_tasks
+      import_local_table(ALocalModel)
+      import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+    end
+
+    def table_schemas
+      SCHEMA
+    end
+  end
+
+  def perform
+    Dumper.perform_dump([
+      "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+      "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+    ])
+  end
+end
+```
+
+### Destinations
+
+This Gem is mainly designed for use with Hosted Data, but it tries to abstract that a little to allow for other destinations/backends. Out of the box, support for Hosted Data and S3 is included.
+
+Destinations are passed as URI-formatted strings. Passing Hashes is also supported, but the format/keys are destination specific.
+
+Destinations blindly accept URI Fragments (the `#` chunk at the end of the URI). These options are not used internally but will be made available as `dest.user_config`. Ideally these are in the same format as query parameters (`x=1&y=2`, which it will try to parse into a Hash), but it can be any string.
+
+#### Hosted Data
+`hosted-data://<JWT>@<HOSTED DATA SERVER>`
+
+##### Optional Parameters:
+- `table_prefix`: An optional string to prefix onto each table name in the schema when declaring the schema in Hosted Data
+
+#### S3
+`s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<optional path>`
+
+##### Optional Parameters:
+_None_
+
 ## Development
 
 When adding to or updating this gem, make sure you do the following:
data/db/migrate/{20240301090836_create_canvas_sync_sync_batches.rb → 20240301090836_create_inst_data_shipper_dump_batches.rb}
CHANGED
@@ -1,4 +1,4 @@
-class
+class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
   def change
     create_table :inst_data_shipper_dump_batches do |t|
       t.datetime :started_at
@@ -6,10 +6,13 @@ class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
       t.string :status
 
       t.string :job_class
+      t.string :genre
+      t.string :batch_id
+
       t.string :exception
       t.text :backtrace
-      t.text :metadata
-      t.text :job_arguments
+      # t.text :metadata
+      # t.text :job_arguments
 
       t.timestamps
     end
data/lib/inst_data_shipper/concerns/hooks.rb
CHANGED
@@ -9,21 +9,35 @@ module InstDataShipper
       end
 
       def hook(name, prepend: false, &block)
+        _assert_hook_defined(name)
+        @hooks ||= {}
+        @hooks[name] ||= []
         hooks = @hooks[name]
         prepend ? hooks.unshift(block) : hooks << block
       end
+
+      def _assert_hook_defined(name)
+        return true if @hooks&.key?(name)
+        return if superclass.respond_to?(:_assert_hook_defined) && superclass._assert_hook_defined(name)
+        raise ArgumentError, "Hook #{name} is not defined"
+      end
+
+      def _list_hooks(name)
+        list = []
+        list.push(*superclass._list_hooks(name)) if superclass.respond_to?(:_list_hooks)
+        list.push(*@hooks[name]) if (@hooks || {})[name]
+        list
+      end
     end
 
     def run_hook(name, *args, **kwargs)
-
-      hooks.each do |blk|
+      self.class._list_hooks(name).each do |blk|
        instance_exec(*args, **kwargs, &blk)
      end
    end
 
    def run_hook_safe(name, *args, **kwargs)
-
-      hooks.each do |blk|
+      self.class._list_hooks(name).each do |blk|
       instance_exec(*args, **kwargs, &blk)
     rescue StandardError
     end
data/lib/inst_data_shipper/data_sources/canvas_reports.rb
CHANGED
@@ -27,11 +27,25 @@ module InstDataShipper
         _in_canvas_report_pool(:_import_canvas_report, *args, **kwargs)
       end
 
-      def import_canvas_report_by_terms(
+      def import_canvas_report_by_terms(*args, **kwargs)
+        _in_canvas_report_pool(:_import_canvas_report_by_terms, *args, **kwargs)
+      end
+
+      def import_existing_report(report, **kwargs)
+        delayed(:_process_canvas_report, report: report, **kwargs)
+      end
+
+      private
+
+      def _import_canvas_report_by_terms(report_name, terms: [], params: {}, **kwargs)
         term_ids = (terms || []).map do |term|
           term.is_a?(Term) ? term.canvas_id : term
         end
 
+        table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
+
+        _resolve_report_incremenal_parameters(table_def, params)
+
         Sidekiq::Batch.new.tap do |b|
           b.description = "Term Scoped #{report_name} Runners"
           b.context = {
@@ -40,19 +54,21 @@ module InstDataShipper
           b.jobs do
             terms_query = term_ids.present? ? Term.where(canvas_id: term_ids) : Term
             terms_query.find_each do |t|
-
+              _in_canvas_report_pool(:_trigger_canvas_report, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
             end
           end
         end
       end
 
-      def
-
-      end
+      def _import_canvas_report(report_name, params: {}, **kwargs)
+        table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
 
-
+        _resolve_report_incremenal_parameters(table_def, params)
+
+        _trigger_canvas_report(report_name, params: params, **kwargs)
+      end
 
-      def
+      def _trigger_canvas_report(report_name, retry_count: 3, params: {}, **kwargs)
         report = canvas_sync_client.start_report(
           'self', report_name,
           parameters: params,
@@ -61,15 +77,13 @@ module InstDataShipper
         CanvasSync::Jobs::CanvasProcessWaiter.perform_later(
           "/api/v1/accounts/self/reports/#{report_name}/#{report[:id]}",
           {
-
-
-            args: [target_table],
+            job: Jobs::AsyncCaller,
+            args: [origin_class, :_process_canvas_report],
             kwargs: kwargs,
           },
           on_failure: {
-
-
-            args: [target_table, report_name, kwargs],
+            job: Jobs::AsyncCaller,
+            args: [origin_class, :_handle_failed_canvas_report, report_name, kwargs],
             kwargs: { retry_count: retry_count },
           },
           status_key: :status,
@@ -79,18 +93,18 @@ module InstDataShipper
 
       def _in_canvas_report_pool(mthd, *args, **kwargs)
         pool = CanvasSync::JobBatches::Pool.from_pid(batch_context[:report_processor_pool])
-        AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
+        Jobs::AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
       end
 
-      def _process_canvas_report(
-        table_def =
+      def _process_canvas_report(report:, schema_name: nil)
+        table_def = lookup_table_schema!(schema_name, report[:report])
 
-        IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}
+        IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/temp_report.csv")
 
         inner_block = ->(file) {
-          CSV.foreach("#{working_dir}
+          CSV.foreach("#{working_dir}/temp_report.csv", headers: true) do |m|
             file << table_def[:columns].map do |c|
-
+              instance_exec(m, &c[:block])
             end
           end
         }
@@ -98,13 +112,36 @@ module InstDataShipper
         upload_data(table_def, extra: report['id'], &inner_block)
       end
 
-      def
+      def _resolve_report_incremenal_parameters(table_def, params)
+        if table_is_incremental?(table_def)
+          inc = table_def[:incremental]
+          scope = inc[:scope]
+
+          if scope != false
+            scope ||= "updated_after"
+
+            if scope.is_a?(Proc)
+              scope = instance_exec(params, &scope)
+              if scope.is_a?(Hash) && scope != params
+                params.merge!(scope)
+              end
+            elsif scope.is_a?(String) || scope.is_a?(Symbol)
+              params[scope] = incremental_since
+            end
+          end
+        end
+
+        params
+      end
+
+      def _handle_failed_canvas_report(report_name, kwargs, retry_count:, report:)
         if retry_count.positive?
           tbid = batch_context[:report_bid] || batch_context[:root_bid]
           Sidekiq::Batch.new(tbid).jobs do
-
+            _in_canvas_report_pool(:_trigger_canvas_report, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
           end
         else
+          # TODO Allow marking the table as incomplete. Destination code can then decide how to handle incomplete tables since (eg) incremental imports wouldn't mind too much
          cleanup_fatal_error!
         end
       end
data/lib/inst_data_shipper/data_sources/local_tables.rb
CHANGED
@@ -12,22 +12,50 @@ module InstDataShipper
 
       private
 
-      def _import_local_table(
-
-
+      def _import_local_table(model, schema_name: nil)
+        model = model.safe_constantize if model.is_a?(String)
+
+        table_def = lookup_table_schema!(schema_name, { model: model })
 
         inner_block = ->(file) {
-          query = model
-          query = query
-
+          query = model.all
+          query = _resolve_model_query(query, table_def[:query])
+
+          if table_is_incremental?(table_def)
+            query = _resolve_model_query(
+              query,
+              table_def.dig(:incremental, :scope),
+              string: ->(query, column) { query.where("#{column} > ?", incremental_since) },
+              default: "updated_at",
+            )
+          end
+
+          query.find_each do |m|
            file << table_def[:columns].map do |c|
-
+              instance_exec(m, &c[:block])
            end
          end
        }
 
        upload_data(table_def, &inner_block)
      end
+
+      def _resolve_model_query(relation, query, string: nil, default: nil)
+        return relation if query == false
+        query = default if query.nil?
+        return relation if query.nil?
+
+        if query.is_a?(Symbol)
+          relation.send(query)
+        elsif query.is_a?(Proc)
+          instance_exec(relation, &query)
+        elsif query.is_a?(String) && string.present?
+          instance_exec(relation, query, &string)
+        else
+          raise "Invalid query: #{query.inspect}"
+        end
+      end
+
     end
   end
 end
data/lib/inst_data_shipper/destinations/base.rb
CHANGED
@@ -3,7 +3,7 @@ module InstDataShipper
     class Base
       attr_reader :dumper
 
-      delegate :tracker, :
+      delegate :tracker, :schema, :working_dir, to: :dumper
 
       def initialize(cache_key, config, dumper)
         @cache_key = cache_key
@@ -11,9 +11,13 @@ module InstDataShipper
         @dumper = dumper
       end
 
+      # This method is called before taking any actions.
+      # It should be used to make any necessary state assumptions (eg, the HostedData destination checks for a previous dump to determine if it can use incremental_since)
+      def preinitialize_dump(context); end
+
       # This method is called before processing any data.
       # It should be used to initialize any external resources needed for the dump.
-      def initialize_dump; end
+      def initialize_dump(context); end
 
       # Yields an object (can be anything) that will be passed to `upload_data_chunk` as `chunk`.
       #
@@ -50,7 +54,7 @@ module InstDataShipper
       end
 
       def user_config
-        config[:
+        config[:user_config]
       end
 
       def group_key
@@ -62,11 +66,11 @@ module InstDataShipper
       def parse_configuration(uri)
         if block_given?
           parsed = URI.parse(uri)
+          cparsed = ConfigURI.new(parsed)
           cfg = {
-
-            extra: (parsed.fragment.present? && parsed.fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(parsed.fragment)).presence || parsed.fragment || nil,
+            user_config: cparsed.hash_params,
           }
-          yield
+          yield cparsed, cfg
           cfg
         else
           raise NotImplementedError
@@ -100,5 +104,28 @@ module InstDataShipper
       end
 
     end
+
+    class ConfigURI
+      def initialize(uri)
+        @uri = uri
+      end
+
+      # delegate_missing_to :uri
+      delegate :scheme, :user, :password, :host, :hostname, :port, :path, :query, :fragment, to: :uri
+
+      def params
+        @params ||= (query.present? ? Rack::Utils.parse_nested_query(query).with_indifferent_access : {}).freeze
+      end
+
+      def hash_params
+        @hash_params ||= ((fragment.present? && fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(fragment).with_indifferent_access).presence || fragment || nil)&.freeze
+      end
+
+      private
+
+      def uri
+        @uri
+      end
+    end
   end
 end
data/lib/inst_data_shipper/destinations/hosted_data.rb
CHANGED
@@ -1,13 +1,47 @@
+require "faraday_middleware"
+
 module InstDataShipper
   module Destinations
     class HostedData < Base
       include Concerns::Chunking
 
-      def
+      def preinitialize_dump(context)
+        if context[:incremental_since].present?
+          begin
+            last_dump = hosted_data_client.get("api/v1/custom_dumps/last", {
+              status: 'imported',
+              # schema_version: convert_schema[:version],
+              tags: [
+                "ids-schema=#{dumper.schema_digest}",
+                "ids-genre=#{dumper.export_genre}",
+              ],
+            }).body.with_indifferent_access
+
+            if last_dump[:created_at] < context[:incremental_since]
+              InstDataShipper.logger.info("Last successful HostedData dump is older than incremental_since - bumping back incremental_since")
+              context[:incremental_since] = last_dump[:created_at]
+            end
+          rescue Faraday::ResourceNotFound
+            # TODO It'd be nice to make this per-table
+            InstDataShipper.logger.info("No Last successful HostedData dump of the same schema - not using incremental_since")
+            context[:incremental_since] = nil
+          end
+        end
+      end
+
+      def initialize_dump(context)
+        tags = [
+          "ids-schema=#{dumper.schema_digest}",
+          "ids-genre=#{dumper.export_genre}",
+        ]
+        tags << "ids-app=#{Rails.application.class.name.gsub(/::Application$/, '')}" if defined?(Rails) && Rails.application
+        tags << "ids-schema-version=#{schema[:version]}" if schema[:version].present?
+
         dump = hosted_data_client.post(
           'api/v1/custom_dumps/',
           reference_id: tracker.id,
           schema: convert_schema,
+          tags: tags,
         ).body.with_indifferent_access
 
         redis.hset(rk(:state), :dump_id, dump[:id])
@@ -15,7 +49,7 @@ module InstDataShipper
       end
 
       def chunk_data(generator, table:, extra: nil)
-        warehouse_name =
+        warehouse_name = table[:warehouse_name]
 
         super(generator) do |batch, idx|
          bits = [warehouse_name, extra, idx].compact
@@ -36,18 +70,18 @@ module InstDataShipper
 
       def upload_data_chunk(table_def, chunk)
         hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", artifacts: {
-          table_def
+          table_name(table_def) => [Faraday::UploadIO.new(chunk, 'application/gzip')],
         })
       end
 
       def finalize_dump
         hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", start_import: true) if hd_dump_id.present?
-        redis.
+        redis.del(rk(:state))
       end
 
       def cleanup_fatal_error
         hosted_data_client.delete("api/v1/custom_dumps/#{hd_dump_id}/", reason: 'Failure during extraction or transformation') if hd_dump_id.present?
-        redis.
+        redis.del(rk(:state))
       end
 
       # TODO Support/allow single-table fatal errors?
@@ -59,39 +93,45 @@ module InstDataShipper
       end
 
       def convert_schema
-        table_prefix = config[:table_prefix]
-        table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
-
         definititions = {}
+        table_schemas = schema[:tables]
         table_schemas.each do |ts|
           ts = ts.dup
+          tname = table_name(ts)
 
-
-          table_name = table_prefix + table_name if table_prefix.present?
-
-          definititions[ts[:warehouse_name]] = {
+          definititions[tname] = {
             dw_type: 'dimension',
             description: ts[:description],
-            incremental:
-            incremental_on: ts
+            incremental: dumper.table_is_incremental?(ts),
+            incremental_on: ts.dig(:incremental, :on),
             # indexed_columns
-            tableName:
+            tableName: tname,
             columns: ts[:columns].map do |col|
+              coltype = col[:type]
+              coltype ||= ts[:model].column_for_attribute(col[:from]).sql_type if col[:from].is_a?(String)
               {
                 name: col[:warehouse_name],
                 description: col[:description],
-                type:
+                type: coltype,
               }
             end,
           }
         end
 
         {
-          version: "#{dumper.
+          version: "#{dumper.schema_digest}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
           definition: definititions,
         }
       end
 
+      def table_name(table_def)
+        table_prefix = config[:table_prefix]
+        table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
+        table_name = table_def[:warehouse_name]
+        table_name = table_prefix + table_name if table_prefix.present?
+        table_name
+      end
+
       def hosted_data_client
         @hosted_data_client ||= begin
           token = config[:token]
@@ -102,6 +142,8 @@ module InstDataShipper
           host = tok_content['host']
         end
 
+        host = "https://#{host}" unless host.include?('://')
+
         Faraday.new(url: host) do |faraday|
           faraday.request :multipart
           faraday.request :json
@@ -117,14 +159,16 @@ module InstDataShipper
 
       def parse_configuration(uri)
         super do |parsed_uri, cfg|
-          if parsed_uri.
+          if parsed_uri.user.present?
             # hosted-data://<JWT>:<hosted_data_domain>
-            cfg[:token] = parsed_uri.
+            cfg[:token] = parsed_uri.user
             cfg[:host] = parsed_uri.host
           else
             # hosted-data://<JWT>
             cfg[:token] = parsed_uri.host
           end
+
+          cfg[:table_prefix] = parsed_uri.params[:table_prefix]
         end
       end
 
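
The new `table_name` helper centralizes the `table_prefix` handling that `convert_schema` previously did inline, so the schema declaration and `upload_data_chunk` agree on names; the prefix itself arrives via the destination URI's query string (`hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example`). Its effective behavior, as a standalone sketch (function signature simplified for illustration):

```ruby
# Prepend "<prefix>_" to a warehouse table name when a prefix is configured.
def table_name(warehouse_name, table_prefix: nil)
  prefix = table_prefix.to_s.empty? ? nil : "#{table_prefix}_"
  prefix ? prefix + warehouse_name : warehouse_name
end

table_name("submissions")                            # => "submissions"
table_name("submissions", table_prefix: "example")   # => "example_submissions"
```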