inst_data_shipper 0.1.0.beta1 → 0.1.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/db/migrate/{20240301090836_create_canvas_sync_sync_batches.rb → 20240301090836_create_inst_data_shipper_dump_batches.rb} +3 -1
- data/lib/inst_data_shipper/basic_dumper.rb +1 -1
- data/lib/inst_data_shipper/concerns/hooks.rb +18 -4
- data/lib/inst_data_shipper/data_sources/canvas_reports.rb +58 -21
- data/lib/inst_data_shipper/data_sources/local_tables.rb +28 -7
- data/lib/inst_data_shipper/destinations/base.rb +27 -4
- data/lib/inst_data_shipper/destinations/hosted_data.rb +28 -17
- data/lib/inst_data_shipper/destinations/s3.rb +1 -1
- data/lib/inst_data_shipper/dumper.rb +128 -47
- data/lib/inst_data_shipper/engine.rb +6 -0
- data/lib/inst_data_shipper/jobs/async_caller.rb +10 -2
- data/lib/inst_data_shipper/jobs/basic_dump_job.rb +6 -2
- data/lib/inst_data_shipper/schema_builder.rb +85 -33
- data/lib/inst_data_shipper/version.rb +1 -1
- data/lib/inst_data_shipper.rb +11 -2
- data/spec/spec_helper.rb +2 -2
- metadata +20 -6
- /data/app/models/{hosted_data_dumper → inst_data_shipper}/dump_batch.rb +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 146f5b93819d7950f9bd256a99eb690a63d453b86be4ac6ac7cf4c5901724cdd
+  data.tar.gz: 2410298ebb3b1ddc565ca70d49a274a129e83087d461dcfae4d4981979795ea5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c6dc93902e0ef7a114d2434d3901c677021d57603b07c1122d72bd2f953184b207d0e57f5441fafb7035a00bf28f2e5035d34cd0edfb73da6d2f93d93874f344
+  data.tar.gz: 2e7babf6a2ed86f9a2e5769bfb393549fb07b771b700513efd640ba2b98ebc3eeadec99578e4c828738903ef121f8e3bebe8490f6f75de0ce9afac43ac28b8fa
data/db/migrate/{20240301090836_create_canvas_sync_sync_batches.rb → 20240301090836_create_inst_data_shipper_dump_batches.rb}
CHANGED
@@ -1,4 +1,4 @@
-class
+class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
   def change
     create_table :inst_data_shipper_dump_batches do |t|
       t.datetime :started_at
@@ -6,6 +6,8 @@ class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
       t.string :status
 
       t.string :job_class
+      t.string :genre
+
       t.string :exception
       t.text :backtrace
       t.text :metadata
data/lib/inst_data_shipper/concerns/hooks.rb
CHANGED
@@ -9,21 +9,35 @@ module InstDataShipper
      end
 
      def hook(name, prepend: false, &block)
+        _assert_hook_defined(name)
+        @hooks ||= {}
+        @hooks[name] ||= []
        hooks = @hooks[name]
        prepend ? hooks.unshift(block) : hooks << block
      end
+
+      def _assert_hook_defined(name)
+        return true if @hooks&.key?(name)
+        return if superclass.respond_to?(:_assert_hook_defined) && superclass._assert_hook_defined(name)
+        raise ArgumentError, "Hook #{name} is not defined"
+      end
+
+      def _list_hooks(name)
+        list = []
+        list.push(*superclass._list_hooks(name)) if superclass.respond_to?(:_list_hooks)
+        list.push(*@hooks[name]) if (@hooks || {})[name]
+        list
+      end
    end
 
    def run_hook(name, *args, **kwargs)
-
-      hooks.each do |blk|
+      self.class._list_hooks(name).each do |blk|
        instance_exec(*args, **kwargs, &blk)
      end
    end
 
    def run_hook_safe(name, *args, **kwargs)
-
-      hooks.each do |blk|
+      self.class._list_hooks(name).each do |blk|
        instance_exec(*args, **kwargs, &blk)
      rescue StandardError
      end
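The hook registry is now class-level and inherited: hook only accepts names declared with define_hook (via _assert_hook_defined), and run_hook walks _list_hooks up the superclass chain. A minimal usage sketch, assuming a Dumper subclass since :initialize_dump_batch is declared in dumper.rb further down; everything else here is illustrative:

class NightlyDumper < InstDataShipper::Dumper
  # Runs via run_hook(:initialize_dump_batch, context) during begin_dump
  hook :initialize_dump_batch do |context|
    context[:note] = "nightly run"
  end

  # A misspelled or undeclared name now fails fast:
  # hook :initialize_dump  -> ArgumentError, "Hook initialize_dump is not defined"
end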
data/lib/inst_data_shipper/data_sources/canvas_reports.rb
CHANGED
@@ -27,11 +27,25 @@ module InstDataShipper
        _in_canvas_report_pool(:_import_canvas_report, *args, **kwargs)
      end
 
-      def import_canvas_report_by_terms(
+      def import_canvas_report_by_terms(*args, **kwargs)
+        _in_canvas_report_pool(:_import_canvas_report_by_terms, *args, **kwargs)
+      end
+
+      def import_existing_report(report, **kwargs)
+        delayed(:_process_canvas_report, report: report, **kwargs)
+      end
+
+      private
+
+      def _import_canvas_report_by_terms(report_name, terms: [], params: {}, **kwargs)
        term_ids = (terms || []).map do |term|
          term.is_a?(Term) ? term.canvas_id : term
        end
 
+        table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
+
+        _resolve_report_incremenal_parameters(table_def, params)
+
        Sidekiq::Batch.new.tap do |b|
          b.description = "Term Scoped #{report_name} Runners"
          b.context = {
@@ -40,19 +54,21 @@ module InstDataShipper
          b.jobs do
            terms_query = term_ids.present? ? Term.where(canvas_id: term_ids) : Term
            terms_query.find_each do |t|
-
+              _in_canvas_report_pool(:_trigger_canvas_report, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
            end
          end
        end
      end
 
-      def
-
-      end
+      def _import_canvas_report(report_name, params: {}, **kwargs)
+        table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
 
-
+        _resolve_report_incremenal_parameters(table_def, params)
+
+        _trigger_canvas_report(report_name, params: params, **kwargs)
+      end
 
-      def
+      def _trigger_canvas_report(report_name, retry_count: 3, params: {}, **kwargs)
        report = canvas_sync_client.start_report(
          'self', report_name,
          parameters: params,
@@ -61,15 +77,13 @@ module InstDataShipper
        CanvasSync::Jobs::CanvasProcessWaiter.perform_later(
          "/api/v1/accounts/self/reports/#{report_name}/#{report[:id]}",
          {
-
-
-            args: [target_table],
+            job: Jobs::AsyncCaller,
+            args: [origin_class, :_process_canvas_report],
            kwargs: kwargs,
          },
          on_failure: {
-
-
-            args: [target_table, report_name, kwargs],
+            job: Jobs::AsyncCaller,
+            args: [origin_class, :_handle_failed_canvas_report, report_name, kwargs],
            kwargs: { retry_count: retry_count },
          },
          status_key: :status,
@@ -79,18 +93,18 @@ module InstDataShipper
 
      def _in_canvas_report_pool(mthd, *args, **kwargs)
        pool = CanvasSync::JobBatches::Pool.from_pid(batch_context[:report_processor_pool])
-        AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
+        Jobs::AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
      end
 
-      def _process_canvas_report(
-        table_def =
+      def _process_canvas_report(report:, schema_name: nil)
+        table_def = lookup_table_schema!(schema_name, report[:report])
 
-        IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}
+        IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/temp_report.csv")
 
        inner_block = ->(file) {
-          CSV.foreach("#{working_dir}
+          CSV.foreach("#{working_dir}/temp_report.csv", headers: true) do |m|
            file << table_def[:columns].map do |c|
-
+              instance_exec(m, &c[:block])
            end
          end
        }
@@ -98,13 +112,36 @@ module InstDataShipper
        upload_data(table_def, extra: report['id'], &inner_block)
      end
 
-      def
+      def _resolve_report_incremenal_parameters(table_def, params)
+        if table_is_incremental?(table_def)
+          inc = table_def[:incremental]
+          scope = inc[:scope]
+
+          if scope != false
+            scope ||= "updated_after"
+
+            if scope.is_a?(Proc)
+              scope = instance_exec(params, &scope)
+              if scope.is_a?(Hash) && scope != params
+                params.merge!(scope)
+              end
+            elsif scope.is_a?(String) || scope.is_a?(Symbol)
+              params[scope] = incremental_since
+            end
+          end
+        end
+
+        params
+      end
+
+      def _handle_failed_canvas_report(report_name, kwargs, retry_count:, report:)
        if retry_count.positive?
          tbid = batch_context[:report_bid] || batch_context[:root_bid]
          Sidekiq::Batch.new(tbid).jobs do
-
+            _in_canvas_report_pool(:_trigger_canvas_report, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
          end
        else
+          # TODO Allow marking the table as incomplete. Destination code can then decide how to handle incomplete tables since (eg) incremental imports wouldn't mind too much
          cleanup_fatal_error!
        end
      end
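Report imports are now routed through Jobs::AsyncCaller and resolve their table schema and incremental parameters before the Canvas report is triggered. A sketch of the calling side from a dumper's task block; the report and schema names are illustrative, not taken from this diff:

# Inside a Dumper's enqueue_tasks block (hypothetical report/schema names):
import_canvas_report_by_terms("provisioning_csv", terms: Term.all, params: { "users" => true })
import_existing_report(already_started_report, schema_name: "users")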
data/lib/inst_data_shipper/data_sources/local_tables.rb
CHANGED
@@ -12,22 +12,43 @@ module InstDataShipper
 
      private
 
-      def _import_local_table(
-
-
+      def _import_local_table(model, schema_name: nil)
+        model = model.safe_constantize if model.is_a?(String)
+
+        table_def = lookup_table_schema!(schema_name, { model: model })
 
        inner_block = ->(file) {
-          query = model
-          query = query
-
+          query = model.all
+          query = _resolve_model_query(query, table_def[:query])
+
+          if table_is_incremental?(table_def)
+            query = _resolve_model_query(query, table_def.dig(:incremental, :scope), string: ->(r, c) { r.where("? > ?", c, incremental_since) })
+          end
+
+          query.find_each do |m|
            file << table_def[:columns].map do |c|
-
+              instance_exec(m, &c[:block])
            end
          end
        }
 
        upload_data(table_def, &inner_block)
      end
+
+      def _resolve_model_query(relation, query, string: nil)
+        return relation if query.nil?
+
+        if query.is_a?(Symbol)
+          relation.send(query)
+        elsif query.is_a?(Proc)
+          instance_exec(relation, &query)
+        elsif query.is_a?(String) && string.present?
+          instance_exec(relation, query, &string)
+        else
+          raise "Invalid query: #{query.inspect}"
+        end
+      end
+
    end
  end
 end
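Local-table imports now take a model (or class-name string) plus an optional schema name, apply the table's query and incremental scope, and feed each row through the column transformer blocks. A sketch of the calling side, assuming a public wrapper that mirrors the private _import_local_table signature; the model name is illustrative:

# From a Dumper task block:
import_local_table(User)                          # schema looked up by { model: User }
import_local_table("User", schema_name: "users")  # strings are safe_constantize'd first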
data/lib/inst_data_shipper/destinations/base.rb
CHANGED
@@ -50,7 +50,7 @@ module InstDataShipper
      end
 
      def user_config
-        config[:
+        config[:user_config]
      end
 
      def group_key
@@ -62,11 +62,11 @@ module InstDataShipper
      def parse_configuration(uri)
        if block_given?
          parsed = URI.parse(uri)
+          cparsed = ConfigURI.new(parsed)
          cfg = {
-
-            extra: (parsed.fragment.present? && parsed.fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(parsed.fragment)).presence || parsed.fragment || nil,
+            user_config: cparsed.hash_params,
          }
-          yield
+          yield cparsed, cfg
          cfg
        else
          raise NotImplementedError
@@ -100,5 +100,28 @@ module InstDataShipper
      end
 
    end
+
+    class ConfigURI
+      def initialize(uri)
+        @uri = uri
+      end
+
+      # delegate_missing_to :uri
+      delegate :scheme, :user, :password, :host, :hostname, :port, :path, :query, :fragment, to: :uri
+
+      def params
+        @params ||= (query.present? ? Rack::Utils.parse_nested_query(query).with_indifferent_access : {}).freeze
+      end
+
+      def hash_params
+        @hash_params ||= ((fragment.present? && fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(fragment).with_indifferent_access).presence || fragment || nil)&.freeze
+      end
+
+      private
+
+      def uri
+        @uri
+      end
+    end
  end
 end
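Destination configuration strings are now wrapped in ConfigURI, which exposes query-string params (params) and fragment params (hash_params) with indifferent access; Base#parse_configuration pre-seeds cfg[:user_config] from hash_params and yields the wrapper. A sketch of what a destination subclass's block now sees; the URI, scheme, and parameter names are illustrative:

# Inside a hypothetical Destination subclass:
def parse_configuration(uri)
  super do |parsed_uri, cfg|
    parsed_uri.params       # query params, e.g. { "region" => "us-east-1" }
    parsed_uri.hash_params  # fragment params, e.g. { "aws_access_key_id" => "..." }
    # cfg already contains user_config: parsed_uri.hash_params
    cfg[:region] = parsed_uri.params[:region]
  end
end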
data/lib/inst_data_shipper/destinations/hosted_data.rb
CHANGED
@@ -1,3 +1,5 @@
+require "faraday_middleware"
+
 module InstDataShipper
  module Destinations
    class HostedData < Base
@@ -15,7 +17,7 @@ module InstDataShipper
      end
 
      def chunk_data(generator, table:, extra: nil)
-        warehouse_name =
+        warehouse_name = table[:warehouse_name]
 
        super(generator) do |batch, idx|
          bits = [warehouse_name, extra, idx].compact
@@ -36,18 +38,18 @@ module InstDataShipper
 
      def upload_data_chunk(table_def, chunk)
        hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", artifacts: {
-          table_def
+          table_name(table_def) => [Faraday::UploadIO.new(chunk, 'application/gzip')],
        })
      end
 
      def finalize_dump
        hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", start_import: true) if hd_dump_id.present?
-        redis.
+        redis.del(rk(:state))
      end
 
      def cleanup_fatal_error
        hosted_data_client.delete("api/v1/custom_dumps/#{hd_dump_id}/", reason: 'Failure during extraction or transformation') if hd_dump_id.present?
-        redis.
+        redis.del(rk(:state))
      end
 
      # TODO Support/allow single-table fatal errors?
@@ -59,28 +61,25 @@ module InstDataShipper
      end
 
      def convert_schema
-        table_prefix = config[:table_prefix]
-        table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
-
        definititions = {}
        table_schemas.each do |ts|
          ts = ts.dup
+          tname = table_name(ts)
 
-
-          table_name = table_prefix + table_name if table_prefix.present?
-
-          definititions[ts[:warehouse_name]] = {
+          definititions[tname] = {
            dw_type: 'dimension',
            description: ts[:description],
-            incremental:
-            incremental_on: ts
+            incremental: dumper.table_is_incremental?(ts),
+            incremental_on: ts.dig(:incremental, :on),
            # indexed_columns
-            tableName:
+            tableName: tname,
            columns: ts[:columns].map do |col|
+              coltype = col[:type]
+              coltype ||= ts[:model].column_for_attribute(col[:from]).sql_type if col[:from].is_a?(String)
              {
                name: col[:warehouse_name],
                description: col[:description],
-                type:
+                type: coltype,
              }
            end,
          }
@@ -92,6 +91,14 @@ module InstDataShipper
        }
      end
 
+      def table_name(table_def)
+        table_prefix = config[:table_prefix]
+        table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
+        table_name = table_def[:warehouse_name]
+        table_name = table_prefix + table_name if table_prefix.present?
+        table_name
+      end
+
      def hosted_data_client
        @hosted_data_client ||= begin
          token = config[:token]
@@ -102,6 +109,8 @@ module InstDataShipper
            host = tok_content['host']
          end
 
+          host = "https://#{host}" unless host.include?('://')
+
          Faraday.new(url: host) do |faraday|
            faraday.request :multipart
            faraday.request :json
@@ -117,14 +126,16 @@ module InstDataShipper
 
      def parse_configuration(uri)
        super do |parsed_uri, cfg|
-          if parsed_uri.
+          if parsed_uri.user.present?
            # hosted-data://<JWT>:<hosted_data_domain>
-            cfg[:token] = parsed_uri.
+            cfg[:token] = parsed_uri.user
            cfg[:host] = parsed_uri.host
          else
            # hosted-data://<JWT>
            cfg[:token] = parsed_uri.host
          end
+
+          cfg[:table_prefix] = parsed_uri.params[:table_prefix]
        end
      end
 
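HostedData now derives upload table names through table_name, applying an optional table_prefix read from the destination URI's query string, and normalizes bare hosts to https. A sketch of the effect; the prefix value is illustrative:

# With config[:table_prefix] = "staging" (e.g. from a ?table_prefix=staging query param):
table_name({ warehouse_name: "users" })  # => "staging_users"
# Without a prefix the warehouse name passes through unchanged:
table_name({ warehouse_name: "users" })  # => "users"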
data/lib/inst_data_shipper/destinations/s3.rb
CHANGED
@@ -4,7 +4,7 @@ module InstDataShipper
      include Concerns::Chunking
 
      def chunk_data(generator, table:, extra: nil)
-        warehouse_name =
+        warehouse_name = table[:warehouse_name]
 
        super(generator) do |batch, idx|
          bits = [warehouse_name, extra, idx].compact
data/lib/inst_data_shipper/dumper.rb
CHANGED
@@ -5,7 +5,7 @@ module InstDataShipper
    define_hook :initialize_dump_batch
    define_hook :finalize_dump_batch
 
-    def self.perform_dump(destinations
+    def self.perform_dump(destinations)
      raise "Must subclass Dumper to use perform_dump" if self == Dumper
 
      dumper = new(destinations)
@@ -14,48 +14,134 @@ module InstDataShipper
      dumper.tracker
    end
 
-
-
-
+    def self.define(include: [], schema: , &blk)
+      Class.new(self) do
+        include(*include)
 
-
-
-
+        define_method(:enqueue_tasks, &blk)
+        define_method(:table_schemas) { schema }
+      end
    end
 
-
-      raise NotImplementedError
-    end
+    public
 
    def begin_dump
      raise "Dump already begun" unless @raw_destinations.present?
 
-      @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, status: 'in_progress')
+      @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')
 
-
-
+      @batch_context = context = {
+        # TODO Allow to be hooked by Destination, likely via initialize_dump_batch and batch_context, so that if an earlier destination fails we can resend data
+        # TODO Consider behavior if last is still running
+        incremental_since: DumpBatch.where(genre: export_genre, status: 'completed').order(created_at: :desc).first&.created_at,
+      }
+
+      begin
+        begin
+          destinations.each do |dest|
+            dest.initialize_dump()
+          end
+
+          run_hook(:initialize_dump_batch, context)
+        ensure
+          @batch_context = nil
+          context[:tracker_id] = tracker.id
+          context[:origin_class] = batch_context[:origin_class] || self.class.to_s
+          context[:destinations] = @raw_destinations
+        end
+
+        Sidekiq::Batch.new.tap do |batch|
+          context[:root_bid] = batch.bid
+
+          batch.description = "HD #{export_genre} Export #{tracker.id} Root"
+          batch.context = context
+          batch.on(:success, "#{self.class}#finalize_dump")
+          batch.on(:death, "#{self.class}#cleanup_fatal_error!")
+          batch.jobs do
+            enqueue_tasks
+          rescue => ex
+            delayed :cleanup_fatal_error!
+            InstDataShipper.handle_suppressed_error(ex)
+          end
+        end
+      rescue => ex
+        if context
+          batch ||= Sidekiq::Batch.new.tap do |batch|
+            batch.description = "HD #{export_genre} Export #{tracker.id} Early Failure Cleanup"
+            batch.context = context
+            batch.jobs do
+              delayed :cleanup_fatal_error!
+            end
+          end
+        end
+        raise ex
      end
+    end
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def tracker
+      @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
+    end
+
+    def export_genre
+      self.class.to_s.gsub(/HD|ExportJob/, '')
+    end
+
+    def origin_class
+      batch_context[:origin_class]&.constantize || self.class
+    end
+
+    def table_is_incremental?(table_def)
+      if (inc = table_def[:incremental]).present?
+        differ = inc[:if]
+        return !!incremental_since if differ.nil?
+
+        differ = :"#{differ}".to_proc if differ.is_a?(Symbol)
+        differ = instance_exec(&differ) if differ.is_a?(Proc)
+        return !!differ
+      end
+
+      false
+    end
+
+    def incremental_since
+      batch_context[:incremental_since]
+    end
+
+    def lookup_table_schema(*identifiers)
+      identifiers.compact.each do |ident|
+        if ident.is_a?(Hash)
+          key = ident.keys.first
+          value = ident.values.first
+        else
+          key = :warehouse_name
+          value = ident
+        end
+
+        value = Array(value).compact
+
+        table_schemas.each do |ts|
+          return ts if value.include?(ts[key])
        end
      end
 
-
+      nil
+    end
+
+    def lookup_table_schema!(*identifiers)
+      lookup_table_schema(*identifiers) || raise("No table schema found for #{identifiers.inspect}")
+    end
+
+    protected
+
+    attr_reader :executor
+
+    def initialize(destinations = nil, executor: nil)
+      @raw_destinations = Array(destinations)
+      @executor = executor
+    end
+
+    def enqueue_tasks
+      raise NotImplementedError
    end
 
    def upload_data(table_def, extra: nil, &datagen)
@@ -96,7 +182,7 @@ module InstDataShipper
    def finalize_dump(_status, _opts)
      run_hook(:finalize_dump_batch)
 
-
+      destinations.each do |dest|
        dest.finalize_dump
      end
 
@@ -108,14 +194,15 @@ module InstDataShipper
 
      run_hook(:finalize_dump_batch)
 
-
+      destinations.each do |dest|
        dest.cleanup_fatal_error
-      rescue
+      rescue => ex
+        InstDataShipper.handle_suppressed_error(ex)
      end
 
      DumpBatch.find(batch_context[:tracker_id]).update(status: 'failed')
 
-      CanvasSync::JobBatches::Batch.delete_prematurely!(batch_context[:root_bid])
+      CanvasSync::JobBatches::Batch.delete_prematurely!(batch_context[:root_bid]) if batch_context[:root_bid].present?
    end
 
    # Helper Methods
@@ -126,23 +213,17 @@ module InstDataShipper
    end
 
    def delayed(mthd, *args, **kwargs)
-      AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
-    end
-
-    def tracker
-      @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
+      Jobs::AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
    end
 
-
-      self.class.to_s.gsub(/HD|ExportJob/, '')
-    end
+    delegate :working_dir, to: :executor
 
-    def
-
+    def batch
+      Thread.current[CanvasSync::JobBatches::CURRENT_BATCH_THREAD_KEY]
    end
 
-    def
-
+    def batch_context
+      @batch_context || batch&.context || {}
    end
 
    def destinations_for_table(table_def)
@@ -150,7 +231,7 @@ module InstDataShipper
    end
 
    def destinations
-      @destinations ||= (@raw_destinations || batch_context[:destinations]).map.with_index do |dest, i|
+      @destinations ||= (@raw_destinations.presence || batch_context[:destinations]).map.with_index do |dest, i|
        dcls = InstDataShipper.resolve_destination(dest)
        dcls.new("#{InstDataShipper.redis_prefix}:dump#{tracker.id}:dest#{i}", dest, self)
      end
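Dumper.define now builds an anonymous subclass from a schema plus a task block (captured as enqueue_tasks), and begin_dump threads incremental_since, the created_at of the last completed DumpBatch of the same genre, through the Sidekiq batch context. A usage sketch; the constant names, data-source module path, and destination URI are illustrative, and the schema is assumed to come from SchemaBuilder as sketched after the schema_builder.rb diff below:

UsersDumper = InstDataShipper::Dumper.define(
  include: [InstDataShipper::DataSources::LocalTables],
  schema: USERS_SCHEMA,
) do
  import_local_table(User)
end

UsersDumper.perform_dump(["hosted-data://<JWT>@hosted-data.example.com?table_prefix=staging"])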
data/lib/inst_data_shipper/engine.rb
CHANGED
@@ -4,5 +4,11 @@ module InstDataShipper
  class Engine < ::Rails::Engine
    isolate_namespace InstDataShipper
 
+    initializer :append_migrations do |app|
+      config.paths["db/migrate"].expanded.each do |expanded_path|
+        app.config.paths["db/migrate"] << expanded_path
+      end
+      ActiveRecord::Migrator.migrations_paths = Rails.application.paths['db/migrate'].to_a
+    end
  end
 end
data/lib/inst_data_shipper/jobs/async_caller.rb
CHANGED
@@ -1,7 +1,14 @@
+
+require "sidekiq"
+
 module InstDataShipper
  module Jobs
    class AsyncCaller < InstDataShipper::Jobs::Base
-      sidekiq_options
+      sidekiq_options(retry: 0) if defined?(sidekiq_options)
+
+      def self.get_sidekiq_options
+        { retry: 0 }
+      end
 
      def self.call_from_pool(pool, clazz, method, *args, **kwargs)
        pool.add_job(
@@ -12,7 +19,8 @@ module InstDataShipper
      end
 
      def perform(clazz, method, *args, **kwargs)
-        clazz.constantize
+        clazz = clazz.constantize if clazz.is_a?(String)
+        clazz.new(executor: self).send(method.to_sym, *args, **kwargs)
      end
    end
  end
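AsyncCaller now forces retry: 0, constantizes string class names, and instantiates the target with itself as the executor, so helpers like working_dir delegate back to the running job. A sketch of the call path; the dumper class name is illustrative:

# Enqueue: run MyDumper#_process_canvas_report(report: ...) on a worker
InstDataShipper::Jobs::AsyncCaller.perform_later("MyDumper", "_process_canvas_report", report: report)

# perform(clazz, method, *args, **kwargs) then does, roughly:
#   clazz = clazz.constantize if clazz.is_a?(String)
#   clazz.new(executor: self).send(method.to_sym, *args, **kwargs)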
data/lib/inst_data_shipper/jobs/basic_dump_job.rb
CHANGED
@@ -3,9 +3,13 @@ module InstDataShipper
    class BasicDumpJob < InstDataShipper::Jobs::Base
      sidekiq_options retry: 3 if defined?(sidekiq_options)
 
-      def perform(endpoints
-
+      def perform(endpoints)
+
      end
+
+      protected
+
+
    end
  end
 end
data/lib/inst_data_shipper/schema_builder.rb
CHANGED
@@ -1,4 +1,6 @@
 module InstDataShipper
+  # This class ends up fill two roles - Schema and Mapping.
+  # It makes for a clean API, but it's a little less canonical since, (eg) the S3 destination doesn't need column type annotations.
  class SchemaBuilder
    attr_reader :tables
 
@@ -12,24 +14,40 @@ module InstDataShipper
      builder.tables
    end
 
-    def
-
-
+    def extend_table_builder(&block)
+      @table_builder_class ||= Class.new(TableSchemaBuilder)
+      @table_builder_class.class_eval(&block)
+    end
 
+    def table(model_or_name, description = nil, model: nil, query: nil, **extra, &block)
      tdef = {
+        warehouse_name: nil,
        description: description,
-        model: model_or_name.is_a?(String) ? nil : model_or_name,
-        warehouse_name: as.to_s,
-        incremental: incremental,
        columns: [],
-
+
+        model: model,
+        query: query,
+        **extra,
      }
 
-
+      if model_or_name.is_a?(ActiveRecord::Relation)
+        raise "model specified twice" if model.present?
+        raise "query specified twice" if query.present?
+
+        tdef[:query] = model_or_name
+        tdef[:model] = model_or_name.model
+      elsif model_or_name.is_a?(Class) && model_or_name < ActiveRecord::Base
+        tdef[:warehouse_name] = model_or_name.table_name
+        tdef[:model] = model_or_name
+      else
+        tdef[:warehouse_name] = model_or_name
+      end
+
+      @table_builder_class.build(tdef, &block)
 
      @tables << tdef
 
-
+      tdef
    end
 
    class TableSchemaBuilder
@@ -46,48 +64,82 @@ module InstDataShipper
      builder.columns
    end
 
-      #
-
-
-
+      # def annotate(key, value)
+      #   options[key] = value
+      # end
+
+      def incremental(scope="updated_at", **kwargs)
+        if (extras = kwargs.keys - %i[on if]).present?
+          raise ArgumentError, "Unsuppored options: #{extras.inspect}"
+        end
+
+        options[:incremental] = {
+          on: Array(kwargs[:on]),
+          scope: scope,
+          if: kwargs[:if],
+        }
+      end
+
+      def column(name, *args, refs: [], from: nil, **extra, &block)
+        from ||= name.to_s
 
        cdef = {
-
-
-
+          warehouse_name: name.to_s,
+          from: from,
+          **extra,
        }
 
-        [
-
-          k.each do |hk, hv|
-            cdef[hv] = kwargs.delete(hk) if kwargs.key?(hk)
-          end
-        elsif kwargs.key?(k)
-          cdef[k] = kwargs.delete(k)
-        end
+        if args[0].is_a?(Symbol)
+          cdef[:type] = args.shift()
        end
 
-
-
-
-          k = (a.is_a?(String) && :description) || (a.is_a?(Symbol) && :type) || nil
-          raise ArgumentError, 'Unsupported Argument' if k.nil?
-          raise ArgumentError, "Duplicate Argument for #{k}" if cdef.key?(k)
+        if args[0].is_a?(String)
+          cdef[:description] = args.shift()
+        end
 
-
+        if args.present?
+          raise ArgumentError, "Received unexpected arguments: #{args.inspect}"
        end
 
+        cdef[:references] = Array(refs)
+
        if options[:model].is_a?(Class) && cdef[:local_name].to_s.ends_with?('_id')
          rel_name = cdef[:local_name].to_s[0...-3]
          refl = options[:model].reflections[rel_name]
          cdef[:references] << "#{refl.klass}##{refl.options[:primary_key] || 'id'}" if refl.present? && !refl.polymorphic?
        end
 
+        compiled_from = compile_transformer(from)
+
+        cdef[:block] = ->(row) {
+          value = instance_exec(row, &compiled_from)
+          value = instance_exec(value, row, &block) if block.present?
+          value
+        }
+
        @columns << cdef
 
-
+        cdef
      end
-
+
+      protected
+
+      def compile_transformer(from)
+        if from.present?
+          if from.is_a?(Symbol)
+            ->(row) { row.send(from) }
+          elsif from.is_a?(Proc)
+            from
+          elsif from.is_a?(String)
+            ->(row) { row[from] }
+          else
+            raise ArgumentError, "Invalid transformer: #{from.inspect}"
+          end
+        else
+          ->(row) { row }
+        end
      end
+
    end
  end
 end
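The schema DSL now accepts a model class, a relation, or a plain warehouse name, supports incremental settings, and compiles each column into a :block transformer that data sources call per row. A sketch of the DSL, assuming the existing SchemaBuilder.build entry point and a default table builder wired as in beta1; table and column names are illustrative:

USERS_SCHEMA = InstDataShipper::SchemaBuilder.build do
  table(User, "Local users table") do
    incremental "updated_at", on: [:id]

    column :id, :bigint
    column :email, :string, "Primary email" do |value, _row|
      value&.downcase
    end
  end
end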
data/lib/inst_data_shipper.rb
CHANGED
@@ -23,13 +23,20 @@ module InstDataShipper
        destination = @destination_aliases[type]
      end
 
-
+      destination.constantize
    end
 
    def start_basic_dump(*args, **kwargs, &block)
      BasicDumper.perform_dump(*args, **kwargs, &block)
    end
 
+    def handle_suppressed_error(ex)
+      logger.error "Suppressed Error: #{ex.message}"
+      logger.error ex.backtrace.join("\n")
+      Raven.capture_exception(ex) if defined?(Raven)
+      Sentry.capture_exception(ex) if defined?(Sentry)
+    end
+
    def logger
      return @logger if defined? @logger
      @logger = Logger.new(STDOUT)
@@ -66,6 +73,8 @@ Dir[File.dirname(__FILE__) + "/inst_data_shipper/destinations/*.rb"].each do |fi
  basename = File.basename(file, ".rb")
  next if basename == "base"
 
-  InstDataShipper.alias_destination(basename.dasherize, "InstDataShipper::Destinations::#{basename.
+  InstDataShipper.alias_destination(basename.dasherize, "InstDataShipper::Destinations::#{basename.camelize}")
 end
 
+require "inst_data_shipper/dumper"
+require "inst_data_shipper/basic_dumper"
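handle_suppressed_error centralizes swallowed exceptions: it logs them and forwards to Raven or Sentry when either is loaded. It is used from Dumper's cleanup paths in this release; the general call-site pattern is:

begin
  destination.cleanup_fatal_error
rescue => ex
  InstDataShipper.handle_suppressed_error(ex)  # log and report, but keep tearing down
end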
data/spec/spec_helper.rb
CHANGED
@@ -7,7 +7,7 @@ require File.expand_path("../dummy/config/environment.rb", __FILE__)
 require "bundler/setup"
 require 'rspec/rails'
 require 'spec_helper'
-require '
+require 'factory_bot_rails'
 require 'timecop'
 require 'webmock/rspec'
 require 'support/fake_canvas'
@@ -29,7 +29,7 @@ ActiveRecord::Migration.maintain_test_schema!
 RSpec.configure do |config|
  config.extend WithModel
 
-  config.include
+  config.include FactoryBot::Syntax::Methods
  config.use_transactional_fixtures = true
  config.infer_spec_type_from_file_location!
  config.filter_rails_from_backtrace!
metadata
CHANGED
@@ -1,27 +1,27 @@
 --- !ruby/object:Gem::Specification
 name: inst_data_shipper
 version: !ruby/object:Gem::Version
-  version: 0.1.0.
+  version: 0.1.0.beta2
 platform: ruby
 authors:
 - Instructure CustomDev
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-03-
+date: 2024-03-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
  name: rails
  requirement: !ruby/object:Gem::Requirement
    requirements:
-    - - "
+    - - "~>"
      - !ruby/object:Gem::Version
        version: '6.0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
-    - - "
+    - - "~>"
      - !ruby/object:Gem::Version
        version: '6.0'
 - !ruby/object:Gem::Dependency
@@ -360,6 +360,20 @@ dependencies:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
+- !ruby/object:Gem::Dependency
+  name: faraday_middleware
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description:
 email:
 - pseng@instructure.com
@@ -369,8 +383,8 @@ extra_rdoc_files: []
 files:
 - README.md
 - Rakefile
-- app/models/
-- db/migrate/
+- app/models/inst_data_shipper/dump_batch.rb
+- db/migrate/20240301090836_create_inst_data_shipper_dump_batches.rb
 - lib/inst_data_shipper.rb
 - lib/inst_data_shipper/basic_dumper.rb
 - lib/inst_data_shipper/concerns/hooks.rb
data/app/models/{hosted_data_dumper → inst_data_shipper}/dump_batch.rb
RENAMED
File without changes