inst_data_shipper 0.1.0.beta1 → 0.1.0.beta2
- checksums.yaml +4 -4
- data/db/migrate/{20240301090836_create_canvas_sync_sync_batches.rb → 20240301090836_create_inst_data_shipper_dump_batches.rb} +3 -1
- data/lib/inst_data_shipper/basic_dumper.rb +1 -1
- data/lib/inst_data_shipper/concerns/hooks.rb +18 -4
- data/lib/inst_data_shipper/data_sources/canvas_reports.rb +58 -21
- data/lib/inst_data_shipper/data_sources/local_tables.rb +28 -7
- data/lib/inst_data_shipper/destinations/base.rb +27 -4
- data/lib/inst_data_shipper/destinations/hosted_data.rb +28 -17
- data/lib/inst_data_shipper/destinations/s3.rb +1 -1
- data/lib/inst_data_shipper/dumper.rb +128 -47
- data/lib/inst_data_shipper/engine.rb +6 -0
- data/lib/inst_data_shipper/jobs/async_caller.rb +10 -2
- data/lib/inst_data_shipper/jobs/basic_dump_job.rb +6 -2
- data/lib/inst_data_shipper/schema_builder.rb +85 -33
- data/lib/inst_data_shipper/version.rb +1 -1
- data/lib/inst_data_shipper.rb +11 -2
- data/spec/spec_helper.rb +2 -2
- metadata +20 -6
- /data/app/models/{hosted_data_dumper → inst_data_shipper}/dump_batch.rb +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 146f5b93819d7950f9bd256a99eb690a63d453b86be4ac6ac7cf4c5901724cdd
+  data.tar.gz: 2410298ebb3b1ddc565ca70d49a274a129e83087d461dcfae4d4981979795ea5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c6dc93902e0ef7a114d2434d3901c677021d57603b07c1122d72bd2f953184b207d0e57f5441fafb7035a00bf28f2e5035d34cd0edfb73da6d2f93d93874f344
+  data.tar.gz: 2e7babf6a2ed86f9a2e5769bfb393549fb07b771b700513efd640ba2b98ebc3eeadec99578e4c828738903ef121f8e3bebe8490f6f75de0ce9afac43ac28b8fa
data/db/migrate/{20240301090836_create_canvas_sync_sync_batches.rb → 20240301090836_create_inst_data_shipper_dump_batches.rb}
CHANGED
@@ -1,4 +1,4 @@
-class CreateCanvasSyncSyncBatches < CanvasSync::MiscHelper::MigrationClass
+class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
   def change
     create_table :inst_data_shipper_dump_batches do |t|
       t.datetime :started_at
@@ -6,6 +6,8 @@ class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
       t.string :status

       t.string :job_class
+      t.string :genre
+
       t.string :exception
       t.text :backtrace
       t.text :metadata
data/lib/inst_data_shipper/concerns/hooks.rb
CHANGED
@@ -9,21 +9,35 @@ module InstDataShipper
     end

     def hook(name, prepend: false, &block)
+      _assert_hook_defined(name)
+      @hooks ||= {}
+      @hooks[name] ||= []
       hooks = @hooks[name]
       prepend ? hooks.unshift(block) : hooks << block
     end
+
+    def _assert_hook_defined(name)
+      return true if @hooks&.key?(name)
+      return if superclass.respond_to?(:_assert_hook_defined) && superclass._assert_hook_defined(name)
+      raise ArgumentError, "Hook #{name} is not defined"
+    end
+
+    def _list_hooks(name)
+      list = []
+      list.push(*superclass._list_hooks(name)) if superclass.respond_to?(:_list_hooks)
+      list.push(*@hooks[name]) if (@hooks || {})[name]
+      list
+    end
   end

   def run_hook(name, *args, **kwargs)
-
-    hooks.each do |blk|
+    self.class._list_hooks(name).each do |blk|
       instance_exec(*args, **kwargs, &blk)
     end
   end

   def run_hook_safe(name, *args, **kwargs)
-
-    hooks.each do |blk|
+    self.class._list_hooks(name).each do |blk|
       instance_exec(*args, **kwargs, &blk)
     rescue StandardError
     end
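Hooks are now validated at registration and inherited across subclasses; `run_hook` executes the merged list returned by `_list_hooks`. A minimal usage sketch (`MyDumper` is hypothetical; the hook names are the ones declared via `define_hook` in dumper.rb below):

class MyDumper < InstDataShipper::Dumper
  # Appended after any blocks inherited from parent classes
  hook(:initialize_dump_batch) do |context|
    context[:requested_by] = "my_dumper"
  end

  # prepend: true places this block ahead of hooks already registered on this class
  hook(:finalize_dump_batch, prepend: true) do
    InstDataShipper.logger.info("dump finished")
  end

  # hook(:no_such_hook) {} would raise ArgumentError: "Hook no_such_hook is not defined"
end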
data/lib/inst_data_shipper/data_sources/canvas_reports.rb
CHANGED
@@ -27,11 +27,25 @@ module InstDataShipper
       _in_canvas_report_pool(:_import_canvas_report, *args, **kwargs)
     end

-    def import_canvas_report_by_terms(
+    def import_canvas_report_by_terms(*args, **kwargs)
+      _in_canvas_report_pool(:_import_canvas_report_by_terms, *args, **kwargs)
+    end
+
+    def import_existing_report(report, **kwargs)
+      delayed(:_process_canvas_report, report: report, **kwargs)
+    end
+
+    private
+
+    def _import_canvas_report_by_terms(report_name, terms: [], params: {}, **kwargs)
       term_ids = (terms || []).map do |term|
         term.is_a?(Term) ? term.canvas_id : term
       end

+      table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
+
+      _resolve_report_incremenal_parameters(table_def, params)
+
       Sidekiq::Batch.new.tap do |b|
         b.description = "Term Scoped #{report_name} Runners"
         b.context = {
@@ -40,19 +54,21 @@ module InstDataShipper
         b.jobs do
           terms_query = term_ids.present? ? Term.where(canvas_id: term_ids) : Term
           terms_query.find_each do |t|
-
+            _in_canvas_report_pool(:_trigger_canvas_report, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
           end
         end
       end
     end

-    def
-
-    end
+    def _import_canvas_report(report_name, params: {}, **kwargs)
+      table_def = lookup_table_schema!(kwargs[:schema_name], report_name)

-
+      _resolve_report_incremenal_parameters(table_def, params)
+
+      _trigger_canvas_report(report_name, params: params, **kwargs)
+    end

-    def
+    def _trigger_canvas_report(report_name, retry_count: 3, params: {}, **kwargs)
       report = canvas_sync_client.start_report(
         'self', report_name,
         parameters: params,
@@ -61,15 +77,13 @@ module InstDataShipper
       CanvasSync::Jobs::CanvasProcessWaiter.perform_later(
         "/api/v1/accounts/self/reports/#{report_name}/#{report[:id]}",
         {
-
-
-          args: [target_table],
+          job: Jobs::AsyncCaller,
+          args: [origin_class, :_process_canvas_report],
           kwargs: kwargs,
         },
         on_failure: {
-
-
-          args: [target_table, report_name, kwargs],
+          job: Jobs::AsyncCaller,
+          args: [origin_class, :_handle_failed_canvas_report, report_name, kwargs],
           kwargs: { retry_count: retry_count },
         },
         status_key: :status,
@@ -79,18 +93,18 @@ module InstDataShipper

     def _in_canvas_report_pool(mthd, *args, **kwargs)
       pool = CanvasSync::JobBatches::Pool.from_pid(batch_context[:report_processor_pool])
-      AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
+      Jobs::AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
     end

-    def _process_canvas_report(
-      table_def =
+    def _process_canvas_report(report:, schema_name: nil)
+      table_def = lookup_table_schema!(schema_name, report[:report])

-      IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}
+      IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/temp_report.csv")

       inner_block = ->(file) {
-        CSV.foreach("#{working_dir}
+        CSV.foreach("#{working_dir}/temp_report.csv", headers: true) do |m|
           file << table_def[:columns].map do |c|
-
+            instance_exec(m, &c[:block])
           end
         end
       }
@@ -98,13 +112,36 @@ module InstDataShipper
       upload_data(table_def, extra: report['id'], &inner_block)
     end

-    def
+    def _resolve_report_incremenal_parameters(table_def, params)
+      if table_is_incremental?(table_def)
+        inc = table_def[:incremental]
+        scope = inc[:scope]
+
+        if scope != false
+          scope ||= "updated_after"
+
+          if scope.is_a?(Proc)
+            scope = instance_exec(params, &scope)
+            if scope.is_a?(Hash) && scope != params
+              params.merge!(scope)
+            end
+          elsif scope.is_a?(String) || scope.is_a?(Symbol)
+            params[scope] = incremental_since
+          end
+        end
+      end
+
+      params
+    end
+
+    def _handle_failed_canvas_report(report_name, kwargs, retry_count:, report:)
       if retry_count.positive?
         tbid = batch_context[:report_bid] || batch_context[:root_bid]
         Sidekiq::Batch.new(tbid).jobs do
-
+          _in_canvas_report_pool(:_trigger_canvas_report, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
         end
       else
+        # TODO Allow marking the table as incomplete. Destination code can then decide how to handle incomplete tables since (eg) incremental imports wouldn't mind too much
        cleanup_fatal_error!
       end
     end
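With this refactor the public helpers are thin wrappers that schedule the private `_`-prefixed steps through the report processor pool, resolving the table schema and incremental parameters before a report is triggered. A sketch of the call pattern, assuming the concern is `InstDataShipper::DataSources::CanvasReports` (module name inferred from the file path; the report name and term ids are illustrative):

class ProvisioningDumper < InstDataShipper::Dumper
  include InstDataShipper::DataSources::CanvasReports

  def enqueue_tasks
    # Fans out one report run per term, merging enrollment_term_id into params
    import_canvas_report_by_terms("provisioning_csv", terms: [123, 456])

    # Or reuse a report payload that was generated elsewhere:
    # import_existing_report(report)
  end
end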
data/lib/inst_data_shipper/data_sources/local_tables.rb
CHANGED
@@ -12,22 +12,43 @@ module InstDataShipper

     private

-    def _import_local_table(
-
-
+    def _import_local_table(model, schema_name: nil)
+      model = model.safe_constantize if model.is_a?(String)
+
+      table_def = lookup_table_schema!(schema_name, { model: model })

       inner_block = ->(file) {
-        query = model
-        query = query
-
+        query = model.all
+        query = _resolve_model_query(query, table_def[:query])
+
+        if table_is_incremental?(table_def)
+          query = _resolve_model_query(query, table_def.dig(:incremental, :scope), string: ->(r, c) { r.where("? > ?", c, incremental_since) })
+        end
+
+        query.find_each do |m|
           file << table_def[:columns].map do |c|
-
+            instance_exec(m, &c[:block])
          end
        end
      }

      upload_data(table_def, &inner_block)
    end
+
+    def _resolve_model_query(relation, query, string: nil)
+      return relation if query.nil?
+
+      if query.is_a?(Symbol)
+        relation.send(query)
+      elsif query.is_a?(Proc)
+        instance_exec(relation, &query)
+      elsif query.is_a?(String) && string.present?
+        instance_exec(relation, query, &string)
+      else
+        raise "Invalid query: #{query.inspect}"
+      end
+    end
+
   end
 end
end
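Table schemas can now shape the exported relation: the base `query` plus an optional incremental `scope` are resolved against `model.all` before rows stream out. A sketch, assuming a public `import_local_table` wrapper mirroring the Canvas reports concern (`User` and the `:active` scope are illustrative):

schema = InstDataShipper::SchemaBuilder.build do
  table(User, "Users", query: :active) do   # Symbol query => User.all.active
    incremental("updated_at")               # String scope => relation.where("? > ?", "updated_at", incremental_since)
    column :id, :bigint
    column :email, :string
  end
end

A Proc is also accepted (query: ->(rel) { rel.where(deleted_at: nil) }, instance_exec'd with the relation); anything else raises "Invalid query: ...".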
data/lib/inst_data_shipper/destinations/base.rb
CHANGED
@@ -50,7 +50,7 @@ module InstDataShipper
     end

     def user_config
-      config[:
+      config[:user_config]
     end

     def group_key
@@ -62,11 +62,11 @@ module InstDataShipper
     def parse_configuration(uri)
       if block_given?
         parsed = URI.parse(uri)
+        cparsed = ConfigURI.new(parsed)
         cfg = {
-
-          extra: (parsed.fragment.present? && parsed.fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(parsed.fragment)).presence || parsed.fragment || nil,
+          user_config: cparsed.hash_params,
         }
-        yield
+        yield cparsed, cfg
         cfg
       else
         raise NotImplementedError
@@ -100,5 +100,28 @@ module InstDataShipper
       end
     end
   end
+
+  class ConfigURI
+    def initialize(uri)
+      @uri = uri
+    end
+
+    # delegate_missing_to :uri
+    delegate :scheme, :user, :password, :host, :hostname, :port, :path, :query, :fragment, to: :uri
+
+    def params
+      @params ||= (query.present? ? Rack::Utils.parse_nested_query(query).with_indifferent_access : {}).freeze
+    end
+
+    def hash_params
+      @hash_params ||= ((fragment.present? && fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(fragment).with_indifferent_access).presence || fragment || nil)&.freeze
+    end
+
+    private
+
+    def uri
+      @uri
+    end
+  end
 end
end
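`ConfigURI` gives destination classes memoized, indifferent-access views of a configuration URI's query string (`params`) and of a `key=value`-style fragment (`hash_params`, which now feeds `cfg[:user_config]`). Illustrative usage, assuming the class is namespaced under `Destinations` as the hunk's closing `end`s suggest:

uri = InstDataShipper::Destinations::ConfigURI.new(URI.parse("s3://bucket/path?region=us-east-1#foo=1"))
uri.params[:region] # => "us-east-1" (query string via Rack::Utils.parse_nested_query)
uri.hash_params     # => { "foo" => "1" } (fragment parsed because it matches /^\w+=/)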
data/lib/inst_data_shipper/destinations/hosted_data.rb
CHANGED
@@ -1,3 +1,5 @@
+require "faraday_middleware"
+
 module InstDataShipper
   module Destinations
     class HostedData < Base
@@ -15,7 +17,7 @@ module InstDataShipper
     end

     def chunk_data(generator, table:, extra: nil)
-      warehouse_name =
+      warehouse_name = table[:warehouse_name]

       super(generator) do |batch, idx|
         bits = [warehouse_name, extra, idx].compact
@@ -36,18 +38,18 @@ module InstDataShipper

     def upload_data_chunk(table_def, chunk)
       hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", artifacts: {
-        table_def
+        table_name(table_def) => [Faraday::UploadIO.new(chunk, 'application/gzip')],
       })
     end

     def finalize_dump
       hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", start_import: true) if hd_dump_id.present?
-      redis.
+      redis.del(rk(:state))
     end

     def cleanup_fatal_error
       hosted_data_client.delete("api/v1/custom_dumps/#{hd_dump_id}/", reason: 'Failure during extraction or transformation') if hd_dump_id.present?
-      redis.
+      redis.del(rk(:state))
     end

     # TODO Support/allow single-table fatal errors?
@@ -59,28 +61,25 @@ module InstDataShipper
     end

     def convert_schema
-      table_prefix = config[:table_prefix]
-      table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
-
       definititions = {}
       table_schemas.each do |ts|
         ts = ts.dup
+        tname = table_name(ts)

-
-        table_name = table_prefix + table_name if table_prefix.present?
-
-        definititions[ts[:warehouse_name]] = {
+        definititions[tname] = {
           dw_type: 'dimension',
           description: ts[:description],
-          incremental:
-          incremental_on: ts
+          incremental: dumper.table_is_incremental?(ts),
+          incremental_on: ts.dig(:incremental, :on),
           # indexed_columns
-          tableName:
+          tableName: tname,
           columns: ts[:columns].map do |col|
+            coltype = col[:type]
+            coltype ||= ts[:model].column_for_attribute(col[:from]).sql_type if col[:from].is_a?(String)
             {
               name: col[:warehouse_name],
               description: col[:description],
-              type:
+              type: coltype,
             }
           end,
         }
@@ -92,6 +91,14 @@ module InstDataShipper
       }
     end

+    def table_name(table_def)
+      table_prefix = config[:table_prefix]
+      table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
+      table_name = table_def[:warehouse_name]
+      table_name = table_prefix + table_name if table_prefix.present?
+      table_name
+    end
+
     def hosted_data_client
       @hosted_data_client ||= begin
         token = config[:token]
@@ -102,6 +109,8 @@ module InstDataShipper
         host = tok_content['host']
       end

+      host = "https://#{host}" unless host.include?('://')
+
       Faraday.new(url: host) do |faraday|
         faraday.request :multipart
         faraday.request :json
@@ -117,14 +126,16 @@ module InstDataShipper

     def parse_configuration(uri)
       super do |parsed_uri, cfg|
-        if parsed_uri.
+        if parsed_uri.user.present?
           # hosted-data://<JWT>:<hosted_data_domain>
-          cfg[:token] = parsed_uri.
+          cfg[:token] = parsed_uri.user
           cfg[:host] = parsed_uri.host
         else
           # hosted-data://<JWT>
           cfg[:token] = parsed_uri.host
         end
+
+        cfg[:table_prefix] = parsed_uri.params[:table_prefix]
       end
     end
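Configuration URIs for this destination can now carry a table prefix in the query string, and the token is read from the URI's user component when a host is given (the inline comment writes the form as hosted-data://<JWT>:<hosted_data_domain>, but the code reads `parsed_uri.user`, i.e. the part before "@"). A sketch, with the JWT and domain as placeholders:

jwt = "eyJhbGciOi..." # placeholder token
destination = "hosted-data://#{jwt}@hosteddata.example.com?table_prefix=staging"
# parse_configuration then yields, roughly:
#   cfg[:token]        => the JWT
#   cfg[:host]         => "hosteddata.example.com" (prefixed with https:// when no scheme is present)
#   cfg[:table_prefix] => "staging", so uploads target "staging_<warehouse_name>" tables via table_name()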
data/lib/inst_data_shipper/destinations/s3.rb
CHANGED
@@ -4,7 +4,7 @@ module InstDataShipper
     include Concerns::Chunking

     def chunk_data(generator, table:, extra: nil)
-      warehouse_name =
+      warehouse_name = table[:warehouse_name]

       super(generator) do |batch, idx|
         bits = [warehouse_name, extra, idx].compact
data/lib/inst_data_shipper/dumper.rb
CHANGED
@@ -5,7 +5,7 @@ module InstDataShipper
   define_hook :initialize_dump_batch
   define_hook :finalize_dump_batch

-  def self.perform_dump(destinations
+  def self.perform_dump(destinations)
     raise "Must subclass Dumper to use perform_dump" if self == Dumper

     dumper = new(destinations)
@@ -14,48 +14,134 @@ module InstDataShipper
     dumper.tracker
   end

-
-
-
+  def self.define(include: [], schema:, &blk)
+    Class.new(self) do
+      include(*include)

-
-
-
+      define_method(:enqueue_tasks, &blk)
+      define_method(:table_schemas) { schema }
+    end
   end

-
-    raise NotImplementedError
-  end
+  public

   def begin_dump
     raise "Dump already begun" unless @raw_destinations.present?

-    @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, status: 'in_progress')
+    @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')

-
-
+    @batch_context = context = {
+      # TODO Allow to be hooked by Destination, likely via initialize_dump_batch and batch_context, so that if an earlier destination fails we can resend data
+      # TODO Consider behavior if last is still running
+      incremental_since: DumpBatch.where(genre: export_genre, status: 'completed').order(created_at: :desc).first&.created_at,
+    }
+
+    begin
+      begin
+        destinations.each do |dest|
+          dest.initialize_dump()
+        end
+
+        run_hook(:initialize_dump_batch, context)
+      ensure
+        @batch_context = nil
+        context[:tracker_id] = tracker.id
+        context[:origin_class] = batch_context[:origin_class] || self.class.to_s
+        context[:destinations] = @raw_destinations
+      end
+
+      Sidekiq::Batch.new.tap do |batch|
+        context[:root_bid] = batch.bid
+
+        batch.description = "HD #{export_genre} Export #{tracker.id} Root"
+        batch.context = context
+        batch.on(:success, "#{self.class}#finalize_dump")
+        batch.on(:death, "#{self.class}#cleanup_fatal_error!")
+        batch.jobs do
+          enqueue_tasks
+        rescue => ex
+          delayed :cleanup_fatal_error!
+          InstDataShipper.handle_suppressed_error(ex)
+        end
+      end
+    rescue => ex
+      if context
+        batch ||= Sidekiq::Batch.new.tap do |batch|
+          batch.description = "HD #{export_genre} Export #{tracker.id} Early Failure Cleanup"
+          batch.context = context
+          batch.jobs do
+            delayed :cleanup_fatal_error!
+          end
+        end
+      end
+      raise ex
     end
+  end

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  def tracker
+    @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
+  end
+
+  def export_genre
+    self.class.to_s.gsub(/HD|ExportJob/, '')
+  end
+
+  def origin_class
+    batch_context[:origin_class]&.constantize || self.class
+  end
+
+  def table_is_incremental?(table_def)
+    if (inc = table_def[:incremental]).present?
+      differ = inc[:if]
+      return !!incremental_since if differ.nil?
+
+      differ = :"#{differ}".to_proc if differ.is_a?(Symbol)
+      differ = instance_exec(&differ) if differ.is_a?(Proc)
+      return !!differ
+    end
+
+    false
+  end
+
+  def incremental_since
+    batch_context[:incremental_since]
+  end
+
+  def lookup_table_schema(*identifiers)
+    identifiers.compact.each do |ident|
+      if ident.is_a?(Hash)
+        key = ident.keys.first
+        value = ident.values.first
+      else
+        key = :warehouse_name
+        value = ident
+      end
+
+      value = Array(value).compact
+
+      table_schemas.each do |ts|
+        return ts if value.include?(ts[key])
       end
     end

-
+    nil
+  end
+
+  def lookup_table_schema!(*identifiers)
+    lookup_table_schema(*identifiers) || raise("No table schema found for #{identifiers.inspect}")
+  end
+
+  protected
+
+  attr_reader :executor
+
+  def initialize(destinations = nil, executor: nil)
+    @raw_destinations = Array(destinations)
+    @executor = executor
+  end
+
+  def enqueue_tasks
+    raise NotImplementedError
   end

   def upload_data(table_def, extra: nil, &datagen)
@@ -96,7 +182,7 @@ module InstDataShipper
   def finalize_dump(_status, _opts)
     run_hook(:finalize_dump_batch)

-
+    destinations.each do |dest|
       dest.finalize_dump
     end

@@ -108,14 +194,15 @@ module InstDataShipper

     run_hook(:finalize_dump_batch)

-
+    destinations.each do |dest|
       dest.cleanup_fatal_error
-    rescue
+    rescue => ex
+      InstDataShipper.handle_suppressed_error(ex)
     end

     DumpBatch.find(batch_context[:tracker_id]).update(status: 'failed')

-    CanvasSync::JobBatches::Batch.delete_prematurely!(batch_context[:root_bid])
+    CanvasSync::JobBatches::Batch.delete_prematurely!(batch_context[:root_bid]) if batch_context[:root_bid].present?
   end

   # Helper Methods
@@ -126,23 +213,17 @@ module InstDataShipper
   end

   def delayed(mthd, *args, **kwargs)
-    AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
-  end
-
-  def tracker
-    @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
+    Jobs::AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
   end

-
-    self.class.to_s.gsub(/HD|ExportJob/, '')
-  end
+  delegate :working_dir, to: :executor

-  def
-
+  def batch
+    Thread.current[CanvasSync::JobBatches::CURRENT_BATCH_THREAD_KEY]
   end

-  def
-
+  def batch_context
+    @batch_context || batch&.context || {}
   end

   def destinations_for_table(table_def)
@@ -150,7 +231,7 @@ module InstDataShipper
   end

   def destinations
-    @destinations ||= (@raw_destinations || batch_context[:destinations]).map.with_index do |dest, i|
+    @destinations ||= (@raw_destinations.presence || batch_context[:destinations]).map.with_index do |dest, i|
       dcls = InstDataShipper.resolve_destination(dest)
       dcls.new("#{InstDataShipper.redis_prefix}:dump#{tracker.id}:dest#{i}", dest, self)
     end
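`Dumper.define` builds a one-off dumper class: the block becomes `enqueue_tasks` and the passed schema becomes `table_schemas`. A sketch under stated assumptions (the `LocalTables` module name is inferred from its file path, `import_local_table` is assumed to be its public wrapper, and `User` plus the destination URI are illustrative):

schema = InstDataShipper::SchemaBuilder.build do
  table(User, "All users") do
    column :id, :bigint
    column :email, :string
  end
end

UserDumper = InstDataShipper::Dumper.define(
  include: [InstDataShipper::DataSources::LocalTables],
  schema: schema,
) do
  import_local_table(User) # runs inside the root Sidekiq batch
end

UserDumper.perform_dump(["hosted-data://<JWT>@hosteddata.example.com"])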
data/lib/inst_data_shipper/engine.rb
CHANGED
@@ -4,5 +4,11 @@ module InstDataShipper
   class Engine < ::Rails::Engine
     isolate_namespace InstDataShipper

+    initializer :append_migrations do |app|
+      config.paths["db/migrate"].expanded.each do |expanded_path|
+        app.config.paths["db/migrate"] << expanded_path
+      end
+      ActiveRecord::Migrator.migrations_paths = Rails.application.paths['db/migrate'].to_a
+    end
   end
 end
data/lib/inst_data_shipper/jobs/async_caller.rb
CHANGED
@@ -1,7 +1,14 @@
+
+require "sidekiq"
+
 module InstDataShipper
   module Jobs
     class AsyncCaller < InstDataShipper::Jobs::Base
-      sidekiq_options
+      sidekiq_options(retry: 0) if defined?(sidekiq_options)
+
+      def self.get_sidekiq_options
+        { retry: 0 }
+      end

       def self.call_from_pool(pool, clazz, method, *args, **kwargs)
         pool.add_job(
@@ -12,7 +19,8 @@ module InstDataShipper
       end

       def perform(clazz, method, *args, **kwargs)
-        clazz.constantize
+        clazz = clazz.constantize if clazz.is_a?(String)
+        clazz.new(executor: self).send(method.to_sym, *args, **kwargs)
       end
     end
   end
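`perform` now instantiates the target class with the job as its executor, which is what backs `delegate :working_dir, to: :executor` in dumper.rb. `Dumper#delayed(:cleanup_fatal_error!)` therefore expands to roughly:

InstDataShipper::Jobs::AsyncCaller.perform_later("UserDumper", "cleanup_fatal_error!")
# ...which the worker then executes as:
#   UserDumper.new(executor: job).send(:cleanup_fatal_error!)

(`UserDumper` being any Dumper subclass, such as the sketch above.)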
data/lib/inst_data_shipper/jobs/basic_dump_job.rb
CHANGED
@@ -3,9 +3,13 @@ module InstDataShipper
   class BasicDumpJob < InstDataShipper::Jobs::Base
     sidekiq_options retry: 3 if defined?(sidekiq_options)

-    def perform(endpoints
-
+    def perform(endpoints)
+
     end
+
+    protected
+
+
   end
  end
 end
data/lib/inst_data_shipper/schema_builder.rb
CHANGED
@@ -1,4 +1,6 @@
 module InstDataShipper
+  # This class ends up fill two roles - Schema and Mapping.
+  # It makes for a clean API, but it's a little less canonical since, (eg) the S3 destination doesn't need column type annotations.
   class SchemaBuilder
     attr_reader :tables

@@ -12,24 +14,40 @@ module InstDataShipper
       builder.tables
     end

-    def
-
-
+    def extend_table_builder(&block)
+      @table_builder_class ||= Class.new(TableSchemaBuilder)
+      @table_builder_class.class_eval(&block)
+    end

+    def table(model_or_name, description = nil, model: nil, query: nil, **extra, &block)
       tdef = {
+        warehouse_name: nil,
         description: description,
-        model: model_or_name.is_a?(String) ? nil : model_or_name,
-        warehouse_name: as.to_s,
-        incremental: incremental,
         columns: [],
-
+
+        model: model,
+        query: query,
+        **extra,
       }

-
+      if model_or_name.is_a?(ActiveRecord::Relation)
+        raise "model specified twice" if model.present?
+        raise "query specified twice" if query.present?
+
+        tdef[:query] = model_or_name
+        tdef[:model] = model_or_name.model
+      elsif model_or_name.is_a?(Class) && model_or_name < ActiveRecord::Base
+        tdef[:warehouse_name] = model_or_name.table_name
+        tdef[:model] = model_or_name
+      else
+        tdef[:warehouse_name] = model_or_name
+      end
+
+      @table_builder_class.build(tdef, &block)

       @tables << tdef

-
+      tdef
     end

     class TableSchemaBuilder
@@ -46,48 +64,82 @@ module InstDataShipper
         builder.columns
       end

-      #
-
-
-
+      # def annotate(key, value)
+      #   options[key] = value
+      # end
+
+      def incremental(scope="updated_at", **kwargs)
+        if (extras = kwargs.keys - %i[on if]).present?
+          raise ArgumentError, "Unsuppored options: #{extras.inspect}"
+        end
+
+        options[:incremental] = {
+          on: Array(kwargs[:on]),
+          scope: scope,
+          if: kwargs[:if],
+        }
+      end
+
+      def column(name, *args, refs: [], from: nil, **extra, &block)
+        from ||= name.to_s

         cdef = {
-
-
-
+          warehouse_name: name.to_s,
+          from: from,
+          **extra,
         }

-        [
-
-          k.each do |hk, hv|
-            cdef[hv] = kwargs.delete(hk) if kwargs.key?(hk)
-          end
-        elsif kwargs.key?(k)
-          cdef[k] = kwargs.delete(k)
-        end
+        if args[0].is_a?(Symbol)
+          cdef[:type] = args.shift()
         end

-
-
-
-          k = (a.is_a?(String) && :description) || (a.is_a?(Symbol) && :type) || nil
-          raise ArgumentError, 'Unsupported Argument' if k.nil?
-          raise ArgumentError, "Duplicate Argument for #{k}" if cdef.key?(k)
+        if args[0].is_a?(String)
+          cdef[:description] = args.shift()
+        end

-
+        if args.present?
+          raise ArgumentError, "Received unexpected arguments: #{args.inspect}"
         end

+        cdef[:references] = Array(refs)
+
         if options[:model].is_a?(Class) && cdef[:local_name].to_s.ends_with?('_id')
           rel_name = cdef[:local_name].to_s[0...-3]
           refl = options[:model].reflections[rel_name]
           cdef[:references] << "#{refl.klass}##{refl.options[:primary_key] || 'id'}" if refl.present? && !refl.polymorphic?
         end

+        compiled_from = compile_transformer(from)
+
+        cdef[:block] = ->(row) {
+          value = instance_exec(row, &compiled_from)
+          value = instance_exec(value, row, &block) if block.present?
+          value
+        }
+
         @columns << cdef

-
+        cdef
       end
-
+
+      protected
+
+      def compile_transformer(from)
+        if from.present?
+          if from.is_a?(Symbol)
+            ->(row) { row.send(from) }
+          elsif from.is_a?(Proc)
+            from
+          elsif from.is_a?(String)
+            ->(row) { row[from] }
+          else
+            raise ArgumentError, "Invalid transformer: #{from.inspect}"
+          end
+        else
+          ->(row) { row }
+        end
+      end
+
     end
   end
 end
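Taken together, `table` now accepts a model class, a relation, or a plain warehouse name, and `column` compiles a transformer chain into `cdef[:block]`: a Symbol `from` calls that method on the row, a String indexes into the row, a Proc is used as-is, and any given block post-processes the fetched value. A DSL sketch (model and column names are illustrative):

schema = InstDataShipper::SchemaBuilder.build do
  table(User, "All users in the account") do
    incremental("updated_at", on: ["updated_at"])

    column :id, :bigint
    column :full_name, :string, from: :name     # row.name
    column :email, :string do |value, row|      # post-process the fetched value
      value&.downcase
    end
  end

  # Plain warehouse name, eg for a Canvas report; String froms index the CSV row
  table("provisioning_csv", "Provisioning report") do
    column :canvas_user_id, :bigint, from: "canvas_user_id"
  end
end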
data/lib/inst_data_shipper.rb
CHANGED
@@ -23,13 +23,20 @@ module InstDataShipper
       destination = @destination_aliases[type]
     end

-
+    destination.constantize
   end

   def start_basic_dump(*args, **kwargs, &block)
     BasicDumper.perform_dump(*args, **kwargs, &block)
   end

+  def handle_suppressed_error(ex)
+    logger.error "Suppressed Error: #{ex.message}"
+    logger.error ex.backtrace.join("\n")
+    Raven.capture_exception(ex) if defined?(Raven)
+    Sentry.capture_exception(ex) if defined?(Sentry)
+  end
+
   def logger
     return @logger if defined? @logger
     @logger = Logger.new(STDOUT)
@@ -66,6 +73,8 @@ Dir[File.dirname(__FILE__) + "/inst_data_shipper/destinations/*.rb"].each do |file|
   basename = File.basename(file, ".rb")
   next if basename == "base"

-  InstDataShipper.alias_destination(basename.dasherize, "InstDataShipper::Destinations::#{basename.
+  InstDataShipper.alias_destination(basename.dasherize, "InstDataShipper::Destinations::#{basename.camelize}")
 end

+require "inst_data_shipper/dumper"
+require "inst_data_shipper/basic_dumper"
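Destination aliases are registered from file basenames at load time, so resolution is name-based. Roughly (the scheme-to-alias extraction happens above this hunk and is assumed here):

InstDataShipper.resolve_destination("hosted-data://<JWT>")
# => InstDataShipper::Destinations::HostedData
#    ("hosted_data.rb" registers alias "hosted-data"; the class name comes from basename.camelize)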
data/spec/spec_helper.rb
CHANGED
@@ -7,7 +7,7 @@ require File.expand_path("../dummy/config/environment.rb", __FILE__)
 require "bundler/setup"
 require 'rspec/rails'
 require 'spec_helper'
-require '
+require 'factory_bot_rails'
 require 'timecop'
 require 'webmock/rspec'
 require 'support/fake_canvas'
@@ -29,7 +29,7 @@ ActiveRecord::Migration.maintain_test_schema!
 RSpec.configure do |config|
   config.extend WithModel

-  config.include
+  config.include FactoryBot::Syntax::Methods
   config.use_transactional_fixtures = true
   config.infer_spec_type_from_file_location!
   config.filter_rails_from_backtrace!
metadata
CHANGED
@@ -1,27 +1,27 @@
 --- !ruby/object:Gem::Specification
 name: inst_data_shipper
 version: !ruby/object:Gem::Version
-  version: 0.1.0.beta1
+  version: 0.1.0.beta2
 platform: ruby
 authors:
 - Instructure CustomDev
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-03-
+date: 2024-03-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rails
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
     - !ruby/object:Gem::Version
       version: '6.0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
     - !ruby/object:Gem::Version
       version: '6.0'
 - !ruby/object:Gem::Dependency
@@ -360,6 +360,20 @@ dependencies:
   - - ">="
   - !ruby/object:Gem::Version
     version: '0'
+- !ruby/object:Gem::Dependency
+  name: faraday_middleware
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
 description:
 email:
 - pseng@instructure.com
@@ -369,8 +383,8 @@ extra_rdoc_files: []
 files:
 - README.md
 - Rakefile
-- app/models/hosted_data_dumper/dump_batch.rb
-- db/migrate/20240301090836_create_canvas_sync_sync_batches.rb
+- app/models/inst_data_shipper/dump_batch.rb
+- db/migrate/20240301090836_create_inst_data_shipper_dump_batches.rb
 - lib/inst_data_shipper.rb
 - lib/inst_data_shipper/basic_dumper.rb
 - lib/inst_data_shipper/concerns/hooks.rb

data/app/models/{hosted_data_dumper → inst_data_shipper}/dump_batch.rb
File without changes