inst_data_shipper 0.1.0.beta1
- checksums.yaml +7 -0
- data/README.md +35 -0
- data/Rakefile +21 -0
- data/app/models/hosted_data_dumper/dump_batch.rb +10 -0
- data/db/migrate/20240301090836_create_canvas_sync_sync_batches.rb +17 -0
- data/lib/inst_data_shipper/basic_dumper.rb +27 -0
- data/lib/inst_data_shipper/concerns/hooks.rb +32 -0
- data/lib/inst_data_shipper/data_sources/base.rb +7 -0
- data/lib/inst_data_shipper/data_sources/canvas_reports.rb +113 -0
- data/lib/inst_data_shipper/data_sources/local_tables.rb +33 -0
- data/lib/inst_data_shipper/destinations/base.rb +104 -0
- data/lib/inst_data_shipper/destinations/concerns/chunking.rb +34 -0
- data/lib/inst_data_shipper/destinations/hosted_data.rb +133 -0
- data/lib/inst_data_shipper/destinations/s3.rb +72 -0
- data/lib/inst_data_shipper/dumper.rb +159 -0
- data/lib/inst_data_shipper/engine.rb +8 -0
- data/lib/inst_data_shipper/jobs/async_caller.rb +19 -0
- data/lib/inst_data_shipper/jobs/base.rb +27 -0
- data/lib/inst_data_shipper/jobs/basic_dump_job.rb +11 -0
- data/lib/inst_data_shipper/record.rb +6 -0
- data/lib/inst_data_shipper/schema_builder.rb +93 -0
- data/lib/inst_data_shipper/version.rb +3 -0
- data/lib/inst_data_shipper.rb +71 -0
- data/spec/dummy/README.rdoc +1 -0
- data/spec/dummy/Rakefile +6 -0
- data/spec/dummy/bin/rails +4 -0
- data/spec/dummy/config/application.rb +37 -0
- data/spec/dummy/config/boot.rb +5 -0
- data/spec/dummy/config/database.yml +25 -0
- data/spec/dummy/config/environment.rb +5 -0
- data/spec/dummy/config/environments/development.rb +41 -0
- data/spec/dummy/config/environments/test.rb +44 -0
- data/spec/dummy/config/initializers/assets.rb +11 -0
- data/spec/dummy/config/initializers/session_store.rb +3 -0
- data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/spec/dummy/config/routes.rb +2 -0
- data/spec/dummy/config/secrets.yml +22 -0
- data/spec/dummy/config.ru +4 -0
- data/spec/dummy/db/schema.rb +45 -0
- data/spec/spec_helper.rb +70 -0
- data/spec/support/fixtures/reports/provisioning_csv_unzipped/courses.csv +3 -0
- data/spec/support/fixtures/reports/provisioning_csv_unzipped/users.csv +4 -0
- metadata +452 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: bf2f1cdd4b4181e945c5f36e7680ed0a054429dc191197fafbee60de9598305b
+  data.tar.gz: 5fb781dc8aa17bf7d672fdfc8942d70365edb68fd324d8aeab70297270af0b1e
+SHA512:
+  metadata.gz: 9212cd9c647193aa7256f15f6da12cd4ee3c56a12e011ac269a1d801d15e0cb7182a71c2fa8d8e2d2ea808aff73ff4f7c974c3720db54414eb43c24658ca554f
+  data.tar.gz: c4cb69ad7ea635833aa5051dec5a8c14f3aa13e2b11dd3e8fbdd4d12c2a9d63ac9dbb5b235915da0d80898d0b048d5677fb807d6947911e3e10a87956b8ee1fc
data/README.md
ADDED
@@ -0,0 +1,35 @@
+# InstDataShipper
+
+This gem facilitates fast and easy syncing of Canvas data.
+
+## Installation
+
+Add this line to your application's Gemfile:
+
+```ruby
+gem 'inst_data_shipper'
+```
+
+Then run the migrations:
+
+```
+bundle exec rake db:migrate
+```
+
+## Development
+
+When adding to or updating this gem, make sure you do the following:
+
+- Update the yardoc comments where necessary, and confirm the changes by running `yard server --reload`
+- Write specs
+- If you modify the model or migration templates, run `bundle exec rake update_test_schema` to update them in the Rails Dummy application (and commit those changes)
+
+## Docs
+
+Docs can be generated using [yard](https://yardoc.org/). To view the docs:
+
+- Clone this gem's repository
+- `bundle install`
+- `yard server --reload`
+
+The yard server will give you a URL you can visit to view the docs.
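For orientation, here is a minimal sketch of how the pieces in this diff appear to fit together, based on the `BasicDumper.perform_dump` signature and the `DataSources` helpers shown below. The schema constant, destination URI, table name, and report name are all placeholders:

```ruby
# Hypothetical usage — every name here is illustrative.
# `schema:` must be a constantizable String (see basic_dumper.rb below);
# it should resolve to the table-schema definition built with SchemaBuilder.
InstDataShipper::BasicDumper.perform_dump(
  destinations: ["s3://access_key:secret@us-east-1/my-bucket/exports"],
  schema: "MyApp::WAREHOUSE_SCHEMA",
) do
  # This block runs via instance_exec inside the dumper, so the DataSources
  # helpers (import_local_table, import_canvas_report, ...) are in scope.
  import_local_table("Course")
  import_canvas_report("users", "provisioning_csv")
end
```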
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
+require "bundler/gem_tasks"
+require "rspec/core/rake_task"
+require "open3"
+
+RSpec::Core::RakeTask.new(:spec)
+
+task default: :spec
+
+desc "Rebuilds the test database and schema used by the Rails Dummy app; run this whenever the gem's migrations change."
+task :update_test_schema do
+  puts "Updating the test database and schema..."
+  stream_command("cd spec/dummy; bundle exec rake db:drop; bundle exec rake db:create; bundle exec rake db:migrate")
+end
+
+def stream_command(cmd)
+  Open3.popen2e(cmd) do |_stdin, stdout_stderr, _wait_thr|
+    while line = stdout_stderr.gets
+      puts line
+    end
+  end
+end
data/db/migrate/20240301090836_create_canvas_sync_sync_batches.rb
ADDED
@@ -0,0 +1,17 @@
+class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
+  def change
+    create_table :inst_data_shipper_dump_batches do |t|
+      t.datetime :started_at
+      t.datetime :completed_at
+      t.string :status
+
+      t.string :job_class
+      t.string :exception
+      t.text :backtrace
+      t.text :metadata
+      t.text :job_arguments
+
+      t.timestamps
+    end
+  end
+end
data/lib/inst_data_shipper/basic_dumper.rb
ADDED
@@ -0,0 +1,27 @@
+module InstDataShipper
+  class BasicDumper < Dumper
+    def self.perform_dump(destinations:, schema:, &block)
+      raise "Schema must be a constantizable string" unless schema.is_a?(String)
+
+      dumper = new(destinations)
+      dumper.instance_variable_set(:@schema_pointer, schema)
+      dumper.instance_variable_set(:@body_block, block)
+      dumper.begin_dump
+
+      dumper.tracker
+    end
+
+    hook :initialize_dump_batch do |context|
+      context[:schema_pointer] = @schema_pointer
+    end
+
+    def enqueue_tasks
+      instance_exec(&@body_block)
+    end
+
+    def table_schemas
+      pointer = @schema_pointer || batch_context[:schema_pointer]
+      safe_constantize(pointer)
+    end
+  end
+end
data/lib/inst_data_shipper/concerns/hooks.rb
ADDED
@@ -0,0 +1,32 @@
+module InstDataShipper
+  module Hooks
+    extend ActiveSupport::Concern
+
+    class_methods do
+      def define_hook(name)
+        @hooks ||= {}
+        @hooks[name] ||= []
+      end
+
+      def hook(name, prepend: false, &block)
+        hooks = @hooks[name]
+        prepend ? hooks.unshift(block) : hooks << block
+      end
+    end
+
+    def run_hook(name, *args, **kwargs)
+      hooks = (self.class.instance_variable_get(:@hooks) || {}).fetch(name, []) # hook registrations live on the class
+      hooks.each do |blk|
+        instance_exec(*args, **kwargs, &blk)
+      end
+    end
+
+    def run_hook_safe(name, *args, **kwargs)
+      hooks = (self.class.instance_variable_get(:@hooks) || {}).fetch(name, []) # hook registrations live on the class
+      hooks.each do |blk|
+        instance_exec(*args, **kwargs, &blk)
+      rescue StandardError
+      end
+    end
+  end
+end
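A quick sketch of the intended flow of this concern, as far as it can be read from this diff (`BasicDumper` above registers `hook :initialize_dump_batch`, which the dump presumably fires via `run_hook`). The class and hook names here are illustrative:

```ruby
class HypotheticalDumper
  include InstDataShipper::Hooks

  define_hook :before_upload          # register an (empty) handler list

  hook :before_upload do |context|    # append a handler; prepend: true would unshift it
    context[:prepared] = true         # runs via instance_exec, so instance state is in scope
  end
end

dumper = HypotheticalDumper.new
dumper.run_hook(:before_upload, {})       # propagates handler errors
dumper.run_hook_safe(:before_upload, {})  # swallows StandardError per handler
```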
data/lib/inst_data_shipper/data_sources/canvas_reports.rb
ADDED
@@ -0,0 +1,113 @@
+module InstDataShipper
+  module DataSources
+    # This module contains the logic for processing Canvas reports
+    module CanvasReports
+      extend ActiveSupport::Concern
+
+      included do
+        hook :initialize_dump_batch do |context|
+          report_processor_pool = CanvasSync::JobBatches::Pool.new(
+            description: "HD #{export_genre} Export #{tracker.id} Canvas Report Pool",
+            concurrency: 4,
+            clean_when_empty: false,
+          )
+          context[:report_processor_pool] = report_processor_pool.pid
+        end
+
+        hook :finalize_dump_batch do
+          if batch_context[:report_processor_pool]
+            CanvasSync::JobBatches::Pool.from_pid(batch_context[:report_processor_pool]).cleanup_redis
+          end
+        end
+      end
+
+      public
+
+      def import_canvas_report(*args, **kwargs)
+        _in_canvas_report_pool(:_import_canvas_report, *args, **kwargs)
+      end
+
+      def import_canvas_report_by_terms(target_table, report_name, terms: [], params: {}, **kwargs)
+        term_ids = (terms || []).map do |term|
+          term.is_a?(Term) ? term.canvas_id : term
+        end
+
+        Sidekiq::Batch.new.tap do |b|
+          b.description = "Term Scoped #{report_name} Runners"
+          b.context = {
+            report_bid: b.bid,
+          }
+          b.jobs do
+            terms_query = term_ids.present? ? Term.where(canvas_id: term_ids) : Term
+            terms_query.find_each do |t|
+              import_canvas_report(target_table, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
+            end
+          end
+        end
+      end
+
+      def import_existing_report(table, report)
+        delayed(:_process_canvas_report, table, report: report)
+      end
+
+      private
+
+      def _import_canvas_report(target_table, report_name, retry_count: 3, params: {}, **kwargs)
+        report = canvas_sync_client.start_report(
+          'self', report_name,
+          parameters: params,
+        )
+
+        CanvasSync::Jobs::CanvasProcessWaiter.perform_later(
+          "/api/v1/accounts/self/reports/#{report_name}/#{report[:id]}",
+          {
+            instance_of: origin_class,
+            method: :_process_canvas_report,
+            args: [target_table],
+            kwargs: kwargs,
+          },
+          on_failure: {
+            instance_of: origin_class,
+            method: :_handle_failed_canvas_report,
+            args: [target_table, report_name, kwargs],
+            kwargs: { retry_count: retry_count },
+          },
+          status_key: :status,
+          progress_as: :report,
+        )
+      end
+
+      def _in_canvas_report_pool(mthd, *args, **kwargs)
+        pool = CanvasSync::JobBatches::Pool.from_pid(batch_context[:report_processor_pool])
+        AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
+      end
+
+      def _process_canvas_report(table, report:)
+        table_def = table_schemas.find { |t| t[:warehouse_name].to_s == table }
+
+        IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/#{table}.csv")
+
+        inner_block = ->(file) {
+          CSV.foreach("#{working_dir}/#{table}.csv", headers: true) do |m|
+            file << table_def[:columns].map do |c|
+              c[:transformer].present? ? m.instance_exec(&c[:transformer]) : m[c[:local_name].to_s]
+            end
+          end
+        }
+
+        upload_data(table_def, extra: report['id'], &inner_block)
+      end
+
+      def _handle_failed_canvas_report(table, report_name, kwargs, retry_count:, report:) # rubocop:disable Lint/UnusedMethodArgument
+        if retry_count.positive?
+          tbid = batch_context[:report_bid] || batch_context[:root_bid]
+          Sidekiq::Batch.new(tbid).jobs do
+            import_canvas_report(table, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
+          end
+        else
+          cleanup_fatal_error!
+        end
+      end
+    end
+  end
+end
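To make the fan-out concrete, a hedged example of calling the term-scoped import above from within a dumper; the table, report name, and params are placeholders. Each matching term gets its own report run inside one Sidekiq batch:

```ruby
# Illustrative — runs one provisioning report per listed enrollment term.
import_canvas_report_by_terms(
  "enrollments",                      # target warehouse table
  "provisioning_csv",                 # Canvas report to start
  terms: [42, 43],                    # Term records or raw canvas_ids both work
  params: { "enrollments" => true },  # merged with enrollment_term_id per term
)
```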
data/lib/inst_data_shipper/data_sources/local_tables.rb
ADDED
@@ -0,0 +1,33 @@
+module InstDataShipper
+  module DataSources
+    # This module contains the logic for processing local AR tables
+    module LocalTables
+      extend ActiveSupport::Concern
+
+      public
+
+      def import_local_table(*args, **kwargs)
+        delayed(:_import_local_table, *args, **kwargs)
+      end
+
+      private
+
+      def _import_local_table(table_name)
+        table_def = table_schemas.find { |t| t[:model].to_s == table_name }
+        model = table_def[:model]
+
+        inner_block = ->(file) {
+          query = model
+          query = query.includes(table_def[:includes]) if table_def[:includes].present?
+          query.find_each do |m|
+            file << table_def[:columns].map do |c|
+              c[:transformer].present? ? m.instance_exec(&c[:transformer]) : m[c[:local_name].to_s]
+            end
+          end
+        }
+
+        upload_data(table_def, &inner_block)
+      end
+    end
+  end
+end
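Note that `_import_local_table` looks the table up by `t[:model].to_s`, so the public helper takes the model's class name as a string; a one-line sketch (model name illustrative):

```ruby
import_local_table("Course")  # matches the schema entry whose :model is Course
```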
data/lib/inst_data_shipper/destinations/base.rb
ADDED
@@ -0,0 +1,104 @@
+module InstDataShipper
+  module Destinations
+    class Base
+      attr_reader :dumper
+
+      delegate :tracker, :table_schemas, :working_dir, to: :dumper
+
+      def initialize(cache_key, config, dumper)
+        @cache_key = cache_key
+        @_config = config
+        @dumper = dumper
+      end
+
+      # This method is called before processing any data.
+      # It should be used to initialize any external resources needed for the dump.
+      def initialize_dump; end
+
+      # Yields an object (can be anything) that will be passed to `upload_data_chunk` as `chunk`.
+      #
+      # If multiple Destinations have the same `group_key`, `chunk_data` will only be called on the first and the chunk will be passed to each destination.
+      # Thus, if chunking is config-dependent, your Destination must modify the `group_key` to be unique for each configuration.
+      #
+      # This must be overridden, but you may call super with a block to iterate individual rows. Manually batch the rows, or include Concerns::Chunking to pre-batch them.
+      def chunk_data(generator, **kwargs)
+        raise NotImplementedError if method(__method__).owner == Base
+
+        enum = Enumerator.new(&generator)
+        enum.each do |row|
+          yield format_row(row)
+        end
+      end
+
+      # Called with any values yielded from chunk_data.
+      # This method should upload the chunk to the destination.
+      def upload_data_chunk(table_def, chunk)
+        raise NotImplementedError
+      end
+
+      # This method is called after processing all data.
+      # It should be used to finalize any external resources created by the dump.
+      def finalize_dump; end
+
+      # This method is called if a fatal error occurs.
+      # It should clean up any external resources created by the dump.
+      def cleanup_fatal_error; end
+
+      def config
+        return @_config if @_config.is_a?(Hash)
+        @config ||= parse_configuration(@_config)
+      end
+
+      def user_config
+        config[:extra]
+      end
+
+      def group_key
+        { class: self.class }
+      end
+
+      protected
+
+      def parse_configuration(uri)
+        if block_given?
+          parsed = URI.parse(uri)
+          cfg = {
+            params: parsed.query.present? ? Rack::Utils.parse_nested_query(parsed.query) : {},
+            extra: (parsed.fragment.present? && parsed.fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(parsed.fragment)).presence || parsed.fragment || nil,
+          }
+          yield parsed, cfg
+          cfg
+        else
+          raise NotImplementedError
+        end
+      end
+
+      def rk(key)
+        "#{@cache_key}:#{key}"
+      end
+
+      def redis(*args, &blk)
+        InstDataShipper.redis(*args, &blk)
+      end
+
+      # This is a base/generic implementation and may need to be overridden
+      def format_row(row, override_nils: true)
+        if row.is_a?(Array)
+          row = row.map do |v|
+            v = '\N' if v.nil? && override_nils
+            v = v.utc.strftime('%Y-%m-%d %H:%M:%S') if v.is_a?(DateTime) || v.is_a?(Time)
+            v = v.strftime('%Y-%m-%d') if v.is_a?(Date)
+            v = JSON.dump(v) if v.is_a?(Hash) || v.is_a?(Array)
+            if v.is_a?(String)
+              v = v.gsub("\t", '\t')
+              v = v.gsub("\n", '\n')
+            end
+            v
+          end
+        end
+        row
+      end
+
+    end
+  end
+end
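To make the `Base` contract concrete, a minimal hypothetical destination written against the API above (the real destinations in this diff also include `Concerns::Chunking`, as this sketch does; the scheme and class are invented):

```ruby
require 'fileutils'

module InstDataShipper
  module Destinations
    # Hypothetical destination that copies each chunk file into a local directory.
    class LocalFile < Base
      include Concerns::Chunking

      # Receives pre-batched rows via Chunking, writes each batch to a TSV
      # file, and yields the path as the "chunk" for upload_data_chunk.
      def chunk_data(generator, table:, extra: nil)
        super(generator) do |batch, idx|
          path = "#{working_dir}/#{table[:warehouse_name]}.#{idx}.tsv"
          File.write(path, batch.map { |r| r.join("\t") }.join("\n"))
          yield path
          File.delete(path)  # mirrors HostedData: upload happens during yield
        end
      end

      def upload_data_chunk(_table_def, chunk)
        FileUtils.cp(chunk, config[:directory])
      end

      def parse_configuration(uri)
        # e.g. local-file:///var/dumps — the path becomes the target directory
        super do |parsed_uri, cfg|
          cfg[:directory] = parsed_uri.path
        end
      end
    end
  end
end
```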
data/lib/inst_data_shipper/destinations/concerns/chunking.rb
ADDED
@@ -0,0 +1,34 @@
+module InstDataShipper
+  module Destinations
+    module Concerns
+      module Chunking
+        extend ActiveSupport::Concern
+
+        DEFAULT_CHUNK_SIZE = 100_000
+
+        def chunk_data(generator, chunk_size: nil, **kwargs)
+          chunk_size ||= config.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
+          slice = 1
+
+          btchr = CanvasSync::BatchProcessor.new(of: chunk_size) do |batch|
+            yield batch, slice
+            slice += 1
+          end
+
+          super(generator, **kwargs) do |row|
+            btchr << row
+          end
+
+          btchr.flush
+        end
+
+        def group_key
+          super.tap do |k|
+            k[:chunk_size] = config.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
+          end
+        end
+
+      end
+    end
+  end
+end
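Since this reads `config.dig(:params, :chunk_size)`, and `Base#parse_configuration` fills `:params` from the destination URI's query string, the chunk size looks tunable per destination. An illustrative URI (placeholder credentials, and assuming the params hash is treated with indifferent access and the value is cast to an Integer upstream):

```ruby
# 50k-row chunks instead of the 100_000 default:
"s3://access_key:secret@us-east-1/my-bucket/exports?chunk_size=50000"
```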
data/lib/inst_data_shipper/destinations/hosted_data.rb
ADDED
@@ -0,0 +1,133 @@
+module InstDataShipper
+  module Destinations
+    class HostedData < Base
+      include Concerns::Chunking
+
+      def initialize_dump
+        dump = hosted_data_client.post(
+          'api/v1/custom_dumps/',
+          reference_id: tracker.id,
+          schema: convert_schema,
+        ).body.with_indifferent_access
+
+        redis.hset(rk(:state), :dump_id, dump[:id])
+        redis.expire(rk(:state), 30.days.to_i)
+      end
+
+      def chunk_data(generator, table:, extra: nil)
+        warehouse_name = table[:warehouse_name]
+
+        super(generator) do |batch, idx|
+          bits = [warehouse_name, extra, idx].compact
+          temp_file = "#{working_dir}/#{bits.join('.')}.tsv.gz"
+
+          Zlib::GzipWriter.open(temp_file) do |gz|
+            batch.each do |row|
+              row = row.join("\t") if row.is_a?(Array)
+              gz.puts(row)
+            end
+          end
+
+          yield temp_file
+
+          File.delete(temp_file)
+        end
+      end
+
+      def upload_data_chunk(table_def, chunk)
+        hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", artifacts: {
+          table_def[:warehouse_name] => [Faraday::UploadIO.new(chunk, 'application/gzip')],
+        })
+      end
+
+      def finalize_dump
+        hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", start_import: true) if hd_dump_id.present?
+        redis.del(rk(:state))
+      end
+
+      def cleanup_fatal_error
+        hosted_data_client.delete("api/v1/custom_dumps/#{hd_dump_id}/", reason: 'Failure during extraction or transformation') if hd_dump_id.present?
+        redis.del(rk(:state))
+      end
+
+      # TODO: Support/allow single-table fatal errors?
+
+      protected
+
+      def hd_dump_id
+        @hd_dump_id ||= redis.hget(rk(:state), :dump_id)
+      end
+
+      def convert_schema
+        table_prefix = config[:table_prefix]
+        table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
+
+        definitions = {}
+        table_schemas.each do |ts|
+          ts = ts.dup
+
+          table_name = ts[:warehouse_name]
+          table_name = table_prefix + table_name if table_prefix.present?
+
+          definitions[ts[:warehouse_name]] = {
+            dw_type: 'dimension',
+            description: ts[:description],
+            incremental: !!ts[:incremental],
+            incremental_on: ts[:incremental] && ts[:incremental] != true ? ts[:incremental] : nil,
+            # indexed_columns
+            tableName: table_name,
+            columns: ts[:columns].map do |col|
+              {
+                name: col[:warehouse_name],
+                description: col[:description],
+                type: col[:type] || ts[:model].column_for_attribute(col[:local_name]).sql_type,
+              }
+            end,
+          }
+        end
+
+        {
+          version: "#{dumper.export_genre.downcase}-#{Digest::MD5.hexdigest(definitions.to_json)[0...6]}",
+          definition: definitions,
+        }
+      end
+
+      def hosted_data_client
+        @hosted_data_client ||= begin
+          token = config[:token]
+
+          host = config[:host]
+          unless host.present?
+            tok_content = JWT.decode(token, nil, false).first
+            host = tok_content['host']
+          end
+
+          Faraday.new(url: host) do |faraday|
+            faraday.request :multipart
+            faraday.request :json
+            faraday.response :raise_error
+            faraday.response :follow_redirects
+            faraday.response :json, content_type: /\bjson$/
+            faraday.headers[:accept] = 'application/json'
+            faraday.headers[:authorization] = "Bearer #{token}"
+            faraday.adapter Faraday.default_adapter
+          end
+        end
+      end
+
+      def parse_configuration(uri)
+        super do |parsed_uri, cfg|
+          if parsed_uri.username.present?
+            # hosted-data://<JWT>@<hosted_data_domain>
+            cfg[:token] = parsed_uri.username
+            cfg[:host] = parsed_uri.host
+          else
+            # hosted-data://<JWT>
+            cfg[:token] = parsed_uri.host
+          end
+        end
+      end
+
+    end
+  end
+end
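For reference, concrete (placeholder) forms of the HostedData destination string implied by `parse_configuration` above; note that `parsed_uri.username` is only populated when the authority contains an `@`:

```ruby
# Token only — the host is read from the JWT's 'host' claim:
"hosted-data://eyJhbGciOiJIUzI1NiJ9.payload.sig"

# Token plus explicit host (token in the userinfo position):
"hosted-data://eyJhbGciOiJIUzI1NiJ9.payload.sig@hosted-data.example.com"
```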
data/lib/inst_data_shipper/destinations/s3.rb
ADDED
@@ -0,0 +1,72 @@
+module InstDataShipper
+  module Destinations
+    class S3 < Base
+      include Concerns::Chunking
+
+      def chunk_data(generator, table:, extra: nil)
+        warehouse_name = table[:warehouse_name]
+
+        super(generator) do |batch, idx|
+          bits = [warehouse_name, extra, idx].compact
+          temp_file = "#{working_dir}/#{bits.join('.')}.csv"
+
+          CSV.open(temp_file, 'w', headers: false) do |csv|
+            csv << table[:columns].map { |c| c[:warehouse_name] }
+            batch.each do |batch_row|
+              csv << batch_row
+            end
+          end
+
+          yield temp_file
+
+          File.delete(temp_file)
+        end
+      end
+
+      def upload_data_chunk(table_def, chunk)
+        s3 = Aws::S3::Resource.new(client: aws_client)
+        dir_key = tracker.created_at.strftime("%Y-%m-%dT%H:%M") + "_#{tracker.id}"
+        bucket = s3.bucket(config[:bucket])
+
+        subpath = config[:path].presence || "/"
+        subpath = subpath[1..-1] if subpath.starts_with?("/")
+        subpath = "instructure" unless subpath.present?
+
+        obj_path = File.join(subpath, dir_key, File.basename(chunk))
+        object = bucket.object(obj_path)
+
+        File.open(chunk, 'rb') do |file|
+          object.put(body: file)
+        end
+      end
+
+      protected
+
+      def aws_client
+        @aws_client ||= Aws::S3::Client.new(
+          region: config[:region],
+          credentials: Aws::Credentials.new(
+            config[:access_key_id],
+            config[:access_key_secret],
+          )
+        )
+      end
+
+      def parse_configuration(uri)
+        # s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>
+        super do |parsed_uri, cfg|
+          split_path = parsed_uri.path.delete_prefix('/').split('/')
+
+          cfg.merge!({
+            region: parsed_uri.host,
+            bucket: split_path[0],
+            access_key_id: parsed_uri.user,
+            access_key_secret: parsed_uri.password,
+            path: split_path[1..-1].join('/').presence,
+          })
+        end
+      end
+
+    end
+  end
+end
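Likewise for S3, an example matching the comment in `parse_configuration` (all values are placeholders). The AWS region travels in the URI's host position, the first path segment is the bucket, the remainder becomes the key prefix, and an optional `#fragment` would surface as `user_config`:

```ruby
"s3://AKIAEXAMPLEKEY:secretEXAMPLE@us-east-1/my-bucket/exports/nightly"
```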