inst_data_shipper 0.1.0.beta1 → 0.2.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: bf2f1cdd4b4181e945c5f36e7680ed0a054429dc191197fafbee60de9598305b
- data.tar.gz: 5fb781dc8aa17bf7d672fdfc8942d70365edb68fd324d8aeab70297270af0b1e
+ metadata.gz: f7909aa44e9dabd1d43d58a5a3c2c081891104d64336294dce287c06804804df
+ data.tar.gz: 5da874689ac1de3e016a7feefce5866b211e6f7595021b565564f796685ed104
  SHA512:
- metadata.gz: 9212cd9c647193aa7256f15f6da12cd4ee3c56a12e011ac269a1d801d15e0cb7182a71c2fa8d8e2d2ea808aff73ff4f7c974c3720db54414eb43c24658ca554f
- data.tar.gz: c4cb69ad7ea635833aa5051dec5a8c14f3aa13e2b11dd3e8fbdd4d12c2a9d63ac9dbb5b235915da0d80898d0b048d5677fb807d6947911e3e10a87956b8ee1fc
+ metadata.gz: cd81e6c26e2416ce1a32de588e04f560496cfb7cfdac3f4c837828a1c65798bec405d98197032b0d8935a1ba2b24a291aa25f1b73a469ac7a9c6ef8d2286103f
+ data.tar.gz: 66c5ccfd82128e8c5dc39c7c937ee7f4f9412743b7202e221e53c575d4b0e572f0b014b4f41ae5924b1d2d119a05cd5de2acbae4eb81022df844a1fea181faec
data/README.md CHANGED
@@ -1,6 +1,6 @@
  # InstDataShipper

- This gem is intended to facilitate fast and easy syncing of Canvas data.
+ This gem is intended to facilitate easy upload of LTI datasets to Instructure Hosted Data.

  ## Installation

@@ -16,6 +16,144 @@ Then run the migrations:
  bundle exec rake db:migrate
  ```

+ ## Usage
+
+ ### Dumper
+
+ The main tool provided by this Gem is the `InstDataShipper::Dumper` class. It is used to define a "Dump", which is a combination of tasks and schema.
+
+ Here is an example `Dumper` implementation, wrapped in an ActiveJob job:
+ ```ruby
+ class HostedDataPushJob < ApplicationJob
+   # The schema serves two purposes: defining the table structure in the destination and mapping source data into it
+   SCHEMA = InstDataShipper::SchemaBuilder.build do
+     # You can augment the Table-builder DSL with custom methods like so:
+     extend_table_builder do
+       # It may be useful to define custom column definition helpers:
+       def custom_column(*args, from: nil, **kwargs, &blk)
+         # In this example, the helper reads the value from a `data` jsonb column - without it, you'd need
+         # to define `from: ->(row) { row.data["<KEY>"] }` on each column that needs to read from the jsonb
+         from ||= args[0].to_s
+         from = ->(row) { row.data[from] } if from.is_a?(String)
+         column(*args, **kwargs, from: from, &blk)
+       end
+
+       # `extend_table_builder` uses `class_eval`, so you could alternatively write your helpers in a Concern or Module and include them like normal:
+       include SomeConcern
+     end
+
+     table(ALocalModel, "<TABLE DESCRIPTION>") do
+       # If you define a table as incremental, it'll only export changes made since the start of the last successful Dumper run
+       # The first argument "scope" can be interpreted in different ways:
+       #   If exporting a local model it may be a: (default: `updated_at`)
+       #     Proc that will receive a Relation and return a Relation (use `incremental_since`)
+       #     String of a column to compare with `incremental_since`
+       #   If exporting a Canvas report it may be a: (default: `updated_after`)
+       #     Proc that will receive report params and return modified report params (use `incremental_since`)
+       #     String of a report param to set to `incremental_since`
+       # `on:` is passed to Hosted Data and is used as the unique key. It may be an array to form a composite key
+       # `if:` may be a Proc or a Symbol (of a method on the Dumper)
+       incremental "updated_at", on: [:id], if: ->() {}
+
+       column :name_in_destinations, :maybe_optional_sql_type, "Optional description of column"
+
+       # The type may usually be omitted if the `table()` is passed a Model class, but strings are an exception to this
+       custom_column :name, :"varchar(128)"
+
+       # `from:` may be...
+       #   A Symbol of a method to be called on the record
+       custom_column :sis_type, :"varchar(32)", from: :some_model_method
+       #   A String of a column to read from the record
+       custom_column :sis_type, :"varchar(32)", from: "sis_source_type"
+       #   A Proc to be called with each record
+       custom_column :sis_type, :"varchar(32)", from: ->(rec) { ... }
+       #   Not specified. Will default to using the Schema Column Name as a String ("sis_type" in this case)
+       custom_column :sis_type, :"varchar(32)"
+     end
+
+     table("my_table", model: ALocalModel) do
+       # ...
+     end
+
+     table("proserv_student_submissions_csv") do
+       column :canvas_id, :bigint, from: "canvas user id"
+       column :sis_id, :"varchar(64)", from: "sis user id"
+       column :name, :"varchar(64)", from: "user name"
+       column :submission_id, :bigint, from: "submission id"
+     end
+   end
+
+   Dumper = InstDataShipper::Dumper.define(schema: SCHEMA, include: [
+     InstDataShipper::DataSources::LocalTables,
+     InstDataShipper::DataSources::CanvasReports,
+   ]) do
+     import_local_table(ALocalModel)
+     import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+     # If the report_name/Model don't directly match the Schema, a schema_name: parameter may be passed:
+     import_local_table(SomeModel, schema_name: "my_table")
+     import_canvas_report_by_terms("some_report", terms: Term.all.pluck(:canvas_id), schema_name: "my_table")
+   end
+
+   def perform
+     Dumper.perform_dump([
+       "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+       "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+     ])
+   end
+ end
+ ```
+
+ `Dumper`s may also be defined as a normal Ruby subclass:
+ ```ruby
+ class HostedDataPushJob < ApplicationJob
+   SCHEMA = InstDataShipper::SchemaBuilder.build do
+     # ...
+   end
+
+   class Dumper < InstDataShipper::Dumper
+     include InstDataShipper::DataSources::LocalTables
+     include InstDataShipper::DataSources::CanvasReports
+
+     def enqueue_tasks
+       import_local_table(ALocalModel)
+       import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+     end
+
+     def table_schemas
+       SCHEMA
+     end
+   end
+
+   def perform
+     Dumper.perform_dump([
+       "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+       "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+     ])
+   end
+ end
+ ```
+
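+ Since `HostedDataPushJob` is a regular ActiveJob job in both examples above, a dump is kicked off with the usual enqueue call (a minimal sketch assuming a standard ActiveJob/queue-adapter setup, nothing gem-specific):
+ ```ruby
+ # Enqueue the job; its `perform` runs the Dumper against the configured destinations.
+ HostedDataPushJob.perform_later
+ ```
+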
+ ### Destinations
+
+ This Gem is mainly designed for use with Hosted Data, but it tries to abstract that a little to allow for other destinations/backends. Out of the box, support for Hosted Data and S3 is included.
+
+ Destinations are passed as URI-formatted strings. Passing Hashes is also supported, but the format/keys are destination specific.
+
+ Destinations blindly accept URI Fragments (the `#` chunk at the end of the URI). These options are not used internally, but are made available to the destination as `dest.user_config`. Ideally these are in the same format as query parameters (`x=1&y=2`, which will be parsed into a Hash), but they can be any string.
+
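+ For illustration, a destination URI carrying fragment options might look like the following (the `notify` and `team` keys are arbitrary examples, not options the gem itself understands):
+ ```ruby
+ destination = "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>#notify=ops&team=data"
+
+ # Because the fragment is query-string shaped, it is parsed into a Hash and exposed
+ # to the destination as:
+ #   dest.user_config #=> { "notify" => "ops", "team" => "data" }
+ ```
+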
+ #### Hosted Data
+ `hosted-data://<JWT>@<HOSTED DATA SERVER>`
+
+ ##### Optional Parameters:
+ - `table_prefix`: An optional string to prefix onto each table name when declaring the schema in Hosted Data
+
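+ For example, with `table_prefix=example`, a table declared as `my_table` in the schema is created in Hosted Data as `example_my_table`.
+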
+ #### S3
+ `s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<optional path>`
+
+ ##### Optional Parameters:
+ _None_
+
  ## Development

  When adding to or updating this gem, make sure you do the following:
@@ -1,4 +1,4 @@
- class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
+ class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
    def change
      create_table :inst_data_shipper_dump_batches do |t|
        t.datetime :started_at
@@ -6,10 +6,13 @@ class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
        t.string :status

        t.string :job_class
+       t.string :genre
+       t.string :batch_id
+
        t.string :exception
        t.text :backtrace
-       t.text :metadata
-       t.text :job_arguments
+       # t.text :metadata
+       # t.text :job_arguments

        t.timestamps
      end
@@ -19,9 +19,9 @@ module InstDataShipper
        instance_exec(&@body_block)
      end

-     def table_schemas
+     def schema
        pointer = @schema_pointer || batch_context[:schema_pointer]
-       safe_constantize(pointer)
+       pointer.constantize
      end
    end
  end
@@ -9,21 +9,35 @@ module InstDataShipper
        end

        def hook(name, prepend: false, &block)
+         _assert_hook_defined(name)
+         @hooks ||= {}
+         @hooks[name] ||= []
          hooks = @hooks[name]
          prepend ? hooks.unshift(block) : hooks << block
        end
+
+       def _assert_hook_defined(name)
+         return true if @hooks&.key?(name)
+         return if superclass.respond_to?(:_assert_hook_defined) && superclass._assert_hook_defined(name)
+         raise ArgumentError, "Hook #{name} is not defined"
+       end
+
+       def _list_hooks(name)
+         list = []
+         list.push(*superclass._list_hooks(name)) if superclass.respond_to?(:_list_hooks)
+         list.push(*@hooks[name]) if (@hooks || {})[name]
+         list
+       end
      end

      def run_hook(name, *args, **kwargs)
-       hooks = @hooks[name]
-       hooks.each do |blk|
+       self.class._list_hooks(name).each do |blk|
          instance_exec(*args, **kwargs, &blk)
        end
      end

      def run_hook_safe(name, *args, **kwargs)
-       hooks = @hooks[name]
-       hooks.each do |blk|
+       self.class._list_hooks(name).each do |blk|
          instance_exec(*args, **kwargs, &blk)
        rescue StandardError
        end
@@ -27,11 +27,25 @@ module InstDataShipper
        _in_canvas_report_pool(:_import_canvas_report, *args, **kwargs)
      end

-     def import_canvas_report_by_terms(target_table, report_name, terms: [], params: {}, **kwargs)
+     def import_canvas_report_by_terms(*args, **kwargs)
+       _in_canvas_report_pool(:_import_canvas_report_by_terms, *args, **kwargs)
+     end
+
+     def import_existing_report(report, **kwargs)
+       delayed(:_process_canvas_report, report: report, **kwargs)
+     end
+
+     private
+
+     def _import_canvas_report_by_terms(report_name, terms: [], params: {}, **kwargs)
        term_ids = (terms || []).map do |term|
          term.is_a?(Term) ? term.canvas_id : term
        end

+       table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
+
+       _resolve_report_incremenal_parameters(table_def, params)
+
        Sidekiq::Batch.new.tap do |b|
          b.description = "Term Scoped #{report_name} Runners"
          b.context = {
@@ -40,19 +54,21 @@ module InstDataShipper
          b.jobs do
            terms_query = term_ids.present? ? Term.where(canvas_id: term_ids) : Term
            terms_query.find_each do |t|
-             import_canvas_report(target_table, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
+             _in_canvas_report_pool(:_trigger_canvas_report, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
            end
          end
        end
      end

-     def import_existing_report(table, report)
-       delayed(:_process_canvas_report, table, report: report)
-     end
+     def _import_canvas_report(report_name, params: {}, **kwargs)
+       table_def = lookup_table_schema!(kwargs[:schema_name], report_name)

-     private
+       _resolve_report_incremenal_parameters(table_def, params)
+
+       _trigger_canvas_report(report_name, params: params, **kwargs)
+     end

-     def _import_canvas_report(target_table, report_name, retry_count: 3, params: {}, **kwargs)
+     def _trigger_canvas_report(report_name, retry_count: 3, params: {}, **kwargs)
        report = canvas_sync_client.start_report(
          'self', report_name,
          parameters: params,
@@ -61,15 +77,13 @@ module InstDataShipper
        CanvasSync::Jobs::CanvasProcessWaiter.perform_later(
          "/api/v1/accounts/self/reports/#{report_name}/#{report[:id]}",
          {
-           instance_of: origin_class,
-           method: :_process_canvas_report,
-           args: [target_table],
+           job: Jobs::AsyncCaller,
+           args: [origin_class, :_process_canvas_report],
            kwargs: kwargs,
          },
          on_failure: {
-           instance_of: origin_class,
-           method: :_handle_failed_canvas_report,
-           args: [target_table, report_name, kwargs],
+           job: Jobs::AsyncCaller,
+           args: [origin_class, :_handle_failed_canvas_report, report_name, kwargs],
            kwargs: { retry_count: retry_count },
          },
          status_key: :status,
@@ -79,18 +93,18 @@ module InstDataShipper

      def _in_canvas_report_pool(mthd, *args, **kwargs)
        pool = CanvasSync::JobBatches::Pool.from_pid(batch_context[:report_processor_pool])
-       AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
+       Jobs::AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
      end

-     def _process_canvas_report(table, report:)
-       table_def = table_schemas.find { |t| t[:warehouse_name].to_s == table }
+     def _process_canvas_report(report:, schema_name: nil)
+       table_def = lookup_table_schema!(schema_name, report[:report])

-       IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/#{table}.csv")
+       IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/temp_report.csv")

        inner_block = ->(file) {
-         CSV.foreach("#{working_dir}/#{table}.csv", headers: true) do |m|
+         CSV.foreach("#{working_dir}/temp_report.csv", headers: true) do |m|
            file << table_def[:columns].map do |c|
-             c[:transformer].present? ? m.instance_exec(&c[:transformer]) : m[c[:local_name].to_s]
+             instance_exec(m, &c[:block])
            end
          end
        }
@@ -98,13 +112,36 @@ module InstDataShipper
        upload_data(table_def, extra: report['id'], &inner_block)
      end

-     def _handle_failed_canvas_report(table, report_name, kwargs, retry_count:, report:) # rubocop:disable Lint/UnusedMethodArgument
+     def _resolve_report_incremenal_parameters(table_def, params)
+       if table_is_incremental?(table_def)
+         inc = table_def[:incremental]
+         scope = inc[:scope]
+
+         if scope != false
+           scope ||= "updated_after"
+
+           if scope.is_a?(Proc)
+             scope = instance_exec(params, &scope)
+             if scope.is_a?(Hash) && scope != params
+               params.merge!(scope)
+             end
+           elsif scope.is_a?(String) || scope.is_a?(Symbol)
+             params[scope] = incremental_since
+           end
+         end
+       end
+
+       params
+     end
+
+     def _handle_failed_canvas_report(report_name, kwargs, retry_count:, report:)
        if retry_count.positive?
          tbid = batch_context[:report_bid] || batch_context[:root_bid]
          Sidekiq::Batch.new(tbid).jobs do
-           import_canvas_report(table, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
+           _in_canvas_report_pool(:_trigger_canvas_report, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
          end
        else
+         # TODO Allow marking the table as incomplete. Destination code can then decide how to handle incomplete tables since (eg) incremental imports wouldn't mind too much
          cleanup_fatal_error!
        end
      end
@@ -12,22 +12,50 @@ module InstDataShipper

      private

-     def _import_local_table(table_name)
-       table_def = table_schemas.find { |t| t[:model].to_s == table_name }
-       model = table_def[:model]
+     def _import_local_table(model, schema_name: nil)
+       model = model.safe_constantize if model.is_a?(String)
+
+       table_def = lookup_table_schema!(schema_name, { model: model })

        inner_block = ->(file) {
-         query = model
-         query = query.includes(table_def[:includes]) if table_def[:includes].present?
-         model.find_each do |m|
+         query = model.all
+         query = _resolve_model_query(query, table_def[:query])
+
+         if table_is_incremental?(table_def)
+           query = _resolve_model_query(
+             query,
+             table_def.dig(:incremental, :scope),
+             string: ->(query, column) { query.where("#{column} > ?", incremental_since) },
+             default: "updated_at",
+           )
+         end
+
+         query.find_each do |m|
            file << table_def[:columns].map do |c|
-             c[:transformer].present? ? m.instance_exec(&c[:transformer]) : m[c[:local_name].to_s]
+             instance_exec(m, &c[:block])
            end
          end
        }

        upload_data(table_def, &inner_block)
      end
+
+     def _resolve_model_query(relation, query, string: nil, default: nil)
+       return relation if query == false
+       query = default if query.nil?
+       return relation if query.nil?
+
+       if query.is_a?(Symbol)
+         relation.send(query)
+       elsif query.is_a?(Proc)
+         instance_exec(relation, &query)
+       elsif query.is_a?(String) && string.present?
+         instance_exec(relation, query, &string)
+       else
+         raise "Invalid query: #{query.inspect}"
+       end
+     end
+
    end
  end
end
@@ -3,7 +3,7 @@ module InstDataShipper
    class Base
      attr_reader :dumper

-     delegate :tracker, :table_schemas, :working_dir, to: :dumper
+     delegate :tracker, :schema, :working_dir, to: :dumper

      def initialize(cache_key, config, dumper)
        @cache_key = cache_key
@@ -11,9 +11,13 @@ module InstDataShipper
        @dumper = dumper
      end

+     # This method is called before taking any actions.
+     # It should be used to make any necessary state assumptions (eg, the HostedData destination checks for a previous dump to determine if it can use incremental_since)
+     def preinitialize_dump(context); end
+
      # This method is called before processing any data.
      # It should be used to initialize any external resources needed for the dump.
-     def initialize_dump; end
+     def initialize_dump(context); end

      # Yields an object (can be anything) that will be passed to `upload_data_chunk` as `chunk`.
      #
@@ -50,7 +54,7 @@ module InstDataShipper
      end

      def user_config
-       config[:extra]
+       config[:user_config]
      end

      def group_key
@@ -62,11 +66,11 @@ module InstDataShipper
      def parse_configuration(uri)
        if block_given?
          parsed = URI.parse(uri)
+         cparsed = ConfigURI.new(parsed)
          cfg = {
-           params: parsed.query.present? ? Rack::Utils.parse_nested_query(parsed.query) : {},
-           extra: (parsed.fragment.present? && parsed.fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(parsed.fragment)).presence || parsed.fragment || nil,
+           user_config: cparsed.hash_params,
          }
-         yield parsed, cfg
+         yield cparsed, cfg
          cfg
        else
          raise NotImplementedError
@@ -100,5 +104,28 @@ module InstDataShipper
      end

    end
+
+   class ConfigURI
+     def initialize(uri)
+       @uri = uri
+     end
+
+     # delegate_missing_to :uri
+     delegate :scheme, :user, :password, :host, :hostname, :port, :path, :query, :fragment, to: :uri
+
+     def params
+       @params ||= (query.present? ? Rack::Utils.parse_nested_query(query).with_indifferent_access : {}).freeze
+     end
+
+     def hash_params
+       @hash_params ||= ((fragment.present? && fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(fragment).with_indifferent_access).presence || fragment || nil)&.freeze
+     end
+
+     private
+
+     def uri
+       @uri
+     end
+   end
  end
end
@@ -1,13 +1,47 @@
+ require "faraday_middleware"
+
  module InstDataShipper
    module Destinations
      class HostedData < Base
        include Concerns::Chunking

-       def initialize_dump
+       def preinitialize_dump(context)
+         if context[:incremental_since].present?
+           begin
+             last_dump = hosted_data_client.get("api/v1/custom_dumps/last", {
+               status: 'imported',
+               # schema_version: convert_schema[:version],
+               tags: [
+                 "ids-schema=#{dumper.schema_digest}",
+                 "ids-genre=#{dumper.export_genre}",
+               ],
+             }).body.with_indifferent_access
+
+             if last_dump[:created_at] < context[:incremental_since]
+               InstDataShipper.logger.info("Last successful HostedData dump is older than incremental_since - bumping back incremental_since")
+               context[:incremental_since] = last_dump[:created_at]
+             end
+           rescue Faraday::ResourceNotFound
+             # TODO It'd be nice to make this per-table
+             InstDataShipper.logger.info("No Last successful HostedData dump of the same schema - not using incremental_since")
+             context[:incremental_since] = nil
+           end
+         end
+       end
+
+       def initialize_dump(context)
+         tags = [
+           "ids-schema=#{dumper.schema_digest}",
+           "ids-genre=#{dumper.export_genre}",
+         ]
+         tags << "ids-app=#{Rails.application.class.name.gsub(/::Application$/, '')}" if defined?(Rails) && Rails.application
+         tags << "ids-schema-version=#{schema[:version]}" if schema[:version].present?
+
          dump = hosted_data_client.post(
            'api/v1/custom_dumps/',
            reference_id: tracker.id,
            schema: convert_schema,
+           tags: tags,
          ).body.with_indifferent_access

          redis.hset(rk(:state), :dump_id, dump[:id])
@@ -15,7 +49,7 @@ module InstDataShipper
        end

        def chunk_data(generator, table:, extra: nil)
-         warehouse_name = table_def[:warehouse_name]
+         warehouse_name = table[:warehouse_name]

          super(generator) do |batch, idx|
            bits = [warehouse_name, extra, idx].compact
@@ -36,18 +70,18 @@ module InstDataShipper

        def upload_data_chunk(table_def, chunk)
          hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", artifacts: {
-           table_def[:warehouse_name] => [Faraday::UploadIO.new(chunk, 'application/gzip')],
+           table_name(table_def) => [Faraday::UploadIO.new(chunk, 'application/gzip')],
          })
        end

        def finalize_dump
          hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", start_import: true) if hd_dump_id.present?
-         redis.delete(rk(:state))
+         redis.del(rk(:state))
        end

        def cleanup_fatal_error
          hosted_data_client.delete("api/v1/custom_dumps/#{hd_dump_id}/", reason: 'Failure during extraction or transformation') if hd_dump_id.present?
-         redis.delete(rk(:state))
+         redis.del(rk(:state))
        end

        # TODO Support/allow single-table fatal errors?
@@ -59,39 +93,45 @@ module InstDataShipper
        end

        def convert_schema
-         table_prefix = config[:table_prefix]
-         table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
-
          definititions = {}
+         table_schemas = schema[:tables]
          table_schemas.each do |ts|
            ts = ts.dup
+           tname = table_name(ts)

-           table_name = ts[:warehouse_name]
-           table_name = table_prefix + table_name if table_prefix.present?
-
-           definititions[ts[:warehouse_name]] = {
+           definititions[tname] = {
              dw_type: 'dimension',
              description: ts[:description],
-             incremental: !!ts[:incremental],
-             incremental_on: ts[:incremental] && ts[:incremental] != true ? ts[:incremental] : nil,
+             incremental: dumper.table_is_incremental?(ts),
+             incremental_on: ts.dig(:incremental, :on),
              # indexed_columns
-             tableName: table_name,
+             tableName: tname,
              columns: ts[:columns].map do |col|
+               coltype = col[:type]
+               coltype ||= ts[:model].column_for_attribute(col[:from]).sql_type if col[:from].is_a?(String)
                {
                  name: col[:warehouse_name],
                  description: col[:description],
-                 type: col[:type] || ts[:model].column_for_attribute(col[:local_name]).sql_type,
+                 type: coltype,
                }
              end,
            }
          end

          {
-           version: "#{dumper.export_genre.downcase}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
+           version: "#{dumper.schema_digest}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
            definition: definititions,
          }
        end

+       def table_name(table_def)
+         table_prefix = config[:table_prefix]
+         table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
+         table_name = table_def[:warehouse_name]
+         table_name = table_prefix + table_name if table_prefix.present?
+         table_name
+       end
+
        def hosted_data_client
          @hosted_data_client ||= begin
            token = config[:token]
@@ -102,6 +142,8 @@ module InstDataShipper
            host = tok_content['host']
          end

+         host = "https://#{host}" unless host.include?('://')
+
          Faraday.new(url: host) do |faraday|
            faraday.request :multipart
            faraday.request :json
@@ -117,14 +159,16 @@ module InstDataShipper

        def parse_configuration(uri)
          super do |parsed_uri, cfg|
-           if parsed_uri.username.present?
+           if parsed_uri.user.present?
              # hosted-data://<JWT>:<hosted_data_domain>
-             cfg[:token] = parsed_uri.username
+             cfg[:token] = parsed_uri.user
              cfg[:host] = parsed_uri.host
            else
              # hosted-data://<JWT>
              cfg[:token] = parsed_uri.host
            end
+
+           cfg[:table_prefix] = parsed_uri.params[:table_prefix]
          end
        end