inst_data_shipper 0.1.0.beta1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: bf2f1cdd4b4181e945c5f36e7680ed0a054429dc191197fafbee60de9598305b
- data.tar.gz: 5fb781dc8aa17bf7d672fdfc8942d70365edb68fd324d8aeab70297270af0b1e
+ metadata.gz: f7909aa44e9dabd1d43d58a5a3c2c081891104d64336294dce287c06804804df
+ data.tar.gz: 5da874689ac1de3e016a7feefce5866b211e6f7595021b565564f796685ed104
  SHA512:
- metadata.gz: 9212cd9c647193aa7256f15f6da12cd4ee3c56a12e011ac269a1d801d15e0cb7182a71c2fa8d8e2d2ea808aff73ff4f7c974c3720db54414eb43c24658ca554f
- data.tar.gz: c4cb69ad7ea635833aa5051dec5a8c14f3aa13e2b11dd3e8fbdd4d12c2a9d63ac9dbb5b235915da0d80898d0b048d5677fb807d6947911e3e10a87956b8ee1fc
+ metadata.gz: cd81e6c26e2416ce1a32de588e04f560496cfb7cfdac3f4c837828a1c65798bec405d98197032b0d8935a1ba2b24a291aa25f1b73a469ac7a9c6ef8d2286103f
+ data.tar.gz: 66c5ccfd82128e8c5dc39c7c937ee7f4f9412743b7202e221e53c575d4b0e572f0b014b4f41ae5924b1d2d119a05cd5de2acbae4eb81022df844a1fea181faec
data/README.md CHANGED
@@ -1,6 +1,6 @@
  # InstDataShipper
 
- This gem is intended to facilitate fast and easy syncing of Canvas data.
+ This gem is intended to facilitate easy upload of LTI datasets to Instructure Hosted Data.
 
  ## Installation
 
@@ -16,6 +16,144 @@ Then run the migrations:
  bundle exec rake db:migrate
  ```
 
+ ## Usage
+
+ ### Dumper
+
+ The main tool provided by this Gem is the `InstDataShipper::Dumper` class. It is used to define a "Dump" which is a combination of tasks and schema.
+
+ Here is an example `Dumper` implementation, wrapped in an ActiveJob job:
+ ```ruby
+ class HostedDataPushJob < ApplicationJob
+   # The schema serves two purposes: defining the schema and mapping data
+   SCHEMA = InstDataShipper::SchemaBuilder.build do
+     # You can augment the Table-builder DSL with custom methods like so:
+     extend_table_builder do
+       # It may be useful to define custom column definition helpers:
+       def custom_column(*args, from: nil, **kwargs, &blk)
+         # In this example, the helper reads the value from a `data` jsonb column - without it, you'd need
+         # to define `from: ->(row) { row.data["<KEY>"] }` on each column that needs to read from the jsonb
+         from ||= args[0].to_s
+         from = ->(row) { row.data[from] } if from.is_a?(String)
+         column(*args, **kwargs, from: from, &blk)
+       end
+
+       # `extend_table_builder` uses `class_eval`, so you could alternatively write your helpers in a Concern or Module and include them like normal:
+       include SomeConcern
+     end
+
+     table(ALocalModel, "<TABLE DESCRIPTION>") do
+       # If you define a table as incremental, it'll only export changes made since the start of the last successful Dumper run
+       # The first argument "scope" can be interpreted in different ways:
+       #   If exporting a local model it may be a: (default: `updated_at`)
+       #     Proc that will receive a Relation and return a Relation (use `incremental_since`)
+       #     String of a column to compare with `incremental_since`
+       #   If exporting a Canvas report it may be a: (default: `updated_after`)
+       #     Proc that will receive report params and return modified report params (use `incremental_since`)
+       #     String of a report param to set to `incremental_since`
+       # `on:` is passed to Hosted Data and is used as the unique key. It may be an array to form a composite-key
+       # `if:` may be a Proc or a Symbol (of a method on the Dumper)
+       incremental "updated_at", on: [:id], if: ->() {}
+
+       column :name_in_destinations, :maybe_optional_sql_type, "Optional description of column"
+
+       # The type may usually be omitted if the `table()` is passed a Model class, but strings are an exception to this
+       custom_column :name, :"varchar(128)"
+
+       # `from:` may be...
+       #   A Symbol of a method to be called on the record
+       custom_column :sis_type, :"varchar(32)", from: :some_model_method
+       #   A String of a column to read from the record
+       custom_column :sis_type, :"varchar(32)", from: "sis_source_type"
+       #   A Proc to be called with each record
+       custom_column :sis_type, :"varchar(32)", from: ->(rec) { ... }
+       #   Not specified. Will default to using the Schema Column Name as a String ("sis_type" in this case)
+       custom_column :sis_type, :"varchar(32)"
+     end
+
+     table("my_table", model: ALocalModel) do
+       # ...
+     end
+
+     table("proserv_student_submissions_csv") do
+       column :canvas_id, :bigint, from: "canvas user id"
+       column :sis_id, :"varchar(64)", from: "sis user id"
+       column :name, :"varchar(64)", from: "user name"
+       column :submission_id, :bigint, from: "submission id"
+     end
+   end
+
+   Dumper = InstDataShipper::Dumper.define(schema: SCHEMA, include: [
+     InstDataShipper::DataSources::LocalTables,
+     InstDataShipper::DataSources::CanvasReports,
+   ]) do
+     import_local_table(ALocalModel)
+     import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+     # If the report_name/Model don't directly match the Schema, a schema_name: parameter may be passed:
+     import_local_table(SomeModel, schema_name: "my_table")
+     import_canvas_report_by_terms("some_report", terms: Term.all.pluck(:canvas_id), schema_name: "my_table")
+   end
+
+   def perform
+     Dumper.perform_dump([
+       "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+       "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+     ])
+   end
+ end
+ ```
+
+ `Dumper`s may also be formed as a normal Ruby subclass:
+ ```ruby
+ class HostedDataPushJob < ApplicationJob
+   SCHEMA = InstDataShipper::SchemaBuilder.build do
+     # ...
+   end
+
+   class Dumper < InstDataShipper::Dumper
+     include InstDataShipper::DataSources::LocalTables
+     include InstDataShipper::DataSources::CanvasReports
+
+     def enqueue_tasks
+       import_local_table(ALocalModel)
+       import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+     end
+
+     def table_schemas
+       SCHEMA
+     end
+   end
+
+   def perform
+     Dumper.perform_dump([
+       "hosted-data://<JWT>@<HOSTED DATA SERVER>?table_prefix=example",
+       "s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<path>",
+     ])
+   end
+ end
+ ```
+
+ ### Destinations
+
+ This Gem is mainly designed for use with Hosted Data, but it tries to abstract that a little to allow for other destinations/backends. Out of the box, support for Hosted Data and S3 is included.
+
+ Destinations are passed as URI-formatted strings. Passing Hashes is also supported, but the format/keys are destination specific.
+
+ Destinations blindly accept URI Fragments (the `#` chunk at the end of the URI). These options are not used internally but will be made available as `dest.user_config`. Ideally these are in the same format as query parameters (`x=1&y=2`, which it will try to parse into a Hash), but they can be any string.
+
+ #### Hosted Data
+ `hosted-data://<JWT>@<HOSTED DATA SERVER>`
+
+ ##### Optional Parameters:
+ - `table_prefix`: An optional string to prefix onto each table name in the schema when declaring the schema in Hosted Data
+
+ #### S3
+ `s3://<access_key_id>:<access_key_secret>@<region>/<bucket>/<optional path>`
+
+ ##### Optional Parameters:
+ _None_
+
  ## Development
 
  When adding to or updating this gem, make sure you do the following:
@@ -1,4 +1,4 @@
- class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
+ class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
  def change
  create_table :inst_data_shipper_dump_batches do |t|
  t.datetime :started_at
@@ -6,10 +6,13 @@ class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
  t.string :status
 
  t.string :job_class
+ t.string :genre
+ t.string :batch_id
+
  t.string :exception
  t.text :backtrace
- t.text :metadata
- t.text :job_arguments
+ # t.text :metadata
+ # t.text :job_arguments
 
  t.timestamps
  end
@@ -19,9 +19,9 @@ module InstDataShipper
  instance_exec(&@body_block)
  end
 
- def table_schemas
+ def schema
  pointer = @schema_pointer || batch_context[:schema_pointer]
- safe_constantize(pointer)
+ pointer.constantize
  end
  end
  end
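The schema "pointer" resolved above is the name of a schema constant carried through the batch context, so the change simply swaps `safe_constantize` for a plain `constantize`. A minimal sketch, with a hypothetical constant name borrowed from the README example:

```ruby
# Hypothetical illustration - the pointer is a constant name String, resolved with
# ActiveSupport's String#constantize (which raises NameError if the constant is missing).
pointer = "HostedDataPushJob::SCHEMA"
schema = pointer.constantize
```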
@@ -9,21 +9,35 @@ module InstDataShipper
  end
 
  def hook(name, prepend: false, &block)
+ _assert_hook_defined(name)
+ @hooks ||= {}
+ @hooks[name] ||= []
  hooks = @hooks[name]
  prepend ? hooks.unshift(block) : hooks << block
  end
+
+ def _assert_hook_defined(name)
+ return true if @hooks&.key?(name)
+ return if superclass.respond_to?(:_assert_hook_defined) && superclass._assert_hook_defined(name)
+ raise ArgumentError, "Hook #{name} is not defined"
+ end
+
+ def _list_hooks(name)
+ list = []
+ list.push(*superclass._list_hooks(name)) if superclass.respond_to?(:_list_hooks)
+ list.push(*@hooks[name]) if (@hooks || {})[name]
+ list
+ end
  end
 
  def run_hook(name, *args, **kwargs)
- hooks = @hooks[name]
- hooks.each do |blk|
+ self.class._list_hooks(name).each do |blk|
  instance_exec(*args, **kwargs, &blk)
  end
  end
 
  def run_hook_safe(name, *args, **kwargs)
- hooks = @hooks[name]
- hooks.each do |blk|
+ self.class._list_hooks(name).each do |blk|
  instance_exec(*args, **kwargs, &blk)
  rescue StandardError
  end
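The change above moves hook storage to per-class state that is collected up the superclass chain at call time. A standalone sketch of that pattern under assumed class names (the real module additionally validates hook names via `_assert_hook_defined`):

```ruby
class BaseDumper
  # Register a hook block on this class (per-class @hooks, as in the diff above)
  def self.hook(name, prepend: false, &block)
    @hooks ||= {}
    @hooks[name] ||= []
    prepend ? @hooks[name].unshift(block) : @hooks[name] << block
  end

  # Collect hooks from ancestors first, then from this class
  def self._list_hooks(name)
    list = []
    list.push(*superclass._list_hooks(name)) if superclass.respond_to?(:_list_hooks)
    list.push(*@hooks[name]) if (@hooks || {})[name]
    list
  end

  def run_hook(name, *args, **kwargs)
    self.class._list_hooks(name).each { |blk| instance_exec(*args, **kwargs, &blk) }
  end

  hook(:finalize) { puts "base hook" }
end

class MyDumper < BaseDumper
  hook(:finalize) { puts "subclass hook" }
end

MyDumper.new.run_hook(:finalize)
# => prints "base hook", then "subclass hook"
```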
@@ -27,11 +27,25 @@ module InstDataShipper
  _in_canvas_report_pool(:_import_canvas_report, *args, **kwargs)
  end
 
- def import_canvas_report_by_terms(target_table, report_name, terms: [], params: {}, **kwargs)
+ def import_canvas_report_by_terms(*args, **kwargs)
+ _in_canvas_report_pool(:_import_canvas_report_by_terms, *args, **kwargs)
+ end
+
+ def import_existing_report(report, **kwargs)
+ delayed(:_process_canvas_report, report: report, **kwargs)
+ end
+
+ private
+
+ def _import_canvas_report_by_terms(report_name, terms: [], params: {}, **kwargs)
  term_ids = (terms || []).map do |term|
  term.is_a?(Term) ? term.canvas_id : term
  end
 
+ table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
+
+ _resolve_report_incremenal_parameters(table_def, params)
+
  Sidekiq::Batch.new.tap do |b|
  b.description = "Term Scoped #{report_name} Runners"
  b.context = {
@@ -40,19 +54,21 @@ module InstDataShipper
  b.jobs do
  terms_query = term_ids.present? ? Term.where(canvas_id: term_ids) : Term
  terms_query.find_each do |t|
- import_canvas_report(target_table, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
+ _in_canvas_report_pool(:_trigger_canvas_report, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
  end
  end
  end
  end
 
- def import_existing_report(table, report)
- delayed(:_process_canvas_report, table, report: report)
- end
+ def _import_canvas_report(report_name, params: {}, **kwargs)
+ table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
 
- private
+ _resolve_report_incremenal_parameters(table_def, params)
+
+ _trigger_canvas_report(report_name, params: params, **kwargs)
+ end
 
- def _import_canvas_report(target_table, report_name, retry_count: 3, params: {}, **kwargs)
+ def _trigger_canvas_report(report_name, retry_count: 3, params: {}, **kwargs)
  report = canvas_sync_client.start_report(
  'self', report_name,
  parameters: params,
@@ -61,15 +77,13 @@ module InstDataShipper
  CanvasSync::Jobs::CanvasProcessWaiter.perform_later(
  "/api/v1/accounts/self/reports/#{report_name}/#{report[:id]}",
  {
- instance_of: origin_class,
- method: :_process_canvas_report,
- args: [target_table],
+ job: Jobs::AsyncCaller,
+ args: [origin_class, :_process_canvas_report],
  kwargs: kwargs,
  },
  on_failure: {
- instance_of: origin_class,
- method: :_handle_failed_canvas_report,
- args: [target_table, report_name, kwargs],
+ job: Jobs::AsyncCaller,
+ args: [origin_class, :_handle_failed_canvas_report, report_name, kwargs],
  kwargs: { retry_count: retry_count },
  },
  status_key: :status,
@@ -79,18 +93,18 @@ module InstDataShipper
 
  def _in_canvas_report_pool(mthd, *args, **kwargs)
  pool = CanvasSync::JobBatches::Pool.from_pid(batch_context[:report_processor_pool])
- AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
+ Jobs::AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
  end
 
- def _process_canvas_report(table, report:)
- table_def = table_schemas.find { |t| t[:warehouse_name].to_s == table }
+ def _process_canvas_report(report:, schema_name: nil)
+ table_def = lookup_table_schema!(schema_name, report[:report])
 
- IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/#{table}.csv")
+ IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/temp_report.csv")
 
  inner_block = ->(file) {
- CSV.foreach("#{working_dir}/#{table}.csv", headers: true) do |m|
+ CSV.foreach("#{working_dir}/temp_report.csv", headers: true) do |m|
  file << table_def[:columns].map do |c|
- c[:transformer].present? ? m.instance_exec(&c[:transformer]) : m[c[:local_name].to_s]
+ instance_exec(m, &c[:block])
  end
  end
  }
@@ -98,13 +112,36 @@ module InstDataShipper
  upload_data(table_def, extra: report['id'], &inner_block)
  end
 
- def _handle_failed_canvas_report(table, report_name, kwargs, retry_count:, report:) # rubocop:disable Lint/UnusedMethodArgument
+ def _resolve_report_incremenal_parameters(table_def, params)
+ if table_is_incremental?(table_def)
+ inc = table_def[:incremental]
+ scope = inc[:scope]
+
+ if scope != false
+ scope ||= "updated_after"
+
+ if scope.is_a?(Proc)
+ scope = instance_exec(params, &scope)
+ if scope.is_a?(Hash) && scope != params
+ params.merge!(scope)
+ end
+ elsif scope.is_a?(String) || scope.is_a?(Symbol)
+ params[scope] = incremental_since
+ end
+ end
+ end
+
+ params
+ end
+
+ def _handle_failed_canvas_report(report_name, kwargs, retry_count:, report:)
  if retry_count.positive?
  tbid = batch_context[:report_bid] || batch_context[:root_bid]
  Sidekiq::Batch.new(tbid).jobs do
- import_canvas_report(table, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
+ _in_canvas_report_pool(:_trigger_canvas_report, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
  end
  else
+ # TODO Allow marking the table as incomplete. Destination code can then decide how to handle incomplete tables since (eg) incremental imports wouldn't mind too much
  cleanup_fatal_error!
  end
  end
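A quick illustration of the effect of `_resolve_report_incremenal_parameters` on the report parameters of an incremental table (the values are invented, and `Proc#call` stands in for the `instance_exec` used above):

```ruby
incremental_since = "2024-01-01T00:00:00Z"   # normally supplied by the Dumper
params = { enrollment_term_id: 123 }

# String/Symbol scope (or the "updated_after" default): the named param is set directly
params["updated_after"] = incremental_since
# => {:enrollment_term_id=>123, "updated_after"=>"2024-01-01T00:00:00Z"}

# Proc scope: the Proc receives the params, and a returned Hash is merged back in
scope = ->(p) { { "created_after" => incremental_since } }
params.merge!(scope.call(params))
```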
@@ -12,22 +12,50 @@ module InstDataShipper
 
  private
 
- def _import_local_table(table_name)
- table_def = table_schemas.find { |t| t[:model].to_s == table_name }
- model = table_def[:model]
+ def _import_local_table(model, schema_name: nil)
+ model = model.safe_constantize if model.is_a?(String)
+
+ table_def = lookup_table_schema!(schema_name, { model: model })
 
  inner_block = ->(file) {
- query = model
- query = query.includes(table_def[:includes]) if table_def[:includes].present?
- model.find_each do |m|
+ query = model.all
+ query = _resolve_model_query(query, table_def[:query])
+
+ if table_is_incremental?(table_def)
+ query = _resolve_model_query(
+ query,
+ table_def.dig(:incremental, :scope),
+ string: ->(query, column) { query.where("#{column} > ?", incremental_since) },
+ default: "updated_at",
+ )
+ end
+
+ query.find_each do |m|
  file << table_def[:columns].map do |c|
- c[:transformer].present? ? m.instance_exec(&c[:transformer]) : m[c[:local_name].to_s]
+ instance_exec(m, &c[:block])
  end
  end
  }
 
  upload_data(table_def, &inner_block)
  end
+
+ def _resolve_model_query(relation, query, string: nil, default: nil)
+ return relation if query == false
+ query = default if query.nil?
+ return relation if query.nil?
+
+ if query.is_a?(Symbol)
+ relation.send(query)
+ elsif query.is_a?(Proc)
+ instance_exec(relation, &query)
+ elsif query.is_a?(String) && string.present?
+ instance_exec(relation, query, &string)
+ else
+ raise "Invalid query: #{query.inspect}"
+ end
+ end
+
  end
  end
  end
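The new `_resolve_model_query` accepts a Symbol, a Proc, or (for incremental scopes, together with the `string:` handler) a String. A standalone sketch of the same dispatch, using an Array in place of an ActiveRecord relation and `Proc#call` in place of `instance_exec`:

```ruby
def resolve_query(relation, query, string: nil, default: nil)
  return relation if query == false
  query = default if query.nil?
  return relation if query.nil?

  case query
  when Symbol then relation.public_send(query)                       # e.g. a named scope
  when Proc   then query.call(relation)                              # receives and returns a relation
  when String then string ? string.call(relation, query) : raise("no String handler")
  else raise "Invalid query: #{query.inspect}"
  end
end

data = [3, 1, 2]
resolve_query(data, :sort)                                           # => [1, 2, 3]
resolve_query(data, ->(rel) { rel.reject(&:odd?) })                  # => [2]
resolve_query(data, "1", string: ->(rel, col) { rel.select { |x| x > col.to_i } })  # => [3, 2]
```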
@@ -3,7 +3,7 @@ module InstDataShipper
  class Base
  attr_reader :dumper
 
- delegate :tracker, :table_schemas, :working_dir, to: :dumper
+ delegate :tracker, :schema, :working_dir, to: :dumper
 
  def initialize(cache_key, config, dumper)
  @cache_key = cache_key
@@ -11,9 +11,13 @@ module InstDataShipper
  @dumper = dumper
  end
 
+ # This method is called before taking any actions.
+ # It should be used to make any necessarry state assumptions (eg, the HostedData destination checks for a previous dump to determine if it can use incremental_since)
+ def preinitialize_dump(context); end
+
  # This method is called before processing any data.
  # It should be used to initialize any external resources needed for the dump.
- def initialize_dump; end
+ def initialize_dump(context); end
  # Yields an object (can be anything) that will be passed to `upload_data_chunk` as `chunk`.
  #
  #
@@ -50,7 +54,7 @@ module InstDataShipper
  end
 
  def user_config
- config[:extra]
+ config[:user_config]
  end
 
  def group_key
@@ -62,11 +66,11 @@ module InstDataShipper
  def parse_configuration(uri)
  if block_given?
  parsed = URI.parse(uri)
+ cparsed = ConfigURI.new(parsed)
  cfg = {
- params: parsed.query.present? ? Rack::Utils.parse_nested_query(parsed.query) : {},
- extra: (parsed.fragment.present? && parsed.fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(parsed.fragment)).presence || parsed.fragment || nil,
+ user_config: cparsed.hash_params,
  }
- yield parsed, cfg
+ yield cparsed, cfg
  cfg
  else
  raise NotImplementedError
@@ -100,5 +104,28 @@ module InstDataShipper
  end
 
  end
+
+ class ConfigURI
+ def initialize(uri)
+ @uri = uri
+ end
+
+ # delegate_missing_to :uri
+ delegate :scheme, :user, :password, :host, :hostname, :port, :path, :query, :fragment, to: :uri
+
+ def params
+ @params ||= (query.present? ? Rack::Utils.parse_nested_query(query).with_indifferent_access : {}).freeze
+ end
+
+ def hash_params
+ @hash_params ||= ((fragment.present? && fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(fragment).with_indifferent_access).presence || fragment || nil)&.freeze
+ end
+
+ private
+
+ def uri
+ @uri
+ end
+ end
  end
  end
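To make the `ConfigURI` split concrete, a small sketch of how a destination URI's query and fragment are parsed (the URI is invented; the real class also wraps the results in `with_indifferent_access` and freezes them):

```ruby
require "uri"
require "rack/utils"

uri = URI.parse("hosted-data://SOME_JWT@hosted.example.com?table_prefix=example#region=us-east-1&dry_run=1")

# ConfigURI#params is built from the query string
Rack::Utils.parse_nested_query(uri.query)
# => {"table_prefix"=>"example"}

# ConfigURI#hash_params is built from the fragment, but only when it looks like key=value
# pairs; otherwise the raw fragment string is passed through (it becomes `user_config`)
uri.fragment.match?(/^\w+=/) ? Rack::Utils.parse_nested_query(uri.fragment) : uri.fragment
# => {"region"=>"us-east-1", "dry_run"=>"1"}
```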
@@ -1,13 +1,47 @@
+ require "faraday_middleware"
+
  module InstDataShipper
  module Destinations
  class HostedData < Base
  include Concerns::Chunking
 
- def initialize_dump
+ def preinitialize_dump(context)
+ if context[:incremental_since].present?
+ begin
+ last_dump = hosted_data_client.get("api/v1/custom_dumps/last", {
+ status: 'imported',
+ # schema_version: convert_schema[:version],
+ tags: [
+ "ids-schema=#{dumper.schema_digest}",
+ "ids-genre=#{dumper.export_genre}",
+ ],
+ }).body.with_indifferent_access
+
+ if last_dump[:created_at] < context[:incremental_since]
+ InstDataShipper.logger.info("Last successful HostedData dump is older than incremental_since - bumping back incremental_since")
+ context[:incremental_since] = last_dump[:created_at]
+ end
+ rescue Faraday::ResourceNotFound
+ # TODO It'd be nice to make this per-table
+ InstDataShipper.logger.info("No Last successful HostedData dump of the same schema - not using incremental_since")
+ context[:incremental_since] = nil
+ end
+ end
+ end
+
+ def initialize_dump(context)
+ tags = [
+ "ids-schema=#{dumper.schema_digest}",
+ "ids-genre=#{dumper.export_genre}",
+ ]
+ tags << "ids-app=#{Rails.application.class.name.gsub(/::Application$/, '')}" if defined?(Rails) && Rails.application
+ tags << "ids-schema-version=#{schema[:version]}" if schema[:version].present?
+
  dump = hosted_data_client.post(
  'api/v1/custom_dumps/',
  reference_id: tracker.id,
  schema: convert_schema,
+ tags: tags,
  ).body.with_indifferent_access
 
  redis.hset(rk(:state), :dump_id, dump[:id])
@@ -15,7 +49,7 @@ module InstDataShipper
  end
 
  def chunk_data(generator, table:, extra: nil)
- warehouse_name = table_def[:warehouse_name]
+ warehouse_name = table[:warehouse_name]
 
  super(generator) do |batch, idx|
  bits = [warehouse_name, extra, idx].compact
@@ -36,18 +70,18 @@ module InstDataShipper
 
  def upload_data_chunk(table_def, chunk)
  hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", artifacts: {
- table_def[:warehouse_name] => [Faraday::UploadIO.new(chunk, 'application/gzip')],
+ table_name(table_def) => [Faraday::UploadIO.new(chunk, 'application/gzip')],
  })
  end
 
  def finalize_dump
  hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", start_import: true) if hd_dump_id.present?
- redis.delete(rk(:state))
+ redis.del(rk(:state))
  end
 
  def cleanup_fatal_error
  hosted_data_client.delete("api/v1/custom_dumps/#{hd_dump_id}/", reason: 'Failure during extraction or transformation') if hd_dump_id.present?
- redis.delete(rk(:state))
+ redis.del(rk(:state))
  end
 
  # TODO Support/allow single-table fatal errors?
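For reference, the shape of the tag list the new `initialize_dump` attaches to a Hosted Data dump; `preinitialize_dump` queries `api/v1/custom_dumps/last` with the first two tags to find a comparable earlier dump (all values below are invented):

```ruby
tags = [
  "ids-schema=5c9d12",          # dumper.schema_digest
  "ids-genre=HostedDataPush",   # dumper.export_genre
  "ids-app=MyCanvasLti",        # Rails application name, only when Rails is loaded
  "ids-schema-version=3",       # schema[:version], only when the schema declares one
]
```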
@@ -59,39 +93,45 @@ module InstDataShipper
  end
 
  def convert_schema
- table_prefix = config[:table_prefix]
- table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
-
  definititions = {}
+ table_schemas = schema[:tables]
  table_schemas.each do |ts|
  ts = ts.dup
+ tname = table_name(ts)
 
- table_name = ts[:warehouse_name]
- table_name = table_prefix + table_name if table_prefix.present?
-
- definititions[ts[:warehouse_name]] = {
+ definititions[tname] = {
  dw_type: 'dimension',
  description: ts[:description],
- incremental: !!ts[:incremental],
- incremental_on: ts[:incremental] && ts[:incremental] != true ? ts[:incremental] : nil,
+ incremental: dumper.table_is_incremental?(ts),
+ incremental_on: ts.dig(:incremental, :on),
  # indexed_columns
- tableName: table_name,
+ tableName: tname,
  columns: ts[:columns].map do |col|
+ coltype = col[:type]
+ coltype ||= ts[:model].column_for_attribute(col[:from]).sql_type if col[:from].is_a?(String)
  {
  name: col[:warehouse_name],
  description: col[:description],
- type: col[:type] || ts[:model].column_for_attribute(col[:local_name]).sql_type,
+ type: coltype,
  }
  end,
  }
  end
 
  {
- version: "#{dumper.export_genre.downcase}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
+ version: "#{dumper.schema_digest}-#{Digest::MD5.hexdigest(definititions.to_json)[0...6]}",
  definition: definititions,
  }
  end
 
+ def table_name(table_def)
+ table_prefix = config[:table_prefix]
+ table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
+ table_name = table_def[:warehouse_name]
+ table_name = table_prefix + table_name if table_prefix.present?
+ table_name
+ end
+
  def hosted_data_client
  @hosted_data_client ||= begin
  token = config[:token]
@@ -102,6 +142,8 @@ module InstDataShipper
  host = tok_content['host']
  end
 
+ host = "https://#{host}" unless host.include?('://')
+
  Faraday.new(url: host) do |faraday|
  faraday.request :multipart
  faraday.request :json
@@ -117,14 +159,16 @@ module InstDataShipper
 
  def parse_configuration(uri)
  super do |parsed_uri, cfg|
- if parsed_uri.username.present?
+ if parsed_uri.user.present?
  # hosted-data://<JWT>:<hosted_data_domain>
- cfg[:token] = parsed_uri.username
+ cfg[:token] = parsed_uri.user
  cfg[:host] = parsed_uri.host
  else
  # hosted-data://<JWT>
  cfg[:token] = parsed_uri.host
  end
+
+ cfg[:table_prefix] = parsed_uri.params[:table_prefix]
  end
  end