inst_data_shipper 0.1.0.beta1 → 0.1.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: bf2f1cdd4b4181e945c5f36e7680ed0a054429dc191197fafbee60de9598305b
- data.tar.gz: 5fb781dc8aa17bf7d672fdfc8942d70365edb68fd324d8aeab70297270af0b1e
+ metadata.gz: 146f5b93819d7950f9bd256a99eb690a63d453b86be4ac6ac7cf4c5901724cdd
+ data.tar.gz: 2410298ebb3b1ddc565ca70d49a274a129e83087d461dcfae4d4981979795ea5
  SHA512:
- metadata.gz: 9212cd9c647193aa7256f15f6da12cd4ee3c56a12e011ac269a1d801d15e0cb7182a71c2fa8d8e2d2ea808aff73ff4f7c974c3720db54414eb43c24658ca554f
- data.tar.gz: c4cb69ad7ea635833aa5051dec5a8c14f3aa13e2b11dd3e8fbdd4d12c2a9d63ac9dbb5b235915da0d80898d0b048d5677fb807d6947911e3e10a87956b8ee1fc
+ metadata.gz: c6dc93902e0ef7a114d2434d3901c677021d57603b07c1122d72bd2f953184b207d0e57f5441fafb7035a00bf28f2e5035d34cd0edfb73da6d2f93d93874f344
+ data.tar.gz: 2e7babf6a2ed86f9a2e5769bfb393549fb07b771b700513efd640ba2b98ebc3eeadec99578e4c828738903ef121f8e3bebe8490f6f75de0ce9afac43ac28b8fa
@@ -1,4 +1,4 @@
- class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
+ class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
  def change
  create_table :inst_data_shipper_dump_batches do |t|
  t.datetime :started_at
@@ -6,6 +6,8 @@ class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
  t.string :status

  t.string :job_class
+ t.string :genre
+
  t.string :exception
  t.text :backtrace
  t.text :metadata
@@ -21,7 +21,7 @@ module InstDataShipper

  def table_schemas
  pointer = @schema_pointer || batch_context[:schema_pointer]
- safe_constantize(pointer)
+ pointer.constantize
  end
  end
  end
@@ -9,21 +9,35 @@ module InstDataShipper
  end

  def hook(name, prepend: false, &block)
+ _assert_hook_defined(name)
+ @hooks ||= {}
+ @hooks[name] ||= []
  hooks = @hooks[name]
  prepend ? hooks.unshift(block) : hooks << block
  end
+
+ def _assert_hook_defined(name)
+ return true if @hooks&.key?(name)
+ return if superclass.respond_to?(:_assert_hook_defined) && superclass._assert_hook_defined(name)
+ raise ArgumentError, "Hook #{name} is not defined"
+ end
+
+ def _list_hooks(name)
+ list = []
+ list.push(*superclass._list_hooks(name)) if superclass.respond_to?(:_list_hooks)
+ list.push(*@hooks[name]) if (@hooks || {})[name]
+ list
+ end
  end

  def run_hook(name, *args, **kwargs)
- hooks = @hooks[name]
- hooks.each do |blk|
+ self.class._list_hooks(name).each do |blk|
  instance_exec(*args, **kwargs, &blk)
  end
  end

  def run_hook_safe(name, *args, **kwargs)
- hooks = @hooks[name]
- hooks.each do |blk|
+ self.class._list_hooks(name).each do |blk|
  instance_exec(*args, **kwargs, &blk)
  rescue StandardError
  end
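
To make the hook rework above concrete, here is a minimal sketch of how the new per-class registry behaves. The subclass names, the :before_upload hook, and the log_table/prepare_csv helpers are invented for illustration; define_hook, hook, and run_hook are the gem's API.

    class BaseDumper < InstDataShipper::Dumper
      define_hook :before_upload
      hook(:before_upload) { |table| log_table(table) }   # log_table is hypothetical
    end

    class CsvDumper < BaseDumper
      # hook() now asserts the name was declared somewhere up the superclass chain
      # (_assert_hook_defined) and stores the block on this class instead of mutating
      # the parent's list.
      hook(:before_upload, prepend: true) { |table| prepare_csv(table) }  # prepare_csv is hypothetical
    end

    # run_hook(:before_upload, table) collects blocks via self.class._list_hooks, which
    # concatenates inherited hooks first and then this class's own; prepend: only
    # reorders blocks registered on the same class.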
@@ -27,11 +27,25 @@ module InstDataShipper
  _in_canvas_report_pool(:_import_canvas_report, *args, **kwargs)
  end

- def import_canvas_report_by_terms(target_table, report_name, terms: [], params: {}, **kwargs)
+ def import_canvas_report_by_terms(*args, **kwargs)
+ _in_canvas_report_pool(:_import_canvas_report_by_terms, *args, **kwargs)
+ end
+
+ def import_existing_report(report, **kwargs)
+ delayed(:_process_canvas_report, report: report, **kwargs)
+ end
+
+ private
+
+ def _import_canvas_report_by_terms(report_name, terms: [], params: {}, **kwargs)
  term_ids = (terms || []).map do |term|
  term.is_a?(Term) ? term.canvas_id : term
  end

+ table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
+
+ _resolve_report_incremenal_parameters(table_def, params)
+
  Sidekiq::Batch.new.tap do |b|
  b.description = "Term Scoped #{report_name} Runners"
  b.context = {
@@ -40,19 +54,21 @@ module InstDataShipper
  b.jobs do
  terms_query = term_ids.present? ? Term.where(canvas_id: term_ids) : Term
  terms_query.find_each do |t|
- import_canvas_report(target_table, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
+ _in_canvas_report_pool(:_trigger_canvas_report, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
  end
  end
  end
  end

- def import_existing_report(table, report)
- delayed(:_process_canvas_report, table, report: report)
- end
+ def _import_canvas_report(report_name, params: {}, **kwargs)
+ table_def = lookup_table_schema!(kwargs[:schema_name], report_name)

- private
+ _resolve_report_incremenal_parameters(table_def, params)
+
+ _trigger_canvas_report(report_name, params: params, **kwargs)
+ end

- def _import_canvas_report(target_table, report_name, retry_count: 3, params: {}, **kwargs)
+ def _trigger_canvas_report(report_name, retry_count: 3, params: {}, **kwargs)
  report = canvas_sync_client.start_report(
  'self', report_name,
  parameters: params,
@@ -61,15 +77,13 @@ module InstDataShipper
  CanvasSync::Jobs::CanvasProcessWaiter.perform_later(
  "/api/v1/accounts/self/reports/#{report_name}/#{report[:id]}",
  {
- instance_of: origin_class,
- method: :_process_canvas_report,
- args: [target_table],
+ job: Jobs::AsyncCaller,
+ args: [origin_class, :_process_canvas_report],
  kwargs: kwargs,
  },
  on_failure: {
- instance_of: origin_class,
- method: :_handle_failed_canvas_report,
- args: [target_table, report_name, kwargs],
+ job: Jobs::AsyncCaller,
+ args: [origin_class, :_handle_failed_canvas_report, report_name, kwargs],
  kwargs: { retry_count: retry_count },
  },
  status_key: :status,
@@ -79,18 +93,18 @@ module InstDataShipper

  def _in_canvas_report_pool(mthd, *args, **kwargs)
  pool = CanvasSync::JobBatches::Pool.from_pid(batch_context[:report_processor_pool])
- AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
+ Jobs::AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
  end

- def _process_canvas_report(table, report:)
- table_def = table_schemas.find { |t| t[:warehouse_name].to_s == table }
+ def _process_canvas_report(report:, schema_name: nil)
+ table_def = lookup_table_schema!(schema_name, report[:report])

- IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/#{table}.csv")
+ IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/temp_report.csv")

  inner_block = ->(file) {
- CSV.foreach("#{working_dir}/#{table}.csv", headers: true) do |m|
+ CSV.foreach("#{working_dir}/temp_report.csv", headers: true) do |m|
  file << table_def[:columns].map do |c|
- c[:transformer].present? ? m.instance_exec(&c[:transformer]) : m[c[:local_name].to_s]
+ instance_exec(m, &c[:block])
  end
  end
  }
@@ -98,13 +112,36 @@ module InstDataShipper
  upload_data(table_def, extra: report['id'], &inner_block)
  end

- def _handle_failed_canvas_report(table, report_name, kwargs, retry_count:, report:) # rubocop:disable Lint/UnusedMethodArgument
+ def _resolve_report_incremenal_parameters(table_def, params)
+ if table_is_incremental?(table_def)
+ inc = table_def[:incremental]
+ scope = inc[:scope]
+
+ if scope != false
+ scope ||= "updated_after"
+
+ if scope.is_a?(Proc)
+ scope = instance_exec(params, &scope)
+ if scope.is_a?(Hash) && scope != params
+ params.merge!(scope)
+ end
+ elsif scope.is_a?(String) || scope.is_a?(Symbol)
+ params[scope] = incremental_since
+ end
+ end
+ end
+
+ params
+ end
+
+ def _handle_failed_canvas_report(report_name, kwargs, retry_count:, report:)
  if retry_count.positive?
  tbid = batch_context[:report_bid] || batch_context[:root_bid]
  Sidekiq::Batch.new(tbid).jobs do
- import_canvas_report(table, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
+ _in_canvas_report_pool(:_trigger_canvas_report, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
  end
  else
+ # TODO Allow marking the table as incomplete. Destination code can then decide how to handle incomplete tables since (eg) incremental imports wouldn't mind too much
  cleanup_fatal_error!
  end
  end
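
A hedged sketch of how the reshaped Canvas-report helpers above are meant to be called from a dumper's enqueue_tasks. The report names and the existing_report variable are invented; the helpers now take the report name directly and resolve the target table from the schema via lookup_table_schema!.

    def enqueue_tasks
      # Runs the report once and processes it when CanvasProcessWaiter signals completion.
      import_canvas_report("provisioning_csv", params: { "users" => true })

      # Fans out one report per term inside a Sidekiq batch; incremental parameters
      # (eg updated_after) are merged in by _resolve_report_incremenal_parameters.
      import_canvas_report_by_terms("provisioning_csv", terms: Term.all, params: { "courses" => true })

      # Reuses a report object that was already generated elsewhere.
      import_existing_report(existing_report)   # existing_report is hypothetical
    end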
@@ -12,22 +12,43 @@ module InstDataShipper

  private

- def _import_local_table(table_name)
- table_def = table_schemas.find { |t| t[:model].to_s == table_name }
- model = table_def[:model]
+ def _import_local_table(model, schema_name: nil)
+ model = model.safe_constantize if model.is_a?(String)
+
+ table_def = lookup_table_schema!(schema_name, { model: model })

  inner_block = ->(file) {
- query = model
- query = query.includes(table_def[:includes]) if table_def[:includes].present?
- model.find_each do |m|
+ query = model.all
+ query = _resolve_model_query(query, table_def[:query])
+
+ if table_is_incremental?(table_def)
+ query = _resolve_model_query(query, table_def.dig(:incremental, :scope), string: ->(r, c) { r.where("? > ?", c, incremental_since) })
+ end
+
+ query.find_each do |m|
  file << table_def[:columns].map do |c|
- c[:transformer].present? ? m.instance_exec(&c[:transformer]) : m[c[:local_name].to_s]
+ instance_exec(m, &c[:block])
  end
  end
  }

  upload_data(table_def, &inner_block)
  end
+
+ def _resolve_model_query(relation, query, string: nil)
+ return relation if query.nil?
+
+ if query.is_a?(Symbol)
+ relation.send(query)
+ elsif query.is_a?(Proc)
+ instance_exec(relation, &query)
+ elsif query.is_a?(String) && string.present?
+ instance_exec(relation, query, &string)
+ else
+ raise "Invalid query: #{query.inspect}"
+ end
+ end
+
  end
  end
  end
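
For orientation, a sketch of the query shapes _resolve_model_query accepts (model and scope names are invented; the method is private to the local-tables source):

    query = User.all
    _resolve_model_query(query, :active)                                    # Symbol -> query.active (a named scope)
    _resolve_model_query(query, ->(rel) { rel.where(admin: false) })        # Proc   -> instance_exec'd with the relation
    _resolve_model_query(query, "updated_at",                               # String -> only when a handler is given, as the
      string: ->(rel, col) { rel.where("? > ?", col, incremental_since) })  #           incremental scope does above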
@@ -50,7 +50,7 @@ module InstDataShipper
  end

  def user_config
- config[:extra]
+ config[:user_config]
  end

  def group_key
@@ -62,11 +62,11 @@ module InstDataShipper
  def parse_configuration(uri)
  if block_given?
  parsed = URI.parse(uri)
+ cparsed = ConfigURI.new(parsed)
  cfg = {
- params: parsed.query.present? ? Rack::Utils.parse_nested_query(parsed.query) : {},
- extra: (parsed.fragment.present? && parsed.fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(parsed.fragment)).presence || parsed.fragment || nil,
+ user_config: cparsed.hash_params,
  }
- yield parsed, cfg
+ yield cparsed, cfg
  cfg
  else
  raise NotImplementedError
@@ -100,5 +100,28 @@ module InstDataShipper
  end

  end
+
+ class ConfigURI
+ def initialize(uri)
+ @uri = uri
+ end
+
+ # delegate_missing_to :uri
+ delegate :scheme, :user, :password, :host, :hostname, :port, :path, :query, :fragment, to: :uri
+
+ def params
+ @params ||= (query.present? ? Rack::Utils.parse_nested_query(query).with_indifferent_access : {}).freeze
+ end
+
+ def hash_params
+ @hash_params ||= ((fragment.present? && fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(fragment).with_indifferent_access).presence || fragment || nil)&.freeze
+ end
+
+ private
+
+ def uri
+ @uri
+ end
+ end
  end
  end
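
A sketch of what the new ConfigURI wrapper yields for a destination string. The URI below is made up, and the exact constant nesting of ConfigURI is assumed to match the class added above.

    parsed = URI.parse("hosted-data://SOME_JWT@hd.example.com?table_prefix=dev#beta=1")
    cparsed = ConfigURI.new(parsed)

    cparsed.user         # => "SOME_JWT"
    cparsed.host         # => "hd.example.com"
    cparsed.params       # => { "table_prefix" => "dev" }  (query string, indifferent access, frozen)
    cparsed.hash_params  # => { "beta" => "1" }            (fragment, parsed only when it looks like key=value)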
@@ -1,3 +1,5 @@
+ require "faraday_middleware"
+
  module InstDataShipper
  module Destinations
  class HostedData < Base
@@ -15,7 +17,7 @@ module InstDataShipper
  end

  def chunk_data(generator, table:, extra: nil)
- warehouse_name = table_def[:warehouse_name]
+ warehouse_name = table[:warehouse_name]

  super(generator) do |batch, idx|
  bits = [warehouse_name, extra, idx].compact
@@ -36,18 +38,18 @@ module InstDataShipper

  def upload_data_chunk(table_def, chunk)
  hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", artifacts: {
- table_def[:warehouse_name] => [Faraday::UploadIO.new(chunk, 'application/gzip')],
+ table_name(table_def) => [Faraday::UploadIO.new(chunk, 'application/gzip')],
  })
  end

  def finalize_dump
  hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", start_import: true) if hd_dump_id.present?
- redis.delete(rk(:state))
+ redis.del(rk(:state))
  end

  def cleanup_fatal_error
  hosted_data_client.delete("api/v1/custom_dumps/#{hd_dump_id}/", reason: 'Failure during extraction or transformation') if hd_dump_id.present?
- redis.delete(rk(:state))
+ redis.del(rk(:state))
  end

  # TODO Support/allow single-table fatal errors?
@@ -59,28 +61,25 @@ module InstDataShipper
  end

  def convert_schema
- table_prefix = config[:table_prefix]
- table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
-
  definititions = {}
  table_schemas.each do |ts|
  ts = ts.dup
+ tname = table_name(ts)

- table_name = ts[:warehouse_name]
- table_name = table_prefix + table_name if table_prefix.present?
-
- definititions[ts[:warehouse_name]] = {
+ definititions[tname] = {
  dw_type: 'dimension',
  description: ts[:description],
- incremental: !!ts[:incremental],
- incremental_on: ts[:incremental] && ts[:incremental] != true ? ts[:incremental] : nil,
+ incremental: dumper.table_is_incremental?(ts),
+ incremental_on: ts.dig(:incremental, :on),
  # indexed_columns
- tableName: table_name,
+ tableName: tname,
  columns: ts[:columns].map do |col|
+ coltype = col[:type]
+ coltype ||= ts[:model].column_for_attribute(col[:from]).sql_type if col[:from].is_a?(String)
  {
  name: col[:warehouse_name],
  description: col[:description],
- type: col[:type] || ts[:model].column_for_attribute(col[:local_name]).sql_type,
+ type: coltype,
  }
  end,
  }
@@ -92,6 +91,14 @@ module InstDataShipper
  }
  end

+ def table_name(table_def)
+ table_prefix = config[:table_prefix]
+ table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
+ table_name = table_def[:warehouse_name]
+ table_name = table_prefix + table_name if table_prefix.present?
+ table_name
+ end
+
  def hosted_data_client
  @hosted_data_client ||= begin
  token = config[:token]
@@ -102,6 +109,8 @@ module InstDataShipper
  host = tok_content['host']
  end

+ host = "https://#{host}" unless host.include?('://')
+
  Faraday.new(url: host) do |faraday|
  faraday.request :multipart
  faraday.request :json
@@ -117,14 +126,16 @@ module InstDataShipper

  def parse_configuration(uri)
  super do |parsed_uri, cfg|
- if parsed_uri.username.present?
+ if parsed_uri.user.present?
  # hosted-data://<JWT>:<hosted_data_domain>
- cfg[:token] = parsed_uri.username
+ cfg[:token] = parsed_uri.user
  cfg[:host] = parsed_uri.host
  else
  # hosted-data://<JWT>
  cfg[:token] = parsed_uri.host
  end
+
+ cfg[:table_prefix] = parsed_uri.params[:table_prefix]
  end
  end

@@ -4,7 +4,7 @@ module InstDataShipper
  include Concerns::Chunking

  def chunk_data(generator, table:, extra: nil)
- warehouse_name = table_def[:warehouse_name]
+ warehouse_name = table[:warehouse_name]

  super(generator) do |batch, idx|
  bits = [warehouse_name, extra, idx].compact
@@ -5,7 +5,7 @@ module InstDataShipper
  define_hook :initialize_dump_batch
  define_hook :finalize_dump_batch

- def self.perform_dump(destinations:)
+ def self.perform_dump(destinations)
  raise "Must subclass Dumper to use perform_dump" if self == Dumper

  dumper = new(destinations)
@@ -14,48 +14,134 @@ module InstDataShipper
  dumper.tracker
  end

- protected
-
- attr_reader :executor
+ def self.define(include: [], schema: , &blk)
+ Class.new(self) do
+ include(*include)

- def initialize(destinations = nil, executor: nil)
- @raw_destinations = destinations
- @executor = executor
+ define_method(:enqueue_tasks, &blk)
+ define_method(:table_schemas) { schema }
+ end
  end

- def enqueue_tasks
- raise NotImplementedError
- end
+ public

  def begin_dump
  raise "Dump already begun" unless @raw_destinations.present?

- @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, status: 'in_progress')
+ @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')

- destinations.each do |dest|
- dest.initialize_dump()
+ @batch_context = context = {
+ # TODO Allow to be hooked by Destination, likely via initialize_dump_batch and batch_context, so that if an earlier destination fails we can resend data
+ # TODO Consider behavior if last is still running
+ incremental_since: DumpBatch.where(genre: export_genre, status: 'completed').order(created_at: :desc).first&.created_at,
+ }
+
+ begin
+ begin
+ destinations.each do |dest|
+ dest.initialize_dump()
+ end
+
+ run_hook(:initialize_dump_batch, context)
+ ensure
+ @batch_context = nil
+ context[:tracker_id] = tracker.id
+ context[:origin_class] = batch_context[:origin_class] || self.class.to_s
+ context[:destinations] = @raw_destinations
+ end
+
+ Sidekiq::Batch.new.tap do |batch|
+ context[:root_bid] = batch.bid
+
+ batch.description = "HD #{export_genre} Export #{tracker.id} Root"
+ batch.context = context
+ batch.on(:success, "#{self.class}#finalize_dump")
+ batch.on(:death, "#{self.class}#cleanup_fatal_error!")
+ batch.jobs do
+ enqueue_tasks
+ rescue => ex
+ delayed :cleanup_fatal_error!
+ InstDataShipper.handle_suppressed_error(ex)
+ end
+ end
+ rescue => ex
+ if context
+ batch ||= Sidekiq::Batch.new.tap do |batch|
+ batch.description = "HD #{export_genre} Export #{tracker.id} Early Failure Cleanup"
+ batch.context = context
+ batch.jobs do
+ delayed :cleanup_fatal_error!
+ end
+ end
+ end
+ raise ex
  end
+ end

- context = {}
- run_hook(:initialize_dump_batch, context)
-
- Sidekiq::Batch.new.tap do |batch|
- batch.description = "HD #{export_genre} Export #{tracker.id} Root"
- batch.context = {
- **context,
- root_bid: batch.bid,
- tracker_id: tracker.id,
- origin_class: batch_context[:origin_class] || self.class.to_s,
- destinations: @raw_destinations,
- }
- batch.on(:success, "#{self.class}#finalize_dump")
- batch.on(:death, "#{self.class}#cleanup_fatal_error!")
- batch.jobs do
- enqueue_tasks
+ def tracker
+ @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
+ end
+
+ def export_genre
+ self.class.to_s.gsub(/HD|ExportJob/, '')
+ end
+
+ def origin_class
+ batch_context[:origin_class]&.constantize || self.class
+ end
+
+ def table_is_incremental?(table_def)
+ if (inc = table_def[:incremental]).present?
+ differ = inc[:if]
+ return !!incremental_since if differ.nil?
+
+ differ = :"#{differ}".to_proc if differ.is_a?(Symbol)
+ differ = instance_exec(&differ) if differ.is_a?(Proc)
+ return !!differ
+ end
+
+ false
+ end
+
+ def incremental_since
+ batch_context[:incremental_since]
+ end
+
+ def lookup_table_schema(*identifiers)
+ identifiers.compact.each do |ident|
+ if ident.is_a?(Hash)
+ key = ident.keys.first
+ value = ident.values.first
+ else
+ key = :warehouse_name
+ value = ident
+ end
+
+ value = Array(value).compact
+
+ table_schemas.each do |ts|
+ return ts if value.include?(ts[key])
  end
  end

- # TODO Catch errors in here and cleanup as needed
+ nil
+ end
+
+ def lookup_table_schema!(*identifiers)
+ lookup_table_schema(*identifiers) || raise("No table schema found for #{identifiers.inspect}")
+ end
+
+ protected
+
+ attr_reader :executor
+
+ def initialize(destinations = nil, executor: nil)
+ @raw_destinations = Array(destinations)
+ @executor = executor
+ end
+
+ def enqueue_tasks
+ raise NotImplementedError
  end

  def upload_data(table_def, extra: nil, &datagen)
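
Putting the new class-level API together, a hedged sketch of defining and kicking off a dumper. The schema, the User model, the data-source module path, the import_local_table helper, and the destination URI are invented or assumed; perform_dump now takes destinations as a positional argument, and Dumper.define builds an anonymous subclass whose enqueue_tasks is the given block.

    USER_SCHEMA = InstDataShipper::SchemaBuilder.build do
      table(User, "Users in the local shard") do
        incremental "updated_at"
        column :id, :bigint
        column :name, :varchar, "Display name"
      end
    end

    UserDumper = InstDataShipper::Dumper.define(schema: USER_SCHEMA, include: [
      InstDataShipper::DataSources::LocalTables,   # module path assumed, not shown in this diff
    ]) do
      import_local_table(User)                     # public wrapper assumed, mirroring _import_local_table
    end

    UserDumper.perform_dump([
      "hosted-data://SOME_JWT@hd.example.com?table_prefix=dev",
    ])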
@@ -96,7 +182,7 @@ module InstDataShipper
  def finalize_dump(_status, _opts)
  run_hook(:finalize_dump_batch)

- destination.each do |dest|
+ destinations.each do |dest|
  dest.finalize_dump
  end

@@ -108,14 +194,15 @@ module InstDataShipper

  run_hook(:finalize_dump_batch)

- destination.each do |dest|
+ destinations.each do |dest|
  dest.cleanup_fatal_error
- rescue StandardError # rubocop:disable Lint/SuppressedException
+ rescue => ex
+ InstDataShipper.handle_suppressed_error(ex)
  end

  DumpBatch.find(batch_context[:tracker_id]).update(status: 'failed')

- CanvasSync::JobBatches::Batch.delete_prematurely!(batch_context[:root_bid])
+ CanvasSync::JobBatches::Batch.delete_prematurely!(batch_context[:root_bid]) if batch_context[:root_bid].present?
  end

  # Helper Methods
@@ -126,23 +213,17 @@ module InstDataShipper
  end

  def delayed(mthd, *args, **kwargs)
- AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
- end
-
- def tracker
- @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
+ Jobs::AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
  end

- def export_genre
- self.class.to_s.gsub(/HD|ExportJob/, '')
- end
+ delegate :working_dir, to: :executor

- def origin_class
- batch_context[:origin_class]&.constantize || self.class
+ def batch
+ Thread.current[CanvasSync::JobBatches::CURRENT_BATCH_THREAD_KEY]
  end

- def working_dir
- executor.working_dir
+ def batch_context
+ @batch_context || batch&.context || {}
  end

  def destinations_for_table(table_def)
@@ -150,7 +231,7 @@ module InstDataShipper
  end

  def destinations
- @destinations ||= (@raw_destinations || batch_context[:destinations]).map.with_index do |dest, i|
+ @destinations ||= (@raw_destinations.presence || batch_context[:destinations]).map.with_index do |dest, i|
  dcls = InstDataShipper.resolve_destination(dest)
  dcls.new("#{InstDataShipper.redis_prefix}:dump#{tracker.id}:dest#{i}", dest, self)
  end
@@ -4,5 +4,11 @@ module InstDataShipper
  class Engine < ::Rails::Engine
  isolate_namespace InstDataShipper

+ initializer :append_migrations do |app|
+ config.paths["db/migrate"].expanded.each do |expanded_path|
+ app.config.paths["db/migrate"] << expanded_path
+ end
+ ActiveRecord::Migrator.migrations_paths = Rails.application.paths['db/migrate'].to_a
+ end
  end
  end
@@ -1,7 +1,14 @@
+
+ require "sidekiq"
+
  module InstDataShipper
  module Jobs
  class AsyncCaller < InstDataShipper::Jobs::Base
- sidekiq_options retry: 6 if defined?(sidekiq_options)
+ sidekiq_options(retry: 0) if defined?(sidekiq_options)
+
+ def self.get_sidekiq_options
+ { retry: 0 }
+ end

  def self.call_from_pool(pool, clazz, method, *args, **kwargs)
  pool.add_job(
@@ -12,7 +19,8 @@ module InstDataShipper
  end

  def perform(clazz, method, *args, **kwargs)
- clazz.constantize.new(executor: self).send(method.to_sym, *args, **kwargs)
+ clazz = clazz.constantize if clazz.is_a?(String)
+ clazz.new(executor: self).send(method.to_sym, *args, **kwargs)
  end
  end
  end
@@ -3,9 +3,13 @@ module InstDataShipper
  class BasicDumpJob < InstDataShipper::Jobs::Base
  sidekiq_options retry: 3 if defined?(sidekiq_options)

- def perform(endpoints, dump_class = nil)
- dumper.perform_dump(endpoints: endpoints)
+ def perform(endpoints)
+
  end
+
+ protected
+
+
  end
  end
  end
@@ -1,4 +1,6 @@
  module InstDataShipper
+ # This class ends up fill two roles - Schema and Mapping.
+ # It makes for a clean API, but it's a little less canonical since, (eg) the S3 destination doesn't need column type annotations.
  class SchemaBuilder
  attr_reader :tables

@@ -12,24 +14,40 @@ module InstDataShipper
  builder.tables
  end

- def table(model_or_name, description = nil, as: nil, includes: nil, incremental: false, &block)
- as ||= model_or_name
- as = as.table_name if as.respond_to?(:table_name)
+ def extend_table_builder(&block)
+ @table_builder_class ||= Class.new(TableSchemaBuilder)
+ @table_builder_class.class_eval(&block)
+ end

+ def table(model_or_name, description = nil, model: nil, query: nil, **extra, &block)
  tdef = {
+ warehouse_name: nil,
  description: description,
- model: model_or_name.is_a?(String) ? nil : model_or_name,
- warehouse_name: as.to_s,
- incremental: incremental,
  columns: [],
- includes: includes,
+
+ model: model,
+ query: query,
+ **extra,
  }

- TableSchemaBuilder.build(tdef, &block)
+ if model_or_name.is_a?(ActiveRecord::Relation)
+ raise "model specified twice" if model.present?
+ raise "query specified twice" if query.present?
+
+ tdef[:query] = model_or_name
+ tdef[:model] = model_or_name.model
+ elsif model_or_name.is_a?(Class) && model_or_name < ActiveRecord::Base
+ tdef[:warehouse_name] = model_or_name.table_name
+ tdef[:model] = model_or_name
+ else
+ tdef[:warehouse_name] = model_or_name
+ end
+
+ @table_builder_class.build(tdef, &block)

  @tables << tdef

- self
+ tdef
  end

  class TableSchemaBuilder
@@ -46,48 +64,82 @@ module InstDataShipper
  builder.columns
  end

- # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
- def column(name, *args, **kwargs, &block)
- name = { name => name } unless name.is_a?(Hash)
- raise ArgumentError, 'Exactly one name must be provided' unless name.count == 1
+ # def annotate(key, value)
+ # options[key] = value
+ # end
+
+ def incremental(scope="updated_at", **kwargs)
+ if (extras = kwargs.keys - %i[on if]).present?
+ raise ArgumentError, "Unsuppored options: #{extras.inspect}"
+ end
+
+ options[:incremental] = {
+ on: Array(kwargs[:on]),
+ scope: scope,
+ if: kwargs[:if],
+ }
+ end
+
+ def column(name, *args, refs: [], from: nil, **extra, &block)
+ from ||= name.to_s

  cdef = {
- local_name: name.keys[0].to_s,
- warehouse_name: name.values[0].to_s,
- transformer: block,
+ warehouse_name: name.to_s,
+ from: from,
+ **extra,
  }

- [:description, :type, :refs => :references].each do |k|
- if k.is_a? Hash
- k.each do |hk, hv|
- cdef[hv] = kwargs.delete(hk) if kwargs.key?(hk)
- end
- elsif kwargs.key?(k)
- cdef[k] = kwargs.delete(k)
- end
+ if args[0].is_a?(Symbol)
+ cdef[:type] = args.shift()
  end

- cdef[:references] = Array(cdef[:references])
-
- args[0..1].each do |a|
- k = (a.is_a?(String) && :description) || (a.is_a?(Symbol) && :type) || nil
- raise ArgumentError, 'Unsupported Argument' if k.nil?
- raise ArgumentError, "Duplicate Argument for #{k}" if cdef.key?(k)
+ if args[0].is_a?(String)
+ cdef[:description] = args.shift()
+ end

- cdef[k] = a
+ if args.present?
+ raise ArgumentError, "Received unexpected arguments: #{args.inspect}"
  end

+ cdef[:references] = Array(refs)
+
  if options[:model].is_a?(Class) && cdef[:local_name].to_s.ends_with?('_id')
  rel_name = cdef[:local_name].to_s[0...-3]
  refl = options[:model].reflections[rel_name]
  cdef[:references] << "#{refl.klass}##{refl.options[:primary_key] || 'id'}" if refl.present? && !refl.polymorphic?
  end

+ compiled_from = compile_transformer(from)
+
+ cdef[:block] = ->(row) {
+ value = instance_exec(row, &compiled_from)
+ value = instance_exec(value, row, &block) if block.present?
+ value
+ }
+
  @columns << cdef

- self
+ cdef
  end
- # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+
+ protected
+
+ def compile_transformer(from)
+ if from.present?
+ if from.is_a?(Symbol)
+ ->(row) { row.send(from) }
+ elsif from.is_a?(Proc)
+ from
+ elsif from.is_a?(String)
+ ->(row) { row[from] }
+ else
+ raise ArgumentError, "Invalid transformer: #{from.inspect}"
+ end
+ else
+ ->(row) { row }
+ end
+ end
+
  end
  end
  end
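
To illustrate the reworked column DSL above (model and column names are invented, and this assumes the block runs inside a SchemaBuilder.build table definition): the first Symbol argument is the type, the first String the description, from: selects the source value (method Symbol for models, String key for CSV/Hash rows, or a Proc), and an optional block post-processes the value produced by the compiled from: transformer.

    table(Enrollment, "Enrollments") do
      incremental "updated_at"
      column :id, :bigint
      column :user_id, :bigint, refs: ["User#id"]
      column :role_label, :varchar, "Humanized role", from: :role  # pulls row.role
      column :sis_id, :varchar, from: "sis_source_id"              # CSV/Hash rows are indexed by String
      column :state, :varchar do |value|
        value.to_s.upcase                                          # block receives (value, row)
      end
    end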
@@ -1,3 +1,3 @@
  module InstDataShipper
- VERSION = "0.1.0.beta1".freeze
+ VERSION = "0.1.0.beta2".freeze
  end
@@ -23,13 +23,20 @@ module InstDataShipper
  destination = @destination_aliases[type]
  end

- safe_constantize(destination)
+ destination.constantize
  end

  def start_basic_dump(*args, **kwargs, &block)
  BasicDumper.perform_dump(*args, **kwargs, &block)
  end

+ def handle_suppressed_error(ex)
+ logger.error "Suppressed Error: #{ex.message}"
+ logger.error ex.backtrace.join("\n")
+ Raven.capture_exception(ex) if defined?(Raven)
+ Sentry.capture_exception(ex) if defined?(Sentry)
+ end
+
  def logger
  return @logger if defined? @logger
  @logger = Logger.new(STDOUT)
@@ -66,6 +73,8 @@ Dir[File.dirname(__FILE__) + "/inst_data_shipper/destinations/*.rb"].each do |fi
  basename = File.basename(file, ".rb")
  next if basename == "base"

- InstDataShipper.alias_destination(basename.dasherize, "InstDataShipper::Destinations::#{basename.classify}")
+ InstDataShipper.alias_destination(basename.dasherize, "InstDataShipper::Destinations::#{basename.camelize}")
  end

+ require "inst_data_shipper/dumper"
+ require "inst_data_shipper/basic_dumper"
data/spec/spec_helper.rb CHANGED
@@ -7,7 +7,7 @@ require File.expand_path("../dummy/config/environment.rb", __FILE__)
  require "bundler/setup"
  require 'rspec/rails'
  require 'spec_helper'
- require 'factory_girl_rails'
+ require 'factory_bot_rails'
  require 'timecop'
  require 'webmock/rspec'
  require 'support/fake_canvas'
@@ -29,7 +29,7 @@ ActiveRecord::Migration.maintain_test_schema!
  RSpec.configure do |config|
  config.extend WithModel

- config.include FactoryGirl::Syntax::Methods
+ config.include FactoryBot::Syntax::Methods
  config.use_transactional_fixtures = true
  config.infer_spec_type_from_file_location!
  config.filter_rails_from_backtrace!
metadata CHANGED
@@ -1,27 +1,27 @@
  --- !ruby/object:Gem::Specification
  name: inst_data_shipper
  version: !ruby/object:Gem::Version
- version: 0.1.0.beta1
+ version: 0.1.0.beta2
  platform: ruby
  authors:
  - Instructure CustomDev
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2024-03-06 00:00:00.000000000 Z
+ date: 2024-03-08 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rails
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - "<"
+ - - "~>"
  - !ruby/object:Gem::Version
  version: '6.0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - "<"
+ - - "~>"
  - !ruby/object:Gem::Version
  version: '6.0'
  - !ruby/object:Gem::Dependency
@@ -360,6 +360,20 @@ dependencies:
  - - ">="
  - !ruby/object:Gem::Version
  version: '0'
+ - !ruby/object:Gem::Dependency
+ name: faraday_middleware
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0'
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0'
  description:
  email:
  - pseng@instructure.com
@@ -369,8 +383,8 @@ extra_rdoc_files: []
  files:
  - README.md
  - Rakefile
- - app/models/hosted_data_dumper/dump_batch.rb
- - db/migrate/20240301090836_create_canvas_sync_sync_batches.rb
+ - app/models/inst_data_shipper/dump_batch.rb
+ - db/migrate/20240301090836_create_inst_data_shipper_dump_batches.rb
  - lib/inst_data_shipper.rb
  - lib/inst_data_shipper/basic_dumper.rb
  - lib/inst_data_shipper/concerns/hooks.rb