inst_data_shipper 0.1.0.beta1 → 0.1.0.beta2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: bf2f1cdd4b4181e945c5f36e7680ed0a054429dc191197fafbee60de9598305b
- data.tar.gz: 5fb781dc8aa17bf7d672fdfc8942d70365edb68fd324d8aeab70297270af0b1e
+ metadata.gz: 146f5b93819d7950f9bd256a99eb690a63d453b86be4ac6ac7cf4c5901724cdd
+ data.tar.gz: 2410298ebb3b1ddc565ca70d49a274a129e83087d461dcfae4d4981979795ea5
  SHA512:
- metadata.gz: 9212cd9c647193aa7256f15f6da12cd4ee3c56a12e011ac269a1d801d15e0cb7182a71c2fa8d8e2d2ea808aff73ff4f7c974c3720db54414eb43c24658ca554f
- data.tar.gz: c4cb69ad7ea635833aa5051dec5a8c14f3aa13e2b11dd3e8fbdd4d12c2a9d63ac9dbb5b235915da0d80898d0b048d5677fb807d6947911e3e10a87956b8ee1fc
+ metadata.gz: c6dc93902e0ef7a114d2434d3901c677021d57603b07c1122d72bd2f953184b207d0e57f5441fafb7035a00bf28f2e5035d34cd0edfb73da6d2f93d93874f344
+ data.tar.gz: 2e7babf6a2ed86f9a2e5769bfb393549fb07b771b700513efd640ba2b98ebc3eeadec99578e4c828738903ef121f8e3bebe8490f6f75de0ce9afac43ac28b8fa
@@ -1,4 +1,4 @@
- class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
+ class CreateInstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
  def change
  create_table :inst_data_shipper_dump_batches do |t|
  t.datetime :started_at
@@ -6,6 +6,8 @@ class InstDataShipperDumpBatches < CanvasSync::MiscHelper::MigrationClass
  t.string :status

  t.string :job_class
+ t.string :genre
+
  t.string :exception
  t.text :backtrace
  t.text :metadata
@@ -21,7 +21,7 @@ module InstDataShipper

  def table_schemas
  pointer = @schema_pointer || batch_context[:schema_pointer]
- safe_constantize(pointer)
+ pointer.constantize
  end
  end
  end
@@ -9,21 +9,35 @@ module InstDataShipper
  end

  def hook(name, prepend: false, &block)
+ _assert_hook_defined(name)
+ @hooks ||= {}
+ @hooks[name] ||= []
  hooks = @hooks[name]
  prepend ? hooks.unshift(block) : hooks << block
  end
+
+ def _assert_hook_defined(name)
+ return true if @hooks&.key?(name)
+ return if superclass.respond_to?(:_assert_hook_defined) && superclass._assert_hook_defined(name)
+ raise ArgumentError, "Hook #{name} is not defined"
+ end
+
+ def _list_hooks(name)
+ list = []
+ list.push(*superclass._list_hooks(name)) if superclass.respond_to?(:_list_hooks)
+ list.push(*@hooks[name]) if (@hooks || {})[name]
+ list
+ end
  end

  def run_hook(name, *args, **kwargs)
- hooks = @hooks[name]
- hooks.each do |blk|
+ self.class._list_hooks(name).each do |blk|
  instance_exec(*args, **kwargs, &blk)
  end
  end

  def run_hook_safe(name, *args, **kwargs)
- hooks = @hooks[name]
- hooks.each do |blk|
+ self.class._list_hooks(name).each do |blk|
  instance_exec(*args, **kwargs, &blk)
  rescue StandardError
  end
@@ -27,11 +27,25 @@ module InstDataShipper
  _in_canvas_report_pool(:_import_canvas_report, *args, **kwargs)
  end

- def import_canvas_report_by_terms(target_table, report_name, terms: [], params: {}, **kwargs)
+ def import_canvas_report_by_terms(*args, **kwargs)
+ _in_canvas_report_pool(:_import_canvas_report_by_terms, *args, **kwargs)
+ end
+
+ def import_existing_report(report, **kwargs)
+ delayed(:_process_canvas_report, report: report, **kwargs)
+ end
+
+ private
+
+ def _import_canvas_report_by_terms(report_name, terms: [], params: {}, **kwargs)
  term_ids = (terms || []).map do |term|
  term.is_a?(Term) ? term.canvas_id : term
  end

+ table_def = lookup_table_schema!(kwargs[:schema_name], report_name)
+
+ _resolve_report_incremenal_parameters(table_def, params)
+
  Sidekiq::Batch.new.tap do |b|
  b.description = "Term Scoped #{report_name} Runners"
  b.context = {
@@ -40,19 +54,21 @@ module InstDataShipper
  b.jobs do
  terms_query = term_ids.present? ? Term.where(canvas_id: term_ids) : Term
  terms_query.find_each do |t|
- import_canvas_report(target_table, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
+ _in_canvas_report_pool(:_trigger_canvas_report, report_name, params: { **params, enrollment_term_id: t.canvas_id }, **kwargs)
  end
  end
  end
  end

- def import_existing_report(table, report)
- delayed(:_process_canvas_report, table, report: report)
- end
+ def _import_canvas_report(report_name, params: {}, **kwargs)
+ table_def = lookup_table_schema!(kwargs[:schema_name], report_name)

- private
+ _resolve_report_incremenal_parameters(table_def, params)
+
+ _trigger_canvas_report(report_name, params: params, **kwargs)
+ end

- def _import_canvas_report(target_table, report_name, retry_count: 3, params: {}, **kwargs)
+ def _trigger_canvas_report(report_name, retry_count: 3, params: {}, **kwargs)
  report = canvas_sync_client.start_report(
  'self', report_name,
  parameters: params,
@@ -61,15 +77,13 @@ module InstDataShipper
  CanvasSync::Jobs::CanvasProcessWaiter.perform_later(
  "/api/v1/accounts/self/reports/#{report_name}/#{report[:id]}",
  {
- instance_of: origin_class,
- method: :_process_canvas_report,
- args: [target_table],
+ job: Jobs::AsyncCaller,
+ args: [origin_class, :_process_canvas_report],
  kwargs: kwargs,
  },
  on_failure: {
- instance_of: origin_class,
- method: :_handle_failed_canvas_report,
- args: [target_table, report_name, kwargs],
+ job: Jobs::AsyncCaller,
+ args: [origin_class, :_handle_failed_canvas_report, report_name, kwargs],
  kwargs: { retry_count: retry_count },
  },
  status_key: :status,
@@ -79,18 +93,18 @@ module InstDataShipper

  def _in_canvas_report_pool(mthd, *args, **kwargs)
  pool = CanvasSync::JobBatches::Pool.from_pid(batch_context[:report_processor_pool])
- AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
+ Jobs::AsyncCaller.call_from_pool(pool, self.class, mthd, *args, **kwargs)
  end

- def _process_canvas_report(table, report:)
- table_def = table_schemas.find { |t| t[:warehouse_name].to_s == table }
+ def _process_canvas_report(report:, schema_name: nil)
+ table_def = lookup_table_schema!(schema_name, report[:report])

- IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/#{table}.csv")
+ IO.copy_stream(URI.parse(report['attachment']['url']).open, "#{working_dir}/temp_report.csv")

  inner_block = ->(file) {
- CSV.foreach("#{working_dir}/#{table}.csv", headers: true) do |m|
+ CSV.foreach("#{working_dir}/temp_report.csv", headers: true) do |m|
  file << table_def[:columns].map do |c|
- c[:transformer].present? ? m.instance_exec(&c[:transformer]) : m[c[:local_name].to_s]
+ instance_exec(m, &c[:block])
  end
  end
  }
@@ -98,13 +112,36 @@ module InstDataShipper
  upload_data(table_def, extra: report['id'], &inner_block)
  end

- def _handle_failed_canvas_report(table, report_name, kwargs, retry_count:, report:) # rubocop:disable Lint/UnusedMethodArgument
+ def _resolve_report_incremenal_parameters(table_def, params)
+ if table_is_incremental?(table_def)
+ inc = table_def[:incremental]
+ scope = inc[:scope]
+
+ if scope != false
+ scope ||= "updated_after"
+
+ if scope.is_a?(Proc)
+ scope = instance_exec(params, &scope)
+ if scope.is_a?(Hash) && scope != params
+ params.merge!(scope)
+ end
+ elsif scope.is_a?(String) || scope.is_a?(Symbol)
+ params[scope] = incremental_since
+ end
+ end
+ end
+
+ params
+ end
+
+ def _handle_failed_canvas_report(report_name, kwargs, retry_count:, report:)
  if retry_count.positive?
  tbid = batch_context[:report_bid] || batch_context[:root_bid]
  Sidekiq::Batch.new(tbid).jobs do
- import_canvas_report(table, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
+ _in_canvas_report_pool(:_trigger_canvas_report, report_name, retry_count: retry_count - 1, **kwargs.symbolize_keys)
  end
  else
+ # TODO Allow marking the table as incomplete. Destination code can then decide how to handle incomplete tables since (eg) incremental imports wouldn't mind too much
  cleanup_fatal_error!
  end
  end
@@ -12,22 +12,43 @@ module InstDataShipper

  private

- def _import_local_table(table_name)
- table_def = table_schemas.find { |t| t[:model].to_s == table_name }
- model = table_def[:model]
+ def _import_local_table(model, schema_name: nil)
+ model = model.safe_constantize if model.is_a?(String)
+
+ table_def = lookup_table_schema!(schema_name, { model: model })

  inner_block = ->(file) {
- query = model
- query = query.includes(table_def[:includes]) if table_def[:includes].present?
- model.find_each do |m|
+ query = model.all
+ query = _resolve_model_query(query, table_def[:query])
+
+ if table_is_incremental?(table_def)
+ query = _resolve_model_query(query, table_def.dig(:incremental, :scope), string: ->(r, c) { r.where("? > ?", c, incremental_since) })
+ end
+
+ query.find_each do |m|
  file << table_def[:columns].map do |c|
- c[:transformer].present? ? m.instance_exec(&c[:transformer]) : m[c[:local_name].to_s]
+ instance_exec(m, &c[:block])
  end
  end
  }

  upload_data(table_def, &inner_block)
  end
+
+ def _resolve_model_query(relation, query, string: nil)
+ return relation if query.nil?
+
+ if query.is_a?(Symbol)
+ relation.send(query)
+ elsif query.is_a?(Proc)
+ instance_exec(relation, &query)
+ elsif query.is_a?(String) && string.present?
+ instance_exec(relation, query, &string)
+ else
+ raise "Invalid query: #{query.inspect}"
+ end
+ end
+
  end
  end
  end
@@ -50,7 +50,7 @@ module InstDataShipper
  end

  def user_config
- config[:extra]
+ config[:user_config]
  end

  def group_key
@@ -62,11 +62,11 @@ module InstDataShipper
  def parse_configuration(uri)
  if block_given?
  parsed = URI.parse(uri)
+ cparsed = ConfigURI.new(parsed)
  cfg = {
- params: parsed.query.present? ? Rack::Utils.parse_nested_query(parsed.query) : {},
- extra: (parsed.fragment.present? && parsed.fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(parsed.fragment)).presence || parsed.fragment || nil,
+ user_config: cparsed.hash_params,
  }
- yield parsed, cfg
+ yield cparsed, cfg
  cfg
  else
  raise NotImplementedError
@@ -100,5 +100,28 @@ module InstDataShipper
  end

  end
+
+ class ConfigURI
+ def initialize(uri)
+ @uri = uri
+ end
+
+ # delegate_missing_to :uri
+ delegate :scheme, :user, :password, :host, :hostname, :port, :path, :query, :fragment, to: :uri
+
+ def params
+ @params ||= (query.present? ? Rack::Utils.parse_nested_query(query).with_indifferent_access : {}).freeze
+ end
+
+ def hash_params
+ @hash_params ||= ((fragment.present? && fragment.match?(/^\w+=/) && Rack::Utils.parse_nested_query(fragment).with_indifferent_access).presence || fragment || nil)&.freeze
+ end
+
+ private
+
+ def uri
+ @uri
+ end
+ end
  end
  end
@@ -1,3 +1,5 @@
+ require "faraday_middleware"
+
  module InstDataShipper
  module Destinations
  class HostedData < Base
@@ -15,7 +17,7 @@ module InstDataShipper
  end

  def chunk_data(generator, table:, extra: nil)
- warehouse_name = table_def[:warehouse_name]
+ warehouse_name = table[:warehouse_name]

  super(generator) do |batch, idx|
  bits = [warehouse_name, extra, idx].compact
@@ -36,18 +38,18 @@ module InstDataShipper

  def upload_data_chunk(table_def, chunk)
  hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", artifacts: {
- table_def[:warehouse_name] => [Faraday::UploadIO.new(chunk, 'application/gzip')],
+ table_name(table_def) => [Faraday::UploadIO.new(chunk, 'application/gzip')],
  })
  end

  def finalize_dump
  hosted_data_client.put("api/v1/custom_dumps/#{hd_dump_id}/", start_import: true) if hd_dump_id.present?
- redis.delete(rk(:state))
+ redis.del(rk(:state))
  end

  def cleanup_fatal_error
  hosted_data_client.delete("api/v1/custom_dumps/#{hd_dump_id}/", reason: 'Failure during extraction or transformation') if hd_dump_id.present?
- redis.delete(rk(:state))
+ redis.del(rk(:state))
  end

  # TODO Support/allow single-table fatal errors?
@@ -59,28 +61,25 @@ module InstDataShipper
  end

  def convert_schema
- table_prefix = config[:table_prefix]
- table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
-
  definititions = {}
  table_schemas.each do |ts|
  ts = ts.dup
+ tname = table_name(ts)

- table_name = ts[:warehouse_name]
- table_name = table_prefix + table_name if table_prefix.present?
-
- definititions[ts[:warehouse_name]] = {
+ definititions[tname] = {
  dw_type: 'dimension',
  description: ts[:description],
- incremental: !!ts[:incremental],
- incremental_on: ts[:incremental] && ts[:incremental] != true ? ts[:incremental] : nil,
+ incremental: dumper.table_is_incremental?(ts),
+ incremental_on: ts.dig(:incremental, :on),
  # indexed_columns
- tableName: table_name,
+ tableName: tname,
  columns: ts[:columns].map do |col|
+ coltype = col[:type]
+ coltype ||= ts[:model].column_for_attribute(col[:from]).sql_type if col[:from].is_a?(String)
  {
  name: col[:warehouse_name],
  description: col[:description],
- type: col[:type] || ts[:model].column_for_attribute(col[:local_name]).sql_type,
+ type: coltype,
  }
  end,
  }
@@ -92,6 +91,14 @@ module InstDataShipper
  }
  end

+ def table_name(table_def)
+ table_prefix = config[:table_prefix]
+ table_prefix = table_prefix.present? ? "#{table_prefix}_" : nil
+ table_name = table_def[:warehouse_name]
+ table_name = table_prefix + table_name if table_prefix.present?
+ table_name
+ end
+
  def hosted_data_client
  @hosted_data_client ||= begin
  token = config[:token]
@@ -102,6 +109,8 @@ module InstDataShipper
  host = tok_content['host']
  end

+ host = "https://#{host}" unless host.include?('://')
+
  Faraday.new(url: host) do |faraday|
  faraday.request :multipart
  faraday.request :json
@@ -117,14 +126,16 @@ module InstDataShipper

  def parse_configuration(uri)
  super do |parsed_uri, cfg|
- if parsed_uri.username.present?
+ if parsed_uri.user.present?
  # hosted-data://<JWT>:<hosted_data_domain>
- cfg[:token] = parsed_uri.username
+ cfg[:token] = parsed_uri.user
  cfg[:host] = parsed_uri.host
  else
  # hosted-data://<JWT>
  cfg[:token] = parsed_uri.host
  end
+
+ cfg[:table_prefix] = parsed_uri.params[:table_prefix]
  end
  end

@@ -4,7 +4,7 @@ module InstDataShipper
  include Concerns::Chunking

  def chunk_data(generator, table:, extra: nil)
- warehouse_name = table_def[:warehouse_name]
+ warehouse_name = table[:warehouse_name]

  super(generator) do |batch, idx|
  bits = [warehouse_name, extra, idx].compact
@@ -5,7 +5,7 @@ module InstDataShipper
  define_hook :initialize_dump_batch
  define_hook :finalize_dump_batch

- def self.perform_dump(destinations:)
+ def self.perform_dump(destinations)
  raise "Must subclass Dumper to use perform_dump" if self == Dumper

  dumper = new(destinations)
@@ -14,48 +14,134 @@ module InstDataShipper
  dumper.tracker
  end

- protected
-
- attr_reader :executor
+ def self.define(include: [], schema: , &blk)
+ Class.new(self) do
+ include(*include)

- def initialize(destinations = nil, executor: nil)
- @raw_destinations = destinations
- @executor = executor
+ define_method(:enqueue_tasks, &blk)
+ define_method(:table_schemas) { schema }
+ end
  end

- def enqueue_tasks
- raise NotImplementedError
- end
+ public

  def begin_dump
  raise "Dump already begun" unless @raw_destinations.present?

- @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, status: 'in_progress')
+ @tracker = tracker = DumpBatch.create(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')

- destinations.each do |dest|
- dest.initialize_dump()
+ @batch_context = context = {
+ # TODO Allow to be hooked by Destination, likely via initialize_dump_batch and batch_context, so that if an earlier destination fails we can resend data
+ # TODO Consider behavior if last is still running
+ incremental_since: DumpBatch.where(genre: export_genre, status: 'completed').order(created_at: :desc).first&.created_at,
+ }
+
+ begin
+ begin
+ destinations.each do |dest|
+ dest.initialize_dump()
+ end
+
+ run_hook(:initialize_dump_batch, context)
+ ensure
+ @batch_context = nil
+ context[:tracker_id] = tracker.id
+ context[:origin_class] = batch_context[:origin_class] || self.class.to_s
+ context[:destinations] = @raw_destinations
+ end
+
+ Sidekiq::Batch.new.tap do |batch|
+ context[:root_bid] = batch.bid
+
+ batch.description = "HD #{export_genre} Export #{tracker.id} Root"
+ batch.context = context
+ batch.on(:success, "#{self.class}#finalize_dump")
+ batch.on(:death, "#{self.class}#cleanup_fatal_error!")
+ batch.jobs do
+ enqueue_tasks
+ rescue => ex
+ delayed :cleanup_fatal_error!
+ InstDataShipper.handle_suppressed_error(ex)
+ end
+ end
+ rescue => ex
+ if context
+ batch ||= Sidekiq::Batch.new.tap do |batch|
+ batch.description = "HD #{export_genre} Export #{tracker.id} Early Failure Cleanup"
+ batch.context = context
+ batch.jobs do
+ delayed :cleanup_fatal_error!
+ end
+ end
+ end
+ raise ex
  end
+ end

- context = {}
- run_hook(:initialize_dump_batch, context)
-
- Sidekiq::Batch.new.tap do |batch|
- batch.description = "HD #{export_genre} Export #{tracker.id} Root"
- batch.context = {
- **context,
- root_bid: batch.bid,
- tracker_id: tracker.id,
- origin_class: batch_context[:origin_class] || self.class.to_s,
- destinations: @raw_destinations,
- }
- batch.on(:success, "#{self.class}#finalize_dump")
- batch.on(:death, "#{self.class}#cleanup_fatal_error!")
- batch.jobs do
- enqueue_tasks
+ def tracker
+ @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
+ end
+
+ def export_genre
+ self.class.to_s.gsub(/HD|ExportJob/, '')
+ end
+
+ def origin_class
+ batch_context[:origin_class]&.constantize || self.class
+ end
+
+ def table_is_incremental?(table_def)
+ if (inc = table_def[:incremental]).present?
+ differ = inc[:if]
+ return !!incremental_since if differ.nil?
+
+ differ = :"#{differ}".to_proc if differ.is_a?(Symbol)
+ differ = instance_exec(&differ) if differ.is_a?(Proc)
+ return !!differ
+ end
+
+ false
+ end
+
+ def incremental_since
+ batch_context[:incremental_since]
+ end
+
+ def lookup_table_schema(*identifiers)
+ identifiers.compact.each do |ident|
+ if ident.is_a?(Hash)
+ key = ident.keys.first
+ value = ident.values.first
+ else
+ key = :warehouse_name
+ value = ident
+ end
+
+ value = Array(value).compact
+
+ table_schemas.each do |ts|
+ return ts if value.include?(ts[key])
  end
  end

- # TODO Catch errors in here and cleanup as needed
+ nil
+ end
+
+ def lookup_table_schema!(*identifiers)
+ lookup_table_schema(*identifiers) || raise("No table schema found for #{identifiers.inspect}")
+ end
+
+ protected
+
+ attr_reader :executor
+
+ def initialize(destinations = nil, executor: nil)
+ @raw_destinations = Array(destinations)
+ @executor = executor
+ end
+
+ def enqueue_tasks
+ raise NotImplementedError
  end

  def upload_data(table_def, extra: nil, &datagen)
@@ -96,7 +182,7 @@ module InstDataShipper
  def finalize_dump(_status, _opts)
  run_hook(:finalize_dump_batch)

- destination.each do |dest|
+ destinations.each do |dest|
  dest.finalize_dump
  end

@@ -108,14 +194,15 @@ module InstDataShipper

  run_hook(:finalize_dump_batch)

- destination.each do |dest|
+ destinations.each do |dest|
  dest.cleanup_fatal_error
- rescue StandardError # rubocop:disable Lint/SuppressedException
+ rescue => ex
+ InstDataShipper.handle_suppressed_error(ex)
  end

  DumpBatch.find(batch_context[:tracker_id]).update(status: 'failed')

- CanvasSync::JobBatches::Batch.delete_prematurely!(batch_context[:root_bid])
+ CanvasSync::JobBatches::Batch.delete_prematurely!(batch_context[:root_bid]) if batch_context[:root_bid].present?
  end

  # Helper Methods
@@ -126,23 +213,17 @@ module InstDataShipper
  end

  def delayed(mthd, *args, **kwargs)
- AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
- end
-
- def tracker
- @tracker ||= batch_context[:tracker_id].present? ? DumpBatch.find(batch_context[:tracker_id]) : nil
+ Jobs::AsyncCaller.perform_later(self.class.to_s, mthd.to_s, *args, **kwargs)
  end

- def export_genre
- self.class.to_s.gsub(/HD|ExportJob/, '')
- end
+ delegate :working_dir, to: :executor

- def origin_class
- batch_context[:origin_class]&.constantize || self.class
+ def batch
+ Thread.current[CanvasSync::JobBatches::CURRENT_BATCH_THREAD_KEY]
  end

- def working_dir
- executor.working_dir
+ def batch_context
+ @batch_context || batch&.context || {}
  end

  def destinations_for_table(table_def)
@@ -150,7 +231,7 @@ module InstDataShipper
  end

  def destinations
- @destinations ||= (@raw_destinations || batch_context[:destinations]).map.with_index do |dest, i|
+ @destinations ||= (@raw_destinations.presence || batch_context[:destinations]).map.with_index do |dest, i|
  dcls = InstDataShipper.resolve_destination(dest)
  dcls.new("#{InstDataShipper.redis_prefix}:dump#{tracker.id}:dest#{i}", dest, self)
  end
@@ -4,5 +4,11 @@ module InstDataShipper
  class Engine < ::Rails::Engine
  isolate_namespace InstDataShipper

+ initializer :append_migrations do |app|
+ config.paths["db/migrate"].expanded.each do |expanded_path|
+ app.config.paths["db/migrate"] << expanded_path
+ end
+ ActiveRecord::Migrator.migrations_paths = Rails.application.paths['db/migrate'].to_a
+ end
  end
  end
@@ -1,7 +1,14 @@
+
+ require "sidekiq"
+
  module InstDataShipper
  module Jobs
  class AsyncCaller < InstDataShipper::Jobs::Base
- sidekiq_options retry: 6 if defined?(sidekiq_options)
+ sidekiq_options(retry: 0) if defined?(sidekiq_options)
+
+ def self.get_sidekiq_options
+ { retry: 0 }
+ end

  def self.call_from_pool(pool, clazz, method, *args, **kwargs)
  pool.add_job(
@@ -12,7 +19,8 @@ module InstDataShipper
  end

  def perform(clazz, method, *args, **kwargs)
- clazz.constantize.new(executor: self).send(method.to_sym, *args, **kwargs)
+ clazz = clazz.constantize if clazz.is_a?(String)
+ clazz.new(executor: self).send(method.to_sym, *args, **kwargs)
  end
  end
  end
@@ -3,9 +3,13 @@ module InstDataShipper
  class BasicDumpJob < InstDataShipper::Jobs::Base
  sidekiq_options retry: 3 if defined?(sidekiq_options)

- def perform(endpoints, dump_class = nil)
- dumper.perform_dump(endpoints: endpoints)
+ def perform(endpoints)
+
  end
+
+ protected
+
+
  end
  end
  end
@@ -1,4 +1,6 @@
  module InstDataShipper
+ # This class ends up fill two roles - Schema and Mapping.
+ # It makes for a clean API, but it's a little less canonical since, (eg) the S3 destination doesn't need column type annotations.
  class SchemaBuilder
  attr_reader :tables

@@ -12,24 +14,40 @@ module InstDataShipper
  builder.tables
  end

- def table(model_or_name, description = nil, as: nil, includes: nil, incremental: false, &block)
- as ||= model_or_name
- as = as.table_name if as.respond_to?(:table_name)
+ def extend_table_builder(&block)
+ @table_builder_class ||= Class.new(TableSchemaBuilder)
+ @table_builder_class.class_eval(&block)
+ end

+ def table(model_or_name, description = nil, model: nil, query: nil, **extra, &block)
  tdef = {
+ warehouse_name: nil,
  description: description,
- model: model_or_name.is_a?(String) ? nil : model_or_name,
- warehouse_name: as.to_s,
- incremental: incremental,
  columns: [],
- includes: includes,
+
+ model: model,
+ query: query,
+ **extra,
  }

- TableSchemaBuilder.build(tdef, &block)
+ if model_or_name.is_a?(ActiveRecord::Relation)
+ raise "model specified twice" if model.present?
+ raise "query specified twice" if query.present?
+
+ tdef[:query] = model_or_name
+ tdef[:model] = model_or_name.model
+ elsif model_or_name.is_a?(Class) && model_or_name < ActiveRecord::Base
+ tdef[:warehouse_name] = model_or_name.table_name
+ tdef[:model] = model_or_name
+ else
+ tdef[:warehouse_name] = model_or_name
+ end
+
+ @table_builder_class.build(tdef, &block)

  @tables << tdef

- self
+ tdef
  end

  class TableSchemaBuilder
@@ -46,48 +64,82 @@ module InstDataShipper
  builder.columns
  end

- # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
- def column(name, *args, **kwargs, &block)
- name = { name => name } unless name.is_a?(Hash)
- raise ArgumentError, 'Exactly one name must be provided' unless name.count == 1
+ # def annotate(key, value)
+ # options[key] = value
+ # end
+
+ def incremental(scope="updated_at", **kwargs)
+ if (extras = kwargs.keys - %i[on if]).present?
+ raise ArgumentError, "Unsuppored options: #{extras.inspect}"
+ end
+
+ options[:incremental] = {
+ on: Array(kwargs[:on]),
+ scope: scope,
+ if: kwargs[:if],
+ }
+ end
+
+ def column(name, *args, refs: [], from: nil, **extra, &block)
+ from ||= name.to_s

  cdef = {
- local_name: name.keys[0].to_s,
- warehouse_name: name.values[0].to_s,
- transformer: block,
+ warehouse_name: name.to_s,
+ from: from,
+ **extra,
  }

- [:description, :type, :refs => :references].each do |k|
- if k.is_a? Hash
- k.each do |hk, hv|
- cdef[hv] = kwargs.delete(hk) if kwargs.key?(hk)
- end
- elsif kwargs.key?(k)
- cdef[k] = kwargs.delete(k)
- end
+ if args[0].is_a?(Symbol)
+ cdef[:type] = args.shift()
  end

- cdef[:references] = Array(cdef[:references])
-
- args[0..1].each do |a|
- k = (a.is_a?(String) && :description) || (a.is_a?(Symbol) && :type) || nil
- raise ArgumentError, 'Unsupported Argument' if k.nil?
- raise ArgumentError, "Duplicate Argument for #{k}" if cdef.key?(k)
+ if args[0].is_a?(String)
+ cdef[:description] = args.shift()
+ end

- cdef[k] = a
+ if args.present?
+ raise ArgumentError, "Received unexpected arguments: #{args.inspect}"
  end

+ cdef[:references] = Array(refs)
+
  if options[:model].is_a?(Class) && cdef[:local_name].to_s.ends_with?('_id')
  rel_name = cdef[:local_name].to_s[0...-3]
  refl = options[:model].reflections[rel_name]
  cdef[:references] << "#{refl.klass}##{refl.options[:primary_key] || 'id'}" if refl.present? && !refl.polymorphic?
  end

+ compiled_from = compile_transformer(from)
+
+ cdef[:block] = ->(row) {
+ value = instance_exec(row, &compiled_from)
+ value = instance_exec(value, row, &block) if block.present?
+ value
+ }
+
  @columns << cdef

- self
+ cdef
  end
- # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+
+ protected
+
+ def compile_transformer(from)
+ if from.present?
+ if from.is_a?(Symbol)
+ ->(row) { row.send(from) }
+ elsif from.is_a?(Proc)
+ from
+ elsif from.is_a?(String)
+ ->(row) { row[from] }
+ else
+ raise ArgumentError, "Invalid transformer: #{from.inspect}"
+ end
+ else
+ ->(row) { row }
+ end
+ end
+
  end
  end
  end
@@ -1,3 +1,3 @@
  module InstDataShipper
- VERSION = "0.1.0.beta1".freeze
+ VERSION = "0.1.0.beta2".freeze
  end
@@ -23,13 +23,20 @@ module InstDataShipper
  destination = @destination_aliases[type]
  end

- safe_constantize(destination)
+ destination.constantize
  end

  def start_basic_dump(*args, **kwargs, &block)
  BasicDumper.perform_dump(*args, **kwargs, &block)
  end

+ def handle_suppressed_error(ex)
+ logger.error "Suppressed Error: #{ex.message}"
+ logger.error ex.backtrace.join("\n")
+ Raven.capture_exception(ex) if defined?(Raven)
+ Sentry.capture_exception(ex) if defined?(Sentry)
+ end
+
  def logger
  return @logger if defined? @logger
  @logger = Logger.new(STDOUT)
  @logger = Logger.new(STDOUT)
@@ -66,6 +73,8 @@ Dir[File.dirname(__FILE__) + "/inst_data_shipper/destinations/*.rb"].each do |fi
66
73
  basename = File.basename(file, ".rb")
67
74
  next if basename == "base"
68
75
 
69
- InstDataShipper.alias_destination(basename.dasherize, "InstDataShipper::Destinations::#{basename.classify}")
76
+ InstDataShipper.alias_destination(basename.dasherize, "InstDataShipper::Destinations::#{basename.camelize}")
70
77
  end
71
78
 
79
+ require "inst_data_shipper/dumper"
80
+ require "inst_data_shipper/basic_dumper"
data/spec/spec_helper.rb CHANGED
@@ -7,7 +7,7 @@ require File.expand_path("../dummy/config/environment.rb", __FILE__)
  require "bundler/setup"
  require 'rspec/rails'
  require 'spec_helper'
- require 'factory_girl_rails'
+ require 'factory_bot_rails'
  require 'timecop'
  require 'webmock/rspec'
  require 'support/fake_canvas'
@@ -29,7 +29,7 @@ ActiveRecord::Migration.maintain_test_schema!
  RSpec.configure do |config|
  config.extend WithModel

- config.include FactoryGirl::Syntax::Methods
+ config.include FactoryBot::Syntax::Methods
  config.use_transactional_fixtures = true
  config.infer_spec_type_from_file_location!
  config.filter_rails_from_backtrace!
metadata CHANGED
@@ -1,27 +1,27 @@
  --- !ruby/object:Gem::Specification
  name: inst_data_shipper
  version: !ruby/object:Gem::Version
- version: 0.1.0.beta1
+ version: 0.1.0.beta2
  platform: ruby
  authors:
  - Instructure CustomDev
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2024-03-06 00:00:00.000000000 Z
+ date: 2024-03-08 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rails
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - "<"
+ - - "~>"
  - !ruby/object:Gem::Version
  version: '6.0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - "<"
+ - - "~>"
  - !ruby/object:Gem::Version
  version: '6.0'
  - !ruby/object:Gem::Dependency
@@ -360,6 +360,20 @@ dependencies:
  - - ">="
  - !ruby/object:Gem::Version
  version: '0'
+ - !ruby/object:Gem::Dependency
+ name: faraday_middleware
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0'
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ version: '0'
  description:
  email:
  - pseng@instructure.com
@@ -369,8 +383,8 @@ extra_rdoc_files: []
  files:
  - README.md
  - Rakefile
- - app/models/hosted_data_dumper/dump_batch.rb
- - db/migrate/20240301090836_create_canvas_sync_sync_batches.rb
+ - app/models/inst_data_shipper/dump_batch.rb
+ - db/migrate/20240301090836_create_inst_data_shipper_dump_batches.rb
  - lib/inst_data_shipper.rb
  - lib/inst_data_shipper/basic_dumper.rb
  - lib/inst_data_shipper/concerns/hooks.rb