inst_data_shipper 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: f7909aa44e9dabd1d43d58a5a3c2c081891104d64336294dce287c06804804df
-  data.tar.gz: 5da874689ac1de3e016a7feefce5866b211e6f7595021b565564f796685ed104
+  metadata.gz: 4c84d0f1543bfcb42bce30e69ff12cc683b6f6bf51e9275f7529dd0b070bbb79
+  data.tar.gz: 22b5128af3f2d03b14565c656abca685c6e7196083576d1fd20a42a834690cba
 SHA512:
-  metadata.gz: cd81e6c26e2416ce1a32de588e04f560496cfb7cfdac3f4c837828a1c65798bec405d98197032b0d8935a1ba2b24a291aa25f1b73a469ac7a9c6ef8d2286103f
-  data.tar.gz: 66c5ccfd82128e8c5dc39c7c937ee7f4f9412743b7202e221e53c575d4b0e572f0b014b4f41ae5924b1d2d119a05cd5de2acbae4eb81022df844a1fea181faec
+  metadata.gz: 14e5eb97d9c5dbe5049eb94cd3690a2ab59df7bf6e5e0c62b21e8893f4cb75ca294cce6d28a4b0f7dccd1c53d34d5c7a34be7b2f4699f1b320f74df14d50f672
+  data.tar.gz: e7d22728140a7fc331abb5f608da98d44013d49027bf932ef183a4f75babe797632aca83955cb77a205d6004d29a7377dde87aaba0c245a3564541b7e1f55cbe
data/README.md CHANGED
@@ -55,6 +55,13 @@ class HostedDataPushJob < ApplicationJob
     # `if:` may be a Proc or a Symbol (of a method on the Dumper)
     incremental "updated_at", on: [:id], if: ->() {}
 
+    # Schemas may declaratively define their data source.
+    # This can be used for basic schemas where there is a 1:1 mapping between source table and destination table and no conditional logic is required.
+    # To apply these statements, your Dumper must call `auto_enqueue_from_schema`.
+    source :local_table
+    # A Proc may also be passed. The following is equivalent to the above:
+    source ->(table_def) { import_local_table(table_def[:model] || table_def[:warehouse_name]) }
+
     column :name_in_destinations, :maybe_optional_sql_type, "Optional description of column"
 
     # The type may usually be omitted if the `table()` is passed a Model class, but strings are an exception to this
@@ -93,6 +100,10 @@ class HostedDataPushJob < ApplicationJob
     # If the report_name/Model don't directly match the Schema, a schema_name: parameter may be passed:
     import_local_table(SomeModel, schema_name: "my_table")
     import_canvas_report_by_terms("some_report", terms: Term.all.pluck(:canvas_id), schema_name: "my_table")
+
+    # Iterate through the Tables defined in the Schema and apply any defined `source` statements.
+    # This is the default behavior if `define()` is called w/o a block.
+    auto_enqueue_from_schema
   end
 
   def perform
@@ -118,6 +129,8 @@ class HostedDataPushJob < ApplicationJob
   def enqueue_tasks
     import_local_table(ALocalModel)
     import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
+
+    # auto_enqueue_from_schema
   end
 
   def table_schemas
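Taken together, these README changes describe a more declarative workflow: a table's schema can now name its own importer via `source`, and a Dumper whose schema carries source statements no longer needs a hand-written `enqueue_tasks`. A minimal sketch of the combination, assuming the `SchemaBuilder.build` and `Dumper.define(schema:, include:)` entry points shown elsewhere in the README (`ALocalModel` is a placeholder model):

    # Hypothetical schema whose table declares its own source
    SCHEMA = InstDataShipper::SchemaBuilder.build do
      table(ALocalModel) do
        source :local_table  # applied as import_local_table(...) when auto-enqueued
        column :name, :varchar
      end
    end

    # Because the schema carries source statements, define() may be called
    # without a block; enqueue_tasks defaults to auto_enqueue_from_schema.
    MyDumper = InstDataShipper::Dumper.define(
      schema: SCHEMA,
      include: [InstDataShipper::DataSources::LocalTables],
    )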
lib/inst_data_shipper/data_sources/base.rb CHANGED
@@ -1,7 +1,31 @@
 module InstDataShipper
   module DataSources
     module Base
-      extend ActiveSupport::Concern
+      # This could be a Concern, but we don't want Concern inheritance logic kicking in if it is included into another Concern.
+
+      def self.included(base)
+        base.extend ActiveSupport::Concern unless self.is_a?(Class) || base < ActiveSupport::Concern
+        base.extend ModuleHelperMethods
+        base.send(:include, Concern)
+      end
+
+      module Concern
+        extend ActiveSupport::Concern
+      end
+
+      module ModuleHelperMethods
+        def delayed(mthd_sym)
+          mthd = instance_method(mthd_sym)
+          pmthd_sym = :"_delayed_#{mthd_sym}"
+
+          alias_method pmthd_sym, mthd_sym
+          private pmthd_sym
+
+          define_method(mthd_sym) do |*args, **kwargs|
+            delayed(pmthd_sym, *args, **kwargs)
+          end
+        end
+      end
     end
   end
 end
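The new class-level `delayed` helper is what lets the data-source modules below shed their boilerplate: it renames the real implementation to a private `_delayed_*` alias and redefines the public method as a stub that schedules the call through the gem's instance-level `delayed(...)` dispatcher. A rough sketch of the equivalence (the module and method names here are hypothetical):

    module SomeDataSource
      include InstDataShipper::DataSources::Base

      delayed def import_something(model)
        # original body; now only reachable via the private alias
      end

      # ...which behaves like the boilerplate it replaces:
      #   alias_method :_delayed_import_something, :import_something
      #   private :_delayed_import_something
      #   def import_something(*args, **kwargs)
      #     delayed(:_delayed_import_something, *args, **kwargs)
      #   end
    end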
lib/inst_data_shipper/data_sources/canvas_reports.rb CHANGED
@@ -2,7 +2,7 @@ module InstDataShipper
   module DataSources
     # This module contains the logic for processing Canvas reports
     module CanvasReports
-      extend ActiveSupport::Concern
+      include Base
 
       included do
         hook :initialize_dump_batch do |context|
lib/inst_data_shipper/data_sources/local_tables.rb CHANGED
@@ -2,17 +2,11 @@ module InstDataShipper
   module DataSources
     # This module contains the logic for processing local AR tables
     module LocalTables
-      extend ActiveSupport::Concern
+      include Base
 
       public
 
-      def import_local_table(*args, **kwargs)
-        delayed(:_import_local_table, *args, **kwargs)
-      end
-
-      private
-
-      def _import_local_table(model, schema_name: nil)
+      delayed def import_local_table(model, schema_name: nil)
         model = model.safe_constantize if model.is_a?(String)
 
         table_def = lookup_table_schema!(schema_name, { model: model })
@@ -40,6 +34,8 @@ module InstDataShipper
         upload_data(table_def, &inner_block)
       end
 
+      private
+
       def _resolve_model_query(relation, query, string: nil, default: nil)
         return relation if query == false
         query = default if query.nil?
lib/inst_data_shipper/destinations/concerns/chunking.rb CHANGED
@@ -7,7 +7,7 @@ module InstDataShipper
         DEFAULT_CHUNK_SIZE = 100_000
 
         def chunk_data(generator, chunk_size: nil, **kwargs)
-          chunk_size ||= config.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
+          chunk_size ||= config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
           slice = 1
 
           btchr = CanvasSync::BatchProcessor.new(of: chunk_size) do |batch|
@@ -24,7 +24,7 @@ module InstDataShipper
 
         def group_key
           super.tap do |k|
-            k[:chunk_size] = config.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
+            k[:chunk_size] = config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
           end
         end
 
lib/inst_data_shipper/destinations/speccable.rb ADDED
@@ -0,0 +1,17 @@
+module InstDataShipper
+  module Destinations
+    class Speccable < Base
+      include Concerns::Chunking
+
+      def chunk_data(generator, table:, extra: nil)
+        super(generator) do |batch, idx|
+          yield batch
+        end
+      end
+
+      def upload_data_chunk(table_def, chunk); end
+
+      def parse_configuration(uri); end
+    end
+  end
+end
lib/inst_data_shipper/dumper.rb CHANGED
@@ -18,6 +18,12 @@ module InstDataShipper
       Class.new(self) do
         include(*include)
 
+        if blk.nil? && schema[:tables].any? { |t| t[:sourcer].present? }
+          blk = -> { auto_enqueue_from_schema }
+        elsif blk.nil?
+          raise ArgumentError, "Must provide a block or a schema with source definitions"
+        end
+
         define_method(:enqueue_tasks, &blk)
         define_method(:schema) { schema }
       end
@@ -33,6 +39,16 @@ module InstDataShipper
       clazz.new(executor: executor)
     end
 
+    if defined?(Rails) && Rails.env.test?
+      def for_specs!
+        @raw_destinations = ["speccable://nil"]
+        @executor = InstDataShipper::Jobs::AsyncCaller.new
+        @tracker = DumpBatch.new(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')
+        define_singleton_method(:spec_destination) { destinations.first }
+        self
+      end
+    end
+
     public
 
     def begin_dump
@@ -176,6 +192,14 @@ module InstDataShipper
       raise NotImplementedError
     end
 
+    def auto_enqueue_from_schema
+      schema[:tables].each do |table_def|
+        src = table_def[:sourcer]
+        next unless src.present?
+        instance_exec(table_def, &src)
+      end
+    end
+
     def upload_data(table_def, extra: nil, &datagen)
       # Allow muxing, allowing a hook to prevent some files going to certain destinations
       dests = destinations_for_table(table_def)
@@ -258,7 +282,7 @@ module InstDataShipper
     end
 
     def destinations
-      @destinations ||= (@raw_destinations.presence || batch_context[:destinations]).map.with_index do |dest, i|
+      @destinations ||= (@raw_destinations.presence || batch_context[:destinations] || []).map.with_index do |dest, i|
        dcls = InstDataShipper.resolve_destination(dest)
        dcls.new("#{InstDataShipper.redis_prefix}:dump#{tracker.id}:dest#{i}", dest, self)
      end
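The test-only `for_specs!` helper points a dumper at the new Speccable destination so specs can drive `begin_dump` without a real upload target. A hedged sketch of intended usage (RSpec and `MyDumper` are hypothetical; the `executor:` keyword mirrors the `clazz.new(executor: executor)` call above):

    RSpec.describe MyDumper do
      it "dumps tables to the spec destination" do
        dumper = MyDumper.new(executor: nil).for_specs!
        expect(dumper.spec_destination).to be_a(InstDataShipper::Destinations::Speccable)
        dumper.begin_dump
      end
    end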
lib/inst_data_shipper/schema_builder.rb CHANGED
@@ -20,9 +20,10 @@ module InstDataShipper
      @schema[:version] = version
    end
 
-    def extend_table_builder(&block)
+    def extend_table_builder(modul = nil, &block)
      @table_builder_class ||= Class.new(TableSchemaBuilder)
-      @table_builder_class.class_eval(&block)
+      @table_builder_class.class_eval(&block) if block.present?
+      @table_builder_class.extend(modul) if modul.present?
    end
 
    def table(model_or_name, description = nil, model: nil, query: nil, **extra, &block)
@@ -42,6 +43,7 @@
 
      tdef[:query] = model_or_name
      tdef[:model] = model_or_name.model
+      tdef[:warehouse_name] = model_or_name.model.table_name
    elsif model_or_name.is_a?(Class) && model_or_name < ActiveRecord::Base
      tdef[:warehouse_name] = model_or_name.table_name
      tdef[:model] = model_or_name
@@ -49,7 +51,7 @@
      tdef[:warehouse_name] = model_or_name
    end
 
-    @table_builder_class.build(tdef, &block)
+    (@table_builder_class || TableSchemaBuilder).build(tdef, &block)
 
    @schema[:tables] << tdef
 
@@ -67,12 +69,12 @@
    def self.build(tdef, &block)
      builder = new(tdef)
      builder.instance_exec(&block)
-      builder.columns
+      builder.options
    end
 
-    # def annotate(key, value)
-    #   options[key] = value
-    # end
+    def annotate(key, value)
+      options[key] = value
+    end
 
    def version(version)
      options[:version] = version
@@ -90,6 +92,17 @@
      }
    end
 
+    def source(source, override_model=nil, **kwargs)
+      raise "Source already set" if options[:sourcer].present?
+
+      if source.is_a?(Symbol)
+        mthd = :"import_#{source}"
+        options = self.options
+        source = ->(table_def) { send(mthd, override_model || options[:model] || options[:warehouse_name], **kwargs) }
+      end
+      options[:sourcer] = source
+    end
+
    def column(name, *args, refs: [], from: nil, **extra, &block)
      from ||= name.to_s
 
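These builder changes un-comment `annotate`, return the full options Hash from `build`, let `extend_table_builder` accept a module, and add the `source` DSL that backs the README additions. A small sketch of the expanded table DSL, assuming the `SchemaBuilder.build` entry point (`MyBuilderHelpers`, `SomeModel`, and the table name are placeholders; note the module form is mixed in with `extend`, so its methods land on the builder class rather than on its instances):

    InstDataShipper::SchemaBuilder.build do
      extend_table_builder(MyBuilderHelpers)  # new module form; the block form still works

      table("my_table") do
        annotate :owner, "custom_dev"    # now public API; written into the table's options
        source :local_table, SomeModel   # Symbol form resolves to import_local_table(SomeModel)
        column :id, :bigint
      end
    end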
lib/inst_data_shipper/version.rb CHANGED
@@ -1,3 +1,3 @@
 module InstDataShipper
-  VERSION = "0.2.0".freeze
+  VERSION = "0.2.1".freeze
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: inst_data_shipper
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
 platform: ruby
 authors:
 - Instructure CustomDev
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-03-11 00:00:00.000000000 Z
+date: 2024-03-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rails
@@ -395,6 +395,7 @@ files:
 - lib/inst_data_shipper/destinations/concerns/chunking.rb
 - lib/inst_data_shipper/destinations/hosted_data.rb
 - lib/inst_data_shipper/destinations/s3.rb
+- lib/inst_data_shipper/destinations/speccable.rb
 - lib/inst_data_shipper/dumper.rb
 - lib/inst_data_shipper/engine.rb
 - lib/inst_data_shipper/jobs/async_caller.rb