inst_data_shipper 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -0
- data/lib/inst_data_shipper/data_sources/base.rb +25 -1
- data/lib/inst_data_shipper/data_sources/canvas_reports.rb +1 -1
- data/lib/inst_data_shipper/data_sources/local_tables.rb +4 -8
- data/lib/inst_data_shipper/destinations/concerns/chunking.rb +2 -2
- data/lib/inst_data_shipper/destinations/speccable.rb +17 -0
- data/lib/inst_data_shipper/dumper.rb +25 -1
- data/lib/inst_data_shipper/schema_builder.rb +20 -7
- data/lib/inst_data_shipper/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c84d0f1543bfcb42bce30e69ff12cc683b6f6bf51e9275f7529dd0b070bbb79
|
4
|
+
data.tar.gz: 22b5128af3f2d03b14565c656abca685c6e7196083576d1fd20a42a834690cba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 14e5eb97d9c5dbe5049eb94cd3690a2ab59df7bf6e5e0c62b21e8893f4cb75ca294cce6d28a4b0f7dccd1c53d34d5c7a34be7b2f4699f1b320f74df14d50f672
|
7
|
+
data.tar.gz: e7d22728140a7fc331abb5f608da98d44013d49027bf932ef183a4f75babe797632aca83955cb77a205d6004d29a7377dde87aaba0c245a3564541b7e1f55cbe
|
data/README.md
CHANGED
@@ -55,6 +55,13 @@ class HostedDataPushJob < ApplicationJob
|
|
55
55
|
# `if:` may be a Proc or a Symbol (of a method on the Dumper)
|
56
56
|
incremental "updated_at", on: [:id], if: ->() {}
|
57
57
|
|
58
|
+
# Schemas may declaratively define the data source.
|
59
|
+
# This can be used for basic schemas where there's a 1:1 mapping between source table and destination table, and there is no conditional logic that needs to be performed.
|
60
|
+
# In order to apply these statements, your Dumper must call `auto_enqueue_from_schema`.
|
61
|
+
source :local_table
|
62
|
+
# A Proc can also be passed. The example below is equivalent to the one above.
|
63
|
+
source ->(table_def) { import_local_table(table_def[:model] || table_def[:warehouse_name]) }
|
64
|
+
|
58
65
|
column :name_in_destinations, :maybe_optional_sql_type, "Optional description of column"
|
59
66
|
|
60
67
|
# The type may usually be omitted if the `table()` is passed a Model class, but strings are an exception to this
|
@@ -93,6 +100,10 @@ class HostedDataPushJob < ApplicationJob
|
|
93
100
|
# If the report_name/Model don't directly match the Schema, a schema_name: parameter may be passed:
|
94
101
|
import_local_table(SomeModel, schema_name: "my_table")
|
95
102
|
import_canvas_report_by_terms("some_report", terms: Term.all.pluck(:canvas_id), schema_name: "my_table")
|
103
|
+
|
104
|
+
# Iterate through the Tables defined in the Schema and apply any defined `source` statements.
|
105
|
+
# This is the default behavior if `define()` is called without a block.
|
106
|
+
auto_enqueue_from_schema
|
96
107
|
end
|
97
108
|
|
98
109
|
def perform
|
@@ -118,6 +129,8 @@ class HostedDataPushJob < ApplicationJob
|
|
118
129
|
def enqueue_tasks
|
119
130
|
import_local_table(ALocalModel)
|
120
131
|
import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
|
132
|
+
|
133
|
+
# auto_enqueue_from_schema
|
121
134
|
end
|
122
135
|
|
123
136
|
def table_schemas
|
@@ -1,7 +1,31 @@
|
|
1
1
|
module InstDataShipper
|
2
2
|
module DataSources
|
3
3
|
module Base
|
4
|
-
|
4
|
+
# This could be a Concern, but we don't want Concern inheritance logic kicking in if it is included into another Concern.
|
5
|
+
|
6
|
+
def self.included(base)
|
7
|
+
base.extend ActiveSupport::Concern unless self.is_a?(Class) || base < ActiveSupport::Concern
|
8
|
+
base.extend ModuleHelperMethods
|
9
|
+
base.send(:include, Concern)
|
10
|
+
end
|
11
|
+
|
12
|
+
module Concern
|
13
|
+
extend ActiveSupport::Concern
|
14
|
+
end
|
15
|
+
|
16
|
+
module ModuleHelperMethods
|
17
|
+
def delayed(mthd_sym)
|
18
|
+
mthd = instance_method(mthd_sym)
|
19
|
+
pmthd_sym = :"_delayed_#{mthd_sym}"
|
20
|
+
|
21
|
+
alias_method pmthd_sym, mthd_sym
|
22
|
+
private pmthd_sym
|
23
|
+
|
24
|
+
define_method(mthd_sym) do |*args, **kwargs|
|
25
|
+
delayed(pmthd_sym, *args, **kwargs)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
5
29
|
end
|
6
30
|
end
|
7
31
|
end
|
@@ -2,17 +2,11 @@ module InstDataShipper
|
|
2
2
|
module DataSources
|
3
3
|
# This module contains the logic for processing local AR tables
|
4
4
|
module LocalTables
|
5
|
-
|
5
|
+
include Base
|
6
6
|
|
7
7
|
public
|
8
8
|
|
9
|
-
def import_local_table(*args, **kwargs)
|
10
|
-
delayed(:_import_local_table, *args, **kwargs)
|
11
|
-
end
|
12
|
-
|
13
|
-
private
|
14
|
-
|
15
|
-
def _import_local_table(model, schema_name: nil)
|
9
|
+
delayed def import_local_table(model, schema_name: nil)
|
16
10
|
model = model.safe_constantize if model.is_a?(String)
|
17
11
|
|
18
12
|
table_def = lookup_table_schema!(schema_name, { model: model })
|
@@ -40,6 +34,8 @@ module InstDataShipper
|
|
40
34
|
upload_data(table_def, &inner_block)
|
41
35
|
end
|
42
36
|
|
37
|
+
private
|
38
|
+
|
43
39
|
def _resolve_model_query(relation, query, string: nil, default: nil)
|
44
40
|
return relation if query == false
|
45
41
|
query = default if query.nil?
|
@@ -7,7 +7,7 @@ module InstDataShipper
|
|
7
7
|
DEFAULT_CHUNK_SIZE = 100_000
|
8
8
|
|
9
9
|
def chunk_data(generator, chunk_size: nil, **kwargs)
|
10
|
-
chunk_size ||= config
|
10
|
+
chunk_size ||= config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
|
11
11
|
slice = 1
|
12
12
|
|
13
13
|
btchr = CanvasSync::BatchProcessor.new(of: chunk_size) do |batch|
|
@@ -24,7 +24,7 @@ module InstDataShipper
|
|
24
24
|
|
25
25
|
def group_key
|
26
26
|
super.tap do |k|
|
27
|
-
k[:chunk_size] = config
|
27
|
+
k[:chunk_size] = config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module InstDataShipper
|
2
|
+
module Destinations
|
3
|
+
class Speccable < Base
|
4
|
+
include Concerns::Chunking
|
5
|
+
|
6
|
+
def chunk_data(generator, table:, extra: nil)
|
7
|
+
super(generator) do |batch, idx|
|
8
|
+
yield batch
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
def upload_data_chunk(table_def, chunk); end
|
13
|
+
|
14
|
+
def parse_configuration(uri); end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -18,6 +18,12 @@ module InstDataShipper
|
|
18
18
|
Class.new(self) do
|
19
19
|
include(*include)
|
20
20
|
|
21
|
+
if blk.nil? && schema[:tables].any? { |t| t[:sourcer].present? }
|
22
|
+
blk = -> { auto_enqueue_from_schema }
|
23
|
+
elsif blk.nil?
|
24
|
+
raise ArgumentError, "Must provide a block or a schema with source definitions"
|
25
|
+
end
|
26
|
+
|
21
27
|
define_method(:enqueue_tasks, &blk)
|
22
28
|
define_method(:schema) { schema }
|
23
29
|
end
|
@@ -33,6 +39,16 @@ module InstDataShipper
|
|
33
39
|
clazz.new(executor: executor)
|
34
40
|
end
|
35
41
|
|
42
|
+
if defined?(Rails) && Rails.env.test?
|
43
|
+
def for_specs!
|
44
|
+
@raw_destinations = ["speccable://nil"]
|
45
|
+
@executor = InstDataShipper::Jobs::AsyncCaller.new
|
46
|
+
@tracker = DumpBatch.new(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')
|
47
|
+
define_singleton_method(:spec_destination) { destinations.first }
|
48
|
+
self
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
36
52
|
public
|
37
53
|
|
38
54
|
def begin_dump
|
@@ -176,6 +192,14 @@ module InstDataShipper
|
|
176
192
|
raise NotImplementedError
|
177
193
|
end
|
178
194
|
|
195
|
+
def auto_enqueue_from_schema
|
196
|
+
schema[:tables].each do |table_def|
|
197
|
+
src = table_def[:sourcer]
|
198
|
+
next unless src.present?
|
199
|
+
instance_exec(table_def, &src)
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
179
203
|
def upload_data(table_def, extra: nil, &datagen)
|
180
204
|
# Allow muxing, allowing a hook to prevent some files going to certain destinations
|
181
205
|
dests = destinations_for_table(table_def)
|
@@ -258,7 +282,7 @@ module InstDataShipper
|
|
258
282
|
end
|
259
283
|
|
260
284
|
def destinations
|
261
|
-
@destinations ||= (@raw_destinations.presence || batch_context[:destinations]).map.with_index do |dest, i|
|
285
|
+
@destinations ||= (@raw_destinations.presence || batch_context[:destinations] || []).map.with_index do |dest, i|
|
262
286
|
dcls = InstDataShipper.resolve_destination(dest)
|
263
287
|
dcls.new("#{InstDataShipper.redis_prefix}:dump#{tracker.id}:dest#{i}", dest, self)
|
264
288
|
end
|
@@ -20,9 +20,10 @@ module InstDataShipper
|
|
20
20
|
@schema[:version] = version
|
21
21
|
end
|
22
22
|
|
23
|
-
def extend_table_builder(&block)
|
23
|
+
def extend_table_builder(modul = nil, &block)
|
24
24
|
@table_builder_class ||= Class.new(TableSchemaBuilder)
|
25
|
-
@table_builder_class.class_eval(&block)
|
25
|
+
@table_builder_class.class_eval(&block) if block.present?
|
26
|
+
@table_builder_class.extend(modul) if modul.present?
|
26
27
|
end
|
27
28
|
|
28
29
|
def table(model_or_name, description = nil, model: nil, query: nil, **extra, &block)
|
@@ -42,6 +43,7 @@ module InstDataShipper
|
|
42
43
|
|
43
44
|
tdef[:query] = model_or_name
|
44
45
|
tdef[:model] = model_or_name.model
|
46
|
+
tdef[:warehouse_name] = model_or_name.model.table_name
|
45
47
|
elsif model_or_name.is_a?(Class) && model_or_name < ActiveRecord::Base
|
46
48
|
tdef[:warehouse_name] = model_or_name.table_name
|
47
49
|
tdef[:model] = model_or_name
|
@@ -49,7 +51,7 @@ module InstDataShipper
|
|
49
51
|
tdef[:warehouse_name] = model_or_name
|
50
52
|
end
|
51
53
|
|
52
|
-
@table_builder_class.build(tdef, &block)
|
54
|
+
(@table_builder_class || TableSchemaBuilder).build(tdef, &block)
|
53
55
|
|
54
56
|
@schema[:tables] << tdef
|
55
57
|
|
@@ -67,12 +69,12 @@ module InstDataShipper
|
|
67
69
|
def self.build(tdef, &block)
|
68
70
|
builder = new(tdef)
|
69
71
|
builder.instance_exec(&block)
|
70
|
-
builder.
|
72
|
+
builder.options
|
71
73
|
end
|
72
74
|
|
73
|
-
|
74
|
-
|
75
|
-
|
75
|
+
def annotate(key, value)
|
76
|
+
options[key] = value
|
77
|
+
end
|
76
78
|
|
77
79
|
def version(version)
|
78
80
|
options[:version] = version
|
@@ -90,6 +92,17 @@ module InstDataShipper
|
|
90
92
|
}
|
91
93
|
end
|
92
94
|
|
95
|
+
def source(source, override_model=nil, **kwargs)
|
96
|
+
raise "Source already set" if options[:sourcer].present?
|
97
|
+
|
98
|
+
if source.is_a?(Symbol)
|
99
|
+
mthd = :"import_#{source}"
|
100
|
+
options = self.options
|
101
|
+
source = ->(table_def) { send(mthd, override_model || options[:model] || options[:warehouse_name], **kwargs) }
|
102
|
+
end
|
103
|
+
options[:sourcer] = source
|
104
|
+
end
|
105
|
+
|
93
106
|
def column(name, *args, refs: [], from: nil, **extra, &block)
|
94
107
|
from ||= name.to_s
|
95
108
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: inst_data_shipper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Instructure CustomDev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03-
|
11
|
+
date: 2024-03-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -395,6 +395,7 @@ files:
|
|
395
395
|
- lib/inst_data_shipper/destinations/concerns/chunking.rb
|
396
396
|
- lib/inst_data_shipper/destinations/hosted_data.rb
|
397
397
|
- lib/inst_data_shipper/destinations/s3.rb
|
398
|
+
- lib/inst_data_shipper/destinations/speccable.rb
|
398
399
|
- lib/inst_data_shipper/dumper.rb
|
399
400
|
- lib/inst_data_shipper/engine.rb
|
400
401
|
- lib/inst_data_shipper/jobs/async_caller.rb
|