inst_data_shipper 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +13 -0
- data/lib/inst_data_shipper/data_sources/base.rb +25 -1
- data/lib/inst_data_shipper/data_sources/canvas_reports.rb +1 -1
- data/lib/inst_data_shipper/data_sources/local_tables.rb +4 -8
- data/lib/inst_data_shipper/destinations/concerns/chunking.rb +2 -2
- data/lib/inst_data_shipper/destinations/speccable.rb +17 -0
- data/lib/inst_data_shipper/dumper.rb +25 -1
- data/lib/inst_data_shipper/schema_builder.rb +20 -7
- data/lib/inst_data_shipper/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c84d0f1543bfcb42bce30e69ff12cc683b6f6bf51e9275f7529dd0b070bbb79
|
4
|
+
data.tar.gz: 22b5128af3f2d03b14565c656abca685c6e7196083576d1fd20a42a834690cba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 14e5eb97d9c5dbe5049eb94cd3690a2ab59df7bf6e5e0c62b21e8893f4cb75ca294cce6d28a4b0f7dccd1c53d34d5c7a34be7b2f4699f1b320f74df14d50f672
|
7
|
+
data.tar.gz: e7d22728140a7fc331abb5f608da98d44013d49027bf932ef183a4f75babe797632aca83955cb77a205d6004d29a7377dde87aaba0c245a3564541b7e1f55cbe
|
data/README.md
CHANGED
@@ -55,6 +55,13 @@ class HostedDataPushJob < ApplicationJob
|
|
55
55
|
# `if:` may be a Proc or a Symbol (of a method on the Dumper)
|
56
56
|
incremental "updated_at", on: [:id], if: ->() {}
|
57
57
|
|
58
|
+
# Schemas may declaratively define the data source.
|
59
|
+
# This can be used for basic schemas where there's a 1:1 mapping between source table and destination table, and there is no conditional logic that needs to be performed.
|
60
|
+
# In order to apply these statements, your Dumper must call `auto_enqueue_from_schema`.
|
61
|
+
source :local_table
|
62
|
+
# A Proc can also be passed. The below is equivalent to the above
|
63
|
+
source ->(table_def) { import_local_table(table_def[:model] || table_def[:warehouse_name]) }
|
64
|
+
|
58
65
|
column :name_in_destinations, :maybe_optional_sql_type, "Optional description of column"
|
59
66
|
|
60
67
|
# The type may usually be omitted if the `table()` is passed a Model class, but strings are an exception to this
|
@@ -93,6 +100,10 @@ class HostedDataPushJob < ApplicationJob
|
|
93
100
|
# If the report_name/Model don't directly match the Schema, a schema_name: parameter may be passed:
|
94
101
|
import_local_table(SomeModel, schema_name: "my_table")
|
95
102
|
import_canvas_report_by_terms("some_report", terms: Term.all.pluck(:canvas_id), schema_name: "my_table")
|
103
|
+
|
104
|
+
# Iterate through the Tables defined in the Schema and apply any defined `source` statements.
|
105
|
+
# This is the default behavior if `define()` is called w/o a block.
|
106
|
+
auto_enqueue_from_schema
|
96
107
|
end
|
97
108
|
|
98
109
|
def perform
|
@@ -118,6 +129,8 @@ class HostedDataPushJob < ApplicationJob
|
|
118
129
|
def enqueue_tasks
|
119
130
|
import_local_table(ALocalModel)
|
120
131
|
import_canvas_report_by_terms("proserv_student_submissions_csv", terms: Term.all.pluck(:canvas_id))
|
132
|
+
|
133
|
+
# auto_enqueue_from_schema
|
121
134
|
end
|
122
135
|
|
123
136
|
def table_schemas
|
@@ -1,7 +1,31 @@
|
|
1
1
|
module InstDataShipper
|
2
2
|
module DataSources
|
3
3
|
module Base
|
4
|
-
|
4
|
+
# This could be a Concern, but we don't want Concern inheritance logic kicking in if it is included into another Concern.
|
5
|
+
|
6
|
+
def self.included(base)
|
7
|
+
base.extend ActiveSupport::Concern unless self.is_a?(Class) || base < ActiveSupport::Concern
|
8
|
+
base.extend ModuleHelperMethods
|
9
|
+
base.send(:include, Concern)
|
10
|
+
end
|
11
|
+
|
12
|
+
module Concern
|
13
|
+
extend ActiveSupport::Concern
|
14
|
+
end
|
15
|
+
|
16
|
+
module ModuleHelperMethods
|
17
|
+
def delayed(mthd_sym)
|
18
|
+
mthd = instance_method(mthd_sym)
|
19
|
+
pmthd_sym = :"_delayed_#{mthd_sym}"
|
20
|
+
|
21
|
+
alias_method pmthd_sym, mthd_sym
|
22
|
+
private pmthd_sym
|
23
|
+
|
24
|
+
define_method(mthd_sym) do |*args, **kwargs|
|
25
|
+
delayed(pmthd_sym, *args, **kwargs)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
5
29
|
end
|
6
30
|
end
|
7
31
|
end
|
@@ -2,17 +2,11 @@ module InstDataShipper
|
|
2
2
|
module DataSources
|
3
3
|
# This module contains the logic for processing local AR tables
|
4
4
|
module LocalTables
|
5
|
-
|
5
|
+
include Base
|
6
6
|
|
7
7
|
public
|
8
8
|
|
9
|
-
def import_local_table(*args, **kwargs)
|
10
|
-
delayed(:_import_local_table, *args, **kwargs)
|
11
|
-
end
|
12
|
-
|
13
|
-
private
|
14
|
-
|
15
|
-
def _import_local_table(model, schema_name: nil)
|
9
|
+
delayed def import_local_table(model, schema_name: nil)
|
16
10
|
model = model.safe_constantize if model.is_a?(String)
|
17
11
|
|
18
12
|
table_def = lookup_table_schema!(schema_name, { model: model })
|
@@ -40,6 +34,8 @@ module InstDataShipper
|
|
40
34
|
upload_data(table_def, &inner_block)
|
41
35
|
end
|
42
36
|
|
37
|
+
private
|
38
|
+
|
43
39
|
def _resolve_model_query(relation, query, string: nil, default: nil)
|
44
40
|
return relation if query == false
|
45
41
|
query = default if query.nil?
|
@@ -7,7 +7,7 @@ module InstDataShipper
|
|
7
7
|
DEFAULT_CHUNK_SIZE = 100_000
|
8
8
|
|
9
9
|
def chunk_data(generator, chunk_size: nil, **kwargs)
|
10
|
-
chunk_size ||= config.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
|
10
|
+
chunk_size ||= config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
|
11
11
|
slice = 1
|
12
12
|
|
13
13
|
btchr = CanvasSync::BatchProcessor.new(of: chunk_size) do |batch|
|
@@ -24,7 +24,7 @@ module InstDataShipper
|
|
24
24
|
|
25
25
|
def group_key
|
26
26
|
super.tap do |k|
|
27
|
-
k[:chunk_size] = config.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
|
27
|
+
k[:chunk_size] = config&.dig(:params, :chunk_size) || DEFAULT_CHUNK_SIZE
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module InstDataShipper
|
2
|
+
module Destinations
|
3
|
+
class Speccable < Base
|
4
|
+
include Concerns::Chunking
|
5
|
+
|
6
|
+
def chunk_data(generator, table:, extra: nil)
|
7
|
+
super(generator) do |batch, idx|
|
8
|
+
yield batch
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
def upload_data_chunk(table_def, chunk); end
|
13
|
+
|
14
|
+
def parse_configuration(uri); end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -18,6 +18,12 @@ module InstDataShipper
|
|
18
18
|
Class.new(self) do
|
19
19
|
include(*include)
|
20
20
|
|
21
|
+
if blk.nil? && schema[:tables].any? { |t| t[:sourcer].present? }
|
22
|
+
blk = -> { auto_enqueue_from_schema }
|
23
|
+
elsif blk.nil?
|
24
|
+
raise ArgumentError, "Must provide a block or a schema with source definitions"
|
25
|
+
end
|
26
|
+
|
21
27
|
define_method(:enqueue_tasks, &blk)
|
22
28
|
define_method(:schema) { schema }
|
23
29
|
end
|
@@ -33,6 +39,16 @@ module InstDataShipper
|
|
33
39
|
clazz.new(executor: executor)
|
34
40
|
end
|
35
41
|
|
42
|
+
if defined?(Rails) && Rails.env.test?
|
43
|
+
def for_specs!
|
44
|
+
@raw_destinations = ["speccable://nil"]
|
45
|
+
@executor = InstDataShipper::Jobs::AsyncCaller.new
|
46
|
+
@tracker = DumpBatch.new(job_class: self.class.to_s, genre: export_genre, status: 'in_progress')
|
47
|
+
define_singleton_method(:spec_destination) { destinations.first }
|
48
|
+
self
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
36
52
|
public
|
37
53
|
|
38
54
|
def begin_dump
|
@@ -176,6 +192,14 @@ module InstDataShipper
|
|
176
192
|
raise NotImplementedError
|
177
193
|
end
|
178
194
|
|
195
|
+
def auto_enqueue_from_schema
|
196
|
+
schema[:tables].each do |table_def|
|
197
|
+
src = table_def[:sourcer]
|
198
|
+
next unless src.present?
|
199
|
+
instance_exec(table_def, &src)
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
179
203
|
def upload_data(table_def, extra: nil, &datagen)
|
180
204
|
# Allow muxing, allowing a hook to prevent some files going to certain destinations
|
181
205
|
dests = destinations_for_table(table_def)
|
@@ -258,7 +282,7 @@ module InstDataShipper
|
|
258
282
|
end
|
259
283
|
|
260
284
|
def destinations
|
261
|
-
@destinations ||= (@raw_destinations.presence || batch_context[:destinations]).map.with_index do |dest, i|
|
285
|
+
@destinations ||= (@raw_destinations.presence || batch_context[:destinations] || []).map.with_index do |dest, i|
|
262
286
|
dcls = InstDataShipper.resolve_destination(dest)
|
263
287
|
dcls.new("#{InstDataShipper.redis_prefix}:dump#{tracker.id}:dest#{i}", dest, self)
|
264
288
|
end
|
@@ -20,9 +20,10 @@ module InstDataShipper
|
|
20
20
|
@schema[:version] = version
|
21
21
|
end
|
22
22
|
|
23
|
-
def extend_table_builder(&block)
|
23
|
+
def extend_table_builder(modul = nil, &block)
|
24
24
|
@table_builder_class ||= Class.new(TableSchemaBuilder)
|
25
|
-
@table_builder_class.class_eval(&block)
|
25
|
+
@table_builder_class.class_eval(&block) if block.present?
|
26
|
+
@table_builder_class.extend(modul) if modul.present?
|
26
27
|
end
|
27
28
|
|
28
29
|
def table(model_or_name, description = nil, model: nil, query: nil, **extra, &block)
|
@@ -42,6 +43,7 @@ module InstDataShipper
|
|
42
43
|
|
43
44
|
tdef[:query] = model_or_name
|
44
45
|
tdef[:model] = model_or_name.model
|
46
|
+
tdef[:warehouse_name] = model_or_name.model.table_name
|
45
47
|
elsif model_or_name.is_a?(Class) && model_or_name < ActiveRecord::Base
|
46
48
|
tdef[:warehouse_name] = model_or_name.table_name
|
47
49
|
tdef[:model] = model_or_name
|
@@ -49,7 +51,7 @@ module InstDataShipper
|
|
49
51
|
tdef[:warehouse_name] = model_or_name
|
50
52
|
end
|
51
53
|
|
52
|
-
@table_builder_class.build(tdef, &block)
|
54
|
+
(@table_builder_class || TableSchemaBuilder).build(tdef, &block)
|
53
55
|
|
54
56
|
@schema[:tables] << tdef
|
55
57
|
|
@@ -67,12 +69,12 @@ module InstDataShipper
|
|
67
69
|
def self.build(tdef, &block)
|
68
70
|
builder = new(tdef)
|
69
71
|
builder.instance_exec(&block)
|
70
|
-
builder.
|
72
|
+
builder.options
|
71
73
|
end
|
72
74
|
|
73
|
-
|
74
|
-
|
75
|
-
|
75
|
+
def annotate(key, value)
|
76
|
+
options[key] = value
|
77
|
+
end
|
76
78
|
|
77
79
|
def version(version)
|
78
80
|
options[:version] = version
|
@@ -90,6 +92,17 @@ module InstDataShipper
|
|
90
92
|
}
|
91
93
|
end
|
92
94
|
|
95
|
+
def source(source, override_model=nil, **kwargs)
|
96
|
+
raise "Source already set" if options[:sourcer].present?
|
97
|
+
|
98
|
+
if source.is_a?(Symbol)
|
99
|
+
mthd = :"import_#{source}"
|
100
|
+
options = self.options
|
101
|
+
source = ->(table_def) { send(mthd, override_model || options[:model] || options[:warehouse_name], **kwargs) }
|
102
|
+
end
|
103
|
+
options[:sourcer] = source
|
104
|
+
end
|
105
|
+
|
93
106
|
def column(name, *args, refs: [], from: nil, **extra, &block)
|
94
107
|
from ||= name.to_s
|
95
108
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: inst_data_shipper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Instructure CustomDev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03-
|
11
|
+
date: 2024-03-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|
@@ -395,6 +395,7 @@ files:
|
|
395
395
|
- lib/inst_data_shipper/destinations/concerns/chunking.rb
|
396
396
|
- lib/inst_data_shipper/destinations/hosted_data.rb
|
397
397
|
- lib/inst_data_shipper/destinations/s3.rb
|
398
|
+
- lib/inst_data_shipper/destinations/speccable.rb
|
398
399
|
- lib/inst_data_shipper/dumper.rb
|
399
400
|
- lib/inst_data_shipper/engine.rb
|
400
401
|
- lib/inst_data_shipper/jobs/async_caller.rb
|