beetle_etl 0.0.2 → 0.0.7
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +6 -0
- data/beetle_etl.gemspec +5 -6
- data/lib/beetle_etl.rb +7 -12
- data/lib/beetle_etl/dsl/dsl.rb +11 -12
- data/lib/beetle_etl/dsl/transformation.rb +10 -3
- data/lib/beetle_etl/dsl/transformation_loader.rb +13 -5
- data/lib/beetle_etl/import.rb +11 -4
- data/lib/beetle_etl/naming.rb +37 -0
- data/lib/beetle_etl/steps/assign_ids.rb +14 -38
- data/lib/beetle_etl/steps/create_stage.rb +59 -0
- data/lib/beetle_etl/steps/drop_stage.rb +15 -0
- data/lib/beetle_etl/steps/load.rb +46 -61
- data/lib/beetle_etl/steps/map_relations.rb +8 -14
- data/lib/beetle_etl/steps/step.rb +1 -8
- data/lib/beetle_etl/steps/table_diff.rb +68 -89
- data/lib/beetle_etl/steps/transform.rb +2 -4
- data/lib/beetle_etl/version.rb +1 -1
- data/spec/beetle_etl_spec.rb +3 -25
- data/spec/dsl/dsl_spec.rb +8 -15
- data/spec/dsl/transformation_loader_spec.rb +11 -4
- data/spec/dsl/transformation_spec.rb +40 -4
- data/spec/feature/example_schema.rb +2 -137
- data/spec/feature/example_transform.rb +13 -6
- data/spec/feature/feature_spec.rb +119 -18
- data/spec/steps/assign_ids_spec.rb +23 -28
- data/spec/steps/create_stage_spec.rb +89 -0
- data/spec/steps/load_spec.rb +15 -23
- data/spec/steps/map_relations_spec.rb +32 -36
- data/spec/steps/table_diff_spec.rb +41 -45
- data/spec/steps/transform_spec.rb +2 -0
- data/spec/{dependency_resolver_spec.rb → task_runner/dependency_resolver_spec.rb} +0 -0
- metadata +22 -36
- data/lib/beetle_etl/state.rb +0 -67
- data/spec/import_spec.rb +0 -7
- data/spec/state_spec.rb +0 -124
@@ -1,29 +1,23 @@
|
|
1
1
|
module BeetleETL
|
2
2
|
class MapRelations < Step
|
3
3
|
|
4
|
-
attr_reader :relations
|
5
|
-
|
6
4
|
def initialize(table_name, relations)
|
7
5
|
super(table_name)
|
8
6
|
@relations = relations
|
9
7
|
end
|
10
8
|
|
11
9
|
def dependencies
|
12
|
-
relations.values.map { |d| AssignIds.step_name(d) }.to_set << Transform.step_name(table_name)
|
10
|
+
@relations.values.map { |d| AssignIds.step_name(d) }.to_set << Transform.step_name(table_name)
|
13
11
|
end
|
14
12
|
|
15
13
|
def run
|
16
|
-
relations.
|
17
|
-
database.
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
FT__external_id: :"ST__external_#{foreign_key_column}",
|
24
|
-
).update(
|
25
|
-
:"#{foreign_key_column}" => :"FT__id"
|
26
|
-
)
|
14
|
+
@relations.map do |foreign_key_column, foreign_table_name|
|
15
|
+
database.execute <<-SQL
|
16
|
+
UPDATE #{stage_table_name_sql} current_table
|
17
|
+
SET #{foreign_key_column} = foreign_table.id
|
18
|
+
FROM #{stage_table_name_sql(foreign_table_name)} foreign_table
|
19
|
+
WHERE current_table.external_#{foreign_key_column} = foreign_table.external_id
|
20
|
+
SQL
|
27
21
|
end
|
28
22
|
end
|
29
23
|
|
@@ -4,6 +4,7 @@ module BeetleETL
|
|
4
4
|
|
5
5
|
class Step
|
6
6
|
|
7
|
+
include BeetleETL::Naming
|
7
8
|
attr_reader :table_name
|
8
9
|
|
9
10
|
def initialize(table_name)
|
@@ -22,14 +23,6 @@ module BeetleETL
|
|
22
23
|
raise DependenciesNotDefinedError
|
23
24
|
end
|
24
25
|
|
25
|
-
def run_id
|
26
|
-
BeetleETL.state.run_id
|
27
|
-
end
|
28
|
-
|
29
|
-
def stage_schema
|
30
|
-
BeetleETL.config.stage_schema
|
31
|
-
end
|
32
|
-
|
33
26
|
def external_source
|
34
27
|
BeetleETL.config.external_source
|
35
28
|
end
|
@@ -2,7 +2,6 @@ module BeetleETL
|
|
2
2
|
class TableDiff < Step
|
3
3
|
|
4
4
|
IMPORTER_COLUMNS = %i[
|
5
|
-
import_run_id
|
6
5
|
external_id
|
7
6
|
transition
|
8
7
|
]
|
@@ -12,115 +11,95 @@ module BeetleETL
|
|
12
11
|
end
|
13
12
|
|
14
13
|
def run
|
15
|
-
%w(create keep update delete undelete).
|
16
|
-
|
17
|
-
end
|
14
|
+
%w(create keep update delete undelete).each do |transition|
|
15
|
+
public_send(:"transition_#{transition}")
|
16
|
+
end
|
18
17
|
end
|
19
18
|
|
20
19
|
def transition_create
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
20
|
+
database.execute <<-SQL
|
21
|
+
UPDATE #{stage_table_name_sql} stage
|
22
|
+
SET transition = 'CREATE'
|
23
|
+
WHERE NOT EXISTS (
|
24
|
+
SELECT 1
|
25
|
+
FROM #{public_table_name} public
|
26
|
+
WHERE public.external_id = stage.external_id
|
27
|
+
AND public.external_source = '#{external_source}'
|
27
28
|
)
|
28
|
-
|
29
|
-
.update(transition: 'CREATE')
|
29
|
+
SQL
|
30
30
|
end
|
31
31
|
|
32
32
|
def transition_keep
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
33
|
+
database.execute <<-SQL
|
34
|
+
UPDATE #{stage_table_name_sql} stage
|
35
|
+
SET transition = 'KEEP'
|
36
|
+
WHERE EXISTS (
|
37
|
+
SELECT 1
|
38
|
+
FROM #{public_table_name} public
|
39
|
+
WHERE public.external_id = stage.external_id
|
40
|
+
AND public.external_source = '#{external_source}'
|
41
|
+
AND public.deleted_at IS NULL
|
42
|
+
AND
|
43
|
+
(#{public_record_columns.join(', ')})
|
44
|
+
IS NOT DISTINCT FROM
|
45
|
+
(#{stage_record_columns.join(', ')})
|
46
46
|
)
|
47
|
-
|
48
|
-
.update(transition: 'KEEP')
|
47
|
+
SQL
|
49
48
|
end
|
50
49
|
|
51
50
|
def transition_update
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
51
|
+
database.execute <<-SQL
|
52
|
+
UPDATE #{stage_table_name_sql} stage
|
53
|
+
SET transition = 'UPDATE'
|
54
|
+
WHERE EXISTS (
|
55
|
+
SELECT 1
|
56
|
+
FROM #{public_table_name} public
|
57
|
+
WHERE public.external_id = stage.external_id
|
58
|
+
AND public.external_source = '#{external_source}'
|
59
|
+
AND public.deleted_at IS NULL
|
60
|
+
AND
|
61
|
+
(#{public_record_columns.join(', ')})
|
62
|
+
IS DISTINCT FROM
|
63
|
+
(#{stage_record_columns.join(', ')})
|
65
64
|
)
|
66
|
-
|
67
|
-
.update(transition: 'UPDATE')
|
65
|
+
SQL
|
68
66
|
end
|
69
67
|
|
70
68
|
def transition_delete
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
.
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
:transition
|
88
|
-
],
|
89
|
-
deleted_dataset
|
90
|
-
.select(
|
91
|
-
run_id,
|
92
|
-
:public__external_id,
|
93
|
-
'DELETE'
|
94
|
-
)
|
95
|
-
)
|
69
|
+
database.execute <<-SQL
|
70
|
+
INSERT INTO #{stage_table_name_sql}
|
71
|
+
(external_id, transition)
|
72
|
+
SELECT
|
73
|
+
public.external_id,
|
74
|
+
'DELETE'
|
75
|
+
FROM #{public_table_name_sql} public
|
76
|
+
LEFT OUTER JOIN (
|
77
|
+
SELECT *
|
78
|
+
FROM #{stage_table_name_sql}
|
79
|
+
) stage
|
80
|
+
ON (stage.external_id = public.external_id)
|
81
|
+
WHERE stage.external_id IS NULL
|
82
|
+
AND public.external_source = '#{external_source}'
|
83
|
+
AND public.deleted_at IS NULL
|
84
|
+
SQL
|
96
85
|
end
|
97
86
|
|
98
87
|
def transition_undelete
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
public__deleted_at: nil
|
88
|
+
database.execute <<-SQL
|
89
|
+
UPDATE #{stage_table_name_sql} stage
|
90
|
+
SET transition = 'UNDELETE'
|
91
|
+
WHERE EXISTS (
|
92
|
+
SELECT 1
|
93
|
+
FROM #{public_table_name_sql} public
|
94
|
+
WHERE public.external_id = stage.external_id
|
95
|
+
AND public.external_source = '#{external_source}'
|
96
|
+
AND public.deleted_at IS NOT NULL
|
109
97
|
)
|
110
|
-
|
111
|
-
.update(transition: 'UNDELETE')
|
98
|
+
SQL
|
112
99
|
end
|
113
100
|
|
114
101
|
private
|
115
102
|
|
116
|
-
def stage_table
|
117
|
-
@stage_table ||= database[:"#{stage_schema}__#{table_name}___stage"]
|
118
|
-
end
|
119
|
-
|
120
|
-
def public_table
|
121
|
-
@public_table ||= database[:"#{table_name}___public"]
|
122
|
-
end
|
123
|
-
|
124
103
|
def public_record_columns
|
125
104
|
prefixed_columns(data_columns, 'public')
|
126
105
|
end
|
@@ -134,7 +113,7 @@ module BeetleETL
|
|
134
113
|
end
|
135
114
|
|
136
115
|
def table_columns
|
137
|
-
@table_columns ||= database[
|
116
|
+
@table_columns ||= database[stage_table_name.to_sym].columns
|
138
117
|
end
|
139
118
|
|
140
119
|
def ignored_columns
|
@@ -148,7 +127,7 @@ module BeetleETL
|
|
148
127
|
end
|
149
128
|
|
150
129
|
def prefixed_columns(columns, prefix)
|
151
|
-
columns.map { |column| "#{prefix}
|
130
|
+
columns.map { |column| %Q("#{prefix}"."#{column}") }
|
152
131
|
end
|
153
132
|
|
154
133
|
end
|
@@ -1,8 +1,6 @@
|
|
1
1
|
module BeetleETL
|
2
2
|
class Transform < Step
|
3
3
|
|
4
|
-
attr_reader :query
|
5
|
-
|
6
4
|
def initialize(table_name, dependencies, query)
|
7
5
|
super(table_name)
|
8
6
|
@dependencies = dependencies
|
@@ -10,11 +8,11 @@ module BeetleETL
|
|
10
8
|
end
|
11
9
|
|
12
10
|
def dependencies
|
13
|
-
Set.new(@dependencies.map { |d| self.class.step_name(d) })
|
11
|
+
Set.new(@dependencies.map { |d| self.class.step_name(d) }) << CreateStage.step_name(table_name)
|
14
12
|
end
|
15
13
|
|
16
14
|
def run
|
17
|
-
database.run(query)
|
15
|
+
database.run(@query)
|
18
16
|
end
|
19
17
|
|
20
18
|
end
|
data/lib/beetle_etl/version.rb
CHANGED
data/spec/beetle_etl_spec.rb
CHANGED
@@ -1,34 +1,12 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe BeetleETL do
|
4
|
-
describe '#import' do
|
5
4
|
|
5
|
+
describe '#import' do
|
6
6
|
it 'runs the import' do
|
7
|
-
|
8
|
-
expect(BeetleETL::Import).to receive(:run)
|
7
|
+
expect(BeetleETL::Import).to receive_message_chain(:new, :run)
|
9
8
|
BeetleETL.import
|
10
9
|
end
|
11
|
-
|
12
|
-
context 'handling state' do
|
13
|
-
it 'starts the import and marks it as finished if no errors are thrown' do
|
14
|
-
allow(BeetleETL::Import).to receive(:run)
|
15
|
-
|
16
|
-
expect(BeetleETL.state).to receive(:start_import).ordered
|
17
|
-
expect(BeetleETL.state).to receive(:mark_as_succeeded).ordered
|
18
|
-
|
19
|
-
BeetleETL.import
|
20
|
-
end
|
21
|
-
|
22
|
-
it 'starts the import and marks it as failed if Import.run throws an error' do
|
23
|
-
exception = Exception.new
|
24
|
-
allow(BeetleETL::Import).to receive(:run).and_raise(exception)
|
25
|
-
|
26
|
-
expect(BeetleETL.state).to receive(:start_import).ordered
|
27
|
-
expect(BeetleETL.state).to receive(:mark_as_failed).ordered
|
28
|
-
|
29
|
-
expect { BeetleETL.import }.to raise_exception(exception)
|
30
|
-
end
|
31
|
-
end
|
32
10
|
end
|
33
11
|
|
34
12
|
describe '#config' do
|
@@ -65,6 +43,6 @@ describe BeetleETL do
|
|
65
43
|
expect(BeetleETL.database).to eql(database)
|
66
44
|
expect(BeetleETL.database).to eql(database)
|
67
45
|
end
|
68
|
-
|
69
46
|
end
|
47
|
+
|
70
48
|
end
|
data/spec/dsl/dsl_spec.rb
CHANGED
@@ -6,15 +6,16 @@ module BeetleETL
|
|
6
6
|
subject { DSL.new(:foo_table) }
|
7
7
|
|
8
8
|
describe '#stage_table' do
|
9
|
-
it 'returns the stage table name
|
10
|
-
|
11
|
-
|
9
|
+
it 'returns the current stage table name' do
|
10
|
+
expect(subject.stage_table).to eql(
|
11
|
+
BeetleETL::Naming.stage_table_name_sql(:foo_table)
|
12
|
+
)
|
12
13
|
end
|
13
|
-
end
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
it 'returns the stage table name for the given table' do
|
16
|
+
expect(subject.stage_table(:bar_table)).to eql(
|
17
|
+
BeetleETL::Naming.stage_table_name_sql(:bar_table)
|
18
|
+
)
|
18
19
|
end
|
19
20
|
end
|
20
21
|
|
@@ -32,13 +33,5 @@ module BeetleETL
|
|
32
33
|
end
|
33
34
|
end
|
34
35
|
|
35
|
-
describe '#import_run_id' do
|
36
|
-
it 'returns the import run id defined in the config' do
|
37
|
-
id = double(:id)
|
38
|
-
allow(BeetleETL.state).to receive(:run_id) { id }
|
39
|
-
expect(subject.import_run_id).to eql(id)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
36
|
end
|
44
37
|
end
|
@@ -4,6 +4,8 @@ require 'tempfile'
|
|
4
4
|
module BeetleETL
|
5
5
|
describe TransformationLoader do
|
6
6
|
|
7
|
+
subject { TransformationLoader.new }
|
8
|
+
|
7
9
|
before :example do
|
8
10
|
data_file = tempfile_with_contents <<-FILE
|
9
11
|
import :foo do
|
@@ -13,6 +15,10 @@ module BeetleETL
|
|
13
15
|
import :bar do
|
14
16
|
'bar'
|
15
17
|
end
|
18
|
+
|
19
|
+
helpers do
|
20
|
+
"baz"
|
21
|
+
end
|
16
22
|
FILE
|
17
23
|
|
18
24
|
BeetleETL.configure do |config|
|
@@ -21,16 +27,17 @@ module BeetleETL
|
|
21
27
|
end
|
22
28
|
|
23
29
|
describe '#load' do
|
24
|
-
it 'loads
|
25
|
-
expect(Transformation).to receive(:new) do |table_name, config|
|
30
|
+
it 'loads transformations from the data file' do
|
31
|
+
expect(Transformation).to receive(:new) do |table_name, config, helpers|
|
26
32
|
expect(table_name.to_s).to eql(config.call)
|
33
|
+
expect(helpers.call).to eql("baz")
|
27
34
|
end.exactly(2).times
|
28
35
|
|
29
36
|
subject.load
|
30
37
|
end
|
31
38
|
|
32
|
-
it '
|
33
|
-
allow(Transformation).to receive(:new) do |table_name, config|
|
39
|
+
it 'returns the list of transformations' do
|
40
|
+
allow(Transformation).to receive(:new) do |table_name, config, helpers|
|
34
41
|
table_name
|
35
42
|
end
|
36
43
|
|
@@ -10,15 +10,36 @@ module BeetleETL
|
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
13
|
+
describe 'columns' do
|
14
|
+
it 'returns a list of payload column name symbols' do
|
15
|
+
setup = Proc.new do
|
16
|
+
columns :payload_1, 'payload_2'
|
17
|
+
end
|
18
|
+
transformation = Transformation.new(:table, setup)
|
19
|
+
|
20
|
+
expect(transformation.column_names).to match_array([
|
21
|
+
:payload_1, :payload_2
|
22
|
+
])
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'defaults to an empty array if no columns are defined' do
|
26
|
+
transformation = Transformation.new(:table, Proc.new {})
|
27
|
+
|
28
|
+
expect(transformation.column_names).to match_array([])
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
13
32
|
describe '#relations' do
|
14
33
|
it 'returns the list of foreign tables and their foreign key column' do
|
15
34
|
setup = Proc.new do
|
16
35
|
references :foreign_table, on: :foreign_table_id
|
36
|
+
references :another_foreign_table, on: :another_foreign_table_id
|
17
37
|
end
|
18
38
|
transformation = Transformation.new(:table, setup)
|
19
39
|
|
20
40
|
expect(transformation.relations).to eql({
|
21
|
-
foreign_table_id: :foreign_table
|
41
|
+
foreign_table_id: :foreign_table,
|
42
|
+
another_foreign_table_id: :another_foreign_table
|
22
43
|
})
|
23
44
|
end
|
24
45
|
end
|
@@ -36,18 +57,33 @@ module BeetleETL
|
|
36
57
|
end
|
37
58
|
|
38
59
|
describe '#query' do
|
39
|
-
it 'returns the query interpolating methods
|
60
|
+
it 'returns the query interpolating methods defined as helpers' do
|
61
|
+
helpers = Proc.new do
|
62
|
+
def foo; "foo_string"; end
|
63
|
+
end
|
40
64
|
|
41
65
|
setup = Proc.new do
|
42
|
-
def foo; "foo_string"; end
|
43
66
|
query "SELECT '#{foo}' FROM some_table"
|
44
67
|
end
|
45
|
-
|
68
|
+
|
69
|
+
transformation = Transformation.new(:table, setup, helpers)
|
46
70
|
|
47
71
|
expect(transformation.query).to eql(
|
48
72
|
"SELECT 'foo_string' FROM some_table"
|
49
73
|
)
|
50
74
|
end
|
75
|
+
|
76
|
+
it 'concatenates multiple queries' do
|
77
|
+
setup = Proc.new do
|
78
|
+
query "SOME QUERY"
|
79
|
+
query "ANOTHER QUERY"
|
80
|
+
end
|
81
|
+
transformation = Transformation.new(:table, setup)
|
82
|
+
|
83
|
+
expect(transformation.query).to eql(
|
84
|
+
"SOME QUERY;ANOTHER QUERY"
|
85
|
+
)
|
86
|
+
end
|
51
87
|
end
|
52
88
|
|
53
89
|
end
|