beetle_etl 0.0.2 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +6 -0
- data/beetle_etl.gemspec +5 -6
- data/lib/beetle_etl.rb +7 -12
- data/lib/beetle_etl/dsl/dsl.rb +11 -12
- data/lib/beetle_etl/dsl/transformation.rb +10 -3
- data/lib/beetle_etl/dsl/transformation_loader.rb +13 -5
- data/lib/beetle_etl/import.rb +11 -4
- data/lib/beetle_etl/naming.rb +37 -0
- data/lib/beetle_etl/steps/assign_ids.rb +14 -38
- data/lib/beetle_etl/steps/create_stage.rb +59 -0
- data/lib/beetle_etl/steps/drop_stage.rb +15 -0
- data/lib/beetle_etl/steps/load.rb +46 -61
- data/lib/beetle_etl/steps/map_relations.rb +8 -14
- data/lib/beetle_etl/steps/step.rb +1 -8
- data/lib/beetle_etl/steps/table_diff.rb +68 -89
- data/lib/beetle_etl/steps/transform.rb +2 -4
- data/lib/beetle_etl/version.rb +1 -1
- data/spec/beetle_etl_spec.rb +3 -25
- data/spec/dsl/dsl_spec.rb +8 -15
- data/spec/dsl/transformation_loader_spec.rb +11 -4
- data/spec/dsl/transformation_spec.rb +40 -4
- data/spec/feature/example_schema.rb +2 -137
- data/spec/feature/example_transform.rb +13 -6
- data/spec/feature/feature_spec.rb +119 -18
- data/spec/steps/assign_ids_spec.rb +23 -28
- data/spec/steps/create_stage_spec.rb +89 -0
- data/spec/steps/load_spec.rb +15 -23
- data/spec/steps/map_relations_spec.rb +32 -36
- data/spec/steps/table_diff_spec.rb +41 -45
- data/spec/steps/transform_spec.rb +2 -0
- data/spec/{dependency_resolver_spec.rb → task_runner/dependency_resolver_spec.rb} +0 -0
- metadata +22 -36
- data/lib/beetle_etl/state.rb +0 -67
- data/spec/import_spec.rb +0 -7
- data/spec/state_spec.rb +0 -124
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module BeetleETL
|
4
|
+
describe CreateStage do
|
5
|
+
|
6
|
+
describe '#dependencies' do
|
7
|
+
it 'has no dependencies' do
|
8
|
+
subject = CreateStage.new(:example_table, double(:dependencies), double(:columns))
|
9
|
+
expect(subject.dependencies).to eql(Set.new)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
describe '#run' do
|
14
|
+
before do
|
15
|
+
BeetleETL.configure do |config|
|
16
|
+
config.database = test_database
|
17
|
+
end
|
18
|
+
|
19
|
+
test_database.execute <<-SQL
|
20
|
+
CREATE TABLE example_table (
|
21
|
+
id INTEGER,
|
22
|
+
external_id character varying(255),
|
23
|
+
external_source character varying(255),
|
24
|
+
|
25
|
+
some_string character varying(200),
|
26
|
+
some_integer integer,
|
27
|
+
some_float double precision,
|
28
|
+
|
29
|
+
dependee_a_id integer,
|
30
|
+
dependee_b_id integer,
|
31
|
+
|
32
|
+
PRIMARY KEY (id)
|
33
|
+
)
|
34
|
+
SQL
|
35
|
+
|
36
|
+
@relations = {
|
37
|
+
dependee_a_id: :dependee_a,
|
38
|
+
dependee_b_id: :dependee_b,
|
39
|
+
}
|
40
|
+
@columns = %i(some_string some_integer some_float)
|
41
|
+
end
|
42
|
+
|
43
|
+
let(:subject) do
|
44
|
+
CreateStage.new(:example_table, @relations, @columns)
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'creates a stage table table with all payload columns' do
|
48
|
+
subject.run
|
49
|
+
|
50
|
+
columns = Hash[test_database.schema(subject.stage_table_name.to_sym)]
|
51
|
+
|
52
|
+
expected_columns = %i(id external_id some_string some_integer some_float)
|
53
|
+
expect(columns.keys).to include(*expected_columns)
|
54
|
+
|
55
|
+
expect(columns[:id][:db_type]).to eq('integer')
|
56
|
+
expect(columns[:external_id][:db_type]).to eq('character varying(255)')
|
57
|
+
expect(columns[:transition][:db_type]).to eq('character varying(255)')
|
58
|
+
|
59
|
+
expect(columns[:some_string][:db_type]).to eq('character varying(200)')
|
60
|
+
expect(columns[:some_integer][:db_type]).to eq('integer')
|
61
|
+
expect(columns[:some_float][:db_type]).to eq('double precision')
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'adds columns for dependent foreign key associations' do
|
65
|
+
subject.run
|
66
|
+
|
67
|
+
columns = Hash[test_database.schema(subject.stage_table_name)]
|
68
|
+
|
69
|
+
expected_columns = %i(
|
70
|
+
dependee_a_id external_dependee_a_id
|
71
|
+
dependee_b_id external_dependee_b_id
|
72
|
+
)
|
73
|
+
expect(columns.keys).to include(*expected_columns)
|
74
|
+
|
75
|
+
expect(columns[:dependee_a_id][:db_type]).to eq('integer')
|
76
|
+
expect(columns[:external_dependee_a_id][:db_type]).to eq('character varying(255)')
|
77
|
+
|
78
|
+
expect(columns[:dependee_b_id][:db_type]).to eq('integer')
|
79
|
+
expect(columns[:external_dependee_b_id][:db_type]).to eq('character varying(255)')
|
80
|
+
end
|
81
|
+
|
82
|
+
it 'does not add foreign key columns twice if defined as payload column' do
|
83
|
+
columns = [:some_string, :dependee_a_id]
|
84
|
+
CreateStage.new(:example_table, @relations, columns).run
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
89
|
+
end
|
data/spec/steps/load_spec.rb
CHANGED
@@ -6,8 +6,6 @@ require 'active_support/core_ext/numeric/time'
|
|
6
6
|
module BeetleETL
|
7
7
|
describe Load do
|
8
8
|
|
9
|
-
let(:run_id) { 1 }
|
10
|
-
let(:old_run_id) { 5000 }
|
11
9
|
let(:external_source) { 'my_source' }
|
12
10
|
|
13
11
|
let(:now) { Time.now.beginning_of_day }
|
@@ -17,17 +15,14 @@ module BeetleETL
|
|
17
15
|
|
18
16
|
before do
|
19
17
|
BeetleETL.configure do |config|
|
20
|
-
config.stage_schema = 'stage'
|
21
18
|
config.external_source = external_source
|
22
19
|
config.database = test_database
|
23
20
|
end
|
24
21
|
|
25
|
-
allow(BeetleETL).to receive(:state) { double(:state, run_id: run_id) }
|
26
22
|
allow(subject).to receive(:now) { now }
|
27
23
|
|
28
24
|
test_database.create_schema(:stage)
|
29
|
-
test_database.create_table(
|
30
|
-
Integer :import_run_id
|
25
|
+
test_database.create_table(subject.stage_table_name.to_sym) do
|
31
26
|
Integer :id
|
32
27
|
String :external_id, size: 255
|
33
28
|
String :transition, size: 20
|
@@ -80,10 +75,9 @@ module BeetleETL
|
|
80
75
|
|
81
76
|
describe '#load_create' do
|
82
77
|
it 'loads records into the public table' do
|
83
|
-
insert_into(
|
84
|
-
[ :id , :
|
85
|
-
[ 3 ,
|
86
|
-
[ 3 , run_id , 'external_id' , 'CREATE' , 'foo_id' , 22 , 'content' ] ,
|
78
|
+
insert_into(subject.stage_table_name.to_sym).values(
|
79
|
+
[ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
|
80
|
+
[ 3 , 'external_id' , 'CREATE' , 'foo_id' , 22 , 'content' ] ,
|
87
81
|
)
|
88
82
|
|
89
83
|
subject.load_create
|
@@ -102,10 +96,9 @@ module BeetleETL
|
|
102
96
|
[ 1 , 'external_id' , external_source , 22 , yesterday , yesterday , nil , 'content' ] ,
|
103
97
|
)
|
104
98
|
|
105
|
-
insert_into(
|
106
|
-
[ :id , :
|
107
|
-
[ 1 ,
|
108
|
-
[ 1 , run_id , 'external_id' , 'UPDATE' , 'foo_id' , 33 , 'updated content' ] ,
|
99
|
+
insert_into(subject.stage_table_name.to_sym).values(
|
100
|
+
[ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
|
101
|
+
[ 1 , 'external_id' , 'UPDATE' , 'foo_id' , 33 , 'updated content' ] ,
|
109
102
|
)
|
110
103
|
|
111
104
|
subject.load_update
|
@@ -124,10 +117,9 @@ module BeetleETL
|
|
124
117
|
[ 1 , 'external_id' , external_source , 22 , yesterday , yesterday , nil , 'content' ] ,
|
125
118
|
)
|
126
119
|
|
127
|
-
insert_into(
|
128
|
-
[ :id , :
|
129
|
-
[ 1 ,
|
130
|
-
[ 1 , run_id , 'external_id' , 'DELETE' , 'foo_id' , 33 , 'updated content' ] ,
|
120
|
+
insert_into(subject.stage_table_name.to_sym).values(
|
121
|
+
[ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
|
122
|
+
[ 1 , 'external_id' , 'DELETE' , 'foo_id' , 33 , 'updated content' ] ,
|
131
123
|
)
|
132
124
|
|
133
125
|
subject.load_delete
|
@@ -140,16 +132,15 @@ module BeetleETL
|
|
140
132
|
end
|
141
133
|
|
142
134
|
describe '#load_undelete' do
|
143
|
-
it '
|
135
|
+
it 'restores deleted records' do
|
144
136
|
insert_into(:example_table).values(
|
145
137
|
[ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
|
146
138
|
[ 1 , 'external_id' , external_source , 22 , yesterday , yesterday , nil , 'content' ] ,
|
147
139
|
)
|
148
140
|
|
149
|
-
insert_into(
|
150
|
-
[ :id , :
|
151
|
-
[ 1 ,
|
152
|
-
[ 1 , run_id , 'external_id' , 'UNDELETE' , 'foo_id' , 33 , 'updated content' ] ,
|
141
|
+
insert_into(subject.stage_table_name.to_sym).values(
|
142
|
+
[ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
|
143
|
+
[ 1 , 'external_id' , 'UNDELETE' , 'foo_id' , 33 , 'updated content' ] ,
|
153
144
|
)
|
154
145
|
|
155
146
|
subject.load_undelete
|
@@ -160,5 +151,6 @@ module BeetleETL
|
|
160
151
|
)
|
161
152
|
end
|
162
153
|
end
|
154
|
+
|
163
155
|
end
|
164
156
|
end
|
@@ -3,33 +3,37 @@ require 'spec_helper'
|
|
3
3
|
module BeetleETL
|
4
4
|
describe MapRelations do
|
5
5
|
|
6
|
-
let(:
|
7
|
-
let(:
|
6
|
+
let(:dependee_a) { BeetleETL::Naming.stage_table_name(:dependee_a).to_sym }
|
7
|
+
let(:dependee_b) { BeetleETL::Naming.stage_table_name(:dependee_b).to_sym }
|
8
|
+
|
9
|
+
let(:relations) do
|
10
|
+
{
|
11
|
+
dependee_a_id: :dependee_a,
|
12
|
+
dependee_b_id: :dependee_b,
|
13
|
+
}
|
14
|
+
end
|
15
|
+
|
16
|
+
subject do
|
17
|
+
MapRelations.new(:depender, relations)
|
18
|
+
end
|
8
19
|
|
9
20
|
before do
|
10
21
|
BeetleETL.configure do |config|
|
11
|
-
config.stage_schema = 'stage'
|
12
22
|
config.external_source = 'my_source'
|
13
23
|
config.database = test_database
|
14
24
|
end
|
15
25
|
|
16
|
-
|
17
|
-
|
18
|
-
test_database.create_schema(:stage)
|
19
|
-
test_database.create_table(:stage__dependee_a) do
|
20
|
-
Integer :import_run_id
|
26
|
+
test_database.create_table(dependee_a) do
|
21
27
|
Integer :id
|
22
28
|
String :external_id, size: 255
|
23
29
|
end
|
24
30
|
|
25
|
-
test_database.create_table(
|
26
|
-
Integer :import_run_id
|
31
|
+
test_database.create_table(dependee_b) do
|
27
32
|
Integer :id
|
28
33
|
String :external_id, size: 255
|
29
34
|
end
|
30
35
|
|
31
|
-
test_database.create_table(
|
32
|
-
Integer :import_run_id
|
36
|
+
test_database.create_table(subject.stage_table_name.to_sym) do
|
33
37
|
String :external_id, size: 255
|
34
38
|
|
35
39
|
String :external_dependee_a_id
|
@@ -42,12 +46,7 @@ module BeetleETL
|
|
42
46
|
|
43
47
|
describe '#depenencies' do
|
44
48
|
it 'depends on Transform of the same table and AssignIds of its dependees' do
|
45
|
-
|
46
|
-
dependee_a_id: :dependee_a,
|
47
|
-
dependee_b_id: :dependee_b,
|
48
|
-
}
|
49
|
-
|
50
|
-
expect(MapRelations.new(:depender, relations).dependencies).to eql(
|
49
|
+
expect(subject.dependencies).to eql(
|
51
50
|
[
|
52
51
|
'dependee_a: AssignIds',
|
53
52
|
'dependee_b: AssignIds',
|
@@ -59,34 +58,31 @@ module BeetleETL
|
|
59
58
|
|
60
59
|
describe '#run' do
|
61
60
|
it 'maps external foreign key references to id references ' do
|
62
|
-
insert_into(
|
63
|
-
[ :
|
64
|
-
[
|
65
|
-
[
|
61
|
+
insert_into(dependee_a).values(
|
62
|
+
[ :id , :external_id ] ,
|
63
|
+
[ 1 , 'a_id' ] ,
|
64
|
+
[ 2 , 'a_id' ] ,
|
66
65
|
)
|
67
66
|
|
68
|
-
insert_into(
|
69
|
-
[ :
|
70
|
-
[
|
67
|
+
insert_into(dependee_b).values(
|
68
|
+
[ :id , :external_id ] ,
|
69
|
+
[ 26 , 'b_id' ] ,
|
71
70
|
)
|
72
71
|
|
73
|
-
insert_into(
|
74
|
-
[ :
|
75
|
-
[
|
72
|
+
insert_into(subject.stage_table_name.to_sym).values(
|
73
|
+
[ :external_dependee_a_id , :external_dependee_b_id ] ,
|
74
|
+
[ 'a_id' , 'b_id' ] ,
|
76
75
|
)
|
77
76
|
|
78
|
-
relations = {
|
79
|
-
dependee_a_id: :dependee_a,
|
80
|
-
dependee_b_id: :dependee_b,
|
81
|
-
}
|
82
77
|
|
83
|
-
|
78
|
+
subject.run
|
84
79
|
|
85
|
-
expect(
|
86
|
-
[ :
|
87
|
-
[
|
80
|
+
expect(subject.stage_table_name.to_sym).to have_values(
|
81
|
+
[ :dependee_a_id , :dependee_b_id ] ,
|
82
|
+
[ 1 , 26 ] ,
|
88
83
|
)
|
89
84
|
end
|
90
85
|
end
|
86
|
+
|
91
87
|
end
|
92
88
|
end
|
@@ -6,22 +6,17 @@ require 'active_support/core_ext/numeric/time'
|
|
6
6
|
module BeetleETL
|
7
7
|
describe TableDiff do
|
8
8
|
|
9
|
-
let(:run_id) { 1 }
|
10
9
|
let(:external_source) { 'my_source' }
|
10
|
+
|
11
11
|
subject { TableDiff.new(:example_table) }
|
12
12
|
|
13
13
|
before do
|
14
14
|
BeetleETL.configure do |config|
|
15
|
-
config.stage_schema = 'stage'
|
16
15
|
config.external_source = external_source
|
17
16
|
config.database = test_database
|
18
17
|
end
|
19
18
|
|
20
|
-
|
21
|
-
|
22
|
-
test_database.create_schema(:stage)
|
23
|
-
test_database.create_table(:stage__example_table) do
|
24
|
-
Integer :import_run_id
|
19
|
+
test_database.create_table(subject.stage_table_name.to_sym) do
|
25
20
|
String :external_id, size: 255
|
26
21
|
String :transition, size: 20
|
27
22
|
|
@@ -68,18 +63,18 @@ module BeetleETL
|
|
68
63
|
[ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
69
64
|
)
|
70
65
|
|
71
|
-
insert_into(
|
72
|
-
[ :
|
73
|
-
[
|
74
|
-
[
|
66
|
+
insert_into(subject.stage_table_name.to_sym).values(
|
67
|
+
[ :external_id ] ,
|
68
|
+
[ 'created' ] ,
|
69
|
+
[ 'existing' ] ,
|
75
70
|
)
|
76
71
|
|
77
72
|
subject.transition_create
|
78
73
|
|
79
|
-
|
80
|
-
[ :
|
81
|
-
[
|
82
|
-
[
|
74
|
+
insert_into(subject.stage_table_name.to_sym).values(
|
75
|
+
[ :external_id , :transition ] ,
|
76
|
+
[ 'created' , 'CREATE' ] ,
|
77
|
+
[ 'existing' , nil ] ,
|
83
78
|
)
|
84
79
|
end
|
85
80
|
end
|
@@ -94,18 +89,18 @@ module BeetleETL
|
|
94
89
|
[ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
95
90
|
)
|
96
91
|
|
97
|
-
insert_into(
|
98
|
-
[ :
|
99
|
-
[
|
100
|
-
[
|
92
|
+
insert_into(subject.stage_table_name.to_sym).values(
|
93
|
+
[ :external_id , :payload , :foo_id , :external_foo_id ] ,
|
94
|
+
[ 'existing' , 'existing content' , 1 , 'ignored column' ] ,
|
95
|
+
[ 'deleted' , 'deleted content' , 2 , 'ignored column' ] ,
|
101
96
|
)
|
102
97
|
|
103
98
|
subject.transition_keep
|
104
99
|
|
105
|
-
expect(
|
106
|
-
[ :
|
107
|
-
[
|
108
|
-
[
|
100
|
+
expect(subject.stage_table_name.to_sym).to have_values(
|
101
|
+
[ :external_id , :transition ] ,
|
102
|
+
[ 'existing' , 'KEEP' ] ,
|
103
|
+
[ 'deleted' , nil ] ,
|
109
104
|
)
|
110
105
|
end
|
111
106
|
end
|
@@ -121,26 +116,26 @@ module BeetleETL
|
|
121
116
|
[ 'deleted' , external_source , 'deleted content' , 'ignored content' , 3 , 1.day.ago ] ,
|
122
117
|
)
|
123
118
|
|
124
|
-
insert_into(
|
125
|
-
[ :
|
126
|
-
[
|
127
|
-
[
|
128
|
-
[
|
119
|
+
insert_into(subject.stage_table_name.to_sym).values(
|
120
|
+
[ :external_id , :payload , :foo_id , :external_foo_id ] ,
|
121
|
+
[ 'existing_1' , 'updated content' , 1 , 'ignored_column' ] ,
|
122
|
+
[ 'existing_2' , 'existing content' , 4 , 'ignored_column' ] ,
|
123
|
+
[ 'deleted' , 'updated content' , 3 , 'ignored_column' ] ,
|
129
124
|
)
|
130
125
|
|
131
126
|
subject.transition_update
|
132
127
|
|
133
|
-
expect(
|
134
|
-
[ :
|
135
|
-
[
|
136
|
-
[
|
137
|
-
[
|
128
|
+
expect(subject.stage_table_name.to_sym).to have_values(
|
129
|
+
[ :external_id , :transition ] ,
|
130
|
+
[ 'existing_1' , 'UPDATE' ] ,
|
131
|
+
[ 'existing_2' , 'UPDATE' ] ,
|
132
|
+
[ 'deleted' , nil ] ,
|
138
133
|
)
|
139
134
|
end
|
140
135
|
end
|
141
136
|
|
142
137
|
describe 'transition_delete' do
|
143
|
-
it 'creates records with DELETE that no loger exist in the stage table' do
|
138
|
+
it 'creates records with DELETE that no loger exist in the stage table for the given run' do
|
144
139
|
insert_into(:example_table).values(
|
145
140
|
[ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
146
141
|
[ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
@@ -149,9 +144,9 @@ module BeetleETL
|
|
149
144
|
|
150
145
|
subject.transition_delete
|
151
146
|
|
152
|
-
expect(
|
153
|
-
[ :
|
154
|
-
[
|
147
|
+
expect(subject.stage_table_name.to_sym).to have_values(
|
148
|
+
[ :external_id , :transition ] ,
|
149
|
+
[ 'existing' , 'DELETE' ] ,
|
155
150
|
)
|
156
151
|
end
|
157
152
|
end
|
@@ -164,20 +159,21 @@ module BeetleETL
|
|
164
159
|
[ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
165
160
|
)
|
166
161
|
|
167
|
-
insert_into(
|
168
|
-
[ :
|
169
|
-
[
|
170
|
-
[
|
162
|
+
insert_into(subject.stage_table_name.to_sym).values(
|
163
|
+
[ :external_id , :payload , :foo_id , :external_foo_id ] ,
|
164
|
+
[ 'existing' , 'updated content' , 1 , 'ignored_column' ] ,
|
165
|
+
[ 'deleted' , 'updated content' , 2 , 'ignored_column' ] ,
|
171
166
|
)
|
172
167
|
|
173
168
|
subject.transition_undelete
|
174
169
|
|
175
|
-
expect(
|
176
|
-
[ :
|
177
|
-
[
|
178
|
-
[
|
170
|
+
expect(subject.stage_table_name.to_sym).to have_values(
|
171
|
+
[ :external_id , :transition ] ,
|
172
|
+
[ 'existing' , nil ] ,
|
173
|
+
[ 'deleted' , 'UNDELETE' ] ,
|
179
174
|
)
|
180
175
|
end
|
181
176
|
end
|
177
|
+
|
182
178
|
end
|
183
179
|
end
|