beetle_etl 0.0.2 → 0.0.7

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. checksums.yaml +4 -4
  2. data/Gemfile +6 -0
  3. data/beetle_etl.gemspec +5 -6
  4. data/lib/beetle_etl.rb +7 -12
  5. data/lib/beetle_etl/dsl/dsl.rb +11 -12
  6. data/lib/beetle_etl/dsl/transformation.rb +10 -3
  7. data/lib/beetle_etl/dsl/transformation_loader.rb +13 -5
  8. data/lib/beetle_etl/import.rb +11 -4
  9. data/lib/beetle_etl/naming.rb +37 -0
  10. data/lib/beetle_etl/steps/assign_ids.rb +14 -38
  11. data/lib/beetle_etl/steps/create_stage.rb +59 -0
  12. data/lib/beetle_etl/steps/drop_stage.rb +15 -0
  13. data/lib/beetle_etl/steps/load.rb +46 -61
  14. data/lib/beetle_etl/steps/map_relations.rb +8 -14
  15. data/lib/beetle_etl/steps/step.rb +1 -8
  16. data/lib/beetle_etl/steps/table_diff.rb +68 -89
  17. data/lib/beetle_etl/steps/transform.rb +2 -4
  18. data/lib/beetle_etl/version.rb +1 -1
  19. data/spec/beetle_etl_spec.rb +3 -25
  20. data/spec/dsl/dsl_spec.rb +8 -15
  21. data/spec/dsl/transformation_loader_spec.rb +11 -4
  22. data/spec/dsl/transformation_spec.rb +40 -4
  23. data/spec/feature/example_schema.rb +2 -137
  24. data/spec/feature/example_transform.rb +13 -6
  25. data/spec/feature/feature_spec.rb +119 -18
  26. data/spec/steps/assign_ids_spec.rb +23 -28
  27. data/spec/steps/create_stage_spec.rb +89 -0
  28. data/spec/steps/load_spec.rb +15 -23
  29. data/spec/steps/map_relations_spec.rb +32 -36
  30. data/spec/steps/table_diff_spec.rb +41 -45
  31. data/spec/steps/transform_spec.rb +2 -0
  32. data/spec/{dependency_resolver_spec.rb → task_runner/dependency_resolver_spec.rb} +0 -0
  33. metadata +22 -36
  34. data/lib/beetle_etl/state.rb +0 -67
  35. data/spec/import_spec.rb +0 -7
  36. data/spec/state_spec.rb +0 -124
@@ -0,0 +1,89 @@
1
+ require 'spec_helper'
2
+
3
+ module BeetleETL
4
+ describe CreateStage do
5
+
6
+ describe '#dependencies' do
7
+ it 'has no dependencies' do
8
+ subject = CreateStage.new(:example_table, double(:dependencies), double(:columns))
9
+ expect(subject.dependencies).to eql(Set.new)
10
+ end
11
+ end
12
+
13
+ describe '#run' do
14
+ before do
15
+ BeetleETL.configure do |config|
16
+ config.database = test_database
17
+ end
18
+
19
+ test_database.execute <<-SQL
20
+ CREATE TABLE example_table (
21
+ id INTEGER,
22
+ external_id character varying(255),
23
+ external_source character varying(255),
24
+
25
+ some_string character varying(200),
26
+ some_integer integer,
27
+ some_float double precision,
28
+
29
+ dependee_a_id integer,
30
+ dependee_b_id integer,
31
+
32
+ PRIMARY KEY (id)
33
+ )
34
+ SQL
35
+
36
+ @relations = {
37
+ dependee_a_id: :dependee_a,
38
+ dependee_b_id: :dependee_b,
39
+ }
40
+ @columns = %i(some_string some_integer some_float)
41
+ end
42
+
43
+ let(:subject) do
44
+ CreateStage.new(:example_table, @relations, @columns)
45
+ end
46
+
47
+ it 'creates a stage table table with all payload columns' do
48
+ subject.run
49
+
50
+ columns = Hash[test_database.schema(subject.stage_table_name.to_sym)]
51
+
52
+ expected_columns = %i(id external_id some_string some_integer some_float)
53
+ expect(columns.keys).to include(*expected_columns)
54
+
55
+ expect(columns[:id][:db_type]).to eq('integer')
56
+ expect(columns[:external_id][:db_type]).to eq('character varying(255)')
57
+ expect(columns[:transition][:db_type]).to eq('character varying(255)')
58
+
59
+ expect(columns[:some_string][:db_type]).to eq('character varying(200)')
60
+ expect(columns[:some_integer][:db_type]).to eq('integer')
61
+ expect(columns[:some_float][:db_type]).to eq('double precision')
62
+ end
63
+
64
+ it 'adds columns for dependent foreign key associations' do
65
+ subject.run
66
+
67
+ columns = Hash[test_database.schema(subject.stage_table_name)]
68
+
69
+ expected_columns = %i(
70
+ dependee_a_id external_dependee_a_id
71
+ dependee_b_id external_dependee_b_id
72
+ )
73
+ expect(columns.keys).to include(*expected_columns)
74
+
75
+ expect(columns[:dependee_a_id][:db_type]).to eq('integer')
76
+ expect(columns[:external_dependee_a_id][:db_type]).to eq('character varying(255)')
77
+
78
+ expect(columns[:dependee_b_id][:db_type]).to eq('integer')
79
+ expect(columns[:external_dependee_b_id][:db_type]).to eq('character varying(255)')
80
+ end
81
+
82
+ it 'does not add foreign key columns twice if defined as payload column' do
83
+ columns = [:some_string, :dependee_a_id]
84
+ CreateStage.new(:example_table, @relations, columns).run
85
+ end
86
+ end
87
+
88
+ end
89
+ end
@@ -6,8 +6,6 @@ require 'active_support/core_ext/numeric/time'
6
6
  module BeetleETL
7
7
  describe Load do
8
8
 
9
- let(:run_id) { 1 }
10
- let(:old_run_id) { 5000 }
11
9
  let(:external_source) { 'my_source' }
12
10
 
13
11
  let(:now) { Time.now.beginning_of_day }
@@ -17,17 +15,14 @@ module BeetleETL
17
15
 
18
16
  before do
19
17
  BeetleETL.configure do |config|
20
- config.stage_schema = 'stage'
21
18
  config.external_source = external_source
22
19
  config.database = test_database
23
20
  end
24
21
 
25
- allow(BeetleETL).to receive(:state) { double(:state, run_id: run_id) }
26
22
  allow(subject).to receive(:now) { now }
27
23
 
28
24
  test_database.create_schema(:stage)
29
- test_database.create_table(:stage__example_table) do
30
- Integer :import_run_id
25
+ test_database.create_table(subject.stage_table_name.to_sym) do
31
26
  Integer :id
32
27
  String :external_id, size: 255
33
28
  String :transition, size: 20
@@ -80,10 +75,9 @@ module BeetleETL
80
75
 
81
76
  describe '#load_create' do
82
77
  it 'loads records into the public table' do
83
- insert_into(:stage__example_table).values(
84
- [ :id , :import_run_id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
85
- [ 3 , old_run_id , 'external_id' , 'CREATE' , 'foo_id' , 999 , 'some content' ] ,
86
- [ 3 , run_id , 'external_id' , 'CREATE' , 'foo_id' , 22 , 'content' ] ,
78
+ insert_into(subject.stage_table_name.to_sym).values(
79
+ [ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
80
+ [ 3 , 'external_id' , 'CREATE' , 'foo_id' , 22 , 'content' ] ,
87
81
  )
88
82
 
89
83
  subject.load_create
@@ -102,10 +96,9 @@ module BeetleETL
102
96
  [ 1 , 'external_id' , external_source , 22 , yesterday , yesterday , nil , 'content' ] ,
103
97
  )
104
98
 
105
- insert_into(:stage__example_table).values(
106
- [ :id , :import_run_id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
107
- [ 1 , old_run_id , 'external_id' , 'UPDATE' , 'foo_id' , 999 , 'some content' ] ,
108
- [ 1 , run_id , 'external_id' , 'UPDATE' , 'foo_id' , 33 , 'updated content' ] ,
99
+ insert_into(subject.stage_table_name.to_sym).values(
100
+ [ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
101
+ [ 1 , 'external_id' , 'UPDATE' , 'foo_id' , 33 , 'updated content' ] ,
109
102
  )
110
103
 
111
104
  subject.load_update
@@ -124,10 +117,9 @@ module BeetleETL
124
117
  [ 1 , 'external_id' , external_source , 22 , yesterday , yesterday , nil , 'content' ] ,
125
118
  )
126
119
 
127
- insert_into(:stage__example_table).values(
128
- [ :id , :import_run_id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
129
- [ 1 , old_run_id , 'external_id' , 'UPDATE' , 'foo_id' , 999 , 'some content' ] ,
130
- [ 1 , run_id , 'external_id' , 'DELETE' , 'foo_id' , 33 , 'updated content' ] ,
120
+ insert_into(subject.stage_table_name.to_sym).values(
121
+ [ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
122
+ [ 1 , 'external_id' , 'DELETE' , 'foo_id' , 33 , 'updated content' ] ,
131
123
  )
132
124
 
133
125
  subject.load_delete
@@ -140,16 +132,15 @@ module BeetleETL
140
132
  end
141
133
 
142
134
  describe '#load_undelete' do
143
- it 'reinstates deleted records' do
135
+ it 'restores deleted records' do
144
136
  insert_into(:example_table).values(
145
137
  [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
146
138
  [ 1 , 'external_id' , external_source , 22 , yesterday , yesterday , nil , 'content' ] ,
147
139
  )
148
140
 
149
- insert_into(:stage__example_table).values(
150
- [ :id , :import_run_id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
151
- [ 1 , old_run_id , 'external_id' , 'UPDATE' , 'foo_id' , 999 , 'some content' ] ,
152
- [ 1 , run_id , 'external_id' , 'UNDELETE' , 'foo_id' , 33 , 'updated content' ] ,
141
+ insert_into(subject.stage_table_name.to_sym).values(
142
+ [ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
143
+ [ 1 , 'external_id' , 'UNDELETE' , 'foo_id' , 33 , 'updated content' ] ,
153
144
  )
154
145
 
155
146
  subject.load_undelete
@@ -160,5 +151,6 @@ module BeetleETL
160
151
  )
161
152
  end
162
153
  end
154
+
163
155
  end
164
156
  end
@@ -3,33 +3,37 @@ require 'spec_helper'
3
3
  module BeetleETL
4
4
  describe MapRelations do
5
5
 
6
- let(:run_id) { 1 }
7
- let(:previous_run_id) { 5000 }
6
+ let(:dependee_a) { BeetleETL::Naming.stage_table_name(:dependee_a).to_sym }
7
+ let(:dependee_b) { BeetleETL::Naming.stage_table_name(:dependee_b).to_sym }
8
+
9
+ let(:relations) do
10
+ {
11
+ dependee_a_id: :dependee_a,
12
+ dependee_b_id: :dependee_b,
13
+ }
14
+ end
15
+
16
+ subject do
17
+ MapRelations.new(:depender, relations)
18
+ end
8
19
 
9
20
  before do
10
21
  BeetleETL.configure do |config|
11
- config.stage_schema = 'stage'
12
22
  config.external_source = 'my_source'
13
23
  config.database = test_database
14
24
  end
15
25
 
16
- allow(BeetleETL).to receive(:state) { double(:state, run_id: run_id) }
17
-
18
- test_database.create_schema(:stage)
19
- test_database.create_table(:stage__dependee_a) do
20
- Integer :import_run_id
26
+ test_database.create_table(dependee_a) do
21
27
  Integer :id
22
28
  String :external_id, size: 255
23
29
  end
24
30
 
25
- test_database.create_table(:stage__dependee_b) do
26
- Integer :import_run_id
31
+ test_database.create_table(dependee_b) do
27
32
  Integer :id
28
33
  String :external_id, size: 255
29
34
  end
30
35
 
31
- test_database.create_table(:stage__depender) do
32
- Integer :import_run_id
36
+ test_database.create_table(subject.stage_table_name.to_sym) do
33
37
  String :external_id, size: 255
34
38
 
35
39
  String :external_dependee_a_id
@@ -42,12 +46,7 @@ module BeetleETL
42
46
 
43
47
  describe '#depenencies' do
44
48
  it 'depends on Transform of the same table and AssignIds of its dependees' do
45
- relations = {
46
- dependee_a_id: :dependee_a,
47
- dependee_b_id: :dependee_b,
48
- }
49
-
50
- expect(MapRelations.new(:depender, relations).dependencies).to eql(
49
+ expect(subject.dependencies).to eql(
51
50
  [
52
51
  'dependee_a: AssignIds',
53
52
  'dependee_b: AssignIds',
@@ -59,34 +58,31 @@ module BeetleETL
59
58
 
60
59
  describe '#run' do
61
60
  it 'maps external foreign key references to id references ' do
62
- insert_into(:stage__dependee_a).values(
63
- [ :import_run_id , :id , :external_id ] ,
64
- [ run_id , 1 , 'a_id' ] ,
65
- [ previous_run_id , 2 , 'a_id' ] ,
61
+ insert_into(dependee_a).values(
62
+ [ :id , :external_id ] ,
63
+ [ 1 , 'a_id' ] ,
64
+ [ 2 , 'a_id' ] ,
66
65
  )
67
66
 
68
- insert_into(:stage__dependee_b).values(
69
- [ :import_run_id , :id , :external_id ] ,
70
- [ run_id , 26 , 'b_id' ] ,
67
+ insert_into(dependee_b).values(
68
+ [ :id , :external_id ] ,
69
+ [ 26 , 'b_id' ] ,
71
70
  )
72
71
 
73
- insert_into(:stage__depender).values(
74
- [ :import_run_id , :external_dependee_a_id , :external_dependee_b_id ] ,
75
- [ run_id , 'a_id' , 'b_id' ] ,
72
+ insert_into(subject.stage_table_name.to_sym).values(
73
+ [ :external_dependee_a_id , :external_dependee_b_id ] ,
74
+ [ 'a_id' , 'b_id' ] ,
76
75
  )
77
76
 
78
- relations = {
79
- dependee_a_id: :dependee_a,
80
- dependee_b_id: :dependee_b,
81
- }
82
77
 
83
- MapRelations.new(:depender, relations).run
78
+ subject.run
84
79
 
85
- expect(:stage__depender).to have_values(
86
- [ :import_run_id , :dependee_a_id , :dependee_b_id ] ,
87
- [ run_id , 1 , 26 ] ,
80
+ expect(subject.stage_table_name.to_sym).to have_values(
81
+ [ :dependee_a_id , :dependee_b_id ] ,
82
+ [ 1 , 26 ] ,
88
83
  )
89
84
  end
90
85
  end
86
+
91
87
  end
92
88
  end
@@ -6,22 +6,17 @@ require 'active_support/core_ext/numeric/time'
6
6
  module BeetleETL
7
7
  describe TableDiff do
8
8
 
9
- let(:run_id) { 1 }
10
9
  let(:external_source) { 'my_source' }
10
+
11
11
  subject { TableDiff.new(:example_table) }
12
12
 
13
13
  before do
14
14
  BeetleETL.configure do |config|
15
- config.stage_schema = 'stage'
16
15
  config.external_source = external_source
17
16
  config.database = test_database
18
17
  end
19
18
 
20
- allow(BeetleETL).to receive(:state) { double(:state, run_id: run_id) }
21
-
22
- test_database.create_schema(:stage)
23
- test_database.create_table(:stage__example_table) do
24
- Integer :import_run_id
19
+ test_database.create_table(subject.stage_table_name.to_sym) do
25
20
  String :external_id, size: 255
26
21
  String :transition, size: 20
27
22
 
@@ -68,18 +63,18 @@ module BeetleETL
68
63
  [ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
69
64
  )
70
65
 
71
- insert_into(:stage__example_table).values(
72
- [ :import_run_id , :external_id ] ,
73
- [ run_id , 'created' ] ,
74
- [ run_id , 'existing' ] ,
66
+ insert_into(subject.stage_table_name.to_sym).values(
67
+ [ :external_id ] ,
68
+ [ 'created' ] ,
69
+ [ 'existing' ] ,
75
70
  )
76
71
 
77
72
  subject.transition_create
78
73
 
79
- expect(:stage__example_table).to have_values(
80
- [ :import_run_id , :external_id , :transition ] ,
81
- [ run_id , 'created' , 'CREATE' ] ,
82
- [ run_id , 'existing' , nil ] ,
74
+ insert_into(subject.stage_table_name.to_sym).values(
75
+ [ :external_id , :transition ] ,
76
+ [ 'created' , 'CREATE' ] ,
77
+ [ 'existing' , nil ] ,
83
78
  )
84
79
  end
85
80
  end
@@ -94,18 +89,18 @@ module BeetleETL
94
89
  [ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
95
90
  )
96
91
 
97
- insert_into(:stage__example_table).values(
98
- [ :import_run_id , :external_id , :payload , :foo_id , :external_foo_id ] ,
99
- [ run_id , 'existing' , 'existing content' , 1 , 'ignored column' ] ,
100
- [ run_id , 'deleted' , 'deleted content' , 2 , 'ignored column' ] ,
92
+ insert_into(subject.stage_table_name.to_sym).values(
93
+ [ :external_id , :payload , :foo_id , :external_foo_id ] ,
94
+ [ 'existing' , 'existing content' , 1 , 'ignored column' ] ,
95
+ [ 'deleted' , 'deleted content' , 2 , 'ignored column' ] ,
101
96
  )
102
97
 
103
98
  subject.transition_keep
104
99
 
105
- expect(:stage__example_table).to have_values(
106
- [ :import_run_id , :external_id , :transition ] ,
107
- [ run_id , 'existing' , 'KEEP' ] ,
108
- [ run_id , 'deleted' , nil ] ,
100
+ expect(subject.stage_table_name.to_sym).to have_values(
101
+ [ :external_id , :transition ] ,
102
+ [ 'existing' , 'KEEP' ] ,
103
+ [ 'deleted' , nil ] ,
109
104
  )
110
105
  end
111
106
  end
@@ -121,26 +116,26 @@ module BeetleETL
121
116
  [ 'deleted' , external_source , 'deleted content' , 'ignored content' , 3 , 1.day.ago ] ,
122
117
  )
123
118
 
124
- insert_into(:stage__example_table).values(
125
- [ :import_run_id , :external_id , :payload , :foo_id , :external_foo_id ] ,
126
- [ run_id , 'existing_1' , 'updated content' , 1 , 'ignored_column' ] ,
127
- [ run_id , 'existing_2' , 'existing content' , 4 , 'ignored_column' ] ,
128
- [ run_id , 'deleted' , 'updated content' , 3 , 'ignored_column' ] ,
119
+ insert_into(subject.stage_table_name.to_sym).values(
120
+ [ :external_id , :payload , :foo_id , :external_foo_id ] ,
121
+ [ 'existing_1' , 'updated content' , 1 , 'ignored_column' ] ,
122
+ [ 'existing_2' , 'existing content' , 4 , 'ignored_column' ] ,
123
+ [ 'deleted' , 'updated content' , 3 , 'ignored_column' ] ,
129
124
  )
130
125
 
131
126
  subject.transition_update
132
127
 
133
- expect(:stage__example_table).to have_values(
134
- [ :import_run_id , :external_id , :transition ] ,
135
- [ run_id , 'existing_1' , 'UPDATE' ] ,
136
- [ run_id , 'existing_2' , 'UPDATE' ] ,
137
- [ run_id , 'deleted' , nil ] ,
128
+ expect(subject.stage_table_name.to_sym).to have_values(
129
+ [ :external_id , :transition ] ,
130
+ [ 'existing_1' , 'UPDATE' ] ,
131
+ [ 'existing_2' , 'UPDATE' ] ,
132
+ [ 'deleted' , nil ] ,
138
133
  )
139
134
  end
140
135
  end
141
136
 
142
137
  describe 'transition_delete' do
143
- it 'creates records with DELETE that no loger exist in the stage table' do
138
+ it 'creates records with DELETE that no loger exist in the stage table for the given run' do
144
139
  insert_into(:example_table).values(
145
140
  [ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
146
141
  [ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
@@ -149,9 +144,9 @@ module BeetleETL
149
144
 
150
145
  subject.transition_delete
151
146
 
152
- expect(:stage__example_table).to have_values(
153
- [ :import_run_id , :external_id , :transition ] ,
154
- [ run_id , 'existing' , 'DELETE' ] ,
147
+ expect(subject.stage_table_name.to_sym).to have_values(
148
+ [ :external_id , :transition ] ,
149
+ [ 'existing' , 'DELETE' ] ,
155
150
  )
156
151
  end
157
152
  end
@@ -164,20 +159,21 @@ module BeetleETL
164
159
  [ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
165
160
  )
166
161
 
167
- insert_into(:stage__example_table).values(
168
- [ :import_run_id , :external_id , :payload , :foo_id , :external_foo_id ] ,
169
- [ run_id , 'existing' , 'updated content' , 1 , 'ignored_column' ] ,
170
- [ run_id , 'deleted' , 'updated content' , 2 , 'ignored_column' ] ,
162
+ insert_into(subject.stage_table_name.to_sym).values(
163
+ [ :external_id , :payload , :foo_id , :external_foo_id ] ,
164
+ [ 'existing' , 'updated content' , 1 , 'ignored_column' ] ,
165
+ [ 'deleted' , 'updated content' , 2 , 'ignored_column' ] ,
171
166
  )
172
167
 
173
168
  subject.transition_undelete
174
169
 
175
- expect(:stage__example_table).to have_values(
176
- [ :import_run_id , :external_id , :transition ] ,
177
- [ run_id , 'existing' , nil ] ,
178
- [ run_id , 'deleted' , 'UNDELETE' ] ,
170
+ expect(subject.stage_table_name.to_sym).to have_values(
171
+ [ :external_id , :transition ] ,
172
+ [ 'existing' , nil ] ,
173
+ [ 'deleted' , 'UNDELETE' ] ,
179
174
  )
180
175
  end
181
176
  end
177
+
182
178
  end
183
179
  end