beetle_etl 0.0.2 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. checksums.yaml +4 -4
  2. data/Gemfile +6 -0
  3. data/beetle_etl.gemspec +5 -6
  4. data/lib/beetle_etl.rb +7 -12
  5. data/lib/beetle_etl/dsl/dsl.rb +11 -12
  6. data/lib/beetle_etl/dsl/transformation.rb +10 -3
  7. data/lib/beetle_etl/dsl/transformation_loader.rb +13 -5
  8. data/lib/beetle_etl/import.rb +11 -4
  9. data/lib/beetle_etl/naming.rb +37 -0
  10. data/lib/beetle_etl/steps/assign_ids.rb +14 -38
  11. data/lib/beetle_etl/steps/create_stage.rb +59 -0
  12. data/lib/beetle_etl/steps/drop_stage.rb +15 -0
  13. data/lib/beetle_etl/steps/load.rb +46 -61
  14. data/lib/beetle_etl/steps/map_relations.rb +8 -14
  15. data/lib/beetle_etl/steps/step.rb +1 -8
  16. data/lib/beetle_etl/steps/table_diff.rb +68 -89
  17. data/lib/beetle_etl/steps/transform.rb +2 -4
  18. data/lib/beetle_etl/version.rb +1 -1
  19. data/spec/beetle_etl_spec.rb +3 -25
  20. data/spec/dsl/dsl_spec.rb +8 -15
  21. data/spec/dsl/transformation_loader_spec.rb +11 -4
  22. data/spec/dsl/transformation_spec.rb +40 -4
  23. data/spec/feature/example_schema.rb +2 -137
  24. data/spec/feature/example_transform.rb +13 -6
  25. data/spec/feature/feature_spec.rb +119 -18
  26. data/spec/steps/assign_ids_spec.rb +23 -28
  27. data/spec/steps/create_stage_spec.rb +89 -0
  28. data/spec/steps/load_spec.rb +15 -23
  29. data/spec/steps/map_relations_spec.rb +32 -36
  30. data/spec/steps/table_diff_spec.rb +41 -45
  31. data/spec/steps/transform_spec.rb +2 -0
  32. data/spec/{dependency_resolver_spec.rb → task_runner/dependency_resolver_spec.rb} +0 -0
  33. metadata +22 -36
  34. data/lib/beetle_etl/state.rb +0 -67
  35. data/spec/import_spec.rb +0 -7
  36. data/spec/state_spec.rb +0 -124
@@ -2,13 +2,11 @@ module ExampleSchema
2
2
 
3
3
  def create_tables
4
4
  create_source_tables
5
- create_stage_tables
6
5
  create_target_tables
7
6
  end
8
7
 
9
8
  def drop_tables
10
9
  drop_source_tables
11
- drop_stage_tables
12
10
  drop_target_tables
13
11
  end
14
12
 
@@ -18,118 +16,22 @@ module ExampleSchema
18
16
  test_database.create_table :source__Organisation do
19
17
  Integer :pkOrgId
20
18
  String :Name, size: 255
19
+ String :Adresse, size: 255
21
20
  String :Abteilung, size: 255
22
21
  end
23
-
24
- test_database.create_table :source__Person do
25
- Integer :pkPersID
26
- String :Vorname, size: 255
27
- String :Nachname, size: 255
28
- Integer :fkFirma
29
- Integer :fkAdresse
30
- Integer :fkTyp
31
- end
32
-
33
- test_database.create_table :source__Veranstaltung do
34
- Integer :pkVeranstaltungId
35
- Integer :fkOrganisation
36
- end
37
-
38
- test_database.create_table :source__Veranstaltungsbesuch do
39
- Integer :fkVeranstaltung
40
- Integer :fkBesucher
41
- end
42
22
  end
43
23
 
44
24
  def drop_source_tables
45
25
  test_database.drop_schema :source, cascade: true
46
26
  end
47
27
 
48
- def create_stage_tables
49
- test_database.create_schema :stage
50
-
51
- test_database.create_table :stage__import_runs do
52
- primary_key :id
53
- DateTime :started_at
54
- DateTime :finished_at
55
- String :state, size: 255
56
- end
57
-
58
- test_database.create_table :stage__organisations do
59
- Integer :id
60
- String :external_id, size: 255
61
- foreign_key :import_run_id, :stage__import_runs
62
- index [:external_id, :import_run_id]
63
- String :transition, size: 255
64
-
65
- String :name, size: 255
66
- end
67
-
68
- test_database.create_table :stage__departments do
69
- Integer :id
70
- String :external_id, size: 255
71
- foreign_key :import_run_id, :stage__import_runs
72
- index [:external_id, :import_run_id]
73
- String :transition, size: 255
74
-
75
- String :name, size: 255
76
-
77
- String :external_organisation_id, size: 255
78
- Integer :organisation_id
79
-
80
- end
81
-
82
- test_database.create_table :stage__attendees do
83
- Integer :id
84
- String :external_id, size: 255
85
- foreign_key :import_run_id, :stage__import_runs
86
- index [:external_id, :import_run_id]
87
- String :transition, size: 255
88
-
89
- String :first_name, size: 255
90
- String :last_name, size: 255
91
- end
92
-
93
- test_database.create_table :stage__events do
94
- Integer :id
95
- String :external_id, size: 255
96
- foreign_key :import_run_id, :stage__import_runs
97
- index [:external_id, :import_run_id]
98
- String :transition, size: 255
99
-
100
- String :name, size: 255
101
- DateTime :starts_at
102
- DateTime :ends_at
103
-
104
- String :external_organisations_id, size: 255
105
- Integer :organisation_id
106
- end
107
-
108
- test_database.create_table :stage__attendees_events do
109
- Integer :id
110
- String :external_id, size: 255
111
- foreign_key :import_run_id, :stage__import_runs
112
- index [:external_id, :import_run_id]
113
- String :transition, size: 255
114
-
115
- String :external_attendee_id, size: 255
116
- Integer :attendee_id
117
-
118
- String :external_event_id, size: 255
119
- Integer :event_id
120
- end
121
- end
122
-
123
- def drop_stage_tables
124
- test_database.drop_schema :stage, cascade: true
125
- end
126
-
127
28
  def create_target_tables
128
29
  test_database.create_table :organisations do
129
30
  primary_key :id
130
31
  String :external_id, size: 255
131
32
  String :external_source, size: 255
132
33
  String :name, size: 255
34
+ String :address, size: 255
133
35
  DateTime :created_at
134
36
  DateTime :updated_at
135
37
  DateTime :deleted_at
@@ -145,46 +47,9 @@ module ExampleSchema
145
47
  DateTime :updated_at
146
48
  DateTime :deleted_at
147
49
  end
148
-
149
- test_database.create_table :attendees do
150
- primary_key :id
151
- String :external_id, size: 255
152
- String :external_source, size: 255
153
- String :first_name, size: 255
154
- String :last_name, size: 255
155
- DateTime :created_at
156
- DateTime :updated_at
157
- DateTime :deleted_at
158
- end
159
-
160
- test_database.create_table :events do
161
- primary_key :id
162
- String :external_id, size: 255
163
- String :external_source, size: 255
164
- String :name, size: 255
165
- DateTime :starts_at
166
- DateTime :ends_at
167
- foreign_key :organisation, :organisations
168
- DateTime :created_at
169
- DateTime :updated_at
170
- DateTime :deleted_at
171
- end
172
-
173
- test_database.create_table :attendees_events do
174
- foreign_key :attendee_id, :attendees, null: false
175
- foreign_key :event_id, :events, null: false
176
- primary_key [:attendee_id, :event_id]
177
- index [:attendee_id, :event_id]
178
- DateTime :created_at
179
- DateTime :updated_at
180
- DateTime :deleted_at
181
- end
182
50
  end
183
51
 
184
52
  def drop_target_tables
185
- test_database.drop_table :attendees_events
186
- test_database.drop_table :events
187
- test_database.drop_table :attendees
188
53
  test_database.drop_table :departments
189
54
  test_database.drop_table :organisations
190
55
  end
@@ -1,37 +1,44 @@
1
+ helpers do
2
+ def source_schema_helper
3
+ 'source'
4
+ end
5
+ end
6
+
1
7
  import :organisations do
8
+ columns :name, :address
9
+
2
10
  query <<-SQL
3
11
  INSERT INTO #{stage_table} (
4
12
  external_id,
5
- import_run_id,
13
+ address,
6
14
  name
7
15
  )
8
16
 
9
17
  SELECT DISTINCT
10
18
  o."Name",
11
- #{import_run_id},
19
+ o."Adresse",
12
20
  o."Name"
13
21
 
14
- FROM source."Organisation" o
22
+ FROM #{source_schema_helper}."Organisation" o
15
23
  SQL
16
24
  end
17
25
 
18
26
  import :departments do
27
+ columns :name
19
28
  references :organisations, on: :organisation_id
20
29
 
21
30
  query <<-SQL
22
31
  INSERT INTO #{stage_table} (
23
32
  external_id,
24
- import_run_id,
25
33
  name,
26
34
  external_organisation_id
27
35
  )
28
36
 
29
37
  SELECT
30
38
  #{combined_key('o."Name"', 'o."pkOrgId"')},
31
- #{import_run_id},
32
39
  o."Abteilung",
33
40
  o."Name"
34
41
 
35
- FROM source."Organisation" o
42
+ FROM #{source_schema_helper}."Organisation" o
36
43
  SQL
37
44
  end
@@ -1,48 +1,149 @@
1
1
  require 'spec_helper'
2
+ require 'timecop'
3
+
2
4
  require_relative 'example_schema'
3
5
  require 'yaml'
4
6
 
5
7
  require 'active_support/core_ext/date/calculations'
6
8
  require 'active_support/core_ext/numeric/time'
7
9
 
10
+ Thread::abort_on_exception = true
11
+
8
12
  describe BeetleETL do
9
13
 
10
14
  include ExampleSchema
11
15
 
12
- let!(:now) { Time.new(2014, 07, 17, 16, 12).beginning_of_day }
13
- before { allow(Time).to receive(:now) { now } }
16
+ let!(:time1) { Time.new(2014 , 7 , 17 , 16 , 12).beginning_of_day }
17
+ let!(:time2) { Time.new(2015 , 2 , 8 , 22 , 18).beginning_of_day }
18
+ let!(:time3) { Time.new(2015 , 11 , 3 , 12 , 17).beginning_of_day }
14
19
 
15
- before { create_tables }
16
- after { drop_tables }
20
+ before :each do
21
+ create_tables
17
22
 
18
- it 'is a working', :feature do
19
- insert_into(:source__Organisation).values(
20
- [ :pkOrgId , :Name , :Abteilung ] ,
21
- [ 1 , 'Apple' , 'iPhone' ] ,
22
- [ 2 , 'Apple' , 'MacBook' ] ,
23
- )
23
+ database_config_path = File.expand_path('../support/database.yml', File.dirname(__FILE__))
24
+ database_config = YAML.load(File.read(database_config_path))
24
25
 
25
26
  BeetleETL.configure do |config|
26
27
  config.transformation_file = File.expand_path('../example_transform.rb', __FILE__)
27
- config.database = test_database
28
+ config.database_config = database_config
28
29
  config.external_source = 'source_name'
29
- config.stage_schema = 'stage'
30
30
  end
31
+ end
31
32
 
33
+ after do
34
+ drop_tables
35
+ end
32
36
 
33
- BeetleETL.import
37
+ it 'performs all possible transitions', :feature do
38
+ # create, keep, update, delete, undelete
39
+
40
+ import1
41
+ import2
42
+ import3
43
+ end
44
+
45
+ def import1
46
+ # create
47
+ insert_into(:source__Organisation).values(
48
+ [ :pkOrgId , :Name , :Adresse , :Abteilung ] ,
49
+ [ 1 , 'Apple' , 'Apple Street' , 'iPhone' ] ,
50
+ [ 2 , 'Apple' , 'Apple Street' , 'MacBook' ] ,
51
+ [ 3 , 'Google' , 'Google Street' , 'Gmail' ] ,
52
+ [ 4 , 'Audi' , 'Audi Street' , 'A4' ] ,
53
+ )
34
54
 
55
+ Timecop.freeze(time1) do
56
+ BeetleETL.import
57
+ end
35
58
 
36
59
  expect(:organisations).to have_values(
37
- [ :id , :external_id , :external_source , :name , :created_at , :updated_at , :deleted_at ] ,
38
- [ 1 , 'Apple' , 'source_name' , 'Apple' , now , now , nil ]
60
+ [ :id , :external_id , :external_source , :name , :address , :created_at , :updated_at , :deleted_at ] ,
61
+ [ organisation_id('Apple') , 'Apple' , 'source_name' , 'Apple' , 'Apple Street' , time1 , time1 , nil ] ,
62
+ [ organisation_id('Google') , 'Google' , 'source_name' , 'Google' , 'Google Street' , time1 , time1 , nil ] ,
63
+ [ organisation_id('Audi') , 'Audi' , 'source_name' , 'Audi' , 'Audi Street' , time1 , time1 , nil ]
39
64
  )
40
65
 
41
66
  expect(:departments).to have_values(
42
- [ :id , :external_id , :external_source , :name , :organisation_id , :created_at , :updated_at , :deleted_at ] ,
43
- [ 1 , '[Apple,1]' , 'source_name' , 'iPhone' , 1 , now , now , nil ] ,
44
- [ 2 , '[Apple,2]' , 'source_name' , 'MacBook' , 1 , now , now , nil ] ,
67
+ [ :id , :external_id , :organisation_id , :external_source , :name , :created_at , :updated_at , :deleted_at ] ,
68
+ [ department_id('[Apple,1]') , '[Apple,1]' , organisation_id('Apple') , 'source_name' , 'iPhone' , time1 , time1 , nil ] ,
69
+ [ department_id('[Apple,2]') , '[Apple,2]' , organisation_id('Apple') , 'source_name' , 'MacBook' , time1 , time1 , nil ] ,
70
+ [ department_id('[Google,3]') , '[Google,3]' , organisation_id('Google') , 'source_name' , 'Gmail' , time1 , time1 , nil ] ,
71
+ [ department_id('[Audi,4]') , '[Audi,4]' , organisation_id('Audi') , 'source_name' , 'A4' , time1 , time1 , nil ] ,
72
+ )
73
+
74
+ test_database[:source__Organisation].truncate
75
+ end
76
+
77
+ def import2
78
+ # keep, update, delete
79
+ insert_into(:source__Organisation).values(
80
+ [ :pkOrgId , :Name , :Adresse , :Abteilung ] ,
81
+ [ 1 , 'Apple' , 'Apple Street' , 'iPhone' ] ,
82
+ [ 2 , 'Apple' , 'Apple Street' , 'MacBook' ] ,
83
+ [ 3 , 'Google' , 'NEW Google Street' , 'Google+' ] ,
84
+ # [ 4 , 'Audi' , 'Audi Street' , 'A4' ] ,
85
+ )
86
+
87
+ Timecop.freeze(time2) do
88
+ BeetleETL.import
89
+ end
90
+
91
+ expect(:organisations).to have_values(
92
+ [ :id , :external_id , :external_source , :name , :address , :created_at , :updated_at , :deleted_at ] ,
93
+ [ organisation_id('Apple') , 'Apple' , 'source_name' , 'Apple' , 'Apple Street' , time1 , time1 , nil ] ,
94
+ [ organisation_id('Google') , 'Google' , 'source_name' , 'Google' , 'NEW Google Street' , time1 , time2 , nil ] ,
95
+ [ organisation_id('Audi') , 'Audi' , 'source_name' , 'Audi' , 'Audi Street' , time1 , time2 , time2 ]
45
96
  )
97
+
98
+ expect(:departments).to have_values(
99
+ [ :id , :external_id , :organisation_id , :external_source , :name , :created_at , :updated_at , :deleted_at ] ,
100
+ [ department_id('[Apple,1]') , '[Apple,1]' , organisation_id('Apple') , 'source_name' , 'iPhone' , time1 , time1 , nil ] ,
101
+ [ department_id('[Apple,2]') , '[Apple,2]' , organisation_id('Apple') , 'source_name' , 'MacBook' , time1 , time1 , nil ] ,
102
+ [ department_id('[Google,3]') , '[Google,3]' , organisation_id('Google') , 'source_name' , 'Google+' , time1 , time2 , nil ] ,
103
+ [ department_id('[Audi,4]') , '[Audi,4]' , organisation_id('Audi') , 'source_name' , 'A4' , time1 , time2 , time2 ] ,
104
+ )
105
+
106
+ test_database[:source__Organisation].truncate
107
+ end
108
+
109
+ def import3
110
+ # undelete with update
111
+ insert_into(:source__Organisation).values(
112
+ [ :pkOrgId , :Name , :Adresse , :Abteilung ] ,
113
+ [ 1 , 'Apple' , 'Apple Street' , 'iPhone' ] ,
114
+ [ 2 , 'Apple' , 'Apple Street' , 'MacBook' ] ,
115
+ [ 3 , 'Google' , 'NEW Google Street' , 'Google+' ] ,
116
+ [ 4 , 'Audi' , 'NEW Audi Street' , 'A4' ] ,
117
+ )
118
+
119
+ Timecop.freeze(time3) do
120
+ BeetleETL.import
121
+ end
122
+
123
+ expect(:organisations).to have_values(
124
+ [ :id , :external_id , :external_source , :name , :address , :created_at , :updated_at , :deleted_at ] ,
125
+ [ organisation_id('Apple') , 'Apple' , 'source_name' , 'Apple' , 'Apple Street' , time1 , time1 , nil ] ,
126
+ [ organisation_id('Google') , 'Google' , 'source_name' , 'Google' , 'NEW Google Street' , time1 , time2 , nil ] ,
127
+ [ organisation_id('Audi') , 'Audi' , 'source_name' , 'Audi' , 'NEW Audi Street' , time1 , time3 , nil ]
128
+ )
129
+
130
+ expect(:departments).to have_values(
131
+ [ :id , :external_id , :organisation_id , :external_source , :name , :created_at , :updated_at , :deleted_at ] ,
132
+ [ department_id('[Apple,1]') , '[Apple,1]' , organisation_id('Apple') , 'source_name' , 'iPhone' , time1 , time1 , nil ] ,
133
+ [ department_id('[Apple,2]') , '[Apple,2]' , organisation_id('Apple') , 'source_name' , 'MacBook' , time1 , time1 , nil ] ,
134
+ [ department_id('[Google,3]') , '[Google,3]' , organisation_id('Google') , 'source_name' , 'Google+' , time1 , time2 , nil ] ,
135
+ [ department_id('[Audi,4]') , '[Audi,4]' , organisation_id('Audi') , 'source_name' , 'A4' , time1 , time3 , nil ] ,
136
+ )
137
+
138
+ test_database[:source__Organisation].truncate
139
+ end
140
+
141
+ def organisation_id(external_id)
142
+ test_database[:organisations].first(external_id: external_id)[:id]
143
+ end
144
+
145
+ def department_id(external_id)
146
+ test_database[:departments].first(external_id: external_id)[:id]
46
147
  end
47
148
 
48
149
  end
@@ -3,7 +3,6 @@ require 'spec_helper'
3
3
  module BeetleETL
4
4
  describe AssignIds do
5
5
 
6
- let(:run_id) { 1 }
7
6
  let(:external_source) { 'my_source' }
8
7
  subject { AssignIds.new(:example_table) }
9
8
 
@@ -14,12 +13,8 @@ module BeetleETL
14
13
  config.database = test_database
15
14
  end
16
15
 
17
- allow(BeetleETL).to receive(:state) { double(:state, run_id: run_id) }
18
-
19
- test_database.create_schema(:stage)
20
- test_database.create_table(:stage__example_table) do
16
+ test_database.create_table(subject.stage_table_name.to_sym) do
21
17
  Integer :id
22
- Integer :import_run_id
23
18
  String :external_id, size: 255
24
19
  String :transition, size: 255
25
20
  end
@@ -55,18 +50,18 @@ module BeetleETL
55
50
  [ 'keep_id' , external_source ] ,
56
51
  )
57
52
 
58
- insert_into(:stage__example_table).values(
59
- [ :import_run_id , :external_id , :transition ] ,
60
- [ run_id , 'create_id' , 'CREATE' ] ,
61
- [ run_id , 'keep_id' , 'KEEP' ] ,
53
+ insert_into(subject.stage_table_name.to_sym).values(
54
+ [ :external_id , :transition ] ,
55
+ [ 'create_id' , 'CREATE' ] ,
56
+ [ 'keep_id' , 'KEEP' ] ,
62
57
  )
63
58
 
64
59
  subject.assign_new_ids
65
60
 
66
- expect(:stage__example_table).to have_values(
67
- [ :id , :import_run_id , :external_id , :transition ] ,
68
- [ 2 , run_id , 'create_id' , 'CREATE' ] ,
69
- [ nil , run_id , 'keep_id' , 'KEEP' ] ,
61
+ expect(subject.stage_table_name.to_sym).to have_values(
62
+ [ :id , :external_id , :transition ] ,
63
+ [ 2 , 'create_id' , 'CREATE' ] ,
64
+ [ nil , 'keep_id' , 'KEEP' ] ,
70
65
  )
71
66
  end
72
67
  end
@@ -81,24 +76,24 @@ module BeetleETL
81
76
  [ 'undelete_id' , external_source ] ,
82
77
  )
83
78
 
84
- insert_into(:stage__example_table).values(
85
- [ :import_run_id , :external_id , :transition ] ,
86
- [ run_id , 'create_id' , 'CREATE' ] ,
87
- [ run_id , 'keep_id' , 'KEEP' ] ,
88
- [ run_id , 'update_id' , 'UPDATE' ] ,
89
- [ run_id , 'delete_id' , 'DELETE' ] ,
90
- [ run_id , 'undelete_id' , 'UNDELETE' ] ,
79
+ insert_into(subject.stage_table_name.to_sym).values(
80
+ [ :external_id , :transition ] ,
81
+ [ 'create_id' , 'CREATE' ] ,
82
+ [ 'keep_id' , 'KEEP' ] ,
83
+ [ 'update_id' , 'UPDATE' ] ,
84
+ [ 'delete_id' , 'DELETE' ] ,
85
+ [ 'undelete_id' , 'UNDELETE' ] ,
91
86
  )
92
87
 
93
88
  subject.map_existing_ids
94
89
 
95
- expect(:stage__example_table).to have_values(
96
- [ :id , :import_run_id , :external_id , :transition ] ,
97
- [ nil , run_id , 'create_id' , 'CREATE' ] ,
98
- [ 1 , run_id , 'keep_id' , 'KEEP' ] ,
99
- [ 2 , run_id , 'update_id' , 'UPDATE' ] ,
100
- [ 3 , run_id , 'delete_id' , 'DELETE' ] ,
101
- [ 4 , run_id , 'undelete_id' , 'UNDELETE' ] ,
90
+ expect(subject.stage_table_name.to_sym).to have_values(
91
+ [ :id , :external_id , :transition ] ,
92
+ [ nil , 'create_id' , 'CREATE' ] ,
93
+ [ 1 , 'keep_id' , 'KEEP' ] ,
94
+ [ 2 , 'update_id' , 'UPDATE' ] ,
95
+ [ 3 , 'delete_id' , 'DELETE' ] ,
96
+ [ 4 , 'undelete_id' , 'UNDELETE' ] ,
102
97
  )
103
98
  end
104
99
  end