beetle_etl 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.travis.yml +12 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +31 -0
  7. data/Rakefile +5 -0
  8. data/beetle_etl.gemspec +30 -0
  9. data/lib/beetle_etl.rb +85 -0
  10. data/lib/beetle_etl/dsl/dsl.rb +37 -0
  11. data/lib/beetle_etl/dsl/transformation.rb +26 -0
  12. data/lib/beetle_etl/dsl/transformation_loader.rb +22 -0
  13. data/lib/beetle_etl/import.rb +37 -0
  14. data/lib/beetle_etl/state.rb +67 -0
  15. data/lib/beetle_etl/steps/assign_ids.rb +54 -0
  16. data/lib/beetle_etl/steps/load.rb +108 -0
  17. data/lib/beetle_etl/steps/map_relations.rb +31 -0
  18. data/lib/beetle_etl/steps/step.rb +42 -0
  19. data/lib/beetle_etl/steps/table_diff.rb +155 -0
  20. data/lib/beetle_etl/steps/transform.rb +22 -0
  21. data/lib/beetle_etl/task_runner/dependency_resolver.rb +39 -0
  22. data/lib/beetle_etl/task_runner/task_runner.rb +64 -0
  23. data/lib/beetle_etl/version.rb +3 -0
  24. data/script/postgres +12 -0
  25. data/spec/beetle_etl_spec.rb +70 -0
  26. data/spec/dependency_resolver_spec.rb +57 -0
  27. data/spec/dsl/dsl_spec.rb +44 -0
  28. data/spec/dsl/transformation_loader_spec.rb +51 -0
  29. data/spec/dsl/transformation_spec.rb +54 -0
  30. data/spec/feature/example_schema.rb +192 -0
  31. data/spec/feature/example_transform.rb +37 -0
  32. data/spec/feature/feature_spec.rb +48 -0
  33. data/spec/import_spec.rb +7 -0
  34. data/spec/spec_helper.rb +25 -0
  35. data/spec/state_spec.rb +124 -0
  36. data/spec/steps/assign_ids_spec.rb +107 -0
  37. data/spec/steps/load_spec.rb +148 -0
  38. data/spec/steps/map_relations_spec.rb +92 -0
  39. data/spec/steps/step_spec.rb +37 -0
  40. data/spec/steps/table_diff_spec.rb +183 -0
  41. data/spec/steps/transform_spec.rb +34 -0
  42. data/spec/support/database.yml.example +9 -0
  43. data/spec/support/database.yml.travis +4 -0
  44. data/spec/support/database_helpers.rb +58 -0
  45. metadata +220 -0
@@ -0,0 +1,37 @@
1
+ import :organisations do
2
+ query <<-SQL
3
+ INSERT INTO #{stage_table} (
4
+ external_id,
5
+ import_run_id,
6
+ name
7
+ )
8
+
9
+ SELECT DISTINCT
10
+ o."Name",
11
+ #{import_run_id},
12
+ o."Name"
13
+
14
+ FROM source."Organisation" o
15
+ SQL
16
+ end
17
+
18
+ import :departments do
19
+ references :organisations, on: :organisation_id
20
+
21
+ query <<-SQL
22
+ INSERT INTO #{stage_table} (
23
+ external_id,
24
+ import_run_id,
25
+ name,
26
+ external_organisation_id
27
+ )
28
+
29
+ SELECT
30
+ #{combined_key('o."Name"', 'o."pkOrgId"')},
31
+ #{import_run_id},
32
+ o."Abteilung",
33
+ o."Name"
34
+
35
+ FROM source."Organisation" o
36
+ SQL
37
+ end
@@ -0,0 +1,48 @@
1
+ require 'spec_helper'
2
+ require_relative 'example_schema'
3
+ require 'yaml'
4
+
5
+ require 'active_support/core_ext/date/calculations'
6
+ require 'active_support/core_ext/numeric/time'
7
+
8
+ describe BeetleETL do
9
+
10
+ include ExampleSchema
11
+
12
+ let!(:now) { Time.new(2014, 07, 17, 16, 12).beginning_of_day }
13
+ before { allow(Time).to receive(:now) { now } }
14
+
15
+ before { create_tables }
16
+ after { drop_tables }
17
+
18
+ it 'is a working', :feature do
19
+ insert_into(:source__Organisation).values(
20
+ [ :pkOrgId , :Name , :Abteilung ] ,
21
+ [ 1 , 'Apple' , 'iPhone' ] ,
22
+ [ 2 , 'Apple' , 'MacBook' ] ,
23
+ )
24
+
25
+ BeetleETL.configure do |config|
26
+ config.transformation_file = File.expand_path('../example_transform.rb', __FILE__)
27
+ config.database = test_database
28
+ config.external_source = 'source_name'
29
+ config.stage_schema = 'stage'
30
+ end
31
+
32
+
33
+ BeetleETL.import
34
+
35
+
36
+ expect(:organisations).to have_values(
37
+ [ :id , :external_id , :external_source , :name , :created_at , :updated_at , :deleted_at ] ,
38
+ [ 1 , 'Apple' , 'source_name' , 'Apple' , now , now , nil ]
39
+ )
40
+
41
+ expect(:departments).to have_values(
42
+ [ :id , :external_id , :external_source , :name , :organisation_id , :created_at , :updated_at , :deleted_at ] ,
43
+ [ 1 , '[Apple,1]' , 'source_name' , 'iPhone' , 1 , now , now , nil ] ,
44
+ [ 2 , '[Apple,2]' , 'source_name' , 'MacBook' , 1 , now , now , nil ] ,
45
+ )
46
+ end
47
+
48
+ end
@@ -0,0 +1,7 @@
1
+ require 'spec_helper'
2
+
3
+ module BeetleETL
4
+ describe Import do
5
+
6
+ end
7
+ end
@@ -0,0 +1,25 @@
1
+ require "codeclimate-test-reporter"
2
+ CodeClimate::TestReporter.start
3
+
4
+ require_relative '../lib/beetle_etl.rb'
5
+ require_relative 'support/database_helpers.rb'
6
+
7
+ RSpec.configure do |config|
8
+
9
+ config.include SpecSupport::DatabaseHelpers
10
+ config.backtrace_exclusion_patterns = [/rspec-core/]
11
+
12
+ config.around(:each) do |example|
13
+ BeetleETL.reset
14
+ if example.metadata[:feature]
15
+ example.run
16
+ else
17
+ test_database.transaction do
18
+ example.run
19
+ raise Sequel::Error::Rollback
20
+ end
21
+ end
22
+ end
23
+
24
+ end
25
+
@@ -0,0 +1,124 @@
1
+ require 'spec_helper'
2
+
3
+ require 'active_support/core_ext/date/calculations'
4
+ require 'active_support/core_ext/numeric/time'
5
+
6
+ module BeetleETL
7
+ describe State do
8
+ subject { State.new }
9
+
10
+ before do
11
+ BeetleETL.configure do |config|
12
+ config.stage_schema = 'stage'
13
+ config.database = test_database
14
+ end
15
+
16
+ test_database.create_schema 'stage'
17
+ test_database.create_table :stage__import_runs do
18
+ primary_key :id
19
+ String :state, size: 10, null: false
20
+ DateTime :started_at, null: false
21
+ DateTime :finished_at
22
+ end
23
+ end
24
+
25
+ describe '#start_import' do
26
+ let(:now) { 1.minute.ago.beginning_of_day }
27
+
28
+ it 'registers a new import in the import_runs table' do
29
+ allow(subject).to receive(:now) { now }
30
+
31
+ subject.start_import
32
+
33
+ expect(:stage__import_runs).to have_values(
34
+ [ :id , :state , :started_at , :finished_at ] ,
35
+ [ 1 , 'RUNNING' , now , nil ]
36
+ )
37
+ end
38
+
39
+ it 'raises an exception if there is alreay an import marked as running' do
40
+ insert_into(:stage__import_runs).values(
41
+ [ :id , :state , :started_at , :finished_at ] ,
42
+ [ 1 , 'RUNNING' , now , nil ]
43
+ )
44
+
45
+ expect { subject.start_import }.to raise_exception(BeetleETL::ImportAleadyRunning)
46
+ end
47
+ end
48
+
49
+ context 'run ids' do
50
+ before do
51
+ insert_into(:stage__import_runs).values(
52
+ [ :state , :started_at , :finished_at ] ,
53
+ [ 'FAILED' , 8.days.ago , 7.days.ago ] ,
54
+ [ 'SUCCEEDED' , 6.days.ago , 5.day.ago ] ,
55
+ [ 'SUCCEEDED' , 4.days.ago , 3.days.ago ] ,
56
+ [ 'FAILED' , 2.days.ago , 1.day.ago ] ,
57
+ )
58
+ end
59
+
60
+ describe '#run_id' do
61
+ it 'returns the import‘s id after it has been started' do
62
+ subject.start_import
63
+ expect(subject.run_id).to eql(5)
64
+ end
65
+
66
+ it 'raises an exception when the import has not been started' do
67
+ expect { subject.run_id }.to raise_exception(BeetleETL::ImportNotRunning)
68
+ end
69
+ end
70
+
71
+ describe '#last_run_id' do
72
+ it 'returns nil if there is no last successful import' do
73
+ test_database[:stage__import_runs].update(state: 'FAILED')
74
+
75
+ subject.start_import
76
+ expect(subject.last_run_id).to be_nil
77
+ end
78
+
79
+ it 'returns the id of the last successul import' do
80
+ subject.start_import
81
+ expect(subject.last_run_id).to eql(3)
82
+ end
83
+ end
84
+ end
85
+
86
+ context 'marking imports' do
87
+ let(:now) { 1.minute.ago.beginning_of_day }
88
+ let(:one_day_ago) { 1.day.ago.beginning_of_day }
89
+
90
+ before do
91
+ insert_into(:stage__import_runs).values(
92
+ [ :state , :started_at , :finished_at ] ,
93
+ [ 'SUCCEEDED' , 2.days.ago , one_day_ago ] ,
94
+ )
95
+ allow(subject).to receive(:now) { now }
96
+ subject.start_import
97
+ end
98
+
99
+ describe '#mark_as_failed' do
100
+ it 'marks the current import as FAILED' do
101
+ subject.mark_as_failed
102
+
103
+ expect(:stage__import_runs).to have_values(
104
+ [ :id , :state , :finished_at ] ,
105
+ [ 1 , 'SUCCEEDED' , one_day_ago ] ,
106
+ [ 2 , 'FAILED' , now ] ,
107
+ )
108
+ end
109
+ end
110
+
111
+ describe '#mark_as_succeeded' do
112
+ it 'marks the current import as SUCCEEDED' do
113
+ subject.mark_as_succeeded
114
+
115
+ expect(:stage__import_runs).to have_values(
116
+ [ :id , :state , :finished_at ] ,
117
+ [ 1 , 'SUCCEEDED' , one_day_ago ] ,
118
+ [ 2 , 'SUCCEEDED' , now ] ,
119
+ )
120
+ end
121
+ end
122
+ end
123
+ end
124
+ end
@@ -0,0 +1,107 @@
1
+ require 'spec_helper'
2
+
3
+ module BeetleETL
4
+ describe AssignIds do
5
+
6
+ let(:run_id) { 1 }
7
+ let(:external_source) { 'my_source' }
8
+ subject { AssignIds.new(:example_table) }
9
+
10
+ before do
11
+ BeetleETL.configure do |config|
12
+ config.stage_schema = 'stage'
13
+ config.external_source = external_source
14
+ config.database = test_database
15
+ end
16
+
17
+ allow(BeetleETL).to receive(:state) { double(:state, run_id: run_id) }
18
+
19
+ test_database.create_schema(:stage)
20
+ test_database.create_table(:stage__example_table) do
21
+ Integer :id
22
+ Integer :import_run_id
23
+ String :external_id, size: 255
24
+ String :transition, size: 255
25
+ end
26
+
27
+ test_database.create_table(:example_table) do
28
+ primary_key :id
29
+ String :external_id, size: 255
30
+ String :external_source, size: 255
31
+ end
32
+
33
+ end
34
+
35
+ describe '#dependencies' do
36
+ it 'depends on TableDiff of the same table' do
37
+ expect(subject.dependencies).to eql(['example_table: TableDiff'].to_set)
38
+ end
39
+ end
40
+
41
+ describe '#run' do
42
+ it 'runs all transitions' do
43
+ %i(assign_new_ids map_existing_ids).each do |method|
44
+ expect(subject).to receive(method)
45
+ end
46
+
47
+ subject.run
48
+ end
49
+ end
50
+
51
+ describe '#assign_new_ids' do
52
+ it 'generates new ids for newly created records' do
53
+ insert_into(:example_table).values(
54
+ [ :external_id , :external_source ] ,
55
+ [ 'keep_id' , external_source ] ,
56
+ )
57
+
58
+ insert_into(:stage__example_table).values(
59
+ [ :import_run_id , :external_id , :transition ] ,
60
+ [ run_id , 'create_id' , 'CREATE' ] ,
61
+ [ run_id , 'keep_id' , 'KEEP' ] ,
62
+ )
63
+
64
+ subject.assign_new_ids
65
+
66
+ expect(:stage__example_table).to have_values(
67
+ [ :id , :import_run_id , :external_id , :transition ] ,
68
+ [ 2 , run_id , 'create_id' , 'CREATE' ] ,
69
+ [ nil , run_id , 'keep_id' , 'KEEP' ] ,
70
+ )
71
+ end
72
+ end
73
+
74
+ describe '#map_existing_ids' do
75
+ it 'assigns ids for existing records by their external id' do
76
+ insert_into(:example_table).values(
77
+ [ :external_id , :external_source ] ,
78
+ [ 'keep_id' , external_source ] ,
79
+ [ 'update_id' , external_source ] ,
80
+ [ 'delete_id' , external_source ] ,
81
+ [ 'undelete_id' , external_source ] ,
82
+ )
83
+
84
+ insert_into(:stage__example_table).values(
85
+ [ :import_run_id , :external_id , :transition ] ,
86
+ [ run_id , 'create_id' , 'CREATE' ] ,
87
+ [ run_id , 'keep_id' , 'KEEP' ] ,
88
+ [ run_id , 'update_id' , 'UPDATE' ] ,
89
+ [ run_id , 'delete_id' , 'DELETE' ] ,
90
+ [ run_id , 'undelete_id' , 'UNDELETE' ] ,
91
+ )
92
+
93
+ subject.map_existing_ids
94
+
95
+ expect(:stage__example_table).to have_values(
96
+ [ :id , :import_run_id , :external_id , :transition ] ,
97
+ [ nil , run_id , 'create_id' , 'CREATE' ] ,
98
+ [ 1 , run_id , 'keep_id' , 'KEEP' ] ,
99
+ [ 2 , run_id , 'update_id' , 'UPDATE' ] ,
100
+ [ 3 , run_id , 'delete_id' , 'DELETE' ] ,
101
+ [ 4 , run_id , 'undelete_id' , 'UNDELETE' ] ,
102
+ )
103
+ end
104
+ end
105
+
106
+ end
107
+ end
@@ -0,0 +1,148 @@
1
+ require 'spec_helper'
2
+
3
+ require 'active_support/core_ext/date/calculations'
4
+ require 'active_support/core_ext/numeric/time'
5
+
6
+ module BeetleETL
7
+ describe Load do
8
+
9
+ let(:run_id) { 1 }
10
+ let(:old_run_id) { 5000 }
11
+ let(:external_source) { 'my_source' }
12
+
13
+ let(:now) { Time.now.beginning_of_day }
14
+ let(:yesterday) { 1.day.ago.beginning_of_day }
15
+
16
+ subject { Load.new(:example_table) }
17
+
18
+ before do
19
+ BeetleETL.configure do |config|
20
+ config.stage_schema = 'stage'
21
+ config.external_source = external_source
22
+ config.database = test_database
23
+ end
24
+
25
+ allow(BeetleETL).to receive(:state) { double(:state, run_id: run_id) }
26
+ allow(subject).to receive(:now) { now }
27
+
28
+ test_database.create_schema(:stage)
29
+ test_database.create_table(:stage__example_table) do
30
+ Integer :import_run_id
31
+ Integer :id
32
+ String :external_id, size: 255
33
+ String :transition, size: 20
34
+
35
+ String :external_foo_id, size: 255
36
+ Integer :foo_id
37
+
38
+ String :payload, size: 255
39
+ end
40
+
41
+ test_database.create_table(:example_table) do
42
+ primary_key :id
43
+ String :external_id, size: 255
44
+ String :external_source, size: 255
45
+ DateTime :created_at
46
+ DateTime :updated_at
47
+ DateTime :deleted_at
48
+
49
+ String :payload, size: 255
50
+ String :ignored_attribute, size: 255
51
+ Integer :foo_id
52
+ end
53
+ end
54
+
55
+ describe '#run' do
56
+ it 'runs all load steps' do
57
+ %w(create update delete undelete).each do |transition|
58
+ expect(subject).to receive(:"load_#{transition}")
59
+ end
60
+
61
+ subject.run
62
+ end
63
+ end
64
+
65
+ describe '#load_create' do
66
+ it 'loads records into the public table' do
67
+ insert_into(:stage__example_table).values(
68
+ [ :id , :import_run_id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
69
+ [ 3 , old_run_id , 'external_id' , 'CREATE' , 'foo_id' , 999 , 'some content' ] ,
70
+ [ 3 , run_id , 'external_id' , 'CREATE' , 'foo_id' , 22 , 'content' ] ,
71
+ )
72
+
73
+ subject.load_create
74
+
75
+ expect(:example_table).to have_values(
76
+ [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
77
+ [ 3 , 'external_id' , external_source , 22 , now , now , nil , 'content' ] ,
78
+ )
79
+ end
80
+ end
81
+
82
+ describe '#load_update' do
83
+ it 'updates existing records' do
84
+ insert_into(:example_table).values(
85
+ [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
86
+ [ 1 , 'external_id' , external_source , 22 , yesterday , yesterday , nil , 'content' ] ,
87
+ )
88
+
89
+ insert_into(:stage__example_table).values(
90
+ [ :id , :import_run_id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
91
+ [ 1 , old_run_id , 'external_id' , 'UPDATE' , 'foo_id' , 999 , 'some content' ] ,
92
+ [ 1 , run_id , 'external_id' , 'UPDATE' , 'foo_id' , 33 , 'updated content' ] ,
93
+ )
94
+
95
+ subject.load_update
96
+
97
+ expect(:example_table).to have_values(
98
+ [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
99
+ [ 1 , 'external_id' , external_source , 33 , yesterday , now , nil , 'updated content' ] ,
100
+ )
101
+ end
102
+ end
103
+
104
+ describe '#load_delete' do
105
+ it 'marks existing records as deleted' do
106
+ insert_into(:example_table).values(
107
+ [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
108
+ [ 1 , 'external_id' , external_source , 22 , yesterday , yesterday , nil , 'content' ] ,
109
+ )
110
+
111
+ insert_into(:stage__example_table).values(
112
+ [ :id , :import_run_id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
113
+ [ 1 , old_run_id , 'external_id' , 'UPDATE' , 'foo_id' , 999 , 'some content' ] ,
114
+ [ 1 , run_id , 'external_id' , 'DELETE' , 'foo_id' , 33 , 'updated content' ] ,
115
+ )
116
+
117
+ subject.load_delete
118
+
119
+ expect(:example_table).to have_values(
120
+ [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
121
+ [ 1 , 'external_id' , external_source , 22 , yesterday , now , now , 'content' ] ,
122
+ )
123
+ end
124
+ end
125
+
126
+ describe '#load_undelete' do
127
+ it 'reinstates deleted records' do
128
+ insert_into(:example_table).values(
129
+ [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
130
+ [ 1 , 'external_id' , external_source , 22 , yesterday , yesterday , nil , 'content' ] ,
131
+ )
132
+
133
+ insert_into(:stage__example_table).values(
134
+ [ :id , :import_run_id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
135
+ [ 1 , old_run_id , 'external_id' , 'UPDATE' , 'foo_id' , 999 , 'some content' ] ,
136
+ [ 1 , run_id , 'external_id' , 'UNDELETE' , 'foo_id' , 33 , 'updated content' ] ,
137
+ )
138
+
139
+ subject.load_undelete
140
+
141
+ expect(:example_table).to have_values(
142
+ [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
143
+ [ 1 , 'external_id' , external_source , 33 , yesterday , now , nil , 'updated content' ] ,
144
+ )
145
+ end
146
+ end
147
+ end
148
+ end