beetle_etl 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45)
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.travis.yml +12 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +31 -0
  7. data/Rakefile +5 -0
  8. data/beetle_etl.gemspec +30 -0
  9. data/lib/beetle_etl.rb +85 -0
  10. data/lib/beetle_etl/dsl/dsl.rb +37 -0
  11. data/lib/beetle_etl/dsl/transformation.rb +26 -0
  12. data/lib/beetle_etl/dsl/transformation_loader.rb +22 -0
  13. data/lib/beetle_etl/import.rb +37 -0
  14. data/lib/beetle_etl/state.rb +67 -0
  15. data/lib/beetle_etl/steps/assign_ids.rb +54 -0
  16. data/lib/beetle_etl/steps/load.rb +108 -0
  17. data/lib/beetle_etl/steps/map_relations.rb +31 -0
  18. data/lib/beetle_etl/steps/step.rb +42 -0
  19. data/lib/beetle_etl/steps/table_diff.rb +155 -0
  20. data/lib/beetle_etl/steps/transform.rb +22 -0
  21. data/lib/beetle_etl/task_runner/dependency_resolver.rb +39 -0
  22. data/lib/beetle_etl/task_runner/task_runner.rb +64 -0
  23. data/lib/beetle_etl/version.rb +3 -0
  24. data/script/postgres +12 -0
  25. data/spec/beetle_etl_spec.rb +70 -0
  26. data/spec/dependency_resolver_spec.rb +57 -0
  27. data/spec/dsl/dsl_spec.rb +44 -0
  28. data/spec/dsl/transformation_loader_spec.rb +51 -0
  29. data/spec/dsl/transformation_spec.rb +54 -0
  30. data/spec/feature/example_schema.rb +192 -0
  31. data/spec/feature/example_transform.rb +37 -0
  32. data/spec/feature/feature_spec.rb +48 -0
  33. data/spec/import_spec.rb +7 -0
  34. data/spec/spec_helper.rb +25 -0
  35. data/spec/state_spec.rb +124 -0
  36. data/spec/steps/assign_ids_spec.rb +107 -0
  37. data/spec/steps/load_spec.rb +148 -0
  38. data/spec/steps/map_relations_spec.rb +92 -0
  39. data/spec/steps/step_spec.rb +37 -0
  40. data/spec/steps/table_diff_spec.rb +183 -0
  41. data/spec/steps/transform_spec.rb +34 -0
  42. data/spec/support/database.yml.example +9 -0
  43. data/spec/support/database.yml.travis +4 -0
  44. data/spec/support/database_helpers.rb +58 -0
  45. metadata +220 -0
data/lib/beetle_etl/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ module BeetleETL
2
+ VERSION = "0.0.1"
3
+ end
data/script/postgres ADDED
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env bash
2
+
3
+ config_file="/usr/local/var/postgres/postgresql.conf"
4
+
5
+ case "$1" in
6
+ "run" )
7
+ postgres -D /usr/local/var/postgres -c config_file=$config_file
8
+ ;;
9
+ * )
10
+ echo "USAGE: $0 {brew|…} run"
11
+ ;;
12
+ esac
data/spec/beetle_etl_spec.rb ADDED
@@ -0,0 +1,70 @@
1
+ require 'spec_helper'
2
+
3
+ describe BeetleETL do
4
+ describe '#import' do
5
+
6
+ it 'runs the import' do
7
+ allow(BeetleETL).to receive(:state) { double(:state).as_null_object }
8
+ expect(BeetleETL::Import).to receive(:run)
9
+ BeetleETL.import
10
+ end
11
+
12
+ context 'handling state' do
13
+ it 'starts the import and marks it as finished if no errors are thrown' do
14
+ allow(BeetleETL::Import).to receive(:run)
15
+
16
+ expect(BeetleETL.state).to receive(:start_import).ordered
17
+ expect(BeetleETL.state).to receive(:mark_as_succeeded).ordered
18
+
19
+ BeetleETL.import
20
+ end
21
+
22
+ it 'starts the import and marks it as failed if Import.run throws an error' do
23
+ exception = Exception.new
24
+ allow(BeetleETL::Import).to receive(:run).and_raise(exception)
25
+
26
+ expect(BeetleETL.state).to receive(:start_import).ordered
27
+ expect(BeetleETL.state).to receive(:mark_as_failed).ordered
28
+
29
+ expect { BeetleETL.import }.to raise_exception(exception)
30
+ end
31
+ end
32
+ end
33
+
34
+ describe '#config' do
35
+ it 'returns a configuration object' do
36
+ expect(BeetleETL.config).to be_a(BeetleETL::Configuration)
37
+ end
38
+ end
39
+
40
+ describe '#configure' do
41
+ it 'allows the configuration to be changed' do
42
+ expect(BeetleETL.config.external_source).to be_nil
43
+
44
+ BeetleETL.configure { |config| config.external_source = 'foo' }
45
+
46
+ expect(BeetleETL.config.external_source).to eql('foo')
47
+ end
48
+ end
49
+
50
+ describe '#database' do
51
+ let(:database) { double(:database) }
52
+
53
+ it 'returns the Sequel Database object stored in the config' do
54
+ BeetleETL.configure { |config| config.database = database }
55
+
56
+ expect(BeetleETL.database).to eql(database)
57
+ end
58
+
59
+ it 'builds and caches a Sequel Database from config when no database is passed' do
60
+ database_config = double(:database_config)
61
+ BeetleETL.configure { |config| config.database_config = database_config }
62
+
63
+ expect(Sequel).to receive(:connect).with(database_config).once { database }
64
+
65
+ expect(BeetleETL.database).to eql(database)
66
+ expect(BeetleETL.database).to eql(database)
67
+ end
68
+
69
+ end
70
+ end
data/spec/dependency_resolver_spec.rb ADDED
@@ -0,0 +1,57 @@
1
+ require 'spec_helper'
2
+
3
+ module BeetleETL
4
+ describe DependencyResolver do
5
+
6
+ # test dependencies
7
+ #
8
+ # A
9
+ # / | \
10
+ # B | C
11
+ # | \ /|
12
+ # | D |
13
+ # \ / \|
14
+ # E F
15
+
16
+ Item = Struct.new(:name, :dependencies)
17
+
18
+ let(:a) { Item.new(:a, Set.new) }
19
+ let(:b) { Item.new(:b, Set.new([:a])) }
20
+ let(:c) { Item.new(:c, Set.new([:a])) }
21
+ let(:d) { Item.new(:d, Set.new([:a, :c])) }
22
+ let(:e) { Item.new(:e, Set.new([:c, :d])) }
23
+ let(:f) { Item.new(:f, Set.new([:c, :d])) }
24
+
25
+ def items
26
+ [a, b, c, d, e, f].shuffle
27
+ end
28
+
29
+ describe '#resolvables' do
30
+ let(:resolver) { DependencyResolver.new(items) }
31
+
32
+ it 'returns all items without dependencies when given an empty array' do
33
+ expect(resolver.resolvables([])).to match_array([a])
34
+ end
35
+
36
+ it 'returns all items with met dependencies' do
37
+ expect(resolver.resolvables([:a, :b, :c])).to match_array([d])
38
+ expect(resolver.resolvables([:a, :b, :c, :d])).to match_array([e, f])
39
+ end
40
+ end
41
+
42
+ context 'with cyclic or missing dependencies' do
43
+ let(:cyclic) { Item.new(:a, Set.new([:b])) }
44
+
45
+ it 'detects cyclic dependencies' do
46
+ expect { DependencyResolver.new([cyclic, b]) }.to \
47
+ raise_error(BeetleETL::UnsatisfiableDependenciesError)
48
+ end
49
+
50
+ it 'detects unsatisfiable dependencies' do
51
+ expect { DependencyResolver.new([b]) }.to \
52
+ raise_error(BeetleETL::UnsatisfiableDependenciesError)
53
+ end
54
+ end
55
+
56
+ end
57
+ end
data/spec/dsl/dsl_spec.rb ADDED
@@ -0,0 +1,44 @@
1
+ require 'spec_helper'
2
+
3
+ module BeetleETL
4
+ describe DSL do
5
+
6
+ subject { DSL.new(:foo_table) }
7
+
8
+ describe '#stage_table' do
9
+ it 'returns the stage table name including the schema defined in the config' do
10
+ BeetleETL.configure { |config| config.stage_schema = 'bar' }
11
+ expect(subject.stage_table).to eql('"bar"."foo_table"')
12
+ end
13
+ end
14
+
15
+ describe '#external_source' do
16
+ it 'returns the external source‘s identifier' do
17
+ expect(subject.external_source).to eql('source')
18
+ end
19
+ end
20
+
21
+ describe '#combined_key' do
22
+ it 'returns an SQL string for combined external ids' do
23
+ expect(subject.combined_key('foo', 'bar')).to eql(
24
+ %q('[' || foo || ',' || bar || ']')
25
+ )
26
+ end
27
+
28
+ it 'works with multiple arguments' do
29
+ expect(subject.combined_key('foo', 'bar', 'baz')).to eql(
30
+ %q('[' || foo || ',' || bar || ',' || baz || ']')
31
+ )
32
+ end
33
+ end
34
+
35
+ describe '#import_run_id' do
36
+ it 'returns the import run id defined in the config' do
37
+ id = double(:id)
38
+ allow(BeetleETL.state).to receive(:run_id) { id }
39
+ expect(subject.import_run_id).to eql(id)
40
+ end
41
+ end
42
+
43
+ end
44
+ end
data/spec/dsl/transformation_loader_spec.rb ADDED
@@ -0,0 +1,51 @@
1
+ require 'spec_helper'
2
+ require 'tempfile'
3
+
4
+ module BeetleETL
5
+ describe TransformationLoader do
6
+
7
+ before :example do
8
+ data_file = tempfile_with_contents <<-FILE
9
+ import :foo do
10
+ 'foo'
11
+ end
12
+
13
+ import :bar do
14
+ 'bar'
15
+ end
16
+ FILE
17
+
18
+ BeetleETL.configure do |config|
19
+ config.transformation_file = data_file.path
20
+ end
21
+ end
22
+
23
+ describe '#load' do
24
+ it 'loads runlist entries from the data file' do
25
+ expect(Transformation).to receive(:new) do |table_name, config|
26
+ expect(table_name.to_s).to eql(config.call)
27
+ end.exactly(2).times
28
+
29
+ subject.load
30
+ end
31
+
32
+ it 'adds every runlist entry to the entries array' do
33
+ allow(Transformation).to receive(:new) do |table_name, config|
34
+ table_name
35
+ end
36
+
37
+ transformations = subject.load
38
+
39
+ expect(transformations).to eql(%i[foo bar])
40
+ end
41
+ end
42
+
43
+ def tempfile_with_contents(contents)
44
+ Tempfile.new('transform').tap do |file|
45
+ file.write(contents)
46
+ file.close
47
+ end
48
+ end
49
+
50
+ end
51
+ end
data/spec/dsl/transformation_spec.rb ADDED
@@ -0,0 +1,54 @@
1
+ require 'spec_helper'
2
+
3
+ module BeetleETL
4
+ describe Transformation do
5
+
6
+ describe '#table_name' do
7
+ it 'returns the given table name' do
8
+ transformation = Transformation.new(:table, Proc.new {})
9
+ expect(transformation.table_name).to eql(:table)
10
+ end
11
+ end
12
+
13
+ describe '#relations' do
14
+ it 'returns the list of foreign tables and their foreign key column' do
15
+ setup = Proc.new do
16
+ references :foreign_table, on: :foreign_table_id
17
+ end
18
+ transformation = Transformation.new(:table, setup)
19
+
20
+ expect(transformation.relations).to eql({
21
+ foreign_table_id: :foreign_table
22
+ })
23
+ end
24
+ end
25
+
26
+ describe '#dependencies' do
27
+ it 'returns the depending tables' do
28
+ setup = Proc.new do
29
+ references :foreign_table, on: :foreign_table_id
30
+ references :another_foreign_table, on: :another_foreign_table_id
31
+ end
32
+ transformation = Transformation.new(:table, setup)
33
+
34
+ expect(transformation.dependencies).to eql(Set.new([:foreign_table, :another_foreign_table]))
35
+ end
36
+ end
37
+
38
+ describe '#query' do
39
+ it 'returns the query interpolating methods in scope' do
40
+
41
+ setup = Proc.new do
42
+ def foo; "foo_string"; end
43
+ query "SELECT '#{foo}' FROM some_table"
44
+ end
45
+ transformation = Transformation.new(:table, setup)
46
+
47
+ expect(transformation.query).to eql(
48
+ "SELECT 'foo_string' FROM some_table"
49
+ )
50
+ end
51
+ end
52
+
53
+ end
54
+ end
data/spec/feature/example_schema.rb ADDED
@@ -0,0 +1,192 @@
1
+ module ExampleSchema
2
+
3
+ def create_tables
4
+ create_source_tables
5
+ create_stage_tables
6
+ create_target_tables
7
+ end
8
+
9
+ def drop_tables
10
+ drop_source_tables
11
+ drop_stage_tables
12
+ drop_target_tables
13
+ end
14
+
15
+ def create_source_tables
16
+ test_database.create_schema :source
17
+
18
+ test_database.create_table :source__Organisation do
19
+ Integer :pkOrgId
20
+ String :Name, size: 255
21
+ String :Abteilung, size: 255
22
+ end
23
+
24
+ test_database.create_table :source__Person do
25
+ Integer :pkPersID
26
+ String :Vorname, size: 255
27
+ String :Nachname, size: 255
28
+ Integer :fkFirma
29
+ Integer :fkAdresse
30
+ Integer :fkTyp
31
+ end
32
+
33
+ test_database.create_table :source__Veranstaltung do
34
+ Integer :pkVeranstaltungId
35
+ Integer :fkOrganisation
36
+ end
37
+
38
+ test_database.create_table :source__Veranstaltungsbesuch do
39
+ Integer :fkVeranstaltung
40
+ Integer :fkBesucher
41
+ end
42
+ end
43
+
44
+ def drop_source_tables
45
+ test_database.drop_schema :source, cascade: true
46
+ end
47
+
48
+ def create_stage_tables
49
+ test_database.create_schema :stage
50
+
51
+ test_database.create_table :stage__import_runs do
52
+ primary_key :id
53
+ DateTime :started_at
54
+ DateTime :finished_at
55
+ String :state, size: 255
56
+ end
57
+
58
+ test_database.create_table :stage__organisations do
59
+ Integer :id
60
+ String :external_id, size: 255
61
+ foreign_key :import_run_id, :stage__import_runs
62
+ index [:external_id, :import_run_id]
63
+ String :transition, size: 255
64
+
65
+ String :name, size: 255
66
+ end
67
+
68
+ test_database.create_table :stage__departments do
69
+ Integer :id
70
+ String :external_id, size: 255
71
+ foreign_key :import_run_id, :stage__import_runs
72
+ index [:external_id, :import_run_id]
73
+ String :transition, size: 255
74
+
75
+ String :name, size: 255
76
+
77
+ String :external_organisation_id, size: 255
78
+ Integer :organisation_id
79
+
80
+ end
81
+
82
+ test_database.create_table :stage__attendees do
83
+ Integer :id
84
+ String :external_id, size: 255
85
+ foreign_key :import_run_id, :stage__import_runs
86
+ index [:external_id, :import_run_id]
87
+ String :transition, size: 255
88
+
89
+ String :first_name, size: 255
90
+ String :last_name, size: 255
91
+ end
92
+
93
+ test_database.create_table :stage__events do
94
+ Integer :id
95
+ String :external_id, size: 255
96
+ foreign_key :import_run_id, :stage__import_runs
97
+ index [:external_id, :import_run_id]
98
+ String :transition, size: 255
99
+
100
+ String :name, size: 255
101
+ DateTime :starts_at
102
+ DateTime :ends_at
103
+
104
+ String :external_organisations_id, size: 255
105
+ Integer :organisation_id
106
+ end
107
+
108
+ test_database.create_table :stage__attendees_events do
109
+ Integer :id
110
+ String :external_id, size: 255
111
+ foreign_key :import_run_id, :stage__import_runs
112
+ index [:external_id, :import_run_id]
113
+ String :transition, size: 255
114
+
115
+ String :external_attendee_id, size: 255
116
+ Integer :attendee_id
117
+
118
+ String :external_event_id, size: 255
119
+ Integer :event_id
120
+ end
121
+ end
122
+
123
+ def drop_stage_tables
124
+ test_database.drop_schema :stage, cascade: true
125
+ end
126
+
127
+ def create_target_tables
128
+ test_database.create_table :organisations do
129
+ primary_key :id
130
+ String :external_id, size: 255
131
+ String :external_source, size: 255
132
+ String :name, size: 255
133
+ DateTime :created_at
134
+ DateTime :updated_at
135
+ DateTime :deleted_at
136
+ end
137
+
138
+ test_database.create_table :departments do
139
+ primary_key :id
140
+ String :external_id, size: 255
141
+ String :external_source, size: 255
142
+ String :name, size: 255
143
+ foreign_key :organisation_id, :organisations
144
+ DateTime :created_at
145
+ DateTime :updated_at
146
+ DateTime :deleted_at
147
+ end
148
+
149
+ test_database.create_table :attendees do
150
+ primary_key :id
151
+ String :external_id, size: 255
152
+ String :external_source, size: 255
153
+ String :first_name, size: 255
154
+ String :last_name, size: 255
155
+ DateTime :created_at
156
+ DateTime :updated_at
157
+ DateTime :deleted_at
158
+ end
159
+
160
+ test_database.create_table :events do
161
+ primary_key :id
162
+ String :external_id, size: 255
163
+ String :external_source, size: 255
164
+ String :name, size: 255
165
+ DateTime :starts_at
166
+ DateTime :ends_at
167
+ foreign_key :organisation, :organisations
168
+ DateTime :created_at
169
+ DateTime :updated_at
170
+ DateTime :deleted_at
171
+ end
172
+
173
+ test_database.create_table :attendees_events do
174
+ foreign_key :attendee_id, :attendees, null: false
175
+ foreign_key :event_id, :events, null: false
176
+ primary_key [:attendee_id, :event_id]
177
+ index [:attendee_id, :event_id]
178
+ DateTime :created_at
179
+ DateTime :updated_at
180
+ DateTime :deleted_at
181
+ end
182
+ end
183
+
184
+ def drop_target_tables
185
+ test_database.drop_table :attendees_events
186
+ test_database.drop_table :events
187
+ test_database.drop_table :attendees
188
+ test_database.drop_table :departments
189
+ test_database.drop_table :organisations
190
+ end
191
+
192
+ end