beetle_etl 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +23 -0
- data/.travis.yml +12 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +31 -0
- data/Rakefile +5 -0
- data/beetle_etl.gemspec +30 -0
- data/lib/beetle_etl.rb +85 -0
- data/lib/beetle_etl/dsl/dsl.rb +37 -0
- data/lib/beetle_etl/dsl/transformation.rb +26 -0
- data/lib/beetle_etl/dsl/transformation_loader.rb +22 -0
- data/lib/beetle_etl/import.rb +37 -0
- data/lib/beetle_etl/state.rb +67 -0
- data/lib/beetle_etl/steps/assign_ids.rb +54 -0
- data/lib/beetle_etl/steps/load.rb +108 -0
- data/lib/beetle_etl/steps/map_relations.rb +31 -0
- data/lib/beetle_etl/steps/step.rb +42 -0
- data/lib/beetle_etl/steps/table_diff.rb +155 -0
- data/lib/beetle_etl/steps/transform.rb +22 -0
- data/lib/beetle_etl/task_runner/dependency_resolver.rb +39 -0
- data/lib/beetle_etl/task_runner/task_runner.rb +64 -0
- data/lib/beetle_etl/version.rb +3 -0
- data/script/postgres +12 -0
- data/spec/beetle_etl_spec.rb +70 -0
- data/spec/dependency_resolver_spec.rb +57 -0
- data/spec/dsl/dsl_spec.rb +44 -0
- data/spec/dsl/transformation_loader_spec.rb +51 -0
- data/spec/dsl/transformation_spec.rb +54 -0
- data/spec/feature/example_schema.rb +192 -0
- data/spec/feature/example_transform.rb +37 -0
- data/spec/feature/feature_spec.rb +48 -0
- data/spec/import_spec.rb +7 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/state_spec.rb +124 -0
- data/spec/steps/assign_ids_spec.rb +107 -0
- data/spec/steps/load_spec.rb +148 -0
- data/spec/steps/map_relations_spec.rb +92 -0
- data/spec/steps/step_spec.rb +37 -0
- data/spec/steps/table_diff_spec.rb +183 -0
- data/spec/steps/transform_spec.rb +34 -0
- data/spec/support/database.yml.example +9 -0
- data/spec/support/database.yml.travis +4 -0
- data/spec/support/database_helpers.rb +58 -0
- metadata +220 -0
data/spec/beetle_etl_spec.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Covers the module-level BeetleETL API: running an import, reading and
# changing the configuration, and resolving the database handle.
describe BeetleETL do
  describe '#import' do
    it 'runs the import' do
      # The state is swapped for a null object so that any message sent to it
      # while importing is accepted.
      allow(BeetleETL).to receive(:state) { double(:state).as_null_object }
      expect(BeetleETL::Import).to receive(:run)

      BeetleETL.import
    end

    context 'handling state' do
      it 'starts the import and marks it as finished if no errors are thrown' do
        allow(BeetleETL::Import).to receive(:run)

        # `.ordered` pins the sequence: start first, success afterwards.
        expect(BeetleETL.state).to receive(:start_import).ordered
        expect(BeetleETL.state).to receive(:mark_as_succeeded).ordered

        BeetleETL.import
      end

      it 'starts the import and marks it as failed if Import.run throws an error' do
        failure = Exception.new
        allow(BeetleETL::Import).to receive(:run).and_raise(failure)

        expect(BeetleETL.state).to receive(:start_import).ordered
        expect(BeetleETL.state).to receive(:mark_as_failed).ordered

        # The very same exception object must reach the caller.
        expect { BeetleETL.import }.to raise_error(failure)
      end
    end
  end

  describe '#config' do
    it 'returns a configuration object' do
      expect(BeetleETL.config).to be_a(BeetleETL::Configuration)
    end
  end

  describe '#configure' do
    it 'allows the configuration to be changed' do
      expect(BeetleETL.config.external_source).to be_nil

      BeetleETL.configure do |settings|
        settings.external_source = 'foo'
      end

      expect(BeetleETL.config.external_source).to eql('foo')
    end
  end

  describe '#database' do
    let(:connection) { double(:database) }

    it 'returns the Sequel Database object stored in the config' do
      BeetleETL.configure do |settings|
        settings.database = connection
      end

      expect(BeetleETL.database).to eql(connection)
    end

    it 'builds and caches a Sequel Database from config when no database is passed' do
      connection_options = double(:database_config)
      BeetleETL.configure do |settings|
        settings.database_config = connection_options
      end

      # `.once` is what proves the caching: the second lookup below must not
      # trigger another Sequel.connect.
      expect(Sequel).to receive(:connect).with(connection_options).once.and_return(connection)

      2.times { expect(BeetleETL.database).to eql(connection) }
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module BeetleETL
  # Specs for DependencyResolver. The resolver is fed items that respond to
  # `name` and `dependencies`; the examples check which items it reports as
  # resolvable for a given list of already-resolved names, and that
  # construction fails for dependency sets that cannot be satisfied.
  describe DependencyResolver do
    # Dependencies declared by the fixtures below (item: depends on):
    #
    #   a: -
    #   b: a
    #   c: a
    #   d: a, c
    #   e: c, d
    #   f: c, d

    # The item type is built through `let` instead of being assigned to a
    # constant: a constant declared inside a describe block is defined on the
    # lexically enclosing BeetleETL module and would leak out of this spec.
    let(:item_struct) { Struct.new(:name, :dependencies) }

    let(:a) { item_struct.new(:a, Set.new) }
    let(:b) { item_struct.new(:b, Set.new([:a])) }
    let(:c) { item_struct.new(:c, Set.new([:a])) }
    let(:d) { item_struct.new(:d, Set.new([:a, :c])) }
    let(:e) { item_struct.new(:e, Set.new([:c, :d])) }
    let(:f) { item_struct.new(:f, Set.new([:c, :d])) }

    # All fixtures in random order, so the examples do not depend on the
    # order in which items are handed to the resolver.
    def items
      [a, b, c, d, e, f].shuffle
    end

    describe '#resolvables' do
      let(:resolver) { DependencyResolver.new(items) }

      it 'returns all items without dependencies when given an empty array' do
        expect(resolver.resolvables([])).to match_array([a])
      end

      it 'returns all items with met dependencies' do
        expect(resolver.resolvables([:a, :b, :c])).to match_array([d])
        expect(resolver.resolvables([:a, :b, :c, :d])).to match_array([e, f])
      end
    end

    context 'with cyclic or missing dependencies' do
      # Named :a but depending on :b, while `b` depends on :a -- a cycle.
      let(:cyclic) { item_struct.new(:a, Set.new([:b])) }

      it 'detects cyclic dependencies' do
        expect { DependencyResolver.new([cyclic, b]) }.to \
          raise_error(BeetleETL::UnsatisfiableDependenciesError)
      end

      it 'detects unsatisfiable dependencies' do
        # `b` depends on :a, which is not part of the given items.
        expect { DependencyResolver.new([b]) }.to \
          raise_error(BeetleETL::UnsatisfiableDependenciesError)
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module BeetleETL
  # Specs for the helper methods a DSL instance exposes: the qualified stage
  # table name, the external source identifier, combined-key SQL snippets and
  # the current import run id.
  describe DSL do
    subject(:dsl) { DSL.new(:foo_table) }

    describe '#stage_table' do
      it 'returns the stage table name including the schema defined in the config' do
        BeetleETL.configure do |settings|
          settings.stage_schema = 'bar'
        end

        expect(dsl.stage_table).to eql('"bar"."foo_table"')
      end
    end

    describe '#external_source' do
      # NOTE(review): nothing in this example sets the value; 'source' is
      # presumably provided by shared spec setup or a default -- confirm.
      it 'returns the external source‘s identifier' do
        expect(dsl.external_source).to eql('source')
      end
    end

    describe '#combined_key' do
      it 'returns an SQL string for combined external ids' do
        expected_sql = "'[' || foo || ',' || bar || ']'"

        expect(dsl.combined_key('foo', 'bar')).to eql(expected_sql)
      end

      it 'works with multiple arguments' do
        expected_sql = "'[' || foo || ',' || bar || ',' || baz || ']'"

        expect(dsl.combined_key('foo', 'bar', 'baz')).to eql(expected_sql)
      end
    end

    describe '#import_run_id' do
      # NOTE(review): the description mentions the config, but the value is
      # stubbed on BeetleETL.state here.
      it 'returns the import run id defined in the config' do
        run_id = double(:id)
        allow(BeetleETL.state).to receive(:run_id).and_return(run_id)

        expect(dsl.import_run_id).to eql(run_id)
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
module BeetleETL
  # Specs for TransformationLoader: it is pointed (via the configuration) at
  # a transformation file containing `import` entries and is expected to
  # build one Transformation per entry.
  describe TransformationLoader do
    # Kept in a `let` so the Tempfile object stays referenced for the whole
    # example. Only the path string is handed to the configuration, and a
    # Tempfile deletes its file as soon as the object is garbage collected,
    # so an unreferenced Tempfile could vanish before `load` reads it.
    let(:data_file) do
      tempfile_with_contents <<-FILE
        import :foo do
          'foo'
        end

        import :bar do
          'bar'
        end
      FILE
    end

    before :example do
      transformation_path = data_file.path

      BeetleETL.configure do |config|
        config.transformation_file = transformation_path
      end
    end

    # Remove the temporary file explicitly instead of leaving it behind
    # until the interpreter exits.
    after :example do
      data_file.unlink
    end

    describe '#load' do
      it 'loads runlist entries from the data file' do
        # Each entry's block returns the entry's own name as a string, so the
        # table name and the block result must match.
        expect(Transformation).to receive(:new) do |table_name, config|
          expect(table_name.to_s).to eql(config.call)
        end.exactly(2).times

        subject.load
      end

      it 'adds every runlist entry to the entries array' do
        # Stand in for Transformation.new by returning just the table name.
        allow(Transformation).to receive(:new) do |table_name, _config|
          table_name
        end

        transformations = subject.load

        expect(transformations).to eql(%i[foo bar])
      end
    end

    # Writes +contents+ to a new temporary file and closes it.
    #
    # @param contents [String] text to write into the file
    # @return [Tempfile] the closed tempfile; its path remains readable while
    #   the object is referenced and not unlinked
    def tempfile_with_contents(contents)
      Tempfile.new('transform').tap do |file|
        file.write(contents)
        file.close
      end
    end
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module BeetleETL
  # Specs for Transformation: an object built from a table name and a
  # definition block, exposing the table name, declared relations, the
  # resulting dependencies and the query.
  describe Transformation do
    describe '#table_name' do
      it 'returns the given table name' do
        built = Transformation.new(:table, proc {})

        expect(built.table_name).to eql(:table)
      end
    end

    describe '#relations' do
      it 'returns the list of foreign tables and their foreign key column' do
        definition = proc do
          references :foreign_table, on: :foreign_table_id
        end
        expected_relations = { foreign_table_id: :foreign_table }

        built = Transformation.new(:table, definition)

        # Keyed by foreign key column, valued by the referenced table.
        expect(built.relations).to eql(expected_relations)
      end
    end

    describe '#dependencies' do
      it 'returns the depending tables' do
        definition = proc do
          references :foreign_table, on: :foreign_table_id
          references :another_foreign_table, on: :another_foreign_table_id
        end
        expected_tables = Set[:foreign_table, :another_foreign_table]

        built = Transformation.new(:table, definition)

        expect(built.dependencies).to eql(expected_tables)
      end
    end

    describe '#query' do
      it 'returns the query interpolating methods in scope' do
        # The helper is defined inside the definition block itself and is
        # interpolated into the SQL string of that same block.
        definition = proc do
          def foo
            "foo_string"
          end

          query "SELECT '#{foo}' FROM some_table"
        end

        built = Transformation.new(:table, definition)

        expect(built.query).to eql("SELECT 'foo_string' FROM some_table")
      end
    end
  end
end
|
@@ -0,0 +1,192 @@
|
|
1
|
+
# Schema fixtures for the feature spec: creates and drops the example source,
# stage and target tables. Every method talks to the database through
# `test_database`, which must be provided by whatever includes this module.
module ExampleSchema
  # Creates source, stage and target tables, in that order.
  def create_tables
    create_source_tables
    create_stage_tables
    create_target_tables
  end

  # Drops source, stage and target tables, in that order.
  def drop_tables
    drop_source_tables
    drop_stage_tables
    drop_target_tables
  end

  # Creates the `source` schema and its tables.
  # NOTE(review): the `schema__table` symbols presumably rely on Sequel
  # splitting on the double underscore to schema-qualify the name -- confirm
  # against the Sequel version in use.
  def create_source_tables
    test_database.create_schema(:source)

    test_database.create_table(:source__Organisation) do
      Integer :pkOrgId
      String :Name, size: 255
      String :Abteilung, size: 255
    end

    test_database.create_table(:source__Person) do
      Integer :pkPersID
      String :Vorname, size: 255
      String :Nachname, size: 255
      Integer :fkFirma
      Integer :fkAdresse
      Integer :fkTyp
    end

    test_database.create_table(:source__Veranstaltung) do
      Integer :pkVeranstaltungId
      Integer :fkOrganisation
    end

    test_database.create_table(:source__Veranstaltungsbesuch) do
      Integer :fkVeranstaltung
      Integer :fkBesucher
    end
  end

  # Drops the whole `source` schema, cascading to the tables inside it.
  def drop_source_tables
    test_database.drop_schema(:source, cascade: true)
  end

  # Creates the `stage` schema: the import_runs bookkeeping table plus one
  # stage table per target table. Each stage table starts with the same
  # columns (id, external_id, import_run_id, transition) followed by its own
  # payload and relation columns.
  def create_stage_tables
    test_database.create_schema(:stage)

    # Created first because every other stage table references it.
    test_database.create_table(:stage__import_runs) do
      primary_key :id
      DateTime :started_at
      DateTime :finished_at
      String :state, size: 255
    end

    test_database.create_table(:stage__organisations) do
      Integer :id
      String :external_id, size: 255
      foreign_key :import_run_id, :stage__import_runs
      index [:external_id, :import_run_id]
      String :transition, size: 255

      String :name, size: 255
    end

    test_database.create_table(:stage__departments) do
      Integer :id
      String :external_id, size: 255
      foreign_key :import_run_id, :stage__import_runs
      index [:external_id, :import_run_id]
      String :transition, size: 255

      String :name, size: 255

      String :external_organisation_id, size: 255
      Integer :organisation_id
    end

    test_database.create_table(:stage__attendees) do
      Integer :id
      String :external_id, size: 255
      foreign_key :import_run_id, :stage__import_runs
      index [:external_id, :import_run_id]
      String :transition, size: 255

      String :first_name, size: 255
      String :last_name, size: 255
    end

    test_database.create_table(:stage__events) do
      Integer :id
      String :external_id, size: 255
      foreign_key :import_run_id, :stage__import_runs
      index [:external_id, :import_run_id]
      String :transition, size: 255

      String :name, size: 255
      DateTime :starts_at
      DateTime :ends_at

      # NOTE(review): plural "organisations" here, whereas stage__departments
      # uses external_organisation_id next to organisation_id -- confirm
      # whether this spelling is intended.
      String :external_organisations_id, size: 255
      Integer :organisation_id
    end

    test_database.create_table(:stage__attendees_events) do
      Integer :id
      String :external_id, size: 255
      foreign_key :import_run_id, :stage__import_runs
      index [:external_id, :import_run_id]
      String :transition, size: 255

      String :external_attendee_id, size: 255
      Integer :attendee_id

      String :external_event_id, size: 255
      Integer :event_id
    end
  end

  # Drops the whole `stage` schema, cascading to the tables inside it.
  def drop_stage_tables
    test_database.drop_schema(:stage, cascade: true)
  end

  # Creates the target tables. Referenced tables are created before the
  # tables that point at them.
  def create_target_tables
    test_database.create_table(:organisations) do
      primary_key :id
      String :external_id, size: 255
      String :external_source, size: 255
      String :name, size: 255
      DateTime :created_at
      DateTime :updated_at
      DateTime :deleted_at
    end

    test_database.create_table(:departments) do
      primary_key :id
      String :external_id, size: 255
      String :external_source, size: 255
      String :name, size: 255
      foreign_key :organisation_id, :organisations
      DateTime :created_at
      DateTime :updated_at
      DateTime :deleted_at
    end

    test_database.create_table(:attendees) do
      primary_key :id
      String :external_id, size: 255
      String :external_source, size: 255
      String :first_name, size: 255
      String :last_name, size: 255
      DateTime :created_at
      DateTime :updated_at
      DateTime :deleted_at
    end

    test_database.create_table(:events) do
      primary_key :id
      String :external_id, size: 255
      String :external_source, size: 255
      String :name, size: 255
      DateTime :starts_at
      DateTime :ends_at
      # NOTE(review): column is named :organisation, while departments and
      # stage__events use :organisation_id -- confirm this is intended.
      foreign_key :organisation, :organisations
      DateTime :created_at
      DateTime :updated_at
      DateTime :deleted_at
    end

    # Join table with a composite primary key over both foreign keys.
    test_database.create_table(:attendees_events) do
      foreign_key :attendee_id, :attendees, null: false
      foreign_key :event_id, :events, null: false
      primary_key [:attendee_id, :event_id]
      index [:attendee_id, :event_id]
      DateTime :created_at
      DateTime :updated_at
      DateTime :deleted_at
    end
  end

  # Drops the target tables in reverse creation order, so referencing tables
  # go before the tables they reference.
  def drop_target_tables
    %i[attendees_events events attendees departments organisations].each do |table|
      test_database.drop_table(table)
    end
  end
end
|