beetle_etl 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +23 -0
- data/.travis.yml +12 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +31 -0
- data/Rakefile +5 -0
- data/beetle_etl.gemspec +30 -0
- data/lib/beetle_etl.rb +85 -0
- data/lib/beetle_etl/dsl/dsl.rb +37 -0
- data/lib/beetle_etl/dsl/transformation.rb +26 -0
- data/lib/beetle_etl/dsl/transformation_loader.rb +22 -0
- data/lib/beetle_etl/import.rb +37 -0
- data/lib/beetle_etl/state.rb +67 -0
- data/lib/beetle_etl/steps/assign_ids.rb +54 -0
- data/lib/beetle_etl/steps/load.rb +108 -0
- data/lib/beetle_etl/steps/map_relations.rb +31 -0
- data/lib/beetle_etl/steps/step.rb +42 -0
- data/lib/beetle_etl/steps/table_diff.rb +155 -0
- data/lib/beetle_etl/steps/transform.rb +22 -0
- data/lib/beetle_etl/task_runner/dependency_resolver.rb +39 -0
- data/lib/beetle_etl/task_runner/task_runner.rb +64 -0
- data/lib/beetle_etl/version.rb +3 -0
- data/script/postgres +12 -0
- data/spec/beetle_etl_spec.rb +70 -0
- data/spec/dependency_resolver_spec.rb +57 -0
- data/spec/dsl/dsl_spec.rb +44 -0
- data/spec/dsl/transformation_loader_spec.rb +51 -0
- data/spec/dsl/transformation_spec.rb +54 -0
- data/spec/feature/example_schema.rb +192 -0
- data/spec/feature/example_transform.rb +37 -0
- data/spec/feature/feature_spec.rb +48 -0
- data/spec/import_spec.rb +7 -0
- data/spec/spec_helper.rb +25 -0
- data/spec/state_spec.rb +124 -0
- data/spec/steps/assign_ids_spec.rb +107 -0
- data/spec/steps/load_spec.rb +148 -0
- data/spec/steps/map_relations_spec.rb +92 -0
- data/spec/steps/step_spec.rb +37 -0
- data/spec/steps/table_diff_spec.rb +183 -0
- data/spec/steps/transform_spec.rb +34 -0
- data/spec/support/database.yml.example +9 -0
- data/spec/support/database.yml.travis +4 -0
- data/spec/support/database_helpers.rb +58 -0
- metadata +220 -0
data/spec/beetle_etl_spec.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the module-level entry points of BeetleETL:
# import, config, configure and database.
describe BeetleETL do
  describe '#import' do
    it 'runs the import' do
      # Swap the state object for a null double so only the call to
      # Import.run is under test here.
      allow(BeetleETL).to receive(:state) { double(:state).as_null_object }
      expect(BeetleETL::Import).to receive(:run)

      BeetleETL.import
    end

    context 'handling state' do
      it 'starts the import and marks it as finished if no errors are thrown' do
        allow(BeetleETL::Import).to receive(:run)

        # `ordered` pins the sequence: start first, success mark second.
        expect(BeetleETL.state).to receive(:start_import).ordered
        expect(BeetleETL.state).to receive(:mark_as_succeeded).ordered

        BeetleETL.import
      end

      it 'starts the import and marks it as failed if Import.run throws an error' do
        failure = Exception.new
        allow(BeetleETL::Import).to receive(:run).and_raise(failure)

        expect(BeetleETL.state).to receive(:start_import).ordered
        expect(BeetleETL.state).to receive(:mark_as_failed).ordered

        # The very same exception object must propagate to the caller.
        expect { BeetleETL.import }.to raise_error(failure)
      end
    end
  end

  describe '#config' do
    it 'returns a configuration object' do
      expect(BeetleETL.config).to be_kind_of(BeetleETL::Configuration)
    end
  end

  describe '#configure' do
    it 'allows the configuration to be changed' do
      expect(BeetleETL.config.external_source).to be_nil

      BeetleETL.configure do |c|
        c.external_source = 'foo'
      end

      expect(BeetleETL.config.external_source).to eql('foo')
    end
  end

  describe '#database' do
    let(:database) { double(:database) }

    it 'returns the Sequel Database object stored in the config' do
      BeetleETL.configure do |c|
        c.database = database
      end

      expect(BeetleETL.database).to eql(database)
    end

    it 'builds and caches a Sequel Database from config when no database is passed' do
      connection_settings = double(:database_config)
      BeetleETL.configure do |c|
        c.database_config = connection_settings
      end

      # `once` together with the two reads below verifies that the
      # connection is only established a single time.
      expect(Sequel).to receive(:connect).with(connection_settings).once.and_return(database)

      2.times do
        expect(BeetleETL.database).to eql(database)
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module BeetleETL
  # Specs for DependencyResolver: which items become resolvable once a given
  # set of names has been resolved, and how broken graphs are rejected.
  describe DependencyResolver do

    # Test dependencies, as declared by the `let` definitions below:
    #
    #   a - no dependencies
    #   b - depends on a
    #   c - depends on a
    #   d - depends on a, c
    #   e - depends on c, d
    #   f - depends on c, d

    # Minimal stand-in for the objects handed to the resolver. It lives in a
    # `let` instead of a constant: a constant assigned inside this block
    # would be defined on the enclosing BeetleETL module and leak into the
    # library's namespace.
    let(:item_class) { Struct.new(:name, :dependencies) }

    let(:a) { item_class.new(:a, Set.new) }
    let(:b) { item_class.new(:b, Set.new([:a])) }
    let(:c) { item_class.new(:c, Set.new([:a])) }
    let(:d) { item_class.new(:d, Set.new([:a, :c])) }
    let(:e) { item_class.new(:e, Set.new([:c, :d])) }
    let(:f) { item_class.new(:f, Set.new([:c, :d])) }

    # Shuffled so that the examples cannot rely on input order.
    def items
      [a, b, c, d, e, f].shuffle
    end

    describe '#resolvables' do
      let(:resolver) { DependencyResolver.new(items) }

      it 'returns all items without dependencies when given an empty array' do
        expect(resolver.resolvables([])).to match_array([a])
      end

      it 'returns all items with met dependencies' do
        expect(resolver.resolvables([:a, :b, :c])).to match_array([d])
        expect(resolver.resolvables([:a, :b, :c, :d])).to match_array([e, f])
      end
    end

    context 'with cyclic or missing dependencies' do
      # Named :a but depending on :b, while b depends on :a => a cycle.
      let(:cyclic) { item_class.new(:a, Set.new([:b])) }

      it 'detects cyclic dependencies' do
        expect { DependencyResolver.new([cyclic, b]) }.to \
          raise_error(BeetleETL::UnsatisfiableDependenciesError)
      end

      it 'detects unsatisfiable dependencies' do
        # b depends on :a, which is not part of the given items.
        expect { DependencyResolver.new([b]) }.to \
          raise_error(BeetleETL::UnsatisfiableDependenciesError)
      end
    end

  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module BeetleETL
  # Specs for the helper methods the DSL exposes for a given table.
  describe DSL do

    subject { DSL.new(:foo_table) }

    describe '#stage_table' do
      it 'returns the stage table name including the schema defined in the config' do
        BeetleETL.configure do |c|
          c.stage_schema = 'bar'
        end

        # Schema and table name are each wrapped in double quotes.
        qualified_name = %q("bar"."foo_table")
        expect(subject.stage_table).to eql(qualified_name)
      end
    end

    describe '#external_source' do
      it 'returns the external source‘s identifier' do
        expect(subject.external_source).to eql('source')
      end
    end

    describe '#combined_key' do
      it 'returns an SQL string for combined external ids' do
        expected_sql = "'[' || foo || ',' || bar || ']'"

        expect(subject.combined_key('foo', 'bar')).to eql(expected_sql)
      end

      it 'works with multiple arguments' do
        expected_sql = "'[' || foo || ',' || bar || ',' || baz || ']'"

        expect(subject.combined_key('foo', 'bar', 'baz')).to eql(expected_sql)
      end
    end

    describe '#import_run_id' do
      it 'returns the import run id defined in the config' do
        run_id = double(:id)
        allow(BeetleETL.state).to receive(:run_id).and_return(run_id)

        expect(subject.import_run_id).to eql(run_id)
      end
    end

  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'tempfile'
|
3
|
+
|
4
|
+
module BeetleETL
  # Specs for TransformationLoader#load, driven by a temporary
  # transformation file containing two `import` entries.
  describe TransformationLoader do

    # The Tempfile is memoized with `let` so that a reference to it stays
    # alive for the whole example: a Tempfile's backing file is deleted once
    # the object is garbage collected, and keeping only `data_file.path`
    # around (as a local inside a `before` hook would) does not prevent that.
    let(:data_file) do
      tempfile_with_contents <<-FILE
        import :foo do
          'foo'
        end

        import :bar do
          'bar'
        end
      FILE
    end

    before :example do
      BeetleETL.configure do |config|
        config.transformation_file = data_file.path
      end
    end

    # Remove the temporary file explicitly instead of waiting for GC or
    # interpreter exit.
    after :example do
      data_file.unlink
    end

    describe '#load' do
      it 'loads runlist entries from the data file' do
        # Each entry's block returns its own table name as a string
        # (see the file contents above), so name and block must match up.
        expect(Transformation).to receive(:new) do |table_name, config|
          expect(table_name.to_s).to eql(config.call)
        end.exactly(2).times

        subject.load
      end

      it 'adds every runlist entry to the entries array' do
        # Stand in for Transformation.new by returning the table name, which
        # makes the order of the loaded entries easy to assert on.
        allow(Transformation).to receive(:new) do |table_name, _config|
          table_name
        end

        transformations = subject.load

        expect(transformations).to eql(%i[foo bar])
      end
    end

    # Writes +contents+ to a new Tempfile and returns the closed Tempfile.
    # The caller is responsible for keeping a reference to it and for
    # unlinking it when done.
    def tempfile_with_contents(contents)
      Tempfile.new('transform').tap do |file|
        file.write(contents)
        file.close
      end
    end

  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module BeetleETL
  # Specs for Transformation: a table name plus a setup block that declares
  # relations and the query.
  describe Transformation do

    describe '#table_name' do
      it 'returns the given table name' do
        empty_setup = proc {}

        expect(Transformation.new(:table, empty_setup).table_name).to eql(:table)
      end
    end

    describe '#relations' do
      it 'returns the list of foreign tables and their foreign key column' do
        definition = proc do
          references :foreign_table, on: :foreign_table_id
        end

        # Relations are keyed by the foreign key column.
        expected_relations = { foreign_table_id: :foreign_table }

        expect(Transformation.new(:table, definition).relations).to eql(expected_relations)
      end
    end

    describe '#dependencies' do
      it 'returns the depending tables' do
        definition = proc do
          references :foreign_table, on: :foreign_table_id
          references :another_foreign_table, on: :another_foreign_table_id
        end

        expected_tables = Set[:foreign_table, :another_foreign_table]

        expect(Transformation.new(:table, definition).dependencies).to eql(expected_tables)
      end
    end

    describe '#query' do
      it 'returns the query interpolating methods in scope' do
        definition = proc do
          # A helper defined inside the setup block must be callable from
          # the interpolated query string.
          def foo
            "foo_string"
          end

          query "SELECT '#{foo}' FROM some_table"
        end

        built_query = Transformation.new(:table, definition).query

        expect(built_query).to eql("SELECT 'foo_string' FROM some_table")
      end
    end

  end
end
|
@@ -0,0 +1,192 @@
|
|
1
|
+
# Schema fixture for the feature specs.
#
# Creates and drops three groups of tables through `test_database`, which
# must be provided by whatever includes this module:
#
# * source tables - the external system's tables (German column names)
# * stage tables  - one per target table, plus `import_runs`
# * target tables - the application tables
#
# The `source__` / `stage__` prefixes in the table symbols match the schemas
# created right before them; presumably this is Sequel's schema-qualified
# symbol form - confirm against the Sequel version in use.
module ExampleSchema

  # Creates all source, stage and target tables, in that order.
  def create_tables
    create_source_tables
    create_stage_tables
    create_target_tables
  end

  # Drops everything created by #create_tables.
  def drop_tables
    drop_source_tables
    drop_stage_tables
    drop_target_tables
  end

  # Creates the `source` schema and its tables.
  #
  # Glossary for the German names: Abteilung = department, Vorname = first
  # name, Nachname = last name, Firma = company, Adresse = address,
  # Typ = type, Veranstaltung = event, Veranstaltungsbesuch = event
  # attendance, Besucher = attendee.
  def create_source_tables
    test_database.create_schema :source

    test_database.create_table :source__Organisation do
      Integer :pkOrgId
      String :Name, size: 255
      String :Abteilung, size: 255
    end

    test_database.create_table :source__Person do
      Integer :pkPersID
      String :Vorname, size: 255
      String :Nachname, size: 255
      Integer :fkFirma
      Integer :fkAdresse
      Integer :fkTyp
    end

    test_database.create_table :source__Veranstaltung do
      Integer :pkVeranstaltungId
      Integer :fkOrganisation
    end

    test_database.create_table :source__Veranstaltungsbesuch do
      Integer :fkVeranstaltung
      Integer :fkBesucher
    end
  end

  # Drops the `source` schema together with all tables in it.
  def drop_source_tables
    test_database.drop_schema :source, cascade: true
  end

  # Creates the `stage` schema and its tables.
  #
  # Every stage table except `import_runs` starts with the same bookkeeping
  # columns (id, external_id, import_run_id, transition) and an index on
  # [external_id, import_run_id], followed by its payload columns. Each
  # relation to another table is stored as a pair of columns:
  # `external_<name>_id` (String) and `<name>_id` (Integer).
  def create_stage_tables
    test_database.create_schema :stage

    test_database.create_table :stage__import_runs do
      primary_key :id
      DateTime :started_at
      DateTime :finished_at
      String :state, size: 255
    end

    test_database.create_table :stage__organisations do
      Integer :id
      String :external_id, size: 255
      foreign_key :import_run_id, :stage__import_runs
      index [:external_id, :import_run_id]
      String :transition, size: 255

      String :name, size: 255
    end

    test_database.create_table :stage__departments do
      Integer :id
      String :external_id, size: 255
      foreign_key :import_run_id, :stage__import_runs
      index [:external_id, :import_run_id]
      String :transition, size: 255

      String :name, size: 255

      String :external_organisation_id, size: 255
      Integer :organisation_id
    end

    test_database.create_table :stage__attendees do
      Integer :id
      String :external_id, size: 255
      foreign_key :import_run_id, :stage__import_runs
      index [:external_id, :import_run_id]
      String :transition, size: 255

      String :first_name, size: 255
      String :last_name, size: 255
    end

    test_database.create_table :stage__events do
      Integer :id
      String :external_id, size: 255
      foreign_key :import_run_id, :stage__import_runs
      index [:external_id, :import_run_id]
      String :transition, size: 255

      String :name, size: 255
      DateTime :starts_at
      DateTime :ends_at

      # Singular "organisation", matching the organisation_id column below
      # and the same column pair in stage__departments.
      String :external_organisation_id, size: 255
      Integer :organisation_id
    end

    test_database.create_table :stage__attendees_events do
      Integer :id
      String :external_id, size: 255
      foreign_key :import_run_id, :stage__import_runs
      index [:external_id, :import_run_id]
      String :transition, size: 255

      String :external_attendee_id, size: 255
      Integer :attendee_id

      String :external_event_id, size: 255
      Integer :event_id
    end
  end

  # Drops the `stage` schema together with all tables in it.
  def drop_stage_tables
    test_database.drop_schema :stage, cascade: true
  end

  # Creates the target tables. Tables that are referenced by foreign keys
  # are created before the tables referencing them.
  def create_target_tables
    test_database.create_table :organisations do
      primary_key :id
      String :external_id, size: 255
      String :external_source, size: 255
      String :name, size: 255
      DateTime :created_at
      DateTime :updated_at
      DateTime :deleted_at
    end

    test_database.create_table :departments do
      primary_key :id
      String :external_id, size: 255
      String :external_source, size: 255
      String :name, size: 255
      foreign_key :organisation_id, :organisations
      DateTime :created_at
      DateTime :updated_at
      DateTime :deleted_at
    end

    test_database.create_table :attendees do
      primary_key :id
      String :external_id, size: 255
      String :external_source, size: 255
      String :first_name, size: 255
      String :last_name, size: 255
      DateTime :created_at
      DateTime :updated_at
      DateTime :deleted_at
    end

    test_database.create_table :events do
      primary_key :id
      String :external_id, size: 255
      String :external_source, size: 255
      String :name, size: 255
      DateTime :starts_at
      DateTime :ends_at
      # Named organisation_id like the column in stage__events and the
      # foreign key in departments.
      foreign_key :organisation_id, :organisations
      DateTime :created_at
      DateTime :updated_at
      DateTime :deleted_at
    end

    test_database.create_table :attendees_events do
      foreign_key :attendee_id, :attendees, null: false
      foreign_key :event_id, :events, null: false
      primary_key [:attendee_id, :event_id]
      index [:attendee_id, :event_id]
      DateTime :created_at
      DateTime :updated_at
      DateTime :deleted_at
    end
  end

  # Drops the target tables in reverse creation order, so that every table
  # holding foreign keys goes before the tables it references.
  def drop_target_tables
    test_database.drop_table :attendees_events
    test_database.drop_table :events
    test_database.drop_table :attendees
    test_database.drop_table :departments
    test_database.drop_table :organisations
  end

end
|