beetle_etl 1.0.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.byebug_history +8 -0
- data/.travis.yml +6 -1
- data/README.md +31 -9
- data/beetle_etl.gemspec +1 -1
- data/lib/beetle_etl.rb +7 -49
- data/lib/beetle_etl/configuration.rb +39 -0
- data/lib/beetle_etl/dsl/dsl.rb +6 -2
- data/lib/beetle_etl/dsl/transformation.rb +2 -2
- data/lib/beetle_etl/dsl/transformation_loader.rb +4 -3
- data/lib/beetle_etl/import.rb +15 -11
- data/lib/beetle_etl/naming.rb +10 -20
- data/lib/beetle_etl/reporter.rb +3 -2
- data/lib/beetle_etl/step_runner/async_step_runner.rb +6 -4
- data/lib/beetle_etl/steps/create_stage.rb +2 -2
- data/lib/beetle_etl/steps/load.rb +2 -2
- data/lib/beetle_etl/steps/map_relations.rb +2 -2
- data/lib/beetle_etl/steps/step.rb +23 -4
- data/lib/beetle_etl/steps/transform.rb +2 -2
- data/lib/beetle_etl/testing.rb +10 -5
- data/lib/beetle_etl/testing/test_wrapper.rb +4 -4
- data/lib/beetle_etl/version.rb +1 -1
- data/spec/beetle_etl_spec.rb +6 -38
- data/spec/configuration_spec.rb +66 -0
- data/spec/dsl/dsl_spec.rb +9 -3
- data/spec/dsl/transformation_loader_spec.rb +9 -8
- data/spec/dsl/transformation_spec.rb +9 -7
- data/spec/feature/feature_spec.rb +8 -8
- data/spec/reporter_spec.rb +5 -2
- data/spec/spec_helper.rb +4 -5
- data/spec/steps/assign_ids_spec.rb +7 -7
- data/spec/steps/create_stage_spec.rb +14 -12
- data/spec/steps/load_spec.rb +9 -7
- data/spec/steps/map_relations_spec.rb +14 -8
- data/spec/steps/step_spec.rb +5 -3
- data/spec/steps/table_diff_spec.rb +7 -6
- data/spec/steps/transform_spec.rb +8 -4
- data/spec/testing_spec.rb +1 -1
- metadata +9 -5
@@ -5,8 +5,8 @@ module BeetleETL
|
|
5
5
|
|
6
6
|
class CreateStage < Step
|
7
7
|
|
8
|
-
def initialize(table_name, relations, column_names)
|
9
|
-
super(table_name)
|
8
|
+
def initialize(config, table_name, relations, column_names)
|
9
|
+
super(config, table_name)
|
10
10
|
@relations = relations
|
11
11
|
@column_names = column_names
|
12
12
|
end
|
@@ -2,10 +2,10 @@ module BeetleETL
|
|
2
2
|
|
3
3
|
class Step
|
4
4
|
|
5
|
-
include BeetleETL::Naming
|
6
5
|
attr_reader :table_name
|
7
6
|
|
8
|
-
def initialize(table_name)
|
7
|
+
def initialize(config, table_name)
|
8
|
+
@config = config
|
9
9
|
@table_name = table_name
|
10
10
|
end
|
11
11
|
|
@@ -22,11 +22,30 @@ module BeetleETL
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def external_source
|
25
|
-
|
25
|
+
@config.external_source
|
26
26
|
end
|
27
27
|
|
28
28
|
def database
|
29
|
-
|
29
|
+
@config.database
|
30
|
+
end
|
31
|
+
|
32
|
+
# naming
|
33
|
+
|
34
|
+
def stage_table_name
|
35
|
+
BeetleETL::Naming.stage_table_name(@config.external_source, @table_name)
|
36
|
+
end
|
37
|
+
|
38
|
+
def stage_table_name_sql(table_name = nil)
|
39
|
+
table_name ||= @table_name
|
40
|
+
BeetleETL::Naming.stage_table_name_sql(@config.external_source, table_name)
|
41
|
+
end
|
42
|
+
|
43
|
+
def target_table_name
|
44
|
+
BeetleETL::Naming.target_table_name(@config.target_schema, @table_name)
|
45
|
+
end
|
46
|
+
|
47
|
+
def target_table_name_sql
|
48
|
+
BeetleETL::Naming.target_table_name_sql(@config.target_schema, @table_name)
|
30
49
|
end
|
31
50
|
|
32
51
|
end
|
data/lib/beetle_etl/testing.rb
CHANGED
@@ -6,9 +6,14 @@ module BeetleETL
|
|
6
6
|
TargetTableNotFoundError = Class.new(StandardError)
|
7
7
|
NoTransformationFoundError = Class.new(StandardError)
|
8
8
|
|
9
|
+
def self.configure
|
10
|
+
@@config = Configuration.new
|
11
|
+
yield(@@config)
|
12
|
+
end
|
13
|
+
|
9
14
|
def with_stage_tables_for(*table_names, &block)
|
10
15
|
table_names.each do |table_name|
|
11
|
-
unless
|
16
|
+
unless @@config.database.table_exists?(table_name)
|
12
17
|
raise TargetTableNotFoundError.new <<-MSG
|
13
18
|
Missing target table "#{table_name}".
|
14
19
|
In order to create stage tables, BeetleETL requires the target tables to exist because they provide the column definitions.
|
@@ -16,12 +21,12 @@ module BeetleETL
|
|
16
21
|
end
|
17
22
|
end
|
18
23
|
|
19
|
-
test_wrapper = TestWrapper.new(table_names)
|
24
|
+
test_wrapper = TestWrapper.new(@@config, table_names)
|
20
25
|
test_wrapper.run block
|
21
26
|
end
|
22
27
|
|
23
28
|
def run_transformation(table_name)
|
24
|
-
transformations = TransformationLoader.new.load
|
29
|
+
transformations = TransformationLoader.new(@@config).load
|
25
30
|
|
26
31
|
unless transformations.map(&:table_name).include?(table_name)
|
27
32
|
raise NoTransformationFoundError.new <<-MSG
|
@@ -30,13 +35,13 @@ module BeetleETL
|
|
30
35
|
end
|
31
36
|
|
32
37
|
transformation = transformations.find { |t| t.table_name == table_name }
|
33
|
-
transform = Transform.new(transformation.table_name, transformation.dependencies, transformation.query)
|
38
|
+
transform = Transform.new(@@config, transformation.table_name, transformation.dependencies, transformation.query)
|
34
39
|
transform.run
|
35
40
|
end
|
36
41
|
|
37
42
|
|
38
43
|
def stage_table_name(table_name)
|
39
|
-
BeetleETL::Naming.stage_table_name(table_name)
|
44
|
+
BeetleETL::Naming.stage_table_name(@@config.external_source, table_name)
|
40
45
|
end
|
41
46
|
|
42
47
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module BeetleETL
|
2
2
|
module Testing
|
3
|
-
class TestWrapper < Struct.new(:table_names)
|
3
|
+
class TestWrapper < Struct.new(:config, :table_names)
|
4
4
|
|
5
5
|
def run(block)
|
6
6
|
begin
|
@@ -15,18 +15,18 @@ module BeetleETL
|
|
15
15
|
|
16
16
|
def create_stages
|
17
17
|
transformations.each do |t|
|
18
|
-
CreateStage.new(t.table_name, t.relations, t.column_names).run
|
18
|
+
CreateStage.new(config, t.table_name, t.relations, t.column_names).run
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
22
|
def drop_stages
|
23
23
|
transformations.each do |t|
|
24
|
-
DropStage.new(t.table_name).run
|
24
|
+
DropStage.new(config, t.table_name).run
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
28
|
def transformations
|
29
|
-
@transformations ||= TransformationLoader.new.load.find_all do |transformation|
|
29
|
+
@transformations ||= TransformationLoader.new(config).load.find_all do |transformation|
|
30
30
|
table_names.include? transformation.table_name
|
31
31
|
end
|
32
32
|
end
|
data/lib/beetle_etl/version.rb
CHANGED
data/spec/beetle_etl_spec.rb
CHANGED
@@ -4,48 +4,16 @@ describe BeetleETL do
|
|
4
4
|
|
5
5
|
describe '#import' do
|
6
6
|
it 'runs the import with reporting' do
|
7
|
+
config = double(:config, disconnect_database: nil)
|
8
|
+
runner = double(:runner)
|
7
9
|
report = double(:report)
|
8
10
|
reporter = double(:reporter, log_summary: nil)
|
9
11
|
|
10
|
-
expect(BeetleETL::Import).to
|
11
|
-
expect(
|
12
|
-
expect(BeetleETL
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
describe '#config' do
|
17
|
-
it 'returns a configuration object' do
|
18
|
-
expect(BeetleETL.config).to be_a(BeetleETL::Configuration)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
describe '#configure' do
|
23
|
-
it 'allows the configuration to be changed' do
|
24
|
-
expect(BeetleETL.config.external_source).to be_nil
|
25
|
-
|
26
|
-
BeetleETL.configure { |config| config.external_source = 'foo' }
|
27
|
-
|
28
|
-
expect(BeetleETL.config.external_source).to eql('foo')
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
describe '#database' do
|
33
|
-
let(:database) { double(:database) }
|
34
|
-
|
35
|
-
it 'returns the Sequel Database object stored in the config' do
|
36
|
-
BeetleETL.configure { |config| config.database = database }
|
37
|
-
|
38
|
-
expect(BeetleETL.database).to eql(database)
|
39
|
-
end
|
40
|
-
|
41
|
-
it 'builds and caches a Sequel Database from config when no database is passed' do
|
42
|
-
database_config = double(:database_config)
|
43
|
-
BeetleETL.configure { |config| config.database_config = database_config }
|
44
|
-
|
45
|
-
expect(Sequel).to receive(:connect).with(database_config).once { database }
|
12
|
+
expect(BeetleETL::Import).to receive(:new).with(config).and_return runner
|
13
|
+
expect(runner).to receive(:run).and_return report
|
14
|
+
expect(BeetleETL::Reporter).to receive(:new).with(config, report).and_return reporter
|
46
15
|
|
47
|
-
expect(BeetleETL.
|
48
|
-
expect(BeetleETL.database).to eql(database)
|
16
|
+
expect(BeetleETL.import(config)).to eql(report)
|
49
17
|
end
|
50
18
|
end
|
51
19
|
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module BeetleETL
|
4
|
+
describe Configuration do
|
5
|
+
|
6
|
+
subject { Configuration.new }
|
7
|
+
|
8
|
+
describe "#database" do
|
9
|
+
let(:database) { double(:database) }
|
10
|
+
|
11
|
+
it "returns the object if present" do
|
12
|
+
subject.database = database
|
13
|
+
|
14
|
+
expect(subject.database).to eql(database)
|
15
|
+
end
|
16
|
+
|
17
|
+
it "builds a Sequel Database from config when no database is passed" do
|
18
|
+
database_config = double(:database_config)
|
19
|
+
subject.database_config = database_config
|
20
|
+
|
21
|
+
expect(Sequel).to receive(:connect).with(database_config).once { database }
|
22
|
+
|
23
|
+
expect(subject.database).to eql(database)
|
24
|
+
expect(subject.database).to eql(database)
|
25
|
+
end
|
26
|
+
|
27
|
+
it "raises an error if no database or database_config is passed" do
|
28
|
+
expect { subject.database }
|
29
|
+
.to raise_error(BeetleETL::InvalidConfigurationError)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe "#disconnect_database" do
|
34
|
+
let(:database) { double(:database) }
|
35
|
+
|
36
|
+
it "disconnects from database if database_config was passed" do
|
37
|
+
database_config = double(:database_config)
|
38
|
+
|
39
|
+
expect(Sequel).to receive(:connect).with(database_config) { database }
|
40
|
+
expect(database).to receive(:disconnect)
|
41
|
+
|
42
|
+
subject.database_config = database_config
|
43
|
+
subject.disconnect_database
|
44
|
+
end
|
45
|
+
|
46
|
+
it "does not disconnect from database if database object was passed" do
|
47
|
+
expect(database).not_to receive(:disconnect)
|
48
|
+
|
49
|
+
subject.database = database
|
50
|
+
subject.disconnect_database
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
describe "#target_schema" do
|
55
|
+
it "returns nil if target_schema is 'public'" do
|
56
|
+
subject.target_schema = "public"
|
57
|
+
expect(subject.target_schema).to be_nil
|
58
|
+
end
|
59
|
+
|
60
|
+
it "returns target_schema if target_schema is not 'public'" do
|
61
|
+
subject.target_schema = "foo"
|
62
|
+
expect(subject.target_schema).to eql("foo")
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
data/spec/dsl/dsl_spec.rb
CHANGED
@@ -3,18 +3,24 @@ require 'spec_helper'
|
|
3
3
|
module BeetleETL
|
4
4
|
describe DSL do
|
5
5
|
|
6
|
-
|
6
|
+
let(:config) do
|
7
|
+
Configuration.new.tap do |c|
|
8
|
+
c.external_source = "bar"
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
subject { DSL.new(config, :foo_table) }
|
7
13
|
|
8
14
|
describe '#stage_table' do
|
9
15
|
it 'returns the current stage table name' do
|
10
16
|
expect(subject.stage_table).to eql(
|
11
|
-
BeetleETL::Naming.stage_table_name_sql(:foo_table)
|
17
|
+
BeetleETL::Naming.stage_table_name_sql("bar", :foo_table)
|
12
18
|
)
|
13
19
|
end
|
14
20
|
|
15
21
|
it 'returns the stage table name for the given table' do
|
16
22
|
expect(subject.stage_table(:bar_table)).to eql(
|
17
|
-
BeetleETL::Naming.stage_table_name_sql(:bar_table)
|
23
|
+
BeetleETL::Naming.stage_table_name_sql("bar", :bar_table)
|
18
24
|
)
|
19
25
|
end
|
20
26
|
end
|
@@ -3,9 +3,7 @@ require 'spec_helper'
|
|
3
3
|
module BeetleETL
|
4
4
|
describe TransformationLoader do
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
before do
|
6
|
+
let(:config) do
|
9
7
|
data_file = tempfile_with_contents <<-FILE
|
10
8
|
import :foo do
|
11
9
|
'foo'
|
@@ -20,15 +18,18 @@ module BeetleETL
|
|
20
18
|
end
|
21
19
|
FILE
|
22
20
|
|
23
|
-
|
24
|
-
|
21
|
+
Configuration.new.tap do |c|
|
22
|
+
c.transformation_file = data_file.path
|
25
23
|
end
|
26
24
|
end
|
27
25
|
|
26
|
+
subject { TransformationLoader.new(config) }
|
27
|
+
|
28
28
|
describe '#load' do
|
29
29
|
it 'loads transformations from the data file' do
|
30
|
-
expect(Transformation).to receive(:new) do |table_name,
|
31
|
-
expect(
|
30
|
+
expect(Transformation).to receive(:new) do |configuration, table_name, setup, helpers|
|
31
|
+
expect(configuration).to eql(config)
|
32
|
+
expect(table_name.to_s).to eql(setup.call)
|
32
33
|
expect(helpers.call).to eql("baz")
|
33
34
|
end.exactly(2).times
|
34
35
|
|
@@ -36,7 +37,7 @@ module BeetleETL
|
|
36
37
|
end
|
37
38
|
|
38
39
|
it 'returns the list of transformations' do
|
39
|
-
allow(Transformation).to receive(:new) do |table_name,
|
40
|
+
allow(Transformation).to receive(:new) do |configuration, table_name, setup, helpers|
|
40
41
|
table_name
|
41
42
|
end
|
42
43
|
|
@@ -3,9 +3,11 @@ require 'spec_helper'
|
|
3
3
|
module BeetleETL
|
4
4
|
describe Transformation do
|
5
5
|
|
6
|
+
let(:config) { Configuration.new }
|
7
|
+
|
6
8
|
describe '#table_name' do
|
7
9
|
it 'returns the given table name' do
|
8
|
-
transformation = Transformation.new(:table, Proc.new {})
|
10
|
+
transformation = Transformation.new(config, :table, Proc.new {})
|
9
11
|
expect(transformation.table_name).to eql(:table)
|
10
12
|
end
|
11
13
|
end
|
@@ -15,7 +17,7 @@ module BeetleETL
|
|
15
17
|
setup = Proc.new do
|
16
18
|
columns :payload_1, 'payload_2'
|
17
19
|
end
|
18
|
-
transformation = Transformation.new(:table, setup)
|
20
|
+
transformation = Transformation.new(config, :table, setup)
|
19
21
|
|
20
22
|
expect(transformation.column_names).to match_array([
|
21
23
|
:payload_1, :payload_2
|
@@ -23,7 +25,7 @@ module BeetleETL
|
|
23
25
|
end
|
24
26
|
|
25
27
|
it 'defaults to an empty array if no columns are defined' do
|
26
|
-
transformation = Transformation.new(:table, Proc.new {})
|
28
|
+
transformation = Transformation.new(config, :table, Proc.new {})
|
27
29
|
|
28
30
|
expect(transformation.column_names).to match_array([])
|
29
31
|
end
|
@@ -35,7 +37,7 @@ module BeetleETL
|
|
35
37
|
references :foreign_table, on: :foreign_table_id
|
36
38
|
references :another_foreign_table, on: :another_foreign_table_id
|
37
39
|
end
|
38
|
-
transformation = Transformation.new(:table, setup)
|
40
|
+
transformation = Transformation.new(config, :table, setup)
|
39
41
|
|
40
42
|
expect(transformation.relations).to eql({
|
41
43
|
foreign_table_id: :foreign_table,
|
@@ -50,7 +52,7 @@ module BeetleETL
|
|
50
52
|
references :foreign_table, on: :foreign_table_id
|
51
53
|
references :another_foreign_table, on: :another_foreign_table_id
|
52
54
|
end
|
53
|
-
transformation = Transformation.new(:table, setup)
|
55
|
+
transformation = Transformation.new(config, :table, setup)
|
54
56
|
|
55
57
|
expect(transformation.dependencies).to eql(Set.new([:foreign_table, :another_foreign_table]))
|
56
58
|
end
|
@@ -66,7 +68,7 @@ module BeetleETL
|
|
66
68
|
query "SELECT '#{foo}' FROM some_table"
|
67
69
|
end
|
68
70
|
|
69
|
-
transformation = Transformation.new(:table, setup, helpers)
|
71
|
+
transformation = Transformation.new(config, :table, setup, helpers)
|
70
72
|
|
71
73
|
expect(transformation.query).to eql(
|
72
74
|
"SELECT 'foo_string' FROM some_table"
|
@@ -78,7 +80,7 @@ module BeetleETL
|
|
78
80
|
query "SOME QUERY"
|
79
81
|
query "ANOTHER QUERY"
|
80
82
|
end
|
81
|
-
transformation = Transformation.new(:table, setup)
|
83
|
+
transformation = Transformation.new(config, :table, setup)
|
82
84
|
|
83
85
|
expect(transformation.query).to eql(
|
84
86
|
"SOME QUERY;ANOTHER QUERY"
|
@@ -23,11 +23,11 @@ describe BeetleETL do
|
|
23
23
|
database_config_path = File.expand_path('../support/database.yml', File.dirname(__FILE__))
|
24
24
|
database_config = YAML.load(File.read(database_config_path))
|
25
25
|
|
26
|
-
BeetleETL.
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
26
|
+
@config = BeetleETL::Configuration.new.tap do |c|
|
27
|
+
c.transformation_file = File.expand_path('../example_transform.rb', __FILE__)
|
28
|
+
c.database_config = database_config
|
29
|
+
c.external_source = 'source_name'
|
30
|
+
c.logger = Logger.new(Tempfile.new("log"))
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
@@ -54,7 +54,7 @@ describe BeetleETL do
|
|
54
54
|
)
|
55
55
|
|
56
56
|
Timecop.freeze(time1) do
|
57
|
-
BeetleETL.import
|
57
|
+
BeetleETL.import(@config)
|
58
58
|
end
|
59
59
|
|
60
60
|
expect(:organisations).to have_values(
|
@@ -86,7 +86,7 @@ describe BeetleETL do
|
|
86
86
|
)
|
87
87
|
|
88
88
|
Timecop.freeze(time2) do
|
89
|
-
BeetleETL.import
|
89
|
+
BeetleETL.import(@config)
|
90
90
|
end
|
91
91
|
|
92
92
|
expect(:organisations).to have_values(
|
@@ -118,7 +118,7 @@ describe BeetleETL do
|
|
118
118
|
)
|
119
119
|
|
120
120
|
Timecop.freeze(time3) do
|
121
|
-
BeetleETL.import
|
121
|
+
BeetleETL.import(@config)
|
122
122
|
end
|
123
123
|
|
124
124
|
expect(:organisations).to have_values(
|