beetle_etl 1.0.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.byebug_history +8 -0
- data/.travis.yml +6 -1
- data/README.md +31 -9
- data/beetle_etl.gemspec +1 -1
- data/lib/beetle_etl.rb +7 -49
- data/lib/beetle_etl/configuration.rb +39 -0
- data/lib/beetle_etl/dsl/dsl.rb +6 -2
- data/lib/beetle_etl/dsl/transformation.rb +2 -2
- data/lib/beetle_etl/dsl/transformation_loader.rb +4 -3
- data/lib/beetle_etl/import.rb +15 -11
- data/lib/beetle_etl/naming.rb +10 -20
- data/lib/beetle_etl/reporter.rb +3 -2
- data/lib/beetle_etl/step_runner/async_step_runner.rb +6 -4
- data/lib/beetle_etl/steps/create_stage.rb +2 -2
- data/lib/beetle_etl/steps/load.rb +2 -2
- data/lib/beetle_etl/steps/map_relations.rb +2 -2
- data/lib/beetle_etl/steps/step.rb +23 -4
- data/lib/beetle_etl/steps/transform.rb +2 -2
- data/lib/beetle_etl/testing.rb +10 -5
- data/lib/beetle_etl/testing/test_wrapper.rb +4 -4
- data/lib/beetle_etl/version.rb +1 -1
- data/spec/beetle_etl_spec.rb +6 -38
- data/spec/configuration_spec.rb +66 -0
- data/spec/dsl/dsl_spec.rb +9 -3
- data/spec/dsl/transformation_loader_spec.rb +9 -8
- data/spec/dsl/transformation_spec.rb +9 -7
- data/spec/feature/feature_spec.rb +8 -8
- data/spec/reporter_spec.rb +5 -2
- data/spec/spec_helper.rb +4 -5
- data/spec/steps/assign_ids_spec.rb +7 -7
- data/spec/steps/create_stage_spec.rb +14 -12
- data/spec/steps/load_spec.rb +9 -7
- data/spec/steps/map_relations_spec.rb +14 -8
- data/spec/steps/step_spec.rb +5 -3
- data/spec/steps/table_diff_spec.rb +7 -6
- data/spec/steps/transform_spec.rb +8 -4
- data/spec/testing_spec.rb +1 -1
- metadata +9 -5
@@ -5,8 +5,8 @@ module BeetleETL
|
|
5
5
|
|
6
6
|
class CreateStage < Step
|
7
7
|
|
8
|
-
def initialize(table_name, relations, column_names)
|
9
|
-
super(table_name)
|
8
|
+
def initialize(config, table_name, relations, column_names)
|
9
|
+
super(config, table_name)
|
10
10
|
@relations = relations
|
11
11
|
@column_names = column_names
|
12
12
|
end
|
@@ -2,10 +2,10 @@ module BeetleETL
|
|
2
2
|
|
3
3
|
class Step
|
4
4
|
|
5
|
-
include BeetleETL::Naming
|
6
5
|
attr_reader :table_name
|
7
6
|
|
8
|
-
def initialize(table_name)
|
7
|
+
def initialize(config, table_name)
|
8
|
+
@config = config
|
9
9
|
@table_name = table_name
|
10
10
|
end
|
11
11
|
|
@@ -22,11 +22,30 @@ module BeetleETL
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def external_source
|
25
|
-
|
25
|
+
@config.external_source
|
26
26
|
end
|
27
27
|
|
28
28
|
def database
|
29
|
-
|
29
|
+
@config.database
|
30
|
+
end
|
31
|
+
|
32
|
+
# naming
|
33
|
+
|
34
|
+
def stage_table_name
|
35
|
+
BeetleETL::Naming.stage_table_name(@config.external_source, @table_name)
|
36
|
+
end
|
37
|
+
|
38
|
+
def stage_table_name_sql(table_name = nil)
|
39
|
+
table_name ||= @table_name
|
40
|
+
BeetleETL::Naming.stage_table_name_sql(@config.external_source, table_name)
|
41
|
+
end
|
42
|
+
|
43
|
+
def target_table_name
|
44
|
+
BeetleETL::Naming.target_table_name(@config.target_schema, @table_name)
|
45
|
+
end
|
46
|
+
|
47
|
+
def target_table_name_sql
|
48
|
+
BeetleETL::Naming.target_table_name_sql(@config.target_schema, @table_name)
|
30
49
|
end
|
31
50
|
|
32
51
|
end
|
data/lib/beetle_etl/testing.rb
CHANGED
@@ -6,9 +6,14 @@ module BeetleETL
|
|
6
6
|
TargetTableNotFoundError = Class.new(StandardError)
|
7
7
|
NoTransformationFoundError = Class.new(StandardError)
|
8
8
|
|
9
|
+
def self.configure
|
10
|
+
@@config = Configuration.new
|
11
|
+
yield(@@config)
|
12
|
+
end
|
13
|
+
|
9
14
|
def with_stage_tables_for(*table_names, &block)
|
10
15
|
table_names.each do |table_name|
|
11
|
-
unless BeetleETL.database.table_exists?(table_name)
|
16
|
+
unless @@config.database.table_exists?(table_name)
|
12
17
|
raise TargetTableNotFoundError.new <<-MSG
|
13
18
|
Missing target table "#{table_name}".
|
14
19
|
In order to create stage tables, BeetleETL requires the target tables to exist because they provide the column definitions.
|
@@ -16,12 +21,12 @@ module BeetleETL
|
|
16
21
|
end
|
17
22
|
end
|
18
23
|
|
19
|
-
test_wrapper = TestWrapper.new(table_names)
|
24
|
+
test_wrapper = TestWrapper.new(@@config, table_names)
|
20
25
|
test_wrapper.run block
|
21
26
|
end
|
22
27
|
|
23
28
|
def run_transformation(table_name)
|
24
|
-
transformations = TransformationLoader.new.load
|
29
|
+
transformations = TransformationLoader.new(@@config).load
|
25
30
|
|
26
31
|
unless transformations.map(&:table_name).include?(table_name)
|
27
32
|
raise NoTransformationFoundError.new <<-MSG
|
@@ -30,13 +35,13 @@ module BeetleETL
|
|
30
35
|
end
|
31
36
|
|
32
37
|
transformation = transformations.find { |t| t.table_name == table_name }
|
33
|
-
transform = Transform.new(transformation.table_name, transformation.dependencies, transformation.query)
|
38
|
+
transform = Transform.new(@@config, transformation.table_name, transformation.dependencies, transformation.query)
|
34
39
|
transform.run
|
35
40
|
end
|
36
41
|
|
37
42
|
|
38
43
|
def stage_table_name(table_name)
|
39
|
-
BeetleETL::Naming.stage_table_name(table_name)
|
44
|
+
BeetleETL::Naming.stage_table_name(@@config.external_source, table_name)
|
40
45
|
end
|
41
46
|
|
42
47
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module BeetleETL
|
2
2
|
module Testing
|
3
|
-
class TestWrapper < Struct.new(:table_names)
|
3
|
+
class TestWrapper < Struct.new(:config, :table_names)
|
4
4
|
|
5
5
|
def run(block)
|
6
6
|
begin
|
@@ -15,18 +15,18 @@ module BeetleETL
|
|
15
15
|
|
16
16
|
def create_stages
|
17
17
|
transformations.each do |t|
|
18
|
-
CreateStage.new(t.table_name, t.relations, t.column_names).run
|
18
|
+
CreateStage.new(config, t.table_name, t.relations, t.column_names).run
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
22
22
|
def drop_stages
|
23
23
|
transformations.each do |t|
|
24
|
-
DropStage.new(t.table_name).run
|
24
|
+
DropStage.new(config, t.table_name).run
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
28
|
def transformations
|
29
|
-
@transformations ||= TransformationLoader.new.load.find_all do |transformation|
|
29
|
+
@transformations ||= TransformationLoader.new(config).load.find_all do |transformation|
|
30
30
|
table_names.include? transformation.table_name
|
31
31
|
end
|
32
32
|
end
|
data/lib/beetle_etl/version.rb
CHANGED
data/spec/beetle_etl_spec.rb
CHANGED
@@ -4,48 +4,16 @@ describe BeetleETL do
|
|
4
4
|
|
5
5
|
describe '#import' do
|
6
6
|
it 'runs the import with reporting' do
|
7
|
+
config = double(:config, disconnect_database: nil)
|
8
|
+
runner = double(:runner)
|
7
9
|
report = double(:report)
|
8
10
|
reporter = double(:reporter, log_summary: nil)
|
9
11
|
|
10
|
-
expect(BeetleETL::Import).to
|
11
|
-
expect(
|
12
|
-
expect(BeetleETL
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
describe '#config' do
|
17
|
-
it 'returns a configuration object' do
|
18
|
-
expect(BeetleETL.config).to be_a(BeetleETL::Configuration)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
describe '#configure' do
|
23
|
-
it 'allows the configuration to be changed' do
|
24
|
-
expect(BeetleETL.config.external_source).to be_nil
|
25
|
-
|
26
|
-
BeetleETL.configure { |config| config.external_source = 'foo' }
|
27
|
-
|
28
|
-
expect(BeetleETL.config.external_source).to eql('foo')
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
describe '#database' do
|
33
|
-
let(:database) { double(:database) }
|
34
|
-
|
35
|
-
it 'returns the Sequel Database object stored in the config' do
|
36
|
-
BeetleETL.configure { |config| config.database = database }
|
37
|
-
|
38
|
-
expect(BeetleETL.database).to eql(database)
|
39
|
-
end
|
40
|
-
|
41
|
-
it 'builds and caches a Sequel Database from config when no database is passed' do
|
42
|
-
database_config = double(:database_config)
|
43
|
-
BeetleETL.configure { |config| config.database_config = database_config }
|
44
|
-
|
45
|
-
expect(Sequel).to receive(:connect).with(database_config).once { database }
|
12
|
+
expect(BeetleETL::Import).to receive(:new).with(config).and_return runner
|
13
|
+
expect(runner).to receive(:run).and_return report
|
14
|
+
expect(BeetleETL::Reporter).to receive(:new).with(config, report).and_return reporter
|
46
15
|
|
47
|
-
expect(BeetleETL.database).to eql(database)
|
48
|
-
expect(BeetleETL.database).to eql(database)
|
16
|
+
expect(BeetleETL.import(config)).to eql(report)
|
49
17
|
end
|
50
18
|
end
|
51
19
|
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module BeetleETL
|
4
|
+
describe Configuration do
|
5
|
+
|
6
|
+
subject { Configuration.new }
|
7
|
+
|
8
|
+
describe "#database" do
|
9
|
+
let(:database) { double(:database) }
|
10
|
+
|
11
|
+
it "returns the object if present" do
|
12
|
+
subject.database = database
|
13
|
+
|
14
|
+
expect(subject.database).to eql(database)
|
15
|
+
end
|
16
|
+
|
17
|
+
it "builds a Sequel Database from config when no database is passed" do
|
18
|
+
database_config = double(:database_config)
|
19
|
+
subject.database_config = database_config
|
20
|
+
|
21
|
+
expect(Sequel).to receive(:connect).with(database_config).once { database }
|
22
|
+
|
23
|
+
expect(subject.database).to eql(database)
|
24
|
+
expect(subject.database).to eql(database)
|
25
|
+
end
|
26
|
+
|
27
|
+
it "raises an error if no database or database_config is passed" do
|
28
|
+
expect { subject.database }
|
29
|
+
.to raise_error(BeetleETL::InvalidConfigurationError)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe "#disconnect_database" do
|
34
|
+
let(:database) { double(:database) }
|
35
|
+
|
36
|
+
it "disconnects from database if database_config was passed" do
|
37
|
+
database_config = double(:database_config)
|
38
|
+
|
39
|
+
expect(Sequel).to receive(:connect).with(database_config) { database }
|
40
|
+
expect(database).to receive(:disconnect)
|
41
|
+
|
42
|
+
subject.database_config = database_config
|
43
|
+
subject.disconnect_database
|
44
|
+
end
|
45
|
+
|
46
|
+
it "does not disconnect from database if database object was passed" do
|
47
|
+
expect(database).not_to receive(:disconnect)
|
48
|
+
|
49
|
+
subject.database = database
|
50
|
+
subject.disconnect_database
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
describe "#target_schema" do
|
55
|
+
it "returns nil if target_schema is 'public'" do
|
56
|
+
subject.target_schema = "public"
|
57
|
+
expect(subject.target_schema).to be_nil
|
58
|
+
end
|
59
|
+
|
60
|
+
it "returns target_schema if target_schema is not 'public'" do
|
61
|
+
subject.target_schema = "foo"
|
62
|
+
expect(subject.target_schema).to eql("foo")
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
data/spec/dsl/dsl_spec.rb
CHANGED
@@ -3,18 +3,24 @@ require 'spec_helper'
|
|
3
3
|
module BeetleETL
|
4
4
|
describe DSL do
|
5
5
|
|
6
|
-
|
6
|
+
let(:config) do
|
7
|
+
Configuration.new.tap do |c|
|
8
|
+
c.external_source = "bar"
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
subject { DSL.new(config, :foo_table) }
|
7
13
|
|
8
14
|
describe '#stage_table' do
|
9
15
|
it 'returns the current stage table name' do
|
10
16
|
expect(subject.stage_table).to eql(
|
11
|
-
BeetleETL::Naming.stage_table_name_sql(:foo_table)
|
17
|
+
BeetleETL::Naming.stage_table_name_sql("bar", :foo_table)
|
12
18
|
)
|
13
19
|
end
|
14
20
|
|
15
21
|
it 'returns the stage table name for the given table' do
|
16
22
|
expect(subject.stage_table(:bar_table)).to eql(
|
17
|
-
BeetleETL::Naming.stage_table_name_sql(:bar_table)
|
23
|
+
BeetleETL::Naming.stage_table_name_sql("bar", :bar_table)
|
18
24
|
)
|
19
25
|
end
|
20
26
|
end
|
@@ -3,9 +3,7 @@ require 'spec_helper'
|
|
3
3
|
module BeetleETL
|
4
4
|
describe TransformationLoader do
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
before do
|
6
|
+
let(:config) do
|
9
7
|
data_file = tempfile_with_contents <<-FILE
|
10
8
|
import :foo do
|
11
9
|
'foo'
|
@@ -20,15 +18,18 @@ module BeetleETL
|
|
20
18
|
end
|
21
19
|
FILE
|
22
20
|
|
23
|
-
|
24
|
-
|
21
|
+
Configuration.new.tap do |c|
|
22
|
+
c.transformation_file = data_file.path
|
25
23
|
end
|
26
24
|
end
|
27
25
|
|
26
|
+
subject { TransformationLoader.new(config) }
|
27
|
+
|
28
28
|
describe '#load' do
|
29
29
|
it 'loads transformations from the data file' do
|
30
|
-
expect(Transformation).to receive(:new) do |table_name, setup, helpers|
|
31
|
-
expect(table_name.to_s).to eql(setup.call)
|
30
|
+
expect(Transformation).to receive(:new) do |configuration, table_name, setup, helpers|
|
31
|
+
expect(configuration).to eql(config)
|
32
|
+
expect(table_name.to_s).to eql(setup.call)
|
32
33
|
expect(helpers.call).to eql("baz")
|
33
34
|
end.exactly(2).times
|
34
35
|
|
@@ -36,7 +37,7 @@ module BeetleETL
|
|
36
37
|
end
|
37
38
|
|
38
39
|
it 'returns the list of transformations' do
|
39
|
-
allow(Transformation).to receive(:new) do |table_name, setup, helpers|
|
40
|
+
allow(Transformation).to receive(:new) do |configuration, table_name, setup, helpers|
|
40
41
|
table_name
|
41
42
|
end
|
42
43
|
|
@@ -3,9 +3,11 @@ require 'spec_helper'
|
|
3
3
|
module BeetleETL
|
4
4
|
describe Transformation do
|
5
5
|
|
6
|
+
let(:config) { Configuration.new }
|
7
|
+
|
6
8
|
describe '#table_name' do
|
7
9
|
it 'returns the given table name' do
|
8
|
-
transformation = Transformation.new(:table, Proc.new {})
|
10
|
+
transformation = Transformation.new(config, :table, Proc.new {})
|
9
11
|
expect(transformation.table_name).to eql(:table)
|
10
12
|
end
|
11
13
|
end
|
@@ -15,7 +17,7 @@ module BeetleETL
|
|
15
17
|
setup = Proc.new do
|
16
18
|
columns :payload_1, 'payload_2'
|
17
19
|
end
|
18
|
-
transformation = Transformation.new(:table, setup)
|
20
|
+
transformation = Transformation.new(config, :table, setup)
|
19
21
|
|
20
22
|
expect(transformation.column_names).to match_array([
|
21
23
|
:payload_1, :payload_2
|
@@ -23,7 +25,7 @@ module BeetleETL
|
|
23
25
|
end
|
24
26
|
|
25
27
|
it 'defaults to an empty array if no columns are defined' do
|
26
|
-
transformation = Transformation.new(:table, Proc.new {})
|
28
|
+
transformation = Transformation.new(config, :table, Proc.new {})
|
27
29
|
|
28
30
|
expect(transformation.column_names).to match_array([])
|
29
31
|
end
|
@@ -35,7 +37,7 @@ module BeetleETL
|
|
35
37
|
references :foreign_table, on: :foreign_table_id
|
36
38
|
references :another_foreign_table, on: :another_foreign_table_id
|
37
39
|
end
|
38
|
-
transformation = Transformation.new(:table, setup)
|
40
|
+
transformation = Transformation.new(config, :table, setup)
|
39
41
|
|
40
42
|
expect(transformation.relations).to eql({
|
41
43
|
foreign_table_id: :foreign_table,
|
@@ -50,7 +52,7 @@ module BeetleETL
|
|
50
52
|
references :foreign_table, on: :foreign_table_id
|
51
53
|
references :another_foreign_table, on: :another_foreign_table_id
|
52
54
|
end
|
53
|
-
transformation = Transformation.new(:table, setup)
|
55
|
+
transformation = Transformation.new(config, :table, setup)
|
54
56
|
|
55
57
|
expect(transformation.dependencies).to eql(Set.new([:foreign_table, :another_foreign_table]))
|
56
58
|
end
|
@@ -66,7 +68,7 @@ module BeetleETL
|
|
66
68
|
query "SELECT '#{foo}' FROM some_table"
|
67
69
|
end
|
68
70
|
|
69
|
-
transformation = Transformation.new(:table, setup, helpers)
|
71
|
+
transformation = Transformation.new(config, :table, setup, helpers)
|
70
72
|
|
71
73
|
expect(transformation.query).to eql(
|
72
74
|
"SELECT 'foo_string' FROM some_table"
|
@@ -78,7 +80,7 @@ module BeetleETL
|
|
78
80
|
query "SOME QUERY"
|
79
81
|
query "ANOTHER QUERY"
|
80
82
|
end
|
81
|
-
transformation = Transformation.new(:table, setup)
|
83
|
+
transformation = Transformation.new(config, :table, setup)
|
82
84
|
|
83
85
|
expect(transformation.query).to eql(
|
84
86
|
"SOME QUERY;ANOTHER QUERY"
|
@@ -23,11 +23,11 @@ describe BeetleETL do
|
|
23
23
|
database_config_path = File.expand_path('../support/database.yml', File.dirname(__FILE__))
|
24
24
|
database_config = YAML.load(File.read(database_config_path))
|
25
25
|
|
26
|
-
BeetleETL.
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
26
|
+
@config = BeetleETL::Configuration.new.tap do |c|
|
27
|
+
c.transformation_file = File.expand_path('../example_transform.rb', __FILE__)
|
28
|
+
c.database_config = database_config
|
29
|
+
c.external_source = 'source_name'
|
30
|
+
c.logger = Logger.new(Tempfile.new("log"))
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
@@ -54,7 +54,7 @@ describe BeetleETL do
|
|
54
54
|
)
|
55
55
|
|
56
56
|
Timecop.freeze(time1) do
|
57
|
-
BeetleETL.import
|
57
|
+
BeetleETL.import(@config)
|
58
58
|
end
|
59
59
|
|
60
60
|
expect(:organisations).to have_values(
|
@@ -86,7 +86,7 @@ describe BeetleETL do
|
|
86
86
|
)
|
87
87
|
|
88
88
|
Timecop.freeze(time2) do
|
89
|
-
BeetleETL.import
|
89
|
+
BeetleETL.import(@config)
|
90
90
|
end
|
91
91
|
|
92
92
|
expect(:organisations).to have_values(
|
@@ -118,7 +118,7 @@ describe BeetleETL do
|
|
118
118
|
)
|
119
119
|
|
120
120
|
Timecop.freeze(time3) do
|
121
|
-
BeetleETL.import
|
121
|
+
BeetleETL.import(@config)
|
122
122
|
end
|
123
123
|
|
124
124
|
expect(:organisations).to have_values(
|