beetle_etl 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/.byebug_history +8 -0
  3. data/.travis.yml +6 -1
  4. data/README.md +31 -9
  5. data/beetle_etl.gemspec +1 -1
  6. data/lib/beetle_etl.rb +7 -49
  7. data/lib/beetle_etl/configuration.rb +39 -0
  8. data/lib/beetle_etl/dsl/dsl.rb +6 -2
  9. data/lib/beetle_etl/dsl/transformation.rb +2 -2
  10. data/lib/beetle_etl/dsl/transformation_loader.rb +4 -3
  11. data/lib/beetle_etl/import.rb +15 -11
  12. data/lib/beetle_etl/naming.rb +10 -20
  13. data/lib/beetle_etl/reporter.rb +3 -2
  14. data/lib/beetle_etl/step_runner/async_step_runner.rb +6 -4
  15. data/lib/beetle_etl/steps/create_stage.rb +2 -2
  16. data/lib/beetle_etl/steps/load.rb +2 -2
  17. data/lib/beetle_etl/steps/map_relations.rb +2 -2
  18. data/lib/beetle_etl/steps/step.rb +23 -4
  19. data/lib/beetle_etl/steps/transform.rb +2 -2
  20. data/lib/beetle_etl/testing.rb +10 -5
  21. data/lib/beetle_etl/testing/test_wrapper.rb +4 -4
  22. data/lib/beetle_etl/version.rb +1 -1
  23. data/spec/beetle_etl_spec.rb +6 -38
  24. data/spec/configuration_spec.rb +66 -0
  25. data/spec/dsl/dsl_spec.rb +9 -3
  26. data/spec/dsl/transformation_loader_spec.rb +9 -8
  27. data/spec/dsl/transformation_spec.rb +9 -7
  28. data/spec/feature/feature_spec.rb +8 -8
  29. data/spec/reporter_spec.rb +5 -2
  30. data/spec/spec_helper.rb +4 -5
  31. data/spec/steps/assign_ids_spec.rb +7 -7
  32. data/spec/steps/create_stage_spec.rb +14 -12
  33. data/spec/steps/load_spec.rb +9 -7
  34. data/spec/steps/map_relations_spec.rb +14 -8
  35. data/spec/steps/step_spec.rb +5 -3
  36. data/spec/steps/table_diff_spec.rb +7 -6
  37. data/spec/steps/transform_spec.rb +8 -4
  38. data/spec/testing_spec.rb +1 -1
  39. metadata +9 -5
@@ -5,8 +5,8 @@ module BeetleETL
5
5
 
6
6
  class CreateStage < Step
7
7
 
8
- def initialize(table_name, relations, column_names)
9
- super(table_name)
8
+ def initialize(config, table_name, relations, column_names)
9
+ super(config, table_name)
10
10
  @relations = relations
11
11
  @column_names = column_names
12
12
  end
@@ -6,8 +6,8 @@ module BeetleETL
6
6
  transition
7
7
  ]
8
8
 
9
- def initialize(table_name, relations)
10
- super(table_name)
9
+ def initialize(config, table_name, relations)
10
+ super(config, table_name)
11
11
  @relations = relations
12
12
  end
13
13
 
@@ -1,8 +1,8 @@
1
1
  module BeetleETL
2
2
  class MapRelations < Step
3
3
 
4
- def initialize(table_name, relations)
5
- super(table_name)
4
+ def initialize(config, table_name, relations)
5
+ super(config, table_name)
6
6
  @relations = relations
7
7
  end
8
8
 
@@ -2,10 +2,10 @@ module BeetleETL
2
2
 
3
3
  class Step
4
4
 
5
- include BeetleETL::Naming
6
5
  attr_reader :table_name
7
6
 
8
- def initialize(table_name)
7
+ def initialize(config, table_name)
8
+ @config = config
9
9
  @table_name = table_name
10
10
  end
11
11
 
@@ -22,11 +22,30 @@ module BeetleETL
22
22
  end
23
23
 
24
24
  def external_source
25
- BeetleETL.config.external_source
25
+ @config.external_source
26
26
  end
27
27
 
28
28
  def database
29
- BeetleETL.database
29
+ @config.database
30
+ end
31
+
32
+ # naming
33
+
34
+ def stage_table_name
35
+ BeetleETL::Naming.stage_table_name(@config.external_source, @table_name)
36
+ end
37
+
38
+ def stage_table_name_sql(table_name = nil)
39
+ table_name ||= @table_name
40
+ BeetleETL::Naming.stage_table_name_sql(@config.external_source, table_name)
41
+ end
42
+
43
+ def target_table_name
44
+ BeetleETL::Naming.target_table_name(@config.target_schema, @table_name)
45
+ end
46
+
47
+ def target_table_name_sql
48
+ BeetleETL::Naming.target_table_name_sql(@config.target_schema, @table_name)
30
49
  end
31
50
 
32
51
  end
@@ -1,8 +1,8 @@
1
1
  module BeetleETL
2
2
  class Transform < Step
3
3
 
4
- def initialize(table_name, dependencies, query)
5
- super(table_name)
4
+ def initialize(config, table_name, dependencies, query)
5
+ super(config, table_name)
6
6
  @dependencies = dependencies
7
7
  @query = query
8
8
  end
@@ -6,9 +6,14 @@ module BeetleETL
6
6
  TargetTableNotFoundError = Class.new(StandardError)
7
7
  NoTransformationFoundError = Class.new(StandardError)
8
8
 
9
+ def self.configure
10
+ @@config = Configuration.new
11
+ yield(@@config)
12
+ end
13
+
9
14
  def with_stage_tables_for(*table_names, &block)
10
15
  table_names.each do |table_name|
11
- unless BeetleETL.database.table_exists?(table_name)
16
+ unless @@config.database.table_exists?(table_name)
12
17
  raise TargetTableNotFoundError.new <<-MSG
13
18
  Missing target table "#{table_name}".
14
19
  In order to create stage tables, BeetleETL requires the target tables to exist because they provide the column definitions.
@@ -16,12 +21,12 @@ module BeetleETL
16
21
  end
17
22
  end
18
23
 
19
- test_wrapper = TestWrapper.new(table_names)
24
+ test_wrapper = TestWrapper.new(@@config, table_names)
20
25
  test_wrapper.run block
21
26
  end
22
27
 
23
28
  def run_transformation(table_name)
24
- transformations = TransformationLoader.new.load
29
+ transformations = TransformationLoader.new(@@config).load
25
30
 
26
31
  unless transformations.map(&:table_name).include?(table_name)
27
32
  raise NoTransformationFoundError.new <<-MSG
@@ -30,13 +35,13 @@ module BeetleETL
30
35
  end
31
36
 
32
37
  transformation = transformations.find { |t| t.table_name == table_name }
33
- transform = Transform.new(transformation.table_name, transformation.dependencies, transformation.query)
38
+ transform = Transform.new(@@config, transformation.table_name, transformation.dependencies, transformation.query)
34
39
  transform.run
35
40
  end
36
41
 
37
42
 
38
43
  def stage_table_name(table_name)
39
- BeetleETL::Naming.stage_table_name(table_name)
44
+ BeetleETL::Naming.stage_table_name(@@config.external_source, table_name)
40
45
  end
41
46
 
42
47
  end
@@ -1,6 +1,6 @@
1
1
  module BeetleETL
2
2
  module Testing
3
- class TestWrapper < Struct.new(:table_names)
3
+ class TestWrapper < Struct.new(:config, :table_names)
4
4
 
5
5
  def run(block)
6
6
  begin
@@ -15,18 +15,18 @@ module BeetleETL
15
15
 
16
16
  def create_stages
17
17
  transformations.each do |t|
18
- CreateStage.new(t.table_name, t.relations, t.column_names).run
18
+ CreateStage.new(config, t.table_name, t.relations, t.column_names).run
19
19
  end
20
20
  end
21
21
 
22
22
  def drop_stages
23
23
  transformations.each do |t|
24
- DropStage.new(t.table_name).run
24
+ DropStage.new(config, t.table_name).run
25
25
  end
26
26
  end
27
27
 
28
28
  def transformations
29
- @transformations ||= TransformationLoader.new.load.find_all do |transformation|
29
+ @transformations ||= TransformationLoader.new(config).load.find_all do |transformation|
30
30
  table_names.include? transformation.table_name
31
31
  end
32
32
  end
@@ -1,3 +1,3 @@
1
1
  module BeetleETL
2
- VERSION = "1.0.1"
2
+ VERSION = "2.0.0"
3
3
  end
@@ -4,48 +4,16 @@ describe BeetleETL do
4
4
 
5
5
  describe '#import' do
6
6
  it 'runs the import with reporting' do
7
+ config = double(:config, disconnect_database: nil)
8
+ runner = double(:runner)
7
9
  report = double(:report)
8
10
  reporter = double(:reporter, log_summary: nil)
9
11
 
10
- expect(BeetleETL::Import).to receive_message_chain(:new, :run).and_return report
11
- expect(BeetleETL::Reporter).to receive(:new).with(report).and_return reporter
12
- expect(BeetleETL.import).to eql(report)
13
- end
14
- end
15
-
16
- describe '#config' do
17
- it 'returns a configuration object' do
18
- expect(BeetleETL.config).to be_a(BeetleETL::Configuration)
19
- end
20
- end
21
-
22
- describe '#configure' do
23
- it 'allows the configuration to be changed' do
24
- expect(BeetleETL.config.external_source).to be_nil
25
-
26
- BeetleETL.configure { |config| config.external_source = 'foo' }
27
-
28
- expect(BeetleETL.config.external_source).to eql('foo')
29
- end
30
- end
31
-
32
- describe '#database' do
33
- let(:database) { double(:database) }
34
-
35
- it 'returns the Sequel Database object stored in the config' do
36
- BeetleETL.configure { |config| config.database = database }
37
-
38
- expect(BeetleETL.database).to eql(database)
39
- end
40
-
41
- it 'builds and caches a Sequel Database from config when no database is passed' do
42
- database_config = double(:database_config)
43
- BeetleETL.configure { |config| config.database_config = database_config }
44
-
45
- expect(Sequel).to receive(:connect).with(database_config).once { database }
12
+ expect(BeetleETL::Import).to receive(:new).with(config).and_return runner
13
+ expect(runner).to receive(:run).and_return report
14
+ expect(BeetleETL::Reporter).to receive(:new).with(config, report).and_return reporter
46
15
 
47
- expect(BeetleETL.database).to eql(database)
48
- expect(BeetleETL.database).to eql(database)
16
+ expect(BeetleETL.import(config)).to eql(report)
49
17
  end
50
18
  end
51
19
 
@@ -0,0 +1,66 @@
1
+ require 'spec_helper'
2
+
3
+ module BeetleETL
4
+ describe Configuration do
5
+
6
+ subject { Configuration.new }
7
+
8
+ describe "#database" do
9
+ let(:database) { double(:database) }
10
+
11
+ it "returns the object if present" do
12
+ subject.database = database
13
+
14
+ expect(subject.database).to eql(database)
15
+ end
16
+
17
+ it "builds a Sequel Database from config when no database is passed" do
18
+ database_config = double(:database_config)
19
+ subject.database_config = database_config
20
+
21
+ expect(Sequel).to receive(:connect).with(database_config).once { database }
22
+
23
+ expect(subject.database).to eql(database)
24
+ expect(subject.database).to eql(database)
25
+ end
26
+
27
+ it "raises an error if no database or database_config is passed" do
28
+ expect { subject.database }
29
+ .to raise_error(BeetleETL::InvalidConfigurationError)
30
+ end
31
+ end
32
+
33
+ describe "#disconnect_database" do
34
+ let(:database) { double(:database) }
35
+
36
+ it "disconnects from database if database_config was passed" do
37
+ database_config = double(:database_config)
38
+
39
+ expect(Sequel).to receive(:connect).with(database_config) { database }
40
+ expect(database).to receive(:disconnect)
41
+
42
+ subject.database_config = database_config
43
+ subject.disconnect_database
44
+ end
45
+
46
+ it "does not disconnect from database if database object was passed" do
47
+ expect(database).not_to receive(:disconnect)
48
+
49
+ subject.database = database
50
+ subject.disconnect_database
51
+ end
52
+ end
53
+
54
+ describe "#target_schema" do
55
+ it "returns nil if target_schema is 'public'" do
56
+ subject.target_schema = "public"
57
+ expect(subject.target_schema).to be_nil
58
+ end
59
+
60
+ it "returns target_schema if target_schema is not 'public'" do
61
+ subject.target_schema = "foo"
62
+ expect(subject.target_schema).to eql("foo")
63
+ end
64
+ end
65
+ end
66
+ end
@@ -3,18 +3,24 @@ require 'spec_helper'
3
3
  module BeetleETL
4
4
  describe DSL do
5
5
 
6
- subject { DSL.new(:foo_table) }
6
+ let(:config) do
7
+ Configuration.new.tap do |c|
8
+ c.external_source = "bar"
9
+ end
10
+ end
11
+
12
+ subject { DSL.new(config, :foo_table) }
7
13
 
8
14
  describe '#stage_table' do
9
15
  it 'returns the current stage table name' do
10
16
  expect(subject.stage_table).to eql(
11
- BeetleETL::Naming.stage_table_name_sql(:foo_table)
17
+ BeetleETL::Naming.stage_table_name_sql("bar", :foo_table)
12
18
  )
13
19
  end
14
20
 
15
21
  it 'returns the stage table name for the given table' do
16
22
  expect(subject.stage_table(:bar_table)).to eql(
17
- BeetleETL::Naming.stage_table_name_sql(:bar_table)
23
+ BeetleETL::Naming.stage_table_name_sql("bar", :bar_table)
18
24
  )
19
25
  end
20
26
  end
@@ -3,9 +3,7 @@ require 'spec_helper'
3
3
  module BeetleETL
4
4
  describe TransformationLoader do
5
5
 
6
- subject { TransformationLoader.new }
7
-
8
- before do
6
+ let(:config) do
9
7
  data_file = tempfile_with_contents <<-FILE
10
8
  import :foo do
11
9
  'foo'
@@ -20,15 +18,18 @@ module BeetleETL
20
18
  end
21
19
  FILE
22
20
 
23
- BeetleETL.configure do |config|
24
- config.transformation_file = data_file.path
21
+ Configuration.new.tap do |c|
22
+ c.transformation_file = data_file.path
25
23
  end
26
24
  end
27
25
 
26
+ subject { TransformationLoader.new(config) }
27
+
28
28
  describe '#load' do
29
29
  it 'loads transformations from the data file' do
30
- expect(Transformation).to receive(:new) do |table_name, config, helpers|
31
- expect(table_name.to_s).to eql(config.call)
30
+ expect(Transformation).to receive(:new) do |configuration, table_name, setup, helpers|
31
+ expect(configuration).to eql(config)
32
+ expect(table_name.to_s).to eql(setup.call)
32
33
  expect(helpers.call).to eql("baz")
33
34
  end.exactly(2).times
34
35
 
@@ -36,7 +37,7 @@ module BeetleETL
36
37
  end
37
38
 
38
39
  it 'returns the list of transformations' do
39
- allow(Transformation).to receive(:new) do |table_name, config, helpers|
40
+ allow(Transformation).to receive(:new) do |configuration, table_name, setup, helpers|
40
41
  table_name
41
42
  end
42
43
 
@@ -3,9 +3,11 @@ require 'spec_helper'
3
3
  module BeetleETL
4
4
  describe Transformation do
5
5
 
6
+ let(:config) { Configuration.new }
7
+
6
8
  describe '#table_name' do
7
9
  it 'returns the given table name' do
8
- transformation = Transformation.new(:table, Proc.new {})
10
+ transformation = Transformation.new(config, :table, Proc.new {})
9
11
  expect(transformation.table_name).to eql(:table)
10
12
  end
11
13
  end
@@ -15,7 +17,7 @@ module BeetleETL
15
17
  setup = Proc.new do
16
18
  columns :payload_1, 'payload_2'
17
19
  end
18
- transformation = Transformation.new(:table, setup)
20
+ transformation = Transformation.new(config, :table, setup)
19
21
 
20
22
  expect(transformation.column_names).to match_array([
21
23
  :payload_1, :payload_2
@@ -23,7 +25,7 @@ module BeetleETL
23
25
  end
24
26
 
25
27
  it 'defaults to an empty array if no columns are defined' do
26
- transformation = Transformation.new(:table, Proc.new {})
28
+ transformation = Transformation.new(config, :table, Proc.new {})
27
29
 
28
30
  expect(transformation.column_names).to match_array([])
29
31
  end
@@ -35,7 +37,7 @@ module BeetleETL
35
37
  references :foreign_table, on: :foreign_table_id
36
38
  references :another_foreign_table, on: :another_foreign_table_id
37
39
  end
38
- transformation = Transformation.new(:table, setup)
40
+ transformation = Transformation.new(config, :table, setup)
39
41
 
40
42
  expect(transformation.relations).to eql({
41
43
  foreign_table_id: :foreign_table,
@@ -50,7 +52,7 @@ module BeetleETL
50
52
  references :foreign_table, on: :foreign_table_id
51
53
  references :another_foreign_table, on: :another_foreign_table_id
52
54
  end
53
- transformation = Transformation.new(:table, setup)
55
+ transformation = Transformation.new(config, :table, setup)
54
56
 
55
57
  expect(transformation.dependencies).to eql(Set.new([:foreign_table, :another_foreign_table]))
56
58
  end
@@ -66,7 +68,7 @@ module BeetleETL
66
68
  query "SELECT '#{foo}' FROM some_table"
67
69
  end
68
70
 
69
- transformation = Transformation.new(:table, setup, helpers)
71
+ transformation = Transformation.new(config, :table, setup, helpers)
70
72
 
71
73
  expect(transformation.query).to eql(
72
74
  "SELECT 'foo_string' FROM some_table"
@@ -78,7 +80,7 @@ module BeetleETL
78
80
  query "SOME QUERY"
79
81
  query "ANOTHER QUERY"
80
82
  end
81
- transformation = Transformation.new(:table, setup)
83
+ transformation = Transformation.new(config, :table, setup)
82
84
 
83
85
  expect(transformation.query).to eql(
84
86
  "SOME QUERY;ANOTHER QUERY"
@@ -23,11 +23,11 @@ describe BeetleETL do
23
23
  database_config_path = File.expand_path('../support/database.yml', File.dirname(__FILE__))
24
24
  database_config = YAML.load(File.read(database_config_path))
25
25
 
26
- BeetleETL.configure do |config|
27
- config.transformation_file = File.expand_path('../example_transform.rb', __FILE__)
28
- config.database_config = database_config
29
- config.external_source = 'source_name'
30
- config.logger = Logger.new(Tempfile.new("log"))
26
+ @config = BeetleETL::Configuration.new.tap do |c|
27
+ c.transformation_file = File.expand_path('../example_transform.rb', __FILE__)
28
+ c.database_config = database_config
29
+ c.external_source = 'source_name'
30
+ c.logger = Logger.new(Tempfile.new("log"))
31
31
  end
32
32
  end
33
33
 
@@ -54,7 +54,7 @@ describe BeetleETL do
54
54
  )
55
55
 
56
56
  Timecop.freeze(time1) do
57
- BeetleETL.import
57
+ BeetleETL.import(@config)
58
58
  end
59
59
 
60
60
  expect(:organisations).to have_values(
@@ -86,7 +86,7 @@ describe BeetleETL do
86
86
  )
87
87
 
88
88
  Timecop.freeze(time2) do
89
- BeetleETL.import
89
+ BeetleETL.import(@config)
90
90
  end
91
91
 
92
92
  expect(:organisations).to have_values(
@@ -118,7 +118,7 @@ describe BeetleETL do
118
118
  )
119
119
 
120
120
  Timecop.freeze(time3) do
121
- BeetleETL.import
121
+ BeetleETL.import(@config)
122
122
  end
123
123
 
124
124
  expect(:organisations).to have_values(