beetle_etl 1.0.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/.byebug_history +8 -0
  3. data/.travis.yml +6 -1
  4. data/README.md +31 -9
  5. data/beetle_etl.gemspec +1 -1
  6. data/lib/beetle_etl.rb +7 -49
  7. data/lib/beetle_etl/configuration.rb +39 -0
  8. data/lib/beetle_etl/dsl/dsl.rb +6 -2
  9. data/lib/beetle_etl/dsl/transformation.rb +2 -2
  10. data/lib/beetle_etl/dsl/transformation_loader.rb +4 -3
  11. data/lib/beetle_etl/import.rb +15 -11
  12. data/lib/beetle_etl/naming.rb +10 -20
  13. data/lib/beetle_etl/reporter.rb +3 -2
  14. data/lib/beetle_etl/step_runner/async_step_runner.rb +6 -4
  15. data/lib/beetle_etl/steps/create_stage.rb +2 -2
  16. data/lib/beetle_etl/steps/load.rb +2 -2
  17. data/lib/beetle_etl/steps/map_relations.rb +2 -2
  18. data/lib/beetle_etl/steps/step.rb +23 -4
  19. data/lib/beetle_etl/steps/transform.rb +2 -2
  20. data/lib/beetle_etl/testing.rb +10 -5
  21. data/lib/beetle_etl/testing/test_wrapper.rb +4 -4
  22. data/lib/beetle_etl/version.rb +1 -1
  23. data/spec/beetle_etl_spec.rb +6 -38
  24. data/spec/configuration_spec.rb +66 -0
  25. data/spec/dsl/dsl_spec.rb +9 -3
  26. data/spec/dsl/transformation_loader_spec.rb +9 -8
  27. data/spec/dsl/transformation_spec.rb +9 -7
  28. data/spec/feature/feature_spec.rb +8 -8
  29. data/spec/reporter_spec.rb +5 -2
  30. data/spec/spec_helper.rb +4 -5
  31. data/spec/steps/assign_ids_spec.rb +7 -7
  32. data/spec/steps/create_stage_spec.rb +14 -12
  33. data/spec/steps/load_spec.rb +9 -7
  34. data/spec/steps/map_relations_spec.rb +14 -8
  35. data/spec/steps/step_spec.rb +5 -3
  36. data/spec/steps/table_diff_spec.rb +7 -6
  37. data/spec/steps/transform_spec.rb +8 -4
  38. data/spec/testing_spec.rb +1 -1
  39. metadata +9 -5
@@ -5,8 +5,8 @@ module BeetleETL
5
5
 
6
6
  class CreateStage < Step
7
7
 
8
- def initialize(table_name, relations, column_names)
9
- super(table_name)
8
+ def initialize(config, table_name, relations, column_names)
9
+ super(config, table_name)
10
10
  @relations = relations
11
11
  @column_names = column_names
12
12
  end
@@ -6,8 +6,8 @@ module BeetleETL
6
6
  transition
7
7
  ]
8
8
 
9
- def initialize(table_name, relations)
10
- super(table_name)
9
+ def initialize(config, table_name, relations)
10
+ super(config, table_name)
11
11
  @relations = relations
12
12
  end
13
13
 
@@ -1,8 +1,8 @@
1
1
  module BeetleETL
2
2
  class MapRelations < Step
3
3
 
4
- def initialize(table_name, relations)
5
- super(table_name)
4
+ def initialize(config, table_name, relations)
5
+ super(config, table_name)
6
6
  @relations = relations
7
7
  end
8
8
 
@@ -2,10 +2,10 @@ module BeetleETL
2
2
 
3
3
  class Step
4
4
 
5
- include BeetleETL::Naming
6
5
  attr_reader :table_name
7
6
 
8
- def initialize(table_name)
7
+ def initialize(config, table_name)
8
+ @config = config
9
9
  @table_name = table_name
10
10
  end
11
11
 
@@ -22,11 +22,30 @@ module BeetleETL
22
22
  end
23
23
 
24
24
  def external_source
25
- BeetleETL.config.external_source
25
+ @config.external_source
26
26
  end
27
27
 
28
28
  def database
29
- BeetleETL.database
29
+ @config.database
30
+ end
31
+
32
+ # naming
33
+
34
+ def stage_table_name
35
+ BeetleETL::Naming.stage_table_name(@config.external_source, @table_name)
36
+ end
37
+
38
+ def stage_table_name_sql(table_name = nil)
39
+ table_name ||= @table_name
40
+ BeetleETL::Naming.stage_table_name_sql(@config.external_source, table_name)
41
+ end
42
+
43
+ def target_table_name
44
+ BeetleETL::Naming.target_table_name(@config.target_schema, @table_name)
45
+ end
46
+
47
+ def target_table_name_sql
48
+ BeetleETL::Naming.target_table_name_sql(@config.target_schema, @table_name)
30
49
  end
31
50
 
32
51
  end
@@ -1,8 +1,8 @@
1
1
  module BeetleETL
2
2
  class Transform < Step
3
3
 
4
- def initialize(table_name, dependencies, query)
5
- super(table_name)
4
+ def initialize(config, table_name, dependencies, query)
5
+ super(config, table_name)
6
6
  @dependencies = dependencies
7
7
  @query = query
8
8
  end
@@ -6,9 +6,14 @@ module BeetleETL
6
6
  TargetTableNotFoundError = Class.new(StandardError)
7
7
  NoTransformationFoundError = Class.new(StandardError)
8
8
 
9
+ def self.configure
10
+ @@config = Configuration.new
11
+ yield(@@config)
12
+ end
13
+
9
14
  def with_stage_tables_for(*table_names, &block)
10
15
  table_names.each do |table_name|
11
- unless BeetleETL.database.table_exists?(table_name)
16
+ unless @@config.database.table_exists?(table_name)
12
17
  raise TargetTableNotFoundError.new <<-MSG
13
18
  Missing target table "#{table_name}".
14
19
  In order to create stage tables, BeetleETL requires the target tables to exist because they provide the column definitions.
@@ -16,12 +21,12 @@ module BeetleETL
16
21
  end
17
22
  end
18
23
 
19
- test_wrapper = TestWrapper.new(table_names)
24
+ test_wrapper = TestWrapper.new(@@config, table_names)
20
25
  test_wrapper.run block
21
26
  end
22
27
 
23
28
  def run_transformation(table_name)
24
- transformations = TransformationLoader.new.load
29
+ transformations = TransformationLoader.new(@@config).load
25
30
 
26
31
  unless transformations.map(&:table_name).include?(table_name)
27
32
  raise NoTransformationFoundError.new <<-MSG
@@ -30,13 +35,13 @@ module BeetleETL
30
35
  end
31
36
 
32
37
  transformation = transformations.find { |t| t.table_name == table_name }
33
- transform = Transform.new(transformation.table_name, transformation.dependencies, transformation.query)
38
+ transform = Transform.new(@@config, transformation.table_name, transformation.dependencies, transformation.query)
34
39
  transform.run
35
40
  end
36
41
 
37
42
 
38
43
  def stage_table_name(table_name)
39
- BeetleETL::Naming.stage_table_name(table_name)
44
+ BeetleETL::Naming.stage_table_name(@@config.external_source, table_name)
40
45
  end
41
46
 
42
47
  end
@@ -1,6 +1,6 @@
1
1
  module BeetleETL
2
2
  module Testing
3
- class TestWrapper < Struct.new(:table_names)
3
+ class TestWrapper < Struct.new(:config, :table_names)
4
4
 
5
5
  def run(block)
6
6
  begin
@@ -15,18 +15,18 @@ module BeetleETL
15
15
 
16
16
  def create_stages
17
17
  transformations.each do |t|
18
- CreateStage.new(t.table_name, t.relations, t.column_names).run
18
+ CreateStage.new(config, t.table_name, t.relations, t.column_names).run
19
19
  end
20
20
  end
21
21
 
22
22
  def drop_stages
23
23
  transformations.each do |t|
24
- DropStage.new(t.table_name).run
24
+ DropStage.new(config, t.table_name).run
25
25
  end
26
26
  end
27
27
 
28
28
  def transformations
29
- @transformations ||= TransformationLoader.new.load.find_all do |transformation|
29
+ @transformations ||= TransformationLoader.new(config).load.find_all do |transformation|
30
30
  table_names.include? transformation.table_name
31
31
  end
32
32
  end
@@ -1,3 +1,3 @@
1
1
  module BeetleETL
2
- VERSION = "1.0.1"
2
+ VERSION = "2.0.0"
3
3
  end
@@ -4,48 +4,16 @@ describe BeetleETL do
4
4
 
5
5
  describe '#import' do
6
6
  it 'runs the import with reporting' do
7
+ config = double(:config, disconnect_database: nil)
8
+ runner = double(:runner)
7
9
  report = double(:report)
8
10
  reporter = double(:reporter, log_summary: nil)
9
11
 
10
- expect(BeetleETL::Import).to receive_message_chain(:new, :run).and_return report
11
- expect(BeetleETL::Reporter).to receive(:new).with(report).and_return reporter
12
- expect(BeetleETL.import).to eql(report)
13
- end
14
- end
15
-
16
- describe '#config' do
17
- it 'returns a configuration object' do
18
- expect(BeetleETL.config).to be_a(BeetleETL::Configuration)
19
- end
20
- end
21
-
22
- describe '#configure' do
23
- it 'allows the configuration to be changed' do
24
- expect(BeetleETL.config.external_source).to be_nil
25
-
26
- BeetleETL.configure { |config| config.external_source = 'foo' }
27
-
28
- expect(BeetleETL.config.external_source).to eql('foo')
29
- end
30
- end
31
-
32
- describe '#database' do
33
- let(:database) { double(:database) }
34
-
35
- it 'returns the Sequel Database object stored in the config' do
36
- BeetleETL.configure { |config| config.database = database }
37
-
38
- expect(BeetleETL.database).to eql(database)
39
- end
40
-
41
- it 'builds and caches a Sequel Database from config when no database is passed' do
42
- database_config = double(:database_config)
43
- BeetleETL.configure { |config| config.database_config = database_config }
44
-
45
- expect(Sequel).to receive(:connect).with(database_config).once { database }
12
+ expect(BeetleETL::Import).to receive(:new).with(config).and_return runner
13
+ expect(runner).to receive(:run).and_return report
14
+ expect(BeetleETL::Reporter).to receive(:new).with(config, report).and_return reporter
46
15
 
47
- expect(BeetleETL.database).to eql(database)
48
- expect(BeetleETL.database).to eql(database)
16
+ expect(BeetleETL.import(config)).to eql(report)
49
17
  end
50
18
  end
51
19
 
@@ -0,0 +1,66 @@
1
+ require 'spec_helper'
2
+
3
+ module BeetleETL
4
+ describe Configuration do
5
+
6
+ subject { Configuration.new }
7
+
8
+ describe "#database" do
9
+ let(:database) { double(:database) }
10
+
11
+ it "returns the object if present" do
12
+ subject.database = database
13
+
14
+ expect(subject.database).to eql(database)
15
+ end
16
+
17
+ it "builds a Sequel Database from config when no database is passed" do
18
+ database_config = double(:database_config)
19
+ subject.database_config = database_config
20
+
21
+ expect(Sequel).to receive(:connect).with(database_config).once { database }
22
+
23
+ expect(subject.database).to eql(database)
24
+ expect(subject.database).to eql(database)
25
+ end
26
+
27
+ it "raises an error if no database or database_config is passed" do
28
+ expect { subject.database }
29
+ .to raise_error(BeetleETL::InvalidConfigurationError)
30
+ end
31
+ end
32
+
33
+ describe "#disconnect_database" do
34
+ let(:database) { double(:database) }
35
+
36
+ it "disconnects from database if database_config was passed" do
37
+ database_config = double(:database_config)
38
+
39
+ expect(Sequel).to receive(:connect).with(database_config) { database }
40
+ expect(database).to receive(:disconnect)
41
+
42
+ subject.database_config = database_config
43
+ subject.disconnect_database
44
+ end
45
+
46
+ it "does not disconnect from database if database object was passed" do
47
+ expect(database).not_to receive(:disconnect)
48
+
49
+ subject.database = database
50
+ subject.disconnect_database
51
+ end
52
+ end
53
+
54
+ describe "#target_schema" do
55
+ it "returns nil if target_schema is 'public'" do
56
+ subject.target_schema = "public"
57
+ expect(subject.target_schema).to be_nil
58
+ end
59
+
60
+ it "returns target_schema if target_schema is not 'public'" do
61
+ subject.target_schema = "foo"
62
+ expect(subject.target_schema).to eql("foo")
63
+ end
64
+ end
65
+ end
66
+ end
@@ -3,18 +3,24 @@ require 'spec_helper'
3
3
  module BeetleETL
4
4
  describe DSL do
5
5
 
6
- subject { DSL.new(:foo_table) }
6
+ let(:config) do
7
+ Configuration.new.tap do |c|
8
+ c.external_source = "bar"
9
+ end
10
+ end
11
+
12
+ subject { DSL.new(config, :foo_table) }
7
13
 
8
14
  describe '#stage_table' do
9
15
  it 'returns the current stage table name' do
10
16
  expect(subject.stage_table).to eql(
11
- BeetleETL::Naming.stage_table_name_sql(:foo_table)
17
+ BeetleETL::Naming.stage_table_name_sql("bar", :foo_table)
12
18
  )
13
19
  end
14
20
 
15
21
  it 'returns the stage table name for the given table' do
16
22
  expect(subject.stage_table(:bar_table)).to eql(
17
- BeetleETL::Naming.stage_table_name_sql(:bar_table)
23
+ BeetleETL::Naming.stage_table_name_sql("bar", :bar_table)
18
24
  )
19
25
  end
20
26
  end
@@ -3,9 +3,7 @@ require 'spec_helper'
3
3
  module BeetleETL
4
4
  describe TransformationLoader do
5
5
 
6
- subject { TransformationLoader.new }
7
-
8
- before do
6
+ let(:config) do
9
7
  data_file = tempfile_with_contents <<-FILE
10
8
  import :foo do
11
9
  'foo'
@@ -20,15 +18,18 @@ module BeetleETL
20
18
  end
21
19
  FILE
22
20
 
23
- BeetleETL.configure do |config|
24
- config.transformation_file = data_file.path
21
+ Configuration.new.tap do |c|
22
+ c.transformation_file = data_file.path
25
23
  end
26
24
  end
27
25
 
26
+ subject { TransformationLoader.new(config) }
27
+
28
28
  describe '#load' do
29
29
  it 'loads transformations from the data file' do
30
- expect(Transformation).to receive(:new) do |table_name, config, helpers|
31
- expect(table_name.to_s).to eql(config.call)
30
+ expect(Transformation).to receive(:new) do |configuration, table_name, setup, helpers|
31
+ expect(configuration).to eql(config)
32
+ expect(table_name.to_s).to eql(setup.call)
32
33
  expect(helpers.call).to eql("baz")
33
34
  end.exactly(2).times
34
35
 
@@ -36,7 +37,7 @@ module BeetleETL
36
37
  end
37
38
 
38
39
  it 'returns the list of transformations' do
39
- allow(Transformation).to receive(:new) do |table_name, config, helpers|
40
+ allow(Transformation).to receive(:new) do |configuration, table_name, setup, helpers|
40
41
  table_name
41
42
  end
42
43
 
@@ -3,9 +3,11 @@ require 'spec_helper'
3
3
  module BeetleETL
4
4
  describe Transformation do
5
5
 
6
+ let(:config) { Configuration.new }
7
+
6
8
  describe '#table_name' do
7
9
  it 'returns the given table name' do
8
- transformation = Transformation.new(:table, Proc.new {})
10
+ transformation = Transformation.new(config, :table, Proc.new {})
9
11
  expect(transformation.table_name).to eql(:table)
10
12
  end
11
13
  end
@@ -15,7 +17,7 @@ module BeetleETL
15
17
  setup = Proc.new do
16
18
  columns :payload_1, 'payload_2'
17
19
  end
18
- transformation = Transformation.new(:table, setup)
20
+ transformation = Transformation.new(config, :table, setup)
19
21
 
20
22
  expect(transformation.column_names).to match_array([
21
23
  :payload_1, :payload_2
@@ -23,7 +25,7 @@ module BeetleETL
23
25
  end
24
26
 
25
27
  it 'defaults to an empty array if no columns are defined' do
26
- transformation = Transformation.new(:table, Proc.new {})
28
+ transformation = Transformation.new(config, :table, Proc.new {})
27
29
 
28
30
  expect(transformation.column_names).to match_array([])
29
31
  end
@@ -35,7 +37,7 @@ module BeetleETL
35
37
  references :foreign_table, on: :foreign_table_id
36
38
  references :another_foreign_table, on: :another_foreign_table_id
37
39
  end
38
- transformation = Transformation.new(:table, setup)
40
+ transformation = Transformation.new(config, :table, setup)
39
41
 
40
42
  expect(transformation.relations).to eql({
41
43
  foreign_table_id: :foreign_table,
@@ -50,7 +52,7 @@ module BeetleETL
50
52
  references :foreign_table, on: :foreign_table_id
51
53
  references :another_foreign_table, on: :another_foreign_table_id
52
54
  end
53
- transformation = Transformation.new(:table, setup)
55
+ transformation = Transformation.new(config, :table, setup)
54
56
 
55
57
  expect(transformation.dependencies).to eql(Set.new([:foreign_table, :another_foreign_table]))
56
58
  end
@@ -66,7 +68,7 @@ module BeetleETL
66
68
  query "SELECT '#{foo}' FROM some_table"
67
69
  end
68
70
 
69
- transformation = Transformation.new(:table, setup, helpers)
71
+ transformation = Transformation.new(config, :table, setup, helpers)
70
72
 
71
73
  expect(transformation.query).to eql(
72
74
  "SELECT 'foo_string' FROM some_table"
@@ -78,7 +80,7 @@ module BeetleETL
78
80
  query "SOME QUERY"
79
81
  query "ANOTHER QUERY"
80
82
  end
81
- transformation = Transformation.new(:table, setup)
83
+ transformation = Transformation.new(config, :table, setup)
82
84
 
83
85
  expect(transformation.query).to eql(
84
86
  "SOME QUERY;ANOTHER QUERY"
@@ -23,11 +23,11 @@ describe BeetleETL do
23
23
  database_config_path = File.expand_path('../support/database.yml', File.dirname(__FILE__))
24
24
  database_config = YAML.load(File.read(database_config_path))
25
25
 
26
- BeetleETL.configure do |config|
27
- config.transformation_file = File.expand_path('../example_transform.rb', __FILE__)
28
- config.database_config = database_config
29
- config.external_source = 'source_name'
30
- config.logger = Logger.new(Tempfile.new("log"))
26
+ @config = BeetleETL::Configuration.new.tap do |c|
27
+ c.transformation_file = File.expand_path('../example_transform.rb', __FILE__)
28
+ c.database_config = database_config
29
+ c.external_source = 'source_name'
30
+ c.logger = Logger.new(Tempfile.new("log"))
31
31
  end
32
32
  end
33
33
 
@@ -54,7 +54,7 @@ describe BeetleETL do
54
54
  )
55
55
 
56
56
  Timecop.freeze(time1) do
57
- BeetleETL.import
57
+ BeetleETL.import(@config)
58
58
  end
59
59
 
60
60
  expect(:organisations).to have_values(
@@ -86,7 +86,7 @@ describe BeetleETL do
86
86
  )
87
87
 
88
88
  Timecop.freeze(time2) do
89
- BeetleETL.import
89
+ BeetleETL.import(@config)
90
90
  end
91
91
 
92
92
  expect(:organisations).to have_values(
@@ -118,7 +118,7 @@ describe BeetleETL do
118
118
  )
119
119
 
120
120
  Timecop.freeze(time3) do
121
- BeetleETL.import
121
+ BeetleETL.import(@config)
122
122
  end
123
123
 
124
124
  expect(:organisations).to have_values(