beetle_etl 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/.byebug_history +8 -0
  3. data/.travis.yml +6 -1
  4. data/README.md +31 -9
  5. data/beetle_etl.gemspec +1 -1
  6. data/lib/beetle_etl.rb +7 -49
  7. data/lib/beetle_etl/configuration.rb +39 -0
  8. data/lib/beetle_etl/dsl/dsl.rb +6 -2
  9. data/lib/beetle_etl/dsl/transformation.rb +2 -2
  10. data/lib/beetle_etl/dsl/transformation_loader.rb +4 -3
  11. data/lib/beetle_etl/import.rb +15 -11
  12. data/lib/beetle_etl/naming.rb +10 -20
  13. data/lib/beetle_etl/reporter.rb +3 -2
  14. data/lib/beetle_etl/step_runner/async_step_runner.rb +6 -4
  15. data/lib/beetle_etl/steps/create_stage.rb +2 -2
  16. data/lib/beetle_etl/steps/load.rb +2 -2
  17. data/lib/beetle_etl/steps/map_relations.rb +2 -2
  18. data/lib/beetle_etl/steps/step.rb +23 -4
  19. data/lib/beetle_etl/steps/transform.rb +2 -2
  20. data/lib/beetle_etl/testing.rb +10 -5
  21. data/lib/beetle_etl/testing/test_wrapper.rb +4 -4
  22. data/lib/beetle_etl/version.rb +1 -1
  23. data/spec/beetle_etl_spec.rb +6 -38
  24. data/spec/configuration_spec.rb +66 -0
  25. data/spec/dsl/dsl_spec.rb +9 -3
  26. data/spec/dsl/transformation_loader_spec.rb +9 -8
  27. data/spec/dsl/transformation_spec.rb +9 -7
  28. data/spec/feature/feature_spec.rb +8 -8
  29. data/spec/reporter_spec.rb +5 -2
  30. data/spec/spec_helper.rb +4 -5
  31. data/spec/steps/assign_ids_spec.rb +7 -7
  32. data/spec/steps/create_stage_spec.rb +14 -12
  33. data/spec/steps/load_spec.rb +9 -7
  34. data/spec/steps/map_relations_spec.rb +14 -8
  35. data/spec/steps/step_spec.rb +5 -3
  36. data/spec/steps/table_diff_spec.rb +7 -6
  37. data/spec/steps/transform_spec.rb +8 -4
  38. data/spec/testing_spec.rb +1 -1
  39. metadata +9 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0c5a5a3f4d0b4170ccb1581a50a2fccaf32be2cb
4
- data.tar.gz: c8beb0f668010410cadefe1ae7ab7c7708032acc
3
+ metadata.gz: 6a726a9734d6866687319a6742cc8db41ef68b64
4
+ data.tar.gz: 82be15d660033bd3d957879ec351c0479b26623f
5
5
  SHA512:
6
- metadata.gz: 3b88ee3cd93eb9344b95bc0b31e01da0a1419d90115fe7a435541b8a336cc3eef8dbad68df819f9993dab3f326be7a91f7b89f8f1e3133b81584834c8f973d65
7
- data.tar.gz: e696f99dee13095e6ac7ee32a4fc7e0364744bafbc28caca41b8ab5ce87af3c66e832493b116070545401e9e43bbc34d7582f9d004991beac6b994d3a2d2a9f4
6
+ metadata.gz: 01c1408c035afb9d0dcadb9382a3db442318f48c74517f1376c3a6ef615675ef891df8559c172be6e6b1f7f47d2f29c4a6965493a613d93a24d439c40f224246
7
+ data.tar.gz: 111f73fd2692ed88f4ce91c6f4da838efe13d1d12fc00bfb4f63cb00383509ffa1d171bf265362ded8f01ddde7f0a0e0659d552f7be8b31188c94bb4bb984e59
@@ -0,0 +1,8 @@
1
+ continue
2
+ backtrace
3
+ stack
4
+ trace
5
+ c
6
+ continue
7
+ c
8
+ target_table_name
@@ -1,11 +1,16 @@
1
1
  language: ruby
2
2
  rvm:
3
3
  - 2.0.0
4
- - 2.1.2
4
+ - 2.1.0
5
+ - 2.2.0
6
+ - 2.3.0
5
7
  addons:
6
8
  postgresql: "9.3"
7
9
  code_climate:
8
10
  repo_token: fcd6d8c28da900609a2cf903716d858621b8ce68152edbcebe6908a9a3f5d3d5
11
+ before_install:
12
+ - gem update --system
13
+ - gem update bundler
9
14
 
10
15
  before_script:
11
16
  - psql -c 'create database travis_ci_test;' -U postgres
data/README.md CHANGED
@@ -32,12 +32,34 @@ Make sure the tables you want to import contain columns named ```external_id```
32
32
 
33
33
  ### Configuration
34
34
 
35
- BeetleETL.configure do |config|
36
- config.transformation_file = # path to your transformation file
37
- config.database_config = # sequel database config
38
- # or config.database = # sequel database instance
39
- config.external_source = ‘name_of_your_source’
40
- config.logger = Logger.new(STDOUT)
35
+ Create a configuration object
36
+
37
+ configuration = BeetleETL::Configuration.new do |config|
38
+ # path to your transformation file
39
+ config.transformation_file = "../my_fancy_transformations"
40
+
41
+ # sequel database config
42
+ config.database_config = {
43
+ adapter: 'postgres',
44
+ encoding: 'utf8',
45
+ host: 'my_host',
46
+ database: 'my_database',
47
+ username: 'foo',
48
+ password: 'bar',
49
+ pool: 5,
50
+ pool_timeout: 360,
51
+ connect_timeout: 360
52
+ }
53
+ # or config.database = # sequel database instance
54
+
55
+ # name of your source
56
+ config.external_source = "important_data"
57
+
58
+ # target schema in case you use postgres schemas
59
+ config.target_schema = "public" # default
60
+
61
+ # logger
62
+ config.logger = Logger.new(STDOUT) # default
41
63
  end
42
64
 
43
65
  ### Defining Imports
@@ -66,8 +88,8 @@ Fill a ```transformation``` file with import directives like this:
66
88
  ON data.org_id = o.id
67
89
  SQL
68
90
  end
69
-
70
-
91
+
92
+
71
93
  ```import``` takes the name of the table you want to fill and the configuration as arguments.
72
94
  With ```columns``` you define what columns BeetleETL is supposed to fill in your application’s table.
73
95
  The ```query``` transforms the data. Make sure that you insert into ```#{stage_table}```; the name of the actual table that the query inserts into will be filled in by BeetleETL at runtime.
@@ -76,7 +98,7 @@ Define any foreign references your table has to other tables using the ```refrec
76
98
 
77
99
  ### Running BeetleETL
78
100
 
79
- BeetleETL.import
101
+ BeetleETL.import(configuration)
80
102
 
81
103
  ## Development
82
104
 
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.add_runtime_dependency 'sequel', '>= 4.0.0'
22
22
  spec.add_runtime_dependency 'activesupport', '>= 4.0.0'
23
23
 
24
- spec.add_development_dependency 'bundler', '~> 1.6'
24
+ spec.add_development_dependency 'bundler', '~> 1.11'
25
25
  spec.add_development_dependency 'rspec', '>= 3.0.0'
26
26
  spec.add_development_dependency 'timecop', '>= 0.7.0'
27
27
  spec.add_development_dependency 'pg', '>= 0.18.0'
@@ -5,7 +5,7 @@ require 'logger'
5
5
 
6
6
  module BeetleETL
7
7
 
8
- InvalidConfigurationError = Class.new(StandardError)
8
+ require 'beetle_etl/configuration'
9
9
 
10
10
  require 'beetle_etl/dsl/dsl'
11
11
  require 'beetle_etl/dsl/transformation'
@@ -28,61 +28,19 @@ module BeetleETL
28
28
  require 'beetle_etl/import'
29
29
  require 'beetle_etl/reporter'
30
30
 
31
- class Configuration
32
- attr_accessor \
33
- :database_config,
34
- :database,
35
- :transformation_file,
36
- :stage_schema,
37
- :target_schema,
38
- :external_source,
39
- :logger
40
-
41
- def initialize
42
- @target_schema = 'public'
43
- @logger = ::Logger.new(STDOUT)
44
- end
45
- end
46
-
47
31
  class << self
48
32
 
49
- def import
33
+ def import(config = Configuration.new)
34
+ yield config if block_given?
35
+
50
36
  begin
51
- report = Import.new.run
52
- Reporter.new(report).log_summary
37
+ report = Import.new(config).run
38
+ Reporter.new(config, report).log_summary
53
39
  report
54
40
  ensure
55
- @database.disconnect if @database
56
- end
57
- end
58
-
59
- def configure
60
- yield(config)
61
- end
62
-
63
- def config
64
- @config ||= Configuration.new
65
- end
66
-
67
- def logger
68
- config.logger
69
- end
70
-
71
- def database
72
- if config.database
73
- config.database
74
- elsif config.database_config
75
- @database ||= Sequel.connect(config.database_config)
76
- else
77
- msg = "Either Sequel connection database_config or a Sequel Database object required"
78
- raise InvalidConfigurationError.new(msg)
41
+ config.disconnect_database
79
42
  end
80
43
  end
81
44
 
82
- def reset
83
- @config = nil
84
- @database = nil
85
- end
86
-
87
45
  end
88
46
  end
@@ -0,0 +1,39 @@
1
+ module BeetleETL
2
+ InvalidConfigurationError = Class.new(StandardError)
3
+
4
+ class Configuration
5
+ attr_accessor \
6
+ :transformation_file,
7
+ :stage_schema,
8
+ :external_source,
9
+ :logger
10
+
11
+ attr_writer \
12
+ :database,
13
+ :database_config,
14
+ :target_schema
15
+
16
+ def initialize
17
+ @target_schema = 'public'
18
+ @logger = ::Logger.new(STDOUT)
19
+ end
20
+
21
+ def database
22
+ if [@database, @database_config].none?
23
+ msg = "Either Sequel connection database_config or a Sequel Database object required"
24
+ raise InvalidConfigurationError.new(msg)
25
+ end
26
+
27
+ @database ||= Sequel.connect(@database_config)
28
+ end
29
+
30
+ def disconnect_database
31
+ database.disconnect if @database_config
32
+ end
33
+
34
+ def target_schema
35
+ @target_schema != 'public' ? @target_schema : nil
36
+ end
37
+
38
+ end
39
+ end
@@ -3,7 +3,8 @@ module BeetleETL
3
3
 
4
4
  attr_reader :column_names, :relations, :query_strings
5
5
 
6
- def initialize(table_name)
6
+ def initialize(config, table_name)
7
+ @config = config
7
8
  @table_name = table_name
8
9
  @column_names = []
9
10
  @relations = {}
@@ -25,7 +26,10 @@ module BeetleETL
25
26
  # query helper methods
26
27
 
27
28
  def stage_table(table_name = nil)
28
- BeetleETL::Naming.stage_table_name_sql(table_name || @table_name)
29
+ BeetleETL::Naming.stage_table_name_sql(
30
+ @config.external_source,
31
+ table_name || @table_name
32
+ )
29
33
  end
30
34
 
31
35
  def combined_key(*args)
@@ -5,9 +5,9 @@ module BeetleETL
5
5
 
6
6
  attr_reader :table_name
7
7
 
8
- def initialize(table_name, setup, helpers = nil)
8
+ def initialize(config, table_name, setup, helpers = nil)
9
9
  @table_name = table_name
10
- @parsed = DSL.new(table_name).tap do |dsl|
10
+ @parsed = DSL.new(config, table_name).tap do |dsl|
11
11
  dsl.instance_exec(&helpers) if helpers
12
12
  dsl.instance_exec(&setup)
13
13
  end
@@ -1,18 +1,19 @@
1
1
  module BeetleETL
2
2
  class TransformationLoader
3
3
 
4
- def initialize
4
+ def initialize(config)
5
+ @config = config
5
6
  @transformations = []
6
7
  @helper_definitions = nil
7
8
  end
8
9
 
9
10
  def load
10
- File.open(BeetleETL.config.transformation_file, 'r') do |file|
11
+ File.open(@config.transformation_file, 'r') do |file|
11
12
  instance_eval file.read
12
13
  end
13
14
 
14
15
  @transformations.map do |(table_name, setup)|
15
- Transformation.new(table_name, setup, @helper_definitions)
16
+ Transformation.new(@config, table_name, setup, @helper_definitions)
16
17
  end
17
18
  end
18
19
 
@@ -3,6 +3,10 @@ require 'active_support/core_ext/hash/deep_merge'
3
3
  module BeetleETL
4
4
  class Import
5
5
 
6
+ def initialize(config)
7
+ @config = config
8
+ end
9
+
6
10
  def run
7
11
  setup
8
12
  import
@@ -12,14 +16,14 @@ module BeetleETL
12
16
 
13
17
  def setup
14
18
  transformations.each do |t|
15
- CreateStage.new(t.table_name, t.relations, t.column_names).run
19
+ CreateStage.new(@config, t.table_name, t.relations, t.column_names).run
16
20
  end
17
21
  end
18
22
 
19
23
  def import
20
- data_report = AsyncStepRunner.new(data_steps).run
21
- load_report = BeetleETL.database.transaction do
22
- AsyncStepRunner.new(load_steps).run
24
+ data_report = AsyncStepRunner.new(@config, data_steps).run
25
+ load_report = @config.database.transaction do
26
+ AsyncStepRunner.new(@config, load_steps).run
23
27
  end
24
28
 
25
29
  data_report.deep_merge load_report
@@ -27,7 +31,7 @@ module BeetleETL
27
31
 
28
32
  def cleanup
29
33
  transformations.each do |t|
30
- DropStage.new(t.table_name).run
34
+ DropStage.new(@config, t.table_name).run
31
35
  end
32
36
  end
33
37
 
@@ -36,22 +40,22 @@ module BeetleETL
36
40
  def data_steps
37
41
  transformations.flat_map do |t|
38
42
  [
39
- Transform.new(t.table_name, t.dependencies, t.query),
40
- MapRelations.new(t.table_name, t.relations),
41
- TableDiff.new(t.table_name),
42
- AssignIds.new(t.table_name),
43
+ Transform.new(@config, t.table_name, t.dependencies, t.query),
44
+ MapRelations.new(@config, t.table_name, t.relations),
45
+ TableDiff.new(@config, t.table_name),
46
+ AssignIds.new(@config, t.table_name),
43
47
  ]
44
48
  end
45
49
  end
46
50
 
47
51
  def load_steps
48
52
  transformations.map do |t|
49
- Load.new(t.table_name, t.relations)
53
+ Load.new(@config, t.table_name, t.relations)
50
54
  end
51
55
  end
52
56
 
53
57
  def transformations
54
- @transformations ||= TransformationLoader.new.load
58
+ @transformations ||= TransformationLoader.new(@config).load
55
59
  end
56
60
 
57
61
  end
@@ -5,32 +5,22 @@ module BeetleETL
5
5
 
6
6
  extend self
7
7
 
8
- def stage_table_name(table_name = nil)
9
- name = (table_name || @table_name).to_s
10
- digest = Digest::MD5.hexdigest(name)
11
- "#{BeetleETL.config.external_source}-#{name}-#{digest}"[0, 63]
8
+ def stage_table_name(external_source, table_name)
9
+ digest = Digest::MD5.hexdigest(table_name.to_s)
10
+ "#{external_source.to_s}-#{table_name.to_s}-#{digest}"[0, 63]
12
11
  end
13
12
 
14
- def stage_table_name_sql(table_name = nil)
15
- %Q("#{stage_table_name(table_name)}")
13
+ def stage_table_name_sql(external_source, table_name)
14
+ %Q("#{stage_table_name(external_source, table_name)}")
16
15
  end
17
16
 
18
- def target_table_name(table_name = nil)
19
- name = (table_name || @table_name).to_s
20
- [target_schema, name].compact.join('.')
17
+ def target_table_name(target_schema, table_name)
18
+ schema = target_schema ? target_schema.to_s : nil
19
+ [schema, table_name.to_s].compact.join('.')
21
20
  end
22
21
 
23
- def target_table_name_sql(table_name = nil)
24
- name = (table_name || @table_name).to_s
25
- target_table_name= [target_schema, name].compact.join('"."')
26
- %Q("#{target_table_name}")
27
- end
28
-
29
- private
30
-
31
- def target_schema
32
- target_schema = BeetleETL.config.target_schema
33
- target_schema != 'public' ? target_schema : nil
22
+ def target_table_name_sql(target_schema, table_name)
23
+ %Q("#{target_table_name(target_schema, table_name)}")
34
24
  end
35
25
 
36
26
  end
@@ -1,12 +1,13 @@
1
1
  module BeetleETL
2
2
  class Reporter
3
3
 
4
- def initialize(report)
4
+ def initialize(config, report)
5
+ @config = config
5
6
  @report = report
6
7
  end
7
8
 
8
9
  def log_summary
9
- BeetleETL.logger.info(summary)
10
+ @config.logger.info(summary)
10
11
  end
11
12
 
12
13
  private
@@ -1,7 +1,9 @@
1
1
  module BeetleETL
2
2
  class AsyncStepRunner
3
3
 
4
- def initialize(steps)
4
+ def initialize(config, steps)
5
+ @config = config
6
+
5
7
  @dependency_resolver = DependencyResolver.new(steps)
6
8
  @steps = steps
7
9
 
@@ -39,14 +41,14 @@ module BeetleETL
39
41
  def run_step_async(step)
40
42
  Thread.new do
41
43
  begin
42
- BeetleETL.logger.info("started step #{step.name}")
44
+ @config.logger.info("started step #{step.name}")
43
45
 
44
46
  started_at = Time.now
45
47
  step.run
46
48
  finished_at = Time.now
47
49
 
48
50
  duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
49
- BeetleETL.logger.info("finished #{step.name} in #{duration}")
51
+ @config.logger.info("finished #{step.name} in #{duration}")
50
52
 
51
53
  @queue.push [
52
54
  step.table_name,
@@ -55,7 +57,7 @@ module BeetleETL
55
57
  ]
56
58
 
57
59
  rescue => e
58
- BeetleETL.logger.fatal(e.message)
60
+ @config.logger.fatal(e.message)
59
61
  raise e
60
62
  end
61
63
  end.abort_on_exception = true