beetle_etl 1.0.1 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/.byebug_history +8 -0
  3. data/.travis.yml +6 -1
  4. data/README.md +31 -9
  5. data/beetle_etl.gemspec +1 -1
  6. data/lib/beetle_etl.rb +7 -49
  7. data/lib/beetle_etl/configuration.rb +39 -0
  8. data/lib/beetle_etl/dsl/dsl.rb +6 -2
  9. data/lib/beetle_etl/dsl/transformation.rb +2 -2
  10. data/lib/beetle_etl/dsl/transformation_loader.rb +4 -3
  11. data/lib/beetle_etl/import.rb +15 -11
  12. data/lib/beetle_etl/naming.rb +10 -20
  13. data/lib/beetle_etl/reporter.rb +3 -2
  14. data/lib/beetle_etl/step_runner/async_step_runner.rb +6 -4
  15. data/lib/beetle_etl/steps/create_stage.rb +2 -2
  16. data/lib/beetle_etl/steps/load.rb +2 -2
  17. data/lib/beetle_etl/steps/map_relations.rb +2 -2
  18. data/lib/beetle_etl/steps/step.rb +23 -4
  19. data/lib/beetle_etl/steps/transform.rb +2 -2
  20. data/lib/beetle_etl/testing.rb +10 -5
  21. data/lib/beetle_etl/testing/test_wrapper.rb +4 -4
  22. data/lib/beetle_etl/version.rb +1 -1
  23. data/spec/beetle_etl_spec.rb +6 -38
  24. data/spec/configuration_spec.rb +66 -0
  25. data/spec/dsl/dsl_spec.rb +9 -3
  26. data/spec/dsl/transformation_loader_spec.rb +9 -8
  27. data/spec/dsl/transformation_spec.rb +9 -7
  28. data/spec/feature/feature_spec.rb +8 -8
  29. data/spec/reporter_spec.rb +5 -2
  30. data/spec/spec_helper.rb +4 -5
  31. data/spec/steps/assign_ids_spec.rb +7 -7
  32. data/spec/steps/create_stage_spec.rb +14 -12
  33. data/spec/steps/load_spec.rb +9 -7
  34. data/spec/steps/map_relations_spec.rb +14 -8
  35. data/spec/steps/step_spec.rb +5 -3
  36. data/spec/steps/table_diff_spec.rb +7 -6
  37. data/spec/steps/transform_spec.rb +8 -4
  38. data/spec/testing_spec.rb +1 -1
  39. metadata +9 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0c5a5a3f4d0b4170ccb1581a50a2fccaf32be2cb
4
- data.tar.gz: c8beb0f668010410cadefe1ae7ab7c7708032acc
3
+ metadata.gz: 6a726a9734d6866687319a6742cc8db41ef68b64
4
+ data.tar.gz: 82be15d660033bd3d957879ec351c0479b26623f
5
5
  SHA512:
6
- metadata.gz: 3b88ee3cd93eb9344b95bc0b31e01da0a1419d90115fe7a435541b8a336cc3eef8dbad68df819f9993dab3f326be7a91f7b89f8f1e3133b81584834c8f973d65
7
- data.tar.gz: e696f99dee13095e6ac7ee32a4fc7e0364744bafbc28caca41b8ab5ce87af3c66e832493b116070545401e9e43bbc34d7582f9d004991beac6b994d3a2d2a9f4
6
+ metadata.gz: 01c1408c035afb9d0dcadb9382a3db442318f48c74517f1376c3a6ef615675ef891df8559c172be6e6b1f7f47d2f29c4a6965493a613d93a24d439c40f224246
7
+ data.tar.gz: 111f73fd2692ed88f4ce91c6f4da838efe13d1d12fc00bfb4f63cb00383509ffa1d171bf265362ded8f01ddde7f0a0e0659d552f7be8b31188c94bb4bb984e59
@@ -0,0 +1,8 @@
1
+ continue
2
+ backtrace
3
+ stack
4
+ trace
5
+ c
6
+ continue
7
+ c
8
+ target_table_name
@@ -1,11 +1,16 @@
1
1
  language: ruby
2
2
  rvm:
3
3
  - 2.0.0
4
- - 2.1.2
4
+ - 2.1.0
5
+ - 2.2.0
6
+ - 2.3.0
5
7
  addons:
6
8
  postgresql: "9.3"
7
9
  code_climate:
8
10
  repo_token: fcd6d8c28da900609a2cf903716d858621b8ce68152edbcebe6908a9a3f5d3d5
11
+ before_install:
12
+ - gem update --system
13
+ - gem update bundler
9
14
 
10
15
  before_script:
11
16
  - psql -c 'create database travis_ci_test;' -U postgres
data/README.md CHANGED
@@ -32,12 +32,34 @@ Make sure the tables you want to import contain columns named ```external_id```
32
32
 
33
33
  ### Configuration
34
34
 
35
- BeetleETL.configure do |config|
36
- config.transformation_file = # path to your transformation file
37
- config.database_config = # sequel database config
38
- # or config.database = # sequel database instance
39
- config.external_source = ‘name_of_your_source’
40
- config.logger = Logger.new(STDOUT)
35
+ Create a configuration object
36
+
37
+ configuration = BeetleETL::Configuration.new do |config|
38
+ # path to your transformation file
39
+ config.transformation_file = "../my_fancy_transformations"
40
+
41
+ # sequel database config
42
+ config.database_config = {
43
+ adapter: 'postgres',
44
+ encoding: 'utf8',
45
+ host: 'my_host',
46
+ database: 'my_database',
47
+ username: 'foo',
48
+ password: 'bar',
49
+ pool: 5,
50
+ pool_timeout: 360,
51
+ connect_timeout: 360
52
+ }
53
+ # or config.database = # sequel database instance
54
+
55
+ # name of your source
56
+ config.external_source = "important_data"
57
+
58
+ # target schema in case you use postgres schemas
59
+ config.target_schema = "public" # default
60
+
61
+ # logger
62
+ config.logger = Logger.new(STDOUT) # default
41
63
  end
42
64
 
43
65
  ### Defining Imports
@@ -66,8 +88,8 @@ Fill a ```transformation``` file with import directives like this:
66
88
  ON data.org_id = o.id
67
89
  SQL
68
90
  end
69
-
70
-
91
+
92
+
71
93
  ```import``` takes the name of the table you want to fill and the configuration as arguments.
72
94
  With ```columns``` you define what columns BeetleETL is supposed to fill in your application’s table.
73
95
  The ```query``` transforms the data. Make sure that you insert into ```#{stage_table}```; the name of the actual table that this inserts into will be filled in by BeetleETL at runtime.
@@ -76,7 +98,7 @@ Define any foreign references your table has to other tables using the ```refrec
76
98
 
77
99
  ### Running BeetleETL
78
100
 
79
- BeetleETL.import
101
+ BeetleETL.import(configuration)
80
102
 
81
103
  ## Development
82
104
 
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.add_runtime_dependency 'sequel', '>= 4.0.0'
22
22
  spec.add_runtime_dependency 'activesupport', '>= 4.0.0'
23
23
 
24
- spec.add_development_dependency 'bundler', '~> 1.6'
24
+ spec.add_development_dependency 'bundler', '~> 1.11'
25
25
  spec.add_development_dependency 'rspec', '>= 3.0.0'
26
26
  spec.add_development_dependency 'timecop', '>= 0.7.0'
27
27
  spec.add_development_dependency 'pg', '>= 0.18.0'
@@ -5,7 +5,7 @@ require 'logger'
5
5
 
6
6
  module BeetleETL
7
7
 
8
- InvalidConfigurationError = Class.new(StandardError)
8
+ require 'beetle_etl/configuration'
9
9
 
10
10
  require 'beetle_etl/dsl/dsl'
11
11
  require 'beetle_etl/dsl/transformation'
@@ -28,61 +28,19 @@ module BeetleETL
28
28
  require 'beetle_etl/import'
29
29
  require 'beetle_etl/reporter'
30
30
 
31
- class Configuration
32
- attr_accessor \
33
- :database_config,
34
- :database,
35
- :transformation_file,
36
- :stage_schema,
37
- :target_schema,
38
- :external_source,
39
- :logger
40
-
41
- def initialize
42
- @target_schema = 'public'
43
- @logger = ::Logger.new(STDOUT)
44
- end
45
- end
46
-
47
31
  class << self
48
32
 
49
- def import
33
+ def import(config = Configuration.new)
34
+ yield config if block_given?
35
+
50
36
  begin
51
- report = Import.new.run
52
- Reporter.new(report).log_summary
37
+ report = Import.new(config).run
38
+ Reporter.new(config, report).log_summary
53
39
  report
54
40
  ensure
55
- @database.disconnect if @database
56
- end
57
- end
58
-
59
- def configure
60
- yield(config)
61
- end
62
-
63
- def config
64
- @config ||= Configuration.new
65
- end
66
-
67
- def logger
68
- config.logger
69
- end
70
-
71
- def database
72
- if config.database
73
- config.database
74
- elsif config.database_config
75
- @database ||= Sequel.connect(config.database_config)
76
- else
77
- msg = "Either Sequel connection database_config or a Sequel Database object required"
78
- raise InvalidConfigurationError.new(msg)
41
+ config.disconnect_database
79
42
  end
80
43
  end
81
44
 
82
- def reset
83
- @config = nil
84
- @database = nil
85
- end
86
-
87
45
  end
88
46
  end
@@ -0,0 +1,39 @@
1
+ module BeetleETL
2
+ InvalidConfigurationError = Class.new(StandardError)
3
+
4
+ class Configuration
5
+ attr_accessor \
6
+ :transformation_file,
7
+ :stage_schema,
8
+ :external_source,
9
+ :logger
10
+
11
+ attr_writer \
12
+ :database,
13
+ :database_config,
14
+ :target_schema
15
+
16
+ def initialize
17
+ @target_schema = 'public'
18
+ @logger = ::Logger.new(STDOUT)
19
+ end
20
+
21
+ def database
22
+ if [@database, @database_config].none?
23
+ msg = "Either Sequel connection database_config or a Sequel Database object required"
24
+ raise InvalidConfigurationError.new(msg)
25
+ end
26
+
27
+ @database ||= Sequel.connect(@database_config)
28
+ end
29
+
30
+ def disconnect_database
31
+ database.disconnect if @database_config
32
+ end
33
+
34
+ def target_schema
35
+ @target_schema != 'public' ? @target_schema : nil
36
+ end
37
+
38
+ end
39
+ end
@@ -3,7 +3,8 @@ module BeetleETL
3
3
 
4
4
  attr_reader :column_names, :relations, :query_strings
5
5
 
6
- def initialize(table_name)
6
+ def initialize(config, table_name)
7
+ @config = config
7
8
  @table_name = table_name
8
9
  @column_names = []
9
10
  @relations = {}
@@ -25,7 +26,10 @@ module BeetleETL
25
26
  # query helper methods
26
27
 
27
28
  def stage_table(table_name = nil)
28
- BeetleETL::Naming.stage_table_name_sql(table_name || @table_name)
29
+ BeetleETL::Naming.stage_table_name_sql(
30
+ @config.external_source,
31
+ table_name || @table_name
32
+ )
29
33
  end
30
34
 
31
35
  def combined_key(*args)
@@ -5,9 +5,9 @@ module BeetleETL
5
5
 
6
6
  attr_reader :table_name
7
7
 
8
- def initialize(table_name, setup, helpers = nil)
8
+ def initialize(config, table_name, setup, helpers = nil)
9
9
  @table_name = table_name
10
- @parsed = DSL.new(table_name).tap do |dsl|
10
+ @parsed = DSL.new(config, table_name).tap do |dsl|
11
11
  dsl.instance_exec(&helpers) if helpers
12
12
  dsl.instance_exec(&setup)
13
13
  end
@@ -1,18 +1,19 @@
1
1
  module BeetleETL
2
2
  class TransformationLoader
3
3
 
4
- def initialize
4
+ def initialize(config)
5
+ @config = config
5
6
  @transformations = []
6
7
  @helper_definitions = nil
7
8
  end
8
9
 
9
10
  def load
10
- File.open(BeetleETL.config.transformation_file, 'r') do |file|
11
+ File.open(@config.transformation_file, 'r') do |file|
11
12
  instance_eval file.read
12
13
  end
13
14
 
14
15
  @transformations.map do |(table_name, setup)|
15
- Transformation.new(table_name, setup, @helper_definitions)
16
+ Transformation.new(@config, table_name, setup, @helper_definitions)
16
17
  end
17
18
  end
18
19
 
@@ -3,6 +3,10 @@ require 'active_support/core_ext/hash/deep_merge'
3
3
  module BeetleETL
4
4
  class Import
5
5
 
6
+ def initialize(config)
7
+ @config = config
8
+ end
9
+
6
10
  def run
7
11
  setup
8
12
  import
@@ -12,14 +16,14 @@ module BeetleETL
12
16
 
13
17
  def setup
14
18
  transformations.each do |t|
15
- CreateStage.new(t.table_name, t.relations, t.column_names).run
19
+ CreateStage.new(@config, t.table_name, t.relations, t.column_names).run
16
20
  end
17
21
  end
18
22
 
19
23
  def import
20
- data_report = AsyncStepRunner.new(data_steps).run
21
- load_report = BeetleETL.database.transaction do
22
- AsyncStepRunner.new(load_steps).run
24
+ data_report = AsyncStepRunner.new(@config, data_steps).run
25
+ load_report = @config.database.transaction do
26
+ AsyncStepRunner.new(@config, load_steps).run
23
27
  end
24
28
 
25
29
  data_report.deep_merge load_report
@@ -27,7 +31,7 @@ module BeetleETL
27
31
 
28
32
  def cleanup
29
33
  transformations.each do |t|
30
- DropStage.new(t.table_name).run
34
+ DropStage.new(@config, t.table_name).run
31
35
  end
32
36
  end
33
37
 
@@ -36,22 +40,22 @@ module BeetleETL
36
40
  def data_steps
37
41
  transformations.flat_map do |t|
38
42
  [
39
- Transform.new(t.table_name, t.dependencies, t.query),
40
- MapRelations.new(t.table_name, t.relations),
41
- TableDiff.new(t.table_name),
42
- AssignIds.new(t.table_name),
43
+ Transform.new(@config, t.table_name, t.dependencies, t.query),
44
+ MapRelations.new(@config, t.table_name, t.relations),
45
+ TableDiff.new(@config, t.table_name),
46
+ AssignIds.new(@config, t.table_name),
43
47
  ]
44
48
  end
45
49
  end
46
50
 
47
51
  def load_steps
48
52
  transformations.map do |t|
49
- Load.new(t.table_name, t.relations)
53
+ Load.new(@config, t.table_name, t.relations)
50
54
  end
51
55
  end
52
56
 
53
57
  def transformations
54
- @transformations ||= TransformationLoader.new.load
58
+ @transformations ||= TransformationLoader.new(@config).load
55
59
  end
56
60
 
57
61
  end
@@ -5,32 +5,22 @@ module BeetleETL
5
5
 
6
6
  extend self
7
7
 
8
- def stage_table_name(table_name = nil)
9
- name = (table_name || @table_name).to_s
10
- digest = Digest::MD5.hexdigest(name)
11
- "#{BeetleETL.config.external_source}-#{name}-#{digest}"[0, 63]
8
+ def stage_table_name(external_source, table_name)
9
+ digest = Digest::MD5.hexdigest(table_name.to_s)
10
+ "#{external_source.to_s}-#{table_name.to_s}-#{digest}"[0, 63]
12
11
  end
13
12
 
14
- def stage_table_name_sql(table_name = nil)
15
- %Q("#{stage_table_name(table_name)}")
13
+ def stage_table_name_sql(external_source, table_name)
14
+ %Q("#{stage_table_name(external_source, table_name)}")
16
15
  end
17
16
 
18
- def target_table_name(table_name = nil)
19
- name = (table_name || @table_name).to_s
20
- [target_schema, name].compact.join('.')
17
+ def target_table_name(target_schema, table_name)
18
+ schema = target_schema ? target_schema.to_s : nil
19
+ [schema, table_name.to_s].compact.join('.')
21
20
  end
22
21
 
23
- def target_table_name_sql(table_name = nil)
24
- name = (table_name || @table_name).to_s
25
- target_table_name= [target_schema, name].compact.join('"."')
26
- %Q("#{target_table_name}")
27
- end
28
-
29
- private
30
-
31
- def target_schema
32
- target_schema = BeetleETL.config.target_schema
33
- target_schema != 'public' ? target_schema : nil
22
+ def target_table_name_sql(target_schema, table_name)
23
+ %Q("#{target_table_name(target_schema, table_name)}")
34
24
  end
35
25
 
36
26
  end
@@ -1,12 +1,13 @@
1
1
  module BeetleETL
2
2
  class Reporter
3
3
 
4
- def initialize(report)
4
+ def initialize(config, report)
5
+ @config = config
5
6
  @report = report
6
7
  end
7
8
 
8
9
  def log_summary
9
- BeetleETL.logger.info(summary)
10
+ @config.logger.info(summary)
10
11
  end
11
12
 
12
13
  private
@@ -1,7 +1,9 @@
1
1
  module BeetleETL
2
2
  class AsyncStepRunner
3
3
 
4
- def initialize(steps)
4
+ def initialize(config, steps)
5
+ @config = config
6
+
5
7
  @dependency_resolver = DependencyResolver.new(steps)
6
8
  @steps = steps
7
9
 
@@ -39,14 +41,14 @@ module BeetleETL
39
41
  def run_step_async(step)
40
42
  Thread.new do
41
43
  begin
42
- BeetleETL.logger.info("started step #{step.name}")
44
+ @config.logger.info("started step #{step.name}")
43
45
 
44
46
  started_at = Time.now
45
47
  step.run
46
48
  finished_at = Time.now
47
49
 
48
50
  duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
49
- BeetleETL.logger.info("finished #{step.name} in #{duration}")
51
+ @config.logger.info("finished #{step.name} in #{duration}")
50
52
 
51
53
  @queue.push [
52
54
  step.table_name,
@@ -55,7 +57,7 @@ module BeetleETL
55
57
  ]
56
58
 
57
59
  rescue => e
58
- BeetleETL.logger.fatal(e.message)
60
+ @config.logger.fatal(e.message)
59
61
  raise e
60
62
  end
61
63
  end.abort_on_exception = true