beetle_etl 1.0.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.byebug_history +8 -0
- data/.travis.yml +6 -1
- data/README.md +31 -9
- data/beetle_etl.gemspec +1 -1
- data/lib/beetle_etl.rb +7 -49
- data/lib/beetle_etl/configuration.rb +39 -0
- data/lib/beetle_etl/dsl/dsl.rb +6 -2
- data/lib/beetle_etl/dsl/transformation.rb +2 -2
- data/lib/beetle_etl/dsl/transformation_loader.rb +4 -3
- data/lib/beetle_etl/import.rb +15 -11
- data/lib/beetle_etl/naming.rb +10 -20
- data/lib/beetle_etl/reporter.rb +3 -2
- data/lib/beetle_etl/step_runner/async_step_runner.rb +6 -4
- data/lib/beetle_etl/steps/create_stage.rb +2 -2
- data/lib/beetle_etl/steps/load.rb +2 -2
- data/lib/beetle_etl/steps/map_relations.rb +2 -2
- data/lib/beetle_etl/steps/step.rb +23 -4
- data/lib/beetle_etl/steps/transform.rb +2 -2
- data/lib/beetle_etl/testing.rb +10 -5
- data/lib/beetle_etl/testing/test_wrapper.rb +4 -4
- data/lib/beetle_etl/version.rb +1 -1
- data/spec/beetle_etl_spec.rb +6 -38
- data/spec/configuration_spec.rb +66 -0
- data/spec/dsl/dsl_spec.rb +9 -3
- data/spec/dsl/transformation_loader_spec.rb +9 -8
- data/spec/dsl/transformation_spec.rb +9 -7
- data/spec/feature/feature_spec.rb +8 -8
- data/spec/reporter_spec.rb +5 -2
- data/spec/spec_helper.rb +4 -5
- data/spec/steps/assign_ids_spec.rb +7 -7
- data/spec/steps/create_stage_spec.rb +14 -12
- data/spec/steps/load_spec.rb +9 -7
- data/spec/steps/map_relations_spec.rb +14 -8
- data/spec/steps/step_spec.rb +5 -3
- data/spec/steps/table_diff_spec.rb +7 -6
- data/spec/steps/transform_spec.rb +8 -4
- data/spec/testing_spec.rb +1 -1
- metadata +9 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6a726a9734d6866687319a6742cc8db41ef68b64
|
4
|
+
data.tar.gz: 82be15d660033bd3d957879ec351c0479b26623f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 01c1408c035afb9d0dcadb9382a3db442318f48c74517f1376c3a6ef615675ef891df8559c172be6e6b1f7f47d2f29c4a6965493a613d93a24d439c40f224246
|
7
|
+
data.tar.gz: 111f73fd2692ed88f4ce91c6f4da838efe13d1d12fc00bfb4f63cb00383509ffa1d171bf265362ded8f01ddde7f0a0e0659d552f7be8b31188c94bb4bb984e59
|
data/.byebug_history
ADDED
data/.travis.yml
CHANGED
@@ -1,11 +1,16 @@
|
|
1
1
|
language: ruby
|
2
2
|
rvm:
|
3
3
|
- 2.0.0
|
4
|
-
- 2.1.
|
4
|
+
- 2.1.0
|
5
|
+
- 2.2.0
|
6
|
+
- 2.3.0
|
5
7
|
addons:
|
6
8
|
postgresql: "9.3"
|
7
9
|
code_climate:
|
8
10
|
repo_token: fcd6d8c28da900609a2cf903716d858621b8ce68152edbcebe6908a9a3f5d3d5
|
11
|
+
before_install:
|
12
|
+
- gem update --system
|
13
|
+
- gem update bundler
|
9
14
|
|
10
15
|
before_script:
|
11
16
|
- psql -c 'create database travis_ci_test;' -U postgres
|
data/README.md
CHANGED
@@ -32,12 +32,34 @@ Make sure the tables you want to import contain columns named ```external_id```
|
|
32
32
|
|
33
33
|
### Configuration
|
34
34
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
config.
|
40
|
-
|
35
|
+
Create a configuration object
|
36
|
+
|
37
|
+
configuration = BeetleETL::Configuration.new do |config|
|
38
|
+
# path to your transformation file
|
39
|
+
config.transformation_file = "../my_fancy_transformations"
|
40
|
+
|
41
|
+
# sequel database config
|
42
|
+
config.database_config = {
|
43
|
+
adapter: 'postgres'
|
44
|
+
encoding: utf8
|
45
|
+
host: my_host
|
46
|
+
database: my_database
|
47
|
+
username: 'foo'
|
48
|
+
password: 'bar'
|
49
|
+
pool: 5
|
50
|
+
pool_timeout: 360
|
51
|
+
connect_timeout: 360
|
52
|
+
}
|
53
|
+
# or config.database = # sequel database instance
|
54
|
+
|
55
|
+
# name of your source
|
56
|
+
config.external_source = "important_data"
|
57
|
+
|
58
|
+
# target schema in case you use postgres schemas
|
59
|
+
config.target_schema = "public" # default
|
60
|
+
|
61
|
+
# logger
|
62
|
+
config.logger = Logger.new(STDOUT) # default
|
41
63
|
end
|
42
64
|
|
43
65
|
### Defining Imports
|
@@ -66,8 +88,8 @@ Fill a ```transformation``` file with import directives like this:
|
|
66
88
|
ON data.org_id = o.id
|
67
89
|
SQL
|
68
90
|
end
|
69
|
-
|
70
|
-
|
91
|
+
|
92
|
+
|
71
93
|
```import``` takes the name of the table you want to fill and the configuration as arguments.
|
72
94
|
With ```columns``` you define what columns BeetleETL is supposed to fill in your application’s table.
|
73
95
|
The ```query``` transforms the data. Make sure that you insert into ```#{stage_table}```; the name of the actual table that this inserts into will be filled in by BeetleETL during runtime.
|
@@ -76,7 +98,7 @@ Define any foreign references your table has to other tables using the ```refrec
|
|
76
98
|
|
77
99
|
### Running BeetleETL
|
78
100
|
|
79
|
-
BeetleETL.import
|
101
|
+
BeetleETL.import(configuration)
|
80
102
|
|
81
103
|
## Development
|
82
104
|
|
data/beetle_etl.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_runtime_dependency 'sequel', '>= 4.0.0'
|
22
22
|
spec.add_runtime_dependency 'activesupport', '>= 4.0.0'
|
23
23
|
|
24
|
-
spec.add_development_dependency 'bundler', '~> 1.
|
24
|
+
spec.add_development_dependency 'bundler', '~> 1.11'
|
25
25
|
spec.add_development_dependency 'rspec', '>= 3.0.0'
|
26
26
|
spec.add_development_dependency 'timecop', '>= 0.7.0'
|
27
27
|
spec.add_development_dependency 'pg', '>= 0.18.0'
|
data/lib/beetle_etl.rb
CHANGED
@@ -5,7 +5,7 @@ require 'logger'
|
|
5
5
|
|
6
6
|
module BeetleETL
|
7
7
|
|
8
|
-
|
8
|
+
require 'beetle_etl/configuration'
|
9
9
|
|
10
10
|
require 'beetle_etl/dsl/dsl'
|
11
11
|
require 'beetle_etl/dsl/transformation'
|
@@ -28,61 +28,19 @@ module BeetleETL
|
|
28
28
|
require 'beetle_etl/import'
|
29
29
|
require 'beetle_etl/reporter'
|
30
30
|
|
31
|
-
class Configuration
|
32
|
-
attr_accessor \
|
33
|
-
:database_config,
|
34
|
-
:database,
|
35
|
-
:transformation_file,
|
36
|
-
:stage_schema,
|
37
|
-
:target_schema,
|
38
|
-
:external_source,
|
39
|
-
:logger
|
40
|
-
|
41
|
-
def initialize
|
42
|
-
@target_schema = 'public'
|
43
|
-
@logger = ::Logger.new(STDOUT)
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
31
|
class << self
|
48
32
|
|
49
|
-
def import
|
33
|
+
def import(config = Configuration.new)
|
34
|
+
yield config if block_given?
|
35
|
+
|
50
36
|
begin
|
51
|
-
report = Import.new.run
|
52
|
-
Reporter.new(report).log_summary
|
37
|
+
report = Import.new(config).run
|
38
|
+
Reporter.new(config, report).log_summary
|
53
39
|
report
|
54
40
|
ensure
|
55
|
-
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
def configure
|
60
|
-
yield(config)
|
61
|
-
end
|
62
|
-
|
63
|
-
def config
|
64
|
-
@config ||= Configuration.new
|
65
|
-
end
|
66
|
-
|
67
|
-
def logger
|
68
|
-
config.logger
|
69
|
-
end
|
70
|
-
|
71
|
-
def database
|
72
|
-
if config.database
|
73
|
-
config.database
|
74
|
-
elsif config.database_config
|
75
|
-
@database ||= Sequel.connect(config.database_config)
|
76
|
-
else
|
77
|
-
msg = "Either Sequel connection database_config or a Sequel Database object required"
|
78
|
-
raise InvalidConfigurationError.new(msg)
|
41
|
+
config.disconnect_database
|
79
42
|
end
|
80
43
|
end
|
81
44
|
|
82
|
-
def reset
|
83
|
-
@config = nil
|
84
|
-
@database = nil
|
85
|
-
end
|
86
|
-
|
87
45
|
end
|
88
46
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module BeetleETL
|
2
|
+
InvalidConfigurationError = Class.new(StandardError)
|
3
|
+
|
4
|
+
class Configuration
|
5
|
+
attr_accessor \
|
6
|
+
:transformation_file,
|
7
|
+
:stage_schema,
|
8
|
+
:external_source,
|
9
|
+
:logger
|
10
|
+
|
11
|
+
attr_writer \
|
12
|
+
:database,
|
13
|
+
:database_config,
|
14
|
+
:target_schema
|
15
|
+
|
16
|
+
def initialize
|
17
|
+
@target_schema = 'public'
|
18
|
+
@logger = ::Logger.new(STDOUT)
|
19
|
+
end
|
20
|
+
|
21
|
+
def database
|
22
|
+
if [@database, @database_config].none?
|
23
|
+
msg = "Either Sequel connection database_config or a Sequel Database object required"
|
24
|
+
raise InvalidConfigurationError.new(msg)
|
25
|
+
end
|
26
|
+
|
27
|
+
@database ||= Sequel.connect(@database_config)
|
28
|
+
end
|
29
|
+
|
30
|
+
def disconnect_database
|
31
|
+
database.disconnect if @database_config
|
32
|
+
end
|
33
|
+
|
34
|
+
def target_schema
|
35
|
+
@target_schema != 'public' ? @target_schema : nil
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
data/lib/beetle_etl/dsl/dsl.rb
CHANGED
@@ -3,7 +3,8 @@ module BeetleETL
|
|
3
3
|
|
4
4
|
attr_reader :column_names, :relations, :query_strings
|
5
5
|
|
6
|
-
def initialize(table_name)
|
6
|
+
def initialize(config, table_name)
|
7
|
+
@config = config
|
7
8
|
@table_name = table_name
|
8
9
|
@column_names = []
|
9
10
|
@relations = {}
|
@@ -25,7 +26,10 @@ module BeetleETL
|
|
25
26
|
# query helper methods
|
26
27
|
|
27
28
|
def stage_table(table_name = nil)
|
28
|
-
BeetleETL::Naming.stage_table_name_sql(
|
29
|
+
BeetleETL::Naming.stage_table_name_sql(
|
30
|
+
@config.external_source,
|
31
|
+
table_name || @table_name
|
32
|
+
)
|
29
33
|
end
|
30
34
|
|
31
35
|
def combined_key(*args)
|
@@ -5,9 +5,9 @@ module BeetleETL
|
|
5
5
|
|
6
6
|
attr_reader :table_name
|
7
7
|
|
8
|
-
def initialize(table_name, setup, helpers = nil)
|
8
|
+
def initialize(config, table_name, setup, helpers = nil)
|
9
9
|
@table_name = table_name
|
10
|
-
@parsed = DSL.new(table_name).tap do |dsl|
|
10
|
+
@parsed = DSL.new(config, table_name).tap do |dsl|
|
11
11
|
dsl.instance_exec(&helpers) if helpers
|
12
12
|
dsl.instance_exec(&setup)
|
13
13
|
end
|
@@ -1,18 +1,19 @@
|
|
1
1
|
module BeetleETL
|
2
2
|
class TransformationLoader
|
3
3
|
|
4
|
-
def initialize
|
4
|
+
def initialize(config)
|
5
|
+
@config = config
|
5
6
|
@transformations = []
|
6
7
|
@helper_definitions = nil
|
7
8
|
end
|
8
9
|
|
9
10
|
def load
|
10
|
-
File.open(
|
11
|
+
File.open(@config.transformation_file, 'r') do |file|
|
11
12
|
instance_eval file.read
|
12
13
|
end
|
13
14
|
|
14
15
|
@transformations.map do |(table_name, setup)|
|
15
|
-
Transformation.new(table_name, setup, @helper_definitions)
|
16
|
+
Transformation.new(@config, table_name, setup, @helper_definitions)
|
16
17
|
end
|
17
18
|
end
|
18
19
|
|
data/lib/beetle_etl/import.rb
CHANGED
@@ -3,6 +3,10 @@ require 'active_support/core_ext/hash/deep_merge'
|
|
3
3
|
module BeetleETL
|
4
4
|
class Import
|
5
5
|
|
6
|
+
def initialize(config)
|
7
|
+
@config = config
|
8
|
+
end
|
9
|
+
|
6
10
|
def run
|
7
11
|
setup
|
8
12
|
import
|
@@ -12,14 +16,14 @@ module BeetleETL
|
|
12
16
|
|
13
17
|
def setup
|
14
18
|
transformations.each do |t|
|
15
|
-
CreateStage.new(t.table_name, t.relations, t.column_names).run
|
19
|
+
CreateStage.new(@config, t.table_name, t.relations, t.column_names).run
|
16
20
|
end
|
17
21
|
end
|
18
22
|
|
19
23
|
def import
|
20
|
-
data_report = AsyncStepRunner.new(data_steps).run
|
21
|
-
load_report =
|
22
|
-
AsyncStepRunner.new(load_steps).run
|
24
|
+
data_report = AsyncStepRunner.new(@config, data_steps).run
|
25
|
+
load_report = @config.database.transaction do
|
26
|
+
AsyncStepRunner.new(@config, load_steps).run
|
23
27
|
end
|
24
28
|
|
25
29
|
data_report.deep_merge load_report
|
@@ -27,7 +31,7 @@ module BeetleETL
|
|
27
31
|
|
28
32
|
def cleanup
|
29
33
|
transformations.each do |t|
|
30
|
-
DropStage.new(t.table_name).run
|
34
|
+
DropStage.new(@config, t.table_name).run
|
31
35
|
end
|
32
36
|
end
|
33
37
|
|
@@ -36,22 +40,22 @@ module BeetleETL
|
|
36
40
|
def data_steps
|
37
41
|
transformations.flat_map do |t|
|
38
42
|
[
|
39
|
-
Transform.new(t.table_name, t.dependencies, t.query),
|
40
|
-
MapRelations.new(t.table_name, t.relations),
|
41
|
-
TableDiff.new(t.table_name),
|
42
|
-
AssignIds.new(t.table_name),
|
43
|
+
Transform.new(@config, t.table_name, t.dependencies, t.query),
|
44
|
+
MapRelations.new(@config, t.table_name, t.relations),
|
45
|
+
TableDiff.new(@config, t.table_name),
|
46
|
+
AssignIds.new(@config, t.table_name),
|
43
47
|
]
|
44
48
|
end
|
45
49
|
end
|
46
50
|
|
47
51
|
def load_steps
|
48
52
|
transformations.map do |t|
|
49
|
-
Load.new(t.table_name, t.relations)
|
53
|
+
Load.new(@config, t.table_name, t.relations)
|
50
54
|
end
|
51
55
|
end
|
52
56
|
|
53
57
|
def transformations
|
54
|
-
@transformations ||= TransformationLoader.new.load
|
58
|
+
@transformations ||= TransformationLoader.new(@config).load
|
55
59
|
end
|
56
60
|
|
57
61
|
end
|
data/lib/beetle_etl/naming.rb
CHANGED
@@ -5,32 +5,22 @@ module BeetleETL
|
|
5
5
|
|
6
6
|
extend self
|
7
7
|
|
8
|
-
def stage_table_name(table_name
|
9
|
-
|
10
|
-
digest
|
11
|
-
"#{BeetleETL.config.external_source}-#{name}-#{digest}"[0, 63]
|
8
|
+
def stage_table_name(external_source, table_name)
|
9
|
+
digest = Digest::MD5.hexdigest(table_name.to_s)
|
10
|
+
"#{external_source.to_s}-#{table_name.to_s}-#{digest}"[0, 63]
|
12
11
|
end
|
13
12
|
|
14
|
-
def stage_table_name_sql(table_name
|
15
|
-
%Q("#{stage_table_name(table_name)}")
|
13
|
+
def stage_table_name_sql(external_source, table_name)
|
14
|
+
%Q("#{stage_table_name(external_source, table_name)}")
|
16
15
|
end
|
17
16
|
|
18
|
-
def target_table_name(table_name
|
19
|
-
|
20
|
-
[
|
17
|
+
def target_table_name(target_schema, table_name)
|
18
|
+
schema = target_schema ? target_schema.to_s : nil
|
19
|
+
[schema, table_name.to_s].compact.join('.')
|
21
20
|
end
|
22
21
|
|
23
|
-
def target_table_name_sql(table_name
|
24
|
-
|
25
|
-
target_table_name= [target_schema, name].compact.join('"."')
|
26
|
-
%Q("#{target_table_name}")
|
27
|
-
end
|
28
|
-
|
29
|
-
private
|
30
|
-
|
31
|
-
def target_schema
|
32
|
-
target_schema = BeetleETL.config.target_schema
|
33
|
-
target_schema != 'public' ? target_schema : nil
|
22
|
+
def target_table_name_sql(target_schema, table_name)
|
23
|
+
%Q("#{target_table_name(target_schema, table_name)}")
|
34
24
|
end
|
35
25
|
|
36
26
|
end
|
data/lib/beetle_etl/reporter.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
module BeetleETL
|
2
2
|
class AsyncStepRunner
|
3
3
|
|
4
|
-
def initialize(steps)
|
4
|
+
def initialize(config, steps)
|
5
|
+
@config = config
|
6
|
+
|
5
7
|
@dependency_resolver = DependencyResolver.new(steps)
|
6
8
|
@steps = steps
|
7
9
|
|
@@ -39,14 +41,14 @@ module BeetleETL
|
|
39
41
|
def run_step_async(step)
|
40
42
|
Thread.new do
|
41
43
|
begin
|
42
|
-
|
44
|
+
@config.logger.info("started step #{step.name}")
|
43
45
|
|
44
46
|
started_at = Time.now
|
45
47
|
step.run
|
46
48
|
finished_at = Time.now
|
47
49
|
|
48
50
|
duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
|
49
|
-
|
51
|
+
@config.logger.info("finished #{step.name} in #{duration}")
|
50
52
|
|
51
53
|
@queue.push [
|
52
54
|
step.table_name,
|
@@ -55,7 +57,7 @@ module BeetleETL
|
|
55
57
|
]
|
56
58
|
|
57
59
|
rescue => e
|
58
|
-
|
60
|
+
@config.logger.fatal(e.message)
|
59
61
|
raise e
|
60
62
|
end
|
61
63
|
end.abort_on_exception = true
|