beetle_etl 0.0.7 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/beetle_etl.gemspec +2 -1
- data/lib/beetle_etl/import.rb +26 -12
- data/lib/beetle_etl/reporter.rb +65 -0
- data/lib/beetle_etl/step_runner/async_step_runner.rb +83 -0
- data/lib/beetle_etl/{task_runner → step_runner}/dependency_resolver.rb +0 -0
- data/lib/beetle_etl/steps/create_stage.rb +38 -10
- data/lib/beetle_etl/steps/drop_stage.rb +0 -4
- data/lib/beetle_etl/steps/step.rb +1 -3
- data/lib/beetle_etl/steps/transform.rb +1 -1
- data/lib/beetle_etl/testing.rb +46 -0
- data/lib/beetle_etl/version.rb +1 -1
- data/lib/beetle_etl.rb +14 -6
- data/spec/beetle_etl_spec.rb +7 -3
- data/spec/dsl/transformation_loader_spec.rb +1 -9
- data/spec/feature/feature_spec.rb +4 -3
- data/spec/reporter_spec.rb +65 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/steps/create_stage_spec.rb +26 -14
- data/spec/steps/step_spec.rb +2 -2
- data/spec/steps/transform_spec.rb +0 -1
- data/spec/support/database_helpers.rb +1 -1
- data/spec/support/file_helpers.rb +14 -0
- data/spec/testing_spec.rb +76 -0
- metadata +31 -9
- data/lib/beetle_etl/task_runner/task_runner.rb +0 -71
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 788ce4239b271ab02fe67da3642f8f17e6fd275e
|
4
|
+
data.tar.gz: e89e37f2bd8ec970599249b73e97f9d66be60555
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3df6bfbcadd41a1e98f330aa35c9bcc0701c3450d9425c7c156520cae467cdccbb4adc5075a7923261d972c1341f50c49f3a09e35536270f2cdd165414b14ea7
|
7
|
+
data.tar.gz: dd92ef629e21523001a4d8371b0a8b2432845ecb371b0bde278d752b06f989e19c1b61321dc28686220f14a0b428f33b749bab52c119ec1d56284c6a71ae0e44
|
data/beetle_etl.gemspec
CHANGED
@@ -19,10 +19,11 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ['lib']
|
20
20
|
|
21
21
|
spec.add_runtime_dependency 'sequel', '>= 4.0.0'
|
22
|
+
spec.add_runtime_dependency 'activesupport', '>= 4.2.0'
|
22
23
|
|
23
24
|
spec.add_development_dependency 'bundler', '~> 1.6'
|
24
25
|
spec.add_development_dependency 'rspec', '>= 3.0.0'
|
25
26
|
spec.add_development_dependency 'timecop', '>= 0.7.0'
|
26
27
|
spec.add_development_dependency 'pg', '>= 0.18.0'
|
27
|
-
spec.add_development_dependency '
|
28
|
+
spec.add_development_dependency 'unindent', '~> 1.0'
|
28
29
|
end
|
data/lib/beetle_etl/import.rb
CHANGED
@@ -1,15 +1,34 @@
|
|
1
|
+
require 'active_support/core_ext/hash/deep_merge'
|
2
|
+
|
1
3
|
module BeetleETL
|
2
4
|
class Import
|
3
5
|
|
4
6
|
def run
|
5
|
-
|
6
|
-
|
7
|
-
TaskRunner.new(load_steps).run
|
8
|
-
end
|
9
|
-
rescue => e
|
10
|
-
raise e
|
7
|
+
setup
|
8
|
+
import
|
11
9
|
ensure
|
12
|
-
|
10
|
+
cleanup
|
11
|
+
end
|
12
|
+
|
13
|
+
def setup
|
14
|
+
transformations.each do |t|
|
15
|
+
CreateStage.new(t.table_name, t.relations, t.column_names).run
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def import
|
20
|
+
data_report = AsyncStepRunner.new(data_steps).run
|
21
|
+
load_report = BeetleETL.database.transaction do
|
22
|
+
AsyncStepRunner.new(load_steps).run
|
23
|
+
end
|
24
|
+
|
25
|
+
data_report.deep_merge load_report
|
26
|
+
end
|
27
|
+
|
28
|
+
def cleanup
|
29
|
+
transformations.each do |t|
|
30
|
+
DropStage.new(t.table_name).run
|
31
|
+
end
|
13
32
|
end
|
14
33
|
|
15
34
|
private
|
@@ -17,7 +36,6 @@ module BeetleETL
|
|
17
36
|
def data_steps
|
18
37
|
transformations.flat_map do |t|
|
19
38
|
[
|
20
|
-
CreateStage.new(t.table_name, t.relations, t.column_names),
|
21
39
|
Transform.new(t.table_name, t.dependencies, t.query),
|
22
40
|
MapRelations.new(t.table_name, t.relations),
|
23
41
|
TableDiff.new(t.table_name),
|
@@ -32,10 +50,6 @@ module BeetleETL
|
|
32
50
|
end
|
33
51
|
end
|
34
52
|
|
35
|
-
def cleanup_steps
|
36
|
-
transformations.map { |t| DropStage.new(t.table_name) }
|
37
|
-
end
|
38
|
-
|
39
53
|
def transformations
|
40
54
|
@transformations ||= TransformationLoader.new.load
|
41
55
|
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module BeetleETL
|
2
|
+
class Reporter
|
3
|
+
|
4
|
+
def initialize(report)
|
5
|
+
@report = report
|
6
|
+
end
|
7
|
+
|
8
|
+
def log_summary
|
9
|
+
BeetleETL.logger.info(summary)
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def summary
|
15
|
+
"\n\n" +
|
16
|
+
@report.map do |(table_name, steps)|
|
17
|
+
total_duration = format_duration(sum_durations(steps))
|
18
|
+
[
|
19
|
+
table_name,
|
20
|
+
seperator("="),
|
21
|
+
step_rows(steps).join("\n"),
|
22
|
+
seperator("-"),
|
23
|
+
total_duration.rjust(line_width)
|
24
|
+
].join("\n")
|
25
|
+
end.join("\n\n") + "\n"
|
26
|
+
end
|
27
|
+
|
28
|
+
def step_rows(steps)
|
29
|
+
steps.map do |step_name, data|
|
30
|
+
label = step_name.split(": ")[1] + ":"
|
31
|
+
duration = format_duration(data[:finished_at] - data[:started_at])
|
32
|
+
line = duration.rjust(line_width)
|
33
|
+
line[2, label.length] = label
|
34
|
+
line
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def format_duration(duration)
|
39
|
+
Time.at(duration).utc.strftime("%H:%M:%S")
|
40
|
+
end
|
41
|
+
|
42
|
+
def sum_durations(steps)
|
43
|
+
steps.inject(0) do |acc, (_step_name, data)|
|
44
|
+
acc + (data[:finished_at] - data[:started_at])
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def line_width
|
49
|
+
# 2 spaces
|
50
|
+
# + 1 colon
|
51
|
+
# + 1 space
|
52
|
+
# + 8 duration
|
53
|
+
12 + longest_step_name_length
|
54
|
+
end
|
55
|
+
|
56
|
+
def longest_step_name_length
|
57
|
+
@report.keys.max_by(&:length).length - 1
|
58
|
+
end
|
59
|
+
|
60
|
+
def seperator(character)
|
61
|
+
character * line_width
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module BeetleETL
|
2
|
+
class AsyncStepRunner
|
3
|
+
|
4
|
+
def initialize(steps)
|
5
|
+
@dependency_resolver = DependencyResolver.new(steps)
|
6
|
+
@steps = steps
|
7
|
+
|
8
|
+
@queue = Queue.new
|
9
|
+
@completed = Set.new
|
10
|
+
@running = Set.new
|
11
|
+
end
|
12
|
+
|
13
|
+
def run
|
14
|
+
results = {}
|
15
|
+
|
16
|
+
until all_steps_complete?
|
17
|
+
runnables.each do |step|
|
18
|
+
run_step_async(step)
|
19
|
+
mark_step_running(step.name)
|
20
|
+
end
|
21
|
+
|
22
|
+
table_name, step_name, step_data = @queue.pop
|
23
|
+
|
24
|
+
unless results.has_key?(table_name)
|
25
|
+
results[table_name] = {}
|
26
|
+
end
|
27
|
+
|
28
|
+
results[table_name][step_name] = step_data
|
29
|
+
mark_step_completed(step_name)
|
30
|
+
end
|
31
|
+
|
32
|
+
results
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
attr_reader :running, :completed
|
38
|
+
|
39
|
+
def run_step_async(step)
|
40
|
+
Thread.new do
|
41
|
+
begin
|
42
|
+
BeetleETL.logger.info("started step #{step.name}")
|
43
|
+
|
44
|
+
started_at = Time.now
|
45
|
+
step.run
|
46
|
+
finished_at = Time.now
|
47
|
+
|
48
|
+
duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
|
49
|
+
BeetleETL.logger.info("finished #{step.name} in #{duration}")
|
50
|
+
|
51
|
+
@queue.push [
|
52
|
+
step.table_name,
|
53
|
+
step.name,
|
54
|
+
{ started_at: started_at, finished_at: finished_at }
|
55
|
+
]
|
56
|
+
|
57
|
+
rescue => e
|
58
|
+
BeetleETL.logger.fatal(e.message)
|
59
|
+
raise e
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def mark_step_running(step_name)
|
65
|
+
running.add(step_name)
|
66
|
+
end
|
67
|
+
|
68
|
+
def mark_step_completed(step_name)
|
69
|
+
runnables.delete(step_name)
|
70
|
+
completed.add(step_name)
|
71
|
+
end
|
72
|
+
|
73
|
+
def runnables
|
74
|
+
resolvables = @dependency_resolver.resolvables(completed)
|
75
|
+
resolvables.reject { |r| running.include? r.name }
|
76
|
+
end
|
77
|
+
|
78
|
+
def all_steps_complete?
|
79
|
+
@steps.map(&:name).to_set == completed.to_set
|
80
|
+
end
|
81
|
+
|
82
|
+
end
|
83
|
+
end
|
File without changes
|
@@ -1,4 +1,8 @@
|
|
1
1
|
module BeetleETL
|
2
|
+
|
3
|
+
ColumnDefinitionNotFoundError = Class.new(StandardError)
|
4
|
+
NoColumnsDefinedError = Class.new(StandardError)
|
5
|
+
|
2
6
|
class CreateStage < Step
|
3
7
|
|
4
8
|
def initialize(table_name, relations, column_names)
|
@@ -7,27 +11,38 @@ module BeetleETL
|
|
7
11
|
@column_names = column_names
|
8
12
|
end
|
9
13
|
|
10
|
-
def dependencies
|
11
|
-
Set.new
|
12
|
-
end
|
13
|
-
|
14
14
|
def run
|
15
15
|
database.execute <<-SQL
|
16
|
-
CREATE TABLE #{stage_table_name_sql} (
|
16
|
+
CREATE UNLOGGED TABLE #{stage_table_name_sql} (
|
17
17
|
id integer,
|
18
18
|
external_id character varying(255),
|
19
19
|
transition character varying(255),
|
20
20
|
|
21
|
-
#{
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
)
|
21
|
+
#{column_definitions}
|
22
|
+
);
|
23
|
+
|
24
|
+
#{index_definitions}
|
26
25
|
SQL
|
27
26
|
end
|
28
27
|
|
29
28
|
private
|
30
29
|
|
30
|
+
def column_definitions
|
31
|
+
definitions = [
|
32
|
+
payload_column_definitions,
|
33
|
+
relation_column_definitions
|
34
|
+
].compact
|
35
|
+
|
36
|
+
if definitions.empty?
|
37
|
+
raise NoColumnsDefinedError.new <<-MSG
|
38
|
+
Transformation for #{table_name} has no column definitions.
|
39
|
+
Either add an array of columns or references to other tables.
|
40
|
+
MSG
|
41
|
+
end
|
42
|
+
|
43
|
+
definitions.join(',')
|
44
|
+
end
|
45
|
+
|
31
46
|
def payload_column_definitions
|
32
47
|
definitions = (@column_names - @relations.keys).map do |column_name|
|
33
48
|
"#{column_name} #{column_type(column_name)}"
|
@@ -45,6 +60,13 @@ module BeetleETL
|
|
45
60
|
definitions.join(',') if definitions.any?
|
46
61
|
end
|
47
62
|
|
63
|
+
def index_definitions
|
64
|
+
index_columns = [:external_id] + @relations.keys.map { |c| "external_#{c}" }
|
65
|
+
index_columns.map do |column_name|
|
66
|
+
"CREATE INDEX ON #{stage_table_name_sql} (#{column_name})"
|
67
|
+
end.join(";")
|
68
|
+
end
|
69
|
+
|
48
70
|
def column_type(column_name)
|
49
71
|
@column_types ||= Hash[database.schema(public_table_name.to_sym)]
|
50
72
|
.reduce({}) do |acc, (name, schema)|
|
@@ -52,6 +74,12 @@ module BeetleETL
|
|
52
74
|
acc
|
53
75
|
end
|
54
76
|
|
77
|
+
unless @column_types.has_key?(column_name)
|
78
|
+
raise ColumnDefinitionNotFoundError.new <<-MSG
|
79
|
+
Table "#{table_name}" has no column "#{column_name}".
|
80
|
+
MSG
|
81
|
+
end
|
82
|
+
|
55
83
|
@column_types[column_name]
|
56
84
|
end
|
57
85
|
|
@@ -1,7 +1,5 @@
|
|
1
1
|
module BeetleETL
|
2
2
|
|
3
|
-
DependenciesNotDefinedError = Class.new(StandardError)
|
4
|
-
|
5
3
|
class Step
|
6
4
|
|
7
5
|
include BeetleETL::Naming
|
@@ -20,7 +18,7 @@ module BeetleETL
|
|
20
18
|
end
|
21
19
|
|
22
20
|
def dependencies
|
23
|
-
|
21
|
+
Set.new
|
24
22
|
end
|
25
23
|
|
26
24
|
def external_source
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module BeetleETL
|
2
|
+
module Testing
|
3
|
+
|
4
|
+
TargetTableNotFoundError = Class.new(StandardError)
|
5
|
+
NoTransformationFoundError = Class.new(StandardError)
|
6
|
+
|
7
|
+
def with_stage_tables_for(*table_names, &block)
|
8
|
+
table_names.each do |table_name|
|
9
|
+
unless BeetleETL.database.table_exists?(table_name)
|
10
|
+
raise TargetTableNotFoundError.new <<-MSG
|
11
|
+
Missing target table "#{table_name}".
|
12
|
+
In order to create stage tables, BeetleETL requires the target tables to exist because they provide the column definitions.
|
13
|
+
MSG
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
import = Import.new
|
18
|
+
begin
|
19
|
+
import.setup
|
20
|
+
block.call
|
21
|
+
ensure
|
22
|
+
import.cleanup
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def run_transformation(table_name)
|
27
|
+
transformations = TransformationLoader.new.load
|
28
|
+
|
29
|
+
unless transformations.map(&:table_name).include?(table_name)
|
30
|
+
raise NoTransformationFoundError.new <<-MSG
|
31
|
+
No transformation definition found for table "#{table_name}".
|
32
|
+
MSG
|
33
|
+
end
|
34
|
+
|
35
|
+
transformation = transformations.find { |t| t.table_name == table_name }
|
36
|
+
transform = Transform.new(transformation.table_name, transformation.dependencies, transformation.query)
|
37
|
+
transform.run
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
def stage_table_name(table_name)
|
42
|
+
BeetleETL::Naming.stage_table_name(table_name)
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
end
|
data/lib/beetle_etl/version.rb
CHANGED
data/lib/beetle_etl.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'beetle_etl/version'
|
2
2
|
|
3
3
|
require 'sequel'
|
4
|
+
require 'logger'
|
4
5
|
|
5
6
|
module BeetleETL
|
6
7
|
|
@@ -21,10 +22,11 @@ module BeetleETL
|
|
21
22
|
require 'beetle_etl/steps/load'
|
22
23
|
require 'beetle_etl/steps/drop_stage'
|
23
24
|
|
24
|
-
require 'beetle_etl/
|
25
|
-
require 'beetle_etl/
|
25
|
+
require 'beetle_etl/step_runner/dependency_resolver'
|
26
|
+
require 'beetle_etl/step_runner/async_step_runner'
|
26
27
|
|
27
28
|
require 'beetle_etl/import'
|
29
|
+
require 'beetle_etl/reporter'
|
28
30
|
|
29
31
|
class Configuration
|
30
32
|
attr_accessor \
|
@@ -33,10 +35,12 @@ module BeetleETL
|
|
33
35
|
:transformation_file,
|
34
36
|
:stage_schema,
|
35
37
|
:public_schema,
|
36
|
-
:external_source
|
38
|
+
:external_source,
|
39
|
+
:logger
|
37
40
|
|
38
41
|
def initialize
|
39
42
|
@public_schema = 'public'
|
43
|
+
@logger = ::Logger.new(STDOUT)
|
40
44
|
end
|
41
45
|
end
|
42
46
|
|
@@ -44,9 +48,9 @@ module BeetleETL
|
|
44
48
|
|
45
49
|
def import
|
46
50
|
begin
|
47
|
-
Import.new.run
|
48
|
-
|
49
|
-
|
51
|
+
report = Import.new.run
|
52
|
+
Reporter.new(report).log_summary
|
53
|
+
report
|
50
54
|
ensure
|
51
55
|
@database.disconnect if @database
|
52
56
|
end
|
@@ -60,6 +64,10 @@ module BeetleETL
|
|
60
64
|
@config ||= Configuration.new
|
61
65
|
end
|
62
66
|
|
67
|
+
def logger
|
68
|
+
config.logger
|
69
|
+
end
|
70
|
+
|
63
71
|
def database
|
64
72
|
if config.database
|
65
73
|
config.database
|
data/spec/beetle_etl_spec.rb
CHANGED
@@ -3,9 +3,13 @@ require 'spec_helper'
|
|
3
3
|
describe BeetleETL do
|
4
4
|
|
5
5
|
describe '#import' do
|
6
|
-
it 'runs the import' do
|
7
|
-
|
8
|
-
|
6
|
+
it 'runs the import with reporting' do
|
7
|
+
report = double(:report)
|
8
|
+
reporter = double(:reporter, log_summary: nil)
|
9
|
+
|
10
|
+
expect(BeetleETL::Import).to receive_message_chain(:new, :run).and_return report
|
11
|
+
expect(BeetleETL::Reporter).to receive(:new).with(report).and_return reporter
|
12
|
+
expect(BeetleETL.import).to eql(report)
|
9
13
|
end
|
10
14
|
end
|
11
15
|
|
@@ -1,12 +1,11 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
-
require 'tempfile'
|
3
2
|
|
4
3
|
module BeetleETL
|
5
4
|
describe TransformationLoader do
|
6
5
|
|
7
6
|
subject { TransformationLoader.new }
|
8
7
|
|
9
|
-
before
|
8
|
+
before do
|
10
9
|
data_file = tempfile_with_contents <<-FILE
|
11
10
|
import :foo do
|
12
11
|
'foo'
|
@@ -47,12 +46,5 @@ module BeetleETL
|
|
47
46
|
end
|
48
47
|
end
|
49
48
|
|
50
|
-
def tempfile_with_contents(contents)
|
51
|
-
Tempfile.new('transform').tap do |file|
|
52
|
-
file.write(contents)
|
53
|
-
file.close
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
49
|
end
|
58
50
|
end
|
@@ -13,9 +13,9 @@ describe BeetleETL do
|
|
13
13
|
|
14
14
|
include ExampleSchema
|
15
15
|
|
16
|
-
let!(:time1) { Time.new(2014 ,
|
17
|
-
let!(:time2) { Time.new(2015 ,
|
18
|
-
let!(:time3) { Time.new(2015 , 11 ,
|
16
|
+
let!(:time1) { Time.new(2014 , 7 , 17 , 16 , 12).beginning_of_day }
|
17
|
+
let!(:time2) { Time.new(2015 , 2 , 8 , 22 , 18).beginning_of_day }
|
18
|
+
let!(:time3) { Time.new(2015 , 11 , 3 , 12 , 17).beginning_of_day }
|
19
19
|
|
20
20
|
before :each do
|
21
21
|
create_tables
|
@@ -27,6 +27,7 @@ describe BeetleETL do
|
|
27
27
|
config.transformation_file = File.expand_path('../example_transform.rb', __FILE__)
|
28
28
|
config.database_config = database_config
|
29
29
|
config.external_source = 'source_name'
|
30
|
+
config.logger = Logger.new(Tempfile.new("log"))
|
30
31
|
end
|
31
32
|
end
|
32
33
|
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'unindent'
|
3
|
+
|
4
|
+
module BeetleETL
|
5
|
+
describe Reporter do
|
6
|
+
|
7
|
+
let(:report) do
|
8
|
+
{
|
9
|
+
organisations: {
|
10
|
+
"organisations: Transform" => {
|
11
|
+
:started_at => Time.new(2015, 03, 14, 16, 0),
|
12
|
+
:finished_at => Time.new(2015, 03, 14, 16, 10)
|
13
|
+
},
|
14
|
+
"organisations: MapRelations" => {
|
15
|
+
:started_at => Time.new(2015, 03, 14, 17, 0),
|
16
|
+
:finished_at => Time.new(2015, 03, 14, 17, 10)
|
17
|
+
},
|
18
|
+
"organisations: Load" => {
|
19
|
+
:started_at => Time.new(2015, 03, 14, 18, 0),
|
20
|
+
:finished_at => Time.new(2015, 03, 14, 18, 10)
|
21
|
+
},
|
22
|
+
},
|
23
|
+
departments: {
|
24
|
+
"departments: Transform" => {
|
25
|
+
:started_at => Time.new(2015, 03, 14, 16, 0),
|
26
|
+
:finished_at => Time.new(2015, 03, 14, 16, 12)
|
27
|
+
},
|
28
|
+
"departments: MapRelations" => {
|
29
|
+
:started_at => Time.new(2015, 03, 14, 17, 2),
|
30
|
+
:finished_at => Time.new(2015, 03, 14, 17, 10)
|
31
|
+
},
|
32
|
+
"departments: Load" => {
|
33
|
+
:started_at => Time.new(2015, 03, 14, 18, 10),
|
34
|
+
:finished_at => Time.new(2015, 03, 14, 19, 21, 39)
|
35
|
+
},
|
36
|
+
}
|
37
|
+
}
|
38
|
+
end
|
39
|
+
|
40
|
+
it "loggs a summary of all step times by table name" do
|
41
|
+
expect(BeetleETL.logger).to receive(:info).with <<-LOG.unindent
|
42
|
+
|
43
|
+
|
44
|
+
organisations
|
45
|
+
========================
|
46
|
+
Transform: 00:10:00
|
47
|
+
MapRelations: 00:10:00
|
48
|
+
Load: 00:10:00
|
49
|
+
------------------------
|
50
|
+
00:30:00
|
51
|
+
|
52
|
+
departments
|
53
|
+
========================
|
54
|
+
Transform: 00:12:00
|
55
|
+
MapRelations: 00:08:00
|
56
|
+
Load: 01:11:39
|
57
|
+
------------------------
|
58
|
+
01:31:39
|
59
|
+
LOG
|
60
|
+
|
61
|
+
Reporter.new(report).log_summary
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -3,14 +3,18 @@ CodeClimate::TestReporter.start
|
|
3
3
|
|
4
4
|
require_relative '../lib/beetle_etl.rb'
|
5
5
|
require_relative 'support/database_helpers.rb'
|
6
|
+
require_relative 'support/file_helpers.rb'
|
6
7
|
|
7
8
|
RSpec.configure do |config|
|
8
9
|
|
9
10
|
config.include SpecSupport::DatabaseHelpers
|
11
|
+
config.include SpecSupport::FileHelpers
|
12
|
+
|
10
13
|
config.backtrace_exclusion_patterns = [/rspec-core/]
|
11
14
|
|
12
15
|
config.around(:each) do |example|
|
13
16
|
BeetleETL.reset
|
17
|
+
|
14
18
|
if example.metadata[:feature]
|
15
19
|
example.run
|
16
20
|
else
|
@@ -47,42 +47,54 @@ module BeetleETL
|
|
47
47
|
it 'creates a stage table table with all payload columns' do
|
48
48
|
subject.run
|
49
49
|
|
50
|
-
|
50
|
+
schema = Hash[test_database.schema(subject.stage_table_name.to_sym)]
|
51
51
|
|
52
52
|
expected_columns = %i(id external_id some_string some_integer some_float)
|
53
|
-
expect(
|
53
|
+
expect(schema.keys).to include(*expected_columns)
|
54
54
|
|
55
|
-
expect(
|
56
|
-
expect(
|
57
|
-
expect(
|
55
|
+
expect(schema[:id][:db_type]).to eq('integer')
|
56
|
+
expect(schema[:external_id][:db_type]).to eq('character varying(255)')
|
57
|
+
expect(schema[:transition][:db_type]).to eq('character varying(255)')
|
58
58
|
|
59
|
-
expect(
|
60
|
-
expect(
|
61
|
-
expect(
|
59
|
+
expect(schema[:some_string][:db_type]).to eq('character varying(200)')
|
60
|
+
expect(schema[:some_integer][:db_type]).to eq('integer')
|
61
|
+
expect(schema[:some_float][:db_type]).to eq('double precision')
|
62
62
|
end
|
63
63
|
|
64
64
|
it 'adds columns for dependent foreign key associations' do
|
65
65
|
subject.run
|
66
66
|
|
67
|
-
|
67
|
+
schema = Hash[test_database.schema(subject.stage_table_name)]
|
68
68
|
|
69
69
|
expected_columns = %i(
|
70
70
|
dependee_a_id external_dependee_a_id
|
71
71
|
dependee_b_id external_dependee_b_id
|
72
72
|
)
|
73
|
-
expect(
|
73
|
+
expect(schema.keys).to include(*expected_columns)
|
74
74
|
|
75
|
-
expect(
|
76
|
-
expect(
|
75
|
+
expect(schema[:dependee_a_id][:db_type]).to eq('integer')
|
76
|
+
expect(schema[:external_dependee_a_id][:db_type]).to eq('character varying(255)')
|
77
77
|
|
78
|
-
expect(
|
79
|
-
expect(
|
78
|
+
expect(schema[:dependee_b_id][:db_type]).to eq('integer')
|
79
|
+
expect(schema[:external_dependee_b_id][:db_type]).to eq('character varying(255)')
|
80
80
|
end
|
81
81
|
|
82
82
|
it 'does not add foreign key columns twice if defined as payload column' do
|
83
83
|
columns = [:some_string, :dependee_a_id]
|
84
84
|
CreateStage.new(:example_table, @relations, columns).run
|
85
85
|
end
|
86
|
+
|
87
|
+
it 'raises an error if no columns and no relations are defined' do
|
88
|
+
expect do
|
89
|
+
CreateStage.new(:example_table, {}, []).run
|
90
|
+
end.to raise_error(BeetleETL::NoColumnsDefinedError)
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'raises an error when given columns with no definition' do
|
94
|
+
expect do
|
95
|
+
CreateStage.new(:example_table, @relations, [:undefined_column]).run
|
96
|
+
end.to raise_error(BeetleETL::ColumnDefinitionNotFoundError)
|
97
|
+
end
|
86
98
|
end
|
87
99
|
|
88
100
|
end
|
data/spec/steps/step_spec.rb
CHANGED
@@ -27,8 +27,8 @@ module BeetleETL
|
|
27
27
|
end
|
28
28
|
|
29
29
|
describe '#dependencies' do
|
30
|
-
it '
|
31
|
-
expect
|
30
|
+
it 'returns an empty set' do
|
31
|
+
expect(subject.dependencies).to eql(Set.new)
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'byebug'
|
2
|
+
require "spec_helper"
|
3
|
+
require "beetle_etl/testing"
|
4
|
+
|
5
|
+
describe "BeetleETL:Testing" do
|
6
|
+
|
7
|
+
include BeetleETL::Testing
|
8
|
+
|
9
|
+
before do
|
10
|
+
data_file = tempfile_with_contents <<-'FILE'
|
11
|
+
import :some_table do
|
12
|
+
columns :some_attribute
|
13
|
+
end
|
14
|
+
|
15
|
+
import :organisations do
|
16
|
+
references :some_table, on: :some_table_id
|
17
|
+
columns :name, :address
|
18
|
+
|
19
|
+
query <<-SQL
|
20
|
+
INSERT INTO #{stage_table} (external_id, address, name)
|
21
|
+
VALUES ('external_id', 'address', 'name')
|
22
|
+
SQL
|
23
|
+
end
|
24
|
+
FILE
|
25
|
+
|
26
|
+
BeetleETL.configure do |config|
|
27
|
+
config.database = test_database
|
28
|
+
config.transformation_file = data_file.path
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
context "with properly defined target tables" do
|
33
|
+
before do
|
34
|
+
test_database.create_table :some_table do
|
35
|
+
primary_key :id
|
36
|
+
String :external_id, size: 255
|
37
|
+
String :some_attribute, size: 255
|
38
|
+
end
|
39
|
+
|
40
|
+
test_database.create_table :organisations do
|
41
|
+
primary_key :id
|
42
|
+
String :external_id, size: 255
|
43
|
+
String :name, size: 255
|
44
|
+
String :address, size: 255
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
it "makes stage tables available in the block" do
|
49
|
+
with_stage_tables_for(:organisations, :some_table) do
|
50
|
+
expect(test_database.table_exists?(stage_table_name(:organisations))).to be_truthy
|
51
|
+
expect(test_database.table_exists?(stage_table_name(:some_table))).to be_truthy
|
52
|
+
end
|
53
|
+
|
54
|
+
expect(test_database.table_exists?(stage_table_name(:organisations))).to be_falsey
|
55
|
+
expect(test_database.table_exists?(stage_table_name(:some_table))).to be_falsey
|
56
|
+
end
|
57
|
+
|
58
|
+
it "allows the transformation to be run insiede the block" do
|
59
|
+
with_stage_tables_for(:organisations, :some_table) do
|
60
|
+
run_transformation(:organisations)
|
61
|
+
|
62
|
+
expect(stage_table_name(:organisations)).to have_values(
|
63
|
+
[ :external_id , :address , :name ] ,
|
64
|
+
[ "external_id" , "address" , "name" ]
|
65
|
+
)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
it "raises an error if the target table cannot be found" do
|
72
|
+
expect do
|
73
|
+
with_stage_tables_for(:organisations)
|
74
|
+
end.to raise_error(BeetleETL::Testing::TargetTableNotFoundError)
|
75
|
+
end
|
76
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: beetle_etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luciano Maiwald
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-04-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sequel
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 4.0.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: activesupport
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 4.2.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 4.2.0
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -81,19 +95,19 @@ dependencies:
|
|
81
95
|
- !ruby/object:Gem::Version
|
82
96
|
version: 0.18.0
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
98
|
+
name: unindent
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
86
100
|
requirements:
|
87
|
-
- - "
|
101
|
+
- - "~>"
|
88
102
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
103
|
+
version: '1.0'
|
90
104
|
type: :development
|
91
105
|
prerelease: false
|
92
106
|
version_requirements: !ruby/object:Gem::Requirement
|
93
107
|
requirements:
|
94
|
-
- - "
|
108
|
+
- - "~>"
|
95
109
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
110
|
+
version: '1.0'
|
97
111
|
description: Taking care of synchronizing external data with referential data in your
|
98
112
|
application.
|
99
113
|
email:
|
@@ -115,6 +129,9 @@ files:
|
|
115
129
|
- lib/beetle_etl/dsl/transformation_loader.rb
|
116
130
|
- lib/beetle_etl/import.rb
|
117
131
|
- lib/beetle_etl/naming.rb
|
132
|
+
- lib/beetle_etl/reporter.rb
|
133
|
+
- lib/beetle_etl/step_runner/async_step_runner.rb
|
134
|
+
- lib/beetle_etl/step_runner/dependency_resolver.rb
|
118
135
|
- lib/beetle_etl/steps/assign_ids.rb
|
119
136
|
- lib/beetle_etl/steps/create_stage.rb
|
120
137
|
- lib/beetle_etl/steps/drop_stage.rb
|
@@ -123,8 +140,7 @@ files:
|
|
123
140
|
- lib/beetle_etl/steps/step.rb
|
124
141
|
- lib/beetle_etl/steps/table_diff.rb
|
125
142
|
- lib/beetle_etl/steps/transform.rb
|
126
|
-
- lib/beetle_etl/
|
127
|
-
- lib/beetle_etl/task_runner/task_runner.rb
|
143
|
+
- lib/beetle_etl/testing.rb
|
128
144
|
- lib/beetle_etl/version.rb
|
129
145
|
- script/postgres
|
130
146
|
- spec/beetle_etl_spec.rb
|
@@ -134,6 +150,7 @@ files:
|
|
134
150
|
- spec/feature/example_schema.rb
|
135
151
|
- spec/feature/example_transform.rb
|
136
152
|
- spec/feature/feature_spec.rb
|
153
|
+
- spec/reporter_spec.rb
|
137
154
|
- spec/spec_helper.rb
|
138
155
|
- spec/steps/assign_ids_spec.rb
|
139
156
|
- spec/steps/create_stage_spec.rb
|
@@ -145,7 +162,9 @@ files:
|
|
145
162
|
- spec/support/database.yml.example
|
146
163
|
- spec/support/database.yml.travis
|
147
164
|
- spec/support/database_helpers.rb
|
165
|
+
- spec/support/file_helpers.rb
|
148
166
|
- spec/task_runner/dependency_resolver_spec.rb
|
167
|
+
- spec/testing_spec.rb
|
149
168
|
homepage: https://github.com/maiwald/beetle_etl
|
150
169
|
licenses:
|
151
170
|
- MIT
|
@@ -178,6 +197,7 @@ test_files:
|
|
178
197
|
- spec/feature/example_schema.rb
|
179
198
|
- spec/feature/example_transform.rb
|
180
199
|
- spec/feature/feature_spec.rb
|
200
|
+
- spec/reporter_spec.rb
|
181
201
|
- spec/spec_helper.rb
|
182
202
|
- spec/steps/assign_ids_spec.rb
|
183
203
|
- spec/steps/create_stage_spec.rb
|
@@ -189,4 +209,6 @@ test_files:
|
|
189
209
|
- spec/support/database.yml.example
|
190
210
|
- spec/support/database.yml.travis
|
191
211
|
- spec/support/database_helpers.rb
|
212
|
+
- spec/support/file_helpers.rb
|
192
213
|
- spec/task_runner/dependency_resolver_spec.rb
|
214
|
+
- spec/testing_spec.rb
|
@@ -1,71 +0,0 @@
|
|
1
|
-
module BeetleETL
|
2
|
-
class TaskRunner
|
3
|
-
|
4
|
-
def initialize(tasks)
|
5
|
-
@dependency_resolver = DependencyResolver.new(tasks)
|
6
|
-
@tasks = tasks
|
7
|
-
|
8
|
-
@queue = Queue.new
|
9
|
-
@completed = Set.new
|
10
|
-
@running = Set.new
|
11
|
-
end
|
12
|
-
|
13
|
-
def run
|
14
|
-
results = {}
|
15
|
-
|
16
|
-
until all_tasks_complete?
|
17
|
-
runnables.each do |task|
|
18
|
-
run_task_async(task)
|
19
|
-
mark_task_running(task.name)
|
20
|
-
end
|
21
|
-
|
22
|
-
task_name, task_data = @queue.pop
|
23
|
-
results[task_name] = task_data
|
24
|
-
mark_task_completed(task_name)
|
25
|
-
end
|
26
|
-
|
27
|
-
results
|
28
|
-
end
|
29
|
-
|
30
|
-
private
|
31
|
-
|
32
|
-
attr_reader :running, :completed
|
33
|
-
|
34
|
-
def run_task_async(task)
|
35
|
-
Thread.new do
|
36
|
-
started_at = now
|
37
|
-
result = task.run
|
38
|
-
finished_at = now
|
39
|
-
|
40
|
-
@queue.push [task.name, {
|
41
|
-
started_at: started_at,
|
42
|
-
finished_at: finished_at,
|
43
|
-
result: result,
|
44
|
-
}]
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
def mark_task_running(task_name)
|
49
|
-
running.add(task_name)
|
50
|
-
end
|
51
|
-
|
52
|
-
def mark_task_completed(task_name)
|
53
|
-
runnables.delete(task_name)
|
54
|
-
completed.add(task_name)
|
55
|
-
end
|
56
|
-
|
57
|
-
def runnables
|
58
|
-
resolvables = @dependency_resolver.resolvables(completed)
|
59
|
-
resolvables.reject { |r| running.include? r.name }
|
60
|
-
end
|
61
|
-
|
62
|
-
def all_tasks_complete?
|
63
|
-
@tasks.map(&:name).to_set == completed.to_set
|
64
|
-
end
|
65
|
-
|
66
|
-
def now
|
67
|
-
Time.now
|
68
|
-
end
|
69
|
-
|
70
|
-
end
|
71
|
-
end
|