beetle_etl 0.0.7 → 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/beetle_etl.gemspec +2 -1
- data/lib/beetle_etl/import.rb +26 -12
- data/lib/beetle_etl/reporter.rb +65 -0
- data/lib/beetle_etl/step_runner/async_step_runner.rb +83 -0
- data/lib/beetle_etl/{task_runner → step_runner}/dependency_resolver.rb +0 -0
- data/lib/beetle_etl/steps/create_stage.rb +38 -10
- data/lib/beetle_etl/steps/drop_stage.rb +0 -4
- data/lib/beetle_etl/steps/step.rb +1 -3
- data/lib/beetle_etl/steps/transform.rb +1 -1
- data/lib/beetle_etl/testing.rb +46 -0
- data/lib/beetle_etl/version.rb +1 -1
- data/lib/beetle_etl.rb +14 -6
- data/spec/beetle_etl_spec.rb +7 -3
- data/spec/dsl/transformation_loader_spec.rb +1 -9
- data/spec/feature/feature_spec.rb +4 -3
- data/spec/reporter_spec.rb +65 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/steps/create_stage_spec.rb +26 -14
- data/spec/steps/step_spec.rb +2 -2
- data/spec/steps/transform_spec.rb +0 -1
- data/spec/support/database_helpers.rb +1 -1
- data/spec/support/file_helpers.rb +14 -0
- data/spec/testing_spec.rb +76 -0
- metadata +31 -9
- data/lib/beetle_etl/task_runner/task_runner.rb +0 -71
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 788ce4239b271ab02fe67da3642f8f17e6fd275e
|
4
|
+
data.tar.gz: e89e37f2bd8ec970599249b73e97f9d66be60555
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3df6bfbcadd41a1e98f330aa35c9bcc0701c3450d9425c7c156520cae467cdccbb4adc5075a7923261d972c1341f50c49f3a09e35536270f2cdd165414b14ea7
|
7
|
+
data.tar.gz: dd92ef629e21523001a4d8371b0a8b2432845ecb371b0bde278d752b06f989e19c1b61321dc28686220f14a0b428f33b749bab52c119ec1d56284c6a71ae0e44
|
data/beetle_etl.gemspec
CHANGED
@@ -19,10 +19,11 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ['lib']
|
20
20
|
|
21
21
|
spec.add_runtime_dependency 'sequel', '>= 4.0.0'
|
22
|
+
spec.add_runtime_dependency 'activesupport', '>= 4.2.0'
|
22
23
|
|
23
24
|
spec.add_development_dependency 'bundler', '~> 1.6'
|
24
25
|
spec.add_development_dependency 'rspec', '>= 3.0.0'
|
25
26
|
spec.add_development_dependency 'timecop', '>= 0.7.0'
|
26
27
|
spec.add_development_dependency 'pg', '>= 0.18.0'
|
27
|
-
spec.add_development_dependency '
|
28
|
+
spec.add_development_dependency 'unindent', '~> 1.0'
|
28
29
|
end
|
data/lib/beetle_etl/import.rb
CHANGED
@@ -1,15 +1,34 @@
|
|
1
|
+
require 'active_support/core_ext/hash/deep_merge'
|
2
|
+
|
1
3
|
module BeetleETL
|
2
4
|
class Import
|
3
5
|
|
4
6
|
def run
|
5
|
-
|
6
|
-
|
7
|
-
TaskRunner.new(load_steps).run
|
8
|
-
end
|
9
|
-
rescue => e
|
10
|
-
raise e
|
7
|
+
setup
|
8
|
+
import
|
11
9
|
ensure
|
12
|
-
|
10
|
+
cleanup
|
11
|
+
end
|
12
|
+
|
13
|
+
def setup
|
14
|
+
transformations.each do |t|
|
15
|
+
CreateStage.new(t.table_name, t.relations, t.column_names).run
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def import
|
20
|
+
data_report = AsyncStepRunner.new(data_steps).run
|
21
|
+
load_report = BeetleETL.database.transaction do
|
22
|
+
AsyncStepRunner.new(load_steps).run
|
23
|
+
end
|
24
|
+
|
25
|
+
data_report.deep_merge load_report
|
26
|
+
end
|
27
|
+
|
28
|
+
def cleanup
|
29
|
+
transformations.each do |t|
|
30
|
+
DropStage.new(t.table_name).run
|
31
|
+
end
|
13
32
|
end
|
14
33
|
|
15
34
|
private
|
@@ -17,7 +36,6 @@ module BeetleETL
|
|
17
36
|
def data_steps
|
18
37
|
transformations.flat_map do |t|
|
19
38
|
[
|
20
|
-
CreateStage.new(t.table_name, t.relations, t.column_names),
|
21
39
|
Transform.new(t.table_name, t.dependencies, t.query),
|
22
40
|
MapRelations.new(t.table_name, t.relations),
|
23
41
|
TableDiff.new(t.table_name),
|
@@ -32,10 +50,6 @@ module BeetleETL
|
|
32
50
|
end
|
33
51
|
end
|
34
52
|
|
35
|
-
def cleanup_steps
|
36
|
-
transformations.map { |t| DropStage.new(t.table_name) }
|
37
|
-
end
|
38
|
-
|
39
53
|
def transformations
|
40
54
|
@transformations ||= TransformationLoader.new.load
|
41
55
|
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module BeetleETL
|
2
|
+
class Reporter
|
3
|
+
|
4
|
+
def initialize(report)
|
5
|
+
@report = report
|
6
|
+
end
|
7
|
+
|
8
|
+
def log_summary
|
9
|
+
BeetleETL.logger.info(summary)
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def summary
|
15
|
+
"\n\n" +
|
16
|
+
@report.map do |(table_name, steps)|
|
17
|
+
total_duration = format_duration(sum_durations(steps))
|
18
|
+
[
|
19
|
+
table_name,
|
20
|
+
seperator("="),
|
21
|
+
step_rows(steps).join("\n"),
|
22
|
+
seperator("-"),
|
23
|
+
total_duration.rjust(line_width)
|
24
|
+
].join("\n")
|
25
|
+
end.join("\n\n") + "\n"
|
26
|
+
end
|
27
|
+
|
28
|
+
def step_rows(steps)
|
29
|
+
steps.map do |step_name, data|
|
30
|
+
label = step_name.split(": ")[1] + ":"
|
31
|
+
duration = format_duration(data[:finished_at] - data[:started_at])
|
32
|
+
line = duration.rjust(line_width)
|
33
|
+
line[2, label.length] = label
|
34
|
+
line
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def format_duration(duration)
|
39
|
+
Time.at(duration).utc.strftime("%H:%M:%S")
|
40
|
+
end
|
41
|
+
|
42
|
+
def sum_durations(steps)
|
43
|
+
steps.inject(0) do |acc, (_step_name, data)|
|
44
|
+
acc + (data[:finished_at] - data[:started_at])
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def line_width
|
49
|
+
# 2 spaces
|
50
|
+
# + 1 colon
|
51
|
+
# + 1 space
|
52
|
+
# + 8 duration
|
53
|
+
12 + longest_step_name_length
|
54
|
+
end
|
55
|
+
|
56
|
+
def longest_step_name_length
|
57
|
+
@report.keys.max_by(&:length).length - 1
|
58
|
+
end
|
59
|
+
|
60
|
+
def seperator(character)
|
61
|
+
character * line_width
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module BeetleETL
|
2
|
+
class AsyncStepRunner
|
3
|
+
|
4
|
+
def initialize(steps)
|
5
|
+
@dependency_resolver = DependencyResolver.new(steps)
|
6
|
+
@steps = steps
|
7
|
+
|
8
|
+
@queue = Queue.new
|
9
|
+
@completed = Set.new
|
10
|
+
@running = Set.new
|
11
|
+
end
|
12
|
+
|
13
|
+
def run
|
14
|
+
results = {}
|
15
|
+
|
16
|
+
until all_steps_complete?
|
17
|
+
runnables.each do |step|
|
18
|
+
run_step_async(step)
|
19
|
+
mark_step_running(step.name)
|
20
|
+
end
|
21
|
+
|
22
|
+
table_name, step_name, step_data = @queue.pop
|
23
|
+
|
24
|
+
unless results.has_key?(table_name)
|
25
|
+
results[table_name] = {}
|
26
|
+
end
|
27
|
+
|
28
|
+
results[table_name][step_name] = step_data
|
29
|
+
mark_step_completed(step_name)
|
30
|
+
end
|
31
|
+
|
32
|
+
results
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
attr_reader :running, :completed
|
38
|
+
|
39
|
+
def run_step_async(step)
|
40
|
+
Thread.new do
|
41
|
+
begin
|
42
|
+
BeetleETL.logger.info("started step #{step.name}")
|
43
|
+
|
44
|
+
started_at = Time.now
|
45
|
+
step.run
|
46
|
+
finished_at = Time.now
|
47
|
+
|
48
|
+
duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
|
49
|
+
BeetleETL.logger.info("finished #{step.name} in #{duration}")
|
50
|
+
|
51
|
+
@queue.push [
|
52
|
+
step.table_name,
|
53
|
+
step.name,
|
54
|
+
{ started_at: started_at, finished_at: finished_at }
|
55
|
+
]
|
56
|
+
|
57
|
+
rescue => e
|
58
|
+
BeetleETL.logger.fatal(e.message)
|
59
|
+
raise e
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def mark_step_running(step_name)
|
65
|
+
running.add(step_name)
|
66
|
+
end
|
67
|
+
|
68
|
+
def mark_step_completed(step_name)
|
69
|
+
runnables.delete(step_name)
|
70
|
+
completed.add(step_name)
|
71
|
+
end
|
72
|
+
|
73
|
+
def runnables
|
74
|
+
resolvables = @dependency_resolver.resolvables(completed)
|
75
|
+
resolvables.reject { |r| running.include? r.name }
|
76
|
+
end
|
77
|
+
|
78
|
+
def all_steps_complete?
|
79
|
+
@steps.map(&:name).to_set == completed.to_set
|
80
|
+
end
|
81
|
+
|
82
|
+
end
|
83
|
+
end
|
File without changes
|
@@ -1,4 +1,8 @@
|
|
1
1
|
module BeetleETL
|
2
|
+
|
3
|
+
ColumnDefinitionNotFoundError = Class.new(StandardError)
|
4
|
+
NoColumnsDefinedError = Class.new(StandardError)
|
5
|
+
|
2
6
|
class CreateStage < Step
|
3
7
|
|
4
8
|
def initialize(table_name, relations, column_names)
|
@@ -7,27 +11,38 @@ module BeetleETL
|
|
7
11
|
@column_names = column_names
|
8
12
|
end
|
9
13
|
|
10
|
-
def dependencies
|
11
|
-
Set.new
|
12
|
-
end
|
13
|
-
|
14
14
|
def run
|
15
15
|
database.execute <<-SQL
|
16
|
-
CREATE TABLE #{stage_table_name_sql} (
|
16
|
+
CREATE UNLOGGED TABLE #{stage_table_name_sql} (
|
17
17
|
id integer,
|
18
18
|
external_id character varying(255),
|
19
19
|
transition character varying(255),
|
20
20
|
|
21
|
-
#{
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
)
|
21
|
+
#{column_definitions}
|
22
|
+
);
|
23
|
+
|
24
|
+
#{index_definitions}
|
26
25
|
SQL
|
27
26
|
end
|
28
27
|
|
29
28
|
private
|
30
29
|
|
30
|
+
def column_definitions
|
31
|
+
definitions = [
|
32
|
+
payload_column_definitions,
|
33
|
+
relation_column_definitions
|
34
|
+
].compact
|
35
|
+
|
36
|
+
if definitions.empty?
|
37
|
+
raise NoColumnsDefinedError.new <<-MSG
|
38
|
+
Transformation for #{table_name} has no column definitions.
|
39
|
+
Either add an array of columns or references to other tables.
|
40
|
+
MSG
|
41
|
+
end
|
42
|
+
|
43
|
+
definitions.join(',')
|
44
|
+
end
|
45
|
+
|
31
46
|
def payload_column_definitions
|
32
47
|
definitions = (@column_names - @relations.keys).map do |column_name|
|
33
48
|
"#{column_name} #{column_type(column_name)}"
|
@@ -45,6 +60,13 @@ module BeetleETL
|
|
45
60
|
definitions.join(',') if definitions.any?
|
46
61
|
end
|
47
62
|
|
63
|
+
def index_definitions
|
64
|
+
index_columns = [:external_id] + @relations.keys.map { |c| "external_#{c}" }
|
65
|
+
index_columns.map do |column_name|
|
66
|
+
"CREATE INDEX ON #{stage_table_name_sql} (#{column_name})"
|
67
|
+
end.join(";")
|
68
|
+
end
|
69
|
+
|
48
70
|
def column_type(column_name)
|
49
71
|
@column_types ||= Hash[database.schema(public_table_name.to_sym)]
|
50
72
|
.reduce({}) do |acc, (name, schema)|
|
@@ -52,6 +74,12 @@ module BeetleETL
|
|
52
74
|
acc
|
53
75
|
end
|
54
76
|
|
77
|
+
unless @column_types.has_key?(column_name)
|
78
|
+
raise ColumnDefinitionNotFoundError.new <<-MSG
|
79
|
+
Table "#{table_name}" has no column "#{column_name}".
|
80
|
+
MSG
|
81
|
+
end
|
82
|
+
|
55
83
|
@column_types[column_name]
|
56
84
|
end
|
57
85
|
|
@@ -1,7 +1,5 @@
|
|
1
1
|
module BeetleETL
|
2
2
|
|
3
|
-
DependenciesNotDefinedError = Class.new(StandardError)
|
4
|
-
|
5
3
|
class Step
|
6
4
|
|
7
5
|
include BeetleETL::Naming
|
@@ -20,7 +18,7 @@ module BeetleETL
|
|
20
18
|
end
|
21
19
|
|
22
20
|
def dependencies
|
23
|
-
|
21
|
+
Set.new
|
24
22
|
end
|
25
23
|
|
26
24
|
def external_source
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module BeetleETL
|
2
|
+
module Testing
|
3
|
+
|
4
|
+
TargetTableNotFoundError = Class.new(StandardError)
|
5
|
+
NoTransformationFoundError = Class.new(StandardError)
|
6
|
+
|
7
|
+
def with_stage_tables_for(*table_names, &block)
|
8
|
+
table_names.each do |table_name|
|
9
|
+
unless BeetleETL.database.table_exists?(table_name)
|
10
|
+
raise TargetTableNotFoundError.new <<-MSG
|
11
|
+
Missing target table "#{table_name}".
|
12
|
+
In order to create stage tables, BeetleETL requires the target tables to exist because they provide the column definitions.
|
13
|
+
MSG
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
import = Import.new
|
18
|
+
begin
|
19
|
+
import.setup
|
20
|
+
block.call
|
21
|
+
ensure
|
22
|
+
import.cleanup
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def run_transformation(table_name)
|
27
|
+
transformations = TransformationLoader.new.load
|
28
|
+
|
29
|
+
unless transformations.map(&:table_name).include?(table_name)
|
30
|
+
raise NoTransformationFoundError.new <<-MSG
|
31
|
+
No transformation definition found for table "#{table_name}".
|
32
|
+
MSG
|
33
|
+
end
|
34
|
+
|
35
|
+
transformation = transformations.find { |t| t.table_name == table_name }
|
36
|
+
transform = Transform.new(transformation.table_name, transformation.dependencies, transformation.query)
|
37
|
+
transform.run
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
def stage_table_name(table_name)
|
42
|
+
BeetleETL::Naming.stage_table_name(table_name)
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
end
|
data/lib/beetle_etl/version.rb
CHANGED
data/lib/beetle_etl.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'beetle_etl/version'
|
2
2
|
|
3
3
|
require 'sequel'
|
4
|
+
require 'logger'
|
4
5
|
|
5
6
|
module BeetleETL
|
6
7
|
|
@@ -21,10 +22,11 @@ module BeetleETL
|
|
21
22
|
require 'beetle_etl/steps/load'
|
22
23
|
require 'beetle_etl/steps/drop_stage'
|
23
24
|
|
24
|
-
require 'beetle_etl/
|
25
|
-
require 'beetle_etl/
|
25
|
+
require 'beetle_etl/step_runner/dependency_resolver'
|
26
|
+
require 'beetle_etl/step_runner/async_step_runner'
|
26
27
|
|
27
28
|
require 'beetle_etl/import'
|
29
|
+
require 'beetle_etl/reporter'
|
28
30
|
|
29
31
|
class Configuration
|
30
32
|
attr_accessor \
|
@@ -33,10 +35,12 @@ module BeetleETL
|
|
33
35
|
:transformation_file,
|
34
36
|
:stage_schema,
|
35
37
|
:public_schema,
|
36
|
-
:external_source
|
38
|
+
:external_source,
|
39
|
+
:logger
|
37
40
|
|
38
41
|
def initialize
|
39
42
|
@public_schema = 'public'
|
43
|
+
@logger = ::Logger.new(STDOUT)
|
40
44
|
end
|
41
45
|
end
|
42
46
|
|
@@ -44,9 +48,9 @@ module BeetleETL
|
|
44
48
|
|
45
49
|
def import
|
46
50
|
begin
|
47
|
-
Import.new.run
|
48
|
-
|
49
|
-
|
51
|
+
report = Import.new.run
|
52
|
+
Reporter.new(report).log_summary
|
53
|
+
report
|
50
54
|
ensure
|
51
55
|
@database.disconnect if @database
|
52
56
|
end
|
@@ -60,6 +64,10 @@ module BeetleETL
|
|
60
64
|
@config ||= Configuration.new
|
61
65
|
end
|
62
66
|
|
67
|
+
def logger
|
68
|
+
config.logger
|
69
|
+
end
|
70
|
+
|
63
71
|
def database
|
64
72
|
if config.database
|
65
73
|
config.database
|
data/spec/beetle_etl_spec.rb
CHANGED
@@ -3,9 +3,13 @@ require 'spec_helper'
|
|
3
3
|
describe BeetleETL do
|
4
4
|
|
5
5
|
describe '#import' do
|
6
|
-
it 'runs the import' do
|
7
|
-
|
8
|
-
|
6
|
+
it 'runs the import with reporting' do
|
7
|
+
report = double(:report)
|
8
|
+
reporter = double(:reporter, log_summary: nil)
|
9
|
+
|
10
|
+
expect(BeetleETL::Import).to receive_message_chain(:new, :run).and_return report
|
11
|
+
expect(BeetleETL::Reporter).to receive(:new).with(report).and_return reporter
|
12
|
+
expect(BeetleETL.import).to eql(report)
|
9
13
|
end
|
10
14
|
end
|
11
15
|
|
@@ -1,12 +1,11 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
-
require 'tempfile'
|
3
2
|
|
4
3
|
module BeetleETL
|
5
4
|
describe TransformationLoader do
|
6
5
|
|
7
6
|
subject { TransformationLoader.new }
|
8
7
|
|
9
|
-
before
|
8
|
+
before do
|
10
9
|
data_file = tempfile_with_contents <<-FILE
|
11
10
|
import :foo do
|
12
11
|
'foo'
|
@@ -47,12 +46,5 @@ module BeetleETL
|
|
47
46
|
end
|
48
47
|
end
|
49
48
|
|
50
|
-
def tempfile_with_contents(contents)
|
51
|
-
Tempfile.new('transform').tap do |file|
|
52
|
-
file.write(contents)
|
53
|
-
file.close
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
49
|
end
|
58
50
|
end
|
@@ -13,9 +13,9 @@ describe BeetleETL do
|
|
13
13
|
|
14
14
|
include ExampleSchema
|
15
15
|
|
16
|
-
let!(:time1) { Time.new(2014 ,
|
17
|
-
let!(:time2) { Time.new(2015 ,
|
18
|
-
let!(:time3) { Time.new(2015 , 11 ,
|
16
|
+
let!(:time1) { Time.new(2014 , 7 , 17 , 16 , 12).beginning_of_day }
|
17
|
+
let!(:time2) { Time.new(2015 , 2 , 8 , 22 , 18).beginning_of_day }
|
18
|
+
let!(:time3) { Time.new(2015 , 11 , 3 , 12 , 17).beginning_of_day }
|
19
19
|
|
20
20
|
before :each do
|
21
21
|
create_tables
|
@@ -27,6 +27,7 @@ describe BeetleETL do
|
|
27
27
|
config.transformation_file = File.expand_path('../example_transform.rb', __FILE__)
|
28
28
|
config.database_config = database_config
|
29
29
|
config.external_source = 'source_name'
|
30
|
+
config.logger = Logger.new(Tempfile.new("log"))
|
30
31
|
end
|
31
32
|
end
|
32
33
|
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'unindent'
|
3
|
+
|
4
|
+
module BeetleETL
|
5
|
+
describe Reporter do
|
6
|
+
|
7
|
+
let(:report) do
|
8
|
+
{
|
9
|
+
organisations: {
|
10
|
+
"organisations: Transform" => {
|
11
|
+
:started_at => Time.new(2015, 03, 14, 16, 0),
|
12
|
+
:finished_at => Time.new(2015, 03, 14, 16, 10)
|
13
|
+
},
|
14
|
+
"organisations: MapRelations" => {
|
15
|
+
:started_at => Time.new(2015, 03, 14, 17, 0),
|
16
|
+
:finished_at => Time.new(2015, 03, 14, 17, 10)
|
17
|
+
},
|
18
|
+
"organisations: Load" => {
|
19
|
+
:started_at => Time.new(2015, 03, 14, 18, 0),
|
20
|
+
:finished_at => Time.new(2015, 03, 14, 18, 10)
|
21
|
+
},
|
22
|
+
},
|
23
|
+
departments: {
|
24
|
+
"departments: Transform" => {
|
25
|
+
:started_at => Time.new(2015, 03, 14, 16, 0),
|
26
|
+
:finished_at => Time.new(2015, 03, 14, 16, 12)
|
27
|
+
},
|
28
|
+
"departments: MapRelations" => {
|
29
|
+
:started_at => Time.new(2015, 03, 14, 17, 2),
|
30
|
+
:finished_at => Time.new(2015, 03, 14, 17, 10)
|
31
|
+
},
|
32
|
+
"departments: Load" => {
|
33
|
+
:started_at => Time.new(2015, 03, 14, 18, 10),
|
34
|
+
:finished_at => Time.new(2015, 03, 14, 19, 21, 39)
|
35
|
+
},
|
36
|
+
}
|
37
|
+
}
|
38
|
+
end
|
39
|
+
|
40
|
+
it "loggs a summary of all step times by table name" do
|
41
|
+
expect(BeetleETL.logger).to receive(:info).with <<-LOG.unindent
|
42
|
+
|
43
|
+
|
44
|
+
organisations
|
45
|
+
========================
|
46
|
+
Transform: 00:10:00
|
47
|
+
MapRelations: 00:10:00
|
48
|
+
Load: 00:10:00
|
49
|
+
------------------------
|
50
|
+
00:30:00
|
51
|
+
|
52
|
+
departments
|
53
|
+
========================
|
54
|
+
Transform: 00:12:00
|
55
|
+
MapRelations: 00:08:00
|
56
|
+
Load: 01:11:39
|
57
|
+
------------------------
|
58
|
+
01:31:39
|
59
|
+
LOG
|
60
|
+
|
61
|
+
Reporter.new(report).log_summary
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -3,14 +3,18 @@ CodeClimate::TestReporter.start
|
|
3
3
|
|
4
4
|
require_relative '../lib/beetle_etl.rb'
|
5
5
|
require_relative 'support/database_helpers.rb'
|
6
|
+
require_relative 'support/file_helpers.rb'
|
6
7
|
|
7
8
|
RSpec.configure do |config|
|
8
9
|
|
9
10
|
config.include SpecSupport::DatabaseHelpers
|
11
|
+
config.include SpecSupport::FileHelpers
|
12
|
+
|
10
13
|
config.backtrace_exclusion_patterns = [/rspec-core/]
|
11
14
|
|
12
15
|
config.around(:each) do |example|
|
13
16
|
BeetleETL.reset
|
17
|
+
|
14
18
|
if example.metadata[:feature]
|
15
19
|
example.run
|
16
20
|
else
|
@@ -47,42 +47,54 @@ module BeetleETL
|
|
47
47
|
it 'creates a stage table table with all payload columns' do
|
48
48
|
subject.run
|
49
49
|
|
50
|
-
|
50
|
+
schema = Hash[test_database.schema(subject.stage_table_name.to_sym)]
|
51
51
|
|
52
52
|
expected_columns = %i(id external_id some_string some_integer some_float)
|
53
|
-
expect(
|
53
|
+
expect(schema.keys).to include(*expected_columns)
|
54
54
|
|
55
|
-
expect(
|
56
|
-
expect(
|
57
|
-
expect(
|
55
|
+
expect(schema[:id][:db_type]).to eq('integer')
|
56
|
+
expect(schema[:external_id][:db_type]).to eq('character varying(255)')
|
57
|
+
expect(schema[:transition][:db_type]).to eq('character varying(255)')
|
58
58
|
|
59
|
-
expect(
|
60
|
-
expect(
|
61
|
-
expect(
|
59
|
+
expect(schema[:some_string][:db_type]).to eq('character varying(200)')
|
60
|
+
expect(schema[:some_integer][:db_type]).to eq('integer')
|
61
|
+
expect(schema[:some_float][:db_type]).to eq('double precision')
|
62
62
|
end
|
63
63
|
|
64
64
|
it 'adds columns for dependent foreign key associations' do
|
65
65
|
subject.run
|
66
66
|
|
67
|
-
|
67
|
+
schema = Hash[test_database.schema(subject.stage_table_name)]
|
68
68
|
|
69
69
|
expected_columns = %i(
|
70
70
|
dependee_a_id external_dependee_a_id
|
71
71
|
dependee_b_id external_dependee_b_id
|
72
72
|
)
|
73
|
-
expect(
|
73
|
+
expect(schema.keys).to include(*expected_columns)
|
74
74
|
|
75
|
-
expect(
|
76
|
-
expect(
|
75
|
+
expect(schema[:dependee_a_id][:db_type]).to eq('integer')
|
76
|
+
expect(schema[:external_dependee_a_id][:db_type]).to eq('character varying(255)')
|
77
77
|
|
78
|
-
expect(
|
79
|
-
expect(
|
78
|
+
expect(schema[:dependee_b_id][:db_type]).to eq('integer')
|
79
|
+
expect(schema[:external_dependee_b_id][:db_type]).to eq('character varying(255)')
|
80
80
|
end
|
81
81
|
|
82
82
|
it 'does not add foreign key columns twice if defined as payload column' do
|
83
83
|
columns = [:some_string, :dependee_a_id]
|
84
84
|
CreateStage.new(:example_table, @relations, columns).run
|
85
85
|
end
|
86
|
+
|
87
|
+
it 'raises an error if no columns and no relations are defined' do
|
88
|
+
expect do
|
89
|
+
CreateStage.new(:example_table, {}, []).run
|
90
|
+
end.to raise_error(BeetleETL::NoColumnsDefinedError)
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'raises an error when given columns with no definition' do
|
94
|
+
expect do
|
95
|
+
CreateStage.new(:example_table, @relations, [:undefined_column]).run
|
96
|
+
end.to raise_error(BeetleETL::ColumnDefinitionNotFoundError)
|
97
|
+
end
|
86
98
|
end
|
87
99
|
|
88
100
|
end
|
data/spec/steps/step_spec.rb
CHANGED
@@ -27,8 +27,8 @@ module BeetleETL
|
|
27
27
|
end
|
28
28
|
|
29
29
|
describe '#dependencies' do
|
30
|
-
it '
|
31
|
-
expect
|
30
|
+
it 'returns an empty set' do
|
31
|
+
expect(subject.dependencies).to eql(Set.new)
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'byebug'
|
2
|
+
require "spec_helper"
|
3
|
+
require "beetle_etl/testing"
|
4
|
+
|
5
|
+
describe "BeetleETL:Testing" do
|
6
|
+
|
7
|
+
include BeetleETL::Testing
|
8
|
+
|
9
|
+
before do
|
10
|
+
data_file = tempfile_with_contents <<-'FILE'
|
11
|
+
import :some_table do
|
12
|
+
columns :some_attribute
|
13
|
+
end
|
14
|
+
|
15
|
+
import :organisations do
|
16
|
+
references :some_table, on: :some_table_id
|
17
|
+
columns :name, :address
|
18
|
+
|
19
|
+
query <<-SQL
|
20
|
+
INSERT INTO #{stage_table} (external_id, address, name)
|
21
|
+
VALUES ('external_id', 'address', 'name')
|
22
|
+
SQL
|
23
|
+
end
|
24
|
+
FILE
|
25
|
+
|
26
|
+
BeetleETL.configure do |config|
|
27
|
+
config.database = test_database
|
28
|
+
config.transformation_file = data_file.path
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
context "with properly defined target tables" do
|
33
|
+
before do
|
34
|
+
test_database.create_table :some_table do
|
35
|
+
primary_key :id
|
36
|
+
String :external_id, size: 255
|
37
|
+
String :some_attribute, size: 255
|
38
|
+
end
|
39
|
+
|
40
|
+
test_database.create_table :organisations do
|
41
|
+
primary_key :id
|
42
|
+
String :external_id, size: 255
|
43
|
+
String :name, size: 255
|
44
|
+
String :address, size: 255
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
it "makes stage tables available in the block" do
|
49
|
+
with_stage_tables_for(:organisations, :some_table) do
|
50
|
+
expect(test_database.table_exists?(stage_table_name(:organisations))).to be_truthy
|
51
|
+
expect(test_database.table_exists?(stage_table_name(:some_table))).to be_truthy
|
52
|
+
end
|
53
|
+
|
54
|
+
expect(test_database.table_exists?(stage_table_name(:organisations))).to be_falsey
|
55
|
+
expect(test_database.table_exists?(stage_table_name(:some_table))).to be_falsey
|
56
|
+
end
|
57
|
+
|
58
|
+
it "allows the transformation to be run insiede the block" do
|
59
|
+
with_stage_tables_for(:organisations, :some_table) do
|
60
|
+
run_transformation(:organisations)
|
61
|
+
|
62
|
+
expect(stage_table_name(:organisations)).to have_values(
|
63
|
+
[ :external_id , :address , :name ] ,
|
64
|
+
[ "external_id" , "address" , "name" ]
|
65
|
+
)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
it "raises an error if the target table cannot be found" do
|
72
|
+
expect do
|
73
|
+
with_stage_tables_for(:organisations)
|
74
|
+
end.to raise_error(BeetleETL::Testing::TargetTableNotFoundError)
|
75
|
+
end
|
76
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: beetle_etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luciano Maiwald
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-04-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sequel
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 4.0.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: activesupport
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 4.2.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 4.2.0
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -81,19 +95,19 @@ dependencies:
|
|
81
95
|
- !ruby/object:Gem::Version
|
82
96
|
version: 0.18.0
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
98
|
+
name: unindent
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
86
100
|
requirements:
|
87
|
-
- - "
|
101
|
+
- - "~>"
|
88
102
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
103
|
+
version: '1.0'
|
90
104
|
type: :development
|
91
105
|
prerelease: false
|
92
106
|
version_requirements: !ruby/object:Gem::Requirement
|
93
107
|
requirements:
|
94
|
-
- - "
|
108
|
+
- - "~>"
|
95
109
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
110
|
+
version: '1.0'
|
97
111
|
description: Taking care of synchronizing external data with referential data in your
|
98
112
|
application.
|
99
113
|
email:
|
@@ -115,6 +129,9 @@ files:
|
|
115
129
|
- lib/beetle_etl/dsl/transformation_loader.rb
|
116
130
|
- lib/beetle_etl/import.rb
|
117
131
|
- lib/beetle_etl/naming.rb
|
132
|
+
- lib/beetle_etl/reporter.rb
|
133
|
+
- lib/beetle_etl/step_runner/async_step_runner.rb
|
134
|
+
- lib/beetle_etl/step_runner/dependency_resolver.rb
|
118
135
|
- lib/beetle_etl/steps/assign_ids.rb
|
119
136
|
- lib/beetle_etl/steps/create_stage.rb
|
120
137
|
- lib/beetle_etl/steps/drop_stage.rb
|
@@ -123,8 +140,7 @@ files:
|
|
123
140
|
- lib/beetle_etl/steps/step.rb
|
124
141
|
- lib/beetle_etl/steps/table_diff.rb
|
125
142
|
- lib/beetle_etl/steps/transform.rb
|
126
|
-
- lib/beetle_etl/
|
127
|
-
- lib/beetle_etl/task_runner/task_runner.rb
|
143
|
+
- lib/beetle_etl/testing.rb
|
128
144
|
- lib/beetle_etl/version.rb
|
129
145
|
- script/postgres
|
130
146
|
- spec/beetle_etl_spec.rb
|
@@ -134,6 +150,7 @@ files:
|
|
134
150
|
- spec/feature/example_schema.rb
|
135
151
|
- spec/feature/example_transform.rb
|
136
152
|
- spec/feature/feature_spec.rb
|
153
|
+
- spec/reporter_spec.rb
|
137
154
|
- spec/spec_helper.rb
|
138
155
|
- spec/steps/assign_ids_spec.rb
|
139
156
|
- spec/steps/create_stage_spec.rb
|
@@ -145,7 +162,9 @@ files:
|
|
145
162
|
- spec/support/database.yml.example
|
146
163
|
- spec/support/database.yml.travis
|
147
164
|
- spec/support/database_helpers.rb
|
165
|
+
- spec/support/file_helpers.rb
|
148
166
|
- spec/task_runner/dependency_resolver_spec.rb
|
167
|
+
- spec/testing_spec.rb
|
149
168
|
homepage: https://github.com/maiwald/beetle_etl
|
150
169
|
licenses:
|
151
170
|
- MIT
|
@@ -178,6 +197,7 @@ test_files:
|
|
178
197
|
- spec/feature/example_schema.rb
|
179
198
|
- spec/feature/example_transform.rb
|
180
199
|
- spec/feature/feature_spec.rb
|
200
|
+
- spec/reporter_spec.rb
|
181
201
|
- spec/spec_helper.rb
|
182
202
|
- spec/steps/assign_ids_spec.rb
|
183
203
|
- spec/steps/create_stage_spec.rb
|
@@ -189,4 +209,6 @@ test_files:
|
|
189
209
|
- spec/support/database.yml.example
|
190
210
|
- spec/support/database.yml.travis
|
191
211
|
- spec/support/database_helpers.rb
|
212
|
+
- spec/support/file_helpers.rb
|
192
213
|
- spec/task_runner/dependency_resolver_spec.rb
|
214
|
+
- spec/testing_spec.rb
|
@@ -1,71 +0,0 @@
|
|
1
|
-
module BeetleETL
|
2
|
-
class TaskRunner
|
3
|
-
|
4
|
-
def initialize(tasks)
|
5
|
-
@dependency_resolver = DependencyResolver.new(tasks)
|
6
|
-
@tasks = tasks
|
7
|
-
|
8
|
-
@queue = Queue.new
|
9
|
-
@completed = Set.new
|
10
|
-
@running = Set.new
|
11
|
-
end
|
12
|
-
|
13
|
-
def run
|
14
|
-
results = {}
|
15
|
-
|
16
|
-
until all_tasks_complete?
|
17
|
-
runnables.each do |task|
|
18
|
-
run_task_async(task)
|
19
|
-
mark_task_running(task.name)
|
20
|
-
end
|
21
|
-
|
22
|
-
task_name, task_data = @queue.pop
|
23
|
-
results[task_name] = task_data
|
24
|
-
mark_task_completed(task_name)
|
25
|
-
end
|
26
|
-
|
27
|
-
results
|
28
|
-
end
|
29
|
-
|
30
|
-
private
|
31
|
-
|
32
|
-
attr_reader :running, :completed
|
33
|
-
|
34
|
-
def run_task_async(task)
|
35
|
-
Thread.new do
|
36
|
-
started_at = now
|
37
|
-
result = task.run
|
38
|
-
finished_at = now
|
39
|
-
|
40
|
-
@queue.push [task.name, {
|
41
|
-
started_at: started_at,
|
42
|
-
finished_at: finished_at,
|
43
|
-
result: result,
|
44
|
-
}]
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
def mark_task_running(task_name)
|
49
|
-
running.add(task_name)
|
50
|
-
end
|
51
|
-
|
52
|
-
def mark_task_completed(task_name)
|
53
|
-
runnables.delete(task_name)
|
54
|
-
completed.add(task_name)
|
55
|
-
end
|
56
|
-
|
57
|
-
def runnables
|
58
|
-
resolvables = @dependency_resolver.resolvables(completed)
|
59
|
-
resolvables.reject { |r| running.include? r.name }
|
60
|
-
end
|
61
|
-
|
62
|
-
def all_tasks_complete?
|
63
|
-
@tasks.map(&:name).to_set == completed.to_set
|
64
|
-
end
|
65
|
-
|
66
|
-
def now
|
67
|
-
Time.now
|
68
|
-
end
|
69
|
-
|
70
|
-
end
|
71
|
-
end
|