beetle_etl 2.0.1 → 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/beetle_etl.rb +1 -2
- data/lib/beetle_etl/import.rb +36 -32
- data/lib/beetle_etl/step_runner/abstract_step_runner.rb +47 -0
- data/lib/beetle_etl/step_runner/async_step_runner.rb +14 -46
- data/lib/beetle_etl/step_runner/sequential_step_runner.rb +13 -0
- data/lib/beetle_etl/steps/map_relations.rb +1 -1
- data/lib/beetle_etl/steps/table_diff.rb +43 -17
- data/lib/beetle_etl/version.rb +1 -1
- data/spec/steps/map_relations_spec.rb +3 -3
- data/spec/steps/table_diff_spec.rb +56 -28
- metadata +4 -5
- data/lib/beetle_etl/steps/assign_ids.rb +0 -23
- data/spec/steps/assign_ids_spec.rb +0 -74
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 114b9229ba18051ff3bb5d606af695df0b58649c
|
4
|
+
data.tar.gz: 869afc2743aa1c5a4028bc4d48b1fa7416c4bb43
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c70a1b62aa9689174177d4021eb1499ad6afdbf688338affc682d99c21633f81a4f61a64b5db3f487b7cdeddef9511cf430cef743f3eb73eedcc1d8b11562f1
|
7
|
+
data.tar.gz: d61a1b35342ec7a4cf4d78ffee9efaed6b4ce7f3c44954a0e77b1e03c81e10f4d7edc9f26acaf5635a128f1e008302a59ab81121273003ee556899d1d42bf904
|
data/lib/beetle_etl.rb
CHANGED
@@ -19,11 +19,10 @@ module BeetleETL
|
|
19
19
|
require 'beetle_etl/steps/transform'
|
20
20
|
require 'beetle_etl/steps/map_relations'
|
21
21
|
require 'beetle_etl/steps/table_diff'
|
22
|
-
require 'beetle_etl/steps/assign_ids'
|
23
22
|
require 'beetle_etl/steps/load'
|
24
23
|
require 'beetle_etl/steps/drop_stage'
|
25
24
|
|
26
|
-
require 'beetle_etl/step_runner/
|
25
|
+
require 'beetle_etl/step_runner/sequential_step_runner'
|
27
26
|
require 'beetle_etl/step_runner/async_step_runner'
|
28
27
|
|
29
28
|
require 'beetle_etl/import'
|
data/lib/beetle_etl/import.rb
CHANGED
@@ -5,57 +5,61 @@ module BeetleETL
|
|
5
5
|
|
6
6
|
def initialize(config)
|
7
7
|
@config = config
|
8
|
-
|
9
|
-
|
10
|
-
def run
|
11
|
-
setup
|
12
|
-
import
|
13
|
-
ensure
|
14
|
-
cleanup
|
15
|
-
end
|
8
|
+
@report = {}
|
16
9
|
|
17
|
-
|
18
|
-
transformations.each do |t|
|
19
|
-
CreateStage.new(@config, t.table_name, t.relations, t.column_names).run
|
20
|
-
end
|
10
|
+
@transformations ||= TransformationLoader.new(@config).load
|
21
11
|
end
|
22
12
|
|
23
|
-
def
|
24
|
-
|
25
|
-
|
26
|
-
|
13
|
+
def run
|
14
|
+
begin
|
15
|
+
run_setup
|
16
|
+
run_transform
|
17
|
+
run_load
|
18
|
+
ensure
|
19
|
+
run_cleanup
|
27
20
|
end
|
28
21
|
|
29
|
-
|
22
|
+
@report
|
30
23
|
end
|
31
24
|
|
32
|
-
def
|
33
|
-
transformations.
|
34
|
-
|
35
|
-
|
36
|
-
end
|
25
|
+
def run_setup
|
26
|
+
steps = @transformations.map { |t|
|
27
|
+
CreateStage.new(@config, t.table_name, t.relations, t.column_names)
|
28
|
+
}
|
37
29
|
|
38
|
-
|
30
|
+
@report.deep_merge SequentialStepRunner.new(@config, steps).run
|
31
|
+
end
|
39
32
|
|
40
|
-
def
|
41
|
-
transformations.flat_map
|
33
|
+
def run_transform
|
34
|
+
steps = @transformations.flat_map { |t|
|
42
35
|
[
|
43
36
|
Transform.new(@config, t.table_name, t.dependencies, t.query),
|
44
37
|
MapRelations.new(@config, t.table_name, t.relations),
|
45
|
-
TableDiff.new(@config, t.table_name)
|
46
|
-
AssignIds.new(@config, t.table_name),
|
38
|
+
TableDiff.new(@config, t.table_name)
|
47
39
|
]
|
48
|
-
|
40
|
+
}
|
41
|
+
|
42
|
+
@report.deep_merge AsyncStepRunner.new(@config, steps).run
|
49
43
|
end
|
50
44
|
|
51
|
-
def
|
52
|
-
transformations.map
|
45
|
+
def run_load
|
46
|
+
steps = @transformations.map { |t|
|
53
47
|
Load.new(@config, t.table_name, t.relations)
|
48
|
+
}
|
49
|
+
|
50
|
+
result = @config.database.transaction do
|
51
|
+
SequentialStepRunner.new(@config, steps).run
|
54
52
|
end
|
53
|
+
|
54
|
+
@report.deep_merge result
|
55
55
|
end
|
56
56
|
|
57
|
-
def
|
58
|
-
@transformations
|
57
|
+
def run_cleanup
|
58
|
+
steps = @transformations.map { |t|
|
59
|
+
DropStage.new(@config, t.table_name)
|
60
|
+
}
|
61
|
+
|
62
|
+
@report.deep_merge SequentialStepRunner.new(@config, steps).run
|
59
63
|
end
|
60
64
|
|
61
65
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'active_support/core_ext/hash/slice'
|
2
|
+
|
3
|
+
module BeetleETL
|
4
|
+
class AbstractStepRunner
|
5
|
+
|
6
|
+
def initialize(config, steps)
|
7
|
+
@config = config
|
8
|
+
@steps = steps
|
9
|
+
end
|
10
|
+
|
11
|
+
def run
|
12
|
+
raise NotImplementedError
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
def run_step(step)
|
18
|
+
@config.logger.info("started step #{step.name}")
|
19
|
+
|
20
|
+
started_at = Time.now
|
21
|
+
step.run
|
22
|
+
finished_at = Time.now
|
23
|
+
|
24
|
+
duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
|
25
|
+
@config.logger.info("finished #{step.name} in #{duration}")
|
26
|
+
|
27
|
+
{
|
28
|
+
step_name: step.name,
|
29
|
+
table_name: step.table_name,
|
30
|
+
started_at: started_at,
|
31
|
+
finished_at: finished_at
|
32
|
+
}
|
33
|
+
rescue => e
|
34
|
+
@config.logger.fatal(e.message)
|
35
|
+
raise e
|
36
|
+
end
|
37
|
+
|
38
|
+
def add_result!(results, step_data)
|
39
|
+
table_name = step_data[:table_name]
|
40
|
+
step_name = step_data[:step_name]
|
41
|
+
|
42
|
+
results[table_name] ||= {}
|
43
|
+
results[table_name][step_name] = step_data.slice(:started_at, :finished_at)
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
end
|
@@ -1,15 +1,17 @@
|
|
1
|
+
require_relative './abstract_step_runner'
|
2
|
+
require_relative './dependency_resolver'
|
3
|
+
|
1
4
|
module BeetleETL
|
2
|
-
class AsyncStepRunner
|
5
|
+
class AsyncStepRunner < AbstractStepRunner
|
3
6
|
|
4
7
|
def initialize(config, steps)
|
5
|
-
|
8
|
+
super(config, steps)
|
6
9
|
|
7
10
|
@dependency_resolver = DependencyResolver.new(steps)
|
8
|
-
@steps = steps
|
9
11
|
|
10
12
|
@queue = Queue.new
|
11
13
|
@completed = Set.new
|
12
|
-
@
|
14
|
+
@started = Set.new
|
13
15
|
end
|
14
16
|
|
15
17
|
def run
|
@@ -18,17 +20,13 @@ module BeetleETL
|
|
18
20
|
until all_steps_complete?
|
19
21
|
runnables.each do |step|
|
20
22
|
run_step_async(step)
|
21
|
-
|
23
|
+
@started.add(step.name)
|
22
24
|
end
|
23
25
|
|
24
|
-
|
25
|
-
|
26
|
-
unless results.has_key?(table_name)
|
27
|
-
results[table_name] = {}
|
28
|
-
end
|
26
|
+
step_data = @queue.pop
|
27
|
+
add_result!(results, step_data)
|
29
28
|
|
30
|
-
|
31
|
-
mark_step_completed(step_name)
|
29
|
+
@completed.add(step_data[:step_name])
|
32
30
|
end
|
33
31
|
|
34
32
|
results
|
@@ -36,49 +34,19 @@ module BeetleETL
|
|
36
34
|
|
37
35
|
private
|
38
36
|
|
39
|
-
attr_reader :running, :completed
|
40
|
-
|
41
37
|
def run_step_async(step)
|
42
38
|
Thread.new do
|
43
|
-
|
44
|
-
@config.logger.info("started step #{step.name}")
|
45
|
-
|
46
|
-
started_at = Time.now
|
47
|
-
step.run
|
48
|
-
finished_at = Time.now
|
49
|
-
|
50
|
-
duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
|
51
|
-
@config.logger.info("finished #{step.name} in #{duration}")
|
52
|
-
|
53
|
-
@queue.push [
|
54
|
-
step.table_name,
|
55
|
-
step.name,
|
56
|
-
{ started_at: started_at, finished_at: finished_at }
|
57
|
-
]
|
58
|
-
|
59
|
-
rescue => e
|
60
|
-
@config.logger.fatal(e.message)
|
61
|
-
raise e
|
62
|
-
end
|
39
|
+
@queue.push run_step(step)
|
63
40
|
end.abort_on_exception = true
|
64
41
|
end
|
65
42
|
|
66
|
-
def mark_step_running(step_name)
|
67
|
-
running.add(step_name)
|
68
|
-
end
|
69
|
-
|
70
|
-
def mark_step_completed(step_name)
|
71
|
-
runnables.delete(step_name)
|
72
|
-
completed.add(step_name)
|
73
|
-
end
|
74
|
-
|
75
43
|
def runnables
|
76
|
-
resolvables = @dependency_resolver.resolvables(completed)
|
77
|
-
resolvables.reject { |r|
|
44
|
+
resolvables = @dependency_resolver.resolvables(@completed)
|
45
|
+
resolvables.reject { |r| @started.include? r.name }
|
78
46
|
end
|
79
47
|
|
80
48
|
def all_steps_complete?
|
81
|
-
@steps.map(&:name).to_set == completed.to_set
|
49
|
+
@steps.map(&:name).to_set == @completed.to_set
|
82
50
|
end
|
83
51
|
|
84
52
|
end
|
@@ -11,7 +11,7 @@ module BeetleETL
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def run
|
14
|
-
%w(create update delete reinstate).each do |transition|
|
14
|
+
%w(create update delete reinstate keep).each do |transition|
|
15
15
|
public_send(:"transition_#{transition}")
|
16
16
|
end
|
17
17
|
end
|
@@ -19,7 +19,9 @@ module BeetleETL
|
|
19
19
|
def transition_create
|
20
20
|
database.execute <<-SQL
|
21
21
|
UPDATE "#{target_schema}"."#{stage_table_name}" stage
|
22
|
-
SET
|
22
|
+
SET
|
23
|
+
transition = 'CREATE',
|
24
|
+
id = NEXTVAL('#{target_schema}.#{table_name}_id_seq')
|
23
25
|
WHERE NOT EXISTS (
|
24
26
|
SELECT 1
|
25
27
|
FROM "#{target_schema}"."#{table_name}" target
|
@@ -31,12 +33,13 @@ module BeetleETL
|
|
31
33
|
|
32
34
|
def transition_update
|
33
35
|
database.execute <<-SQL
|
34
|
-
UPDATE "#{target_schema}"."#{stage_table_name}"
|
35
|
-
SET
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
|
37
|
+
SET
|
38
|
+
transition = 'UPDATE',
|
39
|
+
id = target.id
|
40
|
+
FROM "#{target_schema}"."#{stage_table_name}" stage
|
41
|
+
JOIN "#{target_schema}"."#{table_name}" target ON (
|
42
|
+
target.external_id = stage.external_id
|
40
43
|
AND target.external_source = '#{external_source}'
|
41
44
|
AND target.deleted_at IS NULL
|
42
45
|
AND
|
@@ -44,16 +47,17 @@ module BeetleETL
|
|
44
47
|
IS DISTINCT FROM
|
45
48
|
(#{stage_record_columns.join(', ')})
|
46
49
|
)
|
50
|
+
WHERE stage_update.external_id = stage.external_id
|
47
51
|
SQL
|
48
52
|
end
|
49
53
|
|
50
54
|
def transition_delete
|
51
55
|
database.execute <<-SQL
|
52
56
|
INSERT INTO "#{target_schema}"."#{stage_table_name}"
|
53
|
-
(
|
57
|
+
(transition, id)
|
54
58
|
SELECT
|
55
|
-
|
56
|
-
|
59
|
+
'DELETE',
|
60
|
+
target.id
|
57
61
|
FROM "#{target_schema}"."#{table_name}" target
|
58
62
|
LEFT OUTER JOIN "#{target_schema}"."#{stage_table_name}" stage
|
59
63
|
ON (stage.external_id = target.external_id)
|
@@ -65,15 +69,37 @@ module BeetleETL
|
|
65
69
|
|
66
70
|
def transition_reinstate
|
67
71
|
database.execute <<-SQL
|
68
|
-
UPDATE "#{target_schema}"."#{stage_table_name}"
|
69
|
-
SET
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
72
|
+
UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
|
73
|
+
SET
|
74
|
+
transition = 'REINSTATE',
|
75
|
+
id = target.id
|
76
|
+
FROM "#{target_schema}"."#{stage_table_name}" stage
|
77
|
+
JOIN "#{target_schema}"."#{table_name}" target ON (
|
78
|
+
target.external_id = stage.external_id
|
74
79
|
AND target.external_source = '#{external_source}'
|
75
80
|
AND target.deleted_at IS NOT NULL
|
76
81
|
)
|
82
|
+
WHERE stage_update.external_id = stage.external_id
|
83
|
+
SQL
|
84
|
+
end
|
85
|
+
|
86
|
+
def transition_keep
|
87
|
+
database.execute <<-SQL
|
88
|
+
UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
|
89
|
+
SET
|
90
|
+
transition = 'KEEP',
|
91
|
+
id = target.id
|
92
|
+
FROM "#{target_schema}"."#{stage_table_name}" stage
|
93
|
+
JOIN "#{target_schema}"."#{table_name}" target ON (
|
94
|
+
target.external_id = stage.external_id
|
95
|
+
AND target.external_source = '#{external_source}'
|
96
|
+
AND target.deleted_at IS NULL
|
97
|
+
AND
|
98
|
+
(#{target_record_columns.join(', ')})
|
99
|
+
IS NOT DISTINCT FROM
|
100
|
+
(#{stage_record_columns.join(', ')})
|
101
|
+
)
|
102
|
+
WHERE stage_update.external_id = stage.external_id
|
77
103
|
SQL
|
78
104
|
end
|
79
105
|
|
data/lib/beetle_etl/version.rb
CHANGED
@@ -52,11 +52,11 @@ module BeetleETL
|
|
52
52
|
end
|
53
53
|
|
54
54
|
describe '#depenencies' do
|
55
|
-
it 'depends on Transform of the same table and
|
55
|
+
it 'depends on Transform of the same table and TableDiff of its dependees' do
|
56
56
|
expect(subject.dependencies).to eql(
|
57
57
|
[
|
58
|
-
'dependee_a:
|
59
|
-
'dependee_b:
|
58
|
+
'dependee_a: TableDiff',
|
59
|
+
'dependee_b: TableDiff',
|
60
60
|
'depender: Transform',
|
61
61
|
].to_set
|
62
62
|
)
|
@@ -18,6 +18,7 @@ module BeetleETL
|
|
18
18
|
|
19
19
|
before do
|
20
20
|
test_database.create_table(subject.stage_table_name.to_sym) do
|
21
|
+
Integer :id
|
21
22
|
String :external_id, size: 255
|
22
23
|
String :transition, size: 20
|
23
24
|
|
@@ -28,7 +29,7 @@ module BeetleETL
|
|
28
29
|
end
|
29
30
|
|
30
31
|
test_database.create_table(:example_table) do
|
31
|
-
|
32
|
+
primary_key :id
|
32
33
|
String :external_id, size: 255
|
33
34
|
String :external_source, size: 255
|
34
35
|
DateTime :deleted_at
|
@@ -47,7 +48,7 @@ module BeetleETL
|
|
47
48
|
|
48
49
|
describe '#run' do
|
49
50
|
it 'runs all transitions' do
|
50
|
-
%w(create update delete reinstate).each do |transition|
|
51
|
+
%w(create update delete reinstate keep).each do |transition|
|
51
52
|
expect(subject).to receive(:"transition_#{transition}")
|
52
53
|
end
|
53
54
|
|
@@ -59,11 +60,13 @@ module BeetleETL
|
|
59
60
|
it 'assigns CREATE to new records' do
|
60
61
|
|
61
62
|
insert_into(:example_table).values(
|
62
|
-
[ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
63
|
-
[ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
64
|
-
[ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
63
|
+
[ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
64
|
+
[ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
65
|
+
[ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
65
66
|
)
|
66
67
|
|
68
|
+
test_database.run "SELECT setval('public.example_table_id_seq', 99)"
|
69
|
+
|
67
70
|
insert_into(subject.stage_table_name.to_sym).values(
|
68
71
|
[ :external_id ] ,
|
69
72
|
[ 'created' ] ,
|
@@ -72,10 +75,10 @@ module BeetleETL
|
|
72
75
|
|
73
76
|
subject.transition_create
|
74
77
|
|
75
|
-
|
76
|
-
[ :external_id , :transition ] ,
|
77
|
-
[ 'created' , 'CREATE' ] ,
|
78
|
-
[ 'existing' , nil ] ,
|
78
|
+
expect(subject.stage_table_name.to_sym).to have_values(
|
79
|
+
[ :external_id , :id , :transition ] ,
|
80
|
+
[ 'created' , 100 , 'CREATE' ] ,
|
81
|
+
[ 'existing' , nil , nil ] ,
|
79
82
|
)
|
80
83
|
end
|
81
84
|
end
|
@@ -85,10 +88,10 @@ module BeetleETL
|
|
85
88
|
except externald_*_id columns and columns not contained in the stage table' do
|
86
89
|
|
87
90
|
insert_into(:example_table).values(
|
88
|
-
[ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
89
|
-
[ 'existing_1' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
90
|
-
[ 'existing_2' , external_source , 'existing content' , 'ignored content' , 2 , nil ] ,
|
91
|
-
[ 'deleted' , external_source , 'deleted content' , 'ignored content' , 3 , 1.day.ago ] ,
|
91
|
+
[ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
92
|
+
[ 1 , 'existing_1' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
93
|
+
[ 2 , 'existing_2' , external_source , 'existing content' , 'ignored content' , 2 , nil ] ,
|
94
|
+
[ 3 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 3 , 1.day.ago ] ,
|
92
95
|
)
|
93
96
|
|
94
97
|
insert_into(subject.stage_table_name.to_sym).values(
|
@@ -101,10 +104,10 @@ module BeetleETL
|
|
101
104
|
subject.transition_update
|
102
105
|
|
103
106
|
expect(subject.stage_table_name.to_sym).to have_values(
|
104
|
-
[ :external_id , :transition ] ,
|
105
|
-
[ 'existing_1' , 'UPDATE' ] ,
|
106
|
-
[ 'existing_2' , 'UPDATE' ] ,
|
107
|
-
[ 'deleted' , nil ] ,
|
107
|
+
[ :external_id , :id , :transition ] ,
|
108
|
+
[ 'existing_1' , 1 , 'UPDATE' ] ,
|
109
|
+
[ 'existing_2' , 2 , 'UPDATE' ] ,
|
110
|
+
[ 'deleted' , nil , nil ] ,
|
108
111
|
)
|
109
112
|
end
|
110
113
|
end
|
@@ -112,16 +115,16 @@ module BeetleETL
|
|
112
115
|
describe 'transition_delete' do
|
113
116
|
it 'creates records with DELETE that no loger exist in the stage table for the given run' do
|
114
117
|
insert_into(:example_table).values(
|
115
|
-
[ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
116
|
-
[ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
117
|
-
[ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
118
|
+
[ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
119
|
+
[ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
120
|
+
[ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
118
121
|
)
|
119
122
|
|
120
123
|
subject.transition_delete
|
121
124
|
|
122
125
|
expect(subject.stage_table_name.to_sym).to have_values(
|
123
|
-
[ :
|
124
|
-
[
|
126
|
+
[ :id , :transition ] ,
|
127
|
+
[ 1 , 'DELETE' ] ,
|
125
128
|
)
|
126
129
|
end
|
127
130
|
end
|
@@ -129,9 +132,9 @@ module BeetleETL
|
|
129
132
|
describe 'transition_reinstate' do
|
130
133
|
it 'assigns REINSTATE to previously deleted records' do
|
131
134
|
insert_into(:example_table).values(
|
132
|
-
[ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
133
|
-
[ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
134
|
-
[ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
135
|
+
[ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
136
|
+
[ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
137
|
+
[ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
135
138
|
)
|
136
139
|
|
137
140
|
insert_into(subject.stage_table_name.to_sym).values(
|
@@ -143,9 +146,34 @@ module BeetleETL
|
|
143
146
|
subject.transition_reinstate
|
144
147
|
|
145
148
|
expect(subject.stage_table_name.to_sym).to have_values(
|
146
|
-
[ :external_id , :transition ] ,
|
147
|
-
[ 'existing' , nil ] ,
|
148
|
-
[ 'deleted' , 'REINSTATE' ] ,
|
149
|
+
[ :external_id , :id , :transition ] ,
|
150
|
+
[ 'existing' , nil , nil ] ,
|
151
|
+
[ 'deleted' , 2 , 'REINSTATE' ] ,
|
152
|
+
)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
describe '#transition_keep' do
|
157
|
+
it 'assigns KEEP to unchanged records' do
|
158
|
+
|
159
|
+
insert_into(:example_table).values(
|
160
|
+
[ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
161
|
+
[ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
162
|
+
[ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
163
|
+
)
|
164
|
+
|
165
|
+
insert_into(subject.stage_table_name.to_sym).values(
|
166
|
+
[ :external_id , :payload , :foo_id ] ,
|
167
|
+
[ 'created' , nil , nil ] ,
|
168
|
+
[ 'existing' , 'existing content' , 1 ] ,
|
169
|
+
)
|
170
|
+
|
171
|
+
subject.transition_keep
|
172
|
+
|
173
|
+
expect(subject.stage_table_name.to_sym).to have_values(
|
174
|
+
[ :external_id , :id , :transition ] ,
|
175
|
+
[ 'created' , nil , nil ] ,
|
176
|
+
[ 'existing' , 1 , 'KEEP' ] ,
|
149
177
|
)
|
150
178
|
end
|
151
179
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: beetle_etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luciano Maiwald
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-09-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -132,9 +132,10 @@ files:
|
|
132
132
|
- lib/beetle_etl/import.rb
|
133
133
|
- lib/beetle_etl/naming.rb
|
134
134
|
- lib/beetle_etl/reporter.rb
|
135
|
+
- lib/beetle_etl/step_runner/abstract_step_runner.rb
|
135
136
|
- lib/beetle_etl/step_runner/async_step_runner.rb
|
136
137
|
- lib/beetle_etl/step_runner/dependency_resolver.rb
|
137
|
-
- lib/beetle_etl/
|
138
|
+
- lib/beetle_etl/step_runner/sequential_step_runner.rb
|
138
139
|
- lib/beetle_etl/steps/create_stage.rb
|
139
140
|
- lib/beetle_etl/steps/drop_stage.rb
|
140
141
|
- lib/beetle_etl/steps/load.rb
|
@@ -158,7 +159,6 @@ files:
|
|
158
159
|
- spec/feature/feature_spec.rb
|
159
160
|
- spec/reporter_spec.rb
|
160
161
|
- spec/spec_helper.rb
|
161
|
-
- spec/steps/assign_ids_spec.rb
|
162
162
|
- spec/steps/create_stage_spec.rb
|
163
163
|
- spec/steps/load_spec.rb
|
164
164
|
- spec/steps/map_relations_spec.rb
|
@@ -208,7 +208,6 @@ test_files:
|
|
208
208
|
- spec/feature/feature_spec.rb
|
209
209
|
- spec/reporter_spec.rb
|
210
210
|
- spec/spec_helper.rb
|
211
|
-
- spec/steps/assign_ids_spec.rb
|
212
211
|
- spec/steps/create_stage_spec.rb
|
213
212
|
- spec/steps/load_spec.rb
|
214
213
|
- spec/steps/map_relations_spec.rb
|
@@ -1,23 +0,0 @@
|
|
1
|
-
module BeetleETL
|
2
|
-
class AssignIds < Step
|
3
|
-
|
4
|
-
def dependencies
|
5
|
-
[TableDiff.step_name(table_name)].to_set
|
6
|
-
end
|
7
|
-
|
8
|
-
def run
|
9
|
-
database.execute <<-SQL
|
10
|
-
UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
|
11
|
-
SET id = COALESCE(target.id, NEXTVAL('#{target_schema}.#{table_name}_id_seq'))
|
12
|
-
FROM "#{target_schema}"."#{stage_table_name}" stage
|
13
|
-
LEFT OUTER JOIN "#{target_schema}"."#{table_name}" target
|
14
|
-
on (
|
15
|
-
stage.external_id = target.external_id
|
16
|
-
AND target.external_source = '#{external_source}'
|
17
|
-
)
|
18
|
-
WHERE stage_update.external_id = stage.external_id
|
19
|
-
SQL
|
20
|
-
end
|
21
|
-
|
22
|
-
end
|
23
|
-
end
|
@@ -1,74 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
module BeetleETL
|
4
|
-
describe AssignIds do
|
5
|
-
|
6
|
-
let(:external_source) { 'my_source' }
|
7
|
-
let(:another_source) { 'another_source' }
|
8
|
-
|
9
|
-
let(:config) do
|
10
|
-
OpenStruct.new({
|
11
|
-
stage_schema: 'stage',
|
12
|
-
target_schema: 'public',
|
13
|
-
external_source: external_source,
|
14
|
-
database: test_database,
|
15
|
-
})
|
16
|
-
end
|
17
|
-
|
18
|
-
subject { AssignIds.new(config, :example_table) }
|
19
|
-
|
20
|
-
describe '#dependencies' do
|
21
|
-
it 'depends on TableDiff of the same table' do
|
22
|
-
expect(subject.dependencies).to eql(['example_table: TableDiff'].to_set)
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
describe '#run' do
|
27
|
-
before do
|
28
|
-
test_database.create_table(subject.stage_table_name.to_sym) do
|
29
|
-
Integer :id
|
30
|
-
String :external_id, size: 255
|
31
|
-
String :transition, size: 255
|
32
|
-
end
|
33
|
-
|
34
|
-
test_database.create_table(:example_table) do
|
35
|
-
primary_key :id
|
36
|
-
String :external_id, size: 255
|
37
|
-
String :external_source, size: 255
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
it 'assigns ids for' do
|
42
|
-
# - generated ones for new records
|
43
|
-
# - mapped ones by external_id for existing records
|
44
|
-
|
45
|
-
insert_into(:example_table).values(
|
46
|
-
[ :external_id , :external_source ] ,
|
47
|
-
[ 'a' , external_source ] ,
|
48
|
-
[ 'b' , external_source ] ,
|
49
|
-
[ 'c' , external_source ] ,
|
50
|
-
[ 'd' , another_source ] ,
|
51
|
-
)
|
52
|
-
|
53
|
-
insert_into(subject.stage_table_name.to_sym).values(
|
54
|
-
[ :external_id ] ,
|
55
|
-
[ 'new value' ] ,
|
56
|
-
[ 'a' ] ,
|
57
|
-
[ 'b' ] ,
|
58
|
-
[ 'c' ] ,
|
59
|
-
)
|
60
|
-
|
61
|
-
subject.run
|
62
|
-
|
63
|
-
expect(subject.stage_table_name.to_sym).to have_values(
|
64
|
-
[ :id , :external_id ] ,
|
65
|
-
[ 1 , 'a' ] ,
|
66
|
-
[ 2 , 'b' ] ,
|
67
|
-
[ 3 , 'c' ] ,
|
68
|
-
[ 5 , 'new value' ] ,
|
69
|
-
)
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
end
|
74
|
-
end
|