beetle_etl 2.0.1 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/beetle_etl.rb +1 -2
- data/lib/beetle_etl/import.rb +36 -32
- data/lib/beetle_etl/step_runner/abstract_step_runner.rb +47 -0
- data/lib/beetle_etl/step_runner/async_step_runner.rb +14 -46
- data/lib/beetle_etl/step_runner/sequential_step_runner.rb +13 -0
- data/lib/beetle_etl/steps/map_relations.rb +1 -1
- data/lib/beetle_etl/steps/table_diff.rb +43 -17
- data/lib/beetle_etl/version.rb +1 -1
- data/spec/steps/map_relations_spec.rb +3 -3
- data/spec/steps/table_diff_spec.rb +56 -28
- metadata +4 -5
- data/lib/beetle_etl/steps/assign_ids.rb +0 -23
- data/spec/steps/assign_ids_spec.rb +0 -74
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 114b9229ba18051ff3bb5d606af695df0b58649c
|
4
|
+
data.tar.gz: 869afc2743aa1c5a4028bc4d48b1fa7416c4bb43
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c70a1b62aa9689174177d4021eb1499ad6afdbf688338affc682d99c21633f81a4f61a64b5db3f487b7cdeddef9511cf430cef743f3eb73eedcc1d8b11562f1
|
7
|
+
data.tar.gz: d61a1b35342ec7a4cf4d78ffee9efaed6b4ce7f3c44954a0e77b1e03c81e10f4d7edc9f26acaf5635a128f1e008302a59ab81121273003ee556899d1d42bf904
|
data/lib/beetle_etl.rb
CHANGED
@@ -19,11 +19,10 @@ module BeetleETL
|
|
19
19
|
require 'beetle_etl/steps/transform'
|
20
20
|
require 'beetle_etl/steps/map_relations'
|
21
21
|
require 'beetle_etl/steps/table_diff'
|
22
|
-
require 'beetle_etl/steps/assign_ids'
|
23
22
|
require 'beetle_etl/steps/load'
|
24
23
|
require 'beetle_etl/steps/drop_stage'
|
25
24
|
|
26
|
-
require 'beetle_etl/step_runner/
|
25
|
+
require 'beetle_etl/step_runner/sequential_step_runner'
|
27
26
|
require 'beetle_etl/step_runner/async_step_runner'
|
28
27
|
|
29
28
|
require 'beetle_etl/import'
|
data/lib/beetle_etl/import.rb
CHANGED
@@ -5,57 +5,61 @@ module BeetleETL
|
|
5
5
|
|
6
6
|
def initialize(config)
|
7
7
|
@config = config
|
8
|
-
|
9
|
-
|
10
|
-
def run
|
11
|
-
setup
|
12
|
-
import
|
13
|
-
ensure
|
14
|
-
cleanup
|
15
|
-
end
|
8
|
+
@report = {}
|
16
9
|
|
17
|
-
|
18
|
-
transformations.each do |t|
|
19
|
-
CreateStage.new(@config, t.table_name, t.relations, t.column_names).run
|
20
|
-
end
|
10
|
+
@transformations ||= TransformationLoader.new(@config).load
|
21
11
|
end
|
22
12
|
|
23
|
-
def
|
24
|
-
|
25
|
-
|
26
|
-
|
13
|
+
def run
|
14
|
+
begin
|
15
|
+
run_setup
|
16
|
+
run_transform
|
17
|
+
run_load
|
18
|
+
ensure
|
19
|
+
run_cleanup
|
27
20
|
end
|
28
21
|
|
29
|
-
|
22
|
+
@report
|
30
23
|
end
|
31
24
|
|
32
|
-
def
|
33
|
-
transformations.
|
34
|
-
|
35
|
-
|
36
|
-
end
|
25
|
+
def run_setup
|
26
|
+
steps = @transformations.map { |t|
|
27
|
+
CreateStage.new(@config, t.table_name, t.relations, t.column_names)
|
28
|
+
}
|
37
29
|
|
38
|
-
|
30
|
+
@report.deep_merge SequentialStepRunner.new(@config, steps).run
|
31
|
+
end
|
39
32
|
|
40
|
-
def
|
41
|
-
transformations.flat_map
|
33
|
+
def run_transform
|
34
|
+
steps = @transformations.flat_map { |t|
|
42
35
|
[
|
43
36
|
Transform.new(@config, t.table_name, t.dependencies, t.query),
|
44
37
|
MapRelations.new(@config, t.table_name, t.relations),
|
45
|
-
TableDiff.new(@config, t.table_name)
|
46
|
-
AssignIds.new(@config, t.table_name),
|
38
|
+
TableDiff.new(@config, t.table_name)
|
47
39
|
]
|
48
|
-
|
40
|
+
}
|
41
|
+
|
42
|
+
@report.deep_merge AsyncStepRunner.new(@config, steps).run
|
49
43
|
end
|
50
44
|
|
51
|
-
def
|
52
|
-
transformations.map
|
45
|
+
def run_load
|
46
|
+
steps = @transformations.map { |t|
|
53
47
|
Load.new(@config, t.table_name, t.relations)
|
48
|
+
}
|
49
|
+
|
50
|
+
result = @config.database.transaction do
|
51
|
+
SequentialStepRunner.new(@config, steps).run
|
54
52
|
end
|
53
|
+
|
54
|
+
@report.deep_merge result
|
55
55
|
end
|
56
56
|
|
57
|
-
def
|
58
|
-
@transformations
|
57
|
+
def run_cleanup
|
58
|
+
steps = @transformations.map { |t|
|
59
|
+
DropStage.new(@config, t.table_name)
|
60
|
+
}
|
61
|
+
|
62
|
+
@report.deep_merge SequentialStepRunner.new(@config, steps).run
|
59
63
|
end
|
60
64
|
|
61
65
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'active_support/core_ext/hash/slice'
|
2
|
+
|
3
|
+
module BeetleETL
|
4
|
+
class AbstractStepRunner
|
5
|
+
|
6
|
+
def initialize(config, steps)
|
7
|
+
@config = config
|
8
|
+
@steps = steps
|
9
|
+
end
|
10
|
+
|
11
|
+
def run
|
12
|
+
raise NotImplementedError
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
def run_step(step)
|
18
|
+
@config.logger.info("started step #{step.name}")
|
19
|
+
|
20
|
+
started_at = Time.now
|
21
|
+
step.run
|
22
|
+
finished_at = Time.now
|
23
|
+
|
24
|
+
duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
|
25
|
+
@config.logger.info("finished #{step.name} in #{duration}")
|
26
|
+
|
27
|
+
{
|
28
|
+
step_name: step.name,
|
29
|
+
table_name: step.table_name,
|
30
|
+
started_at: started_at,
|
31
|
+
finished_at: finished_at
|
32
|
+
}
|
33
|
+
rescue => e
|
34
|
+
@config.logger.fatal(e.message)
|
35
|
+
raise e
|
36
|
+
end
|
37
|
+
|
38
|
+
def add_result!(results, step_data)
|
39
|
+
table_name = step_data[:table_name]
|
40
|
+
step_name = step_data[:step_name]
|
41
|
+
|
42
|
+
results[table_name] ||= {}
|
43
|
+
results[table_name][step_name] = step_data.slice(:started_at, :finished_at)
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
end
|
@@ -1,15 +1,17 @@
|
|
1
|
+
require_relative './abstract_step_runner'
|
2
|
+
require_relative './dependency_resolver'
|
3
|
+
|
1
4
|
module BeetleETL
|
2
|
-
class AsyncStepRunner
|
5
|
+
class AsyncStepRunner < AbstractStepRunner
|
3
6
|
|
4
7
|
def initialize(config, steps)
|
5
|
-
|
8
|
+
super(config, steps)
|
6
9
|
|
7
10
|
@dependency_resolver = DependencyResolver.new(steps)
|
8
|
-
@steps = steps
|
9
11
|
|
10
12
|
@queue = Queue.new
|
11
13
|
@completed = Set.new
|
12
|
-
@
|
14
|
+
@started = Set.new
|
13
15
|
end
|
14
16
|
|
15
17
|
def run
|
@@ -18,17 +20,13 @@ module BeetleETL
|
|
18
20
|
until all_steps_complete?
|
19
21
|
runnables.each do |step|
|
20
22
|
run_step_async(step)
|
21
|
-
|
23
|
+
@started.add(step.name)
|
22
24
|
end
|
23
25
|
|
24
|
-
|
25
|
-
|
26
|
-
unless results.has_key?(table_name)
|
27
|
-
results[table_name] = {}
|
28
|
-
end
|
26
|
+
step_data = @queue.pop
|
27
|
+
add_result!(results, step_data)
|
29
28
|
|
30
|
-
|
31
|
-
mark_step_completed(step_name)
|
29
|
+
@completed.add(step_data[:step_name])
|
32
30
|
end
|
33
31
|
|
34
32
|
results
|
@@ -36,49 +34,19 @@ module BeetleETL
|
|
36
34
|
|
37
35
|
private
|
38
36
|
|
39
|
-
attr_reader :running, :completed
|
40
|
-
|
41
37
|
def run_step_async(step)
|
42
38
|
Thread.new do
|
43
|
-
|
44
|
-
@config.logger.info("started step #{step.name}")
|
45
|
-
|
46
|
-
started_at = Time.now
|
47
|
-
step.run
|
48
|
-
finished_at = Time.now
|
49
|
-
|
50
|
-
duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
|
51
|
-
@config.logger.info("finished #{step.name} in #{duration}")
|
52
|
-
|
53
|
-
@queue.push [
|
54
|
-
step.table_name,
|
55
|
-
step.name,
|
56
|
-
{ started_at: started_at, finished_at: finished_at }
|
57
|
-
]
|
58
|
-
|
59
|
-
rescue => e
|
60
|
-
@config.logger.fatal(e.message)
|
61
|
-
raise e
|
62
|
-
end
|
39
|
+
@queue.push run_step(step)
|
63
40
|
end.abort_on_exception = true
|
64
41
|
end
|
65
42
|
|
66
|
-
def mark_step_running(step_name)
|
67
|
-
running.add(step_name)
|
68
|
-
end
|
69
|
-
|
70
|
-
def mark_step_completed(step_name)
|
71
|
-
runnables.delete(step_name)
|
72
|
-
completed.add(step_name)
|
73
|
-
end
|
74
|
-
|
75
43
|
def runnables
|
76
|
-
resolvables = @dependency_resolver.resolvables(completed)
|
77
|
-
resolvables.reject { |r|
|
44
|
+
resolvables = @dependency_resolver.resolvables(@completed)
|
45
|
+
resolvables.reject { |r| @started.include? r.name }
|
78
46
|
end
|
79
47
|
|
80
48
|
def all_steps_complete?
|
81
|
-
@steps.map(&:name).to_set == completed.to_set
|
49
|
+
@steps.map(&:name).to_set == @completed.to_set
|
82
50
|
end
|
83
51
|
|
84
52
|
end
|
@@ -11,7 +11,7 @@ module BeetleETL
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def run
|
14
|
-
%w(create update delete reinstate).each do |transition|
|
14
|
+
%w(create update delete reinstate keep).each do |transition|
|
15
15
|
public_send(:"transition_#{transition}")
|
16
16
|
end
|
17
17
|
end
|
@@ -19,7 +19,9 @@ module BeetleETL
|
|
19
19
|
def transition_create
|
20
20
|
database.execute <<-SQL
|
21
21
|
UPDATE "#{target_schema}"."#{stage_table_name}" stage
|
22
|
-
SET
|
22
|
+
SET
|
23
|
+
transition = 'CREATE',
|
24
|
+
id = NEXTVAL('#{target_schema}.#{table_name}_id_seq')
|
23
25
|
WHERE NOT EXISTS (
|
24
26
|
SELECT 1
|
25
27
|
FROM "#{target_schema}"."#{table_name}" target
|
@@ -31,12 +33,13 @@ module BeetleETL
|
|
31
33
|
|
32
34
|
def transition_update
|
33
35
|
database.execute <<-SQL
|
34
|
-
UPDATE "#{target_schema}"."#{stage_table_name}"
|
35
|
-
SET
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
|
37
|
+
SET
|
38
|
+
transition = 'UPDATE',
|
39
|
+
id = target.id
|
40
|
+
FROM "#{target_schema}"."#{stage_table_name}" stage
|
41
|
+
JOIN "#{target_schema}"."#{table_name}" target ON (
|
42
|
+
target.external_id = stage.external_id
|
40
43
|
AND target.external_source = '#{external_source}'
|
41
44
|
AND target.deleted_at IS NULL
|
42
45
|
AND
|
@@ -44,16 +47,17 @@ module BeetleETL
|
|
44
47
|
IS DISTINCT FROM
|
45
48
|
(#{stage_record_columns.join(', ')})
|
46
49
|
)
|
50
|
+
WHERE stage_update.external_id = stage.external_id
|
47
51
|
SQL
|
48
52
|
end
|
49
53
|
|
50
54
|
def transition_delete
|
51
55
|
database.execute <<-SQL
|
52
56
|
INSERT INTO "#{target_schema}"."#{stage_table_name}"
|
53
|
-
(
|
57
|
+
(transition, id)
|
54
58
|
SELECT
|
55
|
-
|
56
|
-
|
59
|
+
'DELETE',
|
60
|
+
target.id
|
57
61
|
FROM "#{target_schema}"."#{table_name}" target
|
58
62
|
LEFT OUTER JOIN "#{target_schema}"."#{stage_table_name}" stage
|
59
63
|
ON (stage.external_id = target.external_id)
|
@@ -65,15 +69,37 @@ module BeetleETL
|
|
65
69
|
|
66
70
|
def transition_reinstate
|
67
71
|
database.execute <<-SQL
|
68
|
-
UPDATE "#{target_schema}"."#{stage_table_name}"
|
69
|
-
SET
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
72
|
+
UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
|
73
|
+
SET
|
74
|
+
transition = 'REINSTATE',
|
75
|
+
id = target.id
|
76
|
+
FROM "#{target_schema}"."#{stage_table_name}" stage
|
77
|
+
JOIN "#{target_schema}"."#{table_name}" target ON (
|
78
|
+
target.external_id = stage.external_id
|
74
79
|
AND target.external_source = '#{external_source}'
|
75
80
|
AND target.deleted_at IS NOT NULL
|
76
81
|
)
|
82
|
+
WHERE stage_update.external_id = stage.external_id
|
83
|
+
SQL
|
84
|
+
end
|
85
|
+
|
86
|
+
def transition_keep
|
87
|
+
database.execute <<-SQL
|
88
|
+
UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
|
89
|
+
SET
|
90
|
+
transition = 'KEEP',
|
91
|
+
id = target.id
|
92
|
+
FROM "#{target_schema}"."#{stage_table_name}" stage
|
93
|
+
JOIN "#{target_schema}"."#{table_name}" target ON (
|
94
|
+
target.external_id = stage.external_id
|
95
|
+
AND target.external_source = '#{external_source}'
|
96
|
+
AND target.deleted_at IS NULL
|
97
|
+
AND
|
98
|
+
(#{target_record_columns.join(', ')})
|
99
|
+
IS NOT DISTINCT FROM
|
100
|
+
(#{stage_record_columns.join(', ')})
|
101
|
+
)
|
102
|
+
WHERE stage_update.external_id = stage.external_id
|
77
103
|
SQL
|
78
104
|
end
|
79
105
|
|
data/lib/beetle_etl/version.rb
CHANGED
@@ -52,11 +52,11 @@ module BeetleETL
|
|
52
52
|
end
|
53
53
|
|
54
54
|
describe '#depenencies' do
|
55
|
-
it 'depends on Transform of the same table and
|
55
|
+
it 'depends on Transform of the same table and TableDiff of its dependees' do
|
56
56
|
expect(subject.dependencies).to eql(
|
57
57
|
[
|
58
|
-
'dependee_a:
|
59
|
-
'dependee_b:
|
58
|
+
'dependee_a: TableDiff',
|
59
|
+
'dependee_b: TableDiff',
|
60
60
|
'depender: Transform',
|
61
61
|
].to_set
|
62
62
|
)
|
@@ -18,6 +18,7 @@ module BeetleETL
|
|
18
18
|
|
19
19
|
before do
|
20
20
|
test_database.create_table(subject.stage_table_name.to_sym) do
|
21
|
+
Integer :id
|
21
22
|
String :external_id, size: 255
|
22
23
|
String :transition, size: 20
|
23
24
|
|
@@ -28,7 +29,7 @@ module BeetleETL
|
|
28
29
|
end
|
29
30
|
|
30
31
|
test_database.create_table(:example_table) do
|
31
|
-
|
32
|
+
primary_key :id
|
32
33
|
String :external_id, size: 255
|
33
34
|
String :external_source, size: 255
|
34
35
|
DateTime :deleted_at
|
@@ -47,7 +48,7 @@ module BeetleETL
|
|
47
48
|
|
48
49
|
describe '#run' do
|
49
50
|
it 'runs all transitions' do
|
50
|
-
%w(create update delete reinstate).each do |transition|
|
51
|
+
%w(create update delete reinstate keep).each do |transition|
|
51
52
|
expect(subject).to receive(:"transition_#{transition}")
|
52
53
|
end
|
53
54
|
|
@@ -59,11 +60,13 @@ module BeetleETL
|
|
59
60
|
it 'assigns CREATE to new records' do
|
60
61
|
|
61
62
|
insert_into(:example_table).values(
|
62
|
-
[ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
63
|
-
[ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
64
|
-
[ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
63
|
+
[ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
64
|
+
[ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
65
|
+
[ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
65
66
|
)
|
66
67
|
|
68
|
+
test_database.run "SELECT setval('public.example_table_id_seq', 99)"
|
69
|
+
|
67
70
|
insert_into(subject.stage_table_name.to_sym).values(
|
68
71
|
[ :external_id ] ,
|
69
72
|
[ 'created' ] ,
|
@@ -72,10 +75,10 @@ module BeetleETL
|
|
72
75
|
|
73
76
|
subject.transition_create
|
74
77
|
|
75
|
-
|
76
|
-
[ :external_id , :transition ] ,
|
77
|
-
[ 'created' , 'CREATE' ] ,
|
78
|
-
[ 'existing' , nil ] ,
|
78
|
+
expect(subject.stage_table_name.to_sym).to have_values(
|
79
|
+
[ :external_id , :id , :transition ] ,
|
80
|
+
[ 'created' , 100 , 'CREATE' ] ,
|
81
|
+
[ 'existing' , nil , nil ] ,
|
79
82
|
)
|
80
83
|
end
|
81
84
|
end
|
@@ -85,10 +88,10 @@ module BeetleETL
|
|
85
88
|
except externald_*_id columns and columns not contained in the stage table' do
|
86
89
|
|
87
90
|
insert_into(:example_table).values(
|
88
|
-
[ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
89
|
-
[ 'existing_1' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
90
|
-
[ 'existing_2' , external_source , 'existing content' , 'ignored content' , 2 , nil ] ,
|
91
|
-
[ 'deleted' , external_source , 'deleted content' , 'ignored content' , 3 , 1.day.ago ] ,
|
91
|
+
[ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
92
|
+
[ 1 , 'existing_1' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
93
|
+
[ 2 , 'existing_2' , external_source , 'existing content' , 'ignored content' , 2 , nil ] ,
|
94
|
+
[ 3 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 3 , 1.day.ago ] ,
|
92
95
|
)
|
93
96
|
|
94
97
|
insert_into(subject.stage_table_name.to_sym).values(
|
@@ -101,10 +104,10 @@ module BeetleETL
|
|
101
104
|
subject.transition_update
|
102
105
|
|
103
106
|
expect(subject.stage_table_name.to_sym).to have_values(
|
104
|
-
[ :external_id , :transition ] ,
|
105
|
-
[ 'existing_1' , 'UPDATE' ] ,
|
106
|
-
[ 'existing_2' , 'UPDATE' ] ,
|
107
|
-
[ 'deleted' , nil ] ,
|
107
|
+
[ :external_id , :id , :transition ] ,
|
108
|
+
[ 'existing_1' , 1 , 'UPDATE' ] ,
|
109
|
+
[ 'existing_2' , 2 , 'UPDATE' ] ,
|
110
|
+
[ 'deleted' , nil , nil ] ,
|
108
111
|
)
|
109
112
|
end
|
110
113
|
end
|
@@ -112,16 +115,16 @@ module BeetleETL
|
|
112
115
|
describe 'transition_delete' do
|
113
116
|
it 'creates records with DELETE that no loger exist in the stage table for the given run' do
|
114
117
|
insert_into(:example_table).values(
|
115
|
-
[ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
116
|
-
[ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
117
|
-
[ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
118
|
+
[ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
119
|
+
[ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
120
|
+
[ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
118
121
|
)
|
119
122
|
|
120
123
|
subject.transition_delete
|
121
124
|
|
122
125
|
expect(subject.stage_table_name.to_sym).to have_values(
|
123
|
-
[ :
|
124
|
-
[
|
126
|
+
[ :id , :transition ] ,
|
127
|
+
[ 1 , 'DELETE' ] ,
|
125
128
|
)
|
126
129
|
end
|
127
130
|
end
|
@@ -129,9 +132,9 @@ module BeetleETL
|
|
129
132
|
describe 'transition_reinstate' do
|
130
133
|
it 'assigns REINSTATE to previously deleted records' do
|
131
134
|
insert_into(:example_table).values(
|
132
|
-
[ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
133
|
-
[ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
134
|
-
[ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
135
|
+
[ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
136
|
+
[ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
137
|
+
[ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
135
138
|
)
|
136
139
|
|
137
140
|
insert_into(subject.stage_table_name.to_sym).values(
|
@@ -143,9 +146,34 @@ module BeetleETL
|
|
143
146
|
subject.transition_reinstate
|
144
147
|
|
145
148
|
expect(subject.stage_table_name.to_sym).to have_values(
|
146
|
-
[ :external_id , :transition ] ,
|
147
|
-
[ 'existing' , nil ] ,
|
148
|
-
[ 'deleted' , 'REINSTATE' ] ,
|
149
|
+
[ :external_id , :id , :transition ] ,
|
150
|
+
[ 'existing' , nil , nil ] ,
|
151
|
+
[ 'deleted' , 2 , 'REINSTATE' ] ,
|
152
|
+
)
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
describe '#transition_keep' do
|
157
|
+
it 'assigns KEEP to unchanged records' do
|
158
|
+
|
159
|
+
insert_into(:example_table).values(
|
160
|
+
[ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
|
161
|
+
[ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
|
162
|
+
[ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
|
163
|
+
)
|
164
|
+
|
165
|
+
insert_into(subject.stage_table_name.to_sym).values(
|
166
|
+
[ :external_id , :payload , :foo_id ] ,
|
167
|
+
[ 'created' , nil , nil ] ,
|
168
|
+
[ 'existing' , 'existing content' , 1 ] ,
|
169
|
+
)
|
170
|
+
|
171
|
+
subject.transition_keep
|
172
|
+
|
173
|
+
expect(subject.stage_table_name.to_sym).to have_values(
|
174
|
+
[ :external_id , :id , :transition ] ,
|
175
|
+
[ 'created' , nil , nil ] ,
|
176
|
+
[ 'existing' , 1 , 'KEEP' ] ,
|
149
177
|
)
|
150
178
|
end
|
151
179
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: beetle_etl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luciano Maiwald
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-09-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -132,9 +132,10 @@ files:
|
|
132
132
|
- lib/beetle_etl/import.rb
|
133
133
|
- lib/beetle_etl/naming.rb
|
134
134
|
- lib/beetle_etl/reporter.rb
|
135
|
+
- lib/beetle_etl/step_runner/abstract_step_runner.rb
|
135
136
|
- lib/beetle_etl/step_runner/async_step_runner.rb
|
136
137
|
- lib/beetle_etl/step_runner/dependency_resolver.rb
|
137
|
-
- lib/beetle_etl/steps/assign_ids.rb
|
138
|
+
- lib/beetle_etl/step_runner/sequential_step_runner.rb
|
138
139
|
- lib/beetle_etl/steps/create_stage.rb
|
139
140
|
- lib/beetle_etl/steps/drop_stage.rb
|
140
141
|
- lib/beetle_etl/steps/load.rb
|
@@ -158,7 +159,6 @@ files:
|
|
158
159
|
- spec/feature/feature_spec.rb
|
159
160
|
- spec/reporter_spec.rb
|
160
161
|
- spec/spec_helper.rb
|
161
|
-
- spec/steps/assign_ids_spec.rb
|
162
162
|
- spec/steps/create_stage_spec.rb
|
163
163
|
- spec/steps/load_spec.rb
|
164
164
|
- spec/steps/map_relations_spec.rb
|
@@ -208,7 +208,6 @@ test_files:
|
|
208
208
|
- spec/feature/feature_spec.rb
|
209
209
|
- spec/reporter_spec.rb
|
210
210
|
- spec/spec_helper.rb
|
211
|
-
- spec/steps/assign_ids_spec.rb
|
212
211
|
- spec/steps/create_stage_spec.rb
|
213
212
|
- spec/steps/load_spec.rb
|
214
213
|
- spec/steps/map_relations_spec.rb
|
@@ -1,23 +0,0 @@
|
|
1
|
-
module BeetleETL
|
2
|
-
class AssignIds < Step
|
3
|
-
|
4
|
-
def dependencies
|
5
|
-
[TableDiff.step_name(table_name)].to_set
|
6
|
-
end
|
7
|
-
|
8
|
-
def run
|
9
|
-
database.execute <<-SQL
|
10
|
-
UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
|
11
|
-
SET id = COALESCE(target.id, NEXTVAL('#{target_schema}.#{table_name}_id_seq'))
|
12
|
-
FROM "#{target_schema}"."#{stage_table_name}" stage
|
13
|
-
LEFT OUTER JOIN "#{target_schema}"."#{table_name}" target
|
14
|
-
on (
|
15
|
-
stage.external_id = target.external_id
|
16
|
-
AND target.external_source = '#{external_source}'
|
17
|
-
)
|
18
|
-
WHERE stage_update.external_id = stage.external_id
|
19
|
-
SQL
|
20
|
-
end
|
21
|
-
|
22
|
-
end
|
23
|
-
end
|
@@ -1,74 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
module BeetleETL
|
4
|
-
describe AssignIds do
|
5
|
-
|
6
|
-
let(:external_source) { 'my_source' }
|
7
|
-
let(:another_source) { 'another_source' }
|
8
|
-
|
9
|
-
let(:config) do
|
10
|
-
OpenStruct.new({
|
11
|
-
stage_schema: 'stage',
|
12
|
-
target_schema: 'public',
|
13
|
-
external_source: external_source,
|
14
|
-
database: test_database,
|
15
|
-
})
|
16
|
-
end
|
17
|
-
|
18
|
-
subject { AssignIds.new(config, :example_table) }
|
19
|
-
|
20
|
-
describe '#dependencies' do
|
21
|
-
it 'depends on TableDiff of the same table' do
|
22
|
-
expect(subject.dependencies).to eql(['example_table: TableDiff'].to_set)
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
describe '#run' do
|
27
|
-
before do
|
28
|
-
test_database.create_table(subject.stage_table_name.to_sym) do
|
29
|
-
Integer :id
|
30
|
-
String :external_id, size: 255
|
31
|
-
String :transition, size: 255
|
32
|
-
end
|
33
|
-
|
34
|
-
test_database.create_table(:example_table) do
|
35
|
-
primary_key :id
|
36
|
-
String :external_id, size: 255
|
37
|
-
String :external_source, size: 255
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
it 'assigns ids for' do
|
42
|
-
# - generated ones for new records
|
43
|
-
# - mapped ones by external_id for existing records
|
44
|
-
|
45
|
-
insert_into(:example_table).values(
|
46
|
-
[ :external_id , :external_source ] ,
|
47
|
-
[ 'a' , external_source ] ,
|
48
|
-
[ 'b' , external_source ] ,
|
49
|
-
[ 'c' , external_source ] ,
|
50
|
-
[ 'd' , another_source ] ,
|
51
|
-
)
|
52
|
-
|
53
|
-
insert_into(subject.stage_table_name.to_sym).values(
|
54
|
-
[ :external_id ] ,
|
55
|
-
[ 'new value' ] ,
|
56
|
-
[ 'a' ] ,
|
57
|
-
[ 'b' ] ,
|
58
|
-
[ 'c' ] ,
|
59
|
-
)
|
60
|
-
|
61
|
-
subject.run
|
62
|
-
|
63
|
-
expect(subject.stage_table_name.to_sym).to have_values(
|
64
|
-
[ :id , :external_id ] ,
|
65
|
-
[ 1 , 'a' ] ,
|
66
|
-
[ 2 , 'b' ] ,
|
67
|
-
[ 3 , 'c' ] ,
|
68
|
-
[ 5 , 'new value' ] ,
|
69
|
-
)
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
end
|
74
|
-
end
|