beetle_etl 2.0.1 → 2.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2cd21d7530444cef54966138ce38a5ec6ca0f08b
4
- data.tar.gz: 277b54c1b957edeeb78fab7434852215f571d486
3
+ metadata.gz: 114b9229ba18051ff3bb5d606af695df0b58649c
4
+ data.tar.gz: 869afc2743aa1c5a4028bc4d48b1fa7416c4bb43
5
5
  SHA512:
6
- metadata.gz: 659b6e6cde8208578833b7fd4a870f15fd706300389aef4c2c8c93786a17954bddca324398f1ca2e32bba12a96ed89f2e26b237df78efe99f33e1cbb0d9cd476
7
- data.tar.gz: cb8d7842912f24a4e82d0198f4508757f32c5051183face61e5359bfbe36a5ce353127e085992a33069d82e6afd6bc1d44a68c79d832d38fc079eca819d8726a
6
+ metadata.gz: 1c70a1b62aa9689174177d4021eb1499ad6afdbf688338affc682d99c21633f81a4f61a64b5db3f487b7cdeddef9511cf430cef743f3eb73eedcc1d8b11562f1
7
+ data.tar.gz: d61a1b35342ec7a4cf4d78ffee9efaed6b4ce7f3c44954a0e77b1e03c81e10f4d7edc9f26acaf5635a128f1e008302a59ab81121273003ee556899d1d42bf904
@@ -19,11 +19,10 @@ module BeetleETL
19
19
  require 'beetle_etl/steps/transform'
20
20
  require 'beetle_etl/steps/map_relations'
21
21
  require 'beetle_etl/steps/table_diff'
22
- require 'beetle_etl/steps/assign_ids'
23
22
  require 'beetle_etl/steps/load'
24
23
  require 'beetle_etl/steps/drop_stage'
25
24
 
26
- require 'beetle_etl/step_runner/dependency_resolver'
25
+ require 'beetle_etl/step_runner/sequential_step_runner'
27
26
  require 'beetle_etl/step_runner/async_step_runner'
28
27
 
29
28
  require 'beetle_etl/import'
@@ -5,57 +5,61 @@ module BeetleETL
5
5
 
6
6
  def initialize(config)
7
7
  @config = config
8
- end
9
-
10
- def run
11
- setup
12
- import
13
- ensure
14
- cleanup
15
- end
8
+ @report = {}
16
9
 
17
- def setup
18
- transformations.each do |t|
19
- CreateStage.new(@config, t.table_name, t.relations, t.column_names).run
20
- end
10
+ @transformations ||= TransformationLoader.new(@config).load
21
11
  end
22
12
 
23
- def import
24
- data_report = AsyncStepRunner.new(@config, data_steps).run
25
- load_report = @config.database.transaction do
26
- AsyncStepRunner.new(@config, load_steps).run
13
+ def run
14
+ begin
15
+ run_setup
16
+ run_transform
17
+ run_load
18
+ ensure
19
+ run_cleanup
27
20
  end
28
21
 
29
- data_report.deep_merge load_report
22
+ @report
30
23
  end
31
24
 
32
- def cleanup
33
- transformations.each do |t|
34
- DropStage.new(@config, t.table_name).run
35
- end
36
- end
25
+ def run_setup
26
+ steps = @transformations.map { |t|
27
+ CreateStage.new(@config, t.table_name, t.relations, t.column_names)
28
+ }
37
29
 
38
- private
30
+ @report.deep_merge SequentialStepRunner.new(@config, steps).run
31
+ end
39
32
 
40
- def data_steps
41
- transformations.flat_map do |t|
33
+ def run_transform
34
+ steps = @transformations.flat_map { |t|
42
35
  [
43
36
  Transform.new(@config, t.table_name, t.dependencies, t.query),
44
37
  MapRelations.new(@config, t.table_name, t.relations),
45
- TableDiff.new(@config, t.table_name),
46
- AssignIds.new(@config, t.table_name),
38
+ TableDiff.new(@config, t.table_name)
47
39
  ]
48
- end
40
+ }
41
+
42
+ @report.deep_merge AsyncStepRunner.new(@config, steps).run
49
43
  end
50
44
 
51
- def load_steps
52
- transformations.map do |t|
45
+ def run_load
46
+ steps = @transformations.map { |t|
53
47
  Load.new(@config, t.table_name, t.relations)
48
+ }
49
+
50
+ result = @config.database.transaction do
51
+ SequentialStepRunner.new(@config, steps).run
54
52
  end
53
+
54
+ @report.deep_merge result
55
55
  end
56
56
 
57
- def transformations
58
- @transformations ||= TransformationLoader.new(@config).load
57
+ def run_cleanup
58
+ steps = @transformations.map { |t|
59
+ DropStage.new(@config, t.table_name)
60
+ }
61
+
62
+ @report.deep_merge SequentialStepRunner.new(@config, steps).run
59
63
  end
60
64
 
61
65
  end
@@ -0,0 +1,47 @@
1
+ require 'active_support/core_ext/hash/slice'
2
+
3
module BeetleETL
  # Base class for step runners. Stores the configuration and the list of
  # steps, and provides private helpers to execute a single step with logging
  # and to record a step's timing in a results hash.
  class AbstractStepRunner

    # @param config [Object] must respond to #logger; the logger receives
    #   #info and #fatal messages while steps are executed
    # @param steps [Array] objects responding to #name, #table_name and #run
    def initialize(config, steps)
      @config = config
      @steps = steps
    end

    # Executes the steps. Concrete runners must override this method.
    #
    # @raise [NotImplementedError] always, in this base class
    def run
      raise NotImplementedError
    end

    private

    # Runs one step, logging its start, its duration and any failure.
    #
    # @param step [Object] responds to #name, #table_name and #run
    # @return [Hash] :step_name, :table_name, :started_at and :finished_at
    # @raise [StandardError] whatever the step raised, after logging its
    #   message at fatal level
    def run_step(step)
      @config.logger.info("started step #{step.name}")

      started_at = Time.now
      step.run
      finished_at = Time.now

      # Formats the elapsed seconds as a clock value (HH:MM:SS).
      duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
      @config.logger.info("finished #{step.name} in #{duration}")

      {
        step_name: step.name,
        table_name: step.table_name,
        started_at: started_at,
        finished_at: finished_at
      }
    rescue => e
      @config.logger.fatal(e.message)
      raise
    end

    # Records the timing of one step under results[table_name][step_name].
    #
    # @param results [Hash] accumulator, mutated in place
    # @param step_data [Hash] a hash as returned by #run_step
    # @return [Hash] the same +results+ object, so the call can serve as the
    #   block value of a fold (e.g. Enumerable#reduce) without losing the
    #   accumulator
    def add_result!(results, step_data)
      table_name = step_data[:table_name]
      step_name = step_data[:step_name]

      results[table_name] ||= {}
      results[table_name][step_name] = step_data.slice(:started_at, :finished_at)

      # Return the accumulator rather than the value of the assignment above.
      results
    end

  end
end
@@ -1,15 +1,17 @@
1
+ require_relative './abstract_step_runner'
2
+ require_relative './dependency_resolver'
3
+
1
4
  module BeetleETL
2
- class AsyncStepRunner
5
+ class AsyncStepRunner < AbstractStepRunner
3
6
 
4
7
  def initialize(config, steps)
5
- @config = config
8
+ super(config, steps)
6
9
 
7
10
  @dependency_resolver = DependencyResolver.new(steps)
8
- @steps = steps
9
11
 
10
12
  @queue = Queue.new
11
13
  @completed = Set.new
12
- @running = Set.new
14
+ @started = Set.new
13
15
  end
14
16
 
15
17
  def run
@@ -18,17 +20,13 @@ module BeetleETL
18
20
  until all_steps_complete?
19
21
  runnables.each do |step|
20
22
  run_step_async(step)
21
- mark_step_running(step.name)
23
+ @started.add(step.name)
22
24
  end
23
25
 
24
- table_name, step_name, step_data = @queue.pop
25
-
26
- unless results.has_key?(table_name)
27
- results[table_name] = {}
28
- end
26
+ step_data = @queue.pop
27
+ add_result!(results, step_data)
29
28
 
30
- results[table_name][step_name] = step_data
31
- mark_step_completed(step_name)
29
+ @completed.add(step_data[:step_name])
32
30
  end
33
31
 
34
32
  results
@@ -36,49 +34,19 @@ module BeetleETL
36
34
 
37
35
  private
38
36
 
39
- attr_reader :running, :completed
40
-
41
37
  def run_step_async(step)
42
38
  Thread.new do
43
- begin
44
- @config.logger.info("started step #{step.name}")
45
-
46
- started_at = Time.now
47
- step.run
48
- finished_at = Time.now
49
-
50
- duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
51
- @config.logger.info("finished #{step.name} in #{duration}")
52
-
53
- @queue.push [
54
- step.table_name,
55
- step.name,
56
- { started_at: started_at, finished_at: finished_at }
57
- ]
58
-
59
- rescue => e
60
- @config.logger.fatal(e.message)
61
- raise e
62
- end
39
+ @queue.push run_step(step)
63
40
  end.abort_on_exception = true
64
41
  end
65
42
 
66
- def mark_step_running(step_name)
67
- running.add(step_name)
68
- end
69
-
70
- def mark_step_completed(step_name)
71
- runnables.delete(step_name)
72
- completed.add(step_name)
73
- end
74
-
75
43
  def runnables
76
- resolvables = @dependency_resolver.resolvables(completed)
77
- resolvables.reject { |r| running.include? r.name }
44
+ resolvables = @dependency_resolver.resolvables(@completed)
45
+ resolvables.reject { |r| @started.include? r.name }
78
46
  end
79
47
 
80
48
  def all_steps_complete?
81
- @steps.map(&:name).to_set == completed.to_set
49
+ @steps.map(&:name).to_set == @completed.to_set
82
50
  end
83
51
 
84
52
  end
@@ -0,0 +1,13 @@
1
+ require_relative './abstract_step_runner'
2
+
3
module BeetleETL
  # Step runner that executes its steps one after another, in the order in
  # which they were passed to the constructor.
  class SequentialStepRunner < AbstractStepRunner

    # Runs every step in order and collects the timing of each one.
    #
    # each_with_object is used instead of reduce so that the accumulator hash
    # is carried through every iteration regardless of what add_result!
    # returns.
    #
    # @return [Hash] the results hash filled by add_result! for each step
    #   (empty when there are no steps)
    def run
      @steps.each_with_object({}) do |step, results|
        add_result!(results, run_step(step))
      end
    end

  end
end
@@ -8,7 +8,7 @@ module BeetleETL
8
8
 
9
9
  def dependencies
10
10
  result = Set.new([Transform.step_name(table_name)])
11
- result.merge @relations.values.map { |d| AssignIds.step_name(d) }
11
+ result.merge @relations.values.map { |d| TableDiff.step_name(d) }
12
12
  end
13
13
 
14
14
  def run
@@ -11,7 +11,7 @@ module BeetleETL
11
11
  end
12
12
 
13
13
  def run
14
- %w(create update delete reinstate).each do |transition|
14
+ %w(create update delete reinstate keep).each do |transition|
15
15
  public_send(:"transition_#{transition}")
16
16
  end
17
17
  end
@@ -19,7 +19,9 @@ module BeetleETL
19
19
  def transition_create
20
20
  database.execute <<-SQL
21
21
  UPDATE "#{target_schema}"."#{stage_table_name}" stage
22
- SET transition = 'CREATE'
22
+ SET
23
+ transition = 'CREATE',
24
+ id = NEXTVAL('#{target_schema}.#{table_name}_id_seq')
23
25
  WHERE NOT EXISTS (
24
26
  SELECT 1
25
27
  FROM "#{target_schema}"."#{table_name}" target
@@ -31,12 +33,13 @@ module BeetleETL
31
33
 
32
34
  def transition_update
33
35
  database.execute <<-SQL
34
- UPDATE "#{target_schema}"."#{stage_table_name}" stage
35
- SET transition = 'UPDATE'
36
- WHERE EXISTS (
37
- SELECT 1
38
- FROM "#{target_schema}"."#{table_name}" target
39
- WHERE target.external_id = stage.external_id
36
+ UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
37
+ SET
38
+ transition = 'UPDATE',
39
+ id = target.id
40
+ FROM "#{target_schema}"."#{stage_table_name}" stage
41
+ JOIN "#{target_schema}"."#{table_name}" target ON (
42
+ target.external_id = stage.external_id
40
43
  AND target.external_source = '#{external_source}'
41
44
  AND target.deleted_at IS NULL
42
45
  AND
@@ -44,16 +47,17 @@ module BeetleETL
44
47
  IS DISTINCT FROM
45
48
  (#{stage_record_columns.join(', ')})
46
49
  )
50
+ WHERE stage_update.external_id = stage.external_id
47
51
  SQL
48
52
  end
49
53
 
50
54
  def transition_delete
51
55
  database.execute <<-SQL
52
56
  INSERT INTO "#{target_schema}"."#{stage_table_name}"
53
- (external_id, transition)
57
+ (transition, id)
54
58
  SELECT
55
- target.external_id,
56
- 'DELETE'
59
+ 'DELETE',
60
+ target.id
57
61
  FROM "#{target_schema}"."#{table_name}" target
58
62
  LEFT OUTER JOIN "#{target_schema}"."#{stage_table_name}" stage
59
63
  ON (stage.external_id = target.external_id)
@@ -65,15 +69,37 @@ module BeetleETL
65
69
 
66
70
  def transition_reinstate
67
71
  database.execute <<-SQL
68
- UPDATE "#{target_schema}"."#{stage_table_name}" stage
69
- SET transition = 'REINSTATE'
70
- WHERE EXISTS (
71
- SELECT 1
72
- FROM "#{target_schema}"."#{table_name}" target
73
- WHERE target.external_id = stage.external_id
72
+ UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
73
+ SET
74
+ transition = 'REINSTATE',
75
+ id = target.id
76
+ FROM "#{target_schema}"."#{stage_table_name}" stage
77
+ JOIN "#{target_schema}"."#{table_name}" target ON (
78
+ target.external_id = stage.external_id
74
79
  AND target.external_source = '#{external_source}'
75
80
  AND target.deleted_at IS NOT NULL
76
81
  )
82
+ WHERE stage_update.external_id = stage.external_id
83
+ SQL
84
+ end
85
+
86
+ def transition_keep
87
+ database.execute <<-SQL
88
+ UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
89
+ SET
90
+ transition = 'KEEP',
91
+ id = target.id
92
+ FROM "#{target_schema}"."#{stage_table_name}" stage
93
+ JOIN "#{target_schema}"."#{table_name}" target ON (
94
+ target.external_id = stage.external_id
95
+ AND target.external_source = '#{external_source}'
96
+ AND target.deleted_at IS NULL
97
+ AND
98
+ (#{target_record_columns.join(', ')})
99
+ IS NOT DISTINCT FROM
100
+ (#{stage_record_columns.join(', ')})
101
+ )
102
+ WHERE stage_update.external_id = stage.external_id
77
103
  SQL
78
104
  end
79
105
 
@@ -1,3 +1,3 @@
1
1
  module BeetleETL
2
- VERSION = "2.0.1"
2
+ VERSION = "2.0.3"
3
3
  end
@@ -52,11 +52,11 @@ module BeetleETL
52
52
  end
53
53
 
54
54
  describe '#depenencies' do
55
- it 'depends on Transform of the same table and AssignIds of its dependees' do
55
+ it 'depends on Transform of the same table and TableDiff of its dependees' do
56
56
  expect(subject.dependencies).to eql(
57
57
  [
58
- 'dependee_a: AssignIds',
59
- 'dependee_b: AssignIds',
58
+ 'dependee_a: TableDiff',
59
+ 'dependee_b: TableDiff',
60
60
  'depender: Transform',
61
61
  ].to_set
62
62
  )
@@ -18,6 +18,7 @@ module BeetleETL
18
18
 
19
19
  before do
20
20
  test_database.create_table(subject.stage_table_name.to_sym) do
21
+ Integer :id
21
22
  String :external_id, size: 255
22
23
  String :transition, size: 20
23
24
 
@@ -28,7 +29,7 @@ module BeetleETL
28
29
  end
29
30
 
30
31
  test_database.create_table(:example_table) do
31
- Integer :id
32
+ primary_key :id
32
33
  String :external_id, size: 255
33
34
  String :external_source, size: 255
34
35
  DateTime :deleted_at
@@ -47,7 +48,7 @@ module BeetleETL
47
48
 
48
49
  describe '#run' do
49
50
  it 'runs all transitions' do
50
- %w(create update delete reinstate).each do |transition|
51
+ %w(create update delete reinstate keep).each do |transition|
51
52
  expect(subject).to receive(:"transition_#{transition}")
52
53
  end
53
54
 
@@ -59,11 +60,13 @@ module BeetleETL
59
60
  it 'assigns CREATE to new records' do
60
61
 
61
62
  insert_into(:example_table).values(
62
- [ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
63
- [ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
64
- [ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
63
+ [ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
64
+ [ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
65
+ [ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
65
66
  )
66
67
 
68
+ test_database.run "SELECT setval('public.example_table_id_seq', 99)"
69
+
67
70
  insert_into(subject.stage_table_name.to_sym).values(
68
71
  [ :external_id ] ,
69
72
  [ 'created' ] ,
@@ -72,10 +75,10 @@ module BeetleETL
72
75
 
73
76
  subject.transition_create
74
77
 
75
- insert_into(subject.stage_table_name.to_sym).values(
76
- [ :external_id , :transition ] ,
77
- [ 'created' , 'CREATE' ] ,
78
- [ 'existing' , nil ] ,
78
+ expect(subject.stage_table_name.to_sym).to have_values(
79
+ [ :external_id , :id , :transition ] ,
80
+ [ 'created' , 100 , 'CREATE' ] ,
81
+ [ 'existing' , nil , nil ] ,
79
82
  )
80
83
  end
81
84
  end
@@ -85,10 +88,10 @@ module BeetleETL
85
88
  except externald_*_id columns and columns not contained in the stage table' do
86
89
 
87
90
  insert_into(:example_table).values(
88
- [ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
89
- [ 'existing_1' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
90
- [ 'existing_2' , external_source , 'existing content' , 'ignored content' , 2 , nil ] ,
91
- [ 'deleted' , external_source , 'deleted content' , 'ignored content' , 3 , 1.day.ago ] ,
91
+ [ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
92
+ [ 1 , 'existing_1' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
93
+ [ 2 , 'existing_2' , external_source , 'existing content' , 'ignored content' , 2 , nil ] ,
94
+ [ 3 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 3 , 1.day.ago ] ,
92
95
  )
93
96
 
94
97
  insert_into(subject.stage_table_name.to_sym).values(
@@ -101,10 +104,10 @@ module BeetleETL
101
104
  subject.transition_update
102
105
 
103
106
  expect(subject.stage_table_name.to_sym).to have_values(
104
- [ :external_id , :transition ] ,
105
- [ 'existing_1' , 'UPDATE' ] ,
106
- [ 'existing_2' , 'UPDATE' ] ,
107
- [ 'deleted' , nil ] ,
107
+ [ :external_id , :id , :transition ] ,
108
+ [ 'existing_1' , 1 , 'UPDATE' ] ,
109
+ [ 'existing_2' , 2 , 'UPDATE' ] ,
110
+ [ 'deleted' , nil , nil ] ,
108
111
  )
109
112
  end
110
113
  end
@@ -112,16 +115,16 @@ module BeetleETL
112
115
  describe 'transition_delete' do
113
116
  it 'creates records with DELETE that no loger exist in the stage table for the given run' do
114
117
  insert_into(:example_table).values(
115
- [ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
116
- [ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
117
- [ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
118
+ [ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
119
+ [ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
120
+ [ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
118
121
  )
119
122
 
120
123
  subject.transition_delete
121
124
 
122
125
  expect(subject.stage_table_name.to_sym).to have_values(
123
- [ :external_id , :transition ] ,
124
- [ 'existing' , 'DELETE' ] ,
126
+ [ :id , :transition ] ,
127
+ [ 1 , 'DELETE' ] ,
125
128
  )
126
129
  end
127
130
  end
@@ -129,9 +132,9 @@ module BeetleETL
129
132
  describe 'transition_reinstate' do
130
133
  it 'assigns REINSTATE to previously deleted records' do
131
134
  insert_into(:example_table).values(
132
- [ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
133
- [ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
134
- [ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
135
+ [ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
136
+ [ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
137
+ [ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
135
138
  )
136
139
 
137
140
  insert_into(subject.stage_table_name.to_sym).values(
@@ -143,9 +146,34 @@ module BeetleETL
143
146
  subject.transition_reinstate
144
147
 
145
148
  expect(subject.stage_table_name.to_sym).to have_values(
146
- [ :external_id , :transition ] ,
147
- [ 'existing' , nil ] ,
148
- [ 'deleted' , 'REINSTATE' ] ,
149
+ [ :external_id , :id , :transition ] ,
150
+ [ 'existing' , nil , nil ] ,
151
+ [ 'deleted' , 2 , 'REINSTATE' ] ,
152
+ )
153
+ end
154
+ end
155
+
156
+ describe '#transition_keep' do
157
+ it 'assigns KEEP to unchanged records' do
158
+
159
+ insert_into(:example_table).values(
160
+ [ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
161
+ [ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
162
+ [ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
163
+ )
164
+
165
+ insert_into(subject.stage_table_name.to_sym).values(
166
+ [ :external_id , :payload , :foo_id ] ,
167
+ [ 'created' , nil , nil ] ,
168
+ [ 'existing' , 'existing content' , 1 ] ,
169
+ )
170
+
171
+ subject.transition_keep
172
+
173
+ expect(subject.stage_table_name.to_sym).to have_values(
174
+ [ :external_id , :id , :transition ] ,
175
+ [ 'created' , nil , nil ] ,
176
+ [ 'existing' , 1 , 'KEEP' ] ,
149
177
  )
150
178
  end
151
179
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: beetle_etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luciano Maiwald
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-08-24 00:00:00.000000000 Z
11
+ date: 2017-09-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -132,9 +132,10 @@ files:
132
132
  - lib/beetle_etl/import.rb
133
133
  - lib/beetle_etl/naming.rb
134
134
  - lib/beetle_etl/reporter.rb
135
+ - lib/beetle_etl/step_runner/abstract_step_runner.rb
135
136
  - lib/beetle_etl/step_runner/async_step_runner.rb
136
137
  - lib/beetle_etl/step_runner/dependency_resolver.rb
137
- - lib/beetle_etl/steps/assign_ids.rb
138
+ - lib/beetle_etl/step_runner/sequential_step_runner.rb
138
139
  - lib/beetle_etl/steps/create_stage.rb
139
140
  - lib/beetle_etl/steps/drop_stage.rb
140
141
  - lib/beetle_etl/steps/load.rb
@@ -158,7 +159,6 @@ files:
158
159
  - spec/feature/feature_spec.rb
159
160
  - spec/reporter_spec.rb
160
161
  - spec/spec_helper.rb
161
- - spec/steps/assign_ids_spec.rb
162
162
  - spec/steps/create_stage_spec.rb
163
163
  - spec/steps/load_spec.rb
164
164
  - spec/steps/map_relations_spec.rb
@@ -208,7 +208,6 @@ test_files:
208
208
  - spec/feature/feature_spec.rb
209
209
  - spec/reporter_spec.rb
210
210
  - spec/spec_helper.rb
211
- - spec/steps/assign_ids_spec.rb
212
211
  - spec/steps/create_stage_spec.rb
213
212
  - spec/steps/load_spec.rb
214
213
  - spec/steps/map_relations_spec.rb
@@ -1,23 +0,0 @@
1
- module BeetleETL
2
- class AssignIds < Step
3
-
4
- def dependencies
5
- [TableDiff.step_name(table_name)].to_set
6
- end
7
-
8
- def run
9
- database.execute <<-SQL
10
- UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
11
- SET id = COALESCE(target.id, NEXTVAL('#{target_schema}.#{table_name}_id_seq'))
12
- FROM "#{target_schema}"."#{stage_table_name}" stage
13
- LEFT OUTER JOIN "#{target_schema}"."#{table_name}" target
14
- on (
15
- stage.external_id = target.external_id
16
- AND target.external_source = '#{external_source}'
17
- )
18
- WHERE stage_update.external_id = stage.external_id
19
- SQL
20
- end
21
-
22
- end
23
- end
@@ -1,74 +0,0 @@
1
- require 'spec_helper'
2
-
3
- module BeetleETL
4
- describe AssignIds do
5
-
6
- let(:external_source) { 'my_source' }
7
- let(:another_source) { 'another_source' }
8
-
9
- let(:config) do
10
- OpenStruct.new({
11
- stage_schema: 'stage',
12
- target_schema: 'public',
13
- external_source: external_source,
14
- database: test_database,
15
- })
16
- end
17
-
18
- subject { AssignIds.new(config, :example_table) }
19
-
20
- describe '#dependencies' do
21
- it 'depends on TableDiff of the same table' do
22
- expect(subject.dependencies).to eql(['example_table: TableDiff'].to_set)
23
- end
24
- end
25
-
26
- describe '#run' do
27
- before do
28
- test_database.create_table(subject.stage_table_name.to_sym) do
29
- Integer :id
30
- String :external_id, size: 255
31
- String :transition, size: 255
32
- end
33
-
34
- test_database.create_table(:example_table) do
35
- primary_key :id
36
- String :external_id, size: 255
37
- String :external_source, size: 255
38
- end
39
- end
40
-
41
- it 'assigns ids for' do
42
- # - generated ones for new records
43
- # - mapped ones by external_id for existing records
44
-
45
- insert_into(:example_table).values(
46
- [ :external_id , :external_source ] ,
47
- [ 'a' , external_source ] ,
48
- [ 'b' , external_source ] ,
49
- [ 'c' , external_source ] ,
50
- [ 'd' , another_source ] ,
51
- )
52
-
53
- insert_into(subject.stage_table_name.to_sym).values(
54
- [ :external_id ] ,
55
- [ 'new value' ] ,
56
- [ 'a' ] ,
57
- [ 'b' ] ,
58
- [ 'c' ] ,
59
- )
60
-
61
- subject.run
62
-
63
- expect(subject.stage_table_name.to_sym).to have_values(
64
- [ :id , :external_id ] ,
65
- [ 1 , 'a' ] ,
66
- [ 2 , 'b' ] ,
67
- [ 3 , 'c' ] ,
68
- [ 5 , 'new value' ] ,
69
- )
70
- end
71
- end
72
-
73
- end
74
- end