beetle_etl 2.0.1 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2cd21d7530444cef54966138ce38a5ec6ca0f08b
4
- data.tar.gz: 277b54c1b957edeeb78fab7434852215f571d486
3
+ metadata.gz: 114b9229ba18051ff3bb5d606af695df0b58649c
4
+ data.tar.gz: 869afc2743aa1c5a4028bc4d48b1fa7416c4bb43
5
5
  SHA512:
6
- metadata.gz: 659b6e6cde8208578833b7fd4a870f15fd706300389aef4c2c8c93786a17954bddca324398f1ca2e32bba12a96ed89f2e26b237df78efe99f33e1cbb0d9cd476
7
- data.tar.gz: cb8d7842912f24a4e82d0198f4508757f32c5051183face61e5359bfbe36a5ce353127e085992a33069d82e6afd6bc1d44a68c79d832d38fc079eca819d8726a
6
+ metadata.gz: 1c70a1b62aa9689174177d4021eb1499ad6afdbf688338affc682d99c21633f81a4f61a64b5db3f487b7cdeddef9511cf430cef743f3eb73eedcc1d8b11562f1
7
+ data.tar.gz: d61a1b35342ec7a4cf4d78ffee9efaed6b4ce7f3c44954a0e77b1e03c81e10f4d7edc9f26acaf5635a128f1e008302a59ab81121273003ee556899d1d42bf904
@@ -19,11 +19,10 @@ module BeetleETL
19
19
  require 'beetle_etl/steps/transform'
20
20
  require 'beetle_etl/steps/map_relations'
21
21
  require 'beetle_etl/steps/table_diff'
22
- require 'beetle_etl/steps/assign_ids'
23
22
  require 'beetle_etl/steps/load'
24
23
  require 'beetle_etl/steps/drop_stage'
25
24
 
26
- require 'beetle_etl/step_runner/dependency_resolver'
25
+ require 'beetle_etl/step_runner/sequential_step_runner'
27
26
  require 'beetle_etl/step_runner/async_step_runner'
28
27
 
29
28
  require 'beetle_etl/import'
@@ -5,57 +5,61 @@ module BeetleETL
5
5
 
6
6
  def initialize(config)
7
7
  @config = config
8
- end
9
-
10
- def run
11
- setup
12
- import
13
- ensure
14
- cleanup
15
- end
8
+ @report = {}
16
9
 
17
- def setup
18
- transformations.each do |t|
19
- CreateStage.new(@config, t.table_name, t.relations, t.column_names).run
20
- end
10
+ @transformations ||= TransformationLoader.new(@config).load
21
11
  end
22
12
 
23
- def import
24
- data_report = AsyncStepRunner.new(@config, data_steps).run
25
- load_report = @config.database.transaction do
26
- AsyncStepRunner.new(@config, load_steps).run
13
+ def run
14
+ begin
15
+ run_setup
16
+ run_transform
17
+ run_load
18
+ ensure
19
+ run_cleanup
27
20
  end
28
21
 
29
- data_report.deep_merge load_report
22
+ @report
30
23
  end
31
24
 
32
- def cleanup
33
- transformations.each do |t|
34
- DropStage.new(@config, t.table_name).run
35
- end
36
- end
25
+ def run_setup
26
+ steps = @transformations.map { |t|
27
+ CreateStage.new(@config, t.table_name, t.relations, t.column_names)
28
+ }
37
29
 
38
- private
30
+ @report.deep_merge SequentialStepRunner.new(@config, steps).run
31
+ end
39
32
 
40
- def data_steps
41
- transformations.flat_map do |t|
33
+ def run_transform
34
+ steps = @transformations.flat_map { |t|
42
35
  [
43
36
  Transform.new(@config, t.table_name, t.dependencies, t.query),
44
37
  MapRelations.new(@config, t.table_name, t.relations),
45
- TableDiff.new(@config, t.table_name),
46
- AssignIds.new(@config, t.table_name),
38
+ TableDiff.new(@config, t.table_name)
47
39
  ]
48
- end
40
+ }
41
+
42
+ @report.deep_merge AsyncStepRunner.new(@config, steps).run
49
43
  end
50
44
 
51
- def load_steps
52
- transformations.map do |t|
45
+ def run_load
46
+ steps = @transformations.map { |t|
53
47
  Load.new(@config, t.table_name, t.relations)
48
+ }
49
+
50
+ result = @config.database.transaction do
51
+ SequentialStepRunner.new(@config, steps).run
54
52
  end
53
+
54
+ @report.deep_merge result
55
55
  end
56
56
 
57
- def transformations
58
- @transformations ||= TransformationLoader.new(@config).load
57
+ def run_cleanup
58
+ steps = @transformations.map { |t|
59
+ DropStage.new(@config, t.table_name)
60
+ }
61
+
62
+ @report.deep_merge SequentialStepRunner.new(@config, steps).run
59
63
  end
60
64
 
61
65
  end
@@ -0,0 +1,47 @@
1
+ require 'active_support/core_ext/hash/slice'
2
+
3
+ module BeetleETL
4
+ class AbstractStepRunner
5
+
6
+ def initialize(config, steps)
7
+ @config = config
8
+ @steps = steps
9
+ end
10
+
11
+ def run
12
+ raise NotImplementedError
13
+ end
14
+
15
+ private
16
+
17
+ def run_step(step)
18
+ @config.logger.info("started step #{step.name}")
19
+
20
+ started_at = Time.now
21
+ step.run
22
+ finished_at = Time.now
23
+
24
+ duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
25
+ @config.logger.info("finished #{step.name} in #{duration}")
26
+
27
+ {
28
+ step_name: step.name,
29
+ table_name: step.table_name,
30
+ started_at: started_at,
31
+ finished_at: finished_at
32
+ }
33
+ rescue => e
34
+ @config.logger.fatal(e.message)
35
+ raise e
36
+ end
37
+
38
+ def add_result!(results, step_data)
39
+ table_name = step_data[:table_name]
40
+ step_name = step_data[:step_name]
41
+
42
+ results[table_name] ||= {}
43
+ results[table_name][step_name] = step_data.slice(:started_at, :finished_at)
44
+ end
45
+
46
+ end
47
+ end
@@ -1,15 +1,17 @@
1
+ require_relative './abstract_step_runner'
2
+ require_relative './dependency_resolver'
3
+
1
4
  module BeetleETL
2
- class AsyncStepRunner
5
+ class AsyncStepRunner < AbstractStepRunner
3
6
 
4
7
  def initialize(config, steps)
5
- @config = config
8
+ super(config, steps)
6
9
 
7
10
  @dependency_resolver = DependencyResolver.new(steps)
8
- @steps = steps
9
11
 
10
12
  @queue = Queue.new
11
13
  @completed = Set.new
12
- @running = Set.new
14
+ @started = Set.new
13
15
  end
14
16
 
15
17
  def run
@@ -18,17 +20,13 @@ module BeetleETL
18
20
  until all_steps_complete?
19
21
  runnables.each do |step|
20
22
  run_step_async(step)
21
- mark_step_running(step.name)
23
+ @started.add(step.name)
22
24
  end
23
25
 
24
- table_name, step_name, step_data = @queue.pop
25
-
26
- unless results.has_key?(table_name)
27
- results[table_name] = {}
28
- end
26
+ step_data = @queue.pop
27
+ add_result!(results, step_data)
29
28
 
30
- results[table_name][step_name] = step_data
31
- mark_step_completed(step_name)
29
+ @completed.add(step_data[:step_name])
32
30
  end
33
31
 
34
32
  results
@@ -36,49 +34,19 @@ module BeetleETL
36
34
 
37
35
  private
38
36
 
39
- attr_reader :running, :completed
40
-
41
37
  def run_step_async(step)
42
38
  Thread.new do
43
- begin
44
- @config.logger.info("started step #{step.name}")
45
-
46
- started_at = Time.now
47
- step.run
48
- finished_at = Time.now
49
-
50
- duration = Time.at(finished_at - started_at).utc.strftime("%H:%M:%S")
51
- @config.logger.info("finished #{step.name} in #{duration}")
52
-
53
- @queue.push [
54
- step.table_name,
55
- step.name,
56
- { started_at: started_at, finished_at: finished_at }
57
- ]
58
-
59
- rescue => e
60
- @config.logger.fatal(e.message)
61
- raise e
62
- end
39
+ @queue.push run_step(step)
63
40
  end.abort_on_exception = true
64
41
  end
65
42
 
66
- def mark_step_running(step_name)
67
- running.add(step_name)
68
- end
69
-
70
- def mark_step_completed(step_name)
71
- runnables.delete(step_name)
72
- completed.add(step_name)
73
- end
74
-
75
43
  def runnables
76
- resolvables = @dependency_resolver.resolvables(completed)
77
- resolvables.reject { |r| running.include? r.name }
44
+ resolvables = @dependency_resolver.resolvables(@completed)
45
+ resolvables.reject { |r| @started.include? r.name }
78
46
  end
79
47
 
80
48
  def all_steps_complete?
81
- @steps.map(&:name).to_set == completed.to_set
49
+ @steps.map(&:name).to_set == @completed.to_set
82
50
  end
83
51
 
84
52
  end
@@ -0,0 +1,13 @@
1
+ require_relative './abstract_step_runner'
2
+
3
+ module BeetleETL
4
+ class SequentialStepRunner < AbstractStepRunner
5
+
6
+ def run
7
+ @steps.reduce({}) do |results, step|
8
+ add_result!(results, run_step(step))
9
+ end
10
+ end
11
+
12
+ end
13
+ end
@@ -8,7 +8,7 @@ module BeetleETL
8
8
 
9
9
  def dependencies
10
10
  result = Set.new([Transform.step_name(table_name)])
11
- result.merge @relations.values.map { |d| AssignIds.step_name(d) }
11
+ result.merge @relations.values.map { |d| TableDiff.step_name(d) }
12
12
  end
13
13
 
14
14
  def run
@@ -11,7 +11,7 @@ module BeetleETL
11
11
  end
12
12
 
13
13
  def run
14
- %w(create update delete reinstate).each do |transition|
14
+ %w(create update delete reinstate keep).each do |transition|
15
15
  public_send(:"transition_#{transition}")
16
16
  end
17
17
  end
@@ -19,7 +19,9 @@ module BeetleETL
19
19
  def transition_create
20
20
  database.execute <<-SQL
21
21
  UPDATE "#{target_schema}"."#{stage_table_name}" stage
22
- SET transition = 'CREATE'
22
+ SET
23
+ transition = 'CREATE',
24
+ id = NEXTVAL('#{target_schema}.#{table_name}_id_seq')
23
25
  WHERE NOT EXISTS (
24
26
  SELECT 1
25
27
  FROM "#{target_schema}"."#{table_name}" target
@@ -31,12 +33,13 @@ module BeetleETL
31
33
 
32
34
  def transition_update
33
35
  database.execute <<-SQL
34
- UPDATE "#{target_schema}"."#{stage_table_name}" stage
35
- SET transition = 'UPDATE'
36
- WHERE EXISTS (
37
- SELECT 1
38
- FROM "#{target_schema}"."#{table_name}" target
39
- WHERE target.external_id = stage.external_id
36
+ UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
37
+ SET
38
+ transition = 'UPDATE',
39
+ id = target.id
40
+ FROM "#{target_schema}"."#{stage_table_name}" stage
41
+ JOIN "#{target_schema}"."#{table_name}" target ON (
42
+ target.external_id = stage.external_id
40
43
  AND target.external_source = '#{external_source}'
41
44
  AND target.deleted_at IS NULL
42
45
  AND
@@ -44,16 +47,17 @@ module BeetleETL
44
47
  IS DISTINCT FROM
45
48
  (#{stage_record_columns.join(', ')})
46
49
  )
50
+ WHERE stage_update.external_id = stage.external_id
47
51
  SQL
48
52
  end
49
53
 
50
54
  def transition_delete
51
55
  database.execute <<-SQL
52
56
  INSERT INTO "#{target_schema}"."#{stage_table_name}"
53
- (external_id, transition)
57
+ (transition, id)
54
58
  SELECT
55
- target.external_id,
56
- 'DELETE'
59
+ 'DELETE',
60
+ target.id
57
61
  FROM "#{target_schema}"."#{table_name}" target
58
62
  LEFT OUTER JOIN "#{target_schema}"."#{stage_table_name}" stage
59
63
  ON (stage.external_id = target.external_id)
@@ -65,15 +69,37 @@ module BeetleETL
65
69
 
66
70
  def transition_reinstate
67
71
  database.execute <<-SQL
68
- UPDATE "#{target_schema}"."#{stage_table_name}" stage
69
- SET transition = 'REINSTATE'
70
- WHERE EXISTS (
71
- SELECT 1
72
- FROM "#{target_schema}"."#{table_name}" target
73
- WHERE target.external_id = stage.external_id
72
+ UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
73
+ SET
74
+ transition = 'REINSTATE',
75
+ id = target.id
76
+ FROM "#{target_schema}"."#{stage_table_name}" stage
77
+ JOIN "#{target_schema}"."#{table_name}" target ON (
78
+ target.external_id = stage.external_id
74
79
  AND target.external_source = '#{external_source}'
75
80
  AND target.deleted_at IS NOT NULL
76
81
  )
82
+ WHERE stage_update.external_id = stage.external_id
83
+ SQL
84
+ end
85
+
86
+ def transition_keep
87
+ database.execute <<-SQL
88
+ UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
89
+ SET
90
+ transition = 'KEEP',
91
+ id = target.id
92
+ FROM "#{target_schema}"."#{stage_table_name}" stage
93
+ JOIN "#{target_schema}"."#{table_name}" target ON (
94
+ target.external_id = stage.external_id
95
+ AND target.external_source = '#{external_source}'
96
+ AND target.deleted_at IS NULL
97
+ AND
98
+ (#{target_record_columns.join(', ')})
99
+ IS NOT DISTINCT FROM
100
+ (#{stage_record_columns.join(', ')})
101
+ )
102
+ WHERE stage_update.external_id = stage.external_id
77
103
  SQL
78
104
  end
79
105
 
@@ -1,3 +1,3 @@
1
1
  module BeetleETL
2
- VERSION = "2.0.1"
2
+ VERSION = "2.0.3"
3
3
  end
@@ -52,11 +52,11 @@ module BeetleETL
52
52
  end
53
53
 
54
54
  describe '#depenencies' do
55
- it 'depends on Transform of the same table and AssignIds of its dependees' do
55
+ it 'depends on Transform of the same table and TableDiff of its dependees' do
56
56
  expect(subject.dependencies).to eql(
57
57
  [
58
- 'dependee_a: AssignIds',
59
- 'dependee_b: AssignIds',
58
+ 'dependee_a: TableDiff',
59
+ 'dependee_b: TableDiff',
60
60
  'depender: Transform',
61
61
  ].to_set
62
62
  )
@@ -18,6 +18,7 @@ module BeetleETL
18
18
 
19
19
  before do
20
20
  test_database.create_table(subject.stage_table_name.to_sym) do
21
+ Integer :id
21
22
  String :external_id, size: 255
22
23
  String :transition, size: 20
23
24
 
@@ -28,7 +29,7 @@ module BeetleETL
28
29
  end
29
30
 
30
31
  test_database.create_table(:example_table) do
31
- Integer :id
32
+ primary_key :id
32
33
  String :external_id, size: 255
33
34
  String :external_source, size: 255
34
35
  DateTime :deleted_at
@@ -47,7 +48,7 @@ module BeetleETL
47
48
 
48
49
  describe '#run' do
49
50
  it 'runs all transitions' do
50
- %w(create update delete reinstate).each do |transition|
51
+ %w(create update delete reinstate keep).each do |transition|
51
52
  expect(subject).to receive(:"transition_#{transition}")
52
53
  end
53
54
 
@@ -59,11 +60,13 @@ module BeetleETL
59
60
  it 'assigns CREATE to new records' do
60
61
 
61
62
  insert_into(:example_table).values(
62
- [ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
63
- [ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
64
- [ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
63
+ [ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
64
+ [ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
65
+ [ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
65
66
  )
66
67
 
68
+ test_database.run "SELECT setval('public.example_table_id_seq', 99)"
69
+
67
70
  insert_into(subject.stage_table_name.to_sym).values(
68
71
  [ :external_id ] ,
69
72
  [ 'created' ] ,
@@ -72,10 +75,10 @@ module BeetleETL
72
75
 
73
76
  subject.transition_create
74
77
 
75
- insert_into(subject.stage_table_name.to_sym).values(
76
- [ :external_id , :transition ] ,
77
- [ 'created' , 'CREATE' ] ,
78
- [ 'existing' , nil ] ,
78
+ expect(subject.stage_table_name.to_sym).to have_values(
79
+ [ :external_id , :id , :transition ] ,
80
+ [ 'created' , 100 , 'CREATE' ] ,
81
+ [ 'existing' , nil , nil ] ,
79
82
  )
80
83
  end
81
84
  end
@@ -85,10 +88,10 @@ module BeetleETL
85
88
  except externald_*_id columns and columns not contained in the stage table' do
86
89
 
87
90
  insert_into(:example_table).values(
88
- [ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
89
- [ 'existing_1' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
90
- [ 'existing_2' , external_source , 'existing content' , 'ignored content' , 2 , nil ] ,
91
- [ 'deleted' , external_source , 'deleted content' , 'ignored content' , 3 , 1.day.ago ] ,
91
+ [ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
92
+ [ 1 , 'existing_1' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
93
+ [ 2 , 'existing_2' , external_source , 'existing content' , 'ignored content' , 2 , nil ] ,
94
+ [ 3 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 3 , 1.day.ago ] ,
92
95
  )
93
96
 
94
97
  insert_into(subject.stage_table_name.to_sym).values(
@@ -101,10 +104,10 @@ module BeetleETL
101
104
  subject.transition_update
102
105
 
103
106
  expect(subject.stage_table_name.to_sym).to have_values(
104
- [ :external_id , :transition ] ,
105
- [ 'existing_1' , 'UPDATE' ] ,
106
- [ 'existing_2' , 'UPDATE' ] ,
107
- [ 'deleted' , nil ] ,
107
+ [ :external_id , :id , :transition ] ,
108
+ [ 'existing_1' , 1 , 'UPDATE' ] ,
109
+ [ 'existing_2' , 2 , 'UPDATE' ] ,
110
+ [ 'deleted' , nil , nil ] ,
108
111
  )
109
112
  end
110
113
  end
@@ -112,16 +115,16 @@ module BeetleETL
112
115
  describe 'transition_delete' do
113
116
  it 'creates records with DELETE that no loger exist in the stage table for the given run' do
114
117
  insert_into(:example_table).values(
115
- [ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
116
- [ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
117
- [ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
118
+ [ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
119
+ [ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
120
+ [ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
118
121
  )
119
122
 
120
123
  subject.transition_delete
121
124
 
122
125
  expect(subject.stage_table_name.to_sym).to have_values(
123
- [ :external_id , :transition ] ,
124
- [ 'existing' , 'DELETE' ] ,
126
+ [ :id , :transition ] ,
127
+ [ 1 , 'DELETE' ] ,
125
128
  )
126
129
  end
127
130
  end
@@ -129,9 +132,9 @@ module BeetleETL
129
132
  describe 'transition_reinstate' do
130
133
  it 'assigns REINSTATE to previously deleted records' do
131
134
  insert_into(:example_table).values(
132
- [ :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
133
- [ 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
134
- [ 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
135
+ [ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
136
+ [ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
137
+ [ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
135
138
  )
136
139
 
137
140
  insert_into(subject.stage_table_name.to_sym).values(
@@ -143,9 +146,34 @@ module BeetleETL
143
146
  subject.transition_reinstate
144
147
 
145
148
  expect(subject.stage_table_name.to_sym).to have_values(
146
- [ :external_id , :transition ] ,
147
- [ 'existing' , nil ] ,
148
- [ 'deleted' , 'REINSTATE' ] ,
149
+ [ :external_id , :id , :transition ] ,
150
+ [ 'existing' , nil , nil ] ,
151
+ [ 'deleted' , 2 , 'REINSTATE' ] ,
152
+ )
153
+ end
154
+ end
155
+
156
+ describe '#transition_keep' do
157
+ it 'assigns KEEP to unchanged records' do
158
+
159
+ insert_into(:example_table).values(
160
+ [ :id , :external_id , :external_source , :payload , :ignored_attribute , :foo_id , :deleted_at ] ,
161
+ [ 1 , 'existing' , external_source , 'existing content' , 'ignored content' , 1 , nil ] ,
162
+ [ 2 , 'deleted' , external_source , 'deleted content' , 'ignored content' , 2 , 1.day.ago ] ,
163
+ )
164
+
165
+ insert_into(subject.stage_table_name.to_sym).values(
166
+ [ :external_id , :payload , :foo_id ] ,
167
+ [ 'created' , nil , nil ] ,
168
+ [ 'existing' , 'existing content' , 1 ] ,
169
+ )
170
+
171
+ subject.transition_keep
172
+
173
+ expect(subject.stage_table_name.to_sym).to have_values(
174
+ [ :external_id , :id , :transition ] ,
175
+ [ 'created' , nil , nil ] ,
176
+ [ 'existing' , 1 , 'KEEP' ] ,
149
177
  )
150
178
  end
151
179
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: beetle_etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luciano Maiwald
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-08-24 00:00:00.000000000 Z
11
+ date: 2017-09-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -132,9 +132,10 @@ files:
132
132
  - lib/beetle_etl/import.rb
133
133
  - lib/beetle_etl/naming.rb
134
134
  - lib/beetle_etl/reporter.rb
135
+ - lib/beetle_etl/step_runner/abstract_step_runner.rb
135
136
  - lib/beetle_etl/step_runner/async_step_runner.rb
136
137
  - lib/beetle_etl/step_runner/dependency_resolver.rb
137
- - lib/beetle_etl/steps/assign_ids.rb
138
+ - lib/beetle_etl/step_runner/sequential_step_runner.rb
138
139
  - lib/beetle_etl/steps/create_stage.rb
139
140
  - lib/beetle_etl/steps/drop_stage.rb
140
141
  - lib/beetle_etl/steps/load.rb
@@ -158,7 +159,6 @@ files:
158
159
  - spec/feature/feature_spec.rb
159
160
  - spec/reporter_spec.rb
160
161
  - spec/spec_helper.rb
161
- - spec/steps/assign_ids_spec.rb
162
162
  - spec/steps/create_stage_spec.rb
163
163
  - spec/steps/load_spec.rb
164
164
  - spec/steps/map_relations_spec.rb
@@ -208,7 +208,6 @@ test_files:
208
208
  - spec/feature/feature_spec.rb
209
209
  - spec/reporter_spec.rb
210
210
  - spec/spec_helper.rb
211
- - spec/steps/assign_ids_spec.rb
212
211
  - spec/steps/create_stage_spec.rb
213
212
  - spec/steps/load_spec.rb
214
213
  - spec/steps/map_relations_spec.rb
@@ -1,23 +0,0 @@
1
- module BeetleETL
2
- class AssignIds < Step
3
-
4
- def dependencies
5
- [TableDiff.step_name(table_name)].to_set
6
- end
7
-
8
- def run
9
- database.execute <<-SQL
10
- UPDATE "#{target_schema}"."#{stage_table_name}" stage_update
11
- SET id = COALESCE(target.id, NEXTVAL('#{target_schema}.#{table_name}_id_seq'))
12
- FROM "#{target_schema}"."#{stage_table_name}" stage
13
- LEFT OUTER JOIN "#{target_schema}"."#{table_name}" target
14
- on (
15
- stage.external_id = target.external_id
16
- AND target.external_source = '#{external_source}'
17
- )
18
- WHERE stage_update.external_id = stage.external_id
19
- SQL
20
- end
21
-
22
- end
23
- end
@@ -1,74 +0,0 @@
1
- require 'spec_helper'
2
-
3
- module BeetleETL
4
- describe AssignIds do
5
-
6
- let(:external_source) { 'my_source' }
7
- let(:another_source) { 'another_source' }
8
-
9
- let(:config) do
10
- OpenStruct.new({
11
- stage_schema: 'stage',
12
- target_schema: 'public',
13
- external_source: external_source,
14
- database: test_database,
15
- })
16
- end
17
-
18
- subject { AssignIds.new(config, :example_table) }
19
-
20
- describe '#dependencies' do
21
- it 'depends on TableDiff of the same table' do
22
- expect(subject.dependencies).to eql(['example_table: TableDiff'].to_set)
23
- end
24
- end
25
-
26
- describe '#run' do
27
- before do
28
- test_database.create_table(subject.stage_table_name.to_sym) do
29
- Integer :id
30
- String :external_id, size: 255
31
- String :transition, size: 255
32
- end
33
-
34
- test_database.create_table(:example_table) do
35
- primary_key :id
36
- String :external_id, size: 255
37
- String :external_source, size: 255
38
- end
39
- end
40
-
41
- it 'assigns ids for' do
42
- # - generated ones for new records
43
- # - mapped ones by external_id for existing records
44
-
45
- insert_into(:example_table).values(
46
- [ :external_id , :external_source ] ,
47
- [ 'a' , external_source ] ,
48
- [ 'b' , external_source ] ,
49
- [ 'c' , external_source ] ,
50
- [ 'd' , another_source ] ,
51
- )
52
-
53
- insert_into(subject.stage_table_name.to_sym).values(
54
- [ :external_id ] ,
55
- [ 'new value' ] ,
56
- [ 'a' ] ,
57
- [ 'b' ] ,
58
- [ 'c' ] ,
59
- )
60
-
61
- subject.run
62
-
63
- expect(subject.stage_table_name.to_sym).to have_values(
64
- [ :id , :external_id ] ,
65
- [ 1 , 'a' ] ,
66
- [ 2 , 'b' ] ,
67
- [ 3 , 'c' ] ,
68
- [ 5 , 'new value' ] ,
69
- )
70
- end
71
- end
72
-
73
- end
74
- end