beetle_etl 0.0.16 → 0.0.19

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 07d1e92398037e916f19762ae9a373de8cd3edfa
4
- data.tar.gz: c064dfbfd3dfd4103864c51d8742f537963abf8c
3
+ metadata.gz: dacfd8801cf603b9cec442ee9ea025bc14026234
4
+ data.tar.gz: 1a183e975200bb9bdc6e631130903b6ead9739be
5
5
  SHA512:
6
- metadata.gz: b984f80cdba06d018d49fa9414cf5443b8098b0edf34f1eb149df32f5c718f1af08676ca05743e064bf7e19475ad32a31459feedb68e0a45b091378c8c869665
7
- data.tar.gz: 3b80f7d9ca88ed49130a6ef6719e9d65fb28d0dca101a2ee23e191bcb413dcf93ae213fefe194e736169cca2b2ab3e0a67444f975a741a7bf323944c4328fb1d
6
+ metadata.gz: bacbe3f6292e52b1ad23d38f5e128b1cf39ec63ded808310a8353956f2b11d349b4ea546012beb22e03edbd3de9bdd787ca0bbde329e1755ebacc23c9561a8ca
7
+ data.tar.gz: cbc51b9be8c35b70c21829af0c38f6db0898f4b5c38a2fc6138968bc63591e91f51d7558b2fff62196ba43789b3255f2347c9982d78de33a4d025e3da167084d
data/README.md CHANGED
@@ -2,7 +2,9 @@
2
2
  [![Build Status](https://travis-ci.org/maiwald/beetle_etl.svg?branch=master)](https://travis-ci.org/maiwald/beetle_etl)
3
3
  [![Code Climate](https://codeclimate.com/github/maiwald/beetle_etl.png)](https://codeclimate.com/github/maiwald/beetle_etl)
4
4
 
5
- TODO: Write a gem description
5
+ BeetleETL helps you with synchronising relational databases and recurring imports of data. It is actually quite nice.
6
+
7
+ It currently only works with PostgreSQL databases.
6
8
 
7
9
  ## Installation
8
10
 
@@ -20,7 +22,53 @@ Or install it yourself as:
20
22
 
21
23
  ## Usage
22
24
 
23
- TODO: Write usage instructions here
25
+ ### Configuration
26
+
27
+ BeetleETL.configure do |config|
28
+ config.transformation_file = # path to your imports
29
+ config.database_config = # sequel database config
30
+ # or config.database = # sequel database instance
31
+ config.external_source = 'source_name'
32
+ config.logger = Logger.new(STDOUT)
33
+ end
34
+
35
+ ### Defining Imports
36
+
37
+ Fill a file with all the tables you wish to import and write queries to select the data you want.
38
+
39
+ import :departments do
40
+ columns :name
41
+
42
+ references :organisations, on: :organisation_id
43
+
44
+ query <<-SQL
45
+ INSERT INTO #{stage_table} (
46
+ external_id,
47
+ name,
48
+ external_organisation_id
49
+ )
50
+
51
+ SELECT
52
+ o.id,
53
+ o."dep_name",
54
+ data."address"
55
+
56
+ FROM "Organisation" o
57
+ JOIN additional_data data
58
+ ON data.org_id = o.id
59
+ SQL
60
+ end
61
+
62
+
63
+ ### Running BeetleETL
64
+
65
+ BeetleETL.import
66
+
67
+ ## Development
68
+
69
+ To run the specs, call:
70
+
71
+ $ bundle exec rspec
24
72
 
25
73
  ## Contributing
26
74
 
@@ -34,12 +34,12 @@ module BeetleETL
34
34
  :database,
35
35
  :transformation_file,
36
36
  :stage_schema,
37
- :public_schema,
37
+ :target_schema,
38
38
  :external_source,
39
39
  :logger
40
40
 
41
41
  def initialize
42
- @public_schema = 'public'
42
+ @target_schema = 'public'
43
43
  @logger = ::Logger.new(STDOUT)
44
44
  end
45
45
  end
@@ -8,8 +8,8 @@ module BeetleETL
8
8
  def initialize(table_name, setup, helpers = nil)
9
9
  @table_name = table_name
10
10
  @parsed = DSL.new(table_name).tap do |dsl|
11
- dsl.instance_eval(&helpers) if helpers
12
- dsl.instance_eval(&setup)
11
+ dsl.instance_exec(&helpers) if helpers
12
+ dsl.instance_exec(&setup)
13
13
  end
14
14
  end
15
15
 
@@ -15,22 +15,22 @@ module BeetleETL
15
15
  %Q("#{stage_table_name(table_name)}")
16
16
  end
17
17
 
18
- def public_table_name(table_name = nil)
18
+ def target_table_name(table_name = nil)
19
19
  name = (table_name || @table_name).to_s
20
- [public_schema, name].compact.join('.')
20
+ [target_schema, name].compact.join('.')
21
21
  end
22
22
 
23
- def public_table_name_sql(table_name = nil)
23
+ def target_table_name_sql(table_name = nil)
24
24
  name = (table_name || @table_name).to_s
25
- public_table_name= [public_schema, name].compact.join('"."')
26
- %Q("#{public_table_name}")
25
+ target_table_name= [target_schema, name].compact.join('"."')
26
+ %Q("#{target_table_name}")
27
27
  end
28
28
 
29
29
  private
30
30
 
31
- def public_schema
32
- public_schema = BeetleETL.config.public_schema
33
- public_schema != 'public' ? public_schema : nil
31
+ def target_schema
32
+ target_schema = BeetleETL.config.target_schema
33
+ target_schema != 'public' ? target_schema : nil
34
34
  end
35
35
 
36
36
  end
@@ -11,7 +11,7 @@ module BeetleETL
11
11
 
12
12
  def resolvables(resolved)
13
13
  @items.select do |item|
14
- (item.dependencies.subset?(resolved.to_set) || item.dependencies.empty?) && !resolved.include?(item.name)
14
+ !resolved.include?(item.name) && all_dependencies_met?(item, resolved)
15
15
  end
16
16
  end
17
17
 
@@ -22,18 +22,15 @@ module BeetleETL
22
22
  resolved = []
23
23
 
24
24
  until items.empty?
25
- resolved_names = resolved.flatten.map(&:name).to_set
26
-
27
- resolvable = items.select do |item|
28
- item.dependencies.subset?(resolved_names) || item.dependencies.empty?
29
- end
30
-
31
- raise UnsatisfiableDependenciesError if resolvable.empty?
32
-
33
- resolvable.each { |r| items.delete r }
34
- resolved << resolvable
25
+ resolvables = items.select { |item| all_dependencies_met?(item, resolved.map(&:name)) }
26
+ raise UnsatisfiableDependenciesError if resolvables.empty?
27
+ resolvables.each { |r| resolved << items.delete(r) }
35
28
  end
36
29
  end
37
30
 
31
+ def all_dependencies_met?(item, resolved)
32
+ item.dependencies.empty? || item.dependencies.subset?(resolved.to_set)
33
+ end
34
+
38
35
  end
39
36
  end
@@ -8,12 +8,12 @@ module BeetleETL
8
8
  def run
9
9
  database.execute <<-SQL
10
10
  UPDATE #{stage_table_name_sql} stage_update
11
- SET id = COALESCE(public.id, nextval('#{table_name}_id_seq'))
11
+ SET id = COALESCE(target.id, nextval('#{table_name}_id_seq'))
12
12
  FROM #{stage_table_name_sql} stage
13
- LEFT OUTER JOIN #{public_table_name_sql} public
13
+ LEFT OUTER JOIN #{target_table_name_sql} target
14
14
  on (
15
- stage.external_id = public.external_id
16
- AND public.external_source = '#{external_source}'
15
+ stage.external_id = target.external_id
16
+ AND target.external_source = '#{external_source}'
17
17
  )
18
18
  WHERE stage_update.external_id = stage.external_id
19
19
  SQL
@@ -23,6 +23,12 @@ module BeetleETL
23
23
 
24
24
  #{index_definitions};
25
25
 
26
+ ALTER TABLE #{stage_table_name_sql}
27
+ SET (
28
+ autovacuum_enabled = false,
29
+ toast.autovacuum_enabled = false
30
+ );
31
+
26
32
  TRUNCATE TABLE #{stage_table_name_sql} RESTART IDENTITY CASCADE;
27
33
  SQL
28
34
  end
@@ -70,7 +76,7 @@ module BeetleETL
70
76
  end
71
77
 
72
78
  def column_type(column_name)
73
- @column_types ||= Hash[database.schema(public_table_name.to_sym)]
79
+ @column_types ||= Hash[database.schema(target_table_name.to_sym)]
74
80
  .reduce({}) do |acc, (name, schema)|
75
81
  acc[name.to_sym] = schema.fetch(:db_type)
76
82
  acc
@@ -12,7 +12,7 @@ module BeetleETL
12
12
  end
13
13
 
14
14
  def run
15
- %w(create update delete reinstate).each do |transition|
15
+ %w(create update delete).each do |transition|
16
16
  public_send(:"load_#{transition}")
17
17
  end
18
18
  end
@@ -25,7 +25,7 @@ module BeetleETL
25
25
  just_now = now
26
26
 
27
27
  database.execute <<-SQL
28
- INSERT INTO #{public_table_name_sql}
28
+ INSERT INTO #{target_table_name_sql}
29
29
  (#{data_columns.join(', ')}, external_source, created_at, updated_at)
30
30
  SELECT
31
31
  #{data_columns.join(', ')},
@@ -39,13 +39,14 @@ module BeetleETL
39
39
 
40
40
  def load_update
41
41
  database.execute <<-SQL
42
- UPDATE #{public_table_name_sql} public
42
+ UPDATE #{target_table_name_sql} target
43
43
  SET
44
44
  #{updatable_columns.map { |c| %Q("#{c}" = stage."#{c}") }.join(',')},
45
- "updated_at" = '#{now}'
45
+ "updated_at" = '#{now}',
46
+ deleted_at = NULL
46
47
  FROM #{stage_table_name_sql} stage
47
- WHERE stage.id = public.id
48
- AND stage.transition = 'UPDATE'
48
+ WHERE stage.id = target.id
49
+ AND stage.transition IN ('UPDATE', 'REINSTATE')
49
50
  SQL
50
51
  end
51
52
 
@@ -53,29 +54,16 @@ module BeetleETL
53
54
  just_now = now
54
55
 
55
56
  database.execute <<-SQL
56
- UPDATE #{public_table_name_sql} public
57
+ UPDATE #{target_table_name_sql} target
57
58
  SET
58
59
  updated_at = '#{just_now}',
59
60
  deleted_at = '#{just_now}'
60
61
  FROM #{stage_table_name_sql} stage
61
- WHERE stage.id = public.id
62
+ WHERE stage.id = target.id
62
63
  AND stage.transition = 'DELETE'
63
64
  SQL
64
65
  end
65
66
 
66
- def load_reinstate
67
- database.execute <<-SQL
68
- UPDATE #{public_table_name_sql} public
69
- SET
70
- #{updatable_columns.map { |c| %Q("#{c}" = stage."#{c}") }.join(',')},
71
- updated_at = '#{now}',
72
- deleted_at = NULL
73
- FROM #{stage_table_name_sql} stage
74
- WHERE stage.id = public.id
75
- AND stage.transition = 'REINSTATE'
76
- SQL
77
- end
78
-
79
67
  private
80
68
 
81
69
  def data_columns
@@ -7,7 +7,8 @@ module BeetleETL
7
7
  end
8
8
 
9
9
  def dependencies
10
- @relations.values.map { |d| AssignIds.step_name(d) }.to_set << Transform.step_name(table_name)
10
+ result = Set.new([Transform.step_name(table_name)])
11
+ result.merge @relations.values.map { |d| AssignIds.step_name(d) }
11
12
  end
12
13
 
13
14
  def run
@@ -22,9 +22,9 @@ module BeetleETL
22
22
  SET transition = 'CREATE'
23
23
  WHERE NOT EXISTS (
24
24
  SELECT 1
25
- FROM #{public_table_name} public
26
- WHERE public.external_id = stage.external_id
27
- AND public.external_source = '#{external_source}'
25
+ FROM #{target_table_name} target
26
+ WHERE target.external_id = stage.external_id
27
+ AND target.external_source = '#{external_source}'
28
28
  )
29
29
  SQL
30
30
  end
@@ -35,12 +35,12 @@ module BeetleETL
35
35
  SET transition = 'UPDATE'
36
36
  WHERE EXISTS (
37
37
  SELECT 1
38
- FROM #{public_table_name} public
39
- WHERE public.external_id = stage.external_id
40
- AND public.external_source = '#{external_source}'
41
- AND public.deleted_at IS NULL
38
+ FROM #{target_table_name} target
39
+ WHERE target.external_id = stage.external_id
40
+ AND target.external_source = '#{external_source}'
41
+ AND target.deleted_at IS NULL
42
42
  AND
43
- (#{public_record_columns.join(', ')})
43
+ (#{target_record_columns.join(', ')})
44
44
  IS DISTINCT FROM
45
45
  (#{stage_record_columns.join(', ')})
46
46
  )
@@ -52,14 +52,14 @@ module BeetleETL
52
52
  INSERT INTO #{stage_table_name_sql}
53
53
  (external_id, transition)
54
54
  SELECT
55
- public.external_id,
55
+ target.external_id,
56
56
  'DELETE'
57
- FROM #{public_table_name_sql} public
57
+ FROM #{target_table_name_sql} target
58
58
  LEFT OUTER JOIN #{stage_table_name_sql} stage
59
- ON (stage.external_id = public.external_id)
59
+ ON (stage.external_id = target.external_id)
60
60
  WHERE stage.external_id IS NULL
61
- AND public.external_source = '#{external_source}'
62
- AND public.deleted_at IS NULL
61
+ AND target.external_source = '#{external_source}'
62
+ AND target.deleted_at IS NULL
63
63
  SQL
64
64
  end
65
65
 
@@ -69,18 +69,18 @@ module BeetleETL
69
69
  SET transition = 'REINSTATE'
70
70
  WHERE EXISTS (
71
71
  SELECT 1
72
- FROM #{public_table_name_sql} public
73
- WHERE public.external_id = stage.external_id
74
- AND public.external_source = '#{external_source}'
75
- AND public.deleted_at IS NOT NULL
72
+ FROM #{target_table_name_sql} target
73
+ WHERE target.external_id = stage.external_id
74
+ AND target.external_source = '#{external_source}'
75
+ AND target.deleted_at IS NOT NULL
76
76
  )
77
77
  SQL
78
78
  end
79
79
 
80
80
  private
81
81
 
82
- def public_record_columns
83
- prefixed_columns(data_columns, 'public')
82
+ def target_record_columns
83
+ prefixed_columns(data_columns, 'target')
84
84
  end
85
85
 
86
86
  def stage_record_columns
@@ -1,3 +1,3 @@
1
1
  module BeetleETL
2
- VERSION = "0.0.16"
2
+ VERSION = "0.0.19"
3
3
  end
@@ -65,7 +65,7 @@ module BeetleETL
65
65
 
66
66
  describe '#run' do
67
67
  it 'runs all load steps' do
68
- %w(create update delete reinstate).each do |transition|
68
+ %w(create update delete).each do |transition|
69
69
  expect(subject).to receive(:"load_#{transition}")
70
70
  end
71
71
 
@@ -74,7 +74,7 @@ module BeetleETL
74
74
  end
75
75
 
76
76
  describe '#load_create' do
77
- it 'loads records into the public table' do
77
+ it 'loads records into the target table' do
78
78
  insert_into(subject.stage_table_name.to_sym).values(
79
79
  [ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
80
80
  [ 3 , 'external_id' , 'CREATE' , 'foo_id' , 22 , 'content' ] ,
@@ -108,10 +108,8 @@ module BeetleETL
108
108
  [ 1 , 'external_id' , external_source , 33 , yesterday , now , nil , 'updated content' ] ,
109
109
  )
110
110
  end
111
- end
112
111
 
113
- describe '#load_delete' do
114
- it 'marks existing records as deleted' do
112
+ it 'restores deleted records' do
115
113
  insert_into(:example_table).values(
116
114
  [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
117
115
  [ 1 , 'external_id' , external_source , 22 , yesterday , yesterday , nil , 'content' ] ,
@@ -119,20 +117,20 @@ module BeetleETL
119
117
 
120
118
  insert_into(subject.stage_table_name.to_sym).values(
121
119
  [ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
122
- [ 1 , 'external_id' , 'DELETE' , 'foo_id' , 33 , 'updated content' ] ,
120
+ [ 1 , 'external_id' , 'REINSTATE' , 'foo_id' , 33 , 'updated content' ] ,
123
121
  )
124
122
 
125
- subject.load_delete
123
+ subject.load_update
126
124
 
127
125
  expect(:example_table).to have_values(
128
- [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
129
- [ 1 , 'external_id' , external_source , 22 , yesterday , now , now , 'content' ] ,
126
+ [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
127
+ [ 1 , 'external_id' , external_source , 33 , yesterday , now , nil , 'updated content' ] ,
130
128
  )
131
129
  end
132
130
  end
133
131
 
134
- describe '#load_reinstate' do
135
- it 'restores deleted records' do
132
+ describe '#load_delete' do
133
+ it 'marks existing records as deleted' do
136
134
  insert_into(:example_table).values(
137
135
  [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
138
136
  [ 1 , 'external_id' , external_source , 22 , yesterday , yesterday , nil , 'content' ] ,
@@ -140,14 +138,14 @@ module BeetleETL
140
138
 
141
139
  insert_into(subject.stage_table_name.to_sym).values(
142
140
  [ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
143
- [ 1 , 'external_id' , 'REINSTATE' , 'foo_id' , 33 , 'updated content' ] ,
141
+ [ 1 , 'external_id' , 'DELETE' , 'foo_id' , 33 , 'updated content' ] ,
144
142
  )
145
143
 
146
- subject.load_reinstate
144
+ subject.load_delete
147
145
 
148
146
  expect(:example_table).to have_values(
149
- [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
150
- [ 1 , 'external_id' , external_source , 33 , yesterday , now , nil , 'updated content' ] ,
147
+ [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
148
+ [ 1 , 'external_id' , external_source , 22 , yesterday , now , now , 'content' ] ,
151
149
  )
152
150
  end
153
151
  end
@@ -34,6 +34,7 @@ module BeetleETL
34
34
  end
35
35
 
36
36
  it 'returns all items with met dependencies' do
37
+ expect(resolver.resolvables([:a])).to match_array([b, c])
37
38
  expect(resolver.resolvables([:a, :b, :c])).to match_array([d])
38
39
  expect(resolver.resolvables([:a, :b, :c, :d])).to match_array([e, f])
39
40
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: beetle_etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.16
4
+ version: 0.0.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luciano Maiwald
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-20 00:00:00.000000000 Z
11
+ date: 2015-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sequel