beetle_etl 0.0.16 → 0.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 07d1e92398037e916f19762ae9a373de8cd3edfa
4
- data.tar.gz: c064dfbfd3dfd4103864c51d8742f537963abf8c
3
+ metadata.gz: dacfd8801cf603b9cec442ee9ea025bc14026234
4
+ data.tar.gz: 1a183e975200bb9bdc6e631130903b6ead9739be
5
5
  SHA512:
6
- metadata.gz: b984f80cdba06d018d49fa9414cf5443b8098b0edf34f1eb149df32f5c718f1af08676ca05743e064bf7e19475ad32a31459feedb68e0a45b091378c8c869665
7
- data.tar.gz: 3b80f7d9ca88ed49130a6ef6719e9d65fb28d0dca101a2ee23e191bcb413dcf93ae213fefe194e736169cca2b2ab3e0a67444f975a741a7bf323944c4328fb1d
6
+ metadata.gz: bacbe3f6292e52b1ad23d38f5e128b1cf39ec63ded808310a8353956f2b11d349b4ea546012beb22e03edbd3de9bdd787ca0bbde329e1755ebacc23c9561a8ca
7
+ data.tar.gz: cbc51b9be8c35b70c21829af0c38f6db0898f4b5c38a2fc6138968bc63591e91f51d7558b2fff62196ba43789b3255f2347c9982d78de33a4d025e3da167084d
data/README.md CHANGED
@@ -2,7 +2,9 @@
2
2
  [![Build Status](https://travis-ci.org/maiwald/beetle_etl.svg?branch=master)](https://travis-ci.org/maiwald/beetle_etl)
3
3
  [![Code Climate](https://codeclimate.com/github/maiwald/beetle_etl.png)](https://codeclimate.com/github/maiwald/beetle_etl)
4
4
 
5
- TODO: Write a gem description
5
+ BeetleETL helps you with synchronising relational databases and recurring imports of data. It is actually quite nice.
6
+
7
+ It currently only works with PostgreSQL databases.
6
8
 
7
9
  ## Installation
8
10
 
@@ -20,7 +22,53 @@ Or install it yourself as:
20
22
 
21
23
  ## Usage
22
24
 
23
- TODO: Write usage instructions here
25
+ ### Configuration
26
+
27
+ BeetleETL.configure do |config|
28
+ config.transformation_file = # path to your imports
29
+ config.database_config = # sequel database config
30
+ # or config.database = # sequel database instance
31
+ config.external_source = 'source_name'
32
+ config.logger = Logger.new(STDOUT)
33
+ end
34
+
35
+ ### Defining Imports
36
+
37
+ Fill a file with all the tables you wish to import and write queries to select the data you want.
38
+
39
+ import :departments do
40
+ columns :name
41
+
42
+ references :organisations, on: :organisation_id
43
+
44
+ query <<-SQL
45
+ INSERT INTO #{stage_table} (
46
+ external_id,
47
+ name,
48
+ external_organisation_id
49
+ )
50
+
51
+ SELECT
52
+ o.id,
53
+ o."dep_name",
54
+ data."address"
55
+
56
+ FROM "Organisation" o
57
+ JOIN additional_data data
58
+ ON data.org_id = o.id
59
+ SQL
60
+ end
61
+
62
+
63
+ ### Running BeetleETL
64
+
65
+ BeetleETL.import
66
+
67
+ ## Development
68
+
69
+ To run the specs, call:
70
+
71
+ $ bundle exec rspec
24
72
 
25
73
  ## Contributing
26
74
 
@@ -34,12 +34,12 @@ module BeetleETL
34
34
  :database,
35
35
  :transformation_file,
36
36
  :stage_schema,
37
- :public_schema,
37
+ :target_schema,
38
38
  :external_source,
39
39
  :logger
40
40
 
41
41
  def initialize
42
- @public_schema = 'public'
42
+ @target_schema = 'public'
43
43
  @logger = ::Logger.new(STDOUT)
44
44
  end
45
45
  end
@@ -8,8 +8,8 @@ module BeetleETL
8
8
  def initialize(table_name, setup, helpers = nil)
9
9
  @table_name = table_name
10
10
  @parsed = DSL.new(table_name).tap do |dsl|
11
- dsl.instance_eval(&helpers) if helpers
12
- dsl.instance_eval(&setup)
11
+ dsl.instance_exec(&helpers) if helpers
12
+ dsl.instance_exec(&setup)
13
13
  end
14
14
  end
15
15
 
@@ -15,22 +15,22 @@ module BeetleETL
15
15
  %Q("#{stage_table_name(table_name)}")
16
16
  end
17
17
 
18
- def public_table_name(table_name = nil)
18
+ def target_table_name(table_name = nil)
19
19
  name = (table_name || @table_name).to_s
20
- [public_schema, name].compact.join('.')
20
+ [target_schema, name].compact.join('.')
21
21
  end
22
22
 
23
- def public_table_name_sql(table_name = nil)
23
+ def target_table_name_sql(table_name = nil)
24
24
  name = (table_name || @table_name).to_s
25
- public_table_name= [public_schema, name].compact.join('"."')
26
- %Q("#{public_table_name}")
25
+ target_table_name= [target_schema, name].compact.join('"."')
26
+ %Q("#{target_table_name}")
27
27
  end
28
28
 
29
29
  private
30
30
 
31
- def public_schema
32
- public_schema = BeetleETL.config.public_schema
33
- public_schema != 'public' ? public_schema : nil
31
+ def target_schema
32
+ target_schema = BeetleETL.config.target_schema
33
+ target_schema != 'public' ? target_schema : nil
34
34
  end
35
35
 
36
36
  end
@@ -11,7 +11,7 @@ module BeetleETL
11
11
 
12
12
  def resolvables(resolved)
13
13
  @items.select do |item|
14
- (item.dependencies.subset?(resolved.to_set) || item.dependencies.empty?) && !resolved.include?(item.name)
14
+ !resolved.include?(item.name) && all_dependencies_met?(item, resolved)
15
15
  end
16
16
  end
17
17
 
@@ -22,18 +22,15 @@ module BeetleETL
22
22
  resolved = []
23
23
 
24
24
  until items.empty?
25
- resolved_names = resolved.flatten.map(&:name).to_set
26
-
27
- resolvable = items.select do |item|
28
- item.dependencies.subset?(resolved_names) || item.dependencies.empty?
29
- end
30
-
31
- raise UnsatisfiableDependenciesError if resolvable.empty?
32
-
33
- resolvable.each { |r| items.delete r }
34
- resolved << resolvable
25
+ resolvables = items.select { |item| all_dependencies_met?(item, resolved.map(&:name)) }
26
+ raise UnsatisfiableDependenciesError if resolvables.empty?
27
+ resolvables.each { |r| resolved << items.delete(r) }
35
28
  end
36
29
  end
37
30
 
31
+ def all_dependencies_met?(item, resolved)
32
+ item.dependencies.empty? || item.dependencies.subset?(resolved.to_set)
33
+ end
34
+
38
35
  end
39
36
  end
@@ -8,12 +8,12 @@ module BeetleETL
8
8
  def run
9
9
  database.execute <<-SQL
10
10
  UPDATE #{stage_table_name_sql} stage_update
11
- SET id = COALESCE(public.id, nextval('#{table_name}_id_seq'))
11
+ SET id = COALESCE(target.id, nextval('#{table_name}_id_seq'))
12
12
  FROM #{stage_table_name_sql} stage
13
- LEFT OUTER JOIN #{public_table_name_sql} public
13
+ LEFT OUTER JOIN #{target_table_name_sql} target
14
14
  on (
15
- stage.external_id = public.external_id
16
- AND public.external_source = '#{external_source}'
15
+ stage.external_id = target.external_id
16
+ AND target.external_source = '#{external_source}'
17
17
  )
18
18
  WHERE stage_update.external_id = stage.external_id
19
19
  SQL
@@ -23,6 +23,12 @@ module BeetleETL
23
23
 
24
24
  #{index_definitions};
25
25
 
26
+ ALTER TABLE #{stage_table_name_sql}
27
+ SET (
28
+ autovacuum_enabled = false,
29
+ toast.autovacuum_enabled = false
30
+ );
31
+
26
32
  TRUNCATE TABLE #{stage_table_name_sql} RESTART IDENTITY CASCADE;
27
33
  SQL
28
34
  end
@@ -70,7 +76,7 @@ module BeetleETL
70
76
  end
71
77
 
72
78
  def column_type(column_name)
73
- @column_types ||= Hash[database.schema(public_table_name.to_sym)]
79
+ @column_types ||= Hash[database.schema(target_table_name.to_sym)]
74
80
  .reduce({}) do |acc, (name, schema)|
75
81
  acc[name.to_sym] = schema.fetch(:db_type)
76
82
  acc
@@ -12,7 +12,7 @@ module BeetleETL
12
12
  end
13
13
 
14
14
  def run
15
- %w(create update delete reinstate).each do |transition|
15
+ %w(create update delete).each do |transition|
16
16
  public_send(:"load_#{transition}")
17
17
  end
18
18
  end
@@ -25,7 +25,7 @@ module BeetleETL
25
25
  just_now = now
26
26
 
27
27
  database.execute <<-SQL
28
- INSERT INTO #{public_table_name_sql}
28
+ INSERT INTO #{target_table_name_sql}
29
29
  (#{data_columns.join(', ')}, external_source, created_at, updated_at)
30
30
  SELECT
31
31
  #{data_columns.join(', ')},
@@ -39,13 +39,14 @@ module BeetleETL
39
39
 
40
40
  def load_update
41
41
  database.execute <<-SQL
42
- UPDATE #{public_table_name_sql} public
42
+ UPDATE #{target_table_name_sql} target
43
43
  SET
44
44
  #{updatable_columns.map { |c| %Q("#{c}" = stage."#{c}") }.join(',')},
45
- "updated_at" = '#{now}'
45
+ "updated_at" = '#{now}',
46
+ deleted_at = NULL
46
47
  FROM #{stage_table_name_sql} stage
47
- WHERE stage.id = public.id
48
- AND stage.transition = 'UPDATE'
48
+ WHERE stage.id = target.id
49
+ AND stage.transition IN ('UPDATE', 'REINSTATE')
49
50
  SQL
50
51
  end
51
52
 
@@ -53,29 +54,16 @@ module BeetleETL
53
54
  just_now = now
54
55
 
55
56
  database.execute <<-SQL
56
- UPDATE #{public_table_name_sql} public
57
+ UPDATE #{target_table_name_sql} target
57
58
  SET
58
59
  updated_at = '#{just_now}',
59
60
  deleted_at = '#{just_now}'
60
61
  FROM #{stage_table_name_sql} stage
61
- WHERE stage.id = public.id
62
+ WHERE stage.id = target.id
62
63
  AND stage.transition = 'DELETE'
63
64
  SQL
64
65
  end
65
66
 
66
- def load_reinstate
67
- database.execute <<-SQL
68
- UPDATE #{public_table_name_sql} public
69
- SET
70
- #{updatable_columns.map { |c| %Q("#{c}" = stage."#{c}") }.join(',')},
71
- updated_at = '#{now}',
72
- deleted_at = NULL
73
- FROM #{stage_table_name_sql} stage
74
- WHERE stage.id = public.id
75
- AND stage.transition = 'REINSTATE'
76
- SQL
77
- end
78
-
79
67
  private
80
68
 
81
69
  def data_columns
@@ -7,7 +7,8 @@ module BeetleETL
7
7
  end
8
8
 
9
9
  def dependencies
10
- @relations.values.map { |d| AssignIds.step_name(d) }.to_set << Transform.step_name(table_name)
10
+ result = Set.new([Transform.step_name(table_name)])
11
+ result.merge @relations.values.map { |d| AssignIds.step_name(d) }
11
12
  end
12
13
 
13
14
  def run
@@ -22,9 +22,9 @@ module BeetleETL
22
22
  SET transition = 'CREATE'
23
23
  WHERE NOT EXISTS (
24
24
  SELECT 1
25
- FROM #{public_table_name} public
26
- WHERE public.external_id = stage.external_id
27
- AND public.external_source = '#{external_source}'
25
+ FROM #{target_table_name} target
26
+ WHERE target.external_id = stage.external_id
27
+ AND target.external_source = '#{external_source}'
28
28
  )
29
29
  SQL
30
30
  end
@@ -35,12 +35,12 @@ module BeetleETL
35
35
  SET transition = 'UPDATE'
36
36
  WHERE EXISTS (
37
37
  SELECT 1
38
- FROM #{public_table_name} public
39
- WHERE public.external_id = stage.external_id
40
- AND public.external_source = '#{external_source}'
41
- AND public.deleted_at IS NULL
38
+ FROM #{target_table_name} target
39
+ WHERE target.external_id = stage.external_id
40
+ AND target.external_source = '#{external_source}'
41
+ AND target.deleted_at IS NULL
42
42
  AND
43
- (#{public_record_columns.join(', ')})
43
+ (#{target_record_columns.join(', ')})
44
44
  IS DISTINCT FROM
45
45
  (#{stage_record_columns.join(', ')})
46
46
  )
@@ -52,14 +52,14 @@ module BeetleETL
52
52
  INSERT INTO #{stage_table_name_sql}
53
53
  (external_id, transition)
54
54
  SELECT
55
- public.external_id,
55
+ target.external_id,
56
56
  'DELETE'
57
- FROM #{public_table_name_sql} public
57
+ FROM #{target_table_name_sql} target
58
58
  LEFT OUTER JOIN #{stage_table_name_sql} stage
59
- ON (stage.external_id = public.external_id)
59
+ ON (stage.external_id = target.external_id)
60
60
  WHERE stage.external_id IS NULL
61
- AND public.external_source = '#{external_source}'
62
- AND public.deleted_at IS NULL
61
+ AND target.external_source = '#{external_source}'
62
+ AND target.deleted_at IS NULL
63
63
  SQL
64
64
  end
65
65
 
@@ -69,18 +69,18 @@ module BeetleETL
69
69
  SET transition = 'REINSTATE'
70
70
  WHERE EXISTS (
71
71
  SELECT 1
72
- FROM #{public_table_name_sql} public
73
- WHERE public.external_id = stage.external_id
74
- AND public.external_source = '#{external_source}'
75
- AND public.deleted_at IS NOT NULL
72
+ FROM #{target_table_name_sql} target
73
+ WHERE target.external_id = stage.external_id
74
+ AND target.external_source = '#{external_source}'
75
+ AND target.deleted_at IS NOT NULL
76
76
  )
77
77
  SQL
78
78
  end
79
79
 
80
80
  private
81
81
 
82
- def public_record_columns
83
- prefixed_columns(data_columns, 'public')
82
+ def target_record_columns
83
+ prefixed_columns(data_columns, 'target')
84
84
  end
85
85
 
86
86
  def stage_record_columns
@@ -1,3 +1,3 @@
1
1
  module BeetleETL
2
- VERSION = "0.0.16"
2
+ VERSION = "0.0.19"
3
3
  end
@@ -65,7 +65,7 @@ module BeetleETL
65
65
 
66
66
  describe '#run' do
67
67
  it 'runs all load steps' do
68
- %w(create update delete reinstate).each do |transition|
68
+ %w(create update delete).each do |transition|
69
69
  expect(subject).to receive(:"load_#{transition}")
70
70
  end
71
71
 
@@ -74,7 +74,7 @@ module BeetleETL
74
74
  end
75
75
 
76
76
  describe '#load_create' do
77
- it 'loads records into the public table' do
77
+ it 'loads records into the target table' do
78
78
  insert_into(subject.stage_table_name.to_sym).values(
79
79
  [ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
80
80
  [ 3 , 'external_id' , 'CREATE' , 'foo_id' , 22 , 'content' ] ,
@@ -108,10 +108,8 @@ module BeetleETL
108
108
  [ 1 , 'external_id' , external_source , 33 , yesterday , now , nil , 'updated content' ] ,
109
109
  )
110
110
  end
111
- end
112
111
 
113
- describe '#load_delete' do
114
- it 'marks existing records as deleted' do
112
+ it 'restores deleted records' do
115
113
  insert_into(:example_table).values(
116
114
  [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
117
115
  [ 1 , 'external_id' , external_source , 22 , yesterday , yesterday , nil , 'content' ] ,
@@ -119,20 +117,20 @@ module BeetleETL
119
117
 
120
118
  insert_into(subject.stage_table_name.to_sym).values(
121
119
  [ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
122
- [ 1 , 'external_id' , 'DELETE' , 'foo_id' , 33 , 'updated content' ] ,
120
+ [ 1 , 'external_id' , 'REINSTATE' , 'foo_id' , 33 , 'updated content' ] ,
123
121
  )
124
122
 
125
- subject.load_delete
123
+ subject.load_update
126
124
 
127
125
  expect(:example_table).to have_values(
128
- [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
129
- [ 1 , 'external_id' , external_source , 22 , yesterday , now , now , 'content' ] ,
126
+ [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
127
+ [ 1 , 'external_id' , external_source , 33 , yesterday , now , nil , 'updated content' ] ,
130
128
  )
131
129
  end
132
130
  end
133
131
 
134
- describe '#load_reinstate' do
135
- it 'restores deleted records' do
132
+ describe '#load_delete' do
133
+ it 'marks existing records as deleted' do
136
134
  insert_into(:example_table).values(
137
135
  [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
138
136
  [ 1 , 'external_id' , external_source , 22 , yesterday , yesterday , nil , 'content' ] ,
@@ -140,14 +138,14 @@ module BeetleETL
140
138
 
141
139
  insert_into(subject.stage_table_name.to_sym).values(
142
140
  [ :id , :external_id , :transition , :external_foo_id , :foo_id , :payload ] ,
143
- [ 1 , 'external_id' , 'REINSTATE' , 'foo_id' , 33 , 'updated content' ] ,
141
+ [ 1 , 'external_id' , 'DELETE' , 'foo_id' , 33 , 'updated content' ] ,
144
142
  )
145
143
 
146
- subject.load_reinstate
144
+ subject.load_delete
147
145
 
148
146
  expect(:example_table).to have_values(
149
- [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
150
- [ 1 , 'external_id' , external_source , 33 , yesterday , now , nil , 'updated content' ] ,
147
+ [ :id , :external_id , :external_source , :foo_id , :created_at , :updated_at , :deleted_at , :payload ] ,
148
+ [ 1 , 'external_id' , external_source , 22 , yesterday , now , now , 'content' ] ,
151
149
  )
152
150
  end
153
151
  end
@@ -34,6 +34,7 @@ module BeetleETL
34
34
  end
35
35
 
36
36
  it 'returns all items with met dependencies' do
37
+ expect(resolver.resolvables([:a])).to match_array([b, c])
37
38
  expect(resolver.resolvables([:a, :b, :c])).to match_array([d])
38
39
  expect(resolver.resolvables([:a, :b, :c, :d])).to match_array([e, f])
39
40
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: beetle_etl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.16
4
+ version: 0.0.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luciano Maiwald
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-20 00:00:00.000000000 Z
11
+ date: 2015-04-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sequel