postgres_upsert 2.0.0 → 5.1.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
Files changed (58)
  1. checksums.yaml +5 -5
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +57 -0
  4. data/.ruby-gemset +1 -0
  5. data/.ruby-version +1 -0
  6. data/.travis.yml +20 -0
  7. data/Gemfile +1 -2
  8. data/Gemfile.lock +175 -85
  9. data/README.md +117 -41
  10. data/Rakefile +4 -16
  11. data/app/assets/config/manifest.js +0 -0
  12. data/bin/bundle +3 -0
  13. data/bin/rails +4 -0
  14. data/bin/rake +4 -0
  15. data/bin/setup +56 -0
  16. data/config.ru +4 -0
  17. data/config/application.rb +21 -0
  18. data/config/boot.rb +3 -0
  19. data/config/database.yml +24 -0
  20. data/config/database.yml.travis +23 -0
  21. data/config/environment.rb +5 -0
  22. data/config/environments/development.rb +41 -0
  23. data/config/environments/production.rb +79 -0
  24. data/config/environments/test.rb +42 -0
  25. data/config/locales/en.yml +23 -0
  26. data/config/routes.rb +56 -0
  27. data/config/secrets.yml +22 -0
  28. data/db/migrate/20150214192135_create_test_tables.rb +24 -0
  29. data/db/migrate/20150710162236_create_composite_models_table.rb +9 -0
  30. data/db/schema.rb +48 -0
  31. data/db/seeds.rb +7 -0
  32. data/lib/postgres_upsert.rb +38 -6
  33. data/lib/postgres_upsert/model_to_model_adapter.rb +37 -0
  34. data/lib/postgres_upsert/read_adapters/active_record_adapter.rb +37 -0
  35. data/lib/postgres_upsert/read_adapters/file_adapter.rb +42 -0
  36. data/lib/postgres_upsert/read_adapters/io_adapter.rb +42 -0
  37. data/lib/postgres_upsert/result.rb +23 -0
  38. data/lib/postgres_upsert/table_writer.rb +48 -0
  39. data/lib/postgres_upsert/write_adapters/active_record_adapter.rb +36 -0
  40. data/lib/postgres_upsert/write_adapters/table_adapter.rb +56 -0
  41. data/lib/postgres_upsert/writer.rb +130 -92
  42. data/postgres_upsert.gemspec +7 -4
  43. data/spec/composite_key_spec.rb +50 -0
  44. data/spec/fixtures/comma_with_header_duplicate.csv +3 -0
  45. data/spec/fixtures/composite_key_model.rb +4 -0
  46. data/spec/fixtures/composite_key_with_header.csv +3 -0
  47. data/spec/fixtures/composite_nonkey_with_header.csv +3 -0
  48. data/spec/fixtures/test_model_copy.rb +4 -0
  49. data/spec/from_table_spec.rb +40 -0
  50. data/spec/pg_upsert_csv_spec.rb +93 -35
  51. data/spec/rails_helper.rb +1 -0
  52. data/spec/spec_helper.rb +9 -37
  53. metadata +106 -37
  54. data/VERSION +0 -1
  55. data/lib/postgres_upsert/active_record.rb +0 -13
  56. data/spec/fixtures/2_col_binary_data.dat +0 -0
  57. data/spec/pg_upsert_binary_spec.rb +0 -35
  58. data/spec/spec.opts +0 -1
@@ -1,60 +1,89 @@
1
1
  module PostgresUpsert
2
-
3
2
  class Writer
4
3
 
5
- def initialize(klass, source, options = {})
4
+ def initialize(klass, destination, source, options = {})
6
5
  @klass = klass
6
+ @destination = destination
7
+ @source = source
7
8
  @options = options.reverse_merge({
8
- :delimiter => ",",
9
- :format => :csv,
10
- :header => true,
11
- :key_column => @klass.primary_key,
12
- :update_only => false})
13
- @source = source.instance_of?(String) ? File.open(source, 'r') : source
14
- @columns_list = get_columns
15
- generate_temp_table_name
9
+ delimiter: ',',
10
+ header: true,
11
+ unique_key: [primary_key],
12
+ update_only: false
13
+ })
14
+ @source = source
15
+ @options[:unique_key] = Array.wrap(@options[:unique_key])
16
+
16
17
  end
17
18
 
18
19
  def write
19
- if @columns_list.empty?
20
- raise "Either the :columns option or :header => true are required"
20
+ validate_options
21
+
22
+
23
+ create_temp_table
24
+
25
+ if @source.continuous_write_enabled
26
+ write_continuous
27
+ else
28
+ write_batched
21
29
  end
22
30
 
23
- csv_options = @options[:format] == :binary ? "BINARY" : "DELIMITER '#{@options[:delimiter]}' CSV"
31
+ upsert_from_temp_table
32
+ drop_temp_table
24
33
 
25
- copy_table = @temp_table_name
26
- destination_table = get_table_name
34
+ summarize_results
35
+ end
27
36
 
28
- columns_string = columns_string_for_copy
29
- create_temp_table
37
+ private
30
38
 
31
- ActiveRecord::Base.connection.raw_connection.copy_data %{COPY #{copy_table} #{columns_string} FROM STDIN #{csv_options}} do
39
+ def write_continuous
40
+ csv_options = "DELIMITER '#{@options[:delimiter]}' CSV"
41
+ @copy_result = database_connection.raw_connection.copy_data %{COPY #{@temp_table_name} #{columns_string_for_copy} FROM STDIN #{csv_options}} do
42
+ while (line = @source.gets)
43
+ next if line.strip.empty?
32
44
 
33
- while line = read_input_line do
34
- next if line.strip.size == 0
35
- ActiveRecord::Base.connection.raw_connection.put_copy_data line
45
+ database_connection.raw_connection.put_copy_data line
36
46
  end
37
47
  end
48
+ end
38
49
 
39
- if destination_table
40
- upsert_from_temp_table
41
- drop_temp_table
50
+ def write_batched
51
+ @source.gets do |line|
52
+ @copy_result = database_connection.raw_connection.copy_data %{COPY #{@temp_table_name} #{columns_string_for_copy} FROM STDIN} do
53
+ database_connection.raw_connection.put_copy_data line
54
+ end
42
55
  end
43
56
  end
44
57
 
45
- private
58
+ def database_connection
59
+ @destination.database_connection
60
+ end
46
61
 
47
- def get_columns
48
- columns_list = @options[:columns] || []
49
- if @options[:format] != :binary && @options[:header]
50
- #if header is present, we need to strip it from io, whether we use it for the columns list or not.
51
- line = @source.gets
52
- if columns_list.empty?
53
- columns_list = line.strip.split(@options[:delimiter])
54
- end
62
+ def summarize_results
63
+ result = PostgresUpsert::Result.new(@insert_result, @update_result, @copy_result)
64
+ expected_rows = @options[:update_only] ? result.updated_rows : result.copied_rows
65
+
66
+ if result.changed_rows != expected_rows
67
+ raise "#{expected_rows} rows were copied, but #{result.changed_rows} were upserted to destination table. Check to make sure your key is unique."
55
68
  end
56
- columns_list = columns_list.map{|c| @options[:map][c.to_s] } if @options[:map]
57
- return columns_list
69
+
70
+ result
71
+ end
72
+
73
+ def primary_key
74
+ @destination.primary_key
75
+ end
76
+
77
+ def destination_columns
78
+ @destination.column_names
79
+ end
80
+
81
+ def quoted_table_name
82
+ @destination.quoted_table_name
83
+ end
84
+
85
+ def source_columns
86
+ @source.columns
58
87
  end
59
88
 
60
89
  def columns_string_for_copy
@@ -63,60 +92,50 @@ module PostgresUpsert
63
92
  end
64
93
 
65
94
  def columns_string_for_select
66
- columns = @columns_list.clone
67
- columns << "created_at" if @klass.column_names.include?("created_at")
68
- columns << "updated_at" if @klass.column_names.include?("updated_at")
69
- str = get_columns_string(columns)
95
+ columns = source_columns.clone
96
+ columns << 'created_at' if inject_create_timestamp?
97
+ columns << 'updated_at' if inject_update_timestamp?
98
+ get_columns_string(columns)
70
99
  end
71
100
 
72
101
  def columns_string_for_insert
73
- columns = @columns_list.clone
74
- columns << "created_at" if @klass.column_names.include?("created_at")
75
- columns << "updated_at" if @klass.column_names.include?("updated_at")
76
- str = get_columns_string(columns)
102
+ columns = source_columns.clone
103
+ columns << 'created_at' if inject_create_timestamp?
104
+ columns << 'updated_at' if inject_update_timestamp?
105
+ get_columns_string(columns)
77
106
  end
78
107
 
79
108
  def select_string_for_insert
80
- columns = @columns_list.clone
109
+ columns = source_columns.clone
81
110
  str = get_columns_string(columns)
82
- str << ",'#{DateTime.now.utc}'" if @klass.column_names.include?("created_at")
83
- str << ",'#{DateTime.now.utc}'" if @klass.column_names.include?("updated_at")
111
+ str << ",'#{DateTime.now.utc}'" if inject_create_timestamp?
112
+ str << ",'#{DateTime.now.utc}'" if inject_update_timestamp?
84
113
  str
85
114
  end
86
115
 
87
- def select_string_for_create
88
- columns = @columns_list.map(&:to_sym)
89
- columns << @options[:key_column].to_sym unless columns.include?(@options[:key_column].to_sym)
90
- get_columns_string(columns)
116
+ def inject_create_timestamp?
117
+ destination_columns.include?('created_at') && !source_columns.include?('created_at')
91
118
  end
92
119
 
93
- def get_columns_string(columns = nil)
94
- columns ||= @columns_list
95
- columns.size > 0 ? "\"#{columns.join('","')}\"" : ""
120
+ def inject_update_timestamp?
121
+ destination_columns.include?('updated_at') && !source_columns.include?('updated_at')
96
122
  end
97
123
 
98
- def get_table_name
99
- if @options[:table]
100
- connection.quote_table_name(@options[:table])
101
- else
102
- @klass.quoted_table_name
124
+ def select_string_for_create
125
+ columns = source_columns.map(&:to_sym)
126
+ @options[:unique_key].each do |key_component|
127
+ columns << key_component.to_sym unless columns.include?(key_component.to_sym)
103
128
  end
129
+ get_columns_string(columns)
104
130
  end
105
131
 
106
- def generate_temp_table_name
107
- @temp_table_name = "#{@klass.table_name}_temp_#{rand(1000)}"
132
+ def get_columns_string(columns = nil)
133
+ columns ||= source_columns
134
+ !columns.empty? ? "\"#{columns.join('","')}\"" : ''
108
135
  end
109
136
 
110
- def read_input_line
111
- if @options[:format] == :binary
112
- begin
113
- return @source.readpartial(10240)
114
- rescue EOFError
115
- end
116
- else
117
- line = @source.gets
118
- return line
119
- end
137
+ def generate_temp_table_name
138
+ @temp_table_name ||= "#{@table_name}_temp_#{rand(1000)}"
120
139
  end
121
140
 
122
141
  def upsert_from_temp_table
@@ -125,55 +144,74 @@ module PostgresUpsert
125
144
  end
126
145
 
127
146
  def update_from_temp_table
128
- ActiveRecord::Base.connection.execute <<-SQL
129
- UPDATE #{get_table_name} AS d
147
+ @update_result = database_connection.execute <<-SQL
148
+ UPDATE #{quoted_table_name} AS d
130
149
  #{update_set_clause}
131
150
  FROM #{@temp_table_name} as t
132
- WHERE t.#{@options[:key_column]} = d.#{@options[:key_column]}
133
- AND d.#{@options[:key_column]} IS NOT NULL;
151
+ WHERE #{unique_key_select('t', 'd')}
152
+ AND #{unique_key_present('d')}
134
153
  SQL
135
154
  end
136
155
 
137
156
  def update_set_clause
138
- command = @columns_list.map do |col|
157
+ command = source_columns.map do |col|
139
158
  "\"#{col}\" = t.\"#{col}\""
140
159
  end
141
- command << "\"updated_at\" = '#{DateTime.now.utc}'" if @klass.column_names.include?("updated_at")
142
- "SET #{command.join(',')}"
160
+ unless source_columns.include?('updated_at')
161
+ command << "\"updated_at\" = '#{DateTime.now.utc}'" if destination_columns.include?('updated_at')
162
+ end
163
+ "SET #{command.join(',')}"
143
164
  end
144
165
 
145
166
  def insert_from_temp_table
146
167
  columns_string = columns_string_for_insert
147
168
  select_string = select_string_for_insert
148
- ActiveRecord::Base.connection.execute <<-SQL
149
- INSERT INTO #{get_table_name} (#{columns_string})
169
+ @insert_result = database_connection.execute <<-SQL
170
+ INSERT INTO #{quoted_table_name} (#{columns_string})
150
171
  SELECT #{select_string}
151
172
  FROM #{@temp_table_name} as t
152
- WHERE NOT EXISTS
153
- (SELECT 1
154
- FROM #{get_table_name} as d
155
- WHERE d.#{@options[:key_column]} = t.#{@options[:key_column]})
156
- AND t.#{@options[:key_column]} IS NOT NULL;
173
+ WHERE NOT EXISTS
174
+ (SELECT 1
175
+ FROM #{quoted_table_name} as d
176
+ WHERE #{unique_key_select('t', 'd')});
157
177
  SQL
158
178
  end
159
179
 
180
+ def unique_key_select(source, dest)
181
+ @options[:unique_key].map { |field| "#{source}.#{field} = #{dest}.#{field}" }.join(' AND ')
182
+ end
183
+
184
+ def unique_key_present(source)
185
+ @options[:unique_key].map { |field| "#{source}.#{field} IS NOT NULL" }.join(' AND ')
186
+ end
187
+
160
188
  def create_temp_table
161
- columns_string = select_string_for_create
162
- ActiveRecord::Base.connection.execute <<-SQL
189
+ generate_temp_table_name
190
+ database_connection.execute <<-SQL
163
191
  SET client_min_messages=WARNING;
164
192
  DROP TABLE IF EXISTS #{@temp_table_name};
165
193
 
166
- CREATE TEMP TABLE #{@temp_table_name}
167
- AS SELECT #{columns_string} FROM #{get_table_name} WHERE 0 = 1;
194
+ CREATE TEMP TABLE #{@temp_table_name}
195
+ AS SELECT #{select_string_for_create} FROM #{quoted_table_name} WHERE 0 = 1;
168
196
  SQL
169
197
  end
170
198
 
199
+ def validate_options
200
+ if source_columns.empty?
201
+ raise 'Either the :columns option or :header => true are required'
202
+ end
203
+
204
+ @options[:unique_key].each do |key_component|
205
+ unless source_columns.include?(key_component.to_s)
206
+ raise "Expected column '#{key_component}' was not found in source"
207
+ end
208
+ end
209
+ end
210
+
171
211
  def drop_temp_table
172
- ActiveRecord::Base.connection.execute <<-SQL
173
- DROP TABLE #{@temp_table_name}
212
+ database_connection.execute <<-SQL
213
+ DROP TABLE #{@temp_table_name}
174
214
  SQL
175
215
  end
176
216
  end
177
-
178
-
179
217
  end
@@ -5,7 +5,7 @@ $:.unshift lib unless $:.include?(lib)
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "postgres_upsert"
8
- s.version = "2.0.0"
8
+ s.version = "5.1.0"
9
9
 
10
10
  s.platform = Gem::Platform::RUBY
11
11
  s.required_ruby_version = ">= 1.8.7"
@@ -13,6 +13,7 @@ Gem::Specification.new do |s|
13
13
  s.date = "2014-09-12"
14
14
  s.description = "Uses Postgres's powerful COPY command to upsert large sets of data into ActiveRecord tables"
15
15
  s.email = "thestevemitchell@gmail.com"
16
+ s.license = "MIT"
16
17
  git_files = `git ls-files`.split("\n") rescue ''
17
18
  s.files = git_files
18
19
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -22,12 +23,14 @@ Gem::Specification.new do |s|
22
23
  s.require_paths = ["lib"]
23
24
  s.summary = "A rubygem that integrates with ActiveRecord to insert/update large data sets into the database efficiently"
24
25
 
25
- s.add_dependency "pg", '~> 0.17.0'
26
+ s.add_dependency "pg", '>= 0.17.0'
26
27
  s.add_dependency "activerecord", '>= 3.0.0'
27
28
  s.add_dependency "rails", '>= 3.0.0'
28
29
  s.add_development_dependency "bundler"
29
- s.add_development_dependency "rdoc"
30
30
  s.add_development_dependency "pry-rails"
31
- s.add_development_dependency "rspec", "~> 2.12"
31
+ s.add_development_dependency "pry-nav"
32
+ s.add_development_dependency "rspec-rails", ">= 3.9"
33
+ s.add_development_dependency "database_cleaner-active_record"
34
+ s.add_development_dependency "rubocop"
32
35
  end
33
36
 
@@ -0,0 +1,50 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "pg_upsert from file with CSV format" do
4
+
5
+ before do
6
+ DateTime.stub_chain(:now, :utc).and_return (DateTime.parse("2012-01-01").utc)
7
+ end
8
+
9
+ def timestamp
10
+ DateTime.now.utc
11
+ end
12
+
13
+ context 'composite_key_support' do
14
+ it 'inserts records if the passed match composite key doesnt exist' do
15
+ file = File.open(File.expand_path('spec/fixtures/composite_key_with_header.csv'), 'r')
16
+
17
+ PostgresUpsert.write(CompositeKeyModel, file, :unique_key => ["comp_key_1", "comp_key_2"])
18
+ expect(
19
+ CompositeKeyModel.last.attributes
20
+ ).to include("data" => "test data 2")
21
+ end
22
+
23
+ it 'updates records if the passed composite key exists' do
24
+ file = File.open(File.expand_path('spec/fixtures/composite_key_with_header.csv'), 'r')
25
+ existing = CompositeKeyModel.create(comp_key_1: 2, comp_key_2:3, data: "old stuff")
26
+
27
+ PostgresUpsert.write(CompositeKeyModel, file, :unique_key => ["comp_key_1", "comp_key_2"])
28
+
29
+ expect(
30
+ CompositeKeyModel.find_by({comp_key_1: 2, comp_key_2:3}).attributes
31
+ ).to include("data" => "test data 2")
32
+
33
+ expect(
34
+ CompositeKeyModel.find_by({comp_key_1: 1, comp_key_2:2}).attributes
35
+ ).to include("data" => "test data 1")
36
+ end
37
+
38
+ it 'fails if composite keys are not unique.' do
39
+ file = File.open(File.expand_path('spec/fixtures/composite_nonkey_with_header.csv'), 'r')
40
+ existing = CompositeKeyModel.create(comp_key_1: 1, comp_key_2:2, data: "old stuff")
41
+
42
+ expect{
43
+ PostgresUpsert.write(CompositeKeyModel, file, :unique_key => ["comp_key_1", "comp_key_2"])
44
+ }.to raise_error(/Check to make sure your key is unique/)
45
+ end
46
+
47
+ end
48
+
49
+
50
+ end
@@ -0,0 +1,3 @@
1
+ id,data
2
+ 1,test data 1
3
+ 1,test data dupe
@@ -0,0 +1,4 @@
1
+ require 'postgres_upsert'
2
+
3
+ class CompositeKeyModel < ActiveRecord::Base
4
+ end
@@ -0,0 +1,3 @@
1
+ comp_key_1,comp_key_2,data
2
+ 1,2,test data 1
3
+ 2,3,test data 2
@@ -0,0 +1,3 @@
1
+ comp_key_1,comp_key_2,data
2
+ 1,2,test data 1
3
+ 1,2,test data 4
@@ -0,0 +1,4 @@
1
+ require 'postgres_upsert'
2
+
3
+ class TestModelCopy < ActiveRecord::Base
4
+ end
@@ -0,0 +1,40 @@
1
+ require "rails_helper"
2
+
3
+ describe PostgresUpsert do
4
+ context "when passing ActiveRecord class as destination" do
5
+ context "when passing ActiveRecord clas as Source" do
6
+ let(:original_created_at) {5.days.ago.utc}
7
+
8
+ before(:each) do
9
+ TestModel.create(data: "From the before time, in the long long ago", :created_at => original_created_at)
10
+ end
11
+
12
+ it "copies the source to destination" do
13
+ PostgresUpsert.write TestModelCopy, TestModel
14
+ expect(
15
+ TestModelCopy.first.attributes
16
+ ).to eq(TestModelCopy.first.attributes)
17
+ end
18
+
19
+ context "with a large table" do
20
+ before do
21
+ csv_string = CSV.generate do |csv|
22
+ csv << %w(id data) # CSV header row
23
+ (1..100_000).each do |n|
24
+ csv << ["#{n}", "data about #{n}"]
25
+ end
26
+ end
27
+ io = StringIO.new(csv_string)
28
+ PostgresUpsert.write TestModel, io
29
+ end
30
+
31
+ it "moves like the poop through a goose" do
32
+ expect{
33
+ PostgresUpsert.write TestModelCopy, TestModel
34
+ }.to change{TestModelCopy.count}.by(100_000)
35
+
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end