postgres_upsert 2.0.0 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +3 -0
- data/.rubocop.yml +57 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +20 -0
- data/Gemfile +1 -2
- data/Gemfile.lock +175 -85
- data/README.md +117 -41
- data/Rakefile +4 -16
- data/app/assets/config/manifest.js +0 -0
- data/bin/bundle +3 -0
- data/bin/rails +4 -0
- data/bin/rake +4 -0
- data/bin/setup +56 -0
- data/config.ru +4 -0
- data/config/application.rb +21 -0
- data/config/boot.rb +3 -0
- data/config/database.yml +24 -0
- data/config/database.yml.travis +23 -0
- data/config/environment.rb +5 -0
- data/config/environments/development.rb +41 -0
- data/config/environments/production.rb +79 -0
- data/config/environments/test.rb +42 -0
- data/config/locales/en.yml +23 -0
- data/config/routes.rb +56 -0
- data/config/secrets.yml +22 -0
- data/db/migrate/20150214192135_create_test_tables.rb +24 -0
- data/db/migrate/20150710162236_create_composite_models_table.rb +9 -0
- data/db/schema.rb +48 -0
- data/db/seeds.rb +7 -0
- data/lib/postgres_upsert.rb +38 -6
- data/lib/postgres_upsert/model_to_model_adapter.rb +37 -0
- data/lib/postgres_upsert/read_adapters/active_record_adapter.rb +37 -0
- data/lib/postgres_upsert/read_adapters/file_adapter.rb +42 -0
- data/lib/postgres_upsert/read_adapters/io_adapter.rb +42 -0
- data/lib/postgres_upsert/result.rb +23 -0
- data/lib/postgres_upsert/table_writer.rb +48 -0
- data/lib/postgres_upsert/write_adapters/active_record_adapter.rb +36 -0
- data/lib/postgres_upsert/write_adapters/table_adapter.rb +56 -0
- data/lib/postgres_upsert/writer.rb +130 -92
- data/postgres_upsert.gemspec +7 -4
- data/spec/composite_key_spec.rb +50 -0
- data/spec/fixtures/comma_with_header_duplicate.csv +3 -0
- data/spec/fixtures/composite_key_model.rb +4 -0
- data/spec/fixtures/composite_key_with_header.csv +3 -0
- data/spec/fixtures/composite_nonkey_with_header.csv +3 -0
- data/spec/fixtures/test_model_copy.rb +4 -0
- data/spec/from_table_spec.rb +40 -0
- data/spec/pg_upsert_csv_spec.rb +93 -35
- data/spec/rails_helper.rb +1 -0
- data/spec/spec_helper.rb +9 -37
- metadata +106 -37
- data/VERSION +0 -1
- data/lib/postgres_upsert/active_record.rb +0 -13
- data/spec/fixtures/2_col_binary_data.dat +0 -0
- data/spec/pg_upsert_binary_spec.rb +0 -35
- data/spec/spec.opts +0 -1
@@ -1,60 +1,89 @@
|
|
1
1
|
module PostgresUpsert
|
2
|
-
|
3
2
|
class Writer
|
4
3
|
|
5
|
-
def initialize(klass, source, options = {})
|
4
|
+
def initialize(klass, destination, source, options = {})
|
6
5
|
@klass = klass
|
6
|
+
@destination = destination
|
7
|
+
@source = source
|
7
8
|
@options = options.reverse_merge({
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
|
13
|
-
@source = source
|
14
|
-
@
|
15
|
-
|
9
|
+
delimiter: ',',
|
10
|
+
header: true,
|
11
|
+
unique_key: [primary_key],
|
12
|
+
update_only: false
|
13
|
+
})
|
14
|
+
@source = source
|
15
|
+
@options[:unique_key] = Array.wrap(@options[:unique_key])
|
16
|
+
|
16
17
|
end
|
17
18
|
|
18
19
|
def write
|
19
|
-
|
20
|
-
|
20
|
+
validate_options
|
21
|
+
|
22
|
+
|
23
|
+
create_temp_table
|
24
|
+
|
25
|
+
if @source.continuous_write_enabled
|
26
|
+
write_continuous
|
27
|
+
else
|
28
|
+
write_batched
|
21
29
|
end
|
22
30
|
|
23
|
-
|
31
|
+
upsert_from_temp_table
|
32
|
+
drop_temp_table
|
24
33
|
|
25
|
-
|
26
|
-
|
34
|
+
summarize_results
|
35
|
+
end
|
27
36
|
|
28
|
-
|
29
|
-
create_temp_table
|
37
|
+
private
|
30
38
|
|
31
|
-
|
39
|
+
def write_continuous
|
40
|
+
csv_options = "DELIMITER '#{@options[:delimiter]}' CSV"
|
41
|
+
@copy_result = database_connection.raw_connection.copy_data %{COPY #{@temp_table_name} #{columns_string_for_copy} FROM STDIN #{csv_options}} do
|
42
|
+
while (line = @source.gets)
|
43
|
+
next if line.strip.empty?
|
32
44
|
|
33
|
-
|
34
|
-
next if line.strip.size == 0
|
35
|
-
ActiveRecord::Base.connection.raw_connection.put_copy_data line
|
45
|
+
database_connection.raw_connection.put_copy_data line
|
36
46
|
end
|
37
47
|
end
|
48
|
+
end
|
38
49
|
|
39
|
-
|
40
|
-
|
41
|
-
|
50
|
+
def write_batched
|
51
|
+
@source.gets do |line|
|
52
|
+
@copy_result = database_connection.raw_connection.copy_data %{COPY #{@temp_table_name} #{columns_string_for_copy} FROM STDIN} do
|
53
|
+
database_connection.raw_connection.put_copy_data line
|
54
|
+
end
|
42
55
|
end
|
43
56
|
end
|
44
57
|
|
45
|
-
|
58
|
+
def database_connection
|
59
|
+
@destination.database_connection
|
60
|
+
end
|
46
61
|
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
columns_list = line.strip.split(@options[:delimiter])
|
54
|
-
end
|
62
|
+
def summarize_results
|
63
|
+
result = PostgresUpsert::Result.new(@insert_result, @update_result, @copy_result)
|
64
|
+
expected_rows = @options[:update_only] ? result.updated_rows : result.copied_rows
|
65
|
+
|
66
|
+
if result.changed_rows != expected_rows
|
67
|
+
raise "#{expected_rows} rows were copied, but #{result.changed_rows} were upserted to destination table. Check to make sure your key is unique."
|
55
68
|
end
|
56
|
-
|
57
|
-
|
69
|
+
|
70
|
+
result
|
71
|
+
end
|
72
|
+
|
73
|
+
def primary_key
|
74
|
+
@destination.primary_key
|
75
|
+
end
|
76
|
+
|
77
|
+
def destination_columns
|
78
|
+
@destination.column_names
|
79
|
+
end
|
80
|
+
|
81
|
+
def quoted_table_name
|
82
|
+
@destination.quoted_table_name
|
83
|
+
end
|
84
|
+
|
85
|
+
def source_columns
|
86
|
+
@source.columns
|
58
87
|
end
|
59
88
|
|
60
89
|
def columns_string_for_copy
|
@@ -63,60 +92,50 @@ module PostgresUpsert
|
|
63
92
|
end
|
64
93
|
|
65
94
|
def columns_string_for_select
|
66
|
-
columns =
|
67
|
-
columns <<
|
68
|
-
columns <<
|
69
|
-
|
95
|
+
columns = source_columns.clone
|
96
|
+
columns << 'created_at' if inject_create_timestamp?
|
97
|
+
columns << 'updated_at' if inject_update_timestamp?
|
98
|
+
get_columns_string(columns)
|
70
99
|
end
|
71
100
|
|
72
101
|
def columns_string_for_insert
|
73
|
-
columns =
|
74
|
-
columns <<
|
75
|
-
columns <<
|
76
|
-
|
102
|
+
columns = source_columns.clone
|
103
|
+
columns << 'created_at' if inject_create_timestamp?
|
104
|
+
columns << 'updated_at' if inject_update_timestamp?
|
105
|
+
get_columns_string(columns)
|
77
106
|
end
|
78
107
|
|
79
108
|
def select_string_for_insert
|
80
|
-
columns =
|
109
|
+
columns = source_columns.clone
|
81
110
|
str = get_columns_string(columns)
|
82
|
-
str << ",'#{DateTime.now.utc}'" if
|
83
|
-
str << ",'#{DateTime.now.utc}'" if
|
111
|
+
str << ",'#{DateTime.now.utc}'" if inject_create_timestamp?
|
112
|
+
str << ",'#{DateTime.now.utc}'" if inject_update_timestamp?
|
84
113
|
str
|
85
114
|
end
|
86
115
|
|
87
|
-
def
|
88
|
-
|
89
|
-
columns << @options[:key_column].to_sym unless columns.include?(@options[:key_column].to_sym)
|
90
|
-
get_columns_string(columns)
|
116
|
+
def inject_create_timestamp?
|
117
|
+
destination_columns.include?('created_at') && !source_columns.include?('created_at')
|
91
118
|
end
|
92
119
|
|
93
|
-
def
|
94
|
-
|
95
|
-
columns.size > 0 ? "\"#{columns.join('","')}\"" : ""
|
120
|
+
def inject_update_timestamp?
|
121
|
+
destination_columns.include?('updated_at') && !source_columns.include?('updated_at')
|
96
122
|
end
|
97
123
|
|
98
|
-
def
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
@klass.quoted_table_name
|
124
|
+
def select_string_for_create
|
125
|
+
columns = source_columns.map(&:to_sym)
|
126
|
+
@options[:unique_key].each do |key_component|
|
127
|
+
columns << key_component.to_sym unless columns.include?(key_component.to_sym)
|
103
128
|
end
|
129
|
+
get_columns_string(columns)
|
104
130
|
end
|
105
131
|
|
106
|
-
def
|
107
|
-
|
132
|
+
def get_columns_string(columns = nil)
|
133
|
+
columns ||= source_columns
|
134
|
+
!columns.empty? ? "\"#{columns.join('","')}\"" : ''
|
108
135
|
end
|
109
136
|
|
110
|
-
def
|
111
|
-
|
112
|
-
begin
|
113
|
-
return @source.readpartial(10240)
|
114
|
-
rescue EOFError
|
115
|
-
end
|
116
|
-
else
|
117
|
-
line = @source.gets
|
118
|
-
return line
|
119
|
-
end
|
137
|
+
def generate_temp_table_name
|
138
|
+
@temp_table_name ||= "#{@table_name}_temp_#{rand(1000)}"
|
120
139
|
end
|
121
140
|
|
122
141
|
def upsert_from_temp_table
|
@@ -125,55 +144,74 @@ module PostgresUpsert
|
|
125
144
|
end
|
126
145
|
|
127
146
|
def update_from_temp_table
|
128
|
-
|
129
|
-
UPDATE #{
|
147
|
+
@update_result = database_connection.execute <<-SQL
|
148
|
+
UPDATE #{quoted_table_name} AS d
|
130
149
|
#{update_set_clause}
|
131
150
|
FROM #{@temp_table_name} as t
|
132
|
-
WHERE t
|
133
|
-
AND d
|
151
|
+
WHERE #{unique_key_select('t', 'd')}
|
152
|
+
AND #{unique_key_present('d')}
|
134
153
|
SQL
|
135
154
|
end
|
136
155
|
|
137
156
|
def update_set_clause
|
138
|
-
command =
|
157
|
+
command = source_columns.map do |col|
|
139
158
|
"\"#{col}\" = t.\"#{col}\""
|
140
159
|
end
|
141
|
-
|
142
|
-
|
160
|
+
unless source_columns.include?('updated_at')
|
161
|
+
command << "\"updated_at\" = '#{DateTime.now.utc}'" if destination_columns.include?('updated_at')
|
162
|
+
end
|
163
|
+
"SET #{command.join(',')}"
|
143
164
|
end
|
144
165
|
|
145
166
|
def insert_from_temp_table
|
146
167
|
columns_string = columns_string_for_insert
|
147
168
|
select_string = select_string_for_insert
|
148
|
-
|
149
|
-
INSERT INTO #{
|
169
|
+
@insert_result = database_connection.execute <<-SQL
|
170
|
+
INSERT INTO #{quoted_table_name} (#{columns_string})
|
150
171
|
SELECT #{select_string}
|
151
172
|
FROM #{@temp_table_name} as t
|
152
|
-
WHERE NOT EXISTS
|
153
|
-
(SELECT 1
|
154
|
-
FROM #{
|
155
|
-
WHERE
|
156
|
-
AND t.#{@options[:key_column]} IS NOT NULL;
|
173
|
+
WHERE NOT EXISTS
|
174
|
+
(SELECT 1
|
175
|
+
FROM #{quoted_table_name} as d
|
176
|
+
WHERE #{unique_key_select('t', 'd')});
|
157
177
|
SQL
|
158
178
|
end
|
159
179
|
|
180
|
+
def unique_key_select(source, dest)
|
181
|
+
@options[:unique_key].map { |field| "#{source}.#{field} = #{dest}.#{field}" }.join(' AND ')
|
182
|
+
end
|
183
|
+
|
184
|
+
def unique_key_present(source)
|
185
|
+
@options[:unique_key].map { |field| "#{source}.#{field} IS NOT NULL" }.join(' AND ')
|
186
|
+
end
|
187
|
+
|
160
188
|
def create_temp_table
|
161
|
-
|
162
|
-
|
189
|
+
generate_temp_table_name
|
190
|
+
database_connection.execute <<-SQL
|
163
191
|
SET client_min_messages=WARNING;
|
164
192
|
DROP TABLE IF EXISTS #{@temp_table_name};
|
165
193
|
|
166
|
-
CREATE TEMP TABLE #{@temp_table_name}
|
167
|
-
AS SELECT #{
|
194
|
+
CREATE TEMP TABLE #{@temp_table_name}
|
195
|
+
AS SELECT #{select_string_for_create} FROM #{quoted_table_name} WHERE 0 = 1;
|
168
196
|
SQL
|
169
197
|
end
|
170
198
|
|
199
|
+
def validate_options
|
200
|
+
if source_columns.empty?
|
201
|
+
raise 'Either the :columns option or :header => true are required'
|
202
|
+
end
|
203
|
+
|
204
|
+
@options[:unique_key].each do |key_component|
|
205
|
+
unless source_columns.include?(key_component.to_s)
|
206
|
+
raise "Expected column '#{key_component}' was not found in source"
|
207
|
+
end
|
208
|
+
end
|
209
|
+
end
|
210
|
+
|
171
211
|
def drop_temp_table
|
172
|
-
|
173
|
-
DROP TABLE #{@temp_table_name}
|
212
|
+
database_connection.execute <<-SQL
|
213
|
+
DROP TABLE #{@temp_table_name}
|
174
214
|
SQL
|
175
215
|
end
|
176
216
|
end
|
177
|
-
|
178
|
-
|
179
217
|
end
|
data/postgres_upsert.gemspec
CHANGED
@@ -5,7 +5,7 @@ $:.unshift lib unless $:.include?(lib)
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "postgres_upsert"
|
8
|
-
s.version = "
|
8
|
+
s.version = "5.1.0"
|
9
9
|
|
10
10
|
s.platform = Gem::Platform::RUBY
|
11
11
|
s.required_ruby_version = ">= 1.8.7"
|
@@ -13,6 +13,7 @@ Gem::Specification.new do |s|
|
|
13
13
|
s.date = "2014-09-12"
|
14
14
|
s.description = "Uses Postgres's powerful COPY command to upsert large sets of data into ActiveRecord tables"
|
15
15
|
s.email = "thestevemitchell@gmail.com"
|
16
|
+
s.license = "MIT"
|
16
17
|
git_files = `git ls-files`.split("\n") rescue ''
|
17
18
|
s.files = git_files
|
18
19
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
@@ -22,12 +23,14 @@ Gem::Specification.new do |s|
|
|
22
23
|
s.require_paths = ["lib"]
|
23
24
|
s.summary = "A rubygem that integrates with ActiveRecord to insert/update large data sets into the database efficiently"
|
24
25
|
|
25
|
-
s.add_dependency "pg", '
|
26
|
+
s.add_dependency "pg", '>= 0.17.0'
|
26
27
|
s.add_dependency "activerecord", '>= 3.0.0'
|
27
28
|
s.add_dependency "rails", '>= 3.0.0'
|
28
29
|
s.add_development_dependency "bundler"
|
29
|
-
s.add_development_dependency "rdoc"
|
30
30
|
s.add_development_dependency "pry-rails"
|
31
|
-
s.add_development_dependency "
|
31
|
+
s.add_development_dependency "pry-nav"
|
32
|
+
s.add_development_dependency "rspec-rails", ">= 3.9"
|
33
|
+
s.add_development_dependency "database_cleaner-active_record"
|
34
|
+
s.add_development_dependency "rubocop"
|
32
35
|
end
|
33
36
|
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe "pg_upsert from file with CSV format" do
|
4
|
+
|
5
|
+
before do
|
6
|
+
DateTime.stub_chain(:now, :utc).and_return (DateTime.parse("2012-01-01").utc)
|
7
|
+
end
|
8
|
+
|
9
|
+
def timestamp
|
10
|
+
DateTime.now.utc
|
11
|
+
end
|
12
|
+
|
13
|
+
context 'composite_key_support' do
|
14
|
+
it 'inserts records if the passed match composite key doesnt exist' do
|
15
|
+
file = File.open(File.expand_path('spec/fixtures/composite_key_with_header.csv'), 'r')
|
16
|
+
|
17
|
+
PostgresUpsert.write(CompositeKeyModel, file, :unique_key => ["comp_key_1", "comp_key_2"])
|
18
|
+
expect(
|
19
|
+
CompositeKeyModel.last.attributes
|
20
|
+
).to include("data" => "test data 2")
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'updates records if the passed composite key exists' do
|
24
|
+
file = File.open(File.expand_path('spec/fixtures/composite_key_with_header.csv'), 'r')
|
25
|
+
existing = CompositeKeyModel.create(comp_key_1: 2, comp_key_2:3, data: "old stuff")
|
26
|
+
|
27
|
+
PostgresUpsert.write(CompositeKeyModel, file, :unique_key => ["comp_key_1", "comp_key_2"])
|
28
|
+
|
29
|
+
expect(
|
30
|
+
CompositeKeyModel.find_by({comp_key_1: 2, comp_key_2:3}).attributes
|
31
|
+
).to include("data" => "test data 2")
|
32
|
+
|
33
|
+
expect(
|
34
|
+
CompositeKeyModel.find_by({comp_key_1: 1, comp_key_2:2}).attributes
|
35
|
+
).to include("data" => "test data 1")
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'fails if composite keys are not unique.' do
|
39
|
+
file = File.open(File.expand_path('spec/fixtures/composite_nonkey_with_header.csv'), 'r')
|
40
|
+
existing = CompositeKeyModel.create(comp_key_1: 1, comp_key_2:2, data: "old stuff")
|
41
|
+
|
42
|
+
expect{
|
43
|
+
PostgresUpsert.write(CompositeKeyModel, file, :unique_key => ["comp_key_1", "comp_key_2"])
|
44
|
+
}.to raise_error(/Check to make sure your key is unique/)
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
|
50
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require "rails_helper"
|
2
|
+
|
3
|
+
describe PostgresUpsert do
|
4
|
+
context "when passing ActiveRecord class as destination" do
|
5
|
+
context "when passing ActiveRecord clas as Source" do
|
6
|
+
let(:original_created_at) {5.days.ago.utc}
|
7
|
+
|
8
|
+
before(:each) do
|
9
|
+
TestModel.create(data: "From the before time, in the long long ago", :created_at => original_created_at)
|
10
|
+
end
|
11
|
+
|
12
|
+
it "copies the source to destination" do
|
13
|
+
PostgresUpsert.write TestModelCopy, TestModel
|
14
|
+
expect(
|
15
|
+
TestModelCopy.first.attributes
|
16
|
+
).to eq(TestModelCopy.first.attributes)
|
17
|
+
end
|
18
|
+
|
19
|
+
context "with a large table" do
|
20
|
+
before do
|
21
|
+
csv_string = CSV.generate do |csv|
|
22
|
+
csv << %w(id data) # CSV header row
|
23
|
+
(1..100_000).each do |n|
|
24
|
+
csv << ["#{n}", "data about #{n}"]
|
25
|
+
end
|
26
|
+
end
|
27
|
+
io = StringIO.new(csv_string)
|
28
|
+
PostgresUpsert.write TestModel, io
|
29
|
+
end
|
30
|
+
|
31
|
+
it "moves like the poop through a goose" do
|
32
|
+
expect{
|
33
|
+
PostgresUpsert.write TestModelCopy, TestModel
|
34
|
+
}.to change{TestModelCopy.count}.by(100_000)
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|