postgres_upsert 5.0.0 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/.rubocop.yml +57 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +18 -3
- data/Gemfile +1 -2
- data/Gemfile.lock +171 -109
- data/README.md +8 -5
- data/app/assets/config/manifest.js +0 -0
- data/bin/setup +27 -0
- data/config/application.rb +0 -3
- data/config/database.yml +2 -0
- data/config/database.yml.travis +23 -0
- data/db/migrate/20150214192135_create_test_tables.rb +6 -1
- data/db/migrate/20150710162236_create_composite_models_table.rb +1 -1
- data/db/schema.rb +22 -17
- data/lib/postgres_upsert.rb +35 -6
- data/lib/postgres_upsert/model_to_model_adapter.rb +37 -0
- data/lib/postgres_upsert/read_adapters/active_record_adapter.rb +37 -0
- data/lib/postgres_upsert/read_adapters/file_adapter.rb +42 -0
- data/lib/postgres_upsert/read_adapters/io_adapter.rb +42 -0
- data/lib/postgres_upsert/result.rb +3 -4
- data/lib/postgres_upsert/table_writer.rb +7 -9
- data/lib/postgres_upsert/write_adapters/active_record_adapter.rb +36 -0
- data/lib/postgres_upsert/write_adapters/table_adapter.rb +56 -0
- data/lib/postgres_upsert/writer.rb +87 -69
- data/postgres_upsert.gemspec +6 -3
- data/spec/composite_key_spec.rb +0 -6
- data/spec/fixtures/comma_with_header_duplicate.csv +3 -0
- data/spec/fixtures/test_model_copy.rb +4 -0
- data/spec/from_table_spec.rb +40 -0
- data/spec/pg_upsert_csv_spec.rb +11 -10
- data/spec/rails_helper.rb +1 -0
- data/spec/spec_helper.rb +5 -2
- metadata +77 -31
- data/VERSION +0 -1
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
module PostgresUpsert
  module WriteAdapters
    # Write adapter for a destination object that already answers
    # `connection`, `primary_key`, `column_names` and `quoted_table_name`
    # itself; every lookup here is forwarded straight to that object.
    class ActiveRecordAdapter
      # @param destination [Object] object the lookups below are forwarded to
      # @param options [Hash] caller options; only :delimiter and :unique_key are retained
      def initialize(destination, options)
        @destination = destination
        @options = sanitize_options(options)
      end

      # Keeps only the :delimiter and :unique_key entries and fills in defaults
      # for whichever of them the caller left out.
      #
      # @param options [Hash]
      # @return [Hash] hash holding :delimiter and :unique_key
      def sanitize_options(options)
        permitted = options.slice(:delimiter, :unique_key)
        permitted.reverse_merge(delimiter: ',', unique_key: [primary_key])
      end

      # @return [Object] whatever the destination returns from `connection`
      def database_connection
        @destination.connection
      end

      # @return [Object] whatever the destination returns from `primary_key`
      def primary_key
        @destination.primary_key
      end

      # @return [Object] whatever the destination returns from `column_names`
      def column_names
        @destination.column_names
      end

      # @return [Object] whatever the destination returns from `quoted_table_name`
      def quoted_table_name
        @destination.quoted_table_name
      end
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
module PostgresUpsert
  module WriteAdapters
    # Write adapter for a destination given as a bare table name. Primary key
    # and column names are looked up through SQL on ActiveRecord::Base's
    # connection.
    class TableAdapter
      # @param destination [#to_s] name of the destination table
      # @param options [Hash] caller options; only :delimiter and :unique_key are retained
      def initialize(destination, options)
        @destination = destination
        @options = sanitize_options(options)
      end

      # Keeps only the :delimiter and :unique_key entries and fills in defaults
      # for whichever of them the caller left out.
      #
      # @param options [Hash]
      # @return [Hash] hash holding :delimiter and :unique_key
      def sanitize_options(options)
        options.slice(
          :delimiter, :unique_key
        ).reverse_merge(
          delimiter: ',',
          unique_key: [primary_key]
        )
      end

      # @return [Object] the connection of ActiveRecord::Base
      def database_connection
        ActiveRecord::Base.connection
      end

      # Name of the first primary-key column reported by the catalog query.
      # The lookup runs at most once; the answer (including "no primary key")
      # is cached on the instance.
      #
      # @return [String, nil] column name, or nil when the query returns no rows
      def primary_key
        # `defined?` rather than `||=` so that a nil answer is cached too.
        return @primary_key if defined?(@primary_key)

        query = <<-SELECT_KEY
          SELECT
            pg_attribute.attname,
            format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
          FROM pg_index, pg_class, pg_attribute
          WHERE
            pg_class.oid = #{quoted_destination}::regclass AND
            indrelid = pg_class.oid AND
            pg_attribute.attrelid = pg_class.oid AND
            pg_attribute.attnum = any(pg_index.indkey)
            AND indisprimary
        SELECT_KEY

        # Only the first row is used, so a composite key yields a single column.
        first_row = database_connection.execute(query).first
        @primary_key = first_row && first_row['attname']
      end

      # Column names listed in information_schema.columns for the table name.
      # NOTE(review): the lookup filters on table name only, not on schema —
      # confirm that is acceptable when several schemas hold a same-named table.
      #
      # @return [Array<String>]
      def column_names
        @column_names ||= begin
          query = "SELECT * FROM information_schema.columns WHERE TABLE_NAME = #{quoted_destination}"
          database_connection.execute(query).map { |row| row['column_name'] }
        end
      end

      # @return [String] the table name quoted as an identifier by the connection
      def quoted_table_name
        @quoted_table_name ||= database_connection.quote_table_name(@destination)
      end

      private

      # The table name as an escaped SQL string literal, so a name containing a
      # quote character cannot break out of the surrounding statement.
      def quoted_destination
        database_connection.quote(@destination.to_s)
      end
    end
  end
end
|
|
@@ -1,36 +1,31 @@
|
|
|
1
1
|
module PostgresUpsert
|
|
2
2
|
class Writer
|
|
3
3
|
|
|
4
|
-
def initialize(klass, source, options = {})
|
|
4
|
+
def initialize(klass, destination, source, options = {})
|
|
5
5
|
@klass = klass
|
|
6
|
+
@destination = destination
|
|
7
|
+
@source = source
|
|
6
8
|
@options = options.reverse_merge({
|
|
7
|
-
:
|
|
8
|
-
:
|
|
9
|
-
:
|
|
10
|
-
:
|
|
9
|
+
delimiter: ',',
|
|
10
|
+
header: true,
|
|
11
|
+
unique_key: [primary_key],
|
|
12
|
+
update_only: false
|
|
13
|
+
})
|
|
14
|
+
@source = source
|
|
11
15
|
@options[:unique_key] = Array.wrap(@options[:unique_key])
|
|
12
|
-
|
|
13
|
-
@columns_list = get_columns
|
|
14
|
-
generate_temp_table_name
|
|
16
|
+
|
|
15
17
|
end
|
|
16
18
|
|
|
17
19
|
def write
|
|
18
|
-
|
|
19
|
-
raise "Either the :columns option or :header => true are required"
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
csv_options = "DELIMITER '#{@options[:delimiter]}' CSV"
|
|
20
|
+
validate_options
|
|
23
21
|
|
|
24
|
-
|
|
25
|
-
columns_string = columns_string_for_copy
|
|
22
|
+
|
|
26
23
|
create_temp_table
|
|
27
24
|
|
|
28
|
-
@
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
database_connection.raw_connection.put_copy_data line
|
|
33
|
-
end
|
|
25
|
+
if @source.continuous_write_enabled
|
|
26
|
+
write_continuous
|
|
27
|
+
else
|
|
28
|
+
write_batched
|
|
34
29
|
end
|
|
35
30
|
|
|
36
31
|
upsert_from_temp_table
|
|
@@ -41,44 +36,54 @@ module PostgresUpsert
|
|
|
41
36
|
|
|
42
37
|
private
|
|
43
38
|
|
|
39
|
+
# Feeds the source into the temp table through one COPY ... FROM STDIN
# statement in CSV mode, pulling one line at a time from @source.gets until
# it returns nil. Whitespace-only lines are not sent. The value returned by
# copy_data is kept in @copy_result.
def write_continuous
  pg_conn = database_connection.raw_connection
  copy_statement = "COPY #{@temp_table_name} #{columns_string_for_copy} FROM STDIN DELIMITER '#{@options[:delimiter]}' CSV"

  @copy_result = pg_conn.copy_data(copy_statement) do
    while (line = @source.gets)
      pg_conn.put_copy_data(line) unless line.strip.empty?
    end
  end
end
|
|
49
|
+
|
|
50
|
+
# Sends each chunk that @source.gets yields to the temp table, using a
# separate COPY ... FROM STDIN statement (no CSV options) per chunk.
# NOTE(review): @copy_result is reassigned for every chunk, so only the last
# chunk's result is retained — confirm that is what the result summary expects.
def write_batched
  @source.gets do |chunk|
    pg_conn = database_connection.raw_connection
    @copy_result = pg_conn.copy_data("COPY #{@temp_table_name} #{columns_string_for_copy} FROM STDIN") do
      pg_conn.put_copy_data(chunk)
    end
  end
end
|
|
57
|
+
|
|
44
58
|
def database_connection
|
|
45
|
-
@
|
|
59
|
+
@destination.database_connection
|
|
46
60
|
end
|
|
47
61
|
|
|
48
62
|
def summarize_results
|
|
49
63
|
result = PostgresUpsert::Result.new(@insert_result, @update_result, @copy_result)
|
|
50
64
|
expected_rows = @options[:update_only] ? result.updated_rows : result.copied_rows
|
|
51
|
-
|
|
65
|
+
|
|
52
66
|
if result.changed_rows != expected_rows
|
|
53
67
|
raise "#{expected_rows} rows were copied, but #{result.changed_rows} were upserted to destination table. Check to make sure your key is unique."
|
|
54
68
|
end
|
|
55
69
|
|
|
56
|
-
|
|
70
|
+
result
|
|
57
71
|
end
|
|
58
72
|
|
|
59
73
|
def primary_key
|
|
60
|
-
@
|
|
74
|
+
@destination.primary_key
|
|
61
75
|
end
|
|
62
76
|
|
|
63
|
-
def
|
|
64
|
-
@
|
|
77
|
+
def destination_columns
|
|
78
|
+
@destination.column_names
|
|
65
79
|
end
|
|
66
80
|
|
|
67
81
|
def quoted_table_name
|
|
68
|
-
@
|
|
82
|
+
@destination.quoted_table_name
|
|
69
83
|
end
|
|
70
84
|
|
|
71
|
-
def
|
|
72
|
-
|
|
73
|
-
if @options[:header]
|
|
74
|
-
#if header is present, we need to strip it from io, whether we use it for the columns list or not.
|
|
75
|
-
line = @source.gets
|
|
76
|
-
if columns_list.empty?
|
|
77
|
-
columns_list = line.strip.split(@options[:delimiter])
|
|
78
|
-
end
|
|
79
|
-
end
|
|
80
|
-
columns_list = columns_list.map{|c| @options[:map][c.to_s] } if @options[:map]
|
|
81
|
-
return columns_list
|
|
85
|
+
def source_columns
|
|
86
|
+
@source.columns
|
|
82
87
|
end
|
|
83
88
|
|
|
84
89
|
def columns_string_for_copy
|
|
@@ -87,29 +92,37 @@ module PostgresUpsert
|
|
|
87
92
|
end
|
|
88
93
|
|
|
89
94
|
def columns_string_for_select
|
|
90
|
-
columns =
|
|
91
|
-
columns <<
|
|
92
|
-
columns <<
|
|
93
|
-
|
|
95
|
+
columns = source_columns.clone
|
|
96
|
+
columns << 'created_at' if inject_create_timestamp?
|
|
97
|
+
columns << 'updated_at' if inject_update_timestamp?
|
|
98
|
+
get_columns_string(columns)
|
|
94
99
|
end
|
|
95
100
|
|
|
96
101
|
def columns_string_for_insert
|
|
97
|
-
columns =
|
|
98
|
-
columns <<
|
|
99
|
-
columns <<
|
|
100
|
-
|
|
102
|
+
columns = source_columns.clone
|
|
103
|
+
columns << 'created_at' if inject_create_timestamp?
|
|
104
|
+
columns << 'updated_at' if inject_update_timestamp?
|
|
105
|
+
get_columns_string(columns)
|
|
101
106
|
end
|
|
102
107
|
|
|
103
108
|
def select_string_for_insert
|
|
104
|
-
columns =
|
|
109
|
+
columns = source_columns.clone
|
|
105
110
|
str = get_columns_string(columns)
|
|
106
|
-
str << ",'#{DateTime.now.utc}'" if
|
|
107
|
-
str << ",'#{DateTime.now.utc}'" if
|
|
111
|
+
str << ",'#{DateTime.now.utc}'" if inject_create_timestamp?
|
|
112
|
+
str << ",'#{DateTime.now.utc}'" if inject_update_timestamp?
|
|
108
113
|
str
|
|
109
114
|
end
|
|
110
115
|
|
|
116
|
+
# True when the destination has a 'created_at' column that the source does
# not provide.
def inject_create_timestamp?
  return false unless destination_columns.include?('created_at')

  !source_columns.include?('created_at')
end
|
|
119
|
+
|
|
120
|
+
# True when the destination has an 'updated_at' column that the source does
# not provide.
def inject_update_timestamp?
  return false unless destination_columns.include?('updated_at')

  !source_columns.include?('updated_at')
end
|
|
123
|
+
|
|
111
124
|
def select_string_for_create
|
|
112
|
-
columns =
|
|
125
|
+
columns = source_columns.map(&:to_sym)
|
|
113
126
|
@options[:unique_key].each do |key_component|
|
|
114
127
|
columns << key_component.to_sym unless columns.include?(key_component.to_sym)
|
|
115
128
|
end
|
|
@@ -117,12 +130,12 @@ module PostgresUpsert
|
|
|
117
130
|
end
|
|
118
131
|
|
|
119
132
|
def get_columns_string(columns = nil)
|
|
120
|
-
columns ||=
|
|
121
|
-
columns.
|
|
133
|
+
columns ||= source_columns
|
|
134
|
+
!columns.empty? ? "\"#{columns.join('","')}\"" : ''
|
|
122
135
|
end
|
|
123
136
|
|
|
124
137
|
def generate_temp_table_name
|
|
125
|
-
@temp_table_name
|
|
138
|
+
@temp_table_name ||= "#{@table_name}_temp_#{rand(1000)}"
|
|
126
139
|
end
|
|
127
140
|
|
|
128
141
|
def upsert_from_temp_table
|
|
@@ -135,17 +148,19 @@ module PostgresUpsert
|
|
|
135
148
|
UPDATE #{quoted_table_name} AS d
|
|
136
149
|
#{update_set_clause}
|
|
137
150
|
FROM #{@temp_table_name} as t
|
|
138
|
-
WHERE #{unique_key_select(
|
|
139
|
-
AND #{unique_key_present(
|
|
151
|
+
WHERE #{unique_key_select('t', 'd')}
|
|
152
|
+
AND #{unique_key_present('d')}
|
|
140
153
|
SQL
|
|
141
154
|
end
|
|
142
155
|
|
|
143
156
|
def update_set_clause
|
|
144
|
-
command =
|
|
157
|
+
command = source_columns.map do |col|
|
|
145
158
|
"\"#{col}\" = t.\"#{col}\""
|
|
146
159
|
end
|
|
147
|
-
|
|
148
|
-
|
|
160
|
+
unless source_columns.include?('updated_at')
|
|
161
|
+
command << "\"updated_at\" = '#{DateTime.now.utc}'" if destination_columns.include?('updated_at')
|
|
162
|
+
end
|
|
163
|
+
"SET #{command.join(',')}"
|
|
149
164
|
end
|
|
150
165
|
|
|
151
166
|
def insert_from_temp_table
|
|
@@ -158,35 +173,38 @@ module PostgresUpsert
|
|
|
158
173
|
WHERE NOT EXISTS
|
|
159
174
|
(SELECT 1
|
|
160
175
|
FROM #{quoted_table_name} as d
|
|
161
|
-
WHERE #{unique_key_select(
|
|
176
|
+
WHERE #{unique_key_select('t', 'd')});
|
|
162
177
|
SQL
|
|
163
178
|
end
|
|
164
179
|
|
|
165
180
|
def unique_key_select(source, dest)
|
|
166
|
-
@options[:unique_key].map {|field| "#{source}.#{field} = #{dest}.#{field}"}.join(' AND ')
|
|
181
|
+
@options[:unique_key].map { |field| "#{source}.#{field} = #{dest}.#{field}" }.join(' AND ')
|
|
167
182
|
end
|
|
168
183
|
|
|
169
184
|
def unique_key_present(source)
|
|
170
|
-
@options[:unique_key].map {|field| "#{source}.#{field} IS NOT NULL"}.join(' AND ')
|
|
185
|
+
@options[:unique_key].map { |field| "#{source}.#{field} IS NOT NULL" }.join(' AND ')
|
|
171
186
|
end
|
|
172
187
|
|
|
173
188
|
def create_temp_table
|
|
174
|
-
|
|
175
|
-
verify_temp_has_key
|
|
189
|
+
generate_temp_table_name
|
|
176
190
|
database_connection.execute <<-SQL
|
|
177
191
|
SET client_min_messages=WARNING;
|
|
178
192
|
DROP TABLE IF EXISTS #{@temp_table_name};
|
|
179
193
|
|
|
180
194
|
CREATE TEMP TABLE #{@temp_table_name}
|
|
181
|
-
AS SELECT #{
|
|
195
|
+
AS SELECT #{select_string_for_create} FROM #{quoted_table_name} WHERE 0 = 1;
|
|
182
196
|
SQL
|
|
183
197
|
end
|
|
184
198
|
|
|
185
|
-
def
|
|
199
|
+
def validate_options
|
|
200
|
+
if source_columns.empty?
|
|
201
|
+
raise 'Either the :columns option or :header => true are required'
|
|
202
|
+
end
|
|
203
|
+
|
|
186
204
|
@options[:unique_key].each do |key_component|
|
|
187
|
-
unless
|
|
188
|
-
raise "Expected
|
|
189
|
-
|
|
205
|
+
unless source_columns.include?(key_component.to_s)
|
|
206
|
+
raise "Expected column '#{key_component}' was not found in source"
|
|
207
|
+
end
|
|
190
208
|
end
|
|
191
209
|
end
|
|
192
210
|
|
data/postgres_upsert.gemspec
CHANGED
|
@@ -5,7 +5,7 @@ $:.unshift lib unless $:.include?(lib)
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = "postgres_upsert"
|
|
8
|
-
s.version = "5.
|
|
8
|
+
s.version = "5.1.0"
|
|
9
9
|
|
|
10
10
|
s.platform = Gem::Platform::RUBY
|
|
11
11
|
s.required_ruby_version = ">= 1.8.7"
|
|
@@ -13,6 +13,7 @@ Gem::Specification.new do |s|
|
|
|
13
13
|
s.date = "2014-09-12"
|
|
14
14
|
s.description = "Uses Postgres's powerful COPY command to upsert large sets of data into ActiveRecord tables"
|
|
15
15
|
s.email = "thestevemitchell@gmail.com"
|
|
16
|
+
s.license = "MIT"
|
|
16
17
|
git_files = `git ls-files`.split("\n") rescue ''
|
|
17
18
|
s.files = git_files
|
|
18
19
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
|
@@ -27,7 +28,9 @@ Gem::Specification.new do |s|
|
|
|
27
28
|
s.add_dependency "rails", '>= 3.0.0'
|
|
28
29
|
s.add_development_dependency "bundler"
|
|
29
30
|
s.add_development_dependency "pry-rails"
|
|
30
|
-
s.add_development_dependency "
|
|
31
|
-
s.add_development_dependency "rspec-rails", "
|
|
31
|
+
s.add_development_dependency "pry-nav"
|
|
32
|
+
s.add_development_dependency "rspec-rails", ">= 3.9"
|
|
33
|
+
s.add_development_dependency "database_cleaner-active_record"
|
|
34
|
+
s.add_development_dependency "rubocop"
|
|
32
35
|
end
|
|
33
36
|
|
data/spec/composite_key_spec.rb
CHANGED
|
@@ -1,12 +1,6 @@
|
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
2
2
|
|
|
3
3
|
describe "pg_upsert from file with CSV format" do
|
|
4
|
-
before(:each) do
|
|
5
|
-
ActiveRecord::Base.connection.execute %{
|
|
6
|
-
TRUNCATE TABLE composite_key_models;
|
|
7
|
-
SELECT setval('composite_key_models_id_seq', 1, false);
|
|
8
|
-
}
|
|
9
|
-
end
|
|
10
4
|
|
|
11
5
|
before do
|
|
12
6
|
DateTime.stub_chain(:now, :utc).and_return (DateTime.parse("2012-01-01").utc)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
require "rails_helper"

# Specs for PostgresUpsert.write when both the destination and the source are
# passed as model classes (TestModelCopy <- TestModel).
describe PostgresUpsert do
  context "when passing ActiveRecord class as destination" do
    context "when passing ActiveRecord clas as Source" do
      let(:original_created_at) { 5.days.ago.utc }

      before(:each) do
        TestModel.create(data: "From the before time, in the long long ago", :created_at => original_created_at)
      end

      it "copies the source to destination" do
        PostgresUpsert.write TestModelCopy, TestModel
        # Compare the copied row with the SOURCE row. The previous assertion
        # compared the copy with itself and therefore could never fail.
        # NOTE(review): assumes both tables expose the same attribute set — confirm against the schema.
        expect(
          TestModelCopy.first.attributes
        ).to eq(TestModel.first.attributes)
      end

      context "with a large table" do
        # Seed the source table with 100,000 rows through the CSV/IO path.
        before do
          csv_string = CSV.generate do |csv|
            csv << %w(id data) # CSV header row
            (1..100_000).each do |n|
              csv << ["#{n}", "data about #{n}"]
            end
          end
          io = StringIO.new(csv_string)
          PostgresUpsert.write TestModel, io
        end

        it "moves like the poop through a goose" do
          expect {
            PostgresUpsert.write TestModelCopy, TestModel
          }.to change { TestModelCopy.count }.by(100_000)
        end
      end
    end
  end
end
|