postgres_upsert 5.0.0 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,36 @@
module PostgresUpsert
  module WriteAdapters
    # Write adapter for a destination object that itself answers
    # `connection`, `primary_key`, `column_names` and `quoted_table_name`
    # (presumably an ActiveRecord model class, going by the adapter's name).
    class ActiveRecordAdapter
      # destination - object every reader method below delegates to.
      # options     - Hash of caller options; see #sanitize_options.
      def initialize(destination, options)
        @destination = destination
        @options = sanitize_options(options)
      end

      # Keeps only the :delimiter and :unique_key entries of +options+ and
      # fills in ',' / [primary_key] for whichever of the two is missing.
      # Values supplied by the caller win over the defaults.
      def sanitize_options(options)
        permitted = options.slice(:delimiter, :unique_key)
        defaults = { delimiter: ',', unique_key: [primary_key] }
        permitted.reverse_merge(defaults)
      end

      # Connection object of the destination.
      def database_connection
        @destination.connection
      end

      # Primary key as reported by the destination.
      def primary_key
        @destination.primary_key
      end

      # Column names as reported by the destination.
      def column_names
        @destination.column_names
      end

      # Already-quoted table name as reported by the destination.
      def quoted_table_name
        @destination.quoted_table_name
      end
    end
  end
end
@@ -0,0 +1,56 @@
module PostgresUpsert
  module WriteAdapters
    # Write adapter for a destination given as a table name rather than as an
    # object that can describe itself. The primary key and column list are
    # looked up in the Postgres catalogs through ActiveRecord::Base.connection.
    class TableAdapter
      # destination - table name; it is embedded (as a quoted literal) in the
      #               catalog queries below and passed to quote_table_name.
      # options     - Hash of caller options; see #sanitize_options.
      def initialize(destination, options)
        @destination = destination
        @options = sanitize_options(options)
      end

      # Keeps only the :delimiter and :unique_key entries of +options+ and
      # fills in ',' / [primary_key] for whichever of the two is missing.
      def sanitize_options(options)
        options.slice(
          :delimiter, :unique_key
        ).reverse_merge(
          delimiter: ',',
          unique_key: [primary_key]
        )
      end

      # Connection used for every query this adapter issues.
      def database_connection
        ActiveRecord::Base.connection
      end

      # Returns the attname of the first primary-key column the catalog query
      # yields for the table, or nil when the query yields no rows (no primary
      # key). The answer is memoized, including a nil answer, so the catalog
      # is queried at most once per adapter.
      def primary_key
        # defined? rather than ||= so that a nil result is cached as well.
        return @primary_key if defined?(@primary_key)

        query = <<-SELECT_KEY
          SELECT
            pg_attribute.attname,
            format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
          FROM pg_index, pg_class, pg_attribute
          WHERE
            pg_class.oid = #{quoted_destination}::regclass AND
            indrelid = pg_class.oid AND
            pg_attribute.attrelid = pg_class.oid AND
            pg_attribute.attnum = any(pg_index.indkey)
            AND indisprimary
        SELECT_KEY

        first_row = database_connection.execute(query).first
        @primary_key = first_row && first_row['attname']
      end

      # Returns the column_name of every information_schema.columns row whose
      # table_name equals the destination. Memoized.
      # NOTE(review): the lookup is by bare table name only (no schema
      # filter), as in the previous implementation — confirm that is intended.
      def column_names
        @column_names ||= begin
          query = "SELECT column_name FROM information_schema.columns WHERE TABLE_NAME = #{quoted_destination}"
          database_connection.execute(query).map { |row| row['column_name'] }
        end
      end

      # Destination name quoted as an SQL identifier by the connection.
      def quoted_table_name
        @quoted_table_name ||= database_connection.quote_table_name(@destination)
      end

      private

      # Destination name as a safely escaped SQL string literal, so a name
      # containing a quote character cannot terminate the literal early.
      def quoted_destination
        database_connection.quote(@destination.to_s)
      end
    end
  end
end
@@ -1,36 +1,31 @@
1
1
  module PostgresUpsert
2
2
  class Writer
3
3
 
4
- def initialize(klass, source, options = {})
4
+ def initialize(klass, destination, source, options = {})
5
5
  @klass = klass
6
+ @destination = destination
7
+ @source = source
6
8
  @options = options.reverse_merge({
7
- :delimiter => ",",
8
- :header => true,
9
- :unique_key => [primary_key],
10
- :update_only => false})
9
+ delimiter: ',',
10
+ header: true,
11
+ unique_key: [primary_key],
12
+ update_only: false
13
+ })
14
+ @source = source
11
15
  @options[:unique_key] = Array.wrap(@options[:unique_key])
12
- @source = source.instance_of?(String) ? File.open(source, 'r') : source
13
- @columns_list = get_columns
14
- generate_temp_table_name
16
+
15
17
  end
16
18
 
17
19
  def write
18
- if @columns_list.empty?
19
- raise "Either the :columns option or :header => true are required"
20
- end
21
-
22
- csv_options = "DELIMITER '#{@options[:delimiter]}' CSV"
20
+ validate_options
23
21
 
24
- copy_table = @temp_table_name
25
- columns_string = columns_string_for_copy
22
+
26
23
  create_temp_table
27
24
 
28
- @copy_result = database_connection.raw_connection.copy_data %{COPY #{copy_table} #{columns_string} FROM STDIN #{csv_options}} do
29
-
30
- while line = @source.gets do
31
- next if line.strip.size == 0
32
- database_connection.raw_connection.put_copy_data line
33
- end
25
+ if @source.continuous_write_enabled
26
+ write_continuous
27
+ else
28
+ write_batched
34
29
  end
35
30
 
36
31
  upsert_from_temp_table
@@ -41,44 +36,54 @@ module PostgresUpsert
41
36
 
42
37
  private
43
38
 
39
+ def write_continuous
40
+ csv_options = "DELIMITER '#{@options[:delimiter]}' CSV"
41
+ @copy_result = database_connection.raw_connection.copy_data %{COPY #{@temp_table_name} #{columns_string_for_copy} FROM STDIN #{csv_options}} do
42
+ while (line = @source.gets)
43
+ next if line.strip.empty?
44
+
45
+ database_connection.raw_connection.put_copy_data line
46
+ end
47
+ end
48
+ end
49
+
50
+ def write_batched
51
+ @source.gets do |line|
52
+ @copy_result = database_connection.raw_connection.copy_data %{COPY #{@temp_table_name} #{columns_string_for_copy} FROM STDIN} do
53
+ database_connection.raw_connection.put_copy_data line
54
+ end
55
+ end
56
+ end
57
+
44
58
  def database_connection
45
- @klass.connection
59
+ @destination.database_connection
46
60
  end
47
61
 
48
62
  def summarize_results
49
63
  result = PostgresUpsert::Result.new(@insert_result, @update_result, @copy_result)
50
64
  expected_rows = @options[:update_only] ? result.updated_rows : result.copied_rows
51
-
65
+
52
66
  if result.changed_rows != expected_rows
53
67
  raise "#{expected_rows} rows were copied, but #{result.changed_rows} were upserted to destination table. Check to make sure your key is unique."
54
68
  end
55
69
 
56
- return result
70
+ result
57
71
  end
58
72
 
59
73
  def primary_key
60
- @klass.primary_key
74
+ @destination.primary_key
61
75
  end
62
76
 
63
- def column_names
64
- @klass.column_names
77
+ def destination_columns
78
+ @destination.column_names
65
79
  end
66
80
 
67
81
  def quoted_table_name
68
- @klass.quoted_table_name
82
+ @destination.quoted_table_name
69
83
  end
70
84
 
71
- def get_columns
72
- columns_list = @options[:columns] ? @options[:columns].map(&:to_s) : []
73
- if @options[:header]
74
- #if header is present, we need to strip it from io, whether we use it for the columns list or not.
75
- line = @source.gets
76
- if columns_list.empty?
77
- columns_list = line.strip.split(@options[:delimiter])
78
- end
79
- end
80
- columns_list = columns_list.map{|c| @options[:map][c.to_s] } if @options[:map]
81
- return columns_list
85
+ def source_columns
86
+ @source.columns
82
87
  end
83
88
 
84
89
  def columns_string_for_copy
@@ -87,29 +92,37 @@ module PostgresUpsert
87
92
  end
88
93
 
89
94
  def columns_string_for_select
90
- columns = @columns_list.clone
91
- columns << "created_at" if column_names.include?("created_at")
92
- columns << "updated_at" if column_names.include?("updated_at")
93
- str = get_columns_string(columns)
95
+ columns = source_columns.clone
96
+ columns << 'created_at' if inject_create_timestamp?
97
+ columns << 'updated_at' if inject_update_timestamp?
98
+ get_columns_string(columns)
94
99
  end
95
100
 
96
101
  def columns_string_for_insert
97
- columns = @columns_list.clone
98
- columns << "created_at" if column_names.include?("created_at")
99
- columns << "updated_at" if column_names.include?("updated_at")
100
- str = get_columns_string(columns)
102
+ columns = source_columns.clone
103
+ columns << 'created_at' if inject_create_timestamp?
104
+ columns << 'updated_at' if inject_update_timestamp?
105
+ get_columns_string(columns)
101
106
  end
102
107
 
103
108
  def select_string_for_insert
104
- columns = @columns_list.clone
109
+ columns = source_columns.clone
105
110
  str = get_columns_string(columns)
106
- str << ",'#{DateTime.now.utc}'" if column_names.include?("created_at")
107
- str << ",'#{DateTime.now.utc}'" if column_names.include?("updated_at")
111
+ str << ",'#{DateTime.now.utc}'" if inject_create_timestamp?
112
+ str << ",'#{DateTime.now.utc}'" if inject_update_timestamp?
108
113
  str
109
114
  end
110
115
 
116
+ def inject_create_timestamp?
117
+ destination_columns.include?('created_at') && !source_columns.include?('created_at')
118
+ end
119
+
120
+ def inject_update_timestamp?
121
+ destination_columns.include?('updated_at') && !source_columns.include?('updated_at')
122
+ end
123
+
111
124
  def select_string_for_create
112
- columns = @columns_list.map(&:to_sym)
125
+ columns = source_columns.map(&:to_sym)
113
126
  @options[:unique_key].each do |key_component|
114
127
  columns << key_component.to_sym unless columns.include?(key_component.to_sym)
115
128
  end
@@ -117,12 +130,12 @@ module PostgresUpsert
117
130
  end
118
131
 
119
132
  def get_columns_string(columns = nil)
120
- columns ||= @columns_list
121
- columns.size > 0 ? "\"#{columns.join('","')}\"" : ""
133
+ columns ||= source_columns
134
+ !columns.empty? ? "\"#{columns.join('","')}\"" : ''
122
135
  end
123
136
 
124
137
  def generate_temp_table_name
125
- @temp_table_name = "#{@table_name}_temp_#{rand(1000)}"
138
+ @temp_table_name ||= "#{@table_name}_temp_#{rand(1000)}"
126
139
  end
127
140
 
128
141
  def upsert_from_temp_table
@@ -135,17 +148,19 @@ module PostgresUpsert
135
148
  UPDATE #{quoted_table_name} AS d
136
149
  #{update_set_clause}
137
150
  FROM #{@temp_table_name} as t
138
- WHERE #{unique_key_select("t", "d")}
139
- AND #{unique_key_present("d")}
151
+ WHERE #{unique_key_select('t', 'd')}
152
+ AND #{unique_key_present('d')}
140
153
  SQL
141
154
  end
142
155
 
143
156
  def update_set_clause
144
- command = @columns_list.map do |col|
157
+ command = source_columns.map do |col|
145
158
  "\"#{col}\" = t.\"#{col}\""
146
159
  end
147
- command << "\"updated_at\" = '#{DateTime.now.utc}'" if column_names.include?("updated_at")
148
- "SET #{command.join(',')}"
160
+ unless source_columns.include?('updated_at')
161
+ command << "\"updated_at\" = '#{DateTime.now.utc}'" if destination_columns.include?('updated_at')
162
+ end
163
+ "SET #{command.join(',')}"
149
164
  end
150
165
 
151
166
  def insert_from_temp_table
@@ -158,35 +173,38 @@ module PostgresUpsert
158
173
  WHERE NOT EXISTS
159
174
  (SELECT 1
160
175
  FROM #{quoted_table_name} as d
161
- WHERE #{unique_key_select("t", "d")});
176
+ WHERE #{unique_key_select('t', 'd')});
162
177
  SQL
163
178
  end
164
179
 
165
180
  def unique_key_select(source, dest)
166
- @options[:unique_key].map {|field| "#{source}.#{field} = #{dest}.#{field}"}.join(' AND ')
181
+ @options[:unique_key].map { |field| "#{source}.#{field} = #{dest}.#{field}" }.join(' AND ')
167
182
  end
168
183
 
169
184
  def unique_key_present(source)
170
- @options[:unique_key].map {|field| "#{source}.#{field} IS NOT NULL"}.join(' AND ')
185
+ @options[:unique_key].map { |field| "#{source}.#{field} IS NOT NULL" }.join(' AND ')
171
186
  end
172
187
 
173
188
  def create_temp_table
174
- columns_string = select_string_for_create
175
- verify_temp_has_key
189
+ generate_temp_table_name
176
190
  database_connection.execute <<-SQL
177
191
  SET client_min_messages=WARNING;
178
192
  DROP TABLE IF EXISTS #{@temp_table_name};
179
193
 
180
194
  CREATE TEMP TABLE #{@temp_table_name}
181
- AS SELECT #{columns_string} FROM #{quoted_table_name} WHERE 0 = 1;
195
+ AS SELECT #{select_string_for_create} FROM #{quoted_table_name} WHERE 0 = 1;
182
196
  SQL
183
197
  end
184
198
 
185
- def verify_temp_has_key
199
+ def validate_options
200
+ if source_columns.empty?
201
+ raise 'Either the :columns option or :header => true are required'
202
+ end
203
+
186
204
  @options[:unique_key].each do |key_component|
187
- unless @columns_list.include?(key_component.to_s)
188
- raise "Expected a unique column '#{key_component}' but the source data does not include this column. Update the :columns list or explicitly set the unique_key option.}"
189
- end
205
+ unless source_columns.include?(key_component.to_s)
206
+ raise "Expected column '#{key_component}' was not found in source"
207
+ end
190
208
  end
191
209
  end
192
210
 
@@ -5,7 +5,7 @@ $:.unshift lib unless $:.include?(lib)
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "postgres_upsert"
8
- s.version = "5.0.0"
8
+ s.version = "5.1.0"
9
9
 
10
10
  s.platform = Gem::Platform::RUBY
11
11
  s.required_ruby_version = ">= 1.8.7"
@@ -13,6 +13,7 @@ Gem::Specification.new do |s|
13
13
  s.date = "2014-09-12"
14
14
  s.description = "Uses Postgres's powerful COPY command to upsert large sets of data into ActiveRecord tables"
15
15
  s.email = "thestevemitchell@gmail.com"
16
+ s.license = "MIT"
16
17
  git_files = `git ls-files`.split("\n") rescue ''
17
18
  s.files = git_files
18
19
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -27,7 +28,9 @@ Gem::Specification.new do |s|
27
28
  s.add_dependency "rails", '>= 3.0.0'
28
29
  s.add_development_dependency "bundler"
29
30
  s.add_development_dependency "pry-rails"
30
- s.add_development_dependency "rspec", "~> 2.12"
31
- s.add_development_dependency "rspec-rails", "~> 2.0"
31
+ s.add_development_dependency "pry-nav"
32
+ s.add_development_dependency "rspec-rails", ">= 3.9"
33
+ s.add_development_dependency "database_cleaner-active_record"
34
+ s.add_development_dependency "rubocop"
32
35
  end
33
36
 
@@ -1,12 +1,6 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
3
  describe "pg_upsert from file with CSV format" do
4
- before(:each) do
5
- ActiveRecord::Base.connection.execute %{
6
- TRUNCATE TABLE composite_key_models;
7
- SELECT setval('composite_key_models_id_seq', 1, false);
8
- }
9
- end
10
4
 
11
5
  before do
12
6
  DateTime.stub_chain(:now, :utc).and_return (DateTime.parse("2012-01-01").utc)
@@ -0,0 +1,3 @@
1
+ id,data
2
+ 1,test data 1
3
+ 1,test data dupe
@@ -0,0 +1,4 @@
require 'postgres_upsert'

# Empty model: it defines nothing of its own and relies entirely on what it
# inherits from ActiveRecord::Base.
class TestModelCopy < ActiveRecord::Base
end
@@ -0,0 +1,40 @@
require "rails_helper"

# Specs for PostgresUpsert.write when both the destination and the source are
# model classes (TestModelCopy <- TestModel).
describe PostgresUpsert do
  context "when passing ActiveRecord class as destination" do
    context "when passing ActiveRecord clas as Source" do
      let(:original_created_at) { 5.days.ago.utc }

      # Every example starts with one pre-existing source row.
      before(:each) do
        TestModel.create(data: "From the before time, in the long long ago", :created_at => original_created_at)
      end

      it "copies the source to destination" do
        PostgresUpsert.write TestModelCopy, TestModel
        # Compare the copied row against the SOURCE row. Comparing the copy
        # with itself can never fail and therefore verifies nothing.
        # NOTE(review): assumes both tables expose the same set of columns —
        # confirm against the spec schema.
        expect(
          TestModelCopy.first.attributes
        ).to eq(TestModel.first.attributes)
      end

      context "with a large table" do
        # Loads ids 1..100_000 into the source table from an in-memory CSV.
        before do
          csv_string = CSV.generate do |csv|
            csv << %w(id data) # CSV header row
            (1..100_000).each do |n|
              csv << ["#{n}", "data about #{n}"]
            end
          end
          io = StringIO.new(csv_string)
          PostgresUpsert.write TestModel, io
        end

        it "moves like the poop through a goose" do
          expect {
            PostgresUpsert.write TestModelCopy, TestModel
          }.to change { TestModelCopy.count }.by(100_000)
        end
      end
    end
  end
end