postgres_upsert 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a30c825147ef323c1e0d60d9fc08e9668b6bdf02
4
- data.tar.gz: d3f8799b61a273b4abde1eae7a6b38eea3ba614c
3
+ metadata.gz: 363018cec57166d2976cbeedc60fad9dc17e5ac7
4
+ data.tar.gz: 280a43ba9f6dea9a72111031c5c7e82089185f72
5
5
  SHA512:
6
- metadata.gz: 07649b13cae7be995e12c02e5418bde744f932127587141a160bd09f502f9342f2ce5f2038d20953eed9425043f67c056dde96dec78ed5a8fcf5ec0c11392bea
7
- data.tar.gz: e963592a3c91c61d206d018cbcc63aa07995a50165c97dbb88e9352e8b16c76cdcbaef85a9c9b655716d0e0ab387c9c155bf1890200374ce074e5405899929fd
6
+ metadata.gz: 195008d31407158e4cecf27fbd1aec45b6b3c17a078efe5dcd567e4ec8d023f187d597cf0b1a5a1e46e53c901e957a7c70b6ceccdd1aa27231f1cd2f727b194d
7
+ data.tar.gz: fbf6e4e15fc23ab1c344a1a8bdd9ba80733cc77c776b6eed98c05936a0dd6cd4889f387e43f028c8c380a2e7d4beb9f3ecc58ef5374183ff3891821077e0fb37
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- postgres_upsert (1.0.0)
4
+ postgres_upsert (1.1.0)
5
5
  activerecord (>= 3.0.0)
6
6
  pg (~> 0.17.0)
7
7
  rails (>= 3.0.0)
data/README.md CHANGED
@@ -19,9 +19,16 @@ Run the bundle command
19
19
 
20
20
  The gem will add the additional class method to ActiveRecord::Base
21
21
 
22
- * pg_upsert
22
+ * pg_upsert io_object_or_file_path, [options]
23
23
 
24
- ### Using pg_upsert
24
+ io_object_or_file_path => is a file path or an io object (StringIO, FileIO, etc.)
25
+
26
+ options:
27
+ :delimiter - the string to use to delimit fields. Default is ","
28
+ :format - the format of the file (valid formats are :csv or :binary). Default is :csv
29
+ :header => specifies if the file/io source contains a header row. Either :header option must be true, or :columns list must be passed. Default true
30
+ :key_column => the primary key or unique key column on your ActiveRecord table, used to distinguish new records from existing records. Default is the primary_key of your ActiveRecord model class.
31
+ :update_only => when true, postgres_upsert will ONLY update existing records, and not insert new. Default is false.
25
32
 
26
33
  pg_upsert will allow you to copy data from an arbitrary IO object or from a file in the database server (when you pass the path as a string).
27
34
  Let's first copy from a file in the database server, assuming again that we have a users table and
@@ -39,17 +46,6 @@ User.pg_upsert "/tmp/users.csv", :map => {'name' => 'first_name'}
39
46
  ```
40
47
 
41
48
  In the above example the header name in the CSV file will be mapped to the field called first_name in the users table.
42
- You can also manipulate and modify the values of the file being imported before they enter into the database using a block:
43
-
44
- ```ruby
45
- User.pg_upsert "/tmp/users.csv" do |row|
46
- row[0] = "fixed string"
47
- end
48
- ```
49
-
50
- The above example will always change the value of the first column to "fixed string" before storing it into the database.
51
- For each iteration of the block row receives an array with the same order as the columns in the CSV file.
52
-
53
49
 
54
50
  To copy a binary formatted data file or IO object you can specify the format as binary
55
51
 
@@ -85,6 +81,7 @@ User.pg_upsert "/tmp/users.dat", :format => :binary, :key_column => ["external_t
85
81
 
86
82
  obviously, the field you pass must be a unique key in your database (this is not enforced at the moment, but will be)
87
83
 
84
+ passing :update_only => true will ensure that no new records are created, but existing records will still be updated.
88
85
 
89
86
  ## Note on Patches/Pull Requests
90
87
 
@@ -7,171 +7,7 @@ module ActiveRecord
7
7
  # * You can map fields from the file to different fields in the table using a map in the options hash
8
8
  # * For further details on usage take a look at the README.md
9
9
  def self.pg_upsert path_or_io, options = {}
10
- options.reverse_merge!({:delimiter => ",", :format => :csv, :header => true, :key_column => primary_key})
11
- options_string = options[:format] == :binary ? "BINARY" : "DELIMITER '#{options[:delimiter]}' CSV"
12
-
13
- io = path_or_io.instance_of?(String) ? File.open(path_or_io, 'r') : path_or_io
14
- columns_list = get_columns(io, options)
15
-
16
- if columns_list.empty?
17
- raise "Either the :columns option or :header => true are required"
18
- end
19
- copy_table = get_temp_table_name(options)
20
- destination_table = get_table_name(options)
21
-
22
- columns_string = columns_string_for_copy(columns_list)
23
- create_temp_table(copy_table, destination_table, columns_list, options) if destination_table
24
-
25
- connection.raw_connection.copy_data %{COPY #{copy_table} #{columns_string} FROM STDIN #{options_string}} do
26
- if block_given?
27
- block = Proc.new
28
- end
29
- while line = read_input_line(io, options, &block) do
30
- next if line.strip.size == 0
31
- connection.raw_connection.put_copy_data line
32
- end
33
- end
34
-
35
- if destination_table
36
- upsert_from_temp_table(copy_table, destination_table, columns_list, options)
37
- drop_temp_table(copy_table)
38
- end
39
- end
40
-
41
- private
42
-
43
- def self.get_columns(io, options)
44
- columns_list = options[:columns] || []
45
- if options[:format] != :binary && options[:header]
46
- #if header is present, we need to strip it from io, whether we use it for the columns list or not.
47
- line = io.gets
48
- if columns_list.empty?
49
- columns_list = line.strip.split(options[:delimiter])
50
- end
51
- end
52
- columns_list = columns_list.map{|c| options[:map][c.to_s] } if options[:map]
53
- return columns_list
54
- end
55
-
56
- def self.columns_string_for_copy(columns_list)
57
- str = get_columns_string(columns_list)
58
- str.empty? ? str : "(#{str})"
59
- end
60
-
61
- def self.columns_string_for_select(columns_list)
62
- columns = columns_list.clone
63
- columns << "created_at" if column_names.include?("created_at")
64
- columns << "updated_at" if column_names.include?("updated_at")
65
- str = get_columns_string(columns)
66
- end
67
-
68
- def self.columns_string_for_insert(columns_list)
69
- columns = columns_list.clone
70
- columns << "created_at" if column_names.include?("created_at")
71
- columns << "updated_at" if column_names.include?("updated_at")
72
- str = get_columns_string(columns)
73
- end
74
-
75
- def self.select_string_for_insert(columns_list)
76
- columns = columns_list.clone
77
- str = get_columns_string(columns)
78
- str << ",'#{DateTime.now.utc}'" if column_names.include?("created_at")
79
- str << ",'#{DateTime.now.utc}'" if column_names.include?("updated_at")
80
- str
81
- end
82
-
83
- def self.select_string_for_create(columns_list, options)
84
- columns = columns_list.map(&:to_sym)
85
- columns << options[:key_column].to_sym unless columns.include?(options[:key_column].to_sym)
86
- get_columns_string(columns)
87
- end
88
-
89
- def self.get_columns_string(columns_list)
90
- columns_list.size > 0 ? "\"#{columns_list.join('","')}\"" : ""
91
- end
92
-
93
- def self.get_table_name(options)
94
- if options[:table]
95
- connection.quote_table_name(options[:table])
96
- else
97
- quoted_table_name
98
- end
99
- end
100
-
101
- def self.get_temp_table_name(options)
102
- "#{table_name}_temp_#{rand(1000)}"
103
- end
104
-
105
- def self.read_input_line(io, options)
106
- if options[:format] == :binary
107
- begin
108
- return io.readpartial(10240)
109
- rescue EOFError
110
- end
111
- else
112
- line = io.gets
113
- if block_given? && line
114
- row = line.strip.split(options[:delimiter])
115
- yield(row)
116
- line = row.join(options[:delimiter]) + "\n"
117
- end
118
- return line
119
- end
120
- end
121
-
122
- def self.upsert_from_temp_table(temp_table, dest_table, columns_list, options)
123
- update_from_temp_table(temp_table, dest_table, columns_list, options)
124
- insert_from_temp_table(temp_table, dest_table, columns_list, options)
125
- end
126
-
127
- def self.update_from_temp_table(temp_table, dest_table, columns_list, options)
128
- ActiveRecord::Base.connection.execute <<-SQL
129
- UPDATE #{dest_table} AS d
130
- #{update_set_clause(columns_list)}
131
- FROM #{temp_table} as t
132
- WHERE t.#{options[:key_column]} = d.#{options[:key_column]}
133
- AND d.#{options[:key_column]} IS NOT NULL;
134
- SQL
135
- end
136
-
137
- def self.update_set_clause(columns_list)
138
- command = columns_list.map do |col|
139
- "\"#{col}\" = t.\"#{col}\""
140
- end
141
- command << "\"updated_at\" = '#{DateTime.now.utc}'" if column_names.include?("updated_at")
142
- "SET #{command.join(',')}"
143
- end
144
-
145
- def self.insert_from_temp_table(temp_table, dest_table, columns_list, options)
146
- columns_string = columns_string_for_insert(columns_list)
147
- select_string = select_string_for_insert(columns_list)
148
- ActiveRecord::Base.connection.execute <<-SQL
149
- INSERT INTO #{dest_table} (#{columns_string})
150
- SELECT #{select_string}
151
- FROM #{temp_table} as t
152
- WHERE NOT EXISTS
153
- (SELECT 1
154
- FROM #{dest_table} as d
155
- WHERE d.#{options[:key_column]} = t.#{options[:key_column]})
156
- AND t.#{options[:key_column]} IS NOT NULL;
157
- SQL
158
- end
159
-
160
- def self.create_temp_table(temp_table, dest_table, columns_list, options)
161
- columns_string = select_string_for_create(columns_list, options)
162
- ActiveRecord::Base.connection.execute <<-SQL
163
- SET client_min_messages=WARNING;
164
- DROP TABLE IF EXISTS #{temp_table};
165
-
166
- CREATE TEMP TABLE #{temp_table}
167
- AS SELECT #{columns_string} FROM #{dest_table} WHERE 0 = 1;
168
- SQL
169
- end
170
-
171
- def self.drop_temp_table(temp_table)
172
- ActiveRecord::Base.connection.execute <<-SQL
173
- DROP TABLE #{temp_table}
174
- SQL
10
+ PostgresUpsert::Writer.new(self, path_or_io, options).write
175
11
  end
176
12
  end
177
13
  end
@@ -0,0 +1,179 @@
1
+ module PostgresUpsert
2
+
3
+ class Writer
4
+
5
+ def initialize(klass, source, options = {})
6
+ @klass = klass
7
+ @options = options.reverse_merge({
8
+ :delimiter => ",",
9
+ :format => :csv,
10
+ :header => true,
11
+ :key_column => @klass.primary_key,
12
+ :update_only => false})
13
+ @source = source.instance_of?(String) ? File.open(source, 'r') : source
14
+ @columns_list = get_columns
15
+ generate_temp_table_name
16
+ end
17
+
18
+ def write
19
+ if @columns_list.empty?
20
+ raise "Either the :columns option or :header => true are required"
21
+ end
22
+
23
+ csv_options = @options[:format] == :binary ? "BINARY" : "DELIMITER '#{@options[:delimiter]}' CSV"
24
+
25
+ copy_table = @temp_table_name
26
+ destination_table = get_table_name
27
+
28
+ columns_string = columns_string_for_copy
29
+ create_temp_table
30
+
31
+ ActiveRecord::Base.connection.raw_connection.copy_data %{COPY #{copy_table} #{columns_string} FROM STDIN #{csv_options}} do
32
+
33
+ while line = read_input_line do
34
+ next if line.strip.size == 0
35
+ ActiveRecord::Base.connection.raw_connection.put_copy_data line
36
+ end
37
+ end
38
+
39
+ if destination_table
40
+ upsert_from_temp_table
41
+ drop_temp_table
42
+ end
43
+ end
44
+
45
+ private
46
+
47
+ def get_columns
48
+ columns_list = @options[:columns] || []
49
+ if @options[:format] != :binary && @options[:header]
50
+ #if header is present, we need to strip it from io, whether we use it for the columns list or not.
51
+ line = @source.gets
52
+ if columns_list.empty?
53
+ columns_list = line.strip.split(@options[:delimiter])
54
+ end
55
+ end
56
+ columns_list = columns_list.map{|c| @options[:map][c.to_s] } if @options[:map]
57
+ return columns_list
58
+ end
59
+
60
+ def columns_string_for_copy
61
+ str = get_columns_string
62
+ str.empty? ? str : "(#{str})"
63
+ end
64
+
65
+ def columns_string_for_select
66
+ columns = @columns_list.clone
67
+ columns << "created_at" if @klass.column_names.include?("created_at")
68
+ columns << "updated_at" if @klass.column_names.include?("updated_at")
69
+ str = get_columns_string(columns)
70
+ end
71
+
72
+ def columns_string_for_insert
73
+ columns = @columns_list.clone
74
+ columns << "created_at" if @klass.column_names.include?("created_at")
75
+ columns << "updated_at" if @klass.column_names.include?("updated_at")
76
+ str = get_columns_string(columns)
77
+ end
78
+
79
+ def select_string_for_insert
80
+ columns = @columns_list.clone
81
+ str = get_columns_string(columns)
82
+ str << ",'#{DateTime.now.utc}'" if @klass.column_names.include?("created_at")
83
+ str << ",'#{DateTime.now.utc}'" if @klass.column_names.include?("updated_at")
84
+ str
85
+ end
86
+
87
+ def select_string_for_create
88
+ columns = @columns_list.map(&:to_sym)
89
+ columns << @options[:key_column].to_sym unless columns.include?(@options[:key_column].to_sym)
90
+ get_columns_string(columns)
91
+ end
92
+
93
+ def get_columns_string(columns = nil)
94
+ columns ||= @columns_list
95
+ columns.size > 0 ? "\"#{columns.join('","')}\"" : ""
96
+ end
97
+
98
+ def get_table_name
99
+ if @options[:table]
100
+ connection.quote_table_name(@options[:table])
101
+ else
102
+ @klass.quoted_table_name
103
+ end
104
+ end
105
+
106
+ def generate_temp_table_name
107
+ @temp_table_name = "#{@klass.table_name}_temp_#{rand(1000)}"
108
+ end
109
+
110
+ def read_input_line
111
+ if @options[:format] == :binary
112
+ begin
113
+ return @source.readpartial(10240)
114
+ rescue EOFError
115
+ end
116
+ else
117
+ line = @source.gets
118
+ return line
119
+ end
120
+ end
121
+
122
+ def upsert_from_temp_table
123
+ update_from_temp_table
124
+ insert_from_temp_table unless @options[:update_only]
125
+ end
126
+
127
+ def update_from_temp_table
128
+ ActiveRecord::Base.connection.execute <<-SQL
129
+ UPDATE #{get_table_name} AS d
130
+ #{update_set_clause}
131
+ FROM #{@temp_table_name} as t
132
+ WHERE t.#{@options[:key_column]} = d.#{@options[:key_column]}
133
+ AND d.#{@options[:key_column]} IS NOT NULL;
134
+ SQL
135
+ end
136
+
137
+ def update_set_clause
138
+ command = @columns_list.map do |col|
139
+ "\"#{col}\" = t.\"#{col}\""
140
+ end
141
+ command << "\"updated_at\" = '#{DateTime.now.utc}'" if @klass.column_names.include?("updated_at")
142
+ "SET #{command.join(',')}"
143
+ end
144
+
145
+ def insert_from_temp_table
146
+ columns_string = columns_string_for_insert
147
+ select_string = select_string_for_insert
148
+ ActiveRecord::Base.connection.execute <<-SQL
149
+ INSERT INTO #{get_table_name} (#{columns_string})
150
+ SELECT #{select_string}
151
+ FROM #{@temp_table_name} as t
152
+ WHERE NOT EXISTS
153
+ (SELECT 1
154
+ FROM #{get_table_name} as d
155
+ WHERE d.#{@options[:key_column]} = t.#{@options[:key_column]})
156
+ AND t.#{@options[:key_column]} IS NOT NULL;
157
+ SQL
158
+ end
159
+
160
+ def create_temp_table
161
+ columns_string = select_string_for_create
162
+ ActiveRecord::Base.connection.execute <<-SQL
163
+ SET client_min_messages=WARNING;
164
+ DROP TABLE IF EXISTS #{@temp_table_name};
165
+
166
+ CREATE TEMP TABLE #{@temp_table_name}
167
+ AS SELECT #{columns_string} FROM #{get_table_name} WHERE 0 = 1;
168
+ SQL
169
+ end
170
+
171
+ def drop_temp_table
172
+ ActiveRecord::Base.connection.execute <<-SQL
173
+ DROP TABLE #{@temp_table_name}
174
+ SQL
175
+ end
176
+ end
177
+
178
+
179
+ end
@@ -1,6 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'active_record'
3
3
  require 'postgres_upsert/active_record'
4
+ require 'postgres_upsert/writer'
4
5
  require 'rails'
5
6
 
6
7
  class PostgresCopy < Rails::Railtie
@@ -5,7 +5,7 @@ $:.unshift lib unless $:.include?(lib)
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "postgres_upsert"
8
- s.version = "1.1.0"
8
+ s.version = "2.0.0"
9
9
 
10
10
  s.platform = Gem::Platform::RUBY
11
11
  s.required_ruby_version = ">= 1.8.7"
@@ -64,29 +64,6 @@ describe "pg_upsert from file with CSV format" do
64
64
  ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
65
65
  end
66
66
 
67
- it "should import and allow changes in block" do
68
- TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')) do |row|
69
- row[1] = 'changed this data'
70
- end
71
- expect(
72
- TestModel.first.attributes
73
- ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
74
- end
75
-
76
- it "should import 2 lines and allow changes in block" do
77
- TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_two_lines.csv'), 'r'), :delimiter => "\t") do |row|
78
- row[1] = 'changed this data'
79
- end
80
-
81
- expect(
82
- TestModel.find(1).attributes
83
- ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
84
- expect(
85
- TestModel.find(2).attributes
86
- ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
87
- expect(TestModel.count).to eq 2
88
- end
89
-
90
67
  it "should not expect a header when :header is false" do
91
68
  TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/comma_without_header.csv'), 'r'), :header => false, :columns => [:id,:data])
92
69
 
@@ -206,5 +183,24 @@ describe "pg_upsert from file with CSV format" do
206
183
  ).to include("id" => 1, "data" => "old stuff", "extra" => "ABC: Always Be Changing.")
207
184
  end
208
185
  end
186
+
187
+ context 'update only' do
188
+ let(:original_created_at) {5.days.ago.utc}
189
+ before(:each) do
190
+ TestModel.create(id: 1, data: "From the before time, in the long long ago", :created_at => original_created_at)
191
+ end
192
+ it 'will only update and not insert if insert_only flag is passed.' do
193
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t", :update_only => true
194
+
195
+ expect(
196
+ TestModel.find(1).attributes
197
+ ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at , "updated_at" => timestamp)
198
+ expect{
199
+ TestModel.find(2)
200
+ }.to raise_error(ActiveRecord::RecordNotFound)
201
+
202
+ end
203
+
204
+ end
209
205
  end
210
206
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: postgres_upsert
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Mitchell
@@ -124,6 +124,7 @@ files:
124
124
  - VERSION
125
125
  - lib/postgres_upsert.rb
126
126
  - lib/postgres_upsert/active_record.rb
127
+ - lib/postgres_upsert/writer.rb
127
128
  - postgres_upsert.gemspec
128
129
  - spec/fixtures/2_col_binary_data.dat
129
130
  - spec/fixtures/comma_with_header.csv