postgres_upsert 1.1.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a30c825147ef323c1e0d60d9fc08e9668b6bdf02
4
- data.tar.gz: d3f8799b61a273b4abde1eae7a6b38eea3ba614c
3
+ metadata.gz: 363018cec57166d2976cbeedc60fad9dc17e5ac7
4
+ data.tar.gz: 280a43ba9f6dea9a72111031c5c7e82089185f72
5
5
  SHA512:
6
- metadata.gz: 07649b13cae7be995e12c02e5418bde744f932127587141a160bd09f502f9342f2ce5f2038d20953eed9425043f67c056dde96dec78ed5a8fcf5ec0c11392bea
7
- data.tar.gz: e963592a3c91c61d206d018cbcc63aa07995a50165c97dbb88e9352e8b16c76cdcbaef85a9c9b655716d0e0ab387c9c155bf1890200374ce074e5405899929fd
6
+ metadata.gz: 195008d31407158e4cecf27fbd1aec45b6b3c17a078efe5dcd567e4ec8d023f187d597cf0b1a5a1e46e53c901e957a7c70b6ceccdd1aa27231f1cd2f727b194d
7
+ data.tar.gz: fbf6e4e15fc23ab1c344a1a8bdd9ba80733cc77c776b6eed98c05936a0dd6cd4889f387e43f028c8c380a2e7d4beb9f3ecc58ef5374183ff3891821077e0fb37
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- postgres_upsert (1.0.0)
4
+ postgres_upsert (1.1.0)
5
5
  activerecord (>= 3.0.0)
6
6
  pg (~> 0.17.0)
7
7
  rails (>= 3.0.0)
data/README.md CHANGED
@@ -19,9 +19,16 @@ Run the bundle command
19
19
 
20
20
  The gem will add the additional class method to ActiveRecord::Base
21
21
 
22
- * pg_upsert
22
+ * pg_upsert io_object_or_file_path, [options]
23
23
 
24
- ### Using pg_upsert
24
+ io_object_or_file_path => is a file path or an io object (StringIO, FileIO, etc.)
25
+
26
+ options:
27
+ :delimiter - the string to use to delimit fields. Default is ","
28
+ :format - the format of the file (valid formats are :csv or :binary). Default is :csv
29
+ :header => specifies if the file/io source contains a header row. Either :header option must be true, or :columns list must be passed. Default true
30
+ :key_column => the primary key or unique key column on your ActiveRecord table, used to distinguish new records from existing records. Default is the primary_key of your ActiveRecord model class.
31
+ :update_only => when true, postgres_upsert will ONLY update existing records, and not insert new. Default is false.
25
32
 
26
33
  pg_upsert will allow you to copy data from an arbitrary IO object or from a file in the database server (when you pass the path as a string).
27
34
  Let's first copy from a file in the database server, assuming again that we have a users table and
@@ -39,17 +46,6 @@ User.pg_upsert "/tmp/users.csv", :map => {'name' => 'first_name'}
39
46
  ```
40
47
 
41
48
  In the above example the header name in the CSV file will be mapped to the field called first_name in the users table.
42
- You can also manipulate and modify the values of the file being imported before they enter into the database using a block:
43
-
44
- ```ruby
45
- User.pg_upsert "/tmp/users.csv" do |row|
46
- row[0] = "fixed string"
47
- end
48
- ```
49
-
50
- The above example will always change the value of the first column to "fixed string" before storing it into the database.
51
- For each iteration of the block row receives an array with the same order as the columns in the CSV file.
52
-
53
49
 
54
50
  To copy a binary formatted data file or IO object you can specify the format as binary
55
51
 
@@ -85,6 +81,7 @@ User.pg_upsert "/tmp/users.dat", :format => :binary, :key_column => ["external_t
85
81
 
86
82
  obviously, the field you pass must be a unique key in your database (this is not enforced at the moment, but will be)
87
83
 
84
+ Passing :update_only => true will ensure that no new records are created; existing records will still be updated.
88
85
 
89
86
  ## Note on Patches/Pull Requests
90
87
 
@@ -7,171 +7,7 @@ module ActiveRecord
7
7
  # * You can map fields from the file to different fields in the table using a map in the options hash
8
8
  # * For further details on usage take a look at the README.md
9
9
  def self.pg_upsert path_or_io, options = {}
10
- options.reverse_merge!({:delimiter => ",", :format => :csv, :header => true, :key_column => primary_key})
11
- options_string = options[:format] == :binary ? "BINARY" : "DELIMITER '#{options[:delimiter]}' CSV"
12
-
13
- io = path_or_io.instance_of?(String) ? File.open(path_or_io, 'r') : path_or_io
14
- columns_list = get_columns(io, options)
15
-
16
- if columns_list.empty?
17
- raise "Either the :columns option or :header => true are required"
18
- end
19
- copy_table = get_temp_table_name(options)
20
- destination_table = get_table_name(options)
21
-
22
- columns_string = columns_string_for_copy(columns_list)
23
- create_temp_table(copy_table, destination_table, columns_list, options) if destination_table
24
-
25
- connection.raw_connection.copy_data %{COPY #{copy_table} #{columns_string} FROM STDIN #{options_string}} do
26
- if block_given?
27
- block = Proc.new
28
- end
29
- while line = read_input_line(io, options, &block) do
30
- next if line.strip.size == 0
31
- connection.raw_connection.put_copy_data line
32
- end
33
- end
34
-
35
- if destination_table
36
- upsert_from_temp_table(copy_table, destination_table, columns_list, options)
37
- drop_temp_table(copy_table)
38
- end
39
- end
40
-
41
- private
42
-
43
- def self.get_columns(io, options)
44
- columns_list = options[:columns] || []
45
- if options[:format] != :binary && options[:header]
46
- #if header is present, we need to strip it from io, whether we use it for the columns list or not.
47
- line = io.gets
48
- if columns_list.empty?
49
- columns_list = line.strip.split(options[:delimiter])
50
- end
51
- end
52
- columns_list = columns_list.map{|c| options[:map][c.to_s] } if options[:map]
53
- return columns_list
54
- end
55
-
56
- def self.columns_string_for_copy(columns_list)
57
- str = get_columns_string(columns_list)
58
- str.empty? ? str : "(#{str})"
59
- end
60
-
61
- def self.columns_string_for_select(columns_list)
62
- columns = columns_list.clone
63
- columns << "created_at" if column_names.include?("created_at")
64
- columns << "updated_at" if column_names.include?("updated_at")
65
- str = get_columns_string(columns)
66
- end
67
-
68
- def self.columns_string_for_insert(columns_list)
69
- columns = columns_list.clone
70
- columns << "created_at" if column_names.include?("created_at")
71
- columns << "updated_at" if column_names.include?("updated_at")
72
- str = get_columns_string(columns)
73
- end
74
-
75
- def self.select_string_for_insert(columns_list)
76
- columns = columns_list.clone
77
- str = get_columns_string(columns)
78
- str << ",'#{DateTime.now.utc}'" if column_names.include?("created_at")
79
- str << ",'#{DateTime.now.utc}'" if column_names.include?("updated_at")
80
- str
81
- end
82
-
83
- def self.select_string_for_create(columns_list, options)
84
- columns = columns_list.map(&:to_sym)
85
- columns << options[:key_column].to_sym unless columns.include?(options[:key_column].to_sym)
86
- get_columns_string(columns)
87
- end
88
-
89
- def self.get_columns_string(columns_list)
90
- columns_list.size > 0 ? "\"#{columns_list.join('","')}\"" : ""
91
- end
92
-
93
- def self.get_table_name(options)
94
- if options[:table]
95
- connection.quote_table_name(options[:table])
96
- else
97
- quoted_table_name
98
- end
99
- end
100
-
101
- def self.get_temp_table_name(options)
102
- "#{table_name}_temp_#{rand(1000)}"
103
- end
104
-
105
- def self.read_input_line(io, options)
106
- if options[:format] == :binary
107
- begin
108
- return io.readpartial(10240)
109
- rescue EOFError
110
- end
111
- else
112
- line = io.gets
113
- if block_given? && line
114
- row = line.strip.split(options[:delimiter])
115
- yield(row)
116
- line = row.join(options[:delimiter]) + "\n"
117
- end
118
- return line
119
- end
120
- end
121
-
122
- def self.upsert_from_temp_table(temp_table, dest_table, columns_list, options)
123
- update_from_temp_table(temp_table, dest_table, columns_list, options)
124
- insert_from_temp_table(temp_table, dest_table, columns_list, options)
125
- end
126
-
127
- def self.update_from_temp_table(temp_table, dest_table, columns_list, options)
128
- ActiveRecord::Base.connection.execute <<-SQL
129
- UPDATE #{dest_table} AS d
130
- #{update_set_clause(columns_list)}
131
- FROM #{temp_table} as t
132
- WHERE t.#{options[:key_column]} = d.#{options[:key_column]}
133
- AND d.#{options[:key_column]} IS NOT NULL;
134
- SQL
135
- end
136
-
137
- def self.update_set_clause(columns_list)
138
- command = columns_list.map do |col|
139
- "\"#{col}\" = t.\"#{col}\""
140
- end
141
- command << "\"updated_at\" = '#{DateTime.now.utc}'" if column_names.include?("updated_at")
142
- "SET #{command.join(',')}"
143
- end
144
-
145
- def self.insert_from_temp_table(temp_table, dest_table, columns_list, options)
146
- columns_string = columns_string_for_insert(columns_list)
147
- select_string = select_string_for_insert(columns_list)
148
- ActiveRecord::Base.connection.execute <<-SQL
149
- INSERT INTO #{dest_table} (#{columns_string})
150
- SELECT #{select_string}
151
- FROM #{temp_table} as t
152
- WHERE NOT EXISTS
153
- (SELECT 1
154
- FROM #{dest_table} as d
155
- WHERE d.#{options[:key_column]} = t.#{options[:key_column]})
156
- AND t.#{options[:key_column]} IS NOT NULL;
157
- SQL
158
- end
159
-
160
- def self.create_temp_table(temp_table, dest_table, columns_list, options)
161
- columns_string = select_string_for_create(columns_list, options)
162
- ActiveRecord::Base.connection.execute <<-SQL
163
- SET client_min_messages=WARNING;
164
- DROP TABLE IF EXISTS #{temp_table};
165
-
166
- CREATE TEMP TABLE #{temp_table}
167
- AS SELECT #{columns_string} FROM #{dest_table} WHERE 0 = 1;
168
- SQL
169
- end
170
-
171
- def self.drop_temp_table(temp_table)
172
- ActiveRecord::Base.connection.execute <<-SQL
173
- DROP TABLE #{temp_table}
174
- SQL
10
+ PostgresUpsert::Writer.new(self, path_or_io, options).write
175
11
  end
176
12
  end
177
13
  end
@@ -0,0 +1,179 @@
1
+ module PostgresUpsert
2
+
3
+ class Writer
4
+
5
+ def initialize(klass, source, options = {})
6
+ @klass = klass
7
+ @options = options.reverse_merge({
8
+ :delimiter => ",",
9
+ :format => :csv,
10
+ :header => true,
11
+ :key_column => @klass.primary_key,
12
+ :update_only => false})
13
+ @source = source.instance_of?(String) ? File.open(source, 'r') : source
14
+ @columns_list = get_columns
15
+ generate_temp_table_name
16
+ end
17
+
18
+ def write
19
+ if @columns_list.empty?
20
+ raise "Either the :columns option or :header => true are required"
21
+ end
22
+
23
+ csv_options = @options[:format] == :binary ? "BINARY" : "DELIMITER '#{@options[:delimiter]}' CSV"
24
+
25
+ copy_table = @temp_table_name
26
+ destination_table = get_table_name
27
+
28
+ columns_string = columns_string_for_copy
29
+ create_temp_table
30
+
31
+ ActiveRecord::Base.connection.raw_connection.copy_data %{COPY #{copy_table} #{columns_string} FROM STDIN #{csv_options}} do
32
+
33
+ while line = read_input_line do
34
+ next if line.strip.size == 0
35
+ ActiveRecord::Base.connection.raw_connection.put_copy_data line
36
+ end
37
+ end
38
+
39
+ if destination_table
40
+ upsert_from_temp_table
41
+ drop_temp_table
42
+ end
43
+ end
44
+
45
+ private
46
+
47
+ def get_columns
48
+ columns_list = @options[:columns] || []
49
+ if @options[:format] != :binary && @options[:header]
50
+ #if header is present, we need to strip it from io, whether we use it for the columns list or not.
51
+ line = @source.gets
52
+ if columns_list.empty?
53
+ columns_list = line.strip.split(@options[:delimiter])
54
+ end
55
+ end
56
+ columns_list = columns_list.map{|c| @options[:map][c.to_s] } if @options[:map]
57
+ return columns_list
58
+ end
59
+
60
+ def columns_string_for_copy
61
+ str = get_columns_string
62
+ str.empty? ? str : "(#{str})"
63
+ end
64
+
65
+ def columns_string_for_select
66
+ columns = @columns_list.clone
67
+ columns << "created_at" if @klass.column_names.include?("created_at")
68
+ columns << "updated_at" if @klass.column_names.include?("updated_at")
69
+ str = get_columns_string(columns)
70
+ end
71
+
72
+ def columns_string_for_insert
73
+ columns = @columns_list.clone
74
+ columns << "created_at" if @klass.column_names.include?("created_at")
75
+ columns << "updated_at" if @klass.column_names.include?("updated_at")
76
+ str = get_columns_string(columns)
77
+ end
78
+
79
+ def select_string_for_insert
80
+ columns = @columns_list.clone
81
+ str = get_columns_string(columns)
82
+ str << ",'#{DateTime.now.utc}'" if @klass.column_names.include?("created_at")
83
+ str << ",'#{DateTime.now.utc}'" if @klass.column_names.include?("updated_at")
84
+ str
85
+ end
86
+
87
+ def select_string_for_create
88
+ columns = @columns_list.map(&:to_sym)
89
+ columns << @options[:key_column].to_sym unless columns.include?(@options[:key_column].to_sym)
90
+ get_columns_string(columns)
91
+ end
92
+
93
+ def get_columns_string(columns = nil)
94
+ columns ||= @columns_list
95
+ columns.size > 0 ? "\"#{columns.join('","')}\"" : ""
96
+ end
97
+
98
+ def get_table_name
99
+ if @options[:table]
100
+ connection.quote_table_name(@options[:table])
101
+ else
102
+ @klass.quoted_table_name
103
+ end
104
+ end
105
+
106
+ def generate_temp_table_name
107
+ @temp_table_name = "#{@klass.table_name}_temp_#{rand(1000)}"
108
+ end
109
+
110
+ def read_input_line
111
+ if @options[:format] == :binary
112
+ begin
113
+ return @source.readpartial(10240)
114
+ rescue EOFError
115
+ end
116
+ else
117
+ line = @source.gets
118
+ return line
119
+ end
120
+ end
121
+
122
+ def upsert_from_temp_table
123
+ update_from_temp_table
124
+ insert_from_temp_table unless @options[:update_only]
125
+ end
126
+
127
+ def update_from_temp_table
128
+ ActiveRecord::Base.connection.execute <<-SQL
129
+ UPDATE #{get_table_name} AS d
130
+ #{update_set_clause}
131
+ FROM #{@temp_table_name} as t
132
+ WHERE t.#{@options[:key_column]} = d.#{@options[:key_column]}
133
+ AND d.#{@options[:key_column]} IS NOT NULL;
134
+ SQL
135
+ end
136
+
137
+ def update_set_clause
138
+ command = @columns_list.map do |col|
139
+ "\"#{col}\" = t.\"#{col}\""
140
+ end
141
+ command << "\"updated_at\" = '#{DateTime.now.utc}'" if @klass.column_names.include?("updated_at")
142
+ "SET #{command.join(',')}"
143
+ end
144
+
145
+ def insert_from_temp_table
146
+ columns_string = columns_string_for_insert
147
+ select_string = select_string_for_insert
148
+ ActiveRecord::Base.connection.execute <<-SQL
149
+ INSERT INTO #{get_table_name} (#{columns_string})
150
+ SELECT #{select_string}
151
+ FROM #{@temp_table_name} as t
152
+ WHERE NOT EXISTS
153
+ (SELECT 1
154
+ FROM #{get_table_name} as d
155
+ WHERE d.#{@options[:key_column]} = t.#{@options[:key_column]})
156
+ AND t.#{@options[:key_column]} IS NOT NULL;
157
+ SQL
158
+ end
159
+
160
+ def create_temp_table
161
+ columns_string = select_string_for_create
162
+ ActiveRecord::Base.connection.execute <<-SQL
163
+ SET client_min_messages=WARNING;
164
+ DROP TABLE IF EXISTS #{@temp_table_name};
165
+
166
+ CREATE TEMP TABLE #{@temp_table_name}
167
+ AS SELECT #{columns_string} FROM #{get_table_name} WHERE 0 = 1;
168
+ SQL
169
+ end
170
+
171
+ def drop_temp_table
172
+ ActiveRecord::Base.connection.execute <<-SQL
173
+ DROP TABLE #{@temp_table_name}
174
+ SQL
175
+ end
176
+ end
177
+
178
+
179
+ end
@@ -1,6 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'active_record'
3
3
  require 'postgres_upsert/active_record'
4
+ require 'postgres_upsert/writer'
4
5
  require 'rails'
5
6
 
6
7
  class PostgresCopy < Rails::Railtie
@@ -5,7 +5,7 @@ $:.unshift lib unless $:.include?(lib)
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "postgres_upsert"
8
- s.version = "1.1.0"
8
+ s.version = "2.0.0"
9
9
 
10
10
  s.platform = Gem::Platform::RUBY
11
11
  s.required_ruby_version = ">= 1.8.7"
@@ -64,29 +64,6 @@ describe "pg_upsert from file with CSV format" do
64
64
  ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
65
65
  end
66
66
 
67
- it "should import and allow changes in block" do
68
- TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')) do |row|
69
- row[1] = 'changed this data'
70
- end
71
- expect(
72
- TestModel.first.attributes
73
- ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
74
- end
75
-
76
- it "should import 2 lines and allow changes in block" do
77
- TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_two_lines.csv'), 'r'), :delimiter => "\t") do |row|
78
- row[1] = 'changed this data'
79
- end
80
-
81
- expect(
82
- TestModel.find(1).attributes
83
- ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
84
- expect(
85
- TestModel.find(2).attributes
86
- ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
87
- expect(TestModel.count).to eq 2
88
- end
89
-
90
67
  it "should not expect a header when :header is false" do
91
68
  TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/comma_without_header.csv'), 'r'), :header => false, :columns => [:id,:data])
92
69
 
@@ -206,5 +183,24 @@ describe "pg_upsert from file with CSV format" do
206
183
  ).to include("id" => 1, "data" => "old stuff", "extra" => "ABC: Always Be Changing.")
207
184
  end
208
185
  end
186
+
187
+ context 'update only' do
188
+ let(:original_created_at) {5.days.ago.utc}
189
+ before(:each) do
190
+ TestModel.create(id: 1, data: "From the before time, in the long long ago", :created_at => original_created_at)
191
+ end
192
+ it 'will only update and not insert if insert_only flag is passed.' do
193
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t", :update_only => true
194
+
195
+ expect(
196
+ TestModel.find(1).attributes
197
+ ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at , "updated_at" => timestamp)
198
+ expect{
199
+ TestModel.find(2)
200
+ }.to raise_error(ActiveRecord::RecordNotFound)
201
+
202
+ end
203
+
204
+ end
209
205
  end
210
206
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: postgres_upsert
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Steve Mitchell
@@ -124,6 +124,7 @@ files:
124
124
  - VERSION
125
125
  - lib/postgres_upsert.rb
126
126
  - lib/postgres_upsert/active_record.rb
127
+ - lib/postgres_upsert/writer.rb
127
128
  - postgres_upsert.gemspec
128
129
  - spec/fixtures/2_col_binary_data.dat
129
130
  - spec/fixtures/comma_with_header.csv