postgres_upsert 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 00a22a21bea95bc98d7a991c5ed30a5bbea6a67c
4
+ data.tar.gz: 9824edf28ac08e7a8aed28ebcdee8c07c46ffa11
5
+ SHA512:
6
+ metadata.gz: b6183d68e0791491f6417ffbc99b7f2a984090aa3640f1348cfe74e39e11011edce3985029b259bd28296fc7725ad4def14db3f209d95b15f7324d2ebb5dbba3
7
+ data.tar.gz: 4dd39aa87168d5866874de9c6a11097731e015bbdea624535164f413cf1fabb7c9fbca73612c805289db800383d134c5e88f1198efe2cc9d7d365a72f4f2a9be
data/.gitignore ADDED
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # specify gem dependencies in postgres_upsert.gemspec
4
+ # except the platform-specific dependencies below
5
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,112 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ postgres_upsert (0.1.0)
5
+ activerecord (>= 3.0.0)
6
+ pg (~> 0.17.0)
7
+ rails (>= 3.0.0)
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ actionmailer (4.0.3)
13
+ actionpack (= 4.0.3)
14
+ mail (~> 2.5.4)
15
+ actionpack (4.0.3)
16
+ activesupport (= 4.0.3)
17
+ builder (~> 3.1.0)
18
+ erubis (~> 2.7.0)
19
+ rack (~> 1.5.2)
20
+ rack-test (~> 0.6.2)
21
+ activemodel (4.0.3)
22
+ activesupport (= 4.0.3)
23
+ builder (~> 3.1.0)
24
+ activerecord (4.0.3)
25
+ activemodel (= 4.0.3)
26
+ activerecord-deprecated_finders (~> 1.0.2)
27
+ activesupport (= 4.0.3)
28
+ arel (~> 4.0.0)
29
+ activerecord-deprecated_finders (1.0.3)
30
+ activesupport (4.0.3)
31
+ i18n (~> 0.6, >= 0.6.4)
32
+ minitest (~> 4.2)
33
+ multi_json (~> 1.3)
34
+ thread_safe (~> 0.1)
35
+ tzinfo (~> 0.3.37)
36
+ arel (4.0.2)
37
+ builder (3.1.4)
38
+ coderay (1.1.0)
39
+ diff-lcs (1.1.3)
40
+ erubis (2.7.0)
41
+ hike (1.2.3)
42
+ i18n (0.6.11)
43
+ json (1.7.6)
44
+ mail (2.5.4)
45
+ mime-types (~> 1.16)
46
+ treetop (~> 1.4.8)
47
+ method_source (0.8.2)
48
+ mime-types (1.25.1)
49
+ minitest (4.7.5)
50
+ multi_json (1.10.1)
51
+ pg (0.17.1)
52
+ polyglot (0.3.5)
53
+ pry (0.10.1)
54
+ coderay (~> 1.1.0)
55
+ method_source (~> 0.8.1)
56
+ slop (~> 3.4)
57
+ pry-rails (0.3.2)
58
+ pry (>= 0.9.10)
59
+ rack (1.5.2)
60
+ rack-test (0.6.2)
61
+ rack (>= 1.0)
62
+ rails (4.0.3)
63
+ actionmailer (= 4.0.3)
64
+ actionpack (= 4.0.3)
65
+ activerecord (= 4.0.3)
66
+ activesupport (= 4.0.3)
67
+ bundler (>= 1.3.0, < 2.0)
68
+ railties (= 4.0.3)
69
+ sprockets-rails (~> 2.0.0)
70
+ railties (4.0.3)
71
+ actionpack (= 4.0.3)
72
+ activesupport (= 4.0.3)
73
+ rake (>= 0.8.7)
74
+ thor (>= 0.18.1, < 2.0)
75
+ rake (10.3.2)
76
+ rdoc (3.12)
77
+ json (~> 1.4)
78
+ rspec (2.12.0)
79
+ rspec-core (~> 2.12.0)
80
+ rspec-expectations (~> 2.12.0)
81
+ rspec-mocks (~> 2.12.0)
82
+ rspec-core (2.12.2)
83
+ rspec-expectations (2.12.1)
84
+ diff-lcs (~> 1.1.3)
85
+ rspec-mocks (2.12.2)
86
+ slop (3.6.0)
87
+ sprockets (2.11.0)
88
+ hike (~> 1.2)
89
+ multi_json (~> 1.0)
90
+ rack (~> 1.0)
91
+ tilt (~> 1.1, != 1.3.0)
92
+ sprockets-rails (2.0.1)
93
+ actionpack (>= 3.0)
94
+ activesupport (>= 3.0)
95
+ sprockets (~> 2.8)
96
+ thor (0.19.1)
97
+ thread_safe (0.3.4)
98
+ tilt (1.4.1)
99
+ treetop (1.4.15)
100
+ polyglot
101
+ polyglot (>= 0.3.1)
102
+ tzinfo (0.3.40)
103
+
104
+ PLATFORMS
105
+ ruby
106
+
107
+ DEPENDENCIES
108
+ bundler
109
+ postgres_upsert!
110
+ pry-rails
111
+ rdoc
112
+ rspec (~> 2.12)
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Steve Mitchell
4
+ Based on work Copyright (c) Diogo Biazus https://github.com/diogob/postgres-copy
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,84 @@
1
+ # postgres_upsert
2
+
3
+ Allows your rails app to load data in a very fast way, avoiding calls to ActiveRecord.
4
+
5
+ Using the PG gem and postgres's powerful COPY command, you can create thousands of rails objects in your db in a single query.
6
+
7
+
8
+ ## Install
9
+
10
+ Put it in your Gemfile
11
+
12
+ gem 'postgres_upsert'
13
+
14
+ Run the bundle command
15
+
16
+ bundle
17
+
18
+ ## Usage
19
+
20
+ The gem will add the following additional class method to ActiveRecord::Base
21
+
22
+ * pg_upsert
23
+
24
+ ### Using pg_upsert
25
+
26
+ pg_upsert will allow you to copy data from an arbitrary IO object or from a file in the database server (when you pass the path as a string).
27
+ Let's first copy from a file in the database server, assuming again that we have a users table and
28
+ that we are in the Rails console:
29
+
30
+ ```ruby
31
+ User.pg_upsert "/tmp/users.csv"
32
+ ```
33
+
34
+ This command will use the headers in the CSV file as fields of the target table, so beware to always have a header in the files you want to import.
35
+ If the column names in the CSV header do not match the field names of the target table, you can pass a map in the options parameter.
36
+
37
+ ```ruby
38
+ User.pg_upsert "/tmp/users.csv", :map => {'name' => 'first_name'}
39
+ ```
40
+
41
+ In the above example the header name in the CSV file will be mapped to the field called first_name in the users table.
42
+ You can also manipulate and modify the values of the file being imported before they enter into the database using a block:
43
+
44
+ ```ruby
45
+ User.pg_upsert "/tmp/users.csv" do |row|
46
+ row[0] = "fixed string"
47
+ end
48
+ ```
49
+
50
+ The above example will always change the value of the first column to "fixed string" before storing it into the database.
51
+ For each iteration of the block row receives an array with the same order as the columns in the CSV file.
52
+
53
+
54
+ To copy a binary formatted data file or IO object you can specify the format as binary
55
+
56
+ ```ruby
57
+ User.pg_upsert "/tmp/users.dat", :format => :binary, :columns => ["id", "name"]
58
+ ```
59
+
60
+ Which will generate the following SQL command:
61
+
62
+ ```sql
63
+ COPY users (id, name) FROM '/tmp/users.dat' WITH BINARY
64
+ ```
65
+
66
+ NOTE: binary files do not include header columns, so passing a :columns array is required for binary files.
67
+
68
+
69
+ pg_upsert supports 'upsert' or 'merge' operations. In other words, the data source can contain both new and existing objects, and pg_upsert will handle either case. Since the Postgres native COPY command does not handle updating existing records, pg_upsert accomplishes update and insert using an intermediary temp table:
70
+
71
+ This merge/upsert happens in 5 steps (assume your data table is called "users"):
72
+ * create a temp table named users_temp_### where "###" is a random number. In postgres temp tables are only visible to the current database session, so naming conflicts should not be a problem.
73
+ * COPY the data to users_temp_###
74
+ * issue a query to insert all new records from users_temp_### into users (newness is determined by the presence of the primary key in the users table)
75
+ * issue a query to update all records in users with the data in users_temp_### (matching on primary key)
76
+ * drop the temp table.
77
+
78
+ ## Note on Patches/Pull Requests
79
+
80
+ * Fork the project
81
+ * add your feature/fix to your fork (rspec tests please)
82
+ * submit a PR
83
+ * If you find an issue but can't fix it in a PR, please log an issue. I'll do my best.
84
+
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.unshift File.expand_path("../lib", __FILE__)
3
+ require 'bundler/gem_tasks'
4
+ require 'rubygems'
5
+ require 'rspec/core/rake_task'
6
+ require 'rdoc/task'
7
+
8
+ task :default => :spec
9
+
10
+ RSpec::Core::RakeTask.new(:spec)
11
+
12
+ Rake::RDocTask.new do |rdoc|
13
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
14
+ rdoc.rdoc_dir = 'rdoc'
15
+ rdoc.title = "postgres_upsert #{version}"
16
+ rdoc.rdoc_files.include('README*')
17
+ rdoc.rdoc_files.include('lib/**/*.rb')
18
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,13 @@
1
+ require 'rubygems'
2
+ require 'active_record'
3
+ require 'postgres_upsert/active_record'
4
+ require 'rails'
5
+
6
+ class PostgresCopy < Rails::Railtie
7
+
8
+ initializer 'postgres_upsert' do
9
+ ActiveSupport.on_load :active_record do
10
+ require "postgres_upsert/active_record"
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,177 @@
1
+ module ActiveRecord
2
+ class Base
3
+ # Copy data to a file passed as a string (the file path) or to lines that are passed to a block
4
+
5
+ # Copy data from a CSV that can be passed as a string (the file path) or as an IO object.
6
+ # * You can change the default delimiter (',') by passing :delimiter => ';' (or any other delimiter) in the options hash
7
+ # * You can map fields from the file to different fields in the table using a map in the options hash
8
+ # * For further details on usage take a look at the README.md
9
+ def self.pg_upsert path_or_io, options = {}
10
+ options.reverse_merge!({:delimiter => ",", :format => :csv, :header => true})
11
+ options_string = options[:format] == :binary ? "BINARY" : "DELIMITER '#{options[:delimiter]}' CSV"
12
+
13
+ io = path_or_io.instance_of?(String) ? File.open(path_or_io, 'r') : path_or_io
14
+ columns_list = get_columns(io, options)
15
+
16
+ if columns_list.empty?
17
+ raise "Either the :columns option or :header => true are required"
18
+ end
19
+ copy_table = get_temp_table_name(options)
20
+ destination_table = get_table_name(options)
21
+
22
+ columns_string = columns_string_for_copy(columns_list)
23
+ create_temp_table(copy_table, destination_table, columns_list) if destination_table
24
+
25
+ connection.raw_connection.copy_data %{COPY #{copy_table} #{columns_string} FROM STDIN #{options_string}} do
26
+ if block_given?
27
+ block = Proc.new
28
+ end
29
+ while line = read_input_line(io, options, &block) do
30
+ next if line.strip.size == 0
31
+ connection.raw_connection.put_copy_data line
32
+ end
33
+ end
34
+
35
+ if destination_table
36
+ upsert_from_temp_table(copy_table, destination_table, columns_list)
37
+ drop_temp_table(copy_table)
38
+ end
39
+ end
40
+
41
+ private
42
+
43
+ def self.get_columns(io, options)
44
+ columns_list = options[:columns] || []
45
+ if options[:format] != :binary && options[:header]
46
+ #if header is present, we need to strip it from io, whether we use it for the columns list or not.
47
+ line = io.gets
48
+ if columns_list.empty?
49
+ columns_list = line.strip.split(options[:delimiter])
50
+ end
51
+ end
52
+ columns_list = columns_list.map{|c| options[:map][c.to_s] } if options[:map]
53
+ return columns_list
54
+ end
55
+
56
+ def self.columns_string_for_copy(columns_list)
57
+ str = get_columns_string(columns_list)
58
+ str.empty? ? str : "(#{str})"
59
+ end
60
+
61
+ def self.columns_string_for_select(columns_list)
62
+ columns = columns_list.clone
63
+ columns << "created_at" if column_names.include?("created_at")
64
+ columns << "updated_at" if column_names.include?("updated_at")
65
+ str = get_columns_string(columns)
66
+ end
67
+
68
+ def self.columns_string_for_insert(columns_list)
69
+ columns = columns_list.clone
70
+ columns << "created_at" if column_names.include?("created_at")
71
+ columns << "updated_at" if column_names.include?("updated_at")
72
+ str = get_columns_string(columns)
73
+ end
74
+
75
+ def self.select_string_for_insert(columns_list)
76
+ columns = columns_list.clone
77
+ str = get_columns_string(columns)
78
+ str << ",'#{DateTime.now.utc}'" if column_names.include?("created_at")
79
+ str << ",'#{DateTime.now.utc}'" if column_names.include?("updated_at")
80
+ str
81
+ end
82
+
83
+ def self.select_string_for_create(columns_list)
84
+ columns = columns_list.map(&:to_sym)
85
+ columns << primary_key.to_sym unless columns.include?(primary_key.to_sym)
86
+ get_columns_string(columns)
87
+ end
88
+
89
+ def self.get_columns_string(columns_list)
90
+ columns_list.size > 0 ? "\"#{columns_list.join('","')}\"" : ""
91
+ end
92
+
93
+ def self.get_table_name(options)
94
+ if options[:table]
95
+ connection.quote_table_name(options[:table])
96
+ else
97
+ quoted_table_name
98
+ end
99
+ end
100
+
101
+ def self.get_temp_table_name(options)
102
+ "#{table_name}_temp_#{rand(1000)}"
103
+ end
104
+
105
+ def self.read_input_line(io, options)
106
+ if options[:format] == :binary
107
+ begin
108
+ return io.readpartial(10240)
109
+ rescue EOFError
110
+ end
111
+ else
112
+ line = io.gets
113
+ if block_given? && line
114
+ row = line.strip.split(options[:delimiter])
115
+ yield(row)
116
+ line = row.join(options[:delimiter]) + "\n"
117
+ end
118
+ return line
119
+ end
120
+ end
121
+
122
+ def self.upsert_from_temp_table(temp_table, dest_table, columns_list)
123
+ update_from_temp_table(temp_table, dest_table, columns_list)
124
+ insert_from_temp_table(temp_table, dest_table, columns_list)
125
+ end
126
+
127
+ def self.update_from_temp_table(temp_table, dest_table, columns_list)
128
+ ActiveRecord::Base.connection.execute <<-SQL
129
+ UPDATE #{dest_table} AS d
130
+ #{update_set_clause(columns_list)}
131
+ FROM #{temp_table} as t
132
+ WHERE t.#{primary_key} = d.#{primary_key}
133
+ AND d.#{primary_key} IS NOT NULL;
134
+ SQL
135
+ end
136
+
137
+ def self.update_set_clause(columns_list)
138
+ command = columns_list.map do |col|
139
+ "\"#{col}\" = t.\"#{col}\""
140
+ end
141
+ command << "\"updated_at\" = '#{DateTime.now.utc}'" if column_names.include?("updated_at")
142
+ "SET #{command.join(',')}"
143
+ end
144
+
145
+ def self.insert_from_temp_table(temp_table, dest_table, columns_list)
146
+ columns_string = columns_string_for_insert(columns_list)
147
+ select_string = select_string_for_insert(columns_list)
148
+ ActiveRecord::Base.connection.execute <<-SQL
149
+ INSERT INTO #{dest_table} (#{columns_string})
150
+ SELECT #{select_string}
151
+ FROM #{temp_table} as t
152
+ WHERE NOT EXISTS
153
+ (SELECT 1
154
+ FROM #{dest_table} as d
155
+ WHERE d.#{primary_key} = t.#{primary_key})
156
+ AND t.#{primary_key} IS NOT NULL;
157
+ SQL
158
+ end
159
+
160
+ def self.create_temp_table(temp_table, dest_table, columns_list)
161
+ columns_string = select_string_for_create(columns_list)
162
+ ActiveRecord::Base.connection.execute <<-SQL
163
+ SET client_min_messages=WARNING;
164
+ DROP TABLE IF EXISTS #{temp_table};
165
+
166
+ CREATE TEMP TABLE #{temp_table}
167
+ AS SELECT #{columns_string} FROM #{dest_table} WHERE 0 = 1;
168
+ SQL
169
+ end
170
+
171
+ def self.drop_temp_table(temp_table)
172
+ ActiveRecord::Base.connection.execute <<-SQL
173
+ DROP TABLE #{temp_table}
174
+ SQL
175
+ end
176
+ end
177
+ end
@@ -0,0 +1,33 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib/', __FILE__)
3
+ $:.unshift lib unless $:.include?(lib)
4
+
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "postgres_upsert"
8
+ s.version = "1.0.0"
9
+
10
+ s.platform = Gem::Platform::RUBY
11
+ s.required_ruby_version = ">= 1.8.7"
12
+ s.authors = ["Steve Mitchell"]
13
+ s.date = "2014-09-12"
14
+ s.description = "Uses Postgres's powerful COPY command to upsert large sets of data into ActiveRecord tables"
15
+ s.email = "thestevemitchell@gmail.com"
16
+ git_files = `git ls-files`.split("\n") rescue ''
17
+ s.files = git_files
18
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
19
+ s.executables = []
20
+ s.require_paths = %w(lib)
21
+ s.homepage = "https://github.com/theSteveMitchell/postgres_upsert"
22
+ s.require_paths = ["lib"]
23
+ s.summary = "A rubygem that integrates with ActiveRecord to insert/update large data sets into the database efficiently"
24
+
25
+ s.add_dependency "pg", '~> 0.17.0'
26
+ s.add_dependency "activerecord", '>= 3.0.0'
27
+ s.add_dependency "rails", '>= 3.0.0'
28
+ s.add_development_dependency "bundler"
29
+ s.add_development_dependency "rdoc"
30
+ s.add_development_dependency "pry-rails"
31
+ s.add_development_dependency "rspec", "~> 2.12"
32
+ end
33
+
Binary file
@@ -0,0 +1,2 @@
1
+ id,data
2
+ 1,test data 1
@@ -0,0 +1,2 @@
1
+ id,data
2
+ 1,"test, the data 1"
@@ -0,0 +1,2 @@
1
+ id,data
2
+ 1,test, the data 1
@@ -0,0 +1 @@
1
+ 1,test data 1
@@ -0,0 +1,5 @@
1
+ require 'postgres_upsert'
2
+
3
+ class ReservedWordModel < ActiveRecord::Base
4
+ end
5
+
@@ -0,0 +1,2 @@
1
+ id select group
2
+ 1 test select group name
@@ -0,0 +1,2 @@
1
+ cod;info
2
+ 1;test data 1
@@ -0,0 +1,2 @@
1
+ id;data
2
+ 1;test data 1
@@ -0,0 +1,2 @@
1
+ id data
2
+ 1 test data 1
@@ -0,0 +1,2 @@
1
+ cod info
2
+ 1 test data 1
@@ -0,0 +1,2 @@
1
+ data id
2
+ this is a wrong separator;1
@@ -0,0 +1,3 @@
1
+ id data
2
+ 1 test data 1
3
+
@@ -0,0 +1,2 @@
1
+ id data
2
+ 1 test data 1
@@ -0,0 +1,3 @@
1
+ id data
2
+ 1 test data 1
3
+ 2 test data 2
@@ -0,0 +1,4 @@
1
+ require 'postgres_upsert'
2
+
3
+ class TestModel < ActiveRecord::Base
4
+ end
@@ -0,0 +1,4 @@
1
+ require 'postgres_upsert'
2
+
3
+ class ThreeColumn < ActiveRecord::Base
4
+ end
@@ -0,0 +1,35 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "pg_upsert from file with binary data" do
4
+ before(:each) do
5
+ ActiveRecord::Base.connection.execute %{
6
+ TRUNCATE TABLE test_models;
7
+ SELECT setval('test_models_id_seq', 1, false);
8
+ }
9
+ end
10
+
11
+ before do
12
+ DateTime.stub(:now).and_return (DateTime.parse("2012-01-01").utc)
13
+ end
14
+
15
+ def timestamp
16
+ DateTime.now.utc.to_s
17
+ end
18
+
19
+ it "imports from file if path is passed without field_map" do
20
+ TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary, columns: [:id, :data]
21
+
22
+ expect(
23
+ TestModel.first.attributes
24
+ ).to include('data' => 'text', 'created_at' => timestamp, 'updated_at' => timestamp)
25
+ end
26
+
27
+ it "throws an error when importing binary file without columns list" do
28
+ # Since binary data never has a header row, we'll require explicit columns list
29
+ expect{
30
+ TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary
31
+ }.to raise_error "Either the :columns option or :header => true are required"
32
+ end
33
+
34
+ end
35
+
@@ -0,0 +1,187 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "pg_upsert from file with CSV format" do
4
+ before(:each) do
5
+ ActiveRecord::Base.connection.execute %{
6
+ TRUNCATE TABLE test_models;
7
+ SELECT setval('test_models_id_seq', 1, false);
8
+ }
9
+ end
10
+
11
+ before do
12
+ DateTime.stub_chain(:now, :utc).and_return (DateTime.parse("2012-01-01").utc)
13
+ end
14
+
15
+ def timestamp
16
+ DateTime.now.utc
17
+ end
18
+
19
+ it "should import from file if path is passed without field_map" do
20
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header.csv')
21
+ expect(
22
+ TestModel.first.attributes
23
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
24
+ end
25
+
26
+ it "correctly handles delimiters in content" do
27
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header_and_comma_values.csv')
28
+ expect(
29
+ TestModel.first.attributes
30
+ ).to include('data' => 'test, the data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
31
+ end
32
+
33
+ it "throws error if csv is malformed" do
34
+ expect{
35
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header_and_unquoted_comma.csv')
36
+ }.to raise_error
37
+ end
38
+
39
+ it "throws error if the csv has mixed delimiters" do
40
+ expect{
41
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_error.csv'), :delimiter => "\t"
42
+ }.to raise_error
43
+ end
44
+
45
+ it "should import from IO without field_map" do
46
+ TestModel.pg_upsert File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')
47
+ expect(
48
+ TestModel.first.attributes
49
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
50
+ end
51
+
52
+ it "should import with custom delimiter from path" do
53
+ TestModel.pg_upsert File.expand_path('spec/fixtures/semicolon_with_header.csv'), :delimiter => ';'
54
+ expect(
55
+ TestModel.first.attributes
56
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
57
+ end
58
+
59
+ it "should import with custom delimiter from IO" do
60
+ TestModel.pg_upsert File.open(File.expand_path('spec/fixtures/semicolon_with_header.csv'), 'r'), :delimiter => ';'
61
+ expect(
62
+ TestModel.first.attributes
63
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
64
+ end
65
+
66
+ it "should import and allow changes in block" do
67
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')) do |row|
68
+ row[1] = 'changed this data'
69
+ end
70
+ expect(
71
+ TestModel.first.attributes
72
+ ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
73
+ end
74
+
75
+ it "should import 2 lines and allow changes in block" do
76
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_two_lines.csv'), 'r'), :delimiter => "\t") do |row|
77
+ row[1] = 'changed this data'
78
+ end
79
+
80
+ expect(
81
+ TestModel.find(1).attributes
82
+ ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
83
+ expect(
84
+ TestModel.find(2).attributes
85
+ ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
86
+ expect(TestModel.count).to eq 2
87
+ end
88
+
89
+ it "should not expect a header when :header is false" do
90
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/comma_without_header.csv'), 'r'), :header => false, :columns => [:id,:data])
91
+
92
+ expect(
93
+ TestModel.first.attributes
94
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
95
+ end
96
+
97
+ it "should be able to map the header in the file to diferent column names" do
98
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_different_header.csv'), 'r'), :delimiter => "\t", :map => {'cod' => 'id', 'info' => 'data'})
99
+
100
+ expect(
101
+ TestModel.first.attributes
102
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
103
+ end
104
+
105
+ it "should be able to map the header in the file to diferent column names with custom delimiter" do
106
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/semicolon_with_different_header.csv'), 'r'), :delimiter => ';', :map => {'cod' => 'id', 'info' => 'data'})
107
+
108
+ expect(
109
+ TestModel.first.attributes
110
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
111
+ end
112
+
113
+ it "should ignore empty lines" do
114
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_extra_line.csv'), 'r'), :delimiter => "\t")
115
+
116
+ expect(
117
+ TestModel.first.attributes
118
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
119
+ end
120
+
121
+ it "should not create timestamps when the model does not include them" do
122
+ ReservedWordModel.pg_upsert File.expand_path('spec/fixtures/reserved_words.csv'), :delimiter => "\t"
123
+
124
+ expect(
125
+ ReservedWordModel.first.attributes
126
+ ).to eq("group"=>"group name", "id"=>1, "select"=>"test select")
127
+ end
128
+
129
+ context "upserting data to handle inserts and creates" do
130
+ let(:original_created_at) {5.days.ago.utc}
131
+
132
+ before(:each) do
133
+ TestModel.create(id: 1, data: "From the before time, in the long long ago", :created_at => original_created_at)
134
+ end
135
+
136
+ it "should not violate primary key constraint" do
137
+ expect{
138
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header.csv')
139
+ }.to_not raise_error
140
+ end
141
+
142
+ it "should upsert (update existing records and insert new records)" do
143
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
144
+
145
+ expect(
146
+ TestModel.find(1).attributes
147
+ ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at, "updated_at" => timestamp)
148
+ expect(
149
+ TestModel.find(2).attributes
150
+ ).to eq("id"=>2, "data"=>"test data 2", "created_at" => timestamp, "updated_at" => timestamp)
151
+ end
152
+
153
+ it "should require columns option if no header" do
154
+ expect{
155
+ TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary
156
+ }.to raise_error("Either the :columns option or :header => true are required")
157
+ end
158
+
159
+ it "should clean up the temp table after completion" do
160
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
161
+
162
+ ActiveRecord::Base.connection.tables.should_not include("test_models_temp")
163
+ end
164
+
165
+ it "should gracefully drop the temp table if it already exists" do
166
+ ActiveRecord::Base.connection.execute "CREATE TEMP TABLE test_models_temp (LIKE test_models);"
167
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
168
+
169
+ expect(
170
+ TestModel.find(1).attributes
171
+ ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at, "updated_at" => timestamp)
172
+ expect(
173
+ TestModel.find(2).attributes
174
+ ).to eq("id"=>2, "data"=>"test data 2", "created_at" => timestamp, "updated_at" => timestamp)
175
+ end
176
+
177
+ it "should be able to copy using custom set of columns" do
178
+ ThreeColumn.create(id: 1, data: "old stuff", extra: "neva change!", created_at: original_created_at)
179
+ ThreeColumn.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_only_data.csv'), 'r'), :delimiter => "\t", :columns => ["id", "data"])
180
+
181
+ expect(
182
+ ThreeColumn.first.attributes
183
+ ).to eq('id' => 1, 'data' => 'test data 1', 'extra' => "neva change!", 'created_at' => original_created_at, 'updated_at' => timestamp)
184
+ end
185
+ end
186
+ end
187
+
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,47 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'fixtures/test_model'
4
+ require 'fixtures/three_column'
5
+ require 'fixtures/reserved_word_model'
6
+ require 'rspec'
7
+ require 'rspec/autorun'
8
+
9
+ RSpec.configure do |config|
10
+ config.before(:suite) do
11
+ # we create a test database if it does not exist
12
+ # NOTE: the tests connect to a local server as user "postgres" with password "postgres" (see the connection settings below)
13
+ begin
14
+ ActiveRecord::Base.establish_connection(
15
+ :adapter => "postgresql",
16
+ :host => "localhost",
17
+ :username => "postgres",
18
+ :password => "postgres",
19
+ :port => 5432,
20
+ :database => "ar_pg_copy_test"
21
+ )
22
+ ActiveRecord::Base.connection.execute %{
23
+ SET client_min_messages TO warning;
24
+ DROP TABLE IF EXISTS test_models;
25
+ DROP TABLE IF EXISTS three_columns;
26
+ DROP TABLE IF EXISTS reserved_word_models;
27
+ CREATE TABLE test_models (id serial PRIMARY KEY, data text, created_at timestamp with time zone, updated_at timestamp with time zone );
28
+ CREATE TABLE three_columns (id serial PRIMARY KEY, data text, extra text, created_at timestamp with time zone, updated_at timestamp with time zone );
29
+ CREATE TABLE reserved_word_models (id serial PRIMARY KEY, "select" text, "group" text);
30
+ }
31
+ rescue Exception => e
32
+ puts "Exception: #{e}"
33
+ ActiveRecord::Base.establish_connection(
34
+ :adapter => "postgresql",
35
+ :host => "localhost",
36
+ :username => "postgres",
37
+ :password => "postgres",
38
+ :port => 5432,
39
+ :database => "postgres"
40
+ )
41
+ ActiveRecord::Base.connection.execute "DROP DATABASE IF EXISTS ar_pg_copy_test"
42
+ ActiveRecord::Base.connection.execute "CREATE DATABASE ar_pg_copy_test;"
43
+ retry
44
+ end
45
+ end
46
+
47
+ end
metadata ADDED
@@ -0,0 +1,194 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: postgres_upsert
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Steve Mitchell
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pg
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 0.17.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 0.17.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: activerecord
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 3.0.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 3.0.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: rails
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 3.0.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 3.0.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rdoc
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry-rails
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ~>
102
+ - !ruby/object:Gem::Version
103
+ version: '2.12'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ~>
109
+ - !ruby/object:Gem::Version
110
+ version: '2.12'
111
+ description: Uses Postgres's powerful COPY command to upsert large sets of data into
112
+ ActiveRecord tables
113
+ email: thestevemitchell@gmail.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - .gitignore
119
+ - Gemfile
120
+ - Gemfile.lock
121
+ - LICENSE
122
+ - README.md
123
+ - Rakefile
124
+ - VERSION
125
+ - lib/postgres_upsert.rb
126
+ - lib/postgres_upsert/active_record.rb
127
+ - postgres_upsert.gemspec
128
+ - spec/fixtures/2_col_binary_data.dat
129
+ - spec/fixtures/comma_with_header.csv
130
+ - spec/fixtures/comma_with_header_and_comma_values.csv
131
+ - spec/fixtures/comma_with_header_and_unquoted_comma.csv
132
+ - spec/fixtures/comma_without_header.csv
133
+ - spec/fixtures/reserved_word_model.rb
134
+ - spec/fixtures/reserved_words.csv
135
+ - spec/fixtures/semicolon_with_different_header.csv
136
+ - spec/fixtures/semicolon_with_header.csv
137
+ - spec/fixtures/tab_only_data.csv
138
+ - spec/fixtures/tab_with_different_header.csv
139
+ - spec/fixtures/tab_with_error.csv
140
+ - spec/fixtures/tab_with_extra_line.csv
141
+ - spec/fixtures/tab_with_header.csv
142
+ - spec/fixtures/tab_with_two_lines.csv
143
+ - spec/fixtures/test_model.rb
144
+ - spec/fixtures/three_column.rb
145
+ - spec/pg_upsert_binary_spec.rb
146
+ - spec/pg_upsert_csv_spec.rb
147
+ - spec/spec.opts
148
+ - spec/spec_helper.rb
149
+ homepage: https://github.com/theSteveMitchell/postgres_upsert
150
+ licenses: []
151
+ metadata: {}
152
+ post_install_message:
153
+ rdoc_options: []
154
+ require_paths:
155
+ - lib
156
+ required_ruby_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - '>='
159
+ - !ruby/object:Gem::Version
160
+ version: 1.8.7
161
+ required_rubygems_version: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - '>='
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ requirements: []
167
+ rubyforge_project:
168
+ rubygems_version: 2.0.6
169
+ signing_key:
170
+ specification_version: 4
171
+ summary: A rubygem that integrates with ActiveRecord to insert/update large data sets
172
+ into the database efficiently
173
+ test_files:
174
+ - spec/fixtures/2_col_binary_data.dat
175
+ - spec/fixtures/comma_with_header.csv
176
+ - spec/fixtures/comma_with_header_and_comma_values.csv
177
+ - spec/fixtures/comma_with_header_and_unquoted_comma.csv
178
+ - spec/fixtures/comma_without_header.csv
179
+ - spec/fixtures/reserved_word_model.rb
180
+ - spec/fixtures/reserved_words.csv
181
+ - spec/fixtures/semicolon_with_different_header.csv
182
+ - spec/fixtures/semicolon_with_header.csv
183
+ - spec/fixtures/tab_only_data.csv
184
+ - spec/fixtures/tab_with_different_header.csv
185
+ - spec/fixtures/tab_with_error.csv
186
+ - spec/fixtures/tab_with_extra_line.csv
187
+ - spec/fixtures/tab_with_header.csv
188
+ - spec/fixtures/tab_with_two_lines.csv
189
+ - spec/fixtures/test_model.rb
190
+ - spec/fixtures/three_column.rb
191
+ - spec/pg_upsert_binary_spec.rb
192
+ - spec/pg_upsert_csv_spec.rb
193
+ - spec/spec.opts
194
+ - spec/spec_helper.rb