postgres_upsert 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 00a22a21bea95bc98d7a991c5ed30a5bbea6a67c
4
+ data.tar.gz: 9824edf28ac08e7a8aed28ebcdee8c07c46ffa11
5
+ SHA512:
6
+ metadata.gz: b6183d68e0791491f6417ffbc99b7f2a984090aa3640f1348cfe74e39e11011edce3985029b259bd28296fc7725ad4def14db3f209d95b15f7324d2ebb5dbba3
7
+ data.tar.gz: 4dd39aa87168d5866874de9c6a11097731e015bbdea624535164f413cf1fabb7c9fbca73612c805289db800383d134c5e88f1198efe2cc9d7d365a72f4f2a9be
data/.gitignore ADDED
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # specify gem dependencies in postgres_upsert.gemspec
4
+ # except the platform-specific dependencies below
5
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,112 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ postgres_upsert (0.1.0)
5
+ activerecord (>= 3.0.0)
6
+ pg (~> 0.17.0)
7
+ rails (>= 3.0.0)
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ actionmailer (4.0.3)
13
+ actionpack (= 4.0.3)
14
+ mail (~> 2.5.4)
15
+ actionpack (4.0.3)
16
+ activesupport (= 4.0.3)
17
+ builder (~> 3.1.0)
18
+ erubis (~> 2.7.0)
19
+ rack (~> 1.5.2)
20
+ rack-test (~> 0.6.2)
21
+ activemodel (4.0.3)
22
+ activesupport (= 4.0.3)
23
+ builder (~> 3.1.0)
24
+ activerecord (4.0.3)
25
+ activemodel (= 4.0.3)
26
+ activerecord-deprecated_finders (~> 1.0.2)
27
+ activesupport (= 4.0.3)
28
+ arel (~> 4.0.0)
29
+ activerecord-deprecated_finders (1.0.3)
30
+ activesupport (4.0.3)
31
+ i18n (~> 0.6, >= 0.6.4)
32
+ minitest (~> 4.2)
33
+ multi_json (~> 1.3)
34
+ thread_safe (~> 0.1)
35
+ tzinfo (~> 0.3.37)
36
+ arel (4.0.2)
37
+ builder (3.1.4)
38
+ coderay (1.1.0)
39
+ diff-lcs (1.1.3)
40
+ erubis (2.7.0)
41
+ hike (1.2.3)
42
+ i18n (0.6.11)
43
+ json (1.7.6)
44
+ mail (2.5.4)
45
+ mime-types (~> 1.16)
46
+ treetop (~> 1.4.8)
47
+ method_source (0.8.2)
48
+ mime-types (1.25.1)
49
+ minitest (4.7.5)
50
+ multi_json (1.10.1)
51
+ pg (0.17.1)
52
+ polyglot (0.3.5)
53
+ pry (0.10.1)
54
+ coderay (~> 1.1.0)
55
+ method_source (~> 0.8.1)
56
+ slop (~> 3.4)
57
+ pry-rails (0.3.2)
58
+ pry (>= 0.9.10)
59
+ rack (1.5.2)
60
+ rack-test (0.6.2)
61
+ rack (>= 1.0)
62
+ rails (4.0.3)
63
+ actionmailer (= 4.0.3)
64
+ actionpack (= 4.0.3)
65
+ activerecord (= 4.0.3)
66
+ activesupport (= 4.0.3)
67
+ bundler (>= 1.3.0, < 2.0)
68
+ railties (= 4.0.3)
69
+ sprockets-rails (~> 2.0.0)
70
+ railties (4.0.3)
71
+ actionpack (= 4.0.3)
72
+ activesupport (= 4.0.3)
73
+ rake (>= 0.8.7)
74
+ thor (>= 0.18.1, < 2.0)
75
+ rake (10.3.2)
76
+ rdoc (3.12)
77
+ json (~> 1.4)
78
+ rspec (2.12.0)
79
+ rspec-core (~> 2.12.0)
80
+ rspec-expectations (~> 2.12.0)
81
+ rspec-mocks (~> 2.12.0)
82
+ rspec-core (2.12.2)
83
+ rspec-expectations (2.12.1)
84
+ diff-lcs (~> 1.1.3)
85
+ rspec-mocks (2.12.2)
86
+ slop (3.6.0)
87
+ sprockets (2.11.0)
88
+ hike (~> 1.2)
89
+ multi_json (~> 1.0)
90
+ rack (~> 1.0)
91
+ tilt (~> 1.1, != 1.3.0)
92
+ sprockets-rails (2.0.1)
93
+ actionpack (>= 3.0)
94
+ activesupport (>= 3.0)
95
+ sprockets (~> 2.8)
96
+ thor (0.19.1)
97
+ thread_safe (0.3.4)
98
+ tilt (1.4.1)
99
+ treetop (1.4.15)
100
+ polyglot
101
+ polyglot (>= 0.3.1)
102
+ tzinfo (0.3.40)
103
+
104
+ PLATFORMS
105
+ ruby
106
+
107
+ DEPENDENCIES
108
+ bundler
109
+ postgres_upsert!
110
+ pry-rails
111
+ rdoc
112
+ rspec (~> 2.12)
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Steve Mitchell
4
+ Based on work Copyright (c) Diogo Biazus https://github.com/diogob/postgres-copy
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,84 @@
1
+ # postgres_upsert
2
+
3
+ Allows your rails app to load data in a very fast way, avoiding calls to ActiveRecord.
4
+
5
+ Using the PG gem and postgres's powerful COPY command, you can create thousands of rails objects in your db in a single query.
6
+
7
+
8
+ ## Install
9
+
10
+ Put it in your Gemfile
11
+
12
+ gem 'postgres_upsert'
13
+
14
+ Run the bundle command
15
+
16
+ bundle
17
+
18
+ ## Usage
19
+
20
+ The gem will add the following additional class method to ActiveRecord::Base
21
+
22
+ * pg_upsert
23
+
24
+ ### Using pg_upsert
25
+
26
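+ pg_upsert will allow you to copy data from an arbitrary IO object or from a file (when you pass the path as a string). The file is opened and read by your Ruby process and streamed to the database, so the path must be readable from the machine running your app.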
27
+ Let's first copy from a file, assuming that we have a users table and
28
+ that we are in the Rails console:
29
+
30
+ ```ruby
31
+ User.pg_upsert "/tmp/users.csv"
32
+ ```
33
+
34
+ This command will use the headers in the CSV file as fields of the target table, so beware to always have a header in the files you want to import.
35
+ If the column names in the CSV header do not match the field names of the target table, you can pass a map in the options parameter.
36
+
37
+ ```ruby
38
+ User.pg_upsert "/tmp/users.csv", :map => {'name' => 'first_name'}
39
+ ```
40
+
41
+ In the above example the header name in the CSV file will be mapped to the field called first_name in the users table.
42
+ You can also manipulate and modify the values of the file being imported before they enter into the database using a block:
43
+
44
+ ```ruby
45
+ User.pg_upsert "/tmp/users.csv" do |row|
46
+ row[0] = "fixed string"
47
+ end
48
+ ```
49
+
50
+ The above example will always change the value of the first column to "fixed string" before storing it into the database.
51
+ For each iteration of the block row receives an array with the same order as the columns in the CSV file.
52
+
53
+
54
+ To copy a binary formatted data file or IO object you can specify the format as binary
55
+
56
+ ```ruby
57
+ User.pg_upsert "/tmp/users.dat", :format => :binary, :columns => ["id", "name"]
58
+ ```
59
+
60
+ Which will generate the following SQL command:
61
+
62
+ ```sql
63
+ COPY users_temp_### ("id","name") FROM STDIN BINARY
64
+ ```
65
+
66
+ NOTE: binary files do not include header columns, so passing a :columns array is required for binary files.
67
+
68
+
69
+ pg_upsert supports 'upsert' or 'merge' operations. In other words, the data source can contain both new and existing objects, and pg_upsert will handle either case. Since the Postgres native COPY command does not handle updating existing records, pg_upsert accomplishes update and insert using an intermediary temp table:
70
+
71
+ This merge/upsert happens in 5 steps (assume your data table is called "users")
72
+ * create a temp table named users_temp_### where "###" is a random number. In postgres temp tables are only visible to the current database session, so naming conflicts should not be a problem.
73
+ * COPY the data to users_temp_###
74
+ * issue a query to insert all new records from users_temp_### into users (newness is determined by the presence of the primary key in the users table)
75
+ * issue a query to update all records in users with the data in users_temp_### (matching on primary key)
76
+ * drop the temp table.
77
+
78
+ ## Note on Patches/Pull Requests
79
+
80
+ * Fork the project
81
+ * add your feature/fix to your fork (rspec tests, please)
82
+ * submit a PR
83
+ * If you find an issue but can't fix it in a PR, please log an issue. I'll do my best.
84
+
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.unshift File.expand_path("../lib", __FILE__)
3
+ require 'bundler/gem_tasks'
4
+ require 'rubygems'
5
+ require 'rspec/core/rake_task'
6
+ require 'rdoc/task'
7
+
8
+ task :default => :spec
9
+
10
+ RSpec::Core::RakeTask.new(:spec)
11
+
12
# Defines the rdoc task: builds API documentation for the README and
# everything under lib/ into the ./rdoc directory.
Rake::RDocTask.new do |rdoc|
  # File.read returns the file verbatim, including its trailing newline; strip
  # it so the generated title stays a single clean line.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "postgres_upsert #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,13 @@
1
+ require 'rubygems'
2
+ require 'active_record'
3
+ require 'postgres_upsert/active_record'
4
+ require 'rails'
5
+
6
# Railtie that registers a 'postgres_upsert' initializer. Once ActiveRecord
# has been loaded, the initializer requires the pg_upsert extension file.
class PostgresCopy < Rails::Railtie
  initializer('postgres_upsert') do
    ActiveSupport.on_load(:active_record) { require "postgres_upsert/active_record" }
  end
end
@@ -0,0 +1,177 @@
1
module ActiveRecord
  class Base
    # Upserts (updates existing rows, inserts new ones) data from a CSV or
    # binary COPY source into the model's table.
    #
    # The data is first COPY-ed into a temporary table and then merged into the
    # destination table, matching rows on the model's primary key.
    #
    # path_or_io - a String file path (opened, read and closed by this method)
    #              or an already open IO-like object (left open for the caller).
    # options    - Hash of options (the caller's hash is not modified):
    #              :delimiter - field delimiter for CSV input (default ",")
    #              :format    - :csv (default) or :binary
    #              :header    - whether the first CSV line is a header (default true)
    #              :columns   - explicit list of column names; required for
    #                           binary input and for CSV without a header
    #              :map       - Hash mapping input column names to table columns
    #              :table     - destination table name (defaults to the model's table)
    # block      - optional; for CSV input it is called with each row as an
    #              Array of field strings, which it may modify in place.
    #
    # Raises RuntimeError when no column list can be determined.
    # For further details on usage take a look at the README.md
    def self.pg_upsert(path_or_io, options = {}, &block)
      # Fill in defaults on a copy so the caller's hash is never mutated.
      options = options.dup
      {:delimiter => ",", :format => :csv, :header => true}.each do |key, value|
        options[key] = value unless options.key?(key)
      end

      binary = options[:format] == :binary
      # The delimiter ends up inside a SQL string literal, so let the adapter escape it.
      options_string = binary ? "BINARY" : "DELIMITER #{connection.quote(options[:delimiter].to_s)} CSV"

      opened_here = path_or_io.instance_of?(String)
      io = opened_here ? File.open(path_or_io, binary ? 'rb' : 'r') : path_or_io

      begin
        columns_list = get_columns(io, options)

        if columns_list.empty?
          raise "Either the :columns option or :header => true are required"
        end
        copy_table = get_temp_table_name(options)
        destination_table = get_table_name(options)

        columns_string = columns_string_for_copy(columns_list)
        create_temp_table(copy_table, destination_table, columns_list) if destination_table

        raw_connection = connection.raw_connection
        raw_connection.copy_data %{COPY #{copy_table} #{columns_string} FROM STDIN #{options_string}} do
          while line = read_input_line(io, options, &block)
            # Blank lines are noise only in text input; a binary chunk that
            # happens to consist of whitespace/NUL bytes is real data.
            next if !binary && line.strip.size == 0
            raw_connection.put_copy_data line
          end
        end

        if destination_table
          upsert_from_temp_table(copy_table, destination_table, columns_list)
          drop_temp_table(copy_table)
        end
      ensure
        # Only close what this method opened; caller-supplied IO stays open.
        io.close if opened_here && !io.closed?
      end
    end

    # NOTE: the helpers below are implementation details. They are singleton
    # methods, so a bare `private` would not hide them; they are left public to
    # stay backward-compatible with any existing callers.

    # Determines the list of columns to load.
    # When a CSV header is expected it is always consumed from io, even if
    # :columns was given, so that it is not loaded as data. Names are then
    # translated through :map; names missing from the map are kept unchanged.
    # Returns an Array (empty when no columns could be determined).
    def self.get_columns(io, options)
      columns_list = options[:columns] || []
      if options[:format] != :binary && options[:header]
        line = io.gets
        if columns_list.empty?
          # line is nil for an empty input; treat that as "no columns".
          columns_list = line.to_s.strip.split(options[:delimiter])
        end
      end
      if options[:map]
        mapping = Hash[options[:map].map { |from, to| [from.to_s, to] }]
        columns_list = columns_list.map { |c| mapping.fetch(c.to_s, c) }
      end
      columns_list
    end

    # Returns the parenthesised, quoted column list used in the COPY statement,
    # or "" when columns_list is empty.
    def self.columns_string_for_copy(columns_list)
      str = get_columns_string(columns_list)
      str.empty? ? str : "(#{str})"
    end

    # Same column list as columns_string_for_insert.
    def self.columns_string_for_select(columns_list)
      columns_string_for_insert(columns_list)
    end

    # Returns the quoted target column list for the INSERT: the loaded columns
    # plus any timestamp columns the table has that the input did not supply.
    def self.columns_string_for_insert(columns_list)
      get_columns_string(columns_list + pg_upsert_missing_timestamps(columns_list))
    end

    # Returns the SELECT list matching columns_string_for_insert: the loaded
    # columns followed by a literal current UTC time for each timestamp column
    # that has to be filled in.
    def self.select_string_for_insert(columns_list)
      # Take the time once so created_at and updated_at get the same value.
      now = DateTime.now.utc
      str = get_columns_string(columns_list)
      pg_upsert_missing_timestamps(columns_list).each { str << ",'#{now}'" }
      str
    end

    # Returns the quoted column list used to create the temp table: the loaded
    # columns plus the primary key when it is not already among them.
    def self.select_string_for_create(columns_list)
      columns = columns_list.map(&:to_sym)
      columns << primary_key.to_sym unless columns.include?(primary_key.to_sym)
      get_columns_string(columns)
    end

    # Joins column names into a comma separated list of double-quoted
    # identifiers. Returns "" for an empty list.
    def self.get_columns_string(columns_list)
      columns_list.map { |c| pg_upsert_quote_column(c) }.join(',')
    end

    # Returns the quoted destination table: options[:table] when given,
    # otherwise the model's own table.
    def self.get_table_name(options)
      if options[:table]
        connection.quote_table_name(options[:table])
      else
        quoted_table_name
      end
    end

    # Returns a temp table name of the form "<table_name>_temp_<0..999>".
    # The options argument is currently unused.
    def self.get_temp_table_name(options)
      "#{table_name}_temp_#{rand(1000)}"
    end

    # Reads the next piece of input to hand to COPY.
    # Binary: returns a chunk of up to 10240 bytes, or nil at end of input.
    # CSV: returns the next line, or nil at end of input. When a block is given
    # each non-blank line is split on the delimiter, yielded as an Array for
    # in-place modification, and re-joined. The split is a plain string split:
    # quoted fields that contain the delimiter are passed as several elements.
    def self.read_input_line(io, options)
      if options[:format] == :binary
        begin
          return io.readpartial(10240)
        rescue EOFError
          return nil
        end
      else
        line = io.gets
        # Blank lines are returned untouched (the caller skips them) rather
        # than being turned into a bogus row by the block.
        if block_given? && line && line.strip.size > 0
          # chomp + limit -1 keep leading/trailing empty fields, so the column
          # count of the rebuilt line matches the original line.
          row = line.chomp.split(options[:delimiter], -1)
          yield(row)
          line = row.join(options[:delimiter]) + "\n"
        end
        return line
      end
    end

    # Merges the temp table into the destination: existing rows are updated
    # first, then rows with new primary keys are inserted. Both statements run
    # in one transaction so a failure cannot leave a half-applied merge.
    def self.upsert_from_temp_table(temp_table, dest_table, columns_list)
      transaction do
        update_from_temp_table(temp_table, dest_table, columns_list)
        insert_from_temp_table(temp_table, dest_table, columns_list)
      end
    end

    # Updates destination rows whose primary key is present in the temp table.
    def self.update_from_temp_table(temp_table, dest_table, columns_list)
      pk = pg_upsert_quote_column(primary_key)
      # Use the model's own connection: it is the session that owns the temp table.
      connection.execute <<-SQL
        UPDATE #{dest_table} AS d
          #{update_set_clause(columns_list)}
          FROM #{temp_table} as t
          WHERE t.#{pk} = d.#{pk}
          AND d.#{pk} IS NOT NULL;
      SQL
    end

    # Builds the SET clause of the UPDATE: every loaded column is copied from
    # the temp table (alias t); updated_at is set to the current UTC time when
    # the table has that column and the input did not supply it.
    def self.update_set_clause(columns_list)
      command = columns_list.map do |col|
        quoted = pg_upsert_quote_column(col)
        "#{quoted} = t.#{quoted}"
      end
      if column_names.include?("updated_at") && !columns_list.map(&:to_s).include?("updated_at")
        command << "\"updated_at\" = '#{DateTime.now.utc}'"
      end
      "SET #{command.join(',')}"
    end

    # Inserts temp table rows whose (non-NULL) primary key does not yet exist
    # in the destination table.
    def self.insert_from_temp_table(temp_table, dest_table, columns_list)
      columns_string = columns_string_for_insert(columns_list)
      select_string = select_string_for_insert(columns_list)
      pk = pg_upsert_quote_column(primary_key)
      connection.execute <<-SQL
        INSERT INTO #{dest_table} (#{columns_string})
          SELECT #{select_string}
          FROM #{temp_table} as t
          WHERE NOT EXISTS
            (SELECT 1
                  FROM #{dest_table} as d
                  WHERE d.#{pk} = t.#{pk})
          AND t.#{pk} IS NOT NULL;
      SQL
    end

    # (Re)creates an empty temp table with the loaded columns (plus primary
    # key), taking the column types from the destination table.
    def self.create_temp_table(temp_table, dest_table, columns_list)
      columns_string = select_string_for_create(columns_list)
      connection.execute <<-SQL
        SET client_min_messages=WARNING;
        DROP TABLE IF EXISTS #{temp_table};

        CREATE TEMP TABLE #{temp_table}
          AS SELECT #{columns_string} FROM #{dest_table} WHERE 0 = 1;
      SQL
    end

    # Drops the temp table.
    def self.drop_temp_table(temp_table)
      connection.execute <<-SQL
        DROP TABLE #{temp_table}
      SQL
    end

    # Wraps a column name in double quotes, doubling embedded double quotes so
    # a name (e.g. one read from a file header) cannot break out of the identifier.
    def self.pg_upsert_quote_column(name)
      %("#{name.to_s.gsub('"', '""')}")
    end

    # Returns the timestamp columns ("created_at"/"updated_at") that exist on
    # the table but are not part of columns_list and so must be filled in.
    def self.pg_upsert_missing_timestamps(columns_list)
      supplied = columns_list.map(&:to_s)
      %w(created_at updated_at).select do |name|
        column_names.include?(name) && !supplied.include?(name)
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib/', __FILE__)
3
+ $:.unshift lib unless $:.include?(lib)
4
+
5
+
6
# Gem specification for postgres_upsert.
Gem::Specification.new do |s|
  s.name        = "postgres_upsert"
  s.version     = "1.0.0"

  s.platform    = Gem::Platform::RUBY
  s.required_ruby_version     = ">= 1.8.7"
  s.authors     = ["Steve Mitchell"]
  s.date        = "2014-09-12"
  s.description = "Uses Postgres's powerful COPY command to upsert large sets of data into ActiveRecord tables"
  s.email       = "thestevemitchell@gmail.com"
  s.homepage    = "https://github.com/theSteveMitchell/postgres_upsert"
  s.summary     = "A rubygem that integrates with ActiveRecord to insert/update large data sets into the database efficiently"
  # The bundled LICENSE file is the MIT license.
  s.licenses    = ["MIT"]

  # Ask git for the file list once. When git is unavailable fall back to an
  # empty Array (not a String) so `files` and `test_files` keep their type and
  # the gemspec still loads.
  git_files = begin
    `git ls-files`.split("\n")
  rescue StandardError
    []
  end
  s.files         = git_files
  s.test_files    = git_files.grep(%r{\A(test|spec|features)/})
  s.executables   = []
  s.require_paths = ["lib"]

  s.add_dependency "pg", '~> 0.17.0'
  s.add_dependency "activerecord", '>= 3.0.0'
  s.add_dependency "rails", '>= 3.0.0'
  s.add_development_dependency "bundler"
  s.add_development_dependency "rdoc"
  s.add_development_dependency "pry-rails"
  s.add_development_dependency "rspec", "~> 2.12"
end
33
+
Binary file
@@ -0,0 +1,2 @@
1
+ id,data
2
+ 1,test data 1
@@ -0,0 +1,2 @@
1
+ id,data
2
+ 1,"test, the data 1"
@@ -0,0 +1,2 @@
1
+ id,data
2
+ 1,test, the data 1
@@ -0,0 +1 @@
1
+ 1,test data 1
@@ -0,0 +1,5 @@
1
+ require 'postgres_upsert'
2
+
3
+ class ReservedWordModel < ActiveRecord::Base
4
+ end
5
+
@@ -0,0 +1,2 @@
1
+ id select group
2
+ 1 test select group name
@@ -0,0 +1,2 @@
1
+ cod;info
2
+ 1;test data 1
@@ -0,0 +1,2 @@
1
+ id;data
2
+ 1;test data 1
@@ -0,0 +1,2 @@
1
+ id data
2
+ 1 test data 1
@@ -0,0 +1,2 @@
1
+ cod info
2
+ 1 test data 1
@@ -0,0 +1,2 @@
1
+ data id
2
+ this is a wrong separator;1
@@ -0,0 +1,3 @@
1
+ id data
2
+ 1 test data 1
3
+
@@ -0,0 +1,2 @@
1
+ id data
2
+ 1 test data 1
@@ -0,0 +1,3 @@
1
+ id data
2
+ 1 test data 1
3
+ 2 test data 2
@@ -0,0 +1,4 @@
1
+ require 'postgres_upsert'
2
+
3
+ class TestModel < ActiveRecord::Base
4
+ end
@@ -0,0 +1,4 @@
1
+ require 'postgres_upsert'
2
+
3
+ class ThreeColumn < ActiveRecord::Base
4
+ end
@@ -0,0 +1,35 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "pg_upsert from file with binary data" do
4
+ before(:each) do
5
+ ActiveRecord::Base.connection.execute %{
6
+ TRUNCATE TABLE test_models;
7
+ SELECT setval('test_models_id_seq', 1, false);
8
+ }
9
+ end
10
+
11
+ before do
12
+ DateTime.stub(:now).and_return (DateTime.parse("2012-01-01").utc)
13
+ end
14
+
15
+ def timestamp
16
+ DateTime.now.utc.to_s
17
+ end
18
+
19
+ it "imports from file if path is passed without field_map" do
20
+ TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary, columns: [:id, :data]
21
+
22
+ expect(
23
+ TestModel.first.attributes
24
+ ).to include('data' => 'text', 'created_at' => timestamp, 'updated_at' => timestamp)
25
+ end
26
+
27
+ it "throws an error when importing binary file without columns list" do
28
+ # Since binary data never has a header row, we'll require explicit columns list
29
+ expect{
30
+ TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary
31
+ }.to raise_error "Either the :columns option or :header => true are required"
32
+ end
33
+
34
+ end
35
+
@@ -0,0 +1,187 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "pg_upsert from file with CSV format" do
4
+ before(:each) do
5
+ ActiveRecord::Base.connection.execute %{
6
+ TRUNCATE TABLE test_models;
7
+ SELECT setval('test_models_id_seq', 1, false);
8
+ }
9
+ end
10
+
11
+ before do
12
+ DateTime.stub_chain(:now, :utc).and_return (DateTime.parse("2012-01-01").utc)
13
+ end
14
+
15
+ def timestamp
16
+ DateTime.now.utc
17
+ end
18
+
19
+ it "should import from file if path is passed without field_map" do
20
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header.csv')
21
+ expect(
22
+ TestModel.first.attributes
23
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
24
+ end
25
+
26
+ it "correctly handles delimiters in content" do
27
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header_and_comma_values.csv')
28
+ expect(
29
+ TestModel.first.attributes
30
+ ).to include('data' => 'test, the data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
31
+ end
32
+
33
+ it "throws error if csv is malformed" do
34
+ expect{
35
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header_and_unquoted_comma.csv')
36
+ }.to raise_error
37
+ end
38
+
39
+ it "throws error if the csv has mixed delimiters" do
40
+ expect{
41
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_error.csv'), :delimiter => "\t"
42
+ }.to raise_error
43
+ end
44
+
45
+ it "should import from IO without field_map" do
46
+ TestModel.pg_upsert File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')
47
+ expect(
48
+ TestModel.first.attributes
49
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
50
+ end
51
+
52
+ it "should import with custom delimiter from path" do
53
+ TestModel.pg_upsert File.expand_path('spec/fixtures/semicolon_with_header.csv'), :delimiter => ';'
54
+ expect(
55
+ TestModel.first.attributes
56
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
57
+ end
58
+
59
+ it "should import with custom delimiter from IO" do
60
+ TestModel.pg_upsert File.open(File.expand_path('spec/fixtures/semicolon_with_header.csv'), 'r'), :delimiter => ';'
61
+ expect(
62
+ TestModel.first.attributes
63
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
64
+ end
65
+
66
+ it "should import and allow changes in block" do
67
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')) do |row|
68
+ row[1] = 'changed this data'
69
+ end
70
+ expect(
71
+ TestModel.first.attributes
72
+ ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
73
+ end
74
+
75
+ it "should import 2 lines and allow changes in block" do
76
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_two_lines.csv'), 'r'), :delimiter => "\t") do |row|
77
+ row[1] = 'changed this data'
78
+ end
79
+
80
+ expect(
81
+ TestModel.find(1).attributes
82
+ ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
83
+ expect(
84
+ TestModel.find(2).attributes
85
+ ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
86
+ expect(TestModel.count).to eq 2
87
+ end
88
+
89
+ it "should not expect a header when :header is false" do
90
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/comma_without_header.csv'), 'r'), :header => false, :columns => [:id,:data])
91
+
92
+ expect(
93
+ TestModel.first.attributes
94
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
95
+ end
96
+
97
+ it "should be able to map the header in the file to diferent column names" do
98
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_different_header.csv'), 'r'), :delimiter => "\t", :map => {'cod' => 'id', 'info' => 'data'})
99
+
100
+ expect(
101
+ TestModel.first.attributes
102
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
103
+ end
104
+
105
+ it "should be able to map the header in the file to diferent column names with custom delimiter" do
106
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/semicolon_with_different_header.csv'), 'r'), :delimiter => ';', :map => {'cod' => 'id', 'info' => 'data'})
107
+
108
+ expect(
109
+ TestModel.first.attributes
110
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
111
+ end
112
+
113
+ it "should ignore empty lines" do
114
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_extra_line.csv'), 'r'), :delimiter => "\t")
115
+
116
+ expect(
117
+ TestModel.first.attributes
118
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
119
+ end
120
+
121
+ it "should not create timestamps when the model does not include them" do
122
+ ReservedWordModel.pg_upsert File.expand_path('spec/fixtures/reserved_words.csv'), :delimiter => "\t"
123
+
124
+ expect(
125
+ ReservedWordModel.first.attributes
126
+ ).to eq("group"=>"group name", "id"=>1, "select"=>"test select")
127
+ end
128
+
129
+ context "upserting data to handle inserts and creates" do
130
+ let(:original_created_at) {5.days.ago.utc}
131
+
132
+ before(:each) do
133
+ TestModel.create(id: 1, data: "From the before time, in the long long ago", :created_at => original_created_at)
134
+ end
135
+
136
+ it "should not violate primary key constraint" do
137
+ expect{
138
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header.csv')
139
+ }.to_not raise_error
140
+ end
141
+
142
+ it "should upsert (update existing records and insert new records)" do
143
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
144
+
145
+ expect(
146
+ TestModel.find(1).attributes
147
+ ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at, "updated_at" => timestamp)
148
+ expect(
149
+ TestModel.find(2).attributes
150
+ ).to eq("id"=>2, "data"=>"test data 2", "created_at" => timestamp, "updated_at" => timestamp)
151
+ end
152
+
153
+ it "should require columns option if no header" do
154
+ expect{
155
+ TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary
156
+ }.to raise_error("Either the :columns option or :header => true are required")
157
+ end
158
+
159
+ it "should clean up the temp table after completion" do
160
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
161
+
162
+ ActiveRecord::Base.connection.tables.should_not include("test_models_temp")
163
+ end
164
+
165
+ it "should gracefully drop the temp table if it already exists" do
166
+ ActiveRecord::Base.connection.execute "CREATE TEMP TABLE test_models_temp (LIKE test_models);"
167
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
168
+
169
+ expect(
170
+ TestModel.find(1).attributes
171
+ ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at, "updated_at" => timestamp)
172
+ expect(
173
+ TestModel.find(2).attributes
174
+ ).to eq("id"=>2, "data"=>"test data 2", "created_at" => timestamp, "updated_at" => timestamp)
175
+ end
176
+
177
+ it "should be able to copy using custom set of columns" do
178
+ ThreeColumn.create(id: 1, data: "old stuff", extra: "neva change!", created_at: original_created_at)
179
+ ThreeColumn.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_only_data.csv'), 'r'), :delimiter => "\t", :columns => ["id", "data"])
180
+
181
+ expect(
182
+ ThreeColumn.first.attributes
183
+ ).to eq('id' => 1, 'data' => 'test data 1', 'extra' => "neva change!", 'created_at' => original_created_at, 'updated_at' => timestamp)
184
+ end
185
+ end
186
+ end
187
+
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,47 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'fixtures/test_model'
4
+ require 'fixtures/three_column'
5
+ require 'fixtures/reserved_word_model'
6
+ require 'rspec'
7
+ require 'rspec/autorun'
8
+
9
RSpec.configure do |config|
  # Prepares the test database once before the suite runs: connects to
  # ar_pg_copy_test and (re)creates the tables used by the specs. If that
  # fails, the database is dropped and recreated and the setup is tried once more.
  config.before(:suite) do
    # The tests connect to a local server as postgres/postgres.
    db_settings = {
      :adapter  => "postgresql",
      :host     => "localhost",
      :username => "postgres",
      :password => "postgres",
      :port     => 5432
    }

    attempts = 0
    begin
      attempts += 1
      ActiveRecord::Base.establish_connection(db_settings.merge(:database => "ar_pg_copy_test"))
      ActiveRecord::Base.connection.execute %{
        SET client_min_messages TO warning;
        DROP TABLE IF EXISTS test_models;
        DROP TABLE IF EXISTS three_columns;
        DROP TABLE IF EXISTS reserved_word_models;
        CREATE TABLE test_models (id serial PRIMARY KEY, data text, created_at timestamp with time zone, updated_at timestamp with time zone );
        CREATE TABLE three_columns (id serial PRIMARY KEY, data text, extra text, created_at timestamp with time zone, updated_at timestamp with time zone );
        CREATE TABLE reserved_word_models (id serial PRIMARY KEY, "select" text, "group" text);
      }
    rescue StandardError => e
      # Recreating the database can only help once; re-raise a second failure
      # instead of retrying forever.
      raise if attempts > 1
      puts "Exception: #{e}"
      ActiveRecord::Base.establish_connection(db_settings.merge(:database => "postgres"))
      ActiveRecord::Base.connection.execute "DROP DATABASE IF EXISTS ar_pg_copy_test"
      ActiveRecord::Base.connection.execute "CREATE DATABASE ar_pg_copy_test;"
      retry
    end
  end

end
metadata ADDED
@@ -0,0 +1,194 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: postgres_upsert
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Steve Mitchell
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pg
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: 0.17.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: 0.17.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: activerecord
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 3.0.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 3.0.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: rails
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 3.0.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 3.0.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rdoc
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry-rails
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ~>
102
+ - !ruby/object:Gem::Version
103
+ version: '2.12'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ~>
109
+ - !ruby/object:Gem::Version
110
+ version: '2.12'
111
+ description: Uses Postgres's powerful COPY command to upsert large sets of data into
112
+ ActiveRecord tables
113
+ email: thestevemitchell@gmail.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - .gitignore
119
+ - Gemfile
120
+ - Gemfile.lock
121
+ - LICENSE
122
+ - README.md
123
+ - Rakefile
124
+ - VERSION
125
+ - lib/postgres_upsert.rb
126
+ - lib/postgres_upsert/active_record.rb
127
+ - postgres_upsert.gemspec
128
+ - spec/fixtures/2_col_binary_data.dat
129
+ - spec/fixtures/comma_with_header.csv
130
+ - spec/fixtures/comma_with_header_and_comma_values.csv
131
+ - spec/fixtures/comma_with_header_and_unquoted_comma.csv
132
+ - spec/fixtures/comma_without_header.csv
133
+ - spec/fixtures/reserved_word_model.rb
134
+ - spec/fixtures/reserved_words.csv
135
+ - spec/fixtures/semicolon_with_different_header.csv
136
+ - spec/fixtures/semicolon_with_header.csv
137
+ - spec/fixtures/tab_only_data.csv
138
+ - spec/fixtures/tab_with_different_header.csv
139
+ - spec/fixtures/tab_with_error.csv
140
+ - spec/fixtures/tab_with_extra_line.csv
141
+ - spec/fixtures/tab_with_header.csv
142
+ - spec/fixtures/tab_with_two_lines.csv
143
+ - spec/fixtures/test_model.rb
144
+ - spec/fixtures/three_column.rb
145
+ - spec/pg_upsert_binary_spec.rb
146
+ - spec/pg_upsert_csv_spec.rb
147
+ - spec/spec.opts
148
+ - spec/spec_helper.rb
149
+ homepage: https://github.com/theSteveMitchell/postgres_upsert
150
+ licenses: []
151
+ metadata: {}
152
+ post_install_message:
153
+ rdoc_options: []
154
+ require_paths:
155
+ - lib
156
+ required_ruby_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - '>='
159
+ - !ruby/object:Gem::Version
160
+ version: 1.8.7
161
+ required_rubygems_version: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - '>='
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ requirements: []
167
+ rubyforge_project:
168
+ rubygems_version: 2.0.6
169
+ signing_key:
170
+ specification_version: 4
171
+ summary: A rubygem that integrates with ActiveRecord to insert/update large data sets
172
+ into the database efficiently
173
+ test_files:
174
+ - spec/fixtures/2_col_binary_data.dat
175
+ - spec/fixtures/comma_with_header.csv
176
+ - spec/fixtures/comma_with_header_and_comma_values.csv
177
+ - spec/fixtures/comma_with_header_and_unquoted_comma.csv
178
+ - spec/fixtures/comma_without_header.csv
179
+ - spec/fixtures/reserved_word_model.rb
180
+ - spec/fixtures/reserved_words.csv
181
+ - spec/fixtures/semicolon_with_different_header.csv
182
+ - spec/fixtures/semicolon_with_header.csv
183
+ - spec/fixtures/tab_only_data.csv
184
+ - spec/fixtures/tab_with_different_header.csv
185
+ - spec/fixtures/tab_with_error.csv
186
+ - spec/fixtures/tab_with_extra_line.csv
187
+ - spec/fixtures/tab_with_header.csv
188
+ - spec/fixtures/tab_with_two_lines.csv
189
+ - spec/fixtures/test_model.rb
190
+ - spec/fixtures/three_column.rb
191
+ - spec/pg_upsert_binary_spec.rb
192
+ - spec/pg_upsert_csv_spec.rb
193
+ - spec/spec.opts
194
+ - spec/spec_helper.rb