postgres_upsert 3.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 670c45802ef4d5b50510800787f75426181d0722
4
+ data.tar.gz: a249301be5d55ceaabc1d0d2606f834c65a51361
5
+ SHA512:
6
+ metadata.gz: 0341063e94b7cf9a64e3c35f5052d988eae2f93ac5aa8aa0255813312836176529d918c16d59cd1fe032391ac8482fe878c3162ea20a61073b0654faebb4b0c0
7
+ data.tar.gz: e1eada0840b8f2bf2999783cd1db72c153c8d2a11823789c2505f4da2902a08c10a976cc0a77b906f43e124d0cc5462f485096d3d4d1643e967b8184e31499c6
data/.gitignore ADDED
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # specify gem dependencies in this gem's .gemspec file
4
+ # except the platform-specific dependencies below
5
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,146 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ postgres_upsert (3.0.0-java)
5
+ activerecord (>= 3.0.0)
6
+ activerecord-jdbcpostgresql-adapter
7
+ rails (>= 3.0.0)
8
+ sequel
9
+
10
+ GEM
11
+ remote: https://rubygems.org/
12
+ specs:
13
+ actionmailer (4.2.0)
14
+ actionpack (= 4.2.0)
15
+ actionview (= 4.2.0)
16
+ activejob (= 4.2.0)
17
+ mail (~> 2.5, >= 2.5.4)
18
+ rails-dom-testing (~> 1.0, >= 1.0.5)
19
+ actionpack (4.2.0)
20
+ actionview (= 4.2.0)
21
+ activesupport (= 4.2.0)
22
+ rack (~> 1.6.0)
23
+ rack-test (~> 0.6.2)
24
+ rails-dom-testing (~> 1.0, >= 1.0.5)
25
+ rails-html-sanitizer (~> 1.0, >= 1.0.1)
26
+ actionview (4.2.0)
27
+ activesupport (= 4.2.0)
28
+ builder (~> 3.1)
29
+ erubis (~> 2.7.0)
30
+ rails-dom-testing (~> 1.0, >= 1.0.5)
31
+ rails-html-sanitizer (~> 1.0, >= 1.0.1)
32
+ activejob (4.2.0)
33
+ activesupport (= 4.2.0)
34
+ globalid (>= 0.3.0)
35
+ activemodel (4.2.0)
36
+ activesupport (= 4.2.0)
37
+ builder (~> 3.1)
38
+ activerecord (4.2.0)
39
+ activemodel (= 4.2.0)
40
+ activesupport (= 4.2.0)
41
+ arel (~> 6.0)
42
+ activerecord-jdbc-adapter (1.3.14)
43
+ activerecord (>= 2.2)
44
+ activerecord-jdbcpostgresql-adapter (1.3.14)
45
+ activerecord-jdbc-adapter (~> 1.3.14)
46
+ jdbc-postgres (>= 9.1)
47
+ activesupport (4.2.0)
48
+ i18n (~> 0.7)
49
+ json (~> 1.7, >= 1.7.7)
50
+ minitest (~> 5.1)
51
+ thread_safe (~> 0.3, >= 0.3.4)
52
+ tzinfo (~> 1.1)
53
+ arel (6.0.0)
54
+ builder (3.2.2)
55
+ coderay (1.1.0)
56
+ diff-lcs (1.2.5)
57
+ erubis (2.7.0)
58
+ ffi (1.9.6-java)
59
+ globalid (0.3.2)
60
+ activesupport (>= 4.1.0)
61
+ hike (1.2.3)
62
+ i18n (0.7.0)
63
+ jdbc-postgres (9.3.1102)
64
+ json (1.8.2-java)
65
+ loofah (2.0.1)
66
+ nokogiri (>= 1.5.9)
67
+ mail (2.6.3)
68
+ mime-types (>= 1.16, < 3)
69
+ method_source (0.8.2)
70
+ mime-types (2.4.3)
71
+ minitest (5.5.1)
72
+ multi_json (1.10.1)
73
+ nokogiri (1.6.6.2-java)
74
+ pry (0.10.1-java)
75
+ coderay (~> 1.1.0)
76
+ method_source (~> 0.8.1)
77
+ slop (~> 3.4)
78
+ spoon (~> 0.0)
79
+ pry-rails (0.3.3)
80
+ pry (>= 0.9.10)
81
+ rack (1.6.0)
82
+ rack-test (0.6.3)
83
+ rack (>= 1.0)
84
+ rails (4.2.0)
85
+ actionmailer (= 4.2.0)
86
+ actionpack (= 4.2.0)
87
+ actionview (= 4.2.0)
88
+ activejob (= 4.2.0)
89
+ activemodel (= 4.2.0)
90
+ activerecord (= 4.2.0)
91
+ activesupport (= 4.2.0)
92
+ bundler (>= 1.3.0, < 2.0)
93
+ railties (= 4.2.0)
94
+ sprockets-rails
95
+ rails-deprecated_sanitizer (1.0.3)
96
+ activesupport (>= 4.2.0.alpha)
97
+ rails-dom-testing (1.0.5)
98
+ activesupport (>= 4.2.0.beta, < 5.0)
99
+ nokogiri (~> 1.6.0)
100
+ rails-deprecated_sanitizer (>= 1.0.1)
101
+ rails-html-sanitizer (1.0.1)
102
+ loofah (~> 2.0)
103
+ railties (4.2.0)
104
+ actionpack (= 4.2.0)
105
+ activesupport (= 4.2.0)
106
+ rake (>= 0.8.7)
107
+ thor (>= 0.18.1, < 2.0)
108
+ rake (10.4.2)
109
+ rdoc (4.2.0)
110
+ json (~> 1.4)
111
+ rspec (2.99.0)
112
+ rspec-core (~> 2.99.0)
113
+ rspec-expectations (~> 2.99.0)
114
+ rspec-mocks (~> 2.99.0)
115
+ rspec-core (2.99.2)
116
+ rspec-expectations (2.99.2)
117
+ diff-lcs (>= 1.1.3, < 2.0)
118
+ rspec-mocks (2.99.3)
119
+ sequel (4.19.0)
120
+ slop (3.6.0)
121
+ spoon (0.0.4)
122
+ ffi
123
+ sprockets (2.12.3)
124
+ hike (~> 1.2)
125
+ multi_json (~> 1.0)
126
+ rack (~> 1.0)
127
+ tilt (~> 1.1, != 1.3.0)
128
+ sprockets-rails (2.2.4)
129
+ actionpack (>= 3.0)
130
+ activesupport (>= 3.0)
131
+ sprockets (>= 2.8, < 4.0)
132
+ thor (0.19.1)
133
+ thread_safe (0.3.4-java)
134
+ tilt (1.4.1)
135
+ tzinfo (1.2.2)
136
+ thread_safe (~> 0.1)
137
+
138
+ PLATFORMS
139
+ java
140
+
141
+ DEPENDENCIES
142
+ bundler
143
+ postgres_upsert!
144
+ pry-rails
145
+ rdoc
146
+ rspec (~> 2.12)
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Steve Mitchell
4
+ Based on work Copyright (c) Diogo Biazus https://github.com/diogob/postgres-copy
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,92 @@
1
+ # postgres_upsert
2
+
3
+ Allows your rails app to load data in a very fast way, avoiding calls to ActiveRecord.
4
+
5
+ Using the PG gem and postgres's powerful COPY command, you can create thousands of rails objects in your db in a single query.
6
+
7
+
8
+ ## Install
9
+
10
+ Put it in your Gemfile
11
+
12
+ gem 'postgres_upsert'
13
+
14
+ Run the bundle command
15
+
16
+ bundle
17
+
18
+ ## Usage
19
+
20
+ The gem will add the following additional class method to ActiveRecord::Base
21
+
22
+ * pg_upsert io_object_or_file_path, [options]
23
+
24
+ io_object_or_file_path => is a file path or an io object (StringIO, FileIO, etc.)
25
+
26
+ options:
27
+ :delimiter - the string to use to delimit fields. Default is ","
28
+ :format - the format of the file (valid formats are :csv or :binary). Default is :csv
29
+ :header => specifies if the file/io source contains a header row. Either :header option must be true, or :columns list must be passed. Default true
30
+ :key_column => the primary key or unique key column on your ActiveRecord table, used to distinguish new records from existing records. Default is the primary_key of your ActiveRecord model class.
31
+ :update_only => when true, postgres_upsert will ONLY update existing records, and not insert new. Default is false.
32
+
33
+ pg_upsert will allow you to copy data from an arbitrary IO object or from a file in the database server (when you pass the path as a string).
34
+ Let's first copy from a file in the database server, assuming again that we have a users table and
35
+ that we are in the Rails console:
36
+
37
+ ```ruby
38
+ User.pg_upsert "/tmp/users.csv"
39
+ ```
40
+
41
+ This command will use the headers in the CSV file as fields of the target table, so make sure the files you import always have a header row.
42
+ If the column names in the CSV header do not match the field names of the target table, you can pass a map in the options parameter.
43
+
44
+ ```ruby
45
+ User.pg_upsert "/tmp/users.csv", :map => {'name' => 'first_name'}
46
+ ```
47
+
48
+ In the above example the header name in the CSV file will be mapped to the field called first_name in the users table.
49
+
50
+ To copy a binary formatted data file or IO object you can specify the format as binary
51
+
52
+ ```ruby
53
+ User.pg_upsert "/tmp/users.dat", :format => :binary, :columns => ["id", "name"]
54
+ ```
55
+
56
+ Which will generate the following SQL command:
57
+
58
+ ```sql
59
+ COPY users (id, name) FROM '/tmp/users.dat' WITH BINARY
60
+ ```
61
+
62
+ NOTE: binary files do not include header columns, so passing a :columns array is required for binary files.
63
+
64
+
65
+ pg_upsert supports 'upsert' or 'merge' operations. In other words, the data source can contain both new and existing objects, and pg_upsert will handle either case. Since the Postgres native COPY command does not handle updating existing records, pg_upsert accomplishes update and insert using an intermediary temp table:
66
+
67
+ This merge/upsert happens in 5 steps (assume your data table is called "users"):
68
+ * create a temp table named users_temp_### where "###" is a random number. In postgres temp tables are only visible to the current database session, so naming conflicts should not be a problem.
69
+ * COPY the data to users_temp_###
70
+ * issue a query to insert all new records from users_temp_### into users (newness is determined by the presence of the primary key in the users table)
71
+ * issue a query to update all records in users with the data in users_temp_### (matching on primary key)
72
+ * drop the temp table.
73
+
74
+ ### overriding the key_column
75
+
76
+ By default pg_upsert uses the primary key on your ActiveRecord table to determine if each record should be inserted or updated. You can override the column using the :key_column option:
77
+
78
+ ```ruby
79
+ User.pg_upsert "/tmp/users.dat", :format => :binary, :key_column => "external_twitter_id"
80
+ ```
81
+
82
+ obviously, the field you pass must be a unique key in your database (this is not enforced at the moment, but will be)
83
+
84
+ Passing :update_only => true will ensure that no new records are created; existing records will still be updated.
85
+
86
+ ## Note on Patches/Pull Requests
87
+
88
+ * Fork the project
89
+ * add your feature/fix to your fork (rspec tests please)
90
+ * submit a PR
91
+ * If you find an issue but can't fix it in a PR, please log an issue. I'll do my best.
92
+
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.unshift File.expand_path("../lib", __FILE__)
3
+ require 'bundler/gem_tasks'
4
+ require 'rubygems'
5
+ require 'rspec/core/rake_task'
6
+ require 'rdoc/task'
7
+
8
+ task :default => :spec
9
+
10
+ RSpec::Core::RakeTask.new(:spec)
11
+
12
+ Rake::RDocTask.new do |rdoc|
13
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
14
+ rdoc.rdoc_dir = 'rdoc'
15
+ rdoc.title = "postgres_upsert #{version}"
16
+ rdoc.rdoc_files.include('README*')
17
+ rdoc.rdoc_files.include('lib/**/*.rb')
18
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,13 @@
1
+ module ActiveRecord
2
+ class Base
3
+ # Copy data to a file passed as a string (the file path) or to lines that are passed to a block
4
+
5
+ # Copy data from a CSV that can be passed as a string (the file path) or as an IO object.
6
+ # * You can change the default delimiter passing delimiter: '' in the options hash
7
+ # * You can map fields from the file to different fields in the table using a map in the options hash
8
+ # * For further details on usage take a look at the README.md
9
+ def self.pg_upsert path_or_io, options = {}
10
+ PostgresUpsert::Writer.new(table_name, path_or_io, options).write
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,208 @@
1
+ require 'sequel'
2
+
3
+
4
+ module PostgresUpsert
5
+
6
+ class Writer
7
+
8
+ def initialize(table_name, source, options = {})
9
+ @table_name = table_name
10
+ @options = options.reverse_merge({
11
+ :delimiter => ",",
12
+ :format => :csv,
13
+ :header => true,
14
+ :key_column => primary_key,
15
+ :update_only => false})
16
+ @source = source.instance_of?(String) ? File.open(source, 'r') : source
17
+ @columns_list = get_columns
18
+ generate_temp_table_name
19
+ end
20
+
21
+ def write
22
+ if @columns_list.empty?
23
+ raise "Either the :columns option or :header => true are required"
24
+ end
25
+
26
+ csv_options = @options[:format] == :binary ? "BINARY" : "DELIMITER '#{@options[:delimiter]}' CSV"
27
+
28
+ copy_table = @temp_table_name
29
+
30
+ columns_string = columns_string_for_copy
31
+
32
+ base_connection = Sequel.connect(ActiveRecord::Base.connection.config[:url])
33
+
34
+ base_connection.synchronize do |connection|
35
+ create_temp_table(connection)
36
+ copy_manager = org.postgresql.copy.CopyManager.new(connection)
37
+ stream = copy_manager.copy_in("COPY #{copy_table} #{columns_string} FROM STDIN WITH #{csv_options}")
38
+ while line = read_input_line do
39
+ next if line.strip.size == 0
40
+ line = line.to_java_bytes
41
+ stream.write_to_copy(line, 0, line.length)
42
+ end
43
+ stream.end_copy
44
+ upsert_from_temp_table(connection)
45
+ drop_temp_table(connection)
46
+ end
47
+
48
+ end
49
+
50
+ private
51
+
52
+ def primary_key
53
+ @primary_key ||= begin
54
+ query = <<-sql
55
+ SELECT
56
+ pg_attribute.attname,
57
+ format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
58
+ FROM pg_index, pg_class, pg_attribute
59
+ WHERE
60
+ pg_class.oid = '#{@table_name}'::regclass AND
61
+ indrelid = pg_class.oid AND
62
+ pg_attribute.attrelid = pg_class.oid AND
63
+ pg_attribute.attnum = any(pg_index.indkey)
64
+ AND indisprimary
65
+ sql
66
+
67
+ pg_result = ActiveRecord::Base.connection.execute query
68
+ pg_result.each{ |row| return row['attname'] }
69
+ end
70
+ end
71
+
72
+ def column_names
73
+ @column_names ||= begin
74
+ query = "SELECT * FROM information_schema.columns WHERE TABLE_NAME = '#{@table_name}'"
75
+ pg_result = ActiveRecord::Base.connection.execute query
76
+ pg_result.map{ |row| row['column_name'] }
77
+ end
78
+ end
79
+
80
+ def get_columns
81
+ columns_list = @options[:columns] || []
82
+ if @options[:format] != :binary && @options[:header]
83
+ #if header is present, we need to strip it from io, whether we use it for the columns list or not.
84
+ line = @source.gets
85
+ if columns_list.empty?
86
+ columns_list = line.strip.split(@options[:delimiter])
87
+ end
88
+ end
89
+ columns_list = columns_list.map{|c| @options[:map][c.to_s] } if @options[:map]
90
+ return columns_list
91
+ end
92
+
93
+ def columns_string_for_copy
94
+ str = get_columns_string
95
+ str.empty? ? str : "(#{str})"
96
+ end
97
+
98
+ def columns_string_for_select
99
+ columns = @columns_list.clone
100
+ columns << "created_at" if column_names.include?("created_at")
101
+ columns << "updated_at" if column_names.include?("updated_at")
102
+ str = get_columns_string(columns)
103
+ end
104
+
105
+ def columns_string_for_insert
106
+ columns = @columns_list.clone
107
+ columns << "created_at" if column_names.include?("created_at")
108
+ columns << "updated_at" if column_names.include?("updated_at")
109
+ str = get_columns_string(columns)
110
+ end
111
+
112
+ def select_string_for_insert
113
+ columns = @columns_list.clone
114
+ str = get_columns_string(columns)
115
+ str << ",'#{DateTime.now.utc}'" if column_names.include?("created_at")
116
+ str << ",'#{DateTime.now.utc}'" if column_names.include?("updated_at")
117
+ str
118
+ end
119
+
120
+ def select_string_for_create
121
+ columns = @columns_list.map(&:to_sym)
122
+ columns << @options[:key_column].to_sym unless columns.include?(@options[:key_column].to_sym)
123
+ get_columns_string(columns)
124
+ end
125
+
126
+ def get_columns_string(columns = nil)
127
+ columns ||= @columns_list
128
+ columns.size > 0 ? "\"#{columns.join('","')}\"" : ""
129
+ end
130
+
131
+ def quoted_table_name
132
+ @quoted_table_name ||= ActiveRecord::Base.connection.quote_table_name(@table_name)
133
+ end
134
+
135
+ def generate_temp_table_name
136
+ @temp_table_name = "#{@table_name}_temp_#{rand(1000)}"
137
+ end
138
+
139
+ def read_input_line
140
+ if @options[:format] == :binary
141
+ begin
142
+ return @source.readpartial(10240)
143
+ rescue EOFError
144
+ end
145
+ else
146
+ line = @source.gets
147
+ return line
148
+ end
149
+ end
150
+
151
+ def upsert_from_temp_table(connection)
152
+ update_from_temp_table(connection)
153
+ insert_from_temp_table(connection) unless @options[:update_only]
154
+ end
155
+
156
+ def update_from_temp_table(connection)
157
+ connection.execSQLUpdate <<-SQL
158
+ UPDATE #{quoted_table_name} AS d
159
+ #{update_set_clause}
160
+ FROM #{@temp_table_name} as t
161
+ WHERE t.#{@options[:key_column]} = d.#{@options[:key_column]}
162
+ AND d.#{@options[:key_column]} IS NOT NULL;
163
+ SQL
164
+ end
165
+
166
+ def update_set_clause
167
+ command = @columns_list.map do |col|
168
+ "\"#{col}\" = t.\"#{col}\""
169
+ end
170
+ command << "\"updated_at\" = '#{DateTime.now.utc}'" if column_names.include?("updated_at")
171
+ "SET #{command.join(',')}"
172
+ end
173
+
174
+ def insert_from_temp_table(connection)
175
+ columns_string = columns_string_for_insert
176
+ select_string = select_string_for_insert
177
+ connection.execSQLUpdate <<-SQL
178
+ INSERT INTO #{quoted_table_name} (#{columns_string})
179
+ SELECT #{select_string}
180
+ FROM #{@temp_table_name} as t
181
+ WHERE NOT EXISTS
182
+ (SELECT 1
183
+ FROM #{quoted_table_name} as d
184
+ WHERE d.#{@options[:key_column]} = t.#{@options[:key_column]})
185
+ AND t.#{@options[:key_column]} IS NOT NULL;
186
+ SQL
187
+ end
188
+
189
+ def create_temp_table(connection)
190
+ columns_string = select_string_for_create
191
+ connection.execSQLUpdate <<-SQL
192
+ SET client_min_messages=WARNING;
193
+ DROP TABLE IF EXISTS #{@temp_table_name};
194
+
195
+ CREATE TEMP TABLE #{@temp_table_name}
196
+ AS SELECT #{columns_string} FROM #{quoted_table_name} WHERE 0 = 1;
197
+ SQL
198
+ end
199
+
200
+ def drop_temp_table(connection)
201
+ connection.execSQLUpdate <<-SQL
202
+ DROP TABLE #{@temp_table_name}
203
+ SQL
204
+ end
205
+ end
206
+
207
+
208
+ end
@@ -0,0 +1,14 @@
1
+ require 'rubygems'
2
+ require 'active_record'
3
+ require 'postgres_upsert/active_record'
4
+ require 'postgres_upsert/writer'
5
+ require 'rails'
6
+
7
+ class PostgresCopy < Rails::Railtie
8
+
9
+ initializer 'postgres_upsert' do
10
+ ActiveSupport.on_load :active_record do
11
+ require "postgres_upsert/active_record"
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib/', __FILE__)
3
+ $:.unshift lib unless $:.include?(lib)
4
+
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "postgres_upsert"
8
+ s.version = "3.1.0"
9
+
10
+ s.platform = 'java'
11
+ s.authors = ["Steve Mitchell"]
12
+ s.date = "2014-09-12"
13
+ s.description = "Uses Postgres's powerful COPY command to upsert large sets of data into ActiveRecord tables"
14
+ s.email = "thestevemitchell@gmail.com"
15
+ git_files = `git ls-files`.split("\n") rescue ''
16
+ s.files = git_files
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.executables = []
19
+ s.require_paths = %w(lib)
20
+ s.homepage = "https://github.com/theSteveMitchell/postgres_upsert"
21
+ s.require_paths = ["lib"]
22
+ s.summary = "A rubygem that integrates with ActiveRecord to insert/update large data sets into the database efficiently"
23
+ s.license = "MIT"
24
+
25
+ s.add_dependency "activerecord-jdbcpostgresql-adapter"
26
+ s.add_dependency "sequel"
27
+ s.add_dependency "activerecord", '>= 3.0.0'
28
+ s.add_dependency "rails", '>= 3.0.0'
29
+ s.add_development_dependency "bundler"
30
+ s.add_development_dependency "rdoc"
31
+ s.add_development_dependency "pry-rails"
32
+ s.add_development_dependency "rspec", "~> 2.12"
33
+ end
34
+
Binary file
@@ -0,0 +1,2 @@
1
+ id,data
2
+ 1,test data 1
@@ -0,0 +1,2 @@
1
+ id,data
2
+ 1,"test, the data 1"
@@ -0,0 +1,2 @@
1
+ id,data
2
+ 1,test, the data 1
@@ -0,0 +1 @@
1
+ 1,test data 1
@@ -0,0 +1,2 @@
1
+ data,extra
2
+ old stuff,ABC: Always Be Changing.
@@ -0,0 +1,5 @@
1
+ require 'postgres_upsert'
2
+
3
+ class ReservedWordModel < ActiveRecord::Base
4
+ end
5
+
@@ -0,0 +1,2 @@
1
+ id select group
2
+ 1 test select group name
@@ -0,0 +1,2 @@
1
+ cod;info
2
+ 1;test data 1
@@ -0,0 +1,2 @@
1
+ id;data
2
+ 1;test data 1
@@ -0,0 +1,2 @@
1
+ id data
2
+ 1 test data 1
@@ -0,0 +1,2 @@
1
+ cod info
2
+ 1 test data 1
@@ -0,0 +1,2 @@
1
+ data id
2
+ this is a wrong separator;1
@@ -0,0 +1,3 @@
1
+ id data
2
+ 1 test data 1
3
+
@@ -0,0 +1,2 @@
1
+ id data
2
+ 1 test data 1
@@ -0,0 +1,3 @@
1
+ id data
2
+ 1 test data 1
3
+ 2 test data 2
@@ -0,0 +1,4 @@
1
+ require 'postgres_upsert'
2
+
3
+ class TestModel < ActiveRecord::Base
4
+ end
@@ -0,0 +1,4 @@
1
+ require 'postgres_upsert'
2
+
3
+ class ThreeColumn < ActiveRecord::Base
4
+ end
@@ -0,0 +1,35 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "pg_upsert from file with binary data" do
4
+ before(:each) do
5
+ ActiveRecord::Base.connection.execute %{
6
+ TRUNCATE TABLE test_models;
7
+ SELECT setval('test_models_id_seq', 1, false);
8
+ }
9
+ end
10
+
11
+ before do
12
+ DateTime.stub(:now).and_return (DateTime.parse("2012-01-01").utc)
13
+ end
14
+
15
+ def timestamp
16
+ DateTime.now.utc.to_s
17
+ end
18
+
19
+ it "imports from file if path is passed without field_map" do
20
+ TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary, columns: [:id, :data]
21
+
22
+ expect(
23
+ TestModel.first.attributes
24
+ ).to include('data' => 'text', 'created_at' => timestamp, 'updated_at' => timestamp)
25
+ end
26
+
27
+ it "throws an error when importing binary file without columns list" do
28
+ # Since binary data never has a header row, we'll require explicit columns list
29
+ expect{
30
+ TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary
31
+ }.to raise_error "Either the :columns option or :header => true are required"
32
+ end
33
+
34
+ end
35
+
@@ -0,0 +1,206 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "pg_upsert from file with CSV format" do
4
+ before(:each) do
5
+ ActiveRecord::Base.connection.execute %{
6
+ TRUNCATE TABLE test_models;
7
+ TRUNCATE TABLE three_columns;
8
+ SELECT setval('test_models_id_seq', 1, false);
9
+ }
10
+ end
11
+
12
+ before do
13
+ DateTime.stub_chain(:now, :utc).and_return (DateTime.parse("2012-01-01").utc)
14
+ end
15
+
16
+ def timestamp
17
+ DateTime.now.utc
18
+ end
19
+
20
+ it "should import from file if path is passed without field_map" do
21
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header.csv')
22
+ expect(
23
+ TestModel.first.attributes
24
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
25
+ end
26
+
27
+ it "correctly handles delimiters in content" do
28
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header_and_comma_values.csv')
29
+ expect(
30
+ TestModel.first.attributes
31
+ ).to include('data' => 'test, the data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
32
+ end
33
+
34
+ it "throws error if csv is malformed" do
35
+ expect{
36
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header_and_unquoted_comma.csv')
37
+ }.to raise_error
38
+ end
39
+
40
+ it "throws error if the csv has mixed delimiters" do
41
+ expect{
42
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_error.csv'), :delimiter => "\t"
43
+ }.to raise_error
44
+ end
45
+
46
+ it "should import from IO without field_map" do
47
+ TestModel.pg_upsert File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')
48
+ expect(
49
+ TestModel.first.attributes
50
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
51
+ end
52
+
53
+ it "should import with custom delimiter from path" do
54
+ TestModel.pg_upsert File.expand_path('spec/fixtures/semicolon_with_header.csv'), :delimiter => ';'
55
+ expect(
56
+ TestModel.first.attributes
57
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
58
+ end
59
+
60
+ it "should import with custom delimiter from IO" do
61
+ TestModel.pg_upsert File.open(File.expand_path('spec/fixtures/semicolon_with_header.csv'), 'r'), :delimiter => ';'
62
+ expect(
63
+ TestModel.first.attributes
64
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
65
+ end
66
+
67
+ it "should not expect a header when :header is false" do
68
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/comma_without_header.csv'), 'r'), :header => false, :columns => [:id,:data])
69
+
70
+ expect(
71
+ TestModel.first.attributes
72
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
73
+ end
74
+
75
+ it "should be able to map the header in the file to diferent column names" do
76
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_different_header.csv'), 'r'), :delimiter => "\t", :map => {'cod' => 'id', 'info' => 'data'})
77
+
78
+ expect(
79
+ TestModel.first.attributes
80
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
81
+ end
82
+
83
+ it "should be able to map the header in the file to diferent column names with custom delimiter" do
84
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/semicolon_with_different_header.csv'), 'r'), :delimiter => ';', :map => {'cod' => 'id', 'info' => 'data'})
85
+
86
+ expect(
87
+ TestModel.first.attributes
88
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
89
+ end
90
+
91
+ it "should ignore empty lines" do
92
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_extra_line.csv'), 'r'), :delimiter => "\t")
93
+
94
+ expect(
95
+ TestModel.first.attributes
96
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
97
+ end
98
+
99
+ it "should not create timestamps when the model does not include them" do
100
+ ReservedWordModel.pg_upsert File.expand_path('spec/fixtures/reserved_words.csv'), :delimiter => "\t"
101
+
102
+ expect(
103
+ ReservedWordModel.first.attributes
104
+ ).to eq("group"=>"group name", "id"=>1, "select"=>"test select")
105
+ end
106
+
107
+ context "upserting data to handle inserts and creates" do
108
+ let(:original_created_at) {5.days.ago.utc}
109
+
110
+ before(:each) do
111
+ TestModel.create(id: 1, data: "From the before time, in the long long ago", :created_at => original_created_at)
112
+ end
113
+
114
+ it "should not violate primary key constraint" do
115
+ expect{
116
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header.csv')
117
+ }.to_not raise_error
118
+ end
119
+
120
+ it "should upsert (update existing records and insert new records)" do
121
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
122
+
123
+ expect(
124
+ TestModel.find(1).attributes
125
+ ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at, "updated_at" => timestamp)
126
+ expect(
127
+ TestModel.find(2).attributes
128
+ ).to eq("id"=>2, "data"=>"test data 2", "created_at" => timestamp, "updated_at" => timestamp)
129
+ end
130
+
131
+ it "should require columns option if no header" do
132
+ expect{
133
+ TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary
134
+ }.to raise_error("Either the :columns option or :header => true are required")
135
+ end
136
+
137
+ it "should clean up the temp table after completion" do
138
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
139
+
140
+ ActiveRecord::Base.connection.tables.should_not include("test_models_temp")
141
+ end
142
+
143
+ it "should gracefully drop the temp table if it already exists" do
144
+ ActiveRecord::Base.connection.execute "CREATE TEMP TABLE test_models_temp (LIKE test_models);"
145
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
146
+
147
+ expect(
148
+ TestModel.find(1).attributes
149
+ ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at, "updated_at" => timestamp)
150
+ expect(
151
+ TestModel.find(2).attributes
152
+ ).to eq("id"=>2, "data"=>"test data 2", "created_at" => timestamp, "updated_at" => timestamp)
153
+ end
154
+
155
+ it "should be able to copy using custom set of columns" do
156
+ ThreeColumn.create(id: 1, data: "old stuff", extra: "neva change!", created_at: original_created_at)
157
+ ThreeColumn.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_only_data.csv'), 'r'), :delimiter => "\t", :columns => ["id", "data"])
158
+
159
+ expect(
160
+ ThreeColumn.first.attributes
161
+ ).to eq('id' => 1, 'data' => 'test data 1', 'extra' => "neva change!", 'created_at' => original_created_at, 'updated_at' => timestamp)
162
+ end
163
+ end
164
+
165
+ context 'overriding the comparison column' do
166
+ it 'updates records based the match column option if its passed in' do
167
+ three_col = ThreeColumn.create(id: 1, data: "old stuff", extra: "neva change!")
168
+ file = File.open(File.expand_path('spec/fixtures/no_id.csv'), 'r')
169
+
170
+
171
+ ThreeColumn.pg_upsert(file, :key_column => "data")
172
+ expect(
173
+ three_col.reload.extra
174
+ ).to eq("ABC: Always Be Changing.")
175
+ end
176
+
177
+ it 'inserts records if the passed match column doesnt exist' do
178
+ file = File.open(File.expand_path('spec/fixtures/no_id.csv'), 'r')
179
+
180
+ ThreeColumn.pg_upsert(file, :key_column => "data")
181
+ expect(
182
+ ThreeColumn.last.attributes
183
+ ).to include("id" => 1, "data" => "old stuff", "extra" => "ABC: Always Be Changing.")
184
+ end
185
+ end
186
+
187
+ context 'update only' do
188
+ let(:original_created_at) {5.days.ago.utc}
189
+ before(:each) do
190
+ TestModel.create(id: 1, data: "From the before time, in the long long ago", :created_at => original_created_at)
191
+ end
192
+ it 'will only update and not insert if insert_only flag is passed.' do
193
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t", :update_only => true
194
+
195
+ expect(
196
+ TestModel.find(1).attributes
197
+ ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at , "updated_at" => timestamp)
198
+ expect{
199
+ TestModel.find(2)
200
+ }.to raise_error(ActiveRecord::RecordNotFound)
201
+
202
+ end
203
+
204
+ end
205
+ end
206
+
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,43 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'fixtures/test_model'
4
+ require 'fixtures/three_column'
5
+ require 'fixtures/reserved_word_model'
6
+ require 'rspec'
7
+ require 'rspec/autorun'
8
+
9
+ RSpec.configure do |config|
10
+ config.before(:suite) do
11
+ # Create the test database if it does not already exist.
12
+ # No database user or password is configured for the tests; they rely on ident authentication instead.
13
+ begin
14
+ ActiveRecord::Base.establish_connection(
15
+ :adapter => "postgresql",
16
+ :host => "localhost",
17
+ :port => 5432,
18
+ :database => "ar_pg_copy_test"
19
+ )
20
+ ActiveRecord::Base.connection.execute %{
21
+ SET client_min_messages TO warning;
22
+ DROP TABLE IF EXISTS test_models;
23
+ DROP TABLE IF EXISTS three_columns;
24
+ DROP TABLE IF EXISTS reserved_word_models;
25
+ CREATE TABLE test_models (id serial PRIMARY KEY, data text, created_at timestamp with time zone, updated_at timestamp with time zone );
26
+ CREATE TABLE three_columns (id serial PRIMARY KEY, data text, extra text, created_at timestamp with time zone, updated_at timestamp with time zone );
27
+ CREATE TABLE reserved_word_models (id serial PRIMARY KEY, "select" text, "group" text);
28
+ }
29
+ rescue Exception => e
30
+ puts "Exception: #{e}"
31
+ ActiveRecord::Base.establish_connection(
32
+ :adapter => "postgresql",
33
+ :host => "localhost",
34
+ :port => 5432,
35
+ :database => "postgres"
36
+ )
37
+ ActiveRecord::Base.connection.execute "DROP DATABASE IF EXISTS ar_pg_copy_test"
38
+ ActiveRecord::Base.connection.execute "CREATE DATABASE ar_pg_copy_test;"
39
+ retry
40
+ end
41
+ end
42
+
43
+ end
metadata ADDED
@@ -0,0 +1,210 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: postgres_upsert
3
+ version: !ruby/object:Gem::Version
4
+ version: 3.1.0
5
+ platform: java
6
+ authors:
7
+ - Steve Mitchell
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord-jdbcpostgresql-adapter
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - '>='
23
+ - !ruby/object:Gem::Version
24
+ version: '0'
25
+ prerelease: false
26
+ type: :runtime
27
+ - !ruby/object:Gem::Dependency
28
+ name: sequel
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ prerelease: false
40
+ type: :runtime
41
+ - !ruby/object:Gem::Dependency
42
+ name: activerecord
43
+ version_requirements: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 3.0.0
48
+ requirement: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - '>='
51
+ - !ruby/object:Gem::Version
52
+ version: 3.0.0
53
+ prerelease: false
54
+ type: :runtime
55
+ - !ruby/object:Gem::Dependency
56
+ name: rails
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 3.0.0
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - '>='
65
+ - !ruby/object:Gem::Version
66
+ version: 3.0.0
67
+ prerelease: false
68
+ type: :runtime
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ version_requirements: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirement: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - '>='
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ prerelease: false
82
+ type: :development
83
+ - !ruby/object:Gem::Dependency
84
+ name: rdoc
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirement: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ prerelease: false
96
+ type: :development
97
+ - !ruby/object:Gem::Dependency
98
+ name: pry-rails
99
+ version_requirements: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirement: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ prerelease: false
110
+ type: :development
111
+ - !ruby/object:Gem::Dependency
112
+ name: rspec
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: '2.12'
118
+ requirement: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ~>
121
+ - !ruby/object:Gem::Version
122
+ version: '2.12'
123
+ prerelease: false
124
+ type: :development
125
+ description: Uses Postgres's powerful COPY command to upsert large sets of data into ActiveRecord tables
126
+ email: thestevemitchell@gmail.com
127
+ executables: []
128
+ extensions: []
129
+ extra_rdoc_files: []
130
+ files:
131
+ - .gitignore
132
+ - Gemfile
133
+ - Gemfile.lock
134
+ - LICENSE
135
+ - README.md
136
+ - Rakefile
137
+ - VERSION
138
+ - lib/postgres_upsert.rb
139
+ - lib/postgres_upsert/active_record.rb
140
+ - lib/postgres_upsert/writer.rb
141
+ - postgres_upsert.gemspec
142
+ - spec/fixtures/2_col_binary_data.dat
143
+ - spec/fixtures/comma_with_header.csv
144
+ - spec/fixtures/comma_with_header_and_comma_values.csv
145
+ - spec/fixtures/comma_with_header_and_unquoted_comma.csv
146
+ - spec/fixtures/comma_without_header.csv
147
+ - spec/fixtures/no_id.csv
148
+ - spec/fixtures/reserved_word_model.rb
149
+ - spec/fixtures/reserved_words.csv
150
+ - spec/fixtures/semicolon_with_different_header.csv
151
+ - spec/fixtures/semicolon_with_header.csv
152
+ - spec/fixtures/tab_only_data.csv
153
+ - spec/fixtures/tab_with_different_header.csv
154
+ - spec/fixtures/tab_with_error.csv
155
+ - spec/fixtures/tab_with_extra_line.csv
156
+ - spec/fixtures/tab_with_header.csv
157
+ - spec/fixtures/tab_with_two_lines.csv
158
+ - spec/fixtures/test_model.rb
159
+ - spec/fixtures/three_column.rb
160
+ - spec/pg_upsert_binary_spec.rb
161
+ - spec/pg_upsert_csv_spec.rb
162
+ - spec/spec.opts
163
+ - spec/spec_helper.rb
164
+ homepage: https://github.com/theSteveMitchell/postgres_upsert
165
+ licenses:
166
+ - MIT
167
+ metadata: {}
168
+ post_install_message:
169
+ rdoc_options: []
170
+ require_paths:
171
+ - lib
172
+ required_ruby_version: !ruby/object:Gem::Requirement
173
+ requirements:
174
+ - - '>='
175
+ - !ruby/object:Gem::Version
176
+ version: '0'
177
+ required_rubygems_version: !ruby/object:Gem::Requirement
178
+ requirements:
179
+ - - '>='
180
+ - !ruby/object:Gem::Version
181
+ version: '0'
182
+ requirements: []
183
+ rubyforge_project:
184
+ rubygems_version: 2.1.9
185
+ signing_key:
186
+ specification_version: 4
187
+ summary: A rubygem that integrates with ActiveRecord to insert/update large data sets into the database efficiently
188
+ test_files:
189
+ - spec/fixtures/2_col_binary_data.dat
190
+ - spec/fixtures/comma_with_header.csv
191
+ - spec/fixtures/comma_with_header_and_comma_values.csv
192
+ - spec/fixtures/comma_with_header_and_unquoted_comma.csv
193
+ - spec/fixtures/comma_without_header.csv
194
+ - spec/fixtures/no_id.csv
195
+ - spec/fixtures/reserved_word_model.rb
196
+ - spec/fixtures/reserved_words.csv
197
+ - spec/fixtures/semicolon_with_different_header.csv
198
+ - spec/fixtures/semicolon_with_header.csv
199
+ - spec/fixtures/tab_only_data.csv
200
+ - spec/fixtures/tab_with_different_header.csv
201
+ - spec/fixtures/tab_with_error.csv
202
+ - spec/fixtures/tab_with_extra_line.csv
203
+ - spec/fixtures/tab_with_header.csv
204
+ - spec/fixtures/tab_with_two_lines.csv
205
+ - spec/fixtures/test_model.rb
206
+ - spec/fixtures/three_column.rb
207
+ - spec/pg_upsert_binary_spec.rb
208
+ - spec/pg_upsert_csv_spec.rb
209
+ - spec/spec.opts
210
+ - spec/spec_helper.rb