postgres_upsert 3.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 670c45802ef4d5b50510800787f75426181d0722
4
+ data.tar.gz: a249301be5d55ceaabc1d0d2606f834c65a51361
5
+ SHA512:
6
+ metadata.gz: 0341063e94b7cf9a64e3c35f5052d988eae2f93ac5aa8aa0255813312836176529d918c16d59cd1fe032391ac8482fe878c3162ea20a61073b0654faebb4b0c0
7
+ data.tar.gz: e1eada0840b8f2bf2999783cd1db72c153c8d2a11823789c2505f4da2902a08c10a976cc0a77b906f43e124d0cc5462f485096d3d4d1643e967b8184e31499c6
data/.gitignore ADDED
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ # Gemfile.lock
30
+ # .ruby-version
31
+ # .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # specify gem dependencies in postgres_upsert.gemspec
4
+ # except the platform-specific dependencies below
5
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,146 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ postgres_upsert (3.0.0-java)
5
+ activerecord (>= 3.0.0)
6
+ activerecord-jdbcpostgresql-adapter
7
+ rails (>= 3.0.0)
8
+ sequel
9
+
10
+ GEM
11
+ remote: https://rubygems.org/
12
+ specs:
13
+ actionmailer (4.2.0)
14
+ actionpack (= 4.2.0)
15
+ actionview (= 4.2.0)
16
+ activejob (= 4.2.0)
17
+ mail (~> 2.5, >= 2.5.4)
18
+ rails-dom-testing (~> 1.0, >= 1.0.5)
19
+ actionpack (4.2.0)
20
+ actionview (= 4.2.0)
21
+ activesupport (= 4.2.0)
22
+ rack (~> 1.6.0)
23
+ rack-test (~> 0.6.2)
24
+ rails-dom-testing (~> 1.0, >= 1.0.5)
25
+ rails-html-sanitizer (~> 1.0, >= 1.0.1)
26
+ actionview (4.2.0)
27
+ activesupport (= 4.2.0)
28
+ builder (~> 3.1)
29
+ erubis (~> 2.7.0)
30
+ rails-dom-testing (~> 1.0, >= 1.0.5)
31
+ rails-html-sanitizer (~> 1.0, >= 1.0.1)
32
+ activejob (4.2.0)
33
+ activesupport (= 4.2.0)
34
+ globalid (>= 0.3.0)
35
+ activemodel (4.2.0)
36
+ activesupport (= 4.2.0)
37
+ builder (~> 3.1)
38
+ activerecord (4.2.0)
39
+ activemodel (= 4.2.0)
40
+ activesupport (= 4.2.0)
41
+ arel (~> 6.0)
42
+ activerecord-jdbc-adapter (1.3.14)
43
+ activerecord (>= 2.2)
44
+ activerecord-jdbcpostgresql-adapter (1.3.14)
45
+ activerecord-jdbc-adapter (~> 1.3.14)
46
+ jdbc-postgres (>= 9.1)
47
+ activesupport (4.2.0)
48
+ i18n (~> 0.7)
49
+ json (~> 1.7, >= 1.7.7)
50
+ minitest (~> 5.1)
51
+ thread_safe (~> 0.3, >= 0.3.4)
52
+ tzinfo (~> 1.1)
53
+ arel (6.0.0)
54
+ builder (3.2.2)
55
+ coderay (1.1.0)
56
+ diff-lcs (1.2.5)
57
+ erubis (2.7.0)
58
+ ffi (1.9.6-java)
59
+ globalid (0.3.2)
60
+ activesupport (>= 4.1.0)
61
+ hike (1.2.3)
62
+ i18n (0.7.0)
63
+ jdbc-postgres (9.3.1102)
64
+ json (1.8.2-java)
65
+ loofah (2.0.1)
66
+ nokogiri (>= 1.5.9)
67
+ mail (2.6.3)
68
+ mime-types (>= 1.16, < 3)
69
+ method_source (0.8.2)
70
+ mime-types (2.4.3)
71
+ minitest (5.5.1)
72
+ multi_json (1.10.1)
73
+ nokogiri (1.6.6.2-java)
74
+ pry (0.10.1-java)
75
+ coderay (~> 1.1.0)
76
+ method_source (~> 0.8.1)
77
+ slop (~> 3.4)
78
+ spoon (~> 0.0)
79
+ pry-rails (0.3.3)
80
+ pry (>= 0.9.10)
81
+ rack (1.6.0)
82
+ rack-test (0.6.3)
83
+ rack (>= 1.0)
84
+ rails (4.2.0)
85
+ actionmailer (= 4.2.0)
86
+ actionpack (= 4.2.0)
87
+ actionview (= 4.2.0)
88
+ activejob (= 4.2.0)
89
+ activemodel (= 4.2.0)
90
+ activerecord (= 4.2.0)
91
+ activesupport (= 4.2.0)
92
+ bundler (>= 1.3.0, < 2.0)
93
+ railties (= 4.2.0)
94
+ sprockets-rails
95
+ rails-deprecated_sanitizer (1.0.3)
96
+ activesupport (>= 4.2.0.alpha)
97
+ rails-dom-testing (1.0.5)
98
+ activesupport (>= 4.2.0.beta, < 5.0)
99
+ nokogiri (~> 1.6.0)
100
+ rails-deprecated_sanitizer (>= 1.0.1)
101
+ rails-html-sanitizer (1.0.1)
102
+ loofah (~> 2.0)
103
+ railties (4.2.0)
104
+ actionpack (= 4.2.0)
105
+ activesupport (= 4.2.0)
106
+ rake (>= 0.8.7)
107
+ thor (>= 0.18.1, < 2.0)
108
+ rake (10.4.2)
109
+ rdoc (4.2.0)
110
+ json (~> 1.4)
111
+ rspec (2.99.0)
112
+ rspec-core (~> 2.99.0)
113
+ rspec-expectations (~> 2.99.0)
114
+ rspec-mocks (~> 2.99.0)
115
+ rspec-core (2.99.2)
116
+ rspec-expectations (2.99.2)
117
+ diff-lcs (>= 1.1.3, < 2.0)
118
+ rspec-mocks (2.99.3)
119
+ sequel (4.19.0)
120
+ slop (3.6.0)
121
+ spoon (0.0.4)
122
+ ffi
123
+ sprockets (2.12.3)
124
+ hike (~> 1.2)
125
+ multi_json (~> 1.0)
126
+ rack (~> 1.0)
127
+ tilt (~> 1.1, != 1.3.0)
128
+ sprockets-rails (2.2.4)
129
+ actionpack (>= 3.0)
130
+ activesupport (>= 3.0)
131
+ sprockets (>= 2.8, < 4.0)
132
+ thor (0.19.1)
133
+ thread_safe (0.3.4-java)
134
+ tilt (1.4.1)
135
+ tzinfo (1.2.2)
136
+ thread_safe (~> 0.1)
137
+
138
+ PLATFORMS
139
+ java
140
+
141
+ DEPENDENCIES
142
+ bundler
143
+ postgres_upsert!
144
+ pry-rails
145
+ rdoc
146
+ rspec (~> 2.12)
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Steve Mitchell
4
+ Based on work Copyright (c) Diogo Biazus https://github.com/diogob/postgres-copy
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,92 @@
1
+ # postgres_upsert
2
+
3
+ Allows your rails app to load data in a very fast way, avoiding calls to ActiveRecord.
4
+
5
+ Using the PG gem and postgres's powerful COPY command, you can create thousands of rails objects in your db in a single query.
6
+
7
+
8
+ ## Install
9
+
10
+ Put it in your Gemfile
11
+
12
+ gem 'postgres_upsert'
13
+
14
+ Run the bundle command
15
+
16
+ bundle
17
+
18
+ ## Usage
19
+
20
+ The gem adds the following additional class method to ActiveRecord::Base:
21
+
22
+ * pg_upsert io_object_or_file_path, [options]
23
+
24
+ io_object_or_file_path => is a file path or an io object (StringIO, FileIO, etc.)
25
+
26
+ options:
27
+ :delimiter - the string to use to delimit fields. Default is ","
28
+ :format - the format of the file (valid formats are :csv or :binary). Default is :csv
29
+ :header => specifies if the file/io source contains a header row. Either :header option must be true, or :columns list must be passed. Default true
30
+ :key_column => the primary key or unique key column on your ActiveRecord table, used to distinguish new records from existing records. Default is the primary_key of your ActiveRecord model class.
31
+ :update_only => when true, postgres_upsert will ONLY update existing records, and not insert new. Default is false.
32
+
33
+ pg_upsert will allow you to copy data from an arbitrary IO object or from a file (when you pass the path as a string). The file is opened by the Ruby process and streamed to the database.
34
+ Let's first copy from a file, assuming that we have a users table and
35
+ that we are in the Rails console:
36
+
37
+ ```ruby
38
+ User.pg_upsert "/tmp/users.csv"
39
+ ```
40
+
41
+ This command will use the headers in the CSV file as fields of the target table, so beware to always have a header in the files you want to import.
42
+ If the column names in the CSV header do not match the field names of the target table, you can pass a map in the options parameter.
43
+
44
+ ```ruby
45
+ User.pg_upsert "/tmp/users.csv", :map => {'name' => 'first_name'}
46
+ ```
47
+
48
+ In the above example the header name in the CSV file will be mapped to the field called first_name in the users table.
49
+
50
+ To copy a binary formatted data file or IO object you can specify the format as binary
51
+
52
+ ```ruby
53
+ User.pg_upsert "/tmp/users.dat", :format => :binary, :columns => ["id", "name"]
54
+ ```
55
+
56
+ Which will generate the following SQL command:
57
+
58
+ ```sql
59
+ COPY users_temp_### ("id","name") FROM STDIN WITH BINARY
60
+ ```
61
+
62
+ NOTE: binary files do not include header columns, so passing a :columns array is required for binary files.
63
+
64
+
65
+ pg_upsert supports 'upsert' or 'merge' operations. In other words, the data source can contain both new and existing objects, and pg_upsert will handle either case. Since the Postgres native COPY command does not handle updating existing records, pg_upsert accomplishes update and insert using an intermediary temp table:
66
+
67
+ This merge/upsert happens in 5 steps (assume your data table is called "users")
68
+ * create a temp table named users_temp_### where "###" is a random number. In postgres temp tables are only visible to the current database session, so naming conflicts should not be a problem.
69
+ * COPY the data to users_temp_###
70
+ * issue a query to insert all new records from users_temp_### into users (newness is determined by the presence of the primary key in the users table)
71
+ * issue a query to update all records in users with the data in users_temp_### (matching on primary key)
72
+ * drop the temp table.
73
+
74
+ ### overriding the key_column
75
+
76
+ By default pg_upsert uses the primary key on your ActiveRecord table to determine if each record should be inserted or updated. You can override the column using the :key_column option:
77
+
78
+ ```ruby
79
+ User.pg_upsert "/tmp/users.dat", :format => :binary, :columns => ["external_twitter_id", "name"], :key_column => "external_twitter_id"
80
+ ```
81
+
82
+ obviously, the field you pass must be a unique key in your database (this is not enforced at the moment, but will be)
83
+
84
+ Passing :update_only => true will ensure that no new records are created; existing records will still be updated.
85
+
86
+ ## Note on Patches/Pull Requests
87
+
88
+ * Fork the project
89
+ * add your feature/fix to your fork (rspec tests, please)
90
+ * submit a PR
91
+ * If you find an issue but can't fix it in a PR, please log an issue. I'll do my best.
92
+
data/Rakefile ADDED
@@ -0,0 +1,18 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.unshift File.expand_path("../lib", __FILE__)
3
+ require 'bundler/gem_tasks'
4
+ require 'rubygems'
5
+ require 'rspec/core/rake_task'
6
+ require 'rdoc/task'
7
+
8
+ task :default => :spec
9
+
10
+ RSpec::Core::RakeTask.new(:spec)
11
+
12
# Generates RDoc HTML into ./rdoc from the README and every Ruby file under lib/.
Rake::RDocTask.new do |rdoc|
  # File.read keeps the trailing newline of the VERSION file; strip it so the
  # generated title stays on a single line.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "postgres_upsert #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,13 @@
1
module ActiveRecord
  class Base
    # Upserts rows from a data source into this model's table.
    #
    # path_or_io - a file path (String) or an IO-like object holding the data.
    # options    - Hash handed unchanged to PostgresUpsert::Writer, e.g.
    #              :delimiter to change the field separator or :map to rename
    #              header fields to table columns. See README.md for the full
    #              list of options.
    #
    # Returns whatever PostgresUpsert::Writer#write returns.
    def self.pg_upsert(path_or_io, options = {})
      writer = PostgresUpsert::Writer.new(table_name, path_or_io, options)
      writer.write
    end
  end
end
@@ -0,0 +1,208 @@
1
+ require 'sequel'
2
+
3
+
4
module PostgresUpsert

  # Streams CSV or binary COPY data into a temporary table through the
  # PostgreSQL JDBC CopyManager, then merges that temp table into the target
  # table: existing rows (matched on the key column) are updated and, unless
  # :update_only is set, unmatched rows are inserted.
  class Writer

    # table_name - name of the target table (interpolated into the SQL).
    # source     - a file path (String), opened for reading here and closed by
    #              #write, or an IO-like object that stays owned by the caller.
    # options    - Hash:
    #              :delimiter   field delimiter for CSV (default ",")
    #              :format      :csv or :binary (default :csv)
    #              :header      whether the source starts with a header row
    #                           (default true; ignored for :binary)
    #              :columns     explicit list of column names
    #              :map         Hash of source column name => table column name
    #              :key_column  column used to match existing rows (default:
    #                           the table's primary key)
    #              :update_only when true, skip the insert step (default false)
    def initialize(table_name, source, options = {})
      @table_name = table_name
      @options = options.reverse_merge({
        :delimiter => ",",
        :format => :csv,
        :header => true,
        :update_only => false})
      # Only query the catalog for the primary key when the caller did not
      # provide a key column of their own.
      @options[:key_column] ||= primary_key
      @owns_source = source.instance_of?(String)
      @source = @owns_source ? File.open(source, 'r') : source
      @source_closed = false
      @columns_list = get_columns
      generate_temp_table_name
    end

    # Performs the upsert.
    #
    # Raises RuntimeError when no column list could be determined (neither a
    # :columns option nor a header row).
    def write
      if @columns_list.empty?
        raise "Either the :columns option or :header => true are required"
      end

      csv_options = @options[:format] == :binary ? "BINARY" : "DELIMITER '#{@options[:delimiter]}' CSV"
      copy_sql = "COPY #{@temp_table_name} #{columns_string_for_copy} FROM STDIN WITH #{csv_options}"

      # The block form of Sequel.connect disconnects the database when the
      # block exits, so the extra connection (and its session-local temp
      # table) is released even when the COPY or the merge raises.
      Sequel.connect(ActiveRecord::Base.connection.config[:url]) do |database|
        database.synchronize do |connection|
          create_temp_table(connection)
          copy_source_into_temp_table(connection, copy_sql)
          upsert_from_temp_table(connection)
          drop_temp_table(connection)
        end
      end
    ensure
      close_owned_source
    end

    private

    # Name of the first primary-key column of the target table, or nil when
    # the table has no primary key. The lookup runs at most once.
    def primary_key
      return @primary_key if defined?(@primary_key)

      query = <<-sql
        SELECT
          pg_attribute.attname,
          format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
        FROM pg_index, pg_class, pg_attribute
        WHERE
          pg_class.oid = '#{@table_name}'::regclass AND
          indrelid = pg_class.oid AND
          pg_attribute.attrelid = pg_class.oid AND
          pg_attribute.attnum = any(pg_index.indkey)
          AND indisprimary
      sql

      key_name = nil
      ActiveRecord::Base.connection.execute(query).each do |row|
        key_name = row['attname']
        break
      end
      @primary_key = key_name
    end

    # Column names of the target table as reported by information_schema.
    def column_names
      @column_names ||= begin
        query = "SELECT * FROM information_schema.columns WHERE TABLE_NAME = '#{@table_name}'"
        pg_result = ActiveRecord::Base.connection.execute query
        pg_result.map{ |row| row['column_name'] }
      end
    end

    # Determines the list of columns being loaded. Consumes the header row
    # from the source when one is expected, whether or not it is used.
    def get_columns
      columns_list = @options[:columns] || []
      if @options[:format] != :binary && @options[:header]
        # If a header is present it must be stripped from the io, whether we
        # use it for the columns list or not.
        header_line = @source.gets
        if columns_list.empty?
          # An empty source yields nil here; treat it as "no columns" so that
          # #write reports the problem instead of failing on nil.
          columns_list = header_line.to_s.strip.split(@options[:delimiter])
        end
      end
      if @options[:map]
        # Columns without an entry in the map keep their own name.
        columns_list = columns_list.map{ |c| @options[:map][c.to_s] || c }
      end
      columns_list
    end

    # Parenthesised, quoted column list for the COPY statement.
    def columns_string_for_copy
      str = get_columns_string
      str.empty? ? str : "(#{str})"
    end

    # Timestamp columns that exist on the target table but are not supplied
    # by the source, and therefore have to be filled in by the writer.
    def auto_timestamp_columns
      supplied = @columns_list.map(&:to_s)
      %w(created_at updated_at).select do |name|
        column_names.include?(name) && !supplied.include?(name)
      end
    end

    # Same column list as the INSERT target list.
    def columns_string_for_select
      columns_string_for_insert
    end

    # Quoted target column list for the INSERT: the loaded columns plus any
    # timestamp columns the writer fills in itself.
    def columns_string_for_insert
      get_columns_string(@columns_list + auto_timestamp_columns)
    end

    # SELECT list matching columns_string_for_insert: the loaded columns read
    # from the temp table plus one timestamp literal per auto-filled column.
    def select_string_for_insert
      str = get_columns_string(@columns_list.clone)
      now = DateTime.now.utc
      auto_timestamp_columns.each { str << ",'#{now}'" }
      str
    end

    # Column list used to shape the temp table: the loaded columns plus the
    # key column when it is not among them.
    def select_string_for_create
      columns = @columns_list.map(&:to_sym)
      columns << @options[:key_column].to_sym unless columns.include?(@options[:key_column].to_sym)
      get_columns_string(columns)
    end

    # Joins column names into a double-quoted, comma-separated list.
    def get_columns_string(columns = nil)
      columns ||= @columns_list
      columns.size > 0 ? "\"#{columns.join('","')}\"" : ""
    end

    def quoted_table_name
      @quoted_table_name ||= ActiveRecord::Base.connection.quote_table_name(@table_name)
    end

    # Key column as a double-quoted identifier, so reserved words and mixed
    # case behave the same way here as in the temp-table column list.
    def quoted_key_column
      "\"#{@options[:key_column].to_s.gsub('"', '""')}\""
    end

    def generate_temp_table_name
      @temp_table_name = "#{@table_name}_temp_#{rand(1000)}"
    end

    # Next piece of input: up to 10240 bytes for binary sources, one line
    # otherwise. Returns nil at end of input.
    def read_input_line
      return nil if @source_closed
      if @options[:format] == :binary
        begin
          @source.readpartial(10240)
        rescue EOFError
          nil
        end
      else
        @source.gets
      end
    end

    # Closes the source only when this writer opened it from a path.
    def close_owned_source
      return unless @owns_source && !@source_closed
      @source.close
      @source_closed = true
    end

    # Feeds the source through the JDBC CopyManager into the temp table.
    def copy_source_into_temp_table(connection, copy_sql)
      copy_manager = org.postgresql.copy.CopyManager.new(connection)
      stream = copy_manager.copy_in(copy_sql)
      binary = @options[:format] == :binary
      while chunk = read_input_line do
        # Blank lines are skipped for text input only; a binary chunk may
        # legitimately consist of bytes that String#strip would remove.
        next if !binary && chunk.strip.size == 0
        bytes = chunk.to_java_bytes
        stream.write_to_copy(bytes, 0, bytes.length)
      end
      stream.end_copy
    end

    # Updates existing rows first, then inserts the unmatched ones.
    def upsert_from_temp_table(connection)
      update_from_temp_table(connection)
      insert_from_temp_table(connection) unless @options[:update_only]
    end

    def update_from_temp_table(connection)
      key = quoted_key_column
      connection.execSQLUpdate <<-SQL
        UPDATE #{quoted_table_name} AS d
          #{update_set_clause}
          FROM #{@temp_table_name} as t
          WHERE t.#{key} = d.#{key}
          AND d.#{key} IS NOT NULL;
      SQL
    end

    # SET clause copying every loaded column from the temp table, plus
    # updated_at when the table has it and the source does not supply it.
    def update_set_clause
      command = @columns_list.map do |col|
        "\"#{col}\" = t.\"#{col}\""
      end
      if auto_timestamp_columns.include?("updated_at")
        command << "\"updated_at\" = '#{DateTime.now.utc}'"
      end
      "SET #{command.join(',')}"
    end

    def insert_from_temp_table(connection)
      columns_string = columns_string_for_insert
      select_string = select_string_for_insert
      key = quoted_key_column
      connection.execSQLUpdate <<-SQL
        INSERT INTO #{quoted_table_name} (#{columns_string})
          SELECT #{select_string}
          FROM #{@temp_table_name} as t
          WHERE NOT EXISTS
            (SELECT 1
                  FROM #{quoted_table_name} as d
                  WHERE d.#{key} = t.#{key})
          AND t.#{key} IS NOT NULL;
      SQL
    end

    # Creates an empty temp table shaped like the loaded columns of the
    # target table ("WHERE 0 = 1" copies the structure without any rows).
    def create_temp_table(connection)
      columns_string = select_string_for_create
      connection.execSQLUpdate <<-SQL
        SET client_min_messages=WARNING;
        DROP TABLE IF EXISTS #{@temp_table_name};

        CREATE TEMP TABLE #{@temp_table_name}
          AS SELECT #{columns_string} FROM #{quoted_table_name} WHERE 0 = 1;
      SQL
    end

    def drop_temp_table(connection)
      connection.execSQLUpdate <<-SQL
        DROP TABLE #{@temp_table_name}
      SQL
    end
  end

end
@@ -0,0 +1,14 @@
1
+ require 'rubygems'
2
+ require 'active_record'
3
+ require 'postgres_upsert/active_record'
4
+ require 'postgres_upsert/writer'
5
+ require 'rails'
6
+
7
# Railtie that loads the ActiveRecord::Base extension once ActiveRecord itself
# has been loaded by the host application.
class PostgresCopy < Rails::Railtie

  initializer('postgres_upsert') do
    ActiveSupport.on_load(:active_record) { require "postgres_upsert/active_record" }
  end
end
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib/', __FILE__)
3
+ $:.unshift lib unless $:.include?(lib)
4
+
5
+
6
Gem::Specification.new do |s|
  s.name = "postgres_upsert"
  s.version = "3.1.0"

  s.platform = 'java'
  s.authors = ["Steve Mitchell"]
  s.date = "2014-09-12"
  s.description = "Uses Postgres's powerful COPY command to upsert large sets of data into ActiveRecord tables"
  s.email = "thestevemitchell@gmail.com"

  # Files tracked by git. Falls back to an empty list (not a String) when git
  # cannot be run, so s.files always receives an Array.
  git_files = begin
    `git ls-files`.split("\n")
  rescue StandardError
    []
  end
  s.files = git_files
  # Derived from the same listing rather than a second git call, so it neither
  # depends on shell brace expansion nor fails when git is unavailable.
  s.test_files = git_files.grep(%r{\A(?:test|spec|features)/})
  s.executables = []
  s.require_paths = ["lib"]
  s.homepage = "https://github.com/theSteveMitchell/postgres_upsert"
  s.summary = "A rubygem that integrates with ActiveRecord to insert/update large data sets into the database efficiently"
  s.license = "MIT"

  s.add_dependency "activerecord-jdbcpostgresql-adapter"
  s.add_dependency "sequel"
  s.add_dependency "activerecord", '>= 3.0.0'
  s.add_dependency "rails", '>= 3.0.0'
  s.add_development_dependency "bundler"
  s.add_development_dependency "rdoc"
  s.add_development_dependency "pry-rails"
  s.add_development_dependency "rspec", "~> 2.12"
end
34
+
Binary file
@@ -0,0 +1,2 @@
1
+ id,data
2
+ 1,test data 1
@@ -0,0 +1,2 @@
1
+ id,data
2
+ 1,"test, the data 1"
@@ -0,0 +1,2 @@
1
+ id,data
2
+ 1,test, the data 1
@@ -0,0 +1 @@
1
+ 1,test data 1
@@ -0,0 +1,2 @@
1
+ data,extra
2
+ old stuff,ABC: Always Be Changing.
@@ -0,0 +1,5 @@
1
+ require 'postgres_upsert'
2
+
3
+ class ReservedWordModel < ActiveRecord::Base
4
+ end
5
+
@@ -0,0 +1,2 @@
1
+ id select group
2
+ 1 test select group name
@@ -0,0 +1,2 @@
1
+ cod;info
2
+ 1;test data 1
@@ -0,0 +1,2 @@
1
+ id;data
2
+ 1;test data 1
@@ -0,0 +1,2 @@
1
+ id data
2
+ 1 test data 1
@@ -0,0 +1,2 @@
1
+ cod info
2
+ 1 test data 1
@@ -0,0 +1,2 @@
1
+ data id
2
+ this is a wrong separator;1
@@ -0,0 +1,3 @@
1
+ id data
2
+ 1 test data 1
3
+
@@ -0,0 +1,2 @@
1
+ id data
2
+ 1 test data 1
@@ -0,0 +1,3 @@
1
+ id data
2
+ 1 test data 1
3
+ 2 test data 2
@@ -0,0 +1,4 @@
1
+ require 'postgres_upsert'
2
+
3
+ class TestModel < ActiveRecord::Base
4
+ end
@@ -0,0 +1,4 @@
1
+ require 'postgres_upsert'
2
+
3
+ class ThreeColumn < ActiveRecord::Base
4
+ end
@@ -0,0 +1,35 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "pg_upsert from file with binary data" do
4
+ before(:each) do
5
+ ActiveRecord::Base.connection.execute %{
6
+ TRUNCATE TABLE test_models;
7
+ SELECT setval('test_models_id_seq', 1, false);
8
+ }
9
+ end
10
+
11
+ before do
12
+ DateTime.stub(:now).and_return (DateTime.parse("2012-01-01").utc)
13
+ end
14
+
15
+ def timestamp
16
+ DateTime.now.utc.to_s
17
+ end
18
+
19
+ it "imports from file if path is passed without field_map" do
20
+ TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary, columns: [:id, :data]
21
+
22
+ expect(
23
+ TestModel.first.attributes
24
+ ).to include('data' => 'text', 'created_at' => timestamp, 'updated_at' => timestamp)
25
+ end
26
+
27
+ it "throws an error when importing binary file without columns list" do
28
+ # Since binary data never has a header row, we'll require explicit columns list
29
+ expect{
30
+ TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary
31
+ }.to raise_error "Either the :columns option or :header => true are required"
32
+ end
33
+
34
+ end
35
+
@@ -0,0 +1,206 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "pg_upsert from file with CSV format" do
4
+ before(:each) do
5
+ ActiveRecord::Base.connection.execute %{
6
+ TRUNCATE TABLE test_models;
7
+ TRUNCATE TABLE three_columns;
8
+ SELECT setval('test_models_id_seq', 1, false);
9
+ }
10
+ end
11
+
12
+ before do
13
+ DateTime.stub_chain(:now, :utc).and_return (DateTime.parse("2012-01-01").utc)
14
+ end
15
+
16
+ def timestamp
17
+ DateTime.now.utc
18
+ end
19
+
20
+ it "should import from file if path is passed without field_map" do
21
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header.csv')
22
+ expect(
23
+ TestModel.first.attributes
24
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
25
+ end
26
+
27
+ it "correctly handles delimiters in content" do
28
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header_and_comma_values.csv')
29
+ expect(
30
+ TestModel.first.attributes
31
+ ).to include('data' => 'test, the data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
32
+ end
33
+
34
+ it "throws error if csv is malformed" do
35
+ expect{
36
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header_and_unquoted_comma.csv')
37
+ }.to raise_error
38
+ end
39
+
40
+ it "throws error if the csv has mixed delimiters" do
41
+ expect{
42
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_error.csv'), :delimiter => "\t"
43
+ }.to raise_error
44
+ end
45
+
46
+ it "should import from IO without field_map" do
47
+ TestModel.pg_upsert File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')
48
+ expect(
49
+ TestModel.first.attributes
50
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
51
+ end
52
+
53
+ it "should import with custom delimiter from path" do
54
+ TestModel.pg_upsert File.expand_path('spec/fixtures/semicolon_with_header.csv'), :delimiter => ';'
55
+ expect(
56
+ TestModel.first.attributes
57
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
58
+ end
59
+
60
+ it "should import with custom delimiter from IO" do
61
+ TestModel.pg_upsert File.open(File.expand_path('spec/fixtures/semicolon_with_header.csv'), 'r'), :delimiter => ';'
62
+ expect(
63
+ TestModel.first.attributes
64
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
65
+ end
66
+
67
+ it "should not expect a header when :header is false" do
68
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/comma_without_header.csv'), 'r'), :header => false, :columns => [:id,:data])
69
+
70
+ expect(
71
+ TestModel.first.attributes
72
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
73
+ end
74
+
75
+ it "should be able to map the header in the file to diferent column names" do
76
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_different_header.csv'), 'r'), :delimiter => "\t", :map => {'cod' => 'id', 'info' => 'data'})
77
+
78
+ expect(
79
+ TestModel.first.attributes
80
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
81
+ end
82
+
83
+ it "should be able to map the header in the file to diferent column names with custom delimiter" do
84
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/semicolon_with_different_header.csv'), 'r'), :delimiter => ';', :map => {'cod' => 'id', 'info' => 'data'})
85
+
86
+ expect(
87
+ TestModel.first.attributes
88
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
89
+ end
90
+
91
+ it "should ignore empty lines" do
92
+ TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_extra_line.csv'), 'r'), :delimiter => "\t")
93
+
94
+ expect(
95
+ TestModel.first.attributes
96
+ ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
97
+ end
98
+
99
+ it "should not create timestamps when the model does not include them" do
100
+ ReservedWordModel.pg_upsert File.expand_path('spec/fixtures/reserved_words.csv'), :delimiter => "\t"
101
+
102
+ expect(
103
+ ReservedWordModel.first.attributes
104
+ ).to eq("group"=>"group name", "id"=>1, "select"=>"test select")
105
+ end
106
+
107
+ context "upserting data to handle inserts and creates" do
108
+ let(:original_created_at) {5.days.ago.utc}
109
+
110
+ before(:each) do
111
+ TestModel.create(id: 1, data: "From the before time, in the long long ago", :created_at => original_created_at)
112
+ end
113
+
114
+ it "should not violate primary key constraint" do
115
+ expect{
116
+ TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header.csv')
117
+ }.to_not raise_error
118
+ end
119
+
120
+ it "should upsert (update existing records and insert new records)" do
121
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
122
+
123
+ expect(
124
+ TestModel.find(1).attributes
125
+ ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at, "updated_at" => timestamp)
126
+ expect(
127
+ TestModel.find(2).attributes
128
+ ).to eq("id"=>2, "data"=>"test data 2", "created_at" => timestamp, "updated_at" => timestamp)
129
+ end
130
+
131
+ it "should require columns option if no header" do
132
+ expect{
133
+ TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary
134
+ }.to raise_error("Either the :columns option or :header => true are required")
135
+ end
136
+
137
+ it "should clean up the temp table after completion" do
138
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
139
+
140
+ ActiveRecord::Base.connection.tables.should_not include("test_models_temp")
141
+ end
142
+
143
+ it "should gracefully drop the temp table if it already exists" do
144
+ ActiveRecord::Base.connection.execute "CREATE TEMP TABLE test_models_temp (LIKE test_models);"
145
+ TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
146
+
147
+ expect(
148
+ TestModel.find(1).attributes
149
+ ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at, "updated_at" => timestamp)
150
+ expect(
151
+ TestModel.find(2).attributes
152
+ ).to eq("id"=>2, "data"=>"test data 2", "created_at" => timestamp, "updated_at" => timestamp)
153
+ end
154
+
155
+ it "should be able to copy using custom set of columns" do
156
+ ThreeColumn.create(id: 1, data: "old stuff", extra: "neva change!", created_at: original_created_at)
157
+ ThreeColumn.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_only_data.csv'), 'r'), :delimiter => "\t", :columns => ["id", "data"])
158
+
159
+ expect(
160
+ ThreeColumn.first.attributes
161
+ ).to eq('id' => 1, 'data' => 'test data 1', 'extra' => "neva change!", 'created_at' => original_created_at, 'updated_at' => timestamp)
162
+ end
163
+ end
164
+
165
+ context 'overriding the comparison column' do
166
+ it 'updates records based the match column option if its passed in' do
167
+ three_col = ThreeColumn.create(id: 1, data: "old stuff", extra: "neva change!")
168
+ file = File.open(File.expand_path('spec/fixtures/no_id.csv'), 'r')
169
+
170
+
171
+ ThreeColumn.pg_upsert(file, :key_column => "data")
172
+ expect(
173
+ three_col.reload.extra
174
+ ).to eq("ABC: Always Be Changing.")
175
+ end
176
+
177
+ it 'inserts records if the passed match column doesnt exist' do
178
+ file = File.open(File.expand_path('spec/fixtures/no_id.csv'), 'r')
179
+
180
+ ThreeColumn.pg_upsert(file, :key_column => "data")
181
+ expect(
182
+ ThreeColumn.last.attributes
183
+ ).to include("id" => 1, "data" => "old stuff", "extra" => "ABC: Always Be Changing.")
184
+ end
185
+ end
186
+
187
+ context 'update only' do
188
+ let(:original_created_at) {5.days.ago.utc}
189
+ before(:each) do
190
+ TestModel.create(id: 1, data: "From the before time, in the long long ago", :created_at => original_created_at)
191
+ end
192
# The description names the option that is actually exercised (:update_only).
it 'will only update and not insert if update_only flag is passed.' do
  TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t", :update_only => true

  # The pre-existing row is updated in place and keeps its created_at ...
  expect(
    TestModel.find(1).attributes
  ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at , "updated_at" => timestamp)
  # ... while the row that is only present in the file is not inserted.
  expect{
    TestModel.find(2)
  }.to raise_error(ActiveRecord::RecordNotFound)

end
203
+
204
+ end
205
+ end
206
+
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,43 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'fixtures/test_model'
4
+ require 'fixtures/three_column'
5
+ require 'fixtures/reserved_word_model'
6
+ require 'rspec'
7
+ require 'rspec/autorun'
8
+
9
+ RSpec.configure do |config|
10
+ config.before(:suite) do
11
+ # we create a test database if it does not exist
12
+ # I do not use database users or password for the tests, using ident authentication instead
13
+ begin
14
+ ActiveRecord::Base.establish_connection(
15
+ :adapter => "postgresql",
16
+ :host => "localhost",
17
+ :port => 5432,
18
+ :database => "ar_pg_copy_test"
19
+ )
20
+ ActiveRecord::Base.connection.execute %{
21
+ SET client_min_messages TO warning;
22
+ DROP TABLE IF EXISTS test_models;
23
+ DROP TABLE IF EXISTS three_columns;
24
+ DROP TABLE IF EXISTS reserved_word_models;
25
+ CREATE TABLE test_models (id serial PRIMARY KEY, data text, created_at timestamp with time zone, updated_at timestamp with time zone );
26
+ CREATE TABLE three_columns (id serial PRIMARY KEY, data text, extra text, created_at timestamp with time zone, updated_at timestamp with time zone );
27
+ CREATE TABLE reserved_word_models (id serial PRIMARY KEY, "select" text, "group" text);
28
+ }
29
+ rescue Exception => e
30
+ puts "Exception: #{e}"
31
+ ActiveRecord::Base.establish_connection(
32
+ :adapter => "postgresql",
33
+ :host => "localhost",
34
+ :port => 5432,
35
+ :database => "postgres"
36
+ )
37
+ ActiveRecord::Base.connection.execute "DROP DATABASE IF EXISTS ar_pg_copy_test"
38
+ ActiveRecord::Base.connection.execute "CREATE DATABASE ar_pg_copy_test;"
39
+ retry
40
+ end
41
+ end
42
+
43
+ end
metadata ADDED
@@ -0,0 +1,210 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: postgres_upsert
3
+ version: !ruby/object:Gem::Version
4
+ version: 3.1.0
5
+ platform: java
6
+ authors:
7
+ - Steve Mitchell
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-12 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: activerecord-jdbcpostgresql-adapter
15
+ version_requirements: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ requirement: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - '>='
23
+ - !ruby/object:Gem::Version
24
+ version: '0'
25
+ prerelease: false
26
+ type: :runtime
27
+ - !ruby/object:Gem::Dependency
28
+ name: sequel
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ requirement: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ prerelease: false
40
+ type: :runtime
41
+ - !ruby/object:Gem::Dependency
42
+ name: activerecord
43
+ version_requirements: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 3.0.0
48
+ requirement: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - '>='
51
+ - !ruby/object:Gem::Version
52
+ version: 3.0.0
53
+ prerelease: false
54
+ type: :runtime
55
+ - !ruby/object:Gem::Dependency
56
+ name: rails
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 3.0.0
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - '>='
65
+ - !ruby/object:Gem::Version
66
+ version: 3.0.0
67
+ prerelease: false
68
+ type: :runtime
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ version_requirements: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ requirement: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - '>='
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ prerelease: false
82
+ type: :development
83
+ - !ruby/object:Gem::Dependency
84
+ name: rdoc
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirement: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ prerelease: false
96
+ type: :development
97
+ - !ruby/object:Gem::Dependency
98
+ name: pry-rails
99
+ version_requirements: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirement: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - '>='
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ prerelease: false
110
+ type: :development
111
+ - !ruby/object:Gem::Dependency
112
+ name: rspec
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: '2.12'
118
+ requirement: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ~>
121
+ - !ruby/object:Gem::Version
122
+ version: '2.12'
123
+ prerelease: false
124
+ type: :development
125
+ description: Uses Postgres's powerful COPY command to upsert large sets of data into ActiveRecord tables
126
+ email: thestevemitchell@gmail.com
127
+ executables: []
128
+ extensions: []
129
+ extra_rdoc_files: []
130
+ files:
131
+ - .gitignore
132
+ - Gemfile
133
+ - Gemfile.lock
134
+ - LICENSE
135
+ - README.md
136
+ - Rakefile
137
+ - VERSION
138
+ - lib/postgres_upsert.rb
139
+ - lib/postgres_upsert/active_record.rb
140
+ - lib/postgres_upsert/writer.rb
141
+ - postgres_upsert.gemspec
142
+ - spec/fixtures/2_col_binary_data.dat
143
+ - spec/fixtures/comma_with_header.csv
144
+ - spec/fixtures/comma_with_header_and_comma_values.csv
145
+ - spec/fixtures/comma_with_header_and_unquoted_comma.csv
146
+ - spec/fixtures/comma_without_header.csv
147
+ - spec/fixtures/no_id.csv
148
+ - spec/fixtures/reserved_word_model.rb
149
+ - spec/fixtures/reserved_words.csv
150
+ - spec/fixtures/semicolon_with_different_header.csv
151
+ - spec/fixtures/semicolon_with_header.csv
152
+ - spec/fixtures/tab_only_data.csv
153
+ - spec/fixtures/tab_with_different_header.csv
154
+ - spec/fixtures/tab_with_error.csv
155
+ - spec/fixtures/tab_with_extra_line.csv
156
+ - spec/fixtures/tab_with_header.csv
157
+ - spec/fixtures/tab_with_two_lines.csv
158
+ - spec/fixtures/test_model.rb
159
+ - spec/fixtures/three_column.rb
160
+ - spec/pg_upsert_binary_spec.rb
161
+ - spec/pg_upsert_csv_spec.rb
162
+ - spec/spec.opts
163
+ - spec/spec_helper.rb
164
+ homepage: https://github.com/theSteveMitchell/postgres_upsert
165
+ licenses:
166
+ - MIT
167
+ metadata: {}
168
+ post_install_message:
169
+ rdoc_options: []
170
+ require_paths:
171
+ - lib
172
+ required_ruby_version: !ruby/object:Gem::Requirement
173
+ requirements:
174
+ - - '>='
175
+ - !ruby/object:Gem::Version
176
+ version: '0'
177
+ required_rubygems_version: !ruby/object:Gem::Requirement
178
+ requirements:
179
+ - - '>='
180
+ - !ruby/object:Gem::Version
181
+ version: '0'
182
+ requirements: []
183
+ rubyforge_project:
184
+ rubygems_version: 2.1.9
185
+ signing_key:
186
+ specification_version: 4
187
+ summary: A rubygem that integrates with ActiveRecord to insert/update large data sets into the database efficiently
188
+ test_files:
189
+ - spec/fixtures/2_col_binary_data.dat
190
+ - spec/fixtures/comma_with_header.csv
191
+ - spec/fixtures/comma_with_header_and_comma_values.csv
192
+ - spec/fixtures/comma_with_header_and_unquoted_comma.csv
193
+ - spec/fixtures/comma_without_header.csv
194
+ - spec/fixtures/no_id.csv
195
+ - spec/fixtures/reserved_word_model.rb
196
+ - spec/fixtures/reserved_words.csv
197
+ - spec/fixtures/semicolon_with_different_header.csv
198
+ - spec/fixtures/semicolon_with_header.csv
199
+ - spec/fixtures/tab_only_data.csv
200
+ - spec/fixtures/tab_with_different_header.csv
201
+ - spec/fixtures/tab_with_error.csv
202
+ - spec/fixtures/tab_with_extra_line.csv
203
+ - spec/fixtures/tab_with_header.csv
204
+ - spec/fixtures/tab_with_two_lines.csv
205
+ - spec/fixtures/test_model.rb
206
+ - spec/fixtures/three_column.rb
207
+ - spec/pg_upsert_binary_spec.rb
208
+ - spec/pg_upsert_csv_spec.rb
209
+ - spec/spec.opts
210
+ - spec/spec_helper.rb