RubyGems - postgres_upsert - Versions diffs - 1.0.0 - Mend

postgres_upsert 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

checksums.yaml +7 -0
data/.gitignore +34 -0
data/Gemfile +5 -0
data/Gemfile.lock +112 -0
data/LICENSE +22 -0
data/README.md +84 -0
data/Rakefile +18 -0
data/VERSION +1 -0
data/lib/postgres_upsert.rb +13 -0
data/lib/postgres_upsert/active_record.rb +177 -0
data/postgres_upsert.gemspec +33 -0
data/spec/fixtures/2_col_binary_data.dat +0 -0
data/spec/fixtures/comma_with_header.csv +2 -0
data/spec/fixtures/comma_with_header_and_comma_values.csv +2 -0
data/spec/fixtures/comma_with_header_and_unquoted_comma.csv +2 -0
data/spec/fixtures/comma_without_header.csv +1 -0
data/spec/fixtures/reserved_word_model.rb +5 -0
data/spec/fixtures/reserved_words.csv +2 -0
data/spec/fixtures/semicolon_with_different_header.csv +2 -0
data/spec/fixtures/semicolon_with_header.csv +2 -0
data/spec/fixtures/tab_only_data.csv +2 -0
data/spec/fixtures/tab_with_different_header.csv +2 -0
data/spec/fixtures/tab_with_error.csv +2 -0
data/spec/fixtures/tab_with_extra_line.csv +3 -0
data/spec/fixtures/tab_with_header.csv +2 -0
data/spec/fixtures/tab_with_two_lines.csv +3 -0
data/spec/fixtures/test_model.rb +4 -0
data/spec/fixtures/three_column.rb +4 -0
data/spec/pg_upsert_binary_spec.rb +35 -0
data/spec/pg_upsert_csv_spec.rb +187 -0
data/spec/spec.opts +1 -0
data/spec/spec_helper.rb +47 -0
metadata +194 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 00a22a21bea95bc98d7a991c5ed30a5bbea6a67c
+  data.tar.gz: 9824edf28ac08e7a8aed28ebcdee8c07c46ffa11
+SHA512:
+  metadata.gz: b6183d68e0791491f6417ffbc99b7f2a984090aa3640f1348cfe74e39e11011edce3985029b259bd28296fc7725ad4def14db3f209d95b15f7324d2ebb5dbba3
+  data.tar.gz: 4dd39aa87168d5866874de9c6a11097731e015bbdea624535164f413cf1fabb7c9fbca73612c805289db800383d134c5e88f1198efe2cc9d7d365a72f4f2a9be

data/.gitignore ADDED Viewed

@@ -0,0 +1,34 @@
+*.gem
+*.rbc
+/.config
+/coverage/
+/InstalledFiles
+/pkg/
+/spec/reports/
+/test/tmp/
+/test/version_tmp/
+/tmp/
+## Specific to RubyMotion:
+.dat*
+.repl_history
+build/
+## Documentation cache and generated files:
+/.yardoc/
+/_yardoc/
+/doc/
+/rdoc/
+## Environment normalisation:
+/.bundle/
+/lib/bundler/man/
+# for a library or gem, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# Gemfile.lock
+# .ruby-version
+# .ruby-gemset
+# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
+.rvmrc

data/Gemfile ADDED Viewed

@@ -0,0 +1,5 @@
+source 'https://rubygems.org'
+# specify gem dependencies in postgres_upsert.gemspec
+# except the platform-specific dependencies below
+gemspec

data/Gemfile.lock ADDED Viewed

@@ -0,0 +1,112 @@
+PATH
+  remote: .
+  specs:
+    postgres_upsert (0.1.0)
+      activerecord (>= 3.0.0)
+      pg (~> 0.17.0)
+      rails (>= 3.0.0)
+GEM
+  remote: https://rubygems.org/
+  specs:
+    actionmailer (4.0.3)
+      actionpack (= 4.0.3)
+      mail (~> 2.5.4)
+    actionpack (4.0.3)
+      activesupport (= 4.0.3)
+      builder (~> 3.1.0)
+      erubis (~> 2.7.0)
+      rack (~> 1.5.2)
+      rack-test (~> 0.6.2)
+    activemodel (4.0.3)
+      activesupport (= 4.0.3)
+      builder (~> 3.1.0)
+    activerecord (4.0.3)
+      activemodel (= 4.0.3)
+      activerecord-deprecated_finders (~> 1.0.2)
+      activesupport (= 4.0.3)
+      arel (~> 4.0.0)
+    activerecord-deprecated_finders (1.0.3)
+    activesupport (4.0.3)
+      i18n (~> 0.6, >= 0.6.4)
+      minitest (~> 4.2)
+      multi_json (~> 1.3)
+      thread_safe (~> 0.1)
+      tzinfo (~> 0.3.37)
+    arel (4.0.2)
+    builder (3.1.4)
+    coderay (1.1.0)
+    diff-lcs (1.1.3)
+    erubis (2.7.0)
+    hike (1.2.3)
+    i18n (0.6.11)
+    json (1.7.6)
+    mail (2.5.4)
+      mime-types (~> 1.16)
+      treetop (~> 1.4.8)
+    method_source (0.8.2)
+    mime-types (1.25.1)
+    minitest (4.7.5)
+    multi_json (1.10.1)
+    pg (0.17.1)
+    polyglot (0.3.5)
+    pry (0.10.1)
+      coderay (~> 1.1.0)
+      method_source (~> 0.8.1)
+      slop (~> 3.4)
+    pry-rails (0.3.2)
+      pry (>= 0.9.10)
+    rack (1.5.2)
+    rack-test (0.6.2)
+      rack (>= 1.0)
+    rails (4.0.3)
+      actionmailer (= 4.0.3)
+      actionpack (= 4.0.3)
+      activerecord (= 4.0.3)
+      activesupport (= 4.0.3)
+      bundler (>= 1.3.0, < 2.0)
+      railties (= 4.0.3)
+      sprockets-rails (~> 2.0.0)
+    railties (4.0.3)
+      actionpack (= 4.0.3)
+      activesupport (= 4.0.3)
+      rake (>= 0.8.7)
+      thor (>= 0.18.1, < 2.0)
+    rake (10.3.2)
+    rdoc (3.12)
+      json (~> 1.4)
+    rspec (2.12.0)
+      rspec-core (~> 2.12.0)
+      rspec-expectations (~> 2.12.0)
+      rspec-mocks (~> 2.12.0)
+    rspec-core (2.12.2)
+    rspec-expectations (2.12.1)
+      diff-lcs (~> 1.1.3)
+    rspec-mocks (2.12.2)
+    slop (3.6.0)
+    sprockets (2.11.0)
+      hike (~> 1.2)
+      multi_json (~> 1.0)
+      rack (~> 1.0)
+      tilt (~> 1.1, != 1.3.0)
+    sprockets-rails (2.0.1)
+      actionpack (>= 3.0)
+      activesupport (>= 3.0)
+      sprockets (~> 2.8)
+    thor (0.19.1)
+    thread_safe (0.3.4)
+    tilt (1.4.1)
+    treetop (1.4.15)
+      polyglot
+      polyglot (>= 0.3.1)
+    tzinfo (0.3.40)
+PLATFORMS
+  ruby
+DEPENDENCIES
+  bundler
+  postgres_upsert!
+  pry-rails
+  rdoc
+  rspec (~> 2.12)

data/LICENSE ADDED Viewed

@@ -0,0 +1,22 @@
+The MIT License (MIT)
+Copyright (c) 2014 Steve Mitchell
+Based on work Copyright (c) Diogo Biazus https://github.com/diogob/postgres-copy
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,84 @@
+# postgres_upsert
+Allows your rails app to load data in a very fast way, avoiding calls to ActiveRecord.
+Using the PG gem and postgres's powerful COPY command, you can create thousands of rails objects in your db in a single query.
+## Install
+Put it in your Gemfile
+    gem 'postgres_upsert'
+Run the bundle command
+    bundle
+## Usage
+The gem will add the additional class method to ActiveRecord::Base
+* pg_upsert
+### Using pg_upsert
+pg_upsert will allow you to copy data from an arbitrary IO object or from a file in the database server (when you pass the path as string).
+Let's first copy from a file in the database server, assuming again that we have a users table and
+that we are in the Rails console:
+```ruby
+User.pg_upsert "/tmp/users.csv"
+```
+This command will use the headers in the CSV file as fields of the target table, so beware to always have a header in the files you want to import.
+If the column names in the CSV header do not match the field names of the target table, you can pass a map in the options parameter.
+```ruby
+User.pg_upsert "/tmp/users.csv", :map => {'name' => 'first_name'}
+```
+In the above example the header name in the CSV file will be mapped to the field called first_name in the users table.
+You can also manipulate and modify the values of the file being imported before they enter into the database using a block:
+```ruby
+User.pg_upsert "/tmp/users.csv" do |row|
+  row[0] = "fixed string"
+end
+```
+The above example will always change the value of the first column to "fixed string" before storing it into the database.
+For each iteration of the block row receives an array with the same order as the columns in the CSV file.
+To copy a binary formatted data file or IO object you can specify the format as binary
+```ruby
+User.pg_upsert "/tmp/users.dat", :format => :binary, :columns => ["id", "name"]
+```
+Which will generate the following SQL command:
+```sql
+COPY users_temp_### ("id","name") FROM STDIN BINARY
+```
+NOTE: binary files do not include header columns, so passing a :columns array is required for binary files.
+pg_upsert  supports 'upsert' or 'merge' operations.  In other words, the data source can contain both new and existing objects, and pg_upsert will handle either case.  Since the Postgres native COPY command does not handle updating existing records, pg_upsert accomplishes update and insert using an intermediary temp table:
+This merge/upsert happens in 5 steps (assume your data table is called "users")
+* create a temp table named users_temp_### where "###" is a random number.  In postgres temp tables are only visible to the current database session, so naming conflicts should not be a problem.
+* COPY the data to users_temp_###
+* issue a query to insert all new records from users_temp_### into users (newness is determined by the presence of the primary key in the users table)
+* issue a query to update all records in users with the data in users_temp_### (matching on primary key)
+* drop the temp table.
+## Note on Patches/Pull Requests
+* Fork the project
+* add your feature/fix to your fork (rspec tests please)
+* submit a PR
+* If you find an issue but can't fix it in a PR, please log an issue.  I'll do my best.

data/Rakefile ADDED Viewed

@@ -0,0 +1,18 @@
+# -*- encoding: utf-8 -*-
+$:.unshift File.expand_path("../lib", __FILE__)
+require 'bundler/gem_tasks'
+require 'rubygems'
+require 'rspec/core/rake_task'
+require 'rdoc/task'
+task :default => :spec
+RSpec::Core::RakeTask.new(:spec)
+Rake::RDocTask.new do |rdoc|
+  version = File.exist?('VERSION') ? File.read('VERSION') : ""
+  rdoc.rdoc_dir = 'rdoc'
+  rdoc.title = "postgres_upsert #{version}"
+  rdoc.rdoc_files.include('README*')
+  rdoc.rdoc_files.include('lib/**/*.rb')
+end

data/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.1.0

data/lib/postgres_upsert.rb ADDED Viewed

@@ -0,0 +1,13 @@
+require 'rubygems'
+require 'active_record'
+require 'postgres_upsert/active_record'
+require 'rails'
+class PostgresCopy < Rails::Railtie
+  initializer 'postgres_upsert' do
+    ActiveSupport.on_load :active_record do
+      require "postgres_upsert/active_record"
+    end
+  end
+end

data/lib/postgres_upsert/active_record.rb ADDED Viewed

@@ -0,0 +1,177 @@
+module ActiveRecord
+  class Base
+    # Copy data to a file passed as a string (the file path) or to lines that are passed to a block
+    # Copy data from a CSV that can be passed as a string (the file path) or as an IO object.
+    # * You can change the default delimiter passing delimiter: '' in the options hash
+    # * You can map fields from the file to different fields in the table using a map in the options hash
+    # * For further details on usage take a look at the README.md
+    def self.pg_upsert path_or_io, options = {}
+      options.reverse_merge!({:delimiter => ",", :format => :csv, :header => true})
+      options_string = options[:format] == :binary ? "BINARY" : "DELIMITER '#{options[:delimiter]}' CSV"
+      io = path_or_io.instance_of?(String) ? File.open(path_or_io, 'r') : path_or_io
+      columns_list = get_columns(io, options)
+      if columns_list.empty?
+        raise "Either the :columns option or :header => true are required"
+      end
+      copy_table = get_temp_table_name(options)
+      destination_table = get_table_name(options)
+      columns_string = columns_string_for_copy(columns_list)
+      create_temp_table(copy_table, destination_table, columns_list) if destination_table
+      connection.raw_connection.copy_data %{COPY #{copy_table} #{columns_string} FROM STDIN #{options_string}} do
+        if block_given?
+          block = Proc.new
+        end
+        while line = read_input_line(io, options, &block) do
+          next if line.strip.size == 0
+          connection.raw_connection.put_copy_data line
+        end
+      end
+      if destination_table
+        upsert_from_temp_table(copy_table, destination_table, columns_list)
+        drop_temp_table(copy_table)
+      end
+    end
+    private
+    def self.get_columns(io, options)
+      columns_list = options[:columns] || []
+      if options[:format] != :binary && options[:header]
+        #if header is present, we need to strip it from io, whether we use it for the columns list or not.
+        line = io.gets
+          if columns_list.empty?
+            columns_list = line.strip.split(options[:delimiter])
+          end
+      end
+      columns_list = columns_list.map{|c| options[:map][c.to_s] } if options[:map]
+      return columns_list
+    end
+    def self.columns_string_for_copy(columns_list)
+      str = get_columns_string(columns_list)
+      str.empty? ? str : "(#{str})"
+    end
+    def self.columns_string_for_select(columns_list)
+      columns = columns_list.clone
+      columns << "created_at" if column_names.include?("created_at")
+      columns << "updated_at" if column_names.include?("updated_at")
+      str = get_columns_string(columns)
+    end
+    def self.columns_string_for_insert(columns_list)
+      columns = columns_list.clone
+      columns << "created_at" if column_names.include?("created_at")
+      columns << "updated_at" if column_names.include?("updated_at")
+      str = get_columns_string(columns)
+    end
+    def self.select_string_for_insert(columns_list)
+      columns = columns_list.clone
+      str = get_columns_string(columns)
+      str << ",'#{DateTime.now.utc}'" if column_names.include?("created_at")
+      str << ",'#{DateTime.now.utc}'" if column_names.include?("updated_at")
+      str
+    end
+    def self.select_string_for_create(columns_list)
+      columns = columns_list.map(&:to_sym)
+      columns << primary_key.to_sym unless columns.include?(primary_key.to_sym)
+      get_columns_string(columns)
+    end
+    def self.get_columns_string(columns_list)
+      columns_list.size > 0 ? "\"#{columns_list.join('","')}\"" : ""
+    end
+    def self.get_table_name(options)
+      if options[:table]
+        connection.quote_table_name(options[:table])
+      else
+        quoted_table_name
+      end
+    end
+    def self.get_temp_table_name(options)
+      "#{table_name}_temp_#{rand(1000)}"
+    end
+    def self.read_input_line(io, options)
+      if options[:format] == :binary
+        begin
+          return io.readpartial(10240)
+        rescue EOFError
+        end
+      else
+        line = io.gets
+        if block_given? && line
+          row = line.strip.split(options[:delimiter])
+          yield(row)
+          line = row.join(options[:delimiter]) + "\n"
+        end
+        return line
+      end
+    end
+    def self.upsert_from_temp_table(temp_table, dest_table, columns_list)
+      update_from_temp_table(temp_table, dest_table, columns_list)
+      insert_from_temp_table(temp_table, dest_table, columns_list)
+    end
+    def self.update_from_temp_table(temp_table, dest_table, columns_list)
+      ActiveRecord::Base.connection.execute <<-SQL
+        UPDATE #{dest_table} AS d
+          #{update_set_clause(columns_list)}
+          FROM #{temp_table} as t
+          WHERE t.#{primary_key} = d.#{primary_key}
+          AND d.#{primary_key} IS NOT NULL;
+      SQL
+    end
+    def self.update_set_clause(columns_list)
+      command = columns_list.map do |col|
+        "\"#{col}\" = t.\"#{col}\""
+      end
+      command << "\"updated_at\" = '#{DateTime.now.utc}'" if column_names.include?("updated_at")
+      "SET #{command.join(',')}"
+    end
+    def self.insert_from_temp_table(temp_table, dest_table, columns_list)
+      columns_string = columns_string_for_insert(columns_list)
+      select_string = select_string_for_insert(columns_list)
+      ActiveRecord::Base.connection.execute <<-SQL
+        INSERT INTO #{dest_table} (#{columns_string})
+          SELECT #{select_string}
+          FROM #{temp_table} as t
+          WHERE NOT EXISTS
+            (SELECT 1
+                  FROM #{dest_table} as d
+                  WHERE d.#{primary_key} = t.#{primary_key})
+          AND t.#{primary_key} IS NOT NULL;
+      SQL
+    end
+    def self.create_temp_table(temp_table, dest_table, columns_list)
+      columns_string = select_string_for_create(columns_list)
+      ActiveRecord::Base.connection.execute <<-SQL
+        SET client_min_messages=WARNING;
+        DROP TABLE IF EXISTS #{temp_table};
+        CREATE TEMP TABLE #{temp_table}
+          AS SELECT #{columns_string} FROM #{dest_table} WHERE 0 = 1;
+      SQL
+    end
+    def self.drop_temp_table(temp_table)
+      ActiveRecord::Base.connection.execute <<-SQL
+        DROP TABLE #{temp_table}
+      SQL
+    end
+  end
+end

data/postgres_upsert.gemspec ADDED Viewed

@@ -0,0 +1,33 @@
+# -*- encoding: utf-8 -*-
+lib = File.expand_path('../lib/', __FILE__)
+$:.unshift lib unless $:.include?(lib)
+Gem::Specification.new do |s|
+  s.name = "postgres_upsert"
+  s.version = "1.0.0"
+  s.platform    = Gem::Platform::RUBY
+  s.required_ruby_version     = ">= 1.8.7"
+  s.authors = ["Steve Mitchell"]
+  s.date = "2014-09-12"
+  s.description = "Uses Postgres's powerful COPY command to upsert large sets of data into ActiveRecord tables"
+  s.email = "thestevemitchell@gmail.com"
+  git_files            = `git ls-files`.split("\n") rescue ''
+  s.files              = git_files
+  s.test_files         = `git ls-files -- {test,spec,features}/*`.split("\n")
+  s.executables        = []
+  s.require_paths      = %w(lib)
+  s.homepage = "https://github.com/theSteveMitchell/postgres_upsert"
+  s.require_paths = ["lib"]
+  s.summary = "A rubygem that integrates with ActiveRecord to insert/update large data sets into the database efficiently"
+  s.add_dependency "pg", '~> 0.17.0'
+  s.add_dependency "activerecord", '>= 3.0.0'
+  s.add_dependency "rails", '>= 3.0.0'
+  s.add_development_dependency "bundler"
+  s.add_development_dependency "rdoc"
+  s.add_development_dependency "pry-rails"
+  s.add_development_dependency "rspec", "~> 2.12"
+end

data/spec/fixtures/2_col_binary_data.dat ADDED Viewed

Binary file

data/spec/fixtures/comma_with_header.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id,data
2	+ 1,test data 1

data/spec/fixtures/comma_with_header_and_comma_values.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id,data
2	+ 1,"test, the data 1"

data/spec/fixtures/comma_with_header_and_unquoted_comma.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id,data
2	+ 1,test, the data 1

data/spec/fixtures/comma_without_header.csv ADDED Viewed

	@@ -0,0 +1 @@
1	+ 1,test data 1

data/spec/fixtures/reserved_word_model.rb ADDED Viewed

@@ -0,0 +1,5 @@
+require 'postgres_upsert'
+class ReservedWordModel < ActiveRecord::Base # spec fixture: bare ActiveRecord model with no custom behavior
+end

data/spec/fixtures/reserved_words.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id select group
2	+ 1 test select group name

data/spec/fixtures/semicolon_with_different_header.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ cod;info
2	+ 1;test data 1

data/spec/fixtures/semicolon_with_header.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id;data
2	+ 1;test data 1

data/spec/fixtures/tab_only_data.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id data
2	+ 1 test data 1

data/spec/fixtures/tab_with_different_header.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ cod info
2	+ 1 test data 1

data/spec/fixtures/tab_with_error.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ data id
2	+ this is a wrong separator;1

data/spec/fixtures/tab_with_extra_line.csv ADDED Viewed

@@ -0,0 +1,3 @@
+id	data
+1	test data 1

data/spec/fixtures/tab_with_header.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id data
2	+ 1 test data 1

data/spec/fixtures/tab_with_two_lines.csv ADDED Viewed

@@ -0,0 +1,3 @@
+id	data
+1	test data 1
+2	test data 2

data/spec/fixtures/test_model.rb ADDED Viewed

@@ -0,0 +1,4 @@
+require 'postgres_upsert'
+class TestModel < ActiveRecord::Base # spec fixture: bare ActiveRecord model with no custom behavior
+end

data/spec/fixtures/three_column.rb ADDED Viewed

@@ -0,0 +1,4 @@
+require 'postgres_upsert'
+class ThreeColumn < ActiveRecord::Base # spec fixture: bare ActiveRecord model with no custom behavior
+end

data/spec/pg_upsert_binary_spec.rb ADDED Viewed

@@ -0,0 +1,35 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+describe "pg_upsert from file with binary data" do
+  # Absolute path of the two-column binary COPY fixture shared by the examples.
+  let(:binary_fixture) { File.expand_path('spec/fixtures/2_col_binary_data.dat') }
+  # Empty the table, reset its id sequence, then freeze DateTime.now so the
+  # timestamps written during the import are predictable.
+  before(:each) do
+    ActiveRecord::Base.connection.execute %{
+      TRUNCATE TABLE test_models;
+      SELECT setval('test_models_id_seq', 1, false);
+    }
+    frozen_now = DateTime.parse("2012-01-01").utc
+    DateTime.stub(:now).and_return(frozen_now)
+  end
+  # String form of the frozen time, as compared against the stored timestamps.
+  def timestamp
+    DateTime.now.utc.to_s
+  end
+  it "imports from file if path is passed without field_map" do
+    TestModel.pg_upsert binary_fixture, :format => :binary, :columns => [:id, :data]
+    imported = TestModel.first.attributes
+    expect(imported).to include('data' => 'text', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "throws an error when importing binary file without columns list" do
+    # Binary data never has a header row, so an explicit columns list is mandatory.
+    expect {
+      TestModel.pg_upsert binary_fixture, :format => :binary
+    }.to raise_error "Either the :columns option or :header => true are required"
+  end
+end

data/spec/pg_upsert_csv_spec.rb ADDED Viewed

@@ -0,0 +1,187 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+describe "pg_upsert from file with CSV format" do
+  before(:each) do
+    ActiveRecord::Base.connection.execute %{
+      TRUNCATE TABLE test_models;
+      SELECT setval('test_models_id_seq', 1, false);
+    }
+  end
+  before do
+    DateTime.stub_chain(:now, :utc).and_return (DateTime.parse("2012-01-01").utc)
+  end
+  def timestamp
+    DateTime.now.utc
+  end
+  it "should import from file if path is passed without field_map" do
+    TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header.csv')
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "correctly handles delimiters in content" do
+    TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header_and_comma_values.csv')
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test, the data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "throws error if csv is malformed" do
+    expect{
+      TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header_and_unquoted_comma.csv')
+    }.to raise_error
+  end
+  it "throws error if the csv has mixed delimiters" do
+    expect{
+      TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_error.csv'), :delimiter => "\t"
+    }.to raise_error
+  end
+  it "should import from IO without field_map" do
+    TestModel.pg_upsert File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should import with custom delimiter from path" do
+    TestModel.pg_upsert File.expand_path('spec/fixtures/semicolon_with_header.csv'), :delimiter => ';'
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should import with custom delimiter from IO" do
+    TestModel.pg_upsert File.open(File.expand_path('spec/fixtures/semicolon_with_header.csv'), 'r'), :delimiter => ';'
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should import and allow changes in block" do
+    TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')) do |row|
+      row[1] = 'changed this data'
+    end
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should import 2 lines and allow changes in block" do
+    TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_two_lines.csv'), 'r'), :delimiter => "\t") do |row|
+      row[1] = 'changed this data'
+    end
+    expect(
+      TestModel.find(1).attributes
+    ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
+    expect(
+      TestModel.find(2).attributes
+    ).to include('data' => 'changed this data', 'created_at' => timestamp, 'updated_at' => timestamp)
+    expect(TestModel.count).to eq 2
+  end
+  it "should not expect a header when :header is false" do
+    TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/comma_without_header.csv'), 'r'), :header => false, :columns => [:id,:data])
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should be able to map the header in the file to diferent column names" do
+    TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_different_header.csv'), 'r'), :delimiter => "\t", :map => {'cod' => 'id', 'info' => 'data'})
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should be able to map the header in the file to diferent column names with custom delimiter" do
+    TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/semicolon_with_different_header.csv'), 'r'), :delimiter => ';', :map => {'cod' => 'id', 'info' => 'data'})
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should ignore empty lines" do
+    TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_extra_line.csv'), 'r'), :delimiter => "\t")
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should not create timestamps when the model does not include them" do
+    ReservedWordModel.pg_upsert File.expand_path('spec/fixtures/reserved_words.csv'), :delimiter => "\t"
+    expect(
+      ReservedWordModel.first.attributes
+    ).to eq("group"=>"group name", "id"=>1, "select"=>"test select")
+  end
+  context "upserting data to handle inserts and creates" do
+    let(:original_created_at) {5.days.ago.utc}
+    before(:each) do
+      TestModel.create(id: 1, data: "From the before time, in the long long ago", :created_at => original_created_at)
+    end
+    it "should not violate primary key constraint" do
+      expect{
+        TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header.csv')
+      }.to_not raise_error
+    end
+    it "should upsert (update existing records and insert new records)" do
+      TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
+      expect(
+        TestModel.find(1).attributes
+      ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at, "updated_at" => timestamp)
+      expect(
+        TestModel.find(2).attributes
+      ).to eq("id"=>2, "data"=>"test data 2", "created_at" => timestamp, "updated_at" => timestamp)
+    end
+    it "should require columns option if no header" do
+      expect{
+        TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary
+      }.to raise_error("Either the :columns option or :header => true are required")
+    end
+    it "should clean up the temp table after completion" do
+      TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
+      ActiveRecord::Base.connection.tables.should_not include("test_models_temp")
+    end
+    it "should gracefully drop the temp table if it already exists" do
+      ActiveRecord::Base.connection.execute "CREATE TEMP TABLE test_models_temp (LIKE test_models);"
+      TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
+      expect(
+        TestModel.find(1).attributes
+      ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at, "updated_at" => timestamp)
+      expect(
+        TestModel.find(2).attributes
+      ).to eq("id"=>2, "data"=>"test data 2", "created_at" => timestamp, "updated_at" => timestamp)
+    end
+    it "should be able to copy using custom set of columns" do
+      ThreeColumn.create(id: 1, data: "old stuff", extra: "neva change!", created_at: original_created_at)
+      ThreeColumn.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_only_data.csv'), 'r'), :delimiter => "\t", :columns => ["id", "data"])
+      expect(
+        ThreeColumn.first.attributes
+      ).to eq('id' => 1, 'data' => 'test data 1', 'extra' => "neva change!", 'created_at' => original_created_at, 'updated_at' => timestamp)
+    end
+  end
+end

data/spec/spec.opts ADDED Viewed

	@@ -0,0 +1 @@
1	+ --color

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,47 @@
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+require 'fixtures/test_model'
+require 'fixtures/three_column'
+require 'fixtures/reserved_word_model'
+require 'rspec'
+require 'rspec/autorun'
+RSpec.configure do |config|
+  config.before(:suite) do
+    # we create a test database if it does not exist
+    # I do not use database users or password for the tests, using ident authentication instead
+    begin
+      ActiveRecord::Base.establish_connection(
+        :adapter  => "postgresql",
+        :host     => "localhost",
+        :username => "postgres",
+        :password => "postgres",
+        :port     => 5432,
+        :database => "ar_pg_copy_test"
+      )
+      ActiveRecord::Base.connection.execute %{
+        SET client_min_messages TO warning;
+        DROP TABLE IF EXISTS test_models;
+        DROP TABLE IF EXISTS three_columns;
+        DROP TABLE IF EXISTS reserved_word_models;
+        CREATE TABLE test_models (id serial PRIMARY KEY, data text, created_at timestamp with time zone, updated_at timestamp with time zone );
+        CREATE TABLE three_columns (id serial PRIMARY KEY, data text, extra text, created_at timestamp with time zone, updated_at timestamp with time zone );
+        CREATE TABLE reserved_word_models (id serial PRIMARY KEY, "select" text, "group" text);
+      }
+    rescue Exception => e
+      puts "Exception: #{e}"
+      ActiveRecord::Base.establish_connection(
+        :adapter  => "postgresql",
+        :host     => "localhost",
+        :username => "postgres",
+        :password => "postgres",
+        :port     => 5432,
+        :database => "postgres"
+      )
+      ActiveRecord::Base.connection.execute "DROP DATABASE IF EXISTS ar_pg_copy_test"
+      ActiveRecord::Base.connection.execute "CREATE DATABASE ar_pg_copy_test;"
+      retry
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,194 @@
+--- !ruby/object:Gem::Specification
+name: postgres_upsert
+version: !ruby/object:Gem::Version
+  version: 1.0.0
+platform: ruby
+authors:
+- Steve Mitchell
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2014-09-12 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: pg
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 0.17.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 0.17.0
+- !ruby/object:Gem::Dependency
+  name: activerecord
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+- !ruby/object:Gem::Dependency
+  name: rails
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: rdoc
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: pry-rails
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '2.12'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '2.12'
+description: Uses Postgres's powerful COPY command to upsert large sets of data into
+  ActiveRecord tables
+email: thestevemitchell@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- Gemfile.lock
+- LICENSE
+- README.md
+- Rakefile
+- VERSION
+- lib/postgres_upsert.rb
+- lib/postgres_upsert/active_record.rb
+- postgres_upsert.gemspec
+- spec/fixtures/2_col_binary_data.dat
+- spec/fixtures/comma_with_header.csv
+- spec/fixtures/comma_with_header_and_comma_values.csv
+- spec/fixtures/comma_with_header_and_unquoted_comma.csv
+- spec/fixtures/comma_without_header.csv
+- spec/fixtures/reserved_word_model.rb
+- spec/fixtures/reserved_words.csv
+- spec/fixtures/semicolon_with_different_header.csv
+- spec/fixtures/semicolon_with_header.csv
+- spec/fixtures/tab_only_data.csv
+- spec/fixtures/tab_with_different_header.csv
+- spec/fixtures/tab_with_error.csv
+- spec/fixtures/tab_with_extra_line.csv
+- spec/fixtures/tab_with_header.csv
+- spec/fixtures/tab_with_two_lines.csv
+- spec/fixtures/test_model.rb
+- spec/fixtures/three_column.rb
+- spec/pg_upsert_binary_spec.rb
+- spec/pg_upsert_csv_spec.rb
+- spec/spec.opts
+- spec/spec_helper.rb
+homepage: https://github.com/theSteveMitchell/postgres_upsert
+licenses: []
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: 1.8.7
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.0.6
+signing_key:
+specification_version: 4
+summary: A rubygem that integrates with ActiveRecord to insert/update large data sets
+  into the database efficiently
+test_files:
+- spec/fixtures/2_col_binary_data.dat
+- spec/fixtures/comma_with_header.csv
+- spec/fixtures/comma_with_header_and_comma_values.csv
+- spec/fixtures/comma_with_header_and_unquoted_comma.csv
+- spec/fixtures/comma_without_header.csv
+- spec/fixtures/reserved_word_model.rb
+- spec/fixtures/reserved_words.csv
+- spec/fixtures/semicolon_with_different_header.csv
+- spec/fixtures/semicolon_with_header.csv
+- spec/fixtures/tab_only_data.csv
+- spec/fixtures/tab_with_different_header.csv
+- spec/fixtures/tab_with_error.csv
+- spec/fixtures/tab_with_extra_line.csv
+- spec/fixtures/tab_with_header.csv
+- spec/fixtures/tab_with_two_lines.csv
+- spec/fixtures/test_model.rb
+- spec/fixtures/three_column.rb
+- spec/pg_upsert_binary_spec.rb
+- spec/pg_upsert_csv_spec.rb
+- spec/spec.opts
+- spec/spec_helper.rb