RubyGems - postgres_upsert - Versions diffs - 3.1.0-java - Mend

postgres_upsert 3.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

checksums.yaml +7 -0
data/.gitignore +34 -0
data/Gemfile +5 -0
data/Gemfile.lock +146 -0
data/LICENSE +22 -0
data/README.md +92 -0
data/Rakefile +18 -0
data/VERSION +1 -0
data/lib/postgres_upsert/active_record.rb +13 -0
data/lib/postgres_upsert/writer.rb +208 -0
data/lib/postgres_upsert.rb +14 -0
data/postgres_upsert.gemspec +34 -0
data/spec/fixtures/2_col_binary_data.dat +0 -0
data/spec/fixtures/comma_with_header.csv +2 -0
data/spec/fixtures/comma_with_header_and_comma_values.csv +2 -0
data/spec/fixtures/comma_with_header_and_unquoted_comma.csv +2 -0
data/spec/fixtures/comma_without_header.csv +1 -0
data/spec/fixtures/no_id.csv +2 -0
data/spec/fixtures/reserved_word_model.rb +5 -0
data/spec/fixtures/reserved_words.csv +2 -0
data/spec/fixtures/semicolon_with_different_header.csv +2 -0
data/spec/fixtures/semicolon_with_header.csv +2 -0
data/spec/fixtures/tab_only_data.csv +2 -0
data/spec/fixtures/tab_with_different_header.csv +2 -0
data/spec/fixtures/tab_with_error.csv +2 -0
data/spec/fixtures/tab_with_extra_line.csv +3 -0
data/spec/fixtures/tab_with_header.csv +2 -0
data/spec/fixtures/tab_with_two_lines.csv +3 -0
data/spec/fixtures/test_model.rb +4 -0
data/spec/fixtures/three_column.rb +4 -0
data/spec/pg_upsert_binary_spec.rb +35 -0
data/spec/pg_upsert_csv_spec.rb +206 -0
data/spec/spec.opts +1 -0
data/spec/spec_helper.rb +43 -0
metadata +210 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 670c45802ef4d5b50510800787f75426181d0722
+  data.tar.gz: a249301be5d55ceaabc1d0d2606f834c65a51361
+SHA512:
+  metadata.gz: 0341063e94b7cf9a64e3c35f5052d988eae2f93ac5aa8aa0255813312836176529d918c16d59cd1fe032391ac8482fe878c3162ea20a61073b0654faebb4b0c0
+  data.tar.gz: e1eada0840b8f2bf2999783cd1db72c153c8d2a11823789c2505f4da2902a08c10a976cc0a77b906f43e124d0cc5462f485096d3d4d1643e967b8184e31499c6

data/.gitignore ADDED Viewed

@@ -0,0 +1,34 @@
+*.gem
+*.rbc
+/.config
+/coverage/
+/InstalledFiles
+/pkg/
+/spec/reports/
+/test/tmp/
+/test/version_tmp/
+/tmp/
+## Specific to RubyMotion:
+.dat*
+.repl_history
+build/
+## Documentation cache and generated files:
+/.yardoc/
+/_yardoc/
+/doc/
+/rdoc/
+## Environment normalisation:
+/.bundle/
+/lib/bundler/man/
+# for a library or gem, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# Gemfile.lock
+# .ruby-version
+# .ruby-gemset
+# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
+.rvmrc

data/Gemfile ADDED Viewed

@@ -0,0 +1,5 @@
+source 'https://rubygems.org'
+# specify gem dependencies in postgres_upsert.gemspec
+# except the platform-specific dependencies below
+gemspec

data/Gemfile.lock ADDED Viewed

@@ -0,0 +1,146 @@
+PATH
+  remote: .
+  specs:
+    postgres_upsert (3.0.0-java)
+      activerecord (>= 3.0.0)
+      activerecord-jdbcpostgresql-adapter
+      rails (>= 3.0.0)
+      sequel
+GEM
+  remote: https://rubygems.org/
+  specs:
+    actionmailer (4.2.0)
+      actionpack (= 4.2.0)
+      actionview (= 4.2.0)
+      activejob (= 4.2.0)
+      mail (~> 2.5, >= 2.5.4)
+      rails-dom-testing (~> 1.0, >= 1.0.5)
+    actionpack (4.2.0)
+      actionview (= 4.2.0)
+      activesupport (= 4.2.0)
+      rack (~> 1.6.0)
+      rack-test (~> 0.6.2)
+      rails-dom-testing (~> 1.0, >= 1.0.5)
+      rails-html-sanitizer (~> 1.0, >= 1.0.1)
+    actionview (4.2.0)
+      activesupport (= 4.2.0)
+      builder (~> 3.1)
+      erubis (~> 2.7.0)
+      rails-dom-testing (~> 1.0, >= 1.0.5)
+      rails-html-sanitizer (~> 1.0, >= 1.0.1)
+    activejob (4.2.0)
+      activesupport (= 4.2.0)
+      globalid (>= 0.3.0)
+    activemodel (4.2.0)
+      activesupport (= 4.2.0)
+      builder (~> 3.1)
+    activerecord (4.2.0)
+      activemodel (= 4.2.0)
+      activesupport (= 4.2.0)
+      arel (~> 6.0)
+    activerecord-jdbc-adapter (1.3.14)
+      activerecord (>= 2.2)
+    activerecord-jdbcpostgresql-adapter (1.3.14)
+      activerecord-jdbc-adapter (~> 1.3.14)
+      jdbc-postgres (>= 9.1)
+    activesupport (4.2.0)
+      i18n (~> 0.7)
+      json (~> 1.7, >= 1.7.7)
+      minitest (~> 5.1)
+      thread_safe (~> 0.3, >= 0.3.4)
+      tzinfo (~> 1.1)
+    arel (6.0.0)
+    builder (3.2.2)
+    coderay (1.1.0)
+    diff-lcs (1.2.5)
+    erubis (2.7.0)
+    ffi (1.9.6-java)
+    globalid (0.3.2)
+      activesupport (>= 4.1.0)
+    hike (1.2.3)
+    i18n (0.7.0)
+    jdbc-postgres (9.3.1102)
+    json (1.8.2-java)
+    loofah (2.0.1)
+      nokogiri (>= 1.5.9)
+    mail (2.6.3)
+      mime-types (>= 1.16, < 3)
+    method_source (0.8.2)
+    mime-types (2.4.3)
+    minitest (5.5.1)
+    multi_json (1.10.1)
+    nokogiri (1.6.6.2-java)
+    pry (0.10.1-java)
+      coderay (~> 1.1.0)
+      method_source (~> 0.8.1)
+      slop (~> 3.4)
+      spoon (~> 0.0)
+    pry-rails (0.3.3)
+      pry (>= 0.9.10)
+    rack (1.6.0)
+    rack-test (0.6.3)
+      rack (>= 1.0)
+    rails (4.2.0)
+      actionmailer (= 4.2.0)
+      actionpack (= 4.2.0)
+      actionview (= 4.2.0)
+      activejob (= 4.2.0)
+      activemodel (= 4.2.0)
+      activerecord (= 4.2.0)
+      activesupport (= 4.2.0)
+      bundler (>= 1.3.0, < 2.0)
+      railties (= 4.2.0)
+      sprockets-rails
+    rails-deprecated_sanitizer (1.0.3)
+      activesupport (>= 4.2.0.alpha)
+    rails-dom-testing (1.0.5)
+      activesupport (>= 4.2.0.beta, < 5.0)
+      nokogiri (~> 1.6.0)
+      rails-deprecated_sanitizer (>= 1.0.1)
+    rails-html-sanitizer (1.0.1)
+      loofah (~> 2.0)
+    railties (4.2.0)
+      actionpack (= 4.2.0)
+      activesupport (= 4.2.0)
+      rake (>= 0.8.7)
+      thor (>= 0.18.1, < 2.0)
+    rake (10.4.2)
+    rdoc (4.2.0)
+      json (~> 1.4)
+    rspec (2.99.0)
+      rspec-core (~> 2.99.0)
+      rspec-expectations (~> 2.99.0)
+      rspec-mocks (~> 2.99.0)
+    rspec-core (2.99.2)
+    rspec-expectations (2.99.2)
+      diff-lcs (>= 1.1.3, < 2.0)
+    rspec-mocks (2.99.3)
+    sequel (4.19.0)
+    slop (3.6.0)
+    spoon (0.0.4)
+      ffi
+    sprockets (2.12.3)
+      hike (~> 1.2)
+      multi_json (~> 1.0)
+      rack (~> 1.0)
+      tilt (~> 1.1, != 1.3.0)
+    sprockets-rails (2.2.4)
+      actionpack (>= 3.0)
+      activesupport (>= 3.0)
+      sprockets (>= 2.8, < 4.0)
+    thor (0.19.1)
+    thread_safe (0.3.4-java)
+    tilt (1.4.1)
+    tzinfo (1.2.2)
+      thread_safe (~> 0.1)
+PLATFORMS
+  java
+DEPENDENCIES
+  bundler
+  postgres_upsert!
+  pry-rails
+  rdoc
+  rspec (~> 2.12)

data/LICENSE ADDED Viewed

@@ -0,0 +1,22 @@
+The MIT License (MIT)
+Copyright (c) 2014 Steve Mitchell
+Based on work Copyright (c) Diogo Biazus https://github.com/diogob/postgres-copy
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

data/README.md ADDED Viewed

@@ -0,0 +1,92 @@
+# postgres_upsert
+Allows your Rails app to load data very quickly, avoiding calls to ActiveRecord.
+Using the PG gem and postgres's powerful COPY command, you can create thousands of rails objects in your db in a single query.
+## Install
+Put it in your Gemfile
+    gem 'postgres_upsert'
+Run the bundle command
+    bundle
+## Usage
+The gem adds the following additional class method to ActiveRecord::Base:
+* pg_upsert io_object_or_file_path, [options]
+io_object_or_file_path => is a file path or an io object (StringIO, FileIO, etc.)
+options:
+:delimiter - the string to use to delimit fields.  Default is ","
+:format - the format of the file (valid formats are :csv or :binary).  Default is :csv
+:header => specifies if the file/io source contains a header row.  Either :header option must be true, or :columns list must be passed.  Default true
+:key_column => the primary key or unique key column on your ActiveRecord table, used to distinguish new records from existing records.  Default is the primary_key of your ActiveRecord model class.
+:update_only => when true, postgres_upsert will ONLY update existing records, and not insert new.  Default is false.
+pg_upsert will allow you to copy data from an arbitrary IO object or from a file (when you pass the path as a string).  The file is opened by the Ruby process and streamed to Postgres through COPY ... FROM STDIN, so the path must be readable on the machine running your app, not on the database server.
+Let's first copy from a file, assuming that we have a users table and
+that we are in the Rails console:
+```ruby
+User.pg_upsert "/tmp/users.csv"
+```
+This command will use the headers in the CSV file as fields of the target table, so beware to always have a header in the files you want to import.
+If the column names in the CSV header do not match the field names of the target table, you can pass a map in the options parameter.
+```ruby
+User.pg_upsert "/tmp/users.csv", :map => {'name' => 'first_name'}
+```
+In the above example the header name in the CSV file will be mapped to the field called first_name in the users table.
+To copy a binary formatted data file or IO object you can specify the format as binary
+```ruby
+User.pg_upsert "/tmp/users.dat", :format => :binary, :columns => ["id", "name"]
+```
+Which will generate the following SQL command:
+```sql
+COPY users_temp_### ("id","name") FROM STDIN WITH BINARY
+```
+NOTE: binary files do not include header columns, so passing a :columns array is required for binary files.
+pg_upsert  supports 'upsert' or 'merge' operations.  In other words, the data source can contain both new and existing objects, and pg_upsert will handle either case.  Since the Postgres native COPY command does not handle updating existing records, pg_upsert accomplishes update and insert using an intermediary temp table:
+This merge/upsert happens in 5 steps (assume your data table is called "users"):
+* create a temp table named users_temp_### where "###" is a random number.  In postgres temp tables are only visible to the current database session, so naming conflicts should not be a problem.
+* COPY the data to users_temp_###
+* issue a query to insert all new records from users_temp_### into users (newness is determined by the presence of the primary key in the users table)
+* issue a query to update all records in users with the data in users_temp_### (matching on primary key)
+* drop the temp table.
+### overriding the key_column
+By default pg_upsert uses the primary key on your ActiveRecord table to determine if each record should be inserted or updated.  You can override the column using the :key_column option:
+```ruby
+User.pg_upsert "/tmp/users.csv", :key_column => "external_twitter_id"
+```
+obviously, the field you pass must be a unique key in your database (this is not enforced at the moment, but will be)
+Passing :update_only => true will ensure that no new records are created, but existing records will be updated.
+## Note on Patches/Pull Requests
+* Fork the project
+* add your feature/fix to your fork (rspec tests please)
+* submit a PR
+* If you find an issue but can't fix it in a PR, please log an issue.  I'll do my best.

data/Rakefile ADDED Viewed

@@ -0,0 +1,18 @@
+# -*- encoding: utf-8 -*-
+$:.unshift File.expand_path("../lib", __FILE__)
+require 'bundler/gem_tasks'
+require 'rubygems'
+require 'rspec/core/rake_task'
+require 'rdoc/task'
+task :default => :spec
+RSpec::Core::RakeTask.new(:spec)
+Rake::RDocTask.new do |rdoc|
+  version = File.exist?('VERSION') ? File.read('VERSION') : ""
+  rdoc.rdoc_dir = 'rdoc'
+  rdoc.title = "postgres_upsert #{version}"
+  rdoc.rdoc_files.include('README*')
+  rdoc.rdoc_files.include('lib/**/*.rb')
+end

data/VERSION ADDED Viewed

	@@ -0,0 +1 @@
1	+ 0.1.0

data/lib/postgres_upsert/active_record.rb ADDED Viewed

@@ -0,0 +1,13 @@
+module ActiveRecord
+  class Base
+    # Upserts rows into this model's table by delegating to PostgresUpsert::Writer#write.
+    # The data source can be passed as a string (the file path) or as an IO object.
+    # * You can change the default delimiter by passing :delimiter in the options hash
+    # * You can map fields from the file to different fields in the table using :map in the options hash
+    # * For further details on usage take a look at the README.md
+    def self.pg_upsert path_or_io, options = {}
+      PostgresUpsert::Writer.new(table_name, path_or_io, options).write
+    end
+  end
+end

data/lib/postgres_upsert/writer.rb ADDED Viewed

@@ -0,0 +1,208 @@
+require 'sequel'
+module PostgresUpsert
+  class Writer
+    def initialize(table_name, source, options = {})
+      @table_name = table_name
+      @options = options.reverse_merge({
+        :delimiter => ",",
+        :format => :csv,
+        :header => true,
+        :key_column => primary_key,
+        :update_only => false})
+      @source = source.instance_of?(String) ? File.open(source, 'r') : source
+      @columns_list = get_columns
+      generate_temp_table_name
+    end
+    def write
+      if @columns_list.empty?
+        raise "Either the :columns option or :header => true are required"
+      end
+      csv_options = @options[:format] == :binary ? "BINARY" : "DELIMITER '#{@options[:delimiter]}' CSV"
+      copy_table = @temp_table_name
+      columns_string = columns_string_for_copy
+      base_connection = Sequel.connect(ActiveRecord::Base.connection.config[:url])
+      base_connection.synchronize do |connection|
+        create_temp_table(connection)
+        copy_manager = org.postgresql.copy.CopyManager.new(connection)
+        stream = copy_manager.copy_in("COPY #{copy_table} #{columns_string} FROM STDIN WITH #{csv_options}")
+        while line = read_input_line do
+          next if line.strip.size == 0
+          line = line.to_java_bytes
+          stream.write_to_copy(line, 0, line.length)
+        end
+        stream.end_copy
+        upsert_from_temp_table(connection)
+        drop_temp_table(connection)
+      end
+    end
+  private
+    def primary_key
+      @primary_key ||= begin
+        query = <<-sql
+          SELECT
+            pg_attribute.attname,
+            format_type(pg_attribute.atttypid, pg_attribute.atttypmod)
+          FROM pg_index, pg_class, pg_attribute
+          WHERE
+            pg_class.oid = '#{@table_name}'::regclass AND
+            indrelid = pg_class.oid AND
+            pg_attribute.attrelid = pg_class.oid AND
+            pg_attribute.attnum = any(pg_index.indkey)
+          AND indisprimary
+        sql
+        pg_result = ActiveRecord::Base.connection.execute query
+        pg_result.each{ |row| return row['attname'] }
+      end
+    end
+    def column_names
+      @column_names ||= begin
+        query = "SELECT * FROM information_schema.columns WHERE TABLE_NAME = '#{@table_name}'"
+        pg_result = ActiveRecord::Base.connection.execute query
+        pg_result.map{ |row| row['column_name'] }
+      end
+    end
+    def get_columns
+      columns_list = @options[:columns] || []
+      if @options[:format] != :binary && @options[:header]
+        #if header is present, we need to strip it from io, whether we use it for the columns list or not.
+        line = @source.gets
+          if columns_list.empty?
+            columns_list = line.strip.split(@options[:delimiter])
+          end
+      end
+      columns_list = columns_list.map{|c| @options[:map][c.to_s] } if @options[:map]
+      return columns_list
+    end
+    def columns_string_for_copy
+      str = get_columns_string
+      str.empty? ? str : "(#{str})"
+    end
+    def columns_string_for_select
+      columns = @columns_list.clone
+      columns << "created_at" if column_names.include?("created_at")
+      columns << "updated_at" if column_names.include?("updated_at")
+      str = get_columns_string(columns)
+    end
+    def columns_string_for_insert
+      columns = @columns_list.clone
+      columns << "created_at" if column_names.include?("created_at")
+      columns << "updated_at" if column_names.include?("updated_at")
+      str = get_columns_string(columns)
+    end
+    def select_string_for_insert
+      columns = @columns_list.clone
+      str = get_columns_string(columns)
+      str << ",'#{DateTime.now.utc}'" if column_names.include?("created_at")
+      str << ",'#{DateTime.now.utc}'" if column_names.include?("updated_at")
+      str
+    end
+    def select_string_for_create
+      columns = @columns_list.map(&:to_sym)
+      columns << @options[:key_column].to_sym unless columns.include?(@options[:key_column].to_sym)
+      get_columns_string(columns)
+    end
+    def get_columns_string(columns = nil)
+      columns ||= @columns_list
+      columns.size > 0 ? "\"#{columns.join('","')}\"" : ""
+    end
+    def quoted_table_name
+      @quoted_table_name ||= ActiveRecord::Base.connection.quote_table_name(@table_name)
+    end
+    def generate_temp_table_name
+      @temp_table_name = "#{@table_name}_temp_#{rand(1000)}"
+    end
+    def read_input_line
+      if @options[:format] == :binary
+        begin
+          return @source.readpartial(10240)
+        rescue EOFError
+        end
+      else
+        line = @source.gets
+        return line
+      end
+    end
+    def upsert_from_temp_table(connection)
+      update_from_temp_table(connection)
+      insert_from_temp_table(connection) unless @options[:update_only]
+    end
+    def update_from_temp_table(connection)
+      connection.execSQLUpdate <<-SQL
+        UPDATE #{quoted_table_name} AS d
+          #{update_set_clause}
+          FROM #{@temp_table_name} as t
+          WHERE t.#{@options[:key_column]} = d.#{@options[:key_column]}
+          AND d.#{@options[:key_column]} IS NOT NULL;
+      SQL
+    end
+    def update_set_clause
+      command = @columns_list.map do |col|
+        "\"#{col}\" = t.\"#{col}\""
+      end
+      command << "\"updated_at\" = '#{DateTime.now.utc}'" if column_names.include?("updated_at")
+      "SET #{command.join(',')}"
+    end
+    def insert_from_temp_table(connection)
+      columns_string = columns_string_for_insert
+      select_string = select_string_for_insert
+      connection.execSQLUpdate <<-SQL
+        INSERT INTO #{quoted_table_name} (#{columns_string})
+          SELECT #{select_string}
+          FROM #{@temp_table_name} as t
+          WHERE NOT EXISTS
+            (SELECT 1
+                  FROM #{quoted_table_name} as d
+                  WHERE d.#{@options[:key_column]} = t.#{@options[:key_column]})
+          AND t.#{@options[:key_column]} IS NOT NULL;
+      SQL
+    end
+    def create_temp_table(connection)
+      columns_string = select_string_for_create
+      connection.execSQLUpdate <<-SQL
+        SET client_min_messages=WARNING;
+        DROP TABLE IF EXISTS #{@temp_table_name};
+        CREATE TEMP TABLE #{@temp_table_name}
+          AS SELECT #{columns_string} FROM #{quoted_table_name} WHERE 0 = 1;
+      SQL
+    end
+    def drop_temp_table(connection)
+      connection.execSQLUpdate <<-SQL
+        DROP TABLE #{@temp_table_name}
+      SQL
+    end
+  end
+end

data/lib/postgres_upsert.rb ADDED Viewed

@@ -0,0 +1,14 @@
+require 'rubygems'
+require 'active_record'
+require 'postgres_upsert/active_record'
+require 'postgres_upsert/writer'
+require 'rails'
+class PostgresCopy < Rails::Railtie
+  initializer 'postgres_upsert' do
+    ActiveSupport.on_load :active_record do
+      require "postgres_upsert/active_record"
+    end
+  end
+end

data/postgres_upsert.gemspec ADDED Viewed

@@ -0,0 +1,34 @@
+# -*- encoding: utf-8 -*-
+lib = File.expand_path('../lib/', __FILE__)
+$:.unshift lib unless $:.include?(lib)
+Gem::Specification.new do |s|
+  s.name = "postgres_upsert"
+  s.version = "3.1.0"
+  s.platform    = 'java'
+  s.authors = ["Steve Mitchell"]
+  s.date = "2014-09-12"
+  s.description = "Uses Postgres's powerful COPY command to upsert large sets of data into ActiveRecord tables"
+  s.email = "thestevemitchell@gmail.com"
+  git_files            = `git ls-files`.split("\n") rescue ''
+  s.files              = git_files
+  s.test_files         = `git ls-files -- {test,spec,features}/*`.split("\n")
+  s.executables        = []
+  s.require_paths      = %w(lib)
+  s.homepage = "https://github.com/theSteveMitchell/postgres_upsert"
+  s.require_paths = ["lib"]
+  s.summary = "A rubygem that integrates with ActiveRecord to insert/update large data sets into the database efficiently"
+  s.license = "MIT"
+  s.add_dependency "activerecord-jdbcpostgresql-adapter"
+  s.add_dependency "sequel"
+  s.add_dependency "activerecord", '>= 3.0.0'
+  s.add_dependency "rails", '>= 3.0.0'
+  s.add_development_dependency "bundler"
+  s.add_development_dependency "rdoc"
+  s.add_development_dependency "pry-rails"
+  s.add_development_dependency "rspec", "~> 2.12"
+end

data/spec/fixtures/2_col_binary_data.dat ADDED Viewed

Binary file

data/spec/fixtures/comma_with_header.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id,data
2	+ 1,test data 1

data/spec/fixtures/comma_with_header_and_comma_values.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id,data
2	+ 1,"test, the data 1"

data/spec/fixtures/comma_with_header_and_unquoted_comma.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id,data
2	+ 1,test, the data 1

data/spec/fixtures/comma_without_header.csv ADDED Viewed

	@@ -0,0 +1 @@
1	+ 1,test data 1

data/spec/fixtures/no_id.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ data,extra
2	+ old stuff,ABC: Always Be Changing.

data/spec/fixtures/reserved_word_model.rb ADDED Viewed

@@ -0,0 +1,5 @@
+require 'postgres_upsert'
+class ReservedWordModel < ActiveRecord::Base # spec fixture model; presumably backed by the reserved_word_models table ("select"/"group" columns) that spec_helper creates
+end

data/spec/fixtures/reserved_words.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id select group
2	+ 1 test select group name

data/spec/fixtures/semicolon_with_different_header.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ cod;info
2	+ 1;test data 1

data/spec/fixtures/semicolon_with_header.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id;data
2	+ 1;test data 1

data/spec/fixtures/tab_only_data.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id data
2	+ 1 test data 1

data/spec/fixtures/tab_with_different_header.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ cod info
2	+ 1 test data 1

data/spec/fixtures/tab_with_error.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ data id
2	+ this is a wrong separator;1

data/spec/fixtures/tab_with_extra_line.csv ADDED Viewed

@@ -0,0 +1,3 @@
+id	data
+1	test data 1

data/spec/fixtures/tab_with_header.csv ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ id data
2	+ 1 test data 1

data/spec/fixtures/tab_with_two_lines.csv ADDED Viewed

@@ -0,0 +1,3 @@
+id	data
+1	test data 1
+2	test data 2

data/spec/fixtures/test_model.rb ADDED Viewed

@@ -0,0 +1,4 @@
+require 'postgres_upsert'
+class TestModel < ActiveRecord::Base # spec fixture model; presumably backed by the test_models table that spec_helper creates
+end

data/spec/fixtures/three_column.rb ADDED Viewed

@@ -0,0 +1,4 @@
+require 'postgres_upsert'
+class ThreeColumn < ActiveRecord::Base # spec fixture model; presumably backed by the three_columns table that spec_helper creates
+end

data/spec/pg_upsert_binary_spec.rb ADDED Viewed

@@ -0,0 +1,35 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+# Exercises pg_upsert with :format => :binary against the 2-column binary fixture.
+describe "pg_upsert from file with binary data" do
+  # Start every example from an empty table, a reset id sequence and a frozen clock.
+  before(:each) do
+    ActiveRecord::Base.connection.execute %{
+      TRUNCATE TABLE test_models;
+      SELECT setval('test_models_id_seq', 1, false);
+    }
+    frozen_time = DateTime.parse("2012-01-01").utc
+    DateTime.stub(:now).and_return(frozen_time)
+  end
+  # String form of the stubbed "now", as compared against the stored timestamps.
+  def timestamp
+    DateTime.now.utc.to_s
+  end
+  it "imports from file if path is passed without field_map" do
+    binary_fixture = File.expand_path('spec/fixtures/2_col_binary_data.dat')
+    TestModel.pg_upsert(binary_fixture, :format => :binary, columns: [:id, :data])
+    imported = TestModel.first.attributes
+    expect(imported).to include('data' => 'text', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "throws an error when importing binary file without columns list" do
+    # Since binary data never has a header row, we'll require explicit columns list
+    binary_fixture = File.expand_path('spec/fixtures/2_col_binary_data.dat')
+    expect {
+      TestModel.pg_upsert(binary_fixture, :format => :binary)
+    }.to raise_error("Either the :columns option or :header => true are required")
+  end
+end

data/spec/pg_upsert_csv_spec.rb ADDED Viewed

@@ -0,0 +1,206 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+describe "pg_upsert from file with CSV format" do
+  before(:each) do
+    ActiveRecord::Base.connection.execute %{
+      TRUNCATE TABLE test_models;
+      TRUNCATE TABLE three_columns;
+      SELECT setval('test_models_id_seq', 1, false);
+    }
+  end
+  before do
+    DateTime.stub_chain(:now, :utc).and_return (DateTime.parse("2012-01-01").utc)
+  end
+  def timestamp
+    DateTime.now.utc
+  end
+  it "should import from file if path is passed without field_map" do
+    TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header.csv')
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "correctly handles delimiters in content" do
+    TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header_and_comma_values.csv')
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test, the data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "throws error if csv is malformed" do
+    expect{
+      TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header_and_unquoted_comma.csv')
+    }.to raise_error
+  end
+  it "throws error if the csv has mixed delimiters" do
+    expect{
+      TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_error.csv'), :delimiter => "\t"
+    }.to raise_error
+  end
+  it "should import from IO without field_map" do
+    TestModel.pg_upsert File.open(File.expand_path('spec/fixtures/comma_with_header.csv'), 'r')
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should import with custom delimiter from path" do
+    TestModel.pg_upsert File.expand_path('spec/fixtures/semicolon_with_header.csv'), :delimiter => ';'
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should import with custom delimiter from IO" do
+    TestModel.pg_upsert File.open(File.expand_path('spec/fixtures/semicolon_with_header.csv'), 'r'), :delimiter => ';'
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should not expect a header when :header is false" do
+    TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/comma_without_header.csv'), 'r'), :header => false, :columns => [:id,:data])
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should be able to map the header in the file to diferent column names" do
+    TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_different_header.csv'), 'r'), :delimiter => "\t", :map => {'cod' => 'id', 'info' => 'data'})
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should be able to map the header in the file to diferent column names with custom delimiter" do
+    TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/semicolon_with_different_header.csv'), 'r'), :delimiter => ';', :map => {'cod' => 'id', 'info' => 'data'})
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should ignore empty lines" do
+    TestModel.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_with_extra_line.csv'), 'r'), :delimiter => "\t")
+    expect(
+      TestModel.first.attributes
+    ).to include('data' => 'test data 1', 'created_at' => timestamp, 'updated_at' => timestamp)
+  end
+  it "should not create timestamps when the model does not include them" do
+    ReservedWordModel.pg_upsert File.expand_path('spec/fixtures/reserved_words.csv'), :delimiter => "\t"
+    expect(
+      ReservedWordModel.first.attributes
+    ).to eq("group"=>"group name", "id"=>1, "select"=>"test select")
+  end
+  context "upserting data to handle inserts and creates" do
+    let(:original_created_at) {5.days.ago.utc}
+    before(:each) do
+      TestModel.create(id: 1, data: "From the before time, in the long long ago", :created_at => original_created_at)
+    end
+    it "should not violate primary key constraint" do
+      expect{
+        TestModel.pg_upsert File.expand_path('spec/fixtures/comma_with_header.csv')
+      }.to_not raise_error
+    end
+    it "should upsert (update existing records and insert new records)" do
+      TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
+      expect(
+        TestModel.find(1).attributes
+      ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at, "updated_at" => timestamp)
+      expect(
+        TestModel.find(2).attributes
+      ).to eq("id"=>2, "data"=>"test data 2", "created_at" => timestamp, "updated_at" => timestamp)
+    end
+    it "should require columns option if no header" do
+      expect{
+        TestModel.pg_upsert File.expand_path('spec/fixtures/2_col_binary_data.dat'), :format => :binary
+      }.to raise_error("Either the :columns option or :header => true are required")
+    end
+    it "should clean up the temp table after completion" do
+      TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
+      ActiveRecord::Base.connection.tables.should_not include("test_models_temp")
+    end
+    it "should gracefully drop the temp table if it already exists" do
+      ActiveRecord::Base.connection.execute "CREATE TEMP TABLE test_models_temp (LIKE test_models);"
+      TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t"
+      expect(
+        TestModel.find(1).attributes
+      ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at, "updated_at" => timestamp)
+      expect(
+        TestModel.find(2).attributes
+      ).to eq("id"=>2, "data"=>"test data 2", "created_at" => timestamp, "updated_at" => timestamp)
+    end
+    it "should be able to copy using custom set of columns" do
+      ThreeColumn.create(id: 1, data: "old stuff", extra: "neva change!", created_at: original_created_at)
+      ThreeColumn.pg_upsert(File.open(File.expand_path('spec/fixtures/tab_only_data.csv'), 'r'), :delimiter => "\t", :columns => ["id", "data"])
+      expect(
+        ThreeColumn.first.attributes
+      ).to eq('id' => 1, 'data' => 'test data 1', 'extra' => "neva change!", 'created_at' => original_created_at, 'updated_at' => timestamp)
+    end
+  end
+  context 'overriding the comparison column' do
+    it 'updates records based the match column option if its passed in' do
+      three_col = ThreeColumn.create(id: 1, data: "old stuff", extra: "neva change!")
+      file = File.open(File.expand_path('spec/fixtures/no_id.csv'), 'r')
+      ThreeColumn.pg_upsert(file, :key_column => "data")
+      expect(
+        three_col.reload.extra
+      ).to eq("ABC: Always Be Changing.")
+    end
+    it 'inserts records if the passed match column doesnt exist' do
+      file = File.open(File.expand_path('spec/fixtures/no_id.csv'), 'r')
+      ThreeColumn.pg_upsert(file, :key_column => "data")
+      expect(
+        ThreeColumn.last.attributes
+      ).to include("id" => 1, "data" => "old stuff", "extra" => "ABC: Always Be Changing.")
+    end
+  end
+  context 'update only' do
+    let(:original_created_at) {5.days.ago.utc}
+    before(:each) do
+      TestModel.create(id: 1, data: "From the before time, in the long long ago", :created_at => original_created_at)
+    end
+    it 'will only update and not insert if insert_only flag is passed.' do
+      TestModel.pg_upsert File.expand_path('spec/fixtures/tab_with_two_lines.csv'), :delimiter => "\t", :update_only => true
+      expect(
+        TestModel.find(1).attributes
+      ).to eq("id"=>1, "data"=>"test data 1", "created_at" => original_created_at  , "updated_at" => timestamp)
+      expect{
+        TestModel.find(2)
+      }.to raise_error(ActiveRecord::RecordNotFound)
+    end
+  end
+end

data/spec/spec.opts ADDED Viewed

	@@ -0,0 +1 @@
1	+ --color

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,43 @@
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+require 'fixtures/test_model'
+require 'fixtures/three_column'
+require 'fixtures/reserved_word_model'
+require 'rspec'
+require 'rspec/autorun'
+RSpec.configure do |config|
+  config.before(:suite) do
+    # we create a test database if it does not exist
+    # I do not use database users or password for the tests, using ident authentication instead
+    begin
+      ActiveRecord::Base.establish_connection(
+        :adapter  => "postgresql",
+        :host     => "localhost",
+        :port     => 5432,
+        :database => "ar_pg_copy_test"
+      )
+      ActiveRecord::Base.connection.execute %{
+        SET client_min_messages TO warning;
+        DROP TABLE IF EXISTS test_models;
+        DROP TABLE IF EXISTS three_columns;
+        DROP TABLE IF EXISTS reserved_word_models;
+        CREATE TABLE test_models (id serial PRIMARY KEY, data text, created_at timestamp with time zone, updated_at timestamp with time zone );
+        CREATE TABLE three_columns (id serial PRIMARY KEY, data text, extra text, created_at timestamp with time zone, updated_at timestamp with time zone );
+        CREATE TABLE reserved_word_models (id serial PRIMARY KEY, "select" text, "group" text);
+      }
+    rescue Exception => e
+      puts "Exception: #{e}"
+      ActiveRecord::Base.establish_connection(
+        :adapter  => "postgresql",
+        :host     => "localhost",
+        :port     => 5432,
+        :database => "postgres"
+      )
+      ActiveRecord::Base.connection.execute "DROP DATABASE IF EXISTS ar_pg_copy_test"
+      ActiveRecord::Base.connection.execute "CREATE DATABASE ar_pg_copy_test;"
+      retry
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,210 @@
+--- !ruby/object:Gem::Specification
+name: postgres_upsert
+version: !ruby/object:Gem::Version
+  version: 3.1.0
+platform: java
+authors:
+- Steve Mitchell
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2014-09-12 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: activerecord-jdbcpostgresql-adapter
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  prerelease: false
+  type: :runtime
+- !ruby/object:Gem::Dependency
+  name: sequel
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  prerelease: false
+  type: :runtime
+- !ruby/object:Gem::Dependency
+  name: activerecord
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+  prerelease: false
+  type: :runtime
+- !ruby/object:Gem::Dependency
+  name: rails
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: 3.0.0
+  prerelease: false
+  type: :runtime
+- !ruby/object:Gem::Dependency
+  name: bundler
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  prerelease: false
+  type: :development
+- !ruby/object:Gem::Dependency
+  name: rdoc
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  prerelease: false
+  type: :development
+- !ruby/object:Gem::Dependency
+  name: pry-rails
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  prerelease: false
+  type: :development
+- !ruby/object:Gem::Dependency
+  name: rspec
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '2.12'
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '2.12'
+  prerelease: false
+  type: :development
+description: Uses Postgres's powerful COPY command to upsert large sets of data into ActiveRecord tables
+email: thestevemitchell@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- Gemfile.lock
+- LICENSE
+- README.md
+- Rakefile
+- VERSION
+- lib/postgres_upsert.rb
+- lib/postgres_upsert/active_record.rb
+- lib/postgres_upsert/writer.rb
+- postgres_upsert.gemspec
+- spec/fixtures/2_col_binary_data.dat
+- spec/fixtures/comma_with_header.csv
+- spec/fixtures/comma_with_header_and_comma_values.csv
+- spec/fixtures/comma_with_header_and_unquoted_comma.csv
+- spec/fixtures/comma_without_header.csv
+- spec/fixtures/no_id.csv
+- spec/fixtures/reserved_word_model.rb
+- spec/fixtures/reserved_words.csv
+- spec/fixtures/semicolon_with_different_header.csv
+- spec/fixtures/semicolon_with_header.csv
+- spec/fixtures/tab_only_data.csv
+- spec/fixtures/tab_with_different_header.csv
+- spec/fixtures/tab_with_error.csv
+- spec/fixtures/tab_with_extra_line.csv
+- spec/fixtures/tab_with_header.csv
+- spec/fixtures/tab_with_two_lines.csv
+- spec/fixtures/test_model.rb
+- spec/fixtures/three_column.rb
+- spec/pg_upsert_binary_spec.rb
+- spec/pg_upsert_csv_spec.rb
+- spec/spec.opts
+- spec/spec_helper.rb
+homepage: https://github.com/theSteveMitchell/postgres_upsert
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.1.9
+signing_key:
+specification_version: 4
+summary: A rubygem that integrates with ActiveRecord to insert/update large data sets into the database efficiently
+test_files:
+- spec/fixtures/2_col_binary_data.dat
+- spec/fixtures/comma_with_header.csv
+- spec/fixtures/comma_with_header_and_comma_values.csv
+- spec/fixtures/comma_with_header_and_unquoted_comma.csv
+- spec/fixtures/comma_without_header.csv
+- spec/fixtures/no_id.csv
+- spec/fixtures/reserved_word_model.rb
+- spec/fixtures/reserved_words.csv
+- spec/fixtures/semicolon_with_different_header.csv
+- spec/fixtures/semicolon_with_header.csv
+- spec/fixtures/tab_only_data.csv
+- spec/fixtures/tab_with_different_header.csv
+- spec/fixtures/tab_with_error.csv
+- spec/fixtures/tab_with_extra_line.csv
+- spec/fixtures/tab_with_header.csv
+- spec/fixtures/tab_with_two_lines.csv
+- spec/fixtures/test_model.rb
+- spec/fixtures/three_column.rb
+- spec/pg_upsert_binary_spec.rb
+- spec/pg_upsert_csv_spec.rb
+- spec/spec.opts
+- spec/spec_helper.rb