pg_data_encoder 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +3 -6
- data/examples/fast_load.rb +47 -0
- data/lib/pg_data_encoder/encode_for_copy.rb +9 -1
- data/lib/pg_data_encoder/version.rb +1 -1
- data/spec/fixtures/float.dat +0 -0
- data/spec/fixtures/output.dat +0 -0
- data/spec/fixtures/timestamp.dat +0 -0
- data/spec/verify_data_formats_spec.rb +53 -0
- metadata +8 -3
    
        data/README.md
    CHANGED
    
    | @@ -14,10 +14,9 @@ With it you can make a bulk insert like this | |
| 14 14 |  | 
| 15 15 | 
             
                Product.pg_copy_from(encoder.get_io, :format => :binary, :columns => [:id, :name, :desc])
         | 
| 16 16 |  | 
| 17 | 
            -
             | 
| 18 | 
            -
            I can get 1600 inserts/sec on my overworked macbook pro.
         | 
| 17 | 
            +
            ## Try it out yourself: the examples folder contains a simple test
         | 
| 19 18 |  | 
| 20 | 
            -
             | 
| 19 | 
            +
            on my i3 box with an ssd drive I can get 270,000 inserts a second with an hstore and indexes
         | 
| 21 20 |  | 
| 22 21 | 
             
            NOTE: Only a few of the many data types are supported. See below for more details.
         | 
| 23 22 |  | 
| @@ -62,12 +61,10 @@ or | |
| 62 61 |  | 
| 63 62 |  | 
| 64 63 |  | 
| 65 | 
            -
             | 
| 66 64 | 
             
            ## Added type support
         | 
| 67 65 |  | 
| 68 | 
            -
              Currently it supports Integers, Strings, Hstore.
         | 
| 66 | 
            +
              Currently it supports Integers, Strings, Hstore, Floats (double precision), Timestamp.
         | 
| 69 67 |  | 
| 70 | 
            -
              Help would be appreciated for DateTime, Float, Double, ...
         | 
| 71 68 | 
             
            ## Contributing
         | 
| 72 69 |  | 
| 73 70 |  | 
| @@ -0,0 +1,47 @@ | |
| 1 | 
            +
            require 'active_record'
         | 
| 2 | 
            +
            require 'postgres-copy'
         | 
| 3 | 
            +
            require 'pg_data_encoder'
         | 
| 4 | 
            +
            require 'benchmark'
         | 
| 5 | 
            +
            # Create a test db before running
         | 
| 6 | 
            +
            # add any needed username, password, port
         | 
| 7 | 
            +
            # install the required gems
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            # gem install postgres-copy pg_data_encoder activerecord --no-ri --no-rdoc
         | 
| 10 | 
            +
             | 
| 11 | 
            +
             | 
| 12 | 
            +
            ActiveSupport.on_load :active_record do
         | 
| 13 | 
            +
              require "postgres-copy/active_record"
         | 
| 14 | 
            +
            end
         | 
| 15 | 
            +
            ActiveRecord::Base.establish_connection(
         | 
| 16 | 
            +
                    :adapter  => "postgresql",
         | 
| 17 | 
            +
                    :host     => "localhost",
         | 
| 18 | 
            +
                    :database => "test"
         | 
| 19 | 
            +
            )
         | 
| 20 | 
            +
            ActiveRecord::Base.connection.execute %{
         | 
| 21 | 
            +
              SET client_min_messages TO warning;
         | 
| 22 | 
            +
              DROP TABLE IF EXISTS test_models;
         | 
| 23 | 
            +
              CREATE TABLE test_models (id serial PRIMARY KEY, data VARCHAR);
         | 
| 24 | 
            +
            }
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            class TestModel < ActiveRecord::Base
         | 
| 27 | 
            +
            end
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            encoder = PgDataEncoder::EncodeForCopy.new
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            puts "Loading data to disk"
         | 
| 32 | 
            +
            puts Benchmark.measure {
         | 
| 33 | 
            +
              0.upto(1_000_000).each {|i|
         | 
| 34 | 
            +
                encoder.add ["test data"]
         | 
| 35 | 
            +
              }
         | 
| 36 | 
            +
            }
         | 
| 37 | 
            +
            puts "inserting into db"
         | 
| 38 | 
            +
            puts Benchmark.measure {
         | 
| 39 | 
            +
              TestModel.pg_copy_from(encoder.get_io, :format => :binary, :columns => [:data])
         | 
| 40 | 
            +
            }
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            encoder.remove
         | 
| 43 | 
            +
            # Results on my i5 with ssd backed postgres server
         | 
| 44 | 
            +
            # 11.7 seconds to generate data file.   3.7 seconds to insert 1,000,000 simple items into a table.
         | 
| 45 | 
            +
            #
         | 
| 46 | 
            +
            # 11.670000   0.010000  11.680000 ( 11.733414)
         | 
| 47 | 
            +
            #  0.030000   0.000000   0.030000 (  3.782371)
         | 
| @@ -1,6 +1,7 @@ | |
| 1 1 | 
             
            require 'tempfile'
         | 
| 2 2 | 
             
            require 'stringio'
         | 
| 3 3 | 
             
            module PgDataEncoder
         | 
| 4 | 
            +
              POSTGRES_EPOCH_DATE = (Time.utc(2000,1,1).to_f * 1_000_000).to_i
         | 
| 4 5 | 
             
              class EncodeForCopy
         | 
| 5 6 | 
             
                def initialize(options = {})
         | 
| 6 7 | 
             
                  @options = options
         | 
| @@ -10,7 +11,6 @@ module PgDataEncoder | |
| 10 11 |  | 
| 11 12 | 
             
                def add(row)
         | 
| 12 13 | 
             
                  setup_io if !@io
         | 
| 13 | 
            -
             | 
| 14 14 | 
             
                  @io.write([row.size].pack("n"))
         | 
| 15 15 | 
             
                  row.each {|col|
         | 
| 16 16 | 
             
                    encode_field(@io, col)
         | 
| @@ -56,6 +56,10 @@ module PgDataEncoder | |
| 56 56 | 
             
                    buf = [field].pack("N")
         | 
| 57 57 | 
             
                    io.write([buf.bytesize].pack("N"))
         | 
| 58 58 | 
             
                    io.write(buf)
         | 
| 59 | 
            +
                  when Float
         | 
| 60 | 
            +
                    buf = [field].pack("G")
         | 
| 61 | 
            +
                    io.write([buf.bytesize].pack("N"))
         | 
| 62 | 
            +
                    io.write(buf)
         | 
| 59 63 | 
             
                  when nil
         | 
| 60 64 | 
             
                    io.write([-1].pack("N"))
         | 
| 61 65 | 
             
                  when String
         | 
| @@ -75,6 +79,10 @@ module PgDataEncoder | |
| 75 79 | 
             
                    }
         | 
| 76 80 | 
             
                    io.write([hash_io.pos].pack("N"))  # assumed identifier for hstore column
         | 
| 77 81 | 
             
                    io.write(hash_io.string)
         | 
| 82 | 
            +
                  when Time
         | 
| 83 | 
            +
                    buf = [(field.to_f * 1_000_000 - POSTGRES_EPOCH_DATE).to_i].pack("L!>")
         | 
| 84 | 
            +
                    io.write([buf.bytesize].pack("N"))
         | 
| 85 | 
            +
                    io.write(buf)
         | 
| 78 86 | 
             
                  else
         | 
| 79 87 | 
             
                    raise Exception.new("Unsupported Format: #{field.class.name}")
         | 
| 80 88 | 
             
                  end
         | 
| Binary file | 
    
        data/spec/fixtures/output.dat
    CHANGED
    
    | Binary file | 
| Binary file | 
| @@ -27,4 +27,57 @@ describe "generating data" do | |
| 27 27 | 
             
                str.should == existing_data
         | 
| 28 28 | 
             
              end
         | 
| 29 29 |  | 
| 30 | 
            +
             | 
| 31 | 
            +
              it 'should encode timestamp data correctly' do
         | 
| 32 | 
            +
                encoder = PgDataEncoder::EncodeForCopy.new
         | 
| 33 | 
            +
                encoder.add [Time.parse("2013-06-11 15:03:54.62605 UTC")]
         | 
| 34 | 
            +
                encoder.close
         | 
| 35 | 
            +
                io = encoder.get_io
         | 
| 36 | 
            +
                existing_data = filedata("timestamp.dat")
         | 
| 37 | 
            +
                str = io.read
         | 
| 38 | 
            +
                io.class.name.should == "StringIO"
         | 
| 39 | 
            +
                str.force_encoding("ASCII-8BIT")
         | 
| 40 | 
            +
                #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
         | 
| 41 | 
            +
                str.should == existing_data
         | 
| 42 | 
            +
              end
         | 
| 43 | 
            +
             | 
| 44 | 
            +
              it 'should encode float correctly from tempfile' do
         | 
| 45 | 
            +
                encoder = PgDataEncoder::EncodeForCopy.new(:use_tempfile => true)
         | 
| 46 | 
            +
                encoder.add [Time.parse("2013-06-11 15:03:54.62605 UTC")]
         | 
| 47 | 
            +
                encoder.close
         | 
| 48 | 
            +
                io = encoder.get_io
         | 
| 49 | 
            +
                existing_data = filedata("timestamp.dat")
         | 
| 50 | 
            +
                str = io.read
         | 
| 51 | 
            +
                io.class.name.should == "Tempfile"
         | 
| 52 | 
            +
                str.force_encoding("ASCII-8BIT")
         | 
| 53 | 
            +
                #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
         | 
| 54 | 
            +
                str.should == existing_data
         | 
| 55 | 
            +
              end
         | 
| 56 | 
            +
             | 
| 57 | 
            +
              it 'should encode float data correctly' do
         | 
| 58 | 
            +
                encoder = PgDataEncoder::EncodeForCopy.new
         | 
| 59 | 
            +
                encoder.add [1234567.1234567]
         | 
| 60 | 
            +
                encoder.close
         | 
| 61 | 
            +
                io = encoder.get_io
         | 
| 62 | 
            +
                existing_data = filedata("float.dat")
         | 
| 63 | 
            +
                str = io.read
         | 
| 64 | 
            +
                io.class.name.should == "StringIO"
         | 
| 65 | 
            +
                str.force_encoding("ASCII-8BIT")
         | 
| 66 | 
            +
                #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
         | 
| 67 | 
            +
                str.should == existing_data
         | 
| 68 | 
            +
              end
         | 
| 69 | 
            +
             | 
| 70 | 
            +
              it 'should encode float correctly from tempfile' do
         | 
| 71 | 
            +
                encoder = PgDataEncoder::EncodeForCopy.new(:use_tempfile => true)
         | 
| 72 | 
            +
                encoder.add [1234567.1234567]
         | 
| 73 | 
            +
                encoder.close
         | 
| 74 | 
            +
                io = encoder.get_io
         | 
| 75 | 
            +
                existing_data = filedata("float.dat")
         | 
| 76 | 
            +
                str = io.read
         | 
| 77 | 
            +
                io.class.name.should == "Tempfile"
         | 
| 78 | 
            +
                str.force_encoding("ASCII-8BIT")
         | 
| 79 | 
            +
                #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
         | 
| 80 | 
            +
                str.should == existing_data
         | 
| 81 | 
            +
              end
         | 
| 82 | 
            +
             | 
| 30 83 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: pg_data_encoder
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0.1 | 
| 4 | 
            +
              version: 0.0.2
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -9,7 +9,7 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date:  | 
| 12 | 
            +
            date: 2013-09-09 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: rspec
         | 
| @@ -56,13 +56,16 @@ files: | |
| 56 56 | 
             
            - LICENSE.txt
         | 
| 57 57 | 
             
            - README.md
         | 
| 58 58 | 
             
            - Rakefile
         | 
| 59 | 
            +
            - examples/fast_load.rb
         | 
| 59 60 | 
             
            - lib/pg_data_encoder.rb
         | 
| 60 61 | 
             
            - lib/pg_data_encoder/encode_for_copy.rb
         | 
| 61 62 | 
             
            - lib/pg_data_encoder/version.rb
         | 
| 62 63 | 
             
            - pg_data_encoder.gemspec
         | 
| 63 64 | 
             
            - spec/fixtures/3_col_hstore.dat
         | 
| 64 65 | 
             
            - spec/fixtures/3_col_hstore.txt
         | 
| 66 | 
            +
            - spec/fixtures/float.dat
         | 
| 65 67 | 
             
            - spec/fixtures/output.dat
         | 
| 68 | 
            +
            - spec/fixtures/timestamp.dat
         | 
| 66 69 | 
             
            - spec/spec_helper.rb
         | 
| 67 70 | 
             
            - spec/verify_data_formats_spec.rb
         | 
| 68 71 | 
             
            homepage: https://github.com/pbrumm/pg_data_encoder
         | 
| @@ -85,7 +88,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 85 88 | 
             
                  version: '0'
         | 
| 86 89 | 
             
            requirements: []
         | 
| 87 90 | 
             
            rubyforge_project: 
         | 
| 88 | 
            -
            rubygems_version: 1.8. | 
| 91 | 
            +
            rubygems_version: 1.8.23
         | 
| 89 92 | 
             
            signing_key: 
         | 
| 90 93 | 
             
            specification_version: 3
         | 
| 91 94 | 
             
            summary: for faster input of data into postgres you can use this to generate the binary
         | 
| @@ -93,6 +96,8 @@ summary: for faster input of data into postgres you can use this to generate the | |
| 93 96 | 
             
            test_files:
         | 
| 94 97 | 
             
            - spec/fixtures/3_col_hstore.dat
         | 
| 95 98 | 
             
            - spec/fixtures/3_col_hstore.txt
         | 
| 99 | 
            +
            - spec/fixtures/float.dat
         | 
| 96 100 | 
             
            - spec/fixtures/output.dat
         | 
| 101 | 
            +
            - spec/fixtures/timestamp.dat
         | 
| 97 102 | 
             
            - spec/spec_helper.rb
         | 
| 98 103 | 
             
            - spec/verify_data_formats_spec.rb
         |