pg_data_encoder 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -14,10 +14,9 @@ With it you can make a bulk insert like this
14
14
 
15
15
  Product.pg_copy_from(encoder.get_io, :format => :binary, :columns => [:id, :name, :desc])
16
16
 
17
- With a fairly complicated table that includes an index and an hstore + index.
18
- I can get 1600 inserts/sec on my overworked macbook pro.
17
+ ## Try it out yourself: the examples folder contains a simple test
19
18
 
20
- Your usage may vary
19
+ On my i3 box with an SSD drive I can get 270,000 inserts a second with an hstore and indexes.
21
20
 
22
21
  NOTE: Only a few of the many data types are supported. Check below for more details.
23
22
 
@@ -62,12 +61,10 @@ or
62
61
 
63
62
 
64
63
 
65
-
66
64
  ## Added type support
67
65
 
68
- Currently it supports Integers, Strings, Hstore.
66
+ Currently it supports Integers, Strings, Hstore, Floats (double precision), and Timestamps.
69
67
 
70
- Help would be appreciated for DateTime, Float, Double, ...
71
68
  ## Contributing
72
69
 
73
70
 
@@ -0,0 +1,47 @@
1
+ require 'active_record'
2
+ require 'postgres-copy'
3
+ require 'pg_data_encoder'
4
+ require 'benchmark'
5
+ # Create a test db before running
6
+ # add any needed username, password, port
7
+ # install the required gems
8
+ #
9
+ # gem install postgres-copy pg_data_encoder activerecord --no-ri --no-rdoc
10
+
11
+
12
+ ActiveSupport.on_load :active_record do
13
+ require "postgres-copy/active_record"
14
+ end
15
+ ActiveRecord::Base.establish_connection(
16
+ :adapter => "postgresql",
17
+ :host => "localhost",
18
+ :database => "test"
19
+ )
20
+ ActiveRecord::Base.connection.execute %{
21
+ SET client_min_messages TO warning;
22
+ DROP TABLE IF EXISTS test_models;
23
+ CREATE TABLE test_models (id serial PRIMARY KEY, data VARCHAR);
24
+ }
25
+
26
+ class TestModel < ActiveRecord::Base
27
+ end
28
+
29
+ encoder = PgDataEncoder::EncodeForCopy.new
30
+
31
+ puts "Loading data to disk"
32
+ puts Benchmark.measure {
33
+ 0.upto(1_000_000).each {|i|
34
+ encoder.add ["test data"]
35
+ }
36
+ }
37
+ puts "inserting into db"
38
+ puts Benchmark.measure {
39
+ TestModel.pg_copy_from(encoder.get_io, :format => :binary, :columns => [:data])
40
+ }
41
+
42
+ encoder.remove
43
+ # Results on my i5 with ssd backed postgres server
44
+ # 11.7 seconds to generate the data file. 3.8 seconds to insert 1,000,000 simple items into a table.
45
+ #
46
+ # 11.670000 0.010000 11.680000 ( 11.733414)
47
+ # 0.030000 0.000000 0.030000 ( 3.782371)
@@ -1,6 +1,7 @@
1
1
  require 'tempfile'
2
2
  require 'stringio'
3
3
  module PgDataEncoder
4
+ POSTGRES_EPOCH_DATE = (Time.utc(2000,1,1).to_f * 1_000_000).to_i
4
5
  class EncodeForCopy
5
6
  def initialize(options = {})
6
7
  @options = options
@@ -10,7 +11,6 @@ module PgDataEncoder
10
11
 
11
12
  def add(row)
12
13
  setup_io if !@io
13
-
14
14
  @io.write([row.size].pack("n"))
15
15
  row.each {|col|
16
16
  encode_field(@io, col)
@@ -56,6 +56,10 @@ module PgDataEncoder
56
56
  buf = [field].pack("N")
57
57
  io.write([buf.bytesize].pack("N"))
58
58
  io.write(buf)
59
+ when Float
60
+ buf = [field].pack("G")
61
+ io.write([buf.bytesize].pack("N"))
62
+ io.write(buf)
59
63
  when nil
60
64
  io.write([-1].pack("N"))
61
65
  when String
@@ -75,6 +79,10 @@ module PgDataEncoder
75
79
  }
76
80
  io.write([hash_io.pos].pack("N")) # assumed identifier for hstore column
77
81
  io.write(hash_io.string)
82
+ when Time
83
+ buf = [(field.to_f * 1_000_000 - POSTGRES_EPOCH_DATE).to_i].pack("q>") # microseconds since 2000-01-01 as a fixed 64-bit signed big-endian int; "L!" is native-long sized (4 bytes on some platforms)
84
+ io.write([buf.bytesize].pack("N"))
85
+ io.write(buf)
78
86
  else
79
87
  raise Exception.new("Unsupported Format: #{field.class.name}")
80
88
  end
@@ -1,3 +1,3 @@
1
1
  module PgDataEncoder
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
Binary file
Binary file
Binary file
@@ -27,4 +27,57 @@ describe "generating data" do
27
27
  str.should == existing_data
28
28
  end
29
29
 
30
+
31
+ it 'should encode timestamp data correctly' do
32
+ encoder = PgDataEncoder::EncodeForCopy.new
33
+ encoder.add [Time.parse("2013-06-11 15:03:54.62605 UTC")]
34
+ encoder.close
35
+ io = encoder.get_io
36
+ existing_data = filedata("timestamp.dat")
37
+ str = io.read
38
+ io.class.name.should == "StringIO"
39
+ str.force_encoding("ASCII-8BIT")
40
+ #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
41
+ str.should == existing_data
42
+ end
43
+
44
+ it 'should encode float correctly from tempfile' do
45
+ encoder = PgDataEncoder::EncodeForCopy.new(:use_tempfile => true)
46
+ encoder.add [Time.parse("2013-06-11 15:03:54.62605 UTC")]
47
+ encoder.close
48
+ io = encoder.get_io
49
+ existing_data = filedata("timestamp.dat")
50
+ str = io.read
51
+ io.class.name.should == "Tempfile"
52
+ str.force_encoding("ASCII-8BIT")
53
+ #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
54
+ str.should == existing_data
55
+ end
56
+
57
+ it 'should encode float data correctly' do
58
+ encoder = PgDataEncoder::EncodeForCopy.new
59
+ encoder.add [1234567.1234567]
60
+ encoder.close
61
+ io = encoder.get_io
62
+ existing_data = filedata("float.dat")
63
+ str = io.read
64
+ io.class.name.should == "StringIO"
65
+ str.force_encoding("ASCII-8BIT")
66
+ #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
67
+ str.should == existing_data
68
+ end
69
+
70
+ it 'should encode float correctly from tempfile' do
71
+ encoder = PgDataEncoder::EncodeForCopy.new(:use_tempfile => true)
72
+ encoder.add [1234567.1234567]
73
+ encoder.close
74
+ io = encoder.get_io
75
+ existing_data = filedata("float.dat")
76
+ str = io.read
77
+ io.class.name.should == "Tempfile"
78
+ str.force_encoding("ASCII-8BIT")
79
+ #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
80
+ str.should == existing_data
81
+ end
82
+
30
83
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pg_data_encoder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-12 00:00:00.000000000 Z
12
+ date: 2013-09-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -56,13 +56,16 @@ files:
56
56
  - LICENSE.txt
57
57
  - README.md
58
58
  - Rakefile
59
+ - examples/fast_load.rb
59
60
  - lib/pg_data_encoder.rb
60
61
  - lib/pg_data_encoder/encode_for_copy.rb
61
62
  - lib/pg_data_encoder/version.rb
62
63
  - pg_data_encoder.gemspec
63
64
  - spec/fixtures/3_col_hstore.dat
64
65
  - spec/fixtures/3_col_hstore.txt
66
+ - spec/fixtures/float.dat
65
67
  - spec/fixtures/output.dat
68
+ - spec/fixtures/timestamp.dat
66
69
  - spec/spec_helper.rb
67
70
  - spec/verify_data_formats_spec.rb
68
71
  homepage: https://github.com/pbrumm/pg_data_encoder
@@ -85,7 +88,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
85
88
  version: '0'
86
89
  requirements: []
87
90
  rubyforge_project:
88
- rubygems_version: 1.8.24
91
+ rubygems_version: 1.8.23
89
92
  signing_key:
90
93
  specification_version: 3
91
94
  summary: for faster input of data into postgres you can use this to generate the binary
@@ -93,6 +96,8 @@ summary: for faster input of data into postgres you can use this to generate the
93
96
  test_files:
94
97
  - spec/fixtures/3_col_hstore.dat
95
98
  - spec/fixtures/3_col_hstore.txt
99
+ - spec/fixtures/float.dat
96
100
  - spec/fixtures/output.dat
101
+ - spec/fixtures/timestamp.dat
97
102
  - spec/spec_helper.rb
98
103
  - spec/verify_data_formats_spec.rb