pg_data_encoder 0.0.1 → 0.0.2

data/README.md CHANGED
@@ -14,10 +14,9 @@ With it you can make a bulk insert like this
 
  Product.pg_copy_from(encoder.get_io, :format => :binary, :columns => [:id, :name, :desc])
 
- With a fairly complicated table that includes an index and an hstore + index.
- I can get 1600 inserts/sec on my overworked macbook pro.
+ ## Try it out yourself: there is a simple test in the examples folder
 
- Your usage may vary
+ On my i3 box with an SSD drive I can get 270,000 inserts a second with an hstore and indexes.
 
  NOTE: Only a few of the many data types are supported. check below for more details
 
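For readers skimming this hunk, the flow around that `pg_copy_from` line looks roughly like the following. This is a sketch reconstructed from the gem's API as used elsewhere in this diff; `Product` and its column names are placeholders, not part of the gem.

```ruby
require 'pg_data_encoder'

# Build the binary COPY payload row by row.
encoder = PgDataEncoder::EncodeForCopy.new
encoder.add [1, "widget", "a nice widget"]   # one array per row, in column order
encoder.add [2, "gadget", "a nice gadget"]
encoder.close

# With postgres-copy loaded and a Product model defined (see the example
# script added below for the full ActiveRecord setup), the IO is streamed
# straight into the table:
#
#   Product.pg_copy_from(encoder.get_io, :format => :binary, :columns => [:id, :name, :desc])
#
encoder.remove   # discard the temp buffer once the copy is done
```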
@@ -62,12 +61,10 @@ or
 
 
 
-
  ## Added type support
 
- Currently it supports Integers, Strings, Hstore.
+ Currently it supports Integers, Strings, Hstore, Floats (double precision), Timestamp.
 
- Help would be appreciated for DateTime, Float, Double, ...
  ## Contributing
 
 
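To illustrate the expanded type list in the added line above, a single row mixing every supported type could be built like this. A sketch only: the values are hypothetical, and the Hash-to-hstore mapping follows the hstore handling shown in encode_for_copy.rb further down.

```ruby
require 'pg_data_encoder'

encoder = PgDataEncoder::EncodeForCopy.new
encoder.add [
  42,                                   # Integer -> int4
  "some text",                          # String  -> varchar/text
  {"color" => "red", "size" => "xl"},   # Hash    -> hstore
  1234567.1234567,                      # Float   -> double precision
  Time.utc(2013, 6, 11, 15, 3, 54)      # Time    -> timestamp
]
encoder.close
io = encoder.get_io   # hand this to pg_copy_from with :format => :binary
```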
data/examples/fast_load.rb ADDED
@@ -0,0 +1,47 @@
+ require 'active_record'
+ require 'postgres-copy'
+ require 'pg_data_encoder'
+ require 'benchmark'
+ # Create a test db before running
+ # add any needed username, password, port
+ # install the required gems
+ #
+ #   gem install postgres-copy pg_data_encoder activerecord --no-ri --no-rdoc
+
+
+ ActiveSupport.on_load :active_record do
+   require "postgres-copy/active_record"
+ end
+ ActiveRecord::Base.establish_connection(
+   :adapter => "postgresql",
+   :host => "localhost",
+   :database => "test"
+ )
+ ActiveRecord::Base.connection.execute %{
+   SET client_min_messages TO warning;
+   DROP TABLE IF EXISTS test_models;
+   CREATE TABLE test_models (id serial PRIMARY KEY, data VARCHAR);
+ }
+
+ class TestModel < ActiveRecord::Base
+ end
+
+ encoder = PgDataEncoder::EncodeForCopy.new
+
+ puts "Loading data to disk"
+ puts Benchmark.measure {
+   0.upto(1_000_000).each {|i|
+     encoder.add ["test data"]
+   }
+ }
+ puts "inserting into db"
+ puts Benchmark.measure {
+   TestModel.pg_copy_from(encoder.get_io, :format => :binary, :columns => [:data])
+ }
+
+ encoder.remove
+ # Results on my i5 with ssd backed postgres server
+ # 11.7 seconds to generate data file. 3.7 seconds to insert 1,000,000 simple items into a table.
+ #
+ #   11.670000   0.010000  11.680000 ( 11.733414)
+ #    0.030000   0.000000   0.030000 (  3.782371)
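To reproduce those numbers locally, create the `test` database the script connects to, install the gems listed in its header comment, and run the file (examples/fast_load.rb in the metadata file list below) with plain `ruby`.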
data/lib/pg_data_encoder/encode_for_copy.rb CHANGED
@@ -1,6 +1,7 @@
  require 'tempfile'
  require 'stringio'
  module PgDataEncoder
+   POSTGRES_EPOCH_DATE = (Time.utc(2000,1,1).to_f * 1_000_000).to_i
    class EncodeForCopy
      def initialize(options = {})
        @options = options
@@ -10,7 +11,6 @@ module PgDataEncoder
 
    def add(row)
      setup_io if !@io
-
      @io.write([row.size].pack("n"))
      row.each {|col|
        encode_field(@io, col)
@@ -56,6 +56,10 @@ module PgDataEncoder
        buf = [field].pack("N")
        io.write([buf.bytesize].pack("N"))
        io.write(buf)
+     when Float
+       buf = [field].pack("G")
+       io.write([buf.bytesize].pack("N"))
+       io.write(buf)
      when nil
        io.write([-1].pack("N"))
      when String
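For reference, the new Float branch follows the same length-prefixed layout as the Integer branch above. A minimal standalone sketch of the bytes it emits for one field, with StringIO standing in for the encoder's internal buffer:

```ruby
require 'stringio'

io = StringIO.new
field = 1234567.1234567

buf = [field].pack("G")             # 8-byte big-endian IEEE 754 double (float8)
io.write([buf.bytesize].pack("N"))  # 4-byte big-endian length prefix: 8
io.write(buf)

io.string.bytesize                  # => 12 (length prefix + payload)
```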
@@ -75,6 +79,10 @@ module PgDataEncoder
        }
        io.write([hash_io.pos].pack("N")) # assumed identifier for hstore column
        io.write(hash_io.string)
+     when Time
+       buf = [(field.to_f * 1_000_000 - POSTGRES_EPOCH_DATE).to_i].pack("L!>")
+       io.write([buf.bytesize].pack("N"))
+       io.write(buf)
      else
        raise Exception.new("Unsupported Format: #{field.class.name}")
      end
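Likewise, the new Time branch stores microseconds elapsed since the PostgreSQL timestamp epoch (2000-01-01 UTC), using the POSTGRES_EPOCH_DATE constant added at the top of this file. A sketch of the value it computes; note that `pack("q>")` is used here as an explicit 8-byte big-endian stand-in for the gem's `"L!>"`, which is also 8 bytes on the 64-bit builds this targets:

```ruby
require 'time'

POSTGRES_EPOCH_DATE = (Time.utc(2000, 1, 1).to_f * 1_000_000).to_i

t = Time.parse("2013-06-11 15:03:54.62605 UTC")    # same value the new spec uses
micros = (t.to_f * 1_000_000 - POSTGRES_EPOCH_DATE).to_i

buf = [micros].pack("q>")                          # 8-byte big-endian payload
field_bytes = [buf.bytesize].pack("N") + buf       # length prefix (8) + payload
field_bytes.bytesize                               # => 12
```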
data/lib/pg_data_encoder/version.rb CHANGED
@@ -1,3 +1,3 @@
  module PgDataEncoder
-   VERSION = "0.0.1"
+   VERSION = "0.0.2"
  end
Three binary fixture files changed (the spec/fixtures/*.dat files listed in the metadata below).
data/spec/verify_data_formats_spec.rb CHANGED
@@ -27,4 +27,57 @@ describe "generating data" do
      str.should == existing_data
    end
 
+
+   it 'should encode timestamp data correctly' do
+     encoder = PgDataEncoder::EncodeForCopy.new
+     encoder.add [Time.parse("2013-06-11 15:03:54.62605 UTC")]
+     encoder.close
+     io = encoder.get_io
+     existing_data = filedata("timestamp.dat")
+     str = io.read
+     io.class.name.should == "StringIO"
+     str.force_encoding("ASCII-8BIT")
+     #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
+     str.should == existing_data
+   end
+
+   it 'should encode timestamp correctly from tempfile' do
+     encoder = PgDataEncoder::EncodeForCopy.new(:use_tempfile => true)
+     encoder.add [Time.parse("2013-06-11 15:03:54.62605 UTC")]
+     encoder.close
+     io = encoder.get_io
+     existing_data = filedata("timestamp.dat")
+     str = io.read
+     io.class.name.should == "Tempfile"
+     str.force_encoding("ASCII-8BIT")
+     #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
+     str.should == existing_data
+   end
+
+   it 'should encode float data correctly' do
+     encoder = PgDataEncoder::EncodeForCopy.new
+     encoder.add [1234567.1234567]
+     encoder.close
+     io = encoder.get_io
+     existing_data = filedata("float.dat")
+     str = io.read
+     io.class.name.should == "StringIO"
+     str.force_encoding("ASCII-8BIT")
+     #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
+     str.should == existing_data
+   end
+
+   it 'should encode float correctly from tempfile' do
+     encoder = PgDataEncoder::EncodeForCopy.new(:use_tempfile => true)
+     encoder.add [1234567.1234567]
+     encoder.close
+     io = encoder.get_io
+     existing_data = filedata("float.dat")
+     str = io.read
+     io.class.name.should == "Tempfile"
+     str.force_encoding("ASCII-8BIT")
+     #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
+     str.should == existing_data
+   end
+
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: pg_data_encoder
  version: !ruby/object:Gem::Version
- version: 0.0.1
+ version: 0.0.2
  prerelease:
  platform: ruby
  authors:
@@ -9,7 +9,7 @@ authors:
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2012-12-12 00:00:00.000000000 Z
+ date: 2013-09-09 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: rspec
@@ -56,13 +56,16 @@ files:
  - LICENSE.txt
  - README.md
  - Rakefile
+ - examples/fast_load.rb
  - lib/pg_data_encoder.rb
  - lib/pg_data_encoder/encode_for_copy.rb
  - lib/pg_data_encoder/version.rb
  - pg_data_encoder.gemspec
  - spec/fixtures/3_col_hstore.dat
  - spec/fixtures/3_col_hstore.txt
+ - spec/fixtures/float.dat
  - spec/fixtures/output.dat
+ - spec/fixtures/timestamp.dat
  - spec/spec_helper.rb
  - spec/verify_data_formats_spec.rb
  homepage: https://github.com/pbrumm/pg_data_encoder
@@ -85,7 +88,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 1.8.24
+ rubygems_version: 1.8.23
  signing_key:
  specification_version: 3
  summary: for faster input of data into postgres you can use this to generate the binary
@@ -93,6 +96,8 @@ summary: for faster input of data into postgres you can use this to generate the
  test_files:
  - spec/fixtures/3_col_hstore.dat
  - spec/fixtures/3_col_hstore.txt
+ - spec/fixtures/float.dat
  - spec/fixtures/output.dat
+ - spec/fixtures/timestamp.dat
  - spec/spec_helper.rb
  - spec/verify_data_formats_spec.rb