pg_data_encoder 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +3 -6
- data/examples/fast_load.rb +47 -0
- data/lib/pg_data_encoder/encode_for_copy.rb +9 -1
- data/lib/pg_data_encoder/version.rb +1 -1
- data/spec/fixtures/float.dat +0 -0
- data/spec/fixtures/output.dat +0 -0
- data/spec/fixtures/timestamp.dat +0 -0
- data/spec/verify_data_formats_spec.rb +53 -0
- metadata +8 -3
data/README.md
CHANGED
@@ -14,10 +14,9 @@ With it you can make a bulk insert like this
|
|
14
14
|
|
15
15
|
Product.pg_copy_from(encoder.get_io, :format => :binary, :columns => [:id, :name, :desc])
|
16
16
|
|
17
|
-
|
18
|
-
I can get 1600 inserts/sec on my overworked macbook pro.
|
17
|
+
## Try it out yourself: the examples folder contains a simple test
|
19
18
|
|
20
|
-
|
19
|
+
On my i3 box with an SSD drive, I can get 270,000 inserts a second with an hstore and indexes.
|
21
20
|
|
22
21
|
NOTE: Only a few of the many data types are supported. Check below for more details.
|
23
22
|
|
@@ -62,12 +61,10 @@ or
|
|
62
61
|
|
63
62
|
|
64
63
|
|
65
|
-
|
66
64
|
## Added type support
|
67
65
|
|
68
|
-
Currently it supports Integers, Strings, Hstore.
|
66
|
+
Currently it supports Integers, Strings, Hstore, Floats (double precision), Timestamp.
|
69
67
|
|
70
|
-
Help would be appreciated for DateTime, Float, Double, ...
|
71
68
|
## Contributing
|
72
69
|
|
73
70
|
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'active_record'
|
2
|
+
require 'postgres-copy'
|
3
|
+
require 'pg_data_encoder'
|
4
|
+
require 'benchmark'
|
5
|
+
# Create a test db before running
|
6
|
+
# add any needed username, password, port
|
7
|
+
# install the required gems
|
8
|
+
#
|
9
|
+
# gem install postgres-copy pg_data_encoder activerecord --no-ri --no-rdoc
|
10
|
+
|
11
|
+
|
12
|
+
ActiveSupport.on_load :active_record do
|
13
|
+
require "postgres-copy/active_record"
|
14
|
+
end
|
15
|
+
ActiveRecord::Base.establish_connection(
|
16
|
+
:adapter => "postgresql",
|
17
|
+
:host => "localhost",
|
18
|
+
:database => "test"
|
19
|
+
)
|
20
|
+
ActiveRecord::Base.connection.execute %{
|
21
|
+
SET client_min_messages TO warning;
|
22
|
+
DROP TABLE IF EXISTS test_models;
|
23
|
+
CREATE TABLE test_models (id serial PRIMARY KEY, data VARCHAR);
|
24
|
+
}
|
25
|
+
|
26
|
+
class TestModel < ActiveRecord::Base
|
27
|
+
end
|
28
|
+
|
29
|
+
encoder = PgDataEncoder::EncodeForCopy.new
|
30
|
+
|
31
|
+
puts "Loading data to disk"
|
32
|
+
puts Benchmark.measure {
|
33
|
+
0.upto(1_000_000).each {|i|
|
34
|
+
encoder.add ["test data"]
|
35
|
+
}
|
36
|
+
}
|
37
|
+
puts "inserting into db"
|
38
|
+
puts Benchmark.measure {
|
39
|
+
TestModel.pg_copy_from(encoder.get_io, :format => :binary, :columns => [:data])
|
40
|
+
}
|
41
|
+
|
42
|
+
encoder.remove
|
43
|
+
# Results on my i5 with ssd backed postgres server
|
44
|
+
# 11.7 seconds to generate data file. 3.7 seconds to insert 1,000,000 simple items into a table.
|
45
|
+
#
|
46
|
+
# 11.670000 0.010000 11.680000 ( 11.733414)
|
47
|
+
# 0.030000 0.000000 0.030000 ( 3.782371)
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'tempfile'
|
2
2
|
require 'stringio'
|
3
3
|
module PgDataEncoder
|
4
|
+
POSTGRES_EPOCH_DATE = (Time.utc(2000,1,1).to_f * 1_000_000).to_i
|
4
5
|
class EncodeForCopy
|
5
6
|
def initialize(options = {})
|
6
7
|
@options = options
|
@@ -10,7 +11,6 @@ module PgDataEncoder
|
|
10
11
|
|
11
12
|
def add(row)
|
12
13
|
setup_io if !@io
|
13
|
-
|
14
14
|
@io.write([row.size].pack("n"))
|
15
15
|
row.each {|col|
|
16
16
|
encode_field(@io, col)
|
@@ -56,6 +56,10 @@ module PgDataEncoder
|
|
56
56
|
buf = [field].pack("N")
|
57
57
|
io.write([buf.bytesize].pack("N"))
|
58
58
|
io.write(buf)
|
59
|
+
when Float
|
60
|
+
buf = [field].pack("G")
|
61
|
+
io.write([buf.bytesize].pack("N"))
|
62
|
+
io.write(buf)
|
59
63
|
when nil
|
60
64
|
io.write([-1].pack("N"))
|
61
65
|
when String
|
@@ -75,6 +79,10 @@ module PgDataEncoder
|
|
75
79
|
}
|
76
80
|
io.write([hash_io.pos].pack("N")) # assumed identifier for hstore column
|
77
81
|
io.write(hash_io.string)
|
82
|
+
when Time
|
83
|
+
buf = [(field.to_f * 1_000_000 - POSTGRES_EPOCH_DATE).to_i].pack("L!>")
|
84
|
+
io.write([buf.bytesize].pack("N"))
|
85
|
+
io.write(buf)
|
78
86
|
else
|
79
87
|
raise Exception.new("Unsupported Format: #{field.class.name}")
|
80
88
|
end
|
Binary file
|
data/spec/fixtures/output.dat
CHANGED
Binary file
|
Binary file
|
@@ -27,4 +27,57 @@ describe "generating data" do
|
|
27
27
|
str.should == existing_data
|
28
28
|
end
|
29
29
|
|
30
|
+
|
31
|
+
it 'should encode timestamp data correctly' do
|
32
|
+
encoder = PgDataEncoder::EncodeForCopy.new
|
33
|
+
encoder.add [Time.parse("2013-06-11 15:03:54.62605 UTC")]
|
34
|
+
encoder.close
|
35
|
+
io = encoder.get_io
|
36
|
+
existing_data = filedata("timestamp.dat")
|
37
|
+
str = io.read
|
38
|
+
io.class.name.should == "StringIO"
|
39
|
+
str.force_encoding("ASCII-8BIT")
|
40
|
+
#File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
|
41
|
+
str.should == existing_data
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'should encode float correctly from tempfile' do
|
45
|
+
encoder = PgDataEncoder::EncodeForCopy.new(:use_tempfile => true)
|
46
|
+
encoder.add [Time.parse("2013-06-11 15:03:54.62605 UTC")]
|
47
|
+
encoder.close
|
48
|
+
io = encoder.get_io
|
49
|
+
existing_data = filedata("timestamp.dat")
|
50
|
+
str = io.read
|
51
|
+
io.class.name.should == "Tempfile"
|
52
|
+
str.force_encoding("ASCII-8BIT")
|
53
|
+
#File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
|
54
|
+
str.should == existing_data
|
55
|
+
end
|
56
|
+
|
57
|
+
it 'should encode float data correctly' do
|
58
|
+
encoder = PgDataEncoder::EncodeForCopy.new
|
59
|
+
encoder.add [1234567.1234567]
|
60
|
+
encoder.close
|
61
|
+
io = encoder.get_io
|
62
|
+
existing_data = filedata("float.dat")
|
63
|
+
str = io.read
|
64
|
+
io.class.name.should == "StringIO"
|
65
|
+
str.force_encoding("ASCII-8BIT")
|
66
|
+
#File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
|
67
|
+
str.should == existing_data
|
68
|
+
end
|
69
|
+
|
70
|
+
it 'should encode float correctly from tempfile' do
|
71
|
+
encoder = PgDataEncoder::EncodeForCopy.new(:use_tempfile => true)
|
72
|
+
encoder.add [1234567.1234567]
|
73
|
+
encoder.close
|
74
|
+
io = encoder.get_io
|
75
|
+
existing_data = filedata("float.dat")
|
76
|
+
str = io.read
|
77
|
+
io.class.name.should == "Tempfile"
|
78
|
+
str.force_encoding("ASCII-8BIT")
|
79
|
+
#File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
|
80
|
+
str.should == existing_data
|
81
|
+
end
|
82
|
+
|
30
83
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pg_data_encoder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-09-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -56,13 +56,16 @@ files:
|
|
56
56
|
- LICENSE.txt
|
57
57
|
- README.md
|
58
58
|
- Rakefile
|
59
|
+
- examples/fast_load.rb
|
59
60
|
- lib/pg_data_encoder.rb
|
60
61
|
- lib/pg_data_encoder/encode_for_copy.rb
|
61
62
|
- lib/pg_data_encoder/version.rb
|
62
63
|
- pg_data_encoder.gemspec
|
63
64
|
- spec/fixtures/3_col_hstore.dat
|
64
65
|
- spec/fixtures/3_col_hstore.txt
|
66
|
+
- spec/fixtures/float.dat
|
65
67
|
- spec/fixtures/output.dat
|
68
|
+
- spec/fixtures/timestamp.dat
|
66
69
|
- spec/spec_helper.rb
|
67
70
|
- spec/verify_data_formats_spec.rb
|
68
71
|
homepage: https://github.com/pbrumm/pg_data_encoder
|
@@ -85,7 +88,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
85
88
|
version: '0'
|
86
89
|
requirements: []
|
87
90
|
rubyforge_project:
|
88
|
-
rubygems_version: 1.8.
|
91
|
+
rubygems_version: 1.8.23
|
89
92
|
signing_key:
|
90
93
|
specification_version: 3
|
91
94
|
summary: for faster input of data into postgres you can use this to generate the binary
|
@@ -93,6 +96,8 @@ summary: for faster input of data into postgres you can use this to generate the
|
|
93
96
|
test_files:
|
94
97
|
- spec/fixtures/3_col_hstore.dat
|
95
98
|
- spec/fixtures/3_col_hstore.txt
|
99
|
+
- spec/fixtures/float.dat
|
96
100
|
- spec/fixtures/output.dat
|
101
|
+
- spec/fixtures/timestamp.dat
|
97
102
|
- spec/spec_helper.rb
|
98
103
|
- spec/verify_data_formats_spec.rb
|