pg_data_encoder 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +79 -0
- data/Rakefile +1 -0
- data/lib/pg_data_encoder/encode_for_copy.rb +84 -0
- data/lib/pg_data_encoder/version.rb +3 -0
- data/lib/pg_data_encoder.rb +7 -0
- data/pg_data_encoder.gemspec +21 -0
- data/spec/fixtures/3_col_hstore.dat +0 -0
- data/spec/fixtures/3_col_hstore.txt +1 -0
- data/spec/fixtures/output.dat +0 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/verify_data_formats_spec.rb +30 -0
- metadata +98 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Pete Brumm
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# PgDataEncoder
|
2
|
+
|
3
|
+
Creates a binary data file that can be imported with PostgreSQL's `COPY ... FROM` command.
|
4
|
+
|
5
|
+
Works well in collaboration with the postgres-copy gem
|
6
|
+
|
7
|
+
https://github.com/diogob/postgres-copy
|
8
|
+
|
9
|
+
With it you can make a bulk insert like this:
|
10
|
+
|
11
|
+
encoder = PgDataEncoder::EncodeForCopy.new
|
12
|
+
encoder.add [1, "test", "first"]
|
13
|
+
encoder.add [2, "test2", "second"]
|
14
|
+
|
15
|
+
Product.pg_copy_from(encoder.get_io, :format => :binary, :columns => [:id, :name, :desc])
|
16
|
+
|
17
|
+
With a fairly complicated table that includes an index and an hstore + index,
|
18
|
+
I can get 1600 inserts/sec on my overworked macbook pro.
|
19
|
+
|
20
|
+
Your usage may vary
|
21
|
+
|
22
|
+
NOTE: Only a few of the many data types are supported. Check below for more details.
|
23
|
+
|
24
|
+
## Installation
|
25
|
+
|
26
|
+
Add this line to your application's Gemfile:
|
27
|
+
|
28
|
+
gem 'pg_data_encoder'
|
29
|
+
|
30
|
+
And then execute:
|
31
|
+
|
32
|
+
$ bundle
|
33
|
+
|
34
|
+
Or install it yourself as:
|
35
|
+
|
36
|
+
$ gem install pg_data_encoder
|
37
|
+
|
38
|
+
## Usage
|
39
|
+
|
40
|
+
pg = PgDataEncoder::EncodeForCopy.new
|
41
|
+
pg.add([1,2,3,4,"text"])
|
42
|
+
io = pg.get_io
|
43
|
+
|
44
|
+
For large imports you can pass the `use_tempfile: true` option to buffer the data in a Tempfile; otherwise a StringIO is used.
|
45
|
+
|
46
|
+
pg = PgDataEncoder::EncodeForCopy.new(use_tempfile: true)
|
47
|
+
pg.add([1,2,3,4,"text"])
|
48
|
+
io = pg.get_io
|
49
|
+
|
50
|
+
pg.remove # to delete your file
|
51
|
+
|
52
|
+
## Notes
|
53
|
+
|
54
|
+
|
55
|
+
Columns must line up with the columns of the incoming table. If they don't, restrict the COPY to an explicit column list so that the missing columns are not needed.
|
56
|
+
|
57
|
+
COPY table_name FROM STDIN BINARY
|
58
|
+
|
59
|
+
or
|
60
|
+
|
61
|
+
COPY table_name(field1, field2) FROM STDIN BINARY
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
## Added type support
|
67
|
+
|
68
|
+
Currently it supports Integers, Strings, nil (written as NULL), and Hstore (from a Ruby Hash).
|
69
|
+
|
70
|
+
Help would be appreciated for DateTime, Float, Double, ...
|
71
|
+
## Contributing
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
1. Fork it
|
76
|
+
2. Create your feature branch (`git checkout -b feature/new_feature`)
|
77
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
78
|
+
4. Push to the branch (`git push origin feature/new_feature`)
|
79
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
require 'stringio'
|
3
|
+
module PgDataEncoder
  # Builds a PostgreSQL binary COPY stream row by row.
  #
  # Rows are appended with #add; #get_io finishes the stream (writing the
  # trailer) and returns the underlying IO rewound to the start, ready to
  # be handed to a COPY ... FROM STDIN BINARY call.
  #
  #   encoder = PgDataEncoder::EncodeForCopy.new
  #   encoder.add [1, "test", {a: 1}]
  #   io = encoder.get_io
  #
  # Supported column values: Integer, String, nil and Hash (hstore).
  class EncodeForCopy
    # @param options [Hash] pass :use_tempfile => true to buffer the
    #   stream in a Tempfile instead of an in-memory StringIO.
    def initialize(options = {})
      @options = options
      @closed = false
      @io = nil
    end

    # Appends one row to the stream.
    #
    # @param row [Array] column values, in the column order of the COPY.
    # @return [Array] the row that was passed in.
    # @raise [IOError] if the stream has already been closed; writing after
    #   the rewind done by #close would overwrite the file header.
    # @raise [ArgumentError] if a column value has an unsupported type.
    def add(row)
      raise IOError, "cannot add rows after the stream has been closed" if @closed
      setup_io unless @io

      # Each tuple starts with a 16-bit count of its fields.
      @io.write([row.size].pack("n"))
      row.each { |col| encode_field(@io, col) }
    end

    # Writes the file trailer and rewinds the stream. Safe to call more
    # than once: only the first call writes anything.
    def close
      return if @closed

      # No row was ever added: still emit the header so that the result
      # is a complete (empty) stream rather than failing on a nil IO.
      setup_io unless @io

      @closed = true
      # Trailer: a 16-bit word containing -1.
      @io.write([-1].pack("n"))
      @io.rewind
    end

    # Finishes the stream if necessary and returns it.
    #
    # @return [StringIO, Tempfile] the stream, positioned at its start
    #   the first time it is returned.
    def get_io
      close unless @closed
      @io
    end

    # Closes and unlinks the backing Tempfile. Does nothing when the
    # stream is a StringIO or was never created.
    def remove
      return unless @io.kind_of?(Tempfile)

      @io.close
      @io.unlink
    end

    private

    # Creates the backing IO and writes the file header.
    def setup_io
      if @options[:use_tempfile] == true
        @io = Tempfile.new("copy_binary", :encoding => 'ascii-8bit')
        # Unlink right away; the already-open handle is still used for
        # all subsequent writes and reads.
        @io.unlink
      else
        @io = StringIO.new
      end
      # 11-byte signature, followed by two 32-bit words written as zero
      # (flags field and header extension length).
      @io.write("PGCOPY\n\377\r\n\0")
      @io.write([0,0].pack("NN"))
    end

    # Writes a single field to +io+.
    #
    # @param io [#write] destination stream.
    # @param field [Integer, String, Hash, nil] value to encode.
    # @param depth [Integer] nesting level; 0 for a top-level column.
    # @raise [ArgumentError] for unsupported value types.
    def encode_field(io, field, depth=0)
      case field
      when Integer
        # Packed as a 32-bit big-endian word, i.e. always 4 bytes.
        # NOTE(review): presumably only correct for 4-byte integer
        # columns - confirm before using with other integer widths.
        write_length_prefixed(io, [field].pack("N"))
      when nil
        # A length of -1 marks NULL; no data bytes follow.
        io.write([-1].pack("N"))
      when String
        write_length_prefixed(io, field.encode("UTF-8"))
      when Hash
        raise ArgumentError, "Hash's can't contain hashes" if depth > 0

        # The hstore payload is built in a scratch buffer first because
        # its total byte length has to be written before it.
        hash_io = StringIO.new
        hash_io.write([field.size].pack("N"))
        field.each_pair do |key, val|
          write_length_prefixed(hash_io, key.to_s.encode("UTF-8"))
          # Non-nil values are stringified before encoding; nil stays nil
          # and is written as a NULL value.
          encode_field(hash_io, val.nil? ? val : val.to_s, depth + 1)
        end
        write_length_prefixed(io, hash_io.string)
      else
        raise ArgumentError, "Unsupported Format: #{field.class.name}"
      end
    end

    # Writes +bytes+ preceded by its byte length as a 32-bit word.
    def write_length_prefixed(io, bytes)
      io.write([bytes.bytesize].pack("N"))
      io.write(bytes)
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'pg_data_encoder/version'
|
5
|
+
|
6
|
+
# Gem packaging metadata for pg_data_encoder.
Gem::Specification.new do |gem|
  gem.name          = "pg_data_encoder"
  gem.version       = PgDataEncoder::VERSION
  gem.authors       = ["Pete Brumm"]
  gem.email         = ["pete@petebrumm.com"]
  gem.description   = %q{Creates a binary data file that can be imported into postgres's copy from command}
  gem.summary       = %q{for faster input of data into postgres you can use this to generate the binary import and run COPY FROM}
  gem.homepage      = "https://github.com/pbrumm/pg_data_encoder"
  # The package ships an MIT LICENSE.txt; declare it so the built gem
  # metadata no longer reports an empty license list.
  gem.license       = "MIT"

  # Package every file tracked by git.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
  gem.add_development_dependency("rspec", ">= 2.12.0")
  gem.add_development_dependency("rspec-core", ">= 2.12.0")
end
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
1 text "a"=>"1", "b"=>"asdf"
|
Binary file
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
2
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
|
+
require 'rspec'
|
4
|
+
require 'rspec/autorun'
|
5
|
+
|
6
|
+
require 'pg_data_encoder'
|
7
|
+
|
8
|
+
RSpec.configure do |config|
|
9
|
+
config.before(:suite) do
|
10
|
+
|
11
|
+
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
# Reads a fixture file as raw bytes.
#
# The path is resolved as spec/fixtures/<filename> relative to the current
# working directory, and the content is returned as an ASCII-8BIT string.
def filedata(filename)
  fixture_path = "spec/fixtures/#{filename}"
  File.open(fixture_path, "r:ASCII-8BIT") { |fixture| fixture.read }
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe "generating data" do
  # Encodes the sample row (integer, string, hstore hash) with the given
  # encoder options and returns the finished IO.
  def encode_sample_row(options = {})
    encoder = PgDataEncoder::EncodeForCopy.new(options)
    encoder.add [1, "text", {a: 1, b: "asdf"}]
    encoder.close
    encoder.get_io
  end

  it 'should encode hstore data correctly' do
    io = encode_sample_row
    expected_bytes = filedata("3_col_hstore.dat")
    actual_bytes = io.read
    io.class.name.should == "StringIO"
    actual_bytes.force_encoding("ASCII-8BIT")
    # To regenerate the fixture, write actual_bytes to spec/fixtures/output.dat.
    actual_bytes.should == expected_bytes
  end

  it 'should encode hstore data correctly from tempfile' do
    io = encode_sample_row(:use_tempfile => true)
    expected_bytes = filedata("3_col_hstore.dat")
    actual_bytes = io.read
    io.class.name.should == "Tempfile"
    actual_bytes.force_encoding("ASCII-8BIT")
    # To regenerate the fixture, write actual_bytes to spec/fixtures/output.dat.
    actual_bytes.should == expected_bytes
  end
end
|
metadata
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pg_data_encoder
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Pete Brumm
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-12-12 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.12.0
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 2.12.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rspec-core
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 2.12.0
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 2.12.0
|
46
|
+
description: Creates a binary data file that can be imported into postgres's copy
|
47
|
+
from command
|
48
|
+
email:
|
49
|
+
- pete@petebrumm.com
|
50
|
+
executables: []
|
51
|
+
extensions: []
|
52
|
+
extra_rdoc_files: []
|
53
|
+
files:
|
54
|
+
- .gitignore
|
55
|
+
- Gemfile
|
56
|
+
- LICENSE.txt
|
57
|
+
- README.md
|
58
|
+
- Rakefile
|
59
|
+
- lib/pg_data_encoder.rb
|
60
|
+
- lib/pg_data_encoder/encode_for_copy.rb
|
61
|
+
- lib/pg_data_encoder/version.rb
|
62
|
+
- pg_data_encoder.gemspec
|
63
|
+
- spec/fixtures/3_col_hstore.dat
|
64
|
+
- spec/fixtures/3_col_hstore.txt
|
65
|
+
- spec/fixtures/output.dat
|
66
|
+
- spec/spec_helper.rb
|
67
|
+
- spec/verify_data_formats_spec.rb
|
68
|
+
homepage: https://github.com/pbrumm/pg_data_encoder
|
69
|
+
licenses: []
|
70
|
+
post_install_message:
|
71
|
+
rdoc_options: []
|
72
|
+
require_paths:
|
73
|
+
- lib
|
74
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
requirements: []
|
87
|
+
rubyforge_project:
|
88
|
+
rubygems_version: 1.8.24
|
89
|
+
signing_key:
|
90
|
+
specification_version: 3
|
91
|
+
summary: for faster input of data into postgres you can use this to generate the binary
|
92
|
+
import and run COPY FROM
|
93
|
+
test_files:
|
94
|
+
- spec/fixtures/3_col_hstore.dat
|
95
|
+
- spec/fixtures/3_col_hstore.txt
|
96
|
+
- spec/fixtures/output.dat
|
97
|
+
- spec/spec_helper.rb
|
98
|
+
- spec/verify_data_formats_spec.rb
|