pg_data_encoder 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +79 -0
- data/Rakefile +1 -0
- data/lib/pg_data_encoder/encode_for_copy.rb +84 -0
- data/lib/pg_data_encoder/version.rb +3 -0
- data/lib/pg_data_encoder.rb +7 -0
- data/pg_data_encoder.gemspec +21 -0
- data/spec/fixtures/3_col_hstore.dat +0 -0
- data/spec/fixtures/3_col_hstore.txt +1 -0
- data/spec/fixtures/output.dat +0 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/verify_data_formats_spec.rb +30 -0
- metadata +98 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Pete Brumm
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# PgDataEncoder
|
2
|
+
|
3
|
+
Creates a binary data file that can be imported with PostgreSQL's COPY FROM command
|
4
|
+
|
5
|
+
Works well in collaboration with the postgres-copy gem
|
6
|
+
|
7
|
+
https://github.com/diogob/postgres-copy
|
8
|
+
|
9
|
+
With it you can make a bulk insert like this
|
10
|
+
|
11
|
+
encoder = PgDataEncoder::EncodeForCopy.new
|
12
|
+
encoder.add [1, "test", "first"]
|
13
|
+
encoder.add [2, "test2", "second"]
|
14
|
+
|
15
|
+
Product.pg_copy_from(encoder.get_io, :format => :binary, :columns => [:id, :name, :desc])
|
16
|
+
|
17
|
+
With a fairly complicated table that includes an index and an hstore + index,
|
18
|
+
I can get 1600 inserts/sec on my overworked macbook pro.
|
19
|
+
|
20
|
+
Your usage may vary
|
21
|
+
|
22
|
+
NOTE: Only a few of the many data types are supported. Check below for more details.
|
23
|
+
|
24
|
+
## Installation
|
25
|
+
|
26
|
+
Add this line to your application's Gemfile:
|
27
|
+
|
28
|
+
gem 'pg_data_encoder'
|
29
|
+
|
30
|
+
And then execute:
|
31
|
+
|
32
|
+
$ bundle
|
33
|
+
|
34
|
+
Or install it yourself as:
|
35
|
+
|
36
|
+
$ gem install pg_data_encoder
|
37
|
+
|
38
|
+
## Usage
|
39
|
+
|
40
|
+
pg = PgDataEncoder::EncodeForCopy.new
|
41
|
+
pg.add([1,2,3,4,"text"])
|
42
|
+
io = pg.get_io
|
43
|
+
|
44
|
+
For large imports you can pass the `use_tempfile: true` option to buffer the data in a Tempfile; otherwise a StringIO is used.
|
45
|
+
|
46
|
+
pg = PgDataEncoder::EncodeForCopy.new(use_tempfile: true)
|
47
|
+
pg.add([1,2,3,4,"text"])
|
48
|
+
io = pg.get_io
|
49
|
+
|
50
|
+
pg.remove # to delete your file
|
51
|
+
|
52
|
+
## Notes
|
53
|
+
|
54
|
+
|
55
|
+
Columns must line up with the columns of the target table. If they don't, list the columns you are supplying in the COPY statement so that the others are not expected.
|
56
|
+
|
57
|
+
COPY table_name FROM STDIN BINARY
|
58
|
+
|
59
|
+
or
|
60
|
+
|
61
|
+
COPY table_name(field1, field2) FROM STDIN BINARY
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
## Added type support
|
67
|
+
|
68
|
+
Currently it supports Integers, Strings, and Hstore (passed as a Ruby Hash). A nil value is written as NULL.
|
69
|
+
|
70
|
+
Help would be appreciated for DateTime, Float, Double, ...
|
71
|
+
## Contributing
|
72
|
+
|
73
|
+
|
74
|
+
|
75
|
+
1. Fork it
|
76
|
+
2. Create your feature branch (`git checkout -b feature/new_feature`)
|
77
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
78
|
+
4. Push to the branch (`git push origin feature/new_feature`)
|
79
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
require 'stringio'
|
3
|
+
module PgDataEncoder
  # Builds a PostgreSQL binary COPY stream in memory (StringIO) or on disk
  # (Tempfile) one row at a time.
  #
  # Stream layout produced by this class:
  #   * the 11-byte signature "PGCOPY\n\377\r\n\0"
  #   * two 32-bit big-endian zero words (flags, header extension length)
  #   * per row: a 16-bit field count, then for each field a 32-bit byte
  #     length followed by the field bytes (length -1 means NULL)
  #   * a 16-bit -1 trailer
  #
  # Supported field values: Integer (written as 4 bytes), String (written as
  # UTF-8), Hash (written as hstore) and nil (written as NULL).
  class EncodeForCopy
    # Integers are always written as 4 bytes, so only values in the signed
    # 32-bit range can be represented without silent truncation.
    INT4_RANGE = (-2_147_483_648..2_147_483_647).freeze

    # @param options [Hash] pass :use_tempfile => true to buffer into a
    #   Tempfile instead of a StringIO.
    def initialize(options = {})
      @options = options
      @closed = false
      @io = nil
    end

    # Appends one row to the stream.
    #
    # @param row [Array] field values, in column order.
    # @raise [IOError] if the encoder has already been closed; the stream has
    #   been rewound at that point, so writing would overwrite the header.
    # @raise [RangeError] if an Integer does not fit in 4 bytes.
    # @raise [ArgumentError] if a value has an unsupported type.
    def add(row)
      raise IOError, "cannot add rows after the encoder has been closed" if @closed

      setup_io unless @io

      @io.write([row.size].pack("n"))
      row.each { |col| encode_field(@io, col) }
    end

    # Writes the trailer and rewinds the stream so it can be read from the
    # start. Safe to call more than once, and safe to call before any row has
    # been added (the result is then a valid, empty COPY stream).
    def close
      return if @closed

      setup_io unless @io
      @closed = true
      @io.write([-1].pack("n"))
      @io.rewind
    end

    # Finalizes the stream if necessary and returns the underlying IO object
    # (a StringIO or a Tempfile), positioned at the beginning.
    def get_io
      close unless @closed
      @io
    end

    # Closes and deletes the backing Tempfile. Does nothing when the data is
    # held in a StringIO or when nothing has been written yet.
    def remove
      return unless @io.kind_of?(Tempfile)

      @io.close
      @io.unlink
    end

    private

    # Creates the backing IO object and writes the file header to it.
    def setup_io
      if @options[:use_tempfile] == true
        @io = Tempfile.new("copy_binary", :encoding => 'ascii-8bit')
        # Unlink right away so the file disappears once the handle is closed.
        @io.unlink
      else
        @io = StringIO.new
      end
      @io.write("PGCOPY\n\377\r\n\0")
      @io.write([0, 0].pack("NN"))
    end

    # Writes a single field (32-bit length prefix + payload) to +io+.
    #
    # @param io [#write] destination stream.
    # @param field [Integer, String, Hash, nil] value to encode.
    # @param depth [Integer] nesting level; hashes are only accepted at 0.
    def encode_field(io, field, depth = 0)
      case field
      when Integer
        unless INT4_RANGE.cover?(field)
          raise RangeError, "Integer #{field} does not fit in a 4-byte integer field"
        end
        # "N" packs negative values as two's complement, which is what a
        # signed big-endian 32-bit field needs.
        write_with_length(io, [field].pack("N"))
      when nil
        # A length of -1 with no payload marks NULL.
        io.write([-1].pack("N"))
      when String
        write_with_length(io, field.encode("UTF-8"))
      when Hash
        raise ArgumentError, "Hash's can't contain hashes" if depth > 0
        write_with_length(io, encode_hstore(field))
      else
        raise ArgumentError, "Unsupported Format: #{field.class.name}"
      end
    end

    # Serializes a Hash as an hstore payload: a 32-bit pair count followed by
    # length-prefixed keys and values. Keys and non-nil values are converted
    # with to_s; nil values are written as NULL.
    def encode_hstore(hash)
      hash_io = StringIO.new
      hash_io.write([hash.size].pack("N"))
      hash.each_pair do |key, val|
        write_with_length(hash_io, key.to_s.encode("UTF-8"))
        encode_field(hash_io, val.nil? ? nil : val.to_s, 1)
      end
      hash_io.string
    end

    # Writes +bytes+ preceded by its byte length as a 32-bit big-endian word.
    def write_with_length(io, bytes)
      io.write([bytes.bytesize].pack("N"))
      io.write(bytes)
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for pg_data_encoder.

# Make lib/ loadable so the version constant can be read before the gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'pg_data_encoder/version'

Gem::Specification.new do |gem|
  gem.name          = "pg_data_encoder"
  gem.version       = PgDataEncoder::VERSION
  gem.authors       = ["Pete Brumm"]
  gem.email         = ["pete@petebrumm.com"]
  gem.description   = %q{Creates a binary data file that can be imported into postgres's copy from command}
  gem.summary       = %q{for faster input of data into postgres you can use this to generate the binary import and run COPY FROM}
  gem.homepage      = "https://github.com/pbrumm/pg_data_encoder"
  # The project ships an MIT LICENSE.txt; declare it so the packaged
  # metadata no longer reports an empty license list.
  gem.license       = "MIT"

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
  gem.add_development_dependency("rspec", ">= 2.12.0")
  gem.add_development_dependency("rspec-core", ">= 2.12.0")
end
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
1 text "a"=>"1", "b"=>"asdf"
|
Binary file
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
2
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
|
+
require 'rspec'
|
4
|
+
require 'rspec/autorun'
|
5
|
+
|
6
|
+
require 'pg_data_encoder'
|
7
|
+
|
8
|
+
RSpec.configure do |config|
|
9
|
+
config.before(:suite) do
|
10
|
+
|
11
|
+
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
# Reads a fixture from spec/fixtures (resolved against the current working
# directory) and returns its raw bytes as an ASCII-8BIT string.
#
# File.binread is used instead of opening with "r:ASCII-8BIT": the latter is
# still text mode, so on Windows the "\r\n" inside the COPY signature would
# be translated and the fixture would no longer match the encoder output.
#
# @param filename [String] file name inside spec/fixtures.
# @return [String] the file contents, binary-encoded.
def filedata(filename)
  File.binread("spec/fixtures/#{filename}")
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
# Compares the encoder's output for one fixed row (integer, string, hstore)
# against a stored binary fixture, once per backing store.
describe "generating data" do
  it 'should encode hstore data correctly' do
    encoder = PgDataEncoder::EncodeForCopy.new
    encoder.add [1, "text", {a: 1, b: "asdf"}]
    encoder.close
    io = encoder.get_io
    existing_data = filedata("3_col_hstore.dat")
    str = io.read
    expect(io.class.name).to eq("StringIO")
    # The fixture is read as binary, so compare in the same encoding.
    str.force_encoding("ASCII-8BIT")
    # Uncomment to dump the generated bytes for inspection:
    #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
    expect(str).to eq(existing_data)
  end

  it 'should encode hstore data correctly from tempfile' do
    encoder = PgDataEncoder::EncodeForCopy.new(:use_tempfile => true)
    begin
      encoder.add [1, "text", {a: 1, b: "asdf"}]
      encoder.close
      io = encoder.get_io
      existing_data = filedata("3_col_hstore.dat")
      str = io.read
      expect(io.class.name).to eq("Tempfile")
      # The fixture is read as binary, so compare in the same encoding.
      str.force_encoding("ASCII-8BIT")
      # Uncomment to dump the generated bytes for inspection:
      #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
      expect(str).to eq(existing_data)
    ensure
      # Release the tempfile even when an expectation fails.
      encoder.remove
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pg_data_encoder
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Pete Brumm
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-12-12 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.12.0
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 2.12.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rspec-core
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 2.12.0
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 2.12.0
|
46
|
+
description: Creates a binary data file that can be imported into postgres's copy
|
47
|
+
from command
|
48
|
+
email:
|
49
|
+
- pete@petebrumm.com
|
50
|
+
executables: []
|
51
|
+
extensions: []
|
52
|
+
extra_rdoc_files: []
|
53
|
+
files:
|
54
|
+
- .gitignore
|
55
|
+
- Gemfile
|
56
|
+
- LICENSE.txt
|
57
|
+
- README.md
|
58
|
+
- Rakefile
|
59
|
+
- lib/pg_data_encoder.rb
|
60
|
+
- lib/pg_data_encoder/encode_for_copy.rb
|
61
|
+
- lib/pg_data_encoder/version.rb
|
62
|
+
- pg_data_encoder.gemspec
|
63
|
+
- spec/fixtures/3_col_hstore.dat
|
64
|
+
- spec/fixtures/3_col_hstore.txt
|
65
|
+
- spec/fixtures/output.dat
|
66
|
+
- spec/spec_helper.rb
|
67
|
+
- spec/verify_data_formats_spec.rb
|
68
|
+
homepage: https://github.com/pbrumm/pg_data_encoder
|
69
|
+
licenses: []
|
70
|
+
post_install_message:
|
71
|
+
rdoc_options: []
|
72
|
+
require_paths:
|
73
|
+
- lib
|
74
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ! '>='
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
requirements: []
|
87
|
+
rubyforge_project:
|
88
|
+
rubygems_version: 1.8.24
|
89
|
+
signing_key:
|
90
|
+
specification_version: 3
|
91
|
+
summary: for faster input of data into postgres you can use this to generate the binary
|
92
|
+
import and run COPY FROM
|
93
|
+
test_files:
|
94
|
+
- spec/fixtures/3_col_hstore.dat
|
95
|
+
- spec/fixtures/3_col_hstore.txt
|
96
|
+
- spec/fixtures/output.dat
|
97
|
+
- spec/spec_helper.rb
|
98
|
+
- spec/verify_data_formats_spec.rb
|