pg_data_encoder 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pg_data_encoder.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Pete Brumm
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,79 @@
1
+ # PgDataEncoder
2
+
3
+ Creates a binary data file that can be imported with PostgreSQL's COPY FROM command.
4
+
5
+ Works well in collaboration with the postgres-copy gem
6
+
7
+ https://github.com/diogob/postgres-copy
8
+
9
+ With it you can make a bulk insert like this
10
+
11
+ encoder = PgDataEncoder::EncodeForCopy.new
12
+ encoder.add [1, "test", "first"]
13
+ encoder.add [2, "test2", "second"]
14
+
15
+ Product.pg_copy_from(encoder.get_io, :format => :binary, :columns => [:id, :name, :desc])
16
+
17
+ With a fairly complicated table that includes an index and an hstore + index.
18
+ I can get 1600 inserts/sec on my overworked macbook pro.
19
+
20
+ Your usage may vary
21
+
22
+ NOTE: Only a few of the many data types are supported. See "Added type support" below for details.
23
+
24
+ ## Installation
25
+
26
+ Add this line to your application's Gemfile:
27
+
28
+ gem 'pg_data_encoder'
29
+
30
+ And then execute:
31
+
32
+ $ bundle
33
+
34
+ Or install it yourself as:
35
+
36
+ $ gem install pg_data_encoder
37
+
38
+ ## Usage
39
+
40
+ pg = PgDataEncoder::EncodeForCopy.new
41
+ pg.add([1,2,3,4,"text"])
42
+ io = pg.get_io
43
+
44
+ For large imports you can pass the use_tempfile: true option to buffer the data in a Tempfile; otherwise a StringIO is used.
45
+
46
+ pg = PgDataEncoder::EncodeForCopy.new(use_tempfile: true)
47
+ pg.add([1,2,3,4,"text"])
48
+ io = pg.get_io
49
+
50
+ pg.remove # to delete your file
51
+
52
+ ## Notes
53
+
54
+
55
+ Columns must line up with the target table. If they don't, name only the columns you are supplying in the COPY command:
56
+
57
+ COPY table_name FROM STDIN BINARY
58
+
59
+ or
60
+
61
+ COPY table_name(field1, field2) FROM STDIN BINARY
62
+
63
+
64
+
65
+
66
+ ## Added type support
67
+
68
+ Currently it supports Integers, Strings, Hstore.
69
+
70
+ Help would be appreciated for DateTime, Float, Double, ...
71
+ ## Contributing
72
+
73
+
74
+
75
+ 1. Fork it
76
+ 2. Create your feature branch (`git checkout -b feature/new_feature`)
77
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
78
+ 4. Push to the branch (`git push origin feature/new_feature`)
79
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,84 @@
1
+ require 'tempfile'
2
+ require 'stringio'
3
module PgDataEncoder
  # Builds a byte stream in PostgreSQL's binary COPY format, one row at a time.
  #
  # Layout written by this class:
  #   * header: the signature "PGCOPY\n\377\r\n\0", a 32-bit flags field (0)
  #     and a 32-bit header-extension length (0)
  #   * one tuple per #add call: 16-bit field count, then every field as a
  #     32-bit byte length followed by the payload (length -1 and no payload
  #     for nil)
  #   * trailer: a 16-bit -1
  #
  # Supported field values are Integer (always written as 4 bytes), String
  # (written as UTF-8), nil and Hash (written in the hstore layout: pair
  # count, then each key and value as length-prefixed text).
  #
  # Options:
  #   :use_tempfile - when exactly +true+, rows are buffered in a Tempfile;
  #                   otherwise they are buffered in memory in a StringIO.
  class EncodeForCopy
    # Integers outside this range cannot be represented in the 4 bytes that
    # pack("N") emits and would otherwise be truncated silently.
    FOUR_BYTE_RANGE = (-2**31..2**32 - 1)

    # options - Hash of options (see the class documentation).
    def initialize(options = {})
      @options = options
      @closed = false
      @io = nil
    end

    # Appends one tuple to the stream.
    #
    # The row is encoded into a scratch buffer first so that a row which
    # fails to encode leaves the stream untouched instead of half-written.
    # Must not be called after #close: closing rewinds the stream, so later
    # writes would land on top of the header.
    #
    # row - Array of field values, in column order.
    #
    # Returns row.
    # Raises ArgumentError for unsupported field types and RangeError for
    # integers that do not fit in 4 bytes.
    def add(row)
      setup_io unless @io

      tuple = new_buffer
      tuple.write([row.size].pack("n"))
      row.each { |col| encode_field(tuple, col) }
      @io.write(tuple.string)
      row
    end

    # Writes the trailer and rewinds the stream so it can be read from the
    # start. Safe to call more than once (only the first call writes) and
    # safe to call before any row was added (yields header + trailer).
    def close
      return if @closed

      setup_io unless @io
      @closed = true
      @io.write([-1].pack("n"))
      @io.rewind
    end

    # Returns the finished, rewound IO (StringIO or Tempfile), closing the
    # stream first if that has not happened yet.
    def get_io
      close unless @closed
      @io
    end

    # Closes and deletes the backing Tempfile. Does nothing for the
    # in-memory StringIO or when nothing has been written yet.
    def remove
      return unless @io.is_a?(Tempfile)

      @io.close
      @io.unlink
    end

    private

    # Creates the backing IO and writes the file header.
    def setup_io
      if @options[:use_tempfile] == true
        @io = Tempfile.new("copy_binary", :encoding => 'ascii-8bit')
        # The path is removed right away; the open handle stays usable.
        @io.unlink
      else
        @io = new_buffer
      end
      @io.write("PGCOPY\n\377\r\n\0")
      @io.write([0, 0].pack("NN"))
    end

    # Returns an empty in-memory buffer tagged as binary, so bytes are
    # appended verbatim and read back as ASCII-8BIT like the Tempfile.
    def new_buffer
      StringIO.new(String.new)
    end

    # Writes +bytes+ preceded by its byte length as a 32-bit big-endian int.
    def write_sized(io, bytes)
      io.write([bytes.bytesize].pack("N"))
      io.write(bytes)
    end

    # Encodes a single field value onto +io+.
    #
    # io    - IO-like object to write to.
    # field - Integer, String, nil or Hash.
    # depth - nesting level; a Hash is only accepted at depth 0.
    #
    # Raises ArgumentError for unsupported types or nested hashes and
    # RangeError for integers that do not fit in 4 bytes.
    def encode_field(io, field, depth = 0)
      case field
      when Integer
        unless FOUR_BYTE_RANGE.cover?(field)
          raise RangeError, "Integer out of 4-byte range: #{field}"
        end
        write_sized(io, [field].pack("N"))
      when nil
        # A length of -1 marks NULL; no payload follows.
        io.write([-1].pack("N"))
      when String
        write_sized(io, field.encode("UTF-8"))
      when Hash
        raise ArgumentError, "Hash's can't contain hashes" if depth > 0

        pairs = new_buffer
        pairs.write([field.size].pack("N"))
        field.each_pair do |key, val|
          write_sized(pairs, key.to_s.encode("UTF-8"))
          # Values are stored as text; nil stays nil and becomes a -1 length.
          encode_field(pairs, val.nil? ? nil : val.to_s, depth + 1)
        end
        # Like every other field, the hash payload is prefixed with its
        # total byte length.
        write_sized(io, pairs.string)
      else
        raise ArgumentError, "Unsupported Format: #{field.class.name}"
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
# Release number of the gem; the gemspec reads this constant for gem.version.
module PgDataEncoder
  VERSION = '0.0.1'
end
@@ -0,0 +1,7 @@
1
+ require "pg_data_encoder/version"
2
+
3
+ require 'pg_data_encoder/encode_for_copy'
4
+
5
+ module PgDataEncoder
6
+ # Top-level namespace of the gem; the encoder itself is PgDataEncoder::EncodeForCopy.
7
+ end
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
# Make lib/ loadable so the version constant can be read without the gem
# being installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'pg_data_encoder/version'

Gem::Specification.new do |gem|
  gem.name          = "pg_data_encoder"
  gem.version       = PgDataEncoder::VERSION
  gem.authors       = ["Pete Brumm"]
  gem.email         = ["pete@petebrumm.com"]
  gem.description   = %q{Creates a binary data file that can be imported into postgres's copy from command}
  gem.summary       = %q{for faster input of data into postgres you can use this to generate the binary import and run COPY FROM}
  gem.homepage      = "https://github.com/pbrumm/pg_data_encoder"
  # The project ships an MIT LICENSE.txt; declare it so the built gem's
  # metadata no longer reports an empty license list.
  gem.license       = "MIT"

  # Package every file tracked by git.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
  gem.add_development_dependency("rspec", ">= 2.12.0")
  gem.add_development_dependency("rspec-core", ">= 2.12.0")
end
Binary file
@@ -0,0 +1 @@
1
+ 1 text "a"=>"1", "b"=>"asdf"
Binary file
@@ -0,0 +1,21 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'rspec'
4
+ require 'rspec/autorun'
5
+
6
+ require 'pg_data_encoder'
7
+
8
+ RSpec.configure do |config|
9
+ config.before(:suite) do
10
+
11
+
12
+ end
13
+ end
14
+
15
# Reads a fixture file and returns its raw bytes as an ASCII-8BIT String.
#
# filename - name of a file inside the "fixtures" directory that sits next
#            to this helper; an absolute path is used as given.
#
# The path is resolved against this file's directory rather than the
# current working directory, so the specs can be run from anywhere, and the
# file is read in binary mode so no newline translation is applied.
def filedata(filename)
  fixtures_dir = File.join(File.dirname(__FILE__), "fixtures")
  File.binread(File.expand_path(filename, fixtures_dir))
end
@@ -0,0 +1,30 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
describe "generating data" do
  # Encodes the sample row with the given encoder options.
  #
  # Returns [class name of the IO handed back by the encoder, encoded bytes
  # tagged as ASCII-8BIT]. The encoder's backing file, if any, is released
  # once the bytes have been read.
  def encode_sample(options = {})
    encoder = PgDataEncoder::EncodeForCopy.new(options)
    encoder.add [1, "text", {a: 1, b: "asdf"}]
    encoder.close
    io = encoder.get_io
    str = io.read
    str.force_encoding("ASCII-8BIT")
    # To regenerate the fixture, uncomment:
    #File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
    [io.class.name, str]
  ensure
    encoder.remove if encoder
  end

  it 'should encode hstore data correctly' do
    io_class, str = encode_sample
    io_class.should == "StringIO"
    str.should == filedata("3_col_hstore.dat")
  end

  it 'should encode hstore data correctly from tempfile' do
    io_class, str = encode_sample(:use_tempfile => true)
    io_class.should == "Tempfile"
    str.should == filedata("3_col_hstore.dat")
  end
end
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pg_data_encoder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Pete Brumm
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-12 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 2.12.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 2.12.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec-core
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: 2.12.0
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: 2.12.0
46
+ description: Creates a binary data file that can be imported into postgres's copy
47
+ from command
48
+ email:
49
+ - pete@petebrumm.com
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - Gemfile
56
+ - LICENSE.txt
57
+ - README.md
58
+ - Rakefile
59
+ - lib/pg_data_encoder.rb
60
+ - lib/pg_data_encoder/encode_for_copy.rb
61
+ - lib/pg_data_encoder/version.rb
62
+ - pg_data_encoder.gemspec
63
+ - spec/fixtures/3_col_hstore.dat
64
+ - spec/fixtures/3_col_hstore.txt
65
+ - spec/fixtures/output.dat
66
+ - spec/spec_helper.rb
67
+ - spec/verify_data_formats_spec.rb
68
+ homepage: https://github.com/pbrumm/pg_data_encoder
69
+ licenses: []
70
+ post_install_message:
71
+ rdoc_options: []
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ! '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ requirements: []
87
+ rubyforge_project:
88
+ rubygems_version: 1.8.24
89
+ signing_key:
90
+ specification_version: 3
91
+ summary: for faster input of data into postgres you can use this to generate the binary
92
+ import and run COPY FROM
93
+ test_files:
94
+ - spec/fixtures/3_col_hstore.dat
95
+ - spec/fixtures/3_col_hstore.txt
96
+ - spec/fixtures/output.dat
97
+ - spec/spec_helper.rb
98
+ - spec/verify_data_formats_spec.rb