pg_data_encoder 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in pg_data_encoder.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Pete Brumm
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,79 @@
1
+ # PgDataEncoder
2
+
3
+ Creates a binary data file that can be imported with PostgreSQL's `COPY ... FROM` command.
4
+
5
+ Works well in collaboration with the postgres-copy gem
6
+
7
+ https://github.com/diogob/postgres-copy
8
+
9
+ With it you can make a bulk insert like this
10
+
11
+ encoder = PgDataEncoder::EncodeForCopy.new
12
+ encoder.add [1, "test", "first"]
13
+ encoder.add [2, "test2", "second"]
14
+
15
+ Product.pg_copy_from(encoder.get_io, :format => :binary, :columns => [:id, :name, :desc])
16
+
17
+ With a fairly complicated table that includes an index and an hstore + index.
18
+ I can get 1600 inserts/sec on my overworked macbook pro.
19
+
20
+ Your usage may vary
21
+
22
+ NOTE: Only a few of the many data types are supported. Check below for more details.
23
+
24
+ ## Installation
25
+
26
+ Add this line to your application's Gemfile:
27
+
28
+ gem 'pg_data_encoder'
29
+
30
+ And then execute:
31
+
32
+ $ bundle
33
+
34
+ Or install it yourself as:
35
+
36
+ $ gem install pg_data_encoder
37
+
38
+ ## Usage
39
+
40
+ pg = PgDataEncoder::EncodeForCopy.new
41
+ pg.add([1,2,3,4,"text"])
42
+ io = pg.get_io
43
+
44
+ For large imports you can pass the `use_tempfile: true` option to buffer the data in a Tempfile; otherwise a StringIO is used.
45
+
46
+ pg = PgDataEncoder::EncodeForCopy.new(use_tempfile: true)
47
+ pg.add([1,2,3,4,"text"])
48
+ io = pg.get_io
49
+
50
+ pg.remove # to delete your file
51
+
52
+ ## Notes
53
+
54
+
55
+ The values in each row must line up with the columns of the incoming table. If they don't, list the columns you are supplying in the COPY command so that the remaining columns are not required.
56
+
57
+ COPY table_name FROM STDIN BINARY
58
+
59
+ or
60
+
61
+ COPY table_name(field1, field2) FROM STDIN BINARY
62
+
63
+
64
+
65
+
66
+ ## Added type support
67
+
68
+ Currently it supports Integers, Strings, nil (NULL) and Hstore (given as a Hash).
69
+
70
+ Help would be appreciated for DateTime, Float, Double, ...
71
+ ## Contributing
72
+
73
+
74
+
75
+ 1. Fork it
76
+ 2. Create your feature branch (`git checkout -b feature/new_feature`)
77
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
78
+ 4. Push to the branch (`git push origin feature/new_feature`)
79
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,84 @@
1
+ require 'tempfile'
2
+ require 'stringio'
3
module PgDataEncoder
  # Builds a byte stream in PostgreSQL's binary COPY format.
  #
  # Rows are appended with #add and the finished stream is obtained with
  # #get_io. Supported field values are Integer (written as 4 bytes), String
  # (written as UTF-8), nil (written as NULL) and Hash (written as an hstore
  # whose keys and non-nil values are stringified).
  #
  # The stream is buffered in a StringIO by default, or in a Tempfile when the
  # encoder is created with <tt>:use_tempfile => true</tt>.
  class EncodeForCopy
    # File header: 11-byte signature followed by a 32-bit flags field and a
    # 32-bit header-extension length, both zero.
    HEADER = ("PGCOPY\n\377\r\n\0".dup.force_encoding("ASCII-8BIT") + [0, 0].pack("NN")).freeze

    # File trailer: a 16-bit field count of -1.
    TRAILER = [-1].pack("n").freeze

    # A field length of -1 marks a NULL value; no data bytes follow.
    NULL_LENGTH = [-1].pack("N").freeze

    # Integers are written with pack("N"), which keeps only the low 32 bits.
    # Everything representable in 32 bits (signed or unsigned) is accepted;
    # anything wider is rejected instead of being silently truncated.
    INTEGER_RANGE = (-(2**31)..(2**32 - 1))

    # options - Hash of settings. Only :use_tempfile is read; when it is
    #           exactly +true+ the stream is buffered in a Tempfile.
    def initialize(options = {})
      @options = options
      @closed = false
      @io = nil
    end

    # Appends one row to the stream.
    #
    # row - Array of field values, in column order.
    #
    # Returns the row.
    # Raises IOError if the stream has already been closed, because writing
    #   now would start at the rewound position and overwrite the header.
    # Raises ArgumentError for unsupported field types.
    # Raises RangeError for integers that do not fit in 32 bits.
    def add(row)
      raise IOError, "cannot add rows after the stream has been closed" if @closed
      setup_io unless @io

      @io.write([row.size].pack("n"))
      row.each { |col| encode_field(@io, col) }
    end

    # Writes the trailer (once) and rewinds the stream so it can be read from
    # the start. Safe to call repeatedly and before any row has been added;
    # in the latter case the stream holds just the header and the trailer.
    def close
      setup_io unless @io
      unless @closed
        @io.write(TRAILER)
        @closed = true
      end
      @io.rewind
    end

    # Returns the underlying IO, closing the stream first if necessary.
    def get_io
      close unless @closed
      @io
    end

    # Closes and deletes the backing Tempfile. Does nothing for StringIO
    # buffers or when no buffer has been created yet.
    def remove
      if @io.kind_of?(Tempfile)
        @io.close
        @io.unlink
      end
    end

    private

    # Creates the buffer and writes the file header to it.
    def setup_io
      if @options[:use_tempfile] == true
        @io = Tempfile.new("copy_binary", :encoding => 'ascii-8bit')
        # Binary mode keeps platforms with newline translation from altering
        # the bytes that are written.
        @io.binmode
        @io.unlink
      else
        @io = binary_buffer
      end
      @io.write(HEADER)
    end

    # Returns an empty in-memory buffer tagged as binary.
    def binary_buffer
      StringIO.new.set_encoding(Encoding::BINARY)
    end

    # Writes +bytes+ to +io+ preceded by its byte length as a 32-bit integer.
    def write_with_length(io, bytes)
      io.write([bytes.bytesize].pack("N"))
      io.write(bytes)
    end

    # Encodes a single field value onto +io+.
    #
    # depth - nesting level; a Hash is only accepted at depth 0.
    def encode_field(io, field, depth = 0)
      case field
      when Integer
        unless INTEGER_RANGE.cover?(field)
          raise RangeError, "Integer #{field} does not fit in 32 bits"
        end
        write_with_length(io, [field].pack("N"))
      when nil
        io.write(NULL_LENGTH)
      when String
        write_with_length(io, field.encode("UTF-8"))
      when Hash
        raise ArgumentError, "Hash's can't contain hashes" if depth > 0
        # The 32-bit prefix is the byte length of the encoded hstore payload.
        write_with_length(io, encode_hstore(field, depth))
      else
        raise ArgumentError, "Unsupported Format: #{field.class.name}"
      end
    end

    # Returns the encoded payload for a Hash: a 32-bit pair count followed by
    # each key (stringified, length-prefixed) and its value (stringified and
    # length-prefixed, or NULL when the value is nil).
    def encode_hstore(hash, depth)
      buffer = binary_buffer
      buffer.write([hash.size].pack("N"))
      hash.each_pair do |key, val|
        write_with_length(buffer, key.to_s.encode("UTF-8"))
        encode_field(buffer, val.nil? ? val : val.to_s, depth + 1)
      end
      buffer.string
    end
  end
end
@@ -0,0 +1,3 @@
1
module PgDataEncoder
  # Gem version string; frozen so it cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,7 @@
1
+ require "pg_data_encoder/version"
2
+
3
+ require 'pg_data_encoder/encode_for_copy'
4
+
5
+ module PgDataEncoder
6
+ # Namespace for the gem; the encoder itself lives in pg_data_encoder/encode_for_copy.
7
+ end
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'pg_data_encoder/version'
5
+
6
# Gem packaging metadata for pg_data_encoder.
Gem::Specification.new do |gem|
  gem.name          = "pg_data_encoder"
  gem.version       = PgDataEncoder::VERSION
  gem.authors       = ["Pete Brumm"]
  gem.email         = ["pete@petebrumm.com"]
  gem.description   = %q{Creates a binary data file that can be imported into postgres's copy from command}
  gem.summary       = %q{for faster input of data into postgres you can use this to generate the binary import and run COPY FROM}
  gem.homepage      = "https://github.com/pbrumm/pg_data_encoder"
  # Matches the MIT text shipped in LICENSE.txt.
  gem.license       = "MIT"

  # Package exactly the files tracked by git; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
  gem.add_development_dependency("rspec", ">= 2.12.0")
  gem.add_development_dependency("rspec-core", ">= 2.12.0")
end
Binary file
@@ -0,0 +1 @@
1
+ 1 text "a"=>"1", "b"=>"asdf"
Binary file
@@ -0,0 +1,21 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'rspec'
4
+ require 'rspec/autorun'
5
+
6
+ require 'pg_data_encoder'
7
+
8
# Suite-wide RSpec configuration.
RSpec.configure do |rspec|
  # Placeholder hook that runs before the suite; it currently does nothing.
  rspec.before(:suite) {}
end
14
+
15
# Returns the raw bytes of a fixture file as an ASCII-8BIT string.
#
# filename - name of a file under spec/fixtures, resolved relative to the
#            current working directory.
#
# The file is read in binary mode so that platforms which translate newlines
# in text mode cannot alter the fixture's bytes.
def filedata(filename)
  File.binread("spec/fixtures/#{filename}")
end
@@ -0,0 +1,30 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
describe "generating data" do
  # Encodes the sample row (integer, text, hstore) with the given encoder
  # options and returns the resulting IO together with the bytes read from it.
  #
  # To regenerate the fixture, write the returned bytes out, e.g.:
  #   File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(bytes) }
  def encode_sample_row(options = {})
    encoder = PgDataEncoder::EncodeForCopy.new(options)
    encoder.add [1, "text", {a: 1, b: "asdf"}]
    encoder.close
    stream = encoder.get_io
    [stream, stream.read]
  end

  it 'should encode hstore data correctly' do
    fixture = filedata("3_col_hstore.dat")
    stream, bytes = encode_sample_row
    stream.class.name.should == "StringIO"
    bytes.force_encoding("ASCII-8BIT")
    bytes.should == fixture
  end

  it 'should encode hstore data correctly from tempfile' do
    fixture = filedata("3_col_hstore.dat")
    stream, bytes = encode_sample_row(:use_tempfile => true)
    stream.class.name.should == "Tempfile"
    bytes.force_encoding("ASCII-8BIT")
    bytes.should == fixture
  end
end
metadata ADDED
@@ -0,0 +1,98 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pg_data_encoder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Pete Brumm
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-12-12 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 2.12.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 2.12.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec-core
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: 2.12.0
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: 2.12.0
46
+ description: Creates a binary data file that can be imported into postgres's copy
47
+ from command
48
+ email:
49
+ - pete@petebrumm.com
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - Gemfile
56
+ - LICENSE.txt
57
+ - README.md
58
+ - Rakefile
59
+ - lib/pg_data_encoder.rb
60
+ - lib/pg_data_encoder/encode_for_copy.rb
61
+ - lib/pg_data_encoder/version.rb
62
+ - pg_data_encoder.gemspec
63
+ - spec/fixtures/3_col_hstore.dat
64
+ - spec/fixtures/3_col_hstore.txt
65
+ - spec/fixtures/output.dat
66
+ - spec/spec_helper.rb
67
+ - spec/verify_data_formats_spec.rb
68
+ homepage: https://github.com/pbrumm/pg_data_encoder
69
+ licenses: []
70
+ post_install_message:
71
+ rdoc_options: []
72
+ require_paths:
73
+ - lib
74
+ required_ruby_version: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ! '>='
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ requirements: []
87
+ rubyforge_project:
88
+ rubygems_version: 1.8.24
89
+ signing_key:
90
+ specification_version: 3
91
+ summary: for faster input of data into postgres you can use this to generate the binary
92
+ import and run COPY FROM
93
+ test_files:
94
+ - spec/fixtures/3_col_hstore.dat
95
+ - spec/fixtures/3_col_hstore.txt
96
+ - spec/fixtures/output.dat
97
+ - spec/spec_helper.rb
98
+ - spec/verify_data_formats_spec.rb