activerecord-copy 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.rubocop.yml +40 -0
  4. data/.travis.yml +9 -0
  5. data/CHANGELOG.md +5 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +49 -0
  8. data/README.md +40 -0
  9. data/Rakefile +1 -0
  10. data/activerecord-copy.gemspec +25 -0
  11. data/lib/activerecord-copy.rb +92 -0
  12. data/lib/activerecord-copy/constants.rb +18 -0
  13. data/lib/activerecord-copy/decoder.rb +176 -0
  14. data/lib/activerecord-copy/encode_for_copy.rb +253 -0
  15. data/lib/activerecord-copy/exception.rb +4 -0
  16. data/lib/activerecord-copy/temp_buffer.rb +38 -0
  17. data/lib/activerecord-copy/version.rb +3 -0
  18. data/spec/big_write_spec.rb +17 -0
  19. data/spec/errors_spec.rb +8 -0
  20. data/spec/fixtures/3_col_array.txt +1 -0
  21. data/spec/fixtures/3_col_hstore.dat +0 -0
  22. data/spec/fixtures/3_col_hstore.txt +1 -0
  23. data/spec/fixtures/3_column_array.dat +0 -0
  24. data/spec/fixtures/array_with_two.dat +0 -0
  25. data/spec/fixtures/array_with_two2.dat +0 -0
  26. data/spec/fixtures/big_str_array.dat +0 -0
  27. data/spec/fixtures/big_str_array2.dat +0 -0
  28. data/spec/fixtures/bigint.dat +0 -0
  29. data/spec/fixtures/date.dat +0 -0
  30. data/spec/fixtures/date2.dat +0 -0
  31. data/spec/fixtures/date2000.dat +0 -0
  32. data/spec/fixtures/dates.dat +0 -0
  33. data/spec/fixtures/dates_p924.dat +0 -0
  34. data/spec/fixtures/dates_pg935.dat +0 -0
  35. data/spec/fixtures/empty_uuid.dat +0 -0
  36. data/spec/fixtures/falseclass.dat +0 -0
  37. data/spec/fixtures/float.dat +0 -0
  38. data/spec/fixtures/hstore_utf8.dat +0 -0
  39. data/spec/fixtures/intarray.dat +0 -0
  40. data/spec/fixtures/json.dat +0 -0
  41. data/spec/fixtures/json_array.dat +0 -0
  42. data/spec/fixtures/just_an_array.dat +0 -0
  43. data/spec/fixtures/just_an_array2.dat +0 -0
  44. data/spec/fixtures/multiline_hstore.dat +0 -0
  45. data/spec/fixtures/output.dat +0 -0
  46. data/spec/fixtures/timestamp.dat +0 -0
  47. data/spec/fixtures/timestamp_9.3.dat +0 -0
  48. data/spec/fixtures/timestamp_big.dat +0 -0
  49. data/spec/fixtures/timestamp_rounding.dat +0 -0
  50. data/spec/fixtures/trueclass.dat +0 -0
  51. data/spec/fixtures/utf8.dat +0 -0
  52. data/spec/fixtures/uuid.dat +0 -0
  53. data/spec/fixtures/uuid_array.dat +0 -0
  54. data/spec/multiline_spec.rb +17 -0
  55. data/spec/spec_helper.rb +22 -0
  56. data/spec/verify_data_formats_spec.rb +415 -0
  57. data/spec/verify_decoder_spec.rb +263 -0
  58. metadata +182 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c58fb95429ac77c914ddd9bdcc6980c3a4c99f87
4
+ data.tar.gz: 51e89df1b40433574b1dc3472ef8e823f15627fd
5
+ SHA512:
6
+ metadata.gz: 078585c52f4266c53000067f0a78a1d7c94b8f73dfc52e9ad9e9fa7bed7c35e499f7ee317fbb6089cfcfe99e802b59549d87910308a3ab89c2e5ffce8326612e
7
+ data.tar.gz: 0d95073114fa52abfd5ef597eab4fd62e6fef155c4a039fd552f02d6b34a08c822e508a78747d49a15554c05bbafe100e3483ceab139a7083d89a3c2e495f4c6
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rubocop.yml ADDED
@@ -0,0 +1,40 @@
1
+ require: rubocop-rspec
2
+
3
+ AllCops:
4
+ TargetRubyVersion: 2.3
5
+
6
+ LineLength:
7
+ Enabled: false
8
+
9
+ FrozenStringLiteralComment:
10
+ Enabled: false
11
+
12
+ ClassLength:
13
+ Enabled: false
14
+
15
+ RescueModifier:
16
+ Enabled: false
17
+
18
+ AccessorMethodName:
19
+ Enabled: false
20
+
21
+ Documentation:
22
+ Enabled: false
23
+
24
+ AbcSize:
25
+ Enabled: false
26
+
27
+ CyclomaticComplexity:
28
+ Enabled: false
29
+
30
+ PerceivedComplexity:
31
+ Enabled: false
32
+
33
+ MethodLength:
34
+ Enabled: false
35
+
36
+ BlockNesting:
37
+ Enabled: false
38
+
39
+ RSpec/DescribeClass:
40
+ Enabled: false
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.0.0-p648
4
+ - 2.1.9
5
+ - 2.2.5
6
+ - 2.3.1
7
+ - 2.4.0-preview1
8
+ - rbx-3.20
9
+ script: bundle exec rspec
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ # Changelog
2
+
3
+ ## 1.0.0 2017-07-22
4
+
5
+ * Initial release after fork from pg_data_encoder
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in activerecord-copy.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,49 @@
1
+ Copyright (c) 2017 Lukas Fittl
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ ---
25
+
26
+ pg_data_encoder work is:
27
+
28
+ Copyright (c) 2012 Pete Brumm
29
+
30
+ MIT License
31
+
32
+ Permission is hereby granted, free of charge, to any person obtaining
33
+ a copy of this software and associated documentation files (the
34
+ "Software"), to deal in the Software without restriction, including
35
+ without limitation the rights to use, copy, modify, merge, publish,
36
+ distribute, sublicense, and/or sell copies of the Software, and to
37
+ permit persons to whom the Software is furnished to do so, subject to
38
+ the following conditions:
39
+
40
+ The above copyright notice and this permission notice shall be
41
+ included in all copies or substantial portions of the Software.
42
+
43
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
44
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
45
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
46
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
47
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
48
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
49
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ # activerecord-copy [ ![](https://img.shields.io/gem/v/activerecord-copy.svg)](https://rubygems.org/gems/activerecord-copy) [ ![](https://img.shields.io/gem/dt/activerecord-copy.svg)](https://rubygems.org/gems/activerecord-copy)
2
+
3
+ Library to assist using binary COPY into PostgreSQL with activerecord.
4
+
5
+ Binary copy functionality is based on [pg_data_encoder](https://github.com/pbrumm/pg_data_encoder),
6
+ but modified to support additional types, and to prefer column type specifications
7
+ over inferred data types.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ gem 'activerecord-copy'
14
+
15
+ ## Usage
16
+
17
+ ```ruby
18
+ class MyModel < ApplicationRecord
19
+ include ActiveRecordCopy::CopyFromClient
20
+ end
21
+
22
+ MyModel.copy_from_client [:id, :name] do |copy|
23
+   copy << [1, 'Jane']
24
+ end
25
+ ```
26
+
27
+ ## Authors
28
+
29
+ * [Lukas Fittl](https://github.com/lfittl)
30
+
31
+ Credits to [Pete Brumm](https://github.com/pbrumm), who wrote pg_data_encoder,
32
+ which this library repurposes.
33
+
34
+ ## LICENSE
35
+
36
+ Copyright (c) 2017, Lukas Fittl <lukas@fittl.com><br>
37
+ activerecord-copy is licensed under the MIT license, see LICENSE file for details.
38
+
39
+ pg_data_encoder is Copyright (c) 2012, Pete Brumm<br>
40
+ pg_data_encoder is included under the terms of the MIT license, see LICENSE file for details.
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'activerecord-copy/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = 'activerecord-copy'
8
+ gem.version = ActiveRecordCopy::VERSION
9
+ gem.authors = ['Lukas Fittl']
10
+ gem.email = ['lukas@fittl.com']
11
+ gem.description = 'Supports binary COPY into PostgreSQL with activerecord'
12
+ gem.summary = 'Convenient methods to load data quickly into Postgres'
13
+ gem.homepage = 'https://github.com/lfittl/activerecord-copy'
14
+
15
+ gem.license = 'MIT'
16
+ gem.files = `git ls-files`.split($/)
17
+ gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
18
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
19
+ gem.require_paths = ['lib']
20
+
21
+ gem.add_dependency('activerecord', '>= 3.1')
22
+
23
+ gem.add_development_dependency('rspec', '>= 2.12.0')
24
+ gem.add_development_dependency('rspec-core', '>= 2.12.0')
25
+ end
@@ -0,0 +1,92 @@
1
+ require 'activerecord-copy/version'
2
+
3
+ require 'activerecord-copy/constants'
4
+ require 'activerecord-copy/exception'
5
+ require 'activerecord-copy/temp_buffer'
6
+
7
+ require 'activerecord-copy/encode_for_copy'
8
+ require 'activerecord-copy/decoder'
9
+
10
+ require 'json'
11
+
12
+ require 'active_support'
13
+
14
+ module ActiveRecordCopy
15
+ module CopyFromClient
16
+ extend ActiveSupport::Concern
17
+
18
+ class CopyHandler
19
+ def initialize(columns:, model_class:, table_name:)
20
+ @columns = columns
21
+ @model_class = model_class
22
+ @connection = model_class.connection.raw_connection
23
+ @table_name = table_name
24
+ @column_types = columns.map do |c|
25
+ column = model_class.columns_hash[c.to_s]
26
+ raise format('Could not find column %s on %s', c, model_class.table_name) if column.nil?
27
+
28
+ if column.type == :integer
29
+ if column.limit == 8
30
+ :bigint
31
+ elsif column.limit == 2
32
+ :smallint
33
+ else
34
+ :integer
35
+ end
36
+ else
37
+ column.type
38
+ end
39
+ end
40
+
41
+ reset
42
+ end
43
+
44
+ def <<(row)
45
+ @encoder.add row
46
+ end
47
+
48
+ def close
49
+ run_copy
50
+ end
51
+
52
+ private
53
+
54
+ def run_copy
55
+ io = @encoder.get_io
56
+
57
+ @connection.copy_data %{COPY #{@table_name}("#{@columns.join('","')}") FROM STDIN BINARY} do
58
+ begin
59
+ while chunk = io.readpartial(10_240) # rubocop:disable Lint/AssignmentInCondition
60
+ @connection.put_copy_data chunk
61
+ end
62
+ rescue EOFError # rubocop:disable Lint/HandleExceptions
63
+ end
64
+ end
65
+
66
+ @encoder.remove
67
+ reset
68
+
69
+ nil
70
+ end
71
+
72
+ def reset
73
+ @encoder = ActiveRecordCopy::EncodeForCopy.new column_types: @column_types
74
+ @row_count = 0
75
+ end
76
+ end
77
+
78
+ class_methods do
79
+ def copy_from_client(columns, table_name: nil, &_block)
80
+ table_name ||= quoted_table_name
81
+ handler = CopyHandler.new(columns: columns, model_class: self, table_name: table_name)
82
+ yield(handler)
83
+ handler.close
84
+ true
85
+ end
86
+ end
87
+ end
88
+ end
89
+
90
+ if defined?(ActiveRecord::Base)
91
+ ActiveRecord::Base.extend(ActiveRecordCopy::CopyFromClient)
92
+ end
@@ -0,0 +1,18 @@
1
+ module ActiveRecordCopy
2
+ PACKED_UINT_8 = 'C'.freeze # 8-bit unsigned (unsigned char)
3
+ PACKED_UINT_16 = 'n'.freeze # 16-bit unsigned, network (big-endian) byte order
4
+ PACKED_UINT_32 = 'N'.freeze # 32-bit unsigned, network (big-endian) byte order
5
+ PACKED_UINT_64 = 'Q>'.freeze # 64-bit unsigned, big endian
6
+ PACKED_FLOAT_64 = 'G'.freeze # double-precision, network (big-endian) byte order
7
+ PACKED_HEX_STRING = 'H*'.freeze # hex string (high nibble first)
8
+
9
+ INT_TYPE_OID = 23 # PostgreSQL type OID of int4
10
+ TEXT_TYPE_OID = 25 # PostgreSQL type OID of text
11
+ UUID_TYPE_OID = 2950 # PostgreSQL type OID of uuid
12
+ VARCHAR_TYPE_OID = 1043 # PostgreSQL type OID of varchar
13
+
14
+ ASCII_8BIT_ENCODING = 'ASCII-8BIT'.freeze
15
+ UTF_8_ENCODING = 'UTF-8'.freeze
16
+
17
+ POSTGRES_EPOCH_TIME = (Time.utc(2000, 1, 1).to_f * 1_000_000).to_i # 2000-01-01 00:00:00 UTC, in microseconds since the Unix epoch
18
+ end
@@ -0,0 +1,176 @@
1
+ require 'date'
2
+ require 'stringio'
3
+ require 'tempfile'
4
+ module ActiveRecordCopy
5
+ class Decoder
6
+ def initialize(options = {})
7
+ @options = options
8
+ @closed = false
9
+ if options[:column_types].is_a?(Array)
10
+ map = {}
11
+ options[:column_types].each_with_index do |c, i|
12
+ map[i] = c
13
+ end
14
+ options[:column_types] = map
15
+ else
16
+ options[:column_types] ||= {}
17
+ end
18
+ @io = nil
19
+ end
20
+
21
+ def read_line
22
+ return nil if @closed
23
+ setup_io unless @io
24
+ row = []
25
+ bytes = @io.read(2)
26
+ # p bytes
27
+ column_count = bytes.unpack(PACKED_UINT_16).first
28
+ if column_count == 65_535
29
+ @closed = true
30
+ return nil
31
+ end
32
+ # @io.write([row.size].pack(PACKED_UINT_32))
33
+ 0.upto(column_count - 1).each do |index|
34
+ field = decode_field(@io)
35
+ row[index] = if field.nil?
36
+ field
37
+ elsif @options[:column_types][index]
38
+ map_field(field, @options[:column_types][index])
39
+ else
40
+ field
41
+ end
42
+ end
43
+ row
44
+ end
45
+
46
+ def each
47
+ loop do
48
+ result = read_line
49
+ break unless result
50
+ yield result
51
+ break if @closed
52
+ end
53
+ end
54
+
55
+ private
56
+
57
+ def setup_io
58
+ if @options[:file]
59
+ @io = File.open(@options[:file], 'r:' + ASCII_8BIT_ENCODING)
60
+ elsif !@options[:io].nil?
61
+ @io = @options[:io]
62
+ else
63
+ raise 'NO io present'
64
+ end
65
+ header = "PGCOPY\n\377\r\n\0".force_encoding(ASCII_8BIT_ENCODING)
66
+ result = @io.read(header.bytesize)
67
+ raise 'invalid format' if result != header
68
+ # p @io.read(10)
69
+
70
+ @io.read(2) # blank
71
+ @io.read(6) # blank
72
+ end
73
+
74
+ def decode_field(io)
75
+ bytes = io.read(4)
76
+
77
+ if bytes == "\xFF\xFF\xFF\xFF".force_encoding(ASCII_8BIT_ENCODING)
78
+ return nil
79
+ else
80
+ io.read(bytes.unpack(PACKED_UINT_32).first)
81
+ end
82
+ end
83
+
84
+ def map_field(data, type)
85
+ # p [type, data]
86
+
87
+ case type
88
+ when :int, :integer
89
+ data.unpack(PACKED_UINT_32).first
90
+ when :bytea
91
+ data
92
+ when :bigint
93
+ data.unpack(PACKED_UINT_64).first
94
+ when :float, :double
95
+ data.unpack(PACKED_FLOAT_64).first
96
+ when :boolean
97
+ v = data.unpack(PACKED_UINT_8).first
98
+ v == 1
99
+ when :string, :text, :character
100
+ data.force_encoding(UTF_8_ENCODING)
101
+ when :json
102
+ JSON.load(data)
103
+ when :uuid
104
+ r = data.unpack('H*').first
105
+ "#{r[0..7]}-#{r[8..11]}-#{r[12..15]}-#{r[16..19]}-#{r[20..-1]}"
106
+ when :uuid_raw
107
+ r = data.unpack('H*').first
108
+ when :array, :"integer[]", :"uuid[]", :"character[]"
109
+ io = StringIO.new(data)
110
+ io.read(4) # unknown
111
+ io.read(4) # unknown
112
+ atype_raw = io.read(4)
113
+ return [] if atype_raw.nil?
114
+ atype = atype_raw.unpack(PACKED_UINT_32).first # string type?
115
+ return [] if io.pos == io.size
116
+ size = io.read(4).unpack(PACKED_UINT_32).first
117
+ io.read(4) # should be 1 for dimension
118
+ # p [atype, size]
119
+ # p data
120
+ case atype
121
+ when UUID_TYPE_OID
122
+ 0.upto(size - 1).map do
123
+ io.read(4) # size
124
+ r = io.read(16).unpack(PACKED_HEX_STRING).first
125
+ "#{r[0..7]}-#{r[8..11]}-#{r[12..15]}-#{r[16..19]}-#{r[20..-1]}"
126
+ end
127
+ when TEXT_TYPE_OID, VARCHAR_TYPE_OID
128
+ 0.upto(size - 1).map do
129
+ size = io.read(4).unpack(PACKED_UINT_32).first
130
+ io.read(size)
131
+ end
132
+ when INT_TYPE_OID
133
+ 0.upto(size - 1).map do
134
+ size = io.read(4).unpack(PACKED_UINT_32).first
135
+ bytes = io.read(size)
136
+ bytes.unpack(PACKED_UINT_32).first
137
+ end
138
+ else
139
+ raise "Unsupported Array type #{atype}"
140
+ end
141
+ when :hstore, :hash
142
+ io = StringIO.new(data)
143
+ fields = io.read(4).unpack(PACKED_UINT_32).first
144
+ h = {}
145
+
146
+ 0.upto(fields - 1).each do
147
+ key_size = io.read(4).unpack(PACKED_UINT_32).first
148
+ key = io.read(key_size).force_encoding("UTF-8")
149
+ value_size = io.read(4).unpack(PACKED_UINT_32).first
150
+ if value_size == 4294967295 # nil "\xFF\xFF\xFF\xFF"
151
+ value = nil
152
+ else
153
+ value = io.read(value_size)
154
+ value = value.force_encoding("UTF-8") if !value.nil?
155
+ end
156
+ h[key] = value
157
+ end
158
+ raise "remaining hstore bytes!" if io.pos != io.size
159
+ h
160
+ when :time, :timestamp
161
+ d = data.unpack("L!>").first
162
+ Time.at((d + POSTGRES_EPOCH_TIME) / 1_000_000.0).utc
163
+ when :date
164
+ # couldn't find another way to get signed network byte order
165
+ m = 0b0111_1111_1111_1111_1111_1111_1111_1111
166
+ d = data.unpack(PACKED_UINT_32).first
167
+ d = (d & m) - m - 1 if data.bytes[0] & 0b1000_0000 > 0 # negative number
168
+
169
+ # p [data, d, Date.jd(d + Date.new(2000,1,1).jd)]
170
+ Date.jd(d + Date.new(2000, 1, 1).jd)
171
+ else
172
+ raise "Unsupported format #{type}"
173
+ end
174
+ end
175
+ end
176
+ end