activerecord-copy 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.rubocop.yml +40 -0
  4. data/.travis.yml +9 -0
  5. data/CHANGELOG.md +5 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +49 -0
  8. data/README.md +40 -0
  9. data/Rakefile +1 -0
  10. data/activerecord-copy.gemspec +25 -0
  11. data/lib/activerecord-copy.rb +92 -0
  12. data/lib/activerecord-copy/constants.rb +18 -0
  13. data/lib/activerecord-copy/decoder.rb +176 -0
  14. data/lib/activerecord-copy/encode_for_copy.rb +253 -0
  15. data/lib/activerecord-copy/exception.rb +4 -0
  16. data/lib/activerecord-copy/temp_buffer.rb +38 -0
  17. data/lib/activerecord-copy/version.rb +3 -0
  18. data/spec/big_write_spec.rb +17 -0
  19. data/spec/errors_spec.rb +8 -0
  20. data/spec/fixtures/3_col_array.txt +1 -0
  21. data/spec/fixtures/3_col_hstore.dat +0 -0
  22. data/spec/fixtures/3_col_hstore.txt +1 -0
  23. data/spec/fixtures/3_column_array.dat +0 -0
  24. data/spec/fixtures/array_with_two.dat +0 -0
  25. data/spec/fixtures/array_with_two2.dat +0 -0
  26. data/spec/fixtures/big_str_array.dat +0 -0
  27. data/spec/fixtures/big_str_array2.dat +0 -0
  28. data/spec/fixtures/bigint.dat +0 -0
  29. data/spec/fixtures/date.dat +0 -0
  30. data/spec/fixtures/date2.dat +0 -0
  31. data/spec/fixtures/date2000.dat +0 -0
  32. data/spec/fixtures/dates.dat +0 -0
  33. data/spec/fixtures/dates_p924.dat +0 -0
  34. data/spec/fixtures/dates_pg935.dat +0 -0
  35. data/spec/fixtures/empty_uuid.dat +0 -0
  36. data/spec/fixtures/falseclass.dat +0 -0
  37. data/spec/fixtures/float.dat +0 -0
  38. data/spec/fixtures/hstore_utf8.dat +0 -0
  39. data/spec/fixtures/intarray.dat +0 -0
  40. data/spec/fixtures/json.dat +0 -0
  41. data/spec/fixtures/json_array.dat +0 -0
  42. data/spec/fixtures/just_an_array.dat +0 -0
  43. data/spec/fixtures/just_an_array2.dat +0 -0
  44. data/spec/fixtures/multiline_hstore.dat +0 -0
  45. data/spec/fixtures/output.dat +0 -0
  46. data/spec/fixtures/timestamp.dat +0 -0
  47. data/spec/fixtures/timestamp_9.3.dat +0 -0
  48. data/spec/fixtures/timestamp_big.dat +0 -0
  49. data/spec/fixtures/timestamp_rounding.dat +0 -0
  50. data/spec/fixtures/trueclass.dat +0 -0
  51. data/spec/fixtures/utf8.dat +0 -0
  52. data/spec/fixtures/uuid.dat +0 -0
  53. data/spec/fixtures/uuid_array.dat +0 -0
  54. data/spec/multiline_spec.rb +17 -0
  55. data/spec/spec_helper.rb +22 -0
  56. data/spec/verify_data_formats_spec.rb +415 -0
  57. data/spec/verify_decoder_spec.rb +263 -0
  58. metadata +182 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c58fb95429ac77c914ddd9bdcc6980c3a4c99f87
4
+ data.tar.gz: 51e89df1b40433574b1dc3472ef8e823f15627fd
5
+ SHA512:
6
+ metadata.gz: 078585c52f4266c53000067f0a78a1d7c94b8f73dfc52e9ad9e9fa7bed7c35e499f7ee317fbb6089cfcfe99e802b59549d87910308a3ab89c2e5ffce8326612e
7
+ data.tar.gz: 0d95073114fa52abfd5ef597eab4fd62e6fef155c4a039fd552f02d6b34a08c822e508a78747d49a15554c05bbafe100e3483ceab139a7083d89a3c2e495f4c6
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rubocop.yml ADDED
@@ -0,0 +1,40 @@
1
+ require: rubocop-rspec
2
+
3
+ AllCops:
4
+ TargetRubyVersion: 2.3
5
+
6
+ LineLength:
7
+ Enabled: false
8
+
9
+ FrozenStringLiteralComment:
10
+ Enabled: false
11
+
12
+ ClassLength:
13
+ Enabled: false
14
+
15
+ RescueModifier:
16
+ Enabled: false
17
+
18
+ AccessorMethodName:
19
+ Enabled: false
20
+
21
+ Documentation:
22
+ Enabled: false
23
+
24
+ AbcSize:
25
+ Enabled: false
26
+
27
+ CyclomaticComplexity:
28
+ Enabled: false
29
+
30
+ PerceivedComplexity:
31
+ Enabled: false
32
+
33
+ MethodLength:
34
+ Enabled: false
35
+
36
+ BlockNesting:
37
+ Enabled: false
38
+
39
+ RSpec/DescribeClass:
40
+ Enabled: false
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.0.0-p648
4
+ - 2.1.9
5
+ - 2.2.5
6
+ - 2.3.1
7
+ - 2.4.0-preview1
8
+ - rbx-3.20
9
+ script: bundle exec rspec
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ # Changelog
2
+
3
+ ## 1.0.0 2017-07-22
4
+
5
+ * Initial release after fork from pg_data_encoder
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in activerecord-copy.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,49 @@
1
+ Copyright (c) 2017 Lukas Fittl
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ ---
25
+
26
+ pg_data_encoder work is:
27
+
28
+ Copyright (c) 2012 Pete Brumm
29
+
30
+ MIT License
31
+
32
+ Permission is hereby granted, free of charge, to any person obtaining
33
+ a copy of this software and associated documentation files (the
34
+ "Software"), to deal in the Software without restriction, including
35
+ without limitation the rights to use, copy, modify, merge, publish,
36
+ distribute, sublicense, and/or sell copies of the Software, and to
37
+ permit persons to whom the Software is furnished to do so, subject to
38
+ the following conditions:
39
+
40
+ The above copyright notice and this permission notice shall be
41
+ included in all copies or substantial portions of the Software.
42
+
43
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
44
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
45
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
46
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
47
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
48
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
49
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ # activerecord-copy [ ![](https://img.shields.io/gem/v/activerecord-copy.svg)](https://rubygems.org/gems/activerecord-copy) [ ![](https://img.shields.io/gem/dt/activerecord-copy.svg)](https://rubygems.org/gems/activerecord-copy)
2
+
3
+ Library to assist using binary COPY into PostgreSQL with activerecord.
4
+
5
+ Binary copy functionality is based on [pg_data_encoder](https://github.com/pbrumm/pg_data_encoder),
6
+ but modified to support additional types, and to prefer column type specifications
7
+ over inferred data types.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ gem 'activerecord-copy'
14
+
15
+ ## Usage
16
+
17
+ ```ruby
18
+ class MyModel < ApplicationRecord
19
+ include ActiveRecordCopy::CopyFromClient
20
+ end
21
+
22
+ MyModel.copy_from_client [:field_1, :field_2] do |copy|
23
+ copy << ['abc', 'def']
24
+ end
25
+ ```
26
+
27
+ ## Authors
28
+
29
+ * [Lukas Fittl](https://github.com/lfittl)
30
+
31
+ Credits to [Pete Brumm](https://github.com/pbrumm), who wrote pg_data_encoder,
32
+ which this library repurposes.
33
+
34
+ ## LICENSE
35
+
36
+ Copyright (c) 2017, Lukas Fittl <lukas@fittl.com><br>
37
+ activerecord-copy is licensed under the MIT license, see LICENSE file for details.
38
+
39
+ pg_data_encoder is Copyright (c) 2012, Pete Brumm<br>
40
+ pg_data_encoder is included under the terms of the MIT license, see LICENSE file for details.
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,25 @@
# -*- encoding: utf-8 -*-
# Gem specification for activerecord-copy.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'activerecord-copy/version'

Gem::Specification.new do |gem|
  gem.name          = 'activerecord-copy'
  gem.version       = ActiveRecordCopy::VERSION
  gem.authors       = ['Lukas Fittl']
  gem.email         = ['lukas@fittl.com']
  gem.description   = 'Supports binary COPY into PostgreSQL with activerecord'
  gem.summary       = 'Convenient methods to load data quickly into Postgres'
  gem.homepage      = 'https://github.com/lfittl/activerecord-copy'

  gem.license       = 'MIT'
  # NUL-separated output keeps file names containing newlines or other
  # unusual characters intact, and avoids depending on the `$/` global.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.add_dependency('activerecord', '>= 3.1')

  gem.add_development_dependency('rspec', '>= 2.12.0')
  gem.add_development_dependency('rspec-core', '>= 2.12.0')
end
@@ -0,0 +1,92 @@
require 'activerecord-copy/version'

require 'activerecord-copy/constants'
require 'activerecord-copy/exception'
require 'activerecord-copy/temp_buffer'

require 'activerecord-copy/encode_for_copy'
require 'activerecord-copy/decoder'

require 'json'

require 'active_support'

module ActiveRecordCopy
  # Concern that gives a model class the +copy_from_client+ class method,
  # which streams rows to the database with a binary
  # <tt>COPY ... FROM STDIN</tt> statement.
  module CopyFromClient
    extend ActiveSupport::Concern

    # Buffers rows in an EncodeForCopy encoder and sends the encoded data over
    # the model's raw connection when #close is called.
    class CopyHandler
      # Maximum number of bytes passed to the connection per put_copy_data call.
      CHUNK_SIZE = 10_240

      # columns::     column names (strings or symbols) in the order row values
      #               will be supplied
      # model_class:: class providing +connection+, +columns_hash+ and
      #               +table_name+
      # table_name::  table reference interpolated verbatim into the COPY
      #               statement, so it must already be quoted
      #
      # Raises RuntimeError when a column is not present in
      # <tt>model_class.columns_hash</tt>.
      def initialize(columns:, model_class:, table_name:)
        @columns = columns
        @model_class = model_class
        @connection = model_class.connection.raw_connection
        @table_name = table_name
        @column_types = columns.map { |name| column_type_for(name) }

        reset
      end

      # Adds one row (its values in the same order as the columns) to the
      # pending buffer.
      def <<(row)
        @encoder.add row
      end

      # Sends everything buffered so far to the database. Returns nil.
      def close
        run_copy
      end

      private

      # Returns the type symbol handed to the encoder for the given column.
      # Integer columns are refined by their byte limit: 8 becomes :bigint,
      # 2 becomes :smallint, anything else stays :integer.
      def column_type_for(name)
        column = @model_class.columns_hash[name.to_s]
        raise format('Could not find column %s on %s', name, @model_class.table_name) if column.nil?

        return column.type unless column.type == :integer

        case column.limit
        when 8 then :bigint
        when 2 then :smallint
        else :integer
        end
      end

      # Builds the COPY statement. Column identifiers go through the adapter's
      # own quoting so that names containing a double quote cannot break out
      # of the identifier.
      def copy_statement
        adapter = @model_class.connection
        column_list = @columns.map { |name| adapter.quote_column_name(name) }.join(',')
        "COPY #{@table_name}(#{column_list}) FROM STDIN BINARY"
      end

      # Streams the encoder's output to the server in CHUNK_SIZE pieces.
      def run_copy
        io = @encoder.get_io

        @connection.copy_data(copy_statement) do
          begin
            while chunk = io.readpartial(CHUNK_SIZE) # rubocop:disable Lint/AssignmentInCondition
              @connection.put_copy_data chunk
            end
          rescue EOFError # rubocop:disable Lint/HandleExceptions
            # readpartial reports the end of the buffer by raising EOFError.
          end
        end

        nil
      ensure
        # Runs even when the COPY fails, so the encoder is always removed and
        # the handler starts over with a fresh one.
        @encoder.remove
        reset
      end

      # Starts a fresh encoder for the next batch of rows.
      def reset
        @encoder = ActiveRecordCopy::EncodeForCopy.new column_types: @column_types
        @row_count = 0 # NOTE(review): not read anywhere in this file
      end
    end

    class_methods do
      # Yields a CopyHandler to the block; rows appended to it with << are
      # copied into the table once the block returns.
      #
      # columns::    column names, in the order values appear in each row
      # table_name:: optional, already quoted table reference; defaults to the
      #              model's +quoted_table_name+
      #
      # Returns true.
      def copy_from_client(columns, table_name: nil, &_block)
        table_name ||= quoted_table_name
        handler = CopyHandler.new(columns: columns, model_class: self, table_name: table_name)
        yield(handler)
        handler.close
        true
      end
    end
  end
end

if defined?(ActiveRecord::Base)
  # An ActiveSupport::Concern only installs its class_methods block when the
  # module is included; extending the base class with it adds nothing.
  ActiveRecord::Base.include(ActiveRecordCopy::CopyFromClient)
end
@@ -0,0 +1,18 @@
# Shared constants for encoding and decoding PostgreSQL binary COPY data.
module ActiveRecordCopy
  # Array#pack / String#unpack directives.
  PACKED_UINT_8 = 'C'.freeze # 8-bit unsigned (unsigned char)
  PACKED_UINT_16 = 'n'.freeze # 16-bit unsigned, network (big-endian) byte order
  PACKED_UINT_32 = 'N'.freeze # 32-bit unsigned, network (big-endian) byte order
  PACKED_UINT_64 = 'Q>'.freeze # 64-bit unsigned, big endian
  PACKED_FLOAT_64 = 'G'.freeze # double-precision, network (big-endian) byte order
  PACKED_HEX_STRING = 'H*'.freeze # hex string (high nibble first)

  # PostgreSQL type OIDs, used as the element type in binary array headers.
  INT_TYPE_OID = 23 # int4
  TEXT_TYPE_OID = 25 # text
  UUID_TYPE_OID = 2950 # uuid
  VARCHAR_TYPE_OID = 1043 # varchar

  ASCII_8BIT_ENCODING = 'ASCII-8BIT'.freeze
  UTF_8_ENCODING = 'UTF-8'.freeze

  # Microseconds between the Unix epoch and 2000-01-01 00:00:00 UTC, the
  # origin of PostgreSQL timestamps. Integer arithmetic avoids a needless
  # round trip through Float.
  POSTGRES_EPOCH_TIME = Time.utc(2000, 1, 1).to_i * 1_000_000
end
@@ -0,0 +1,176 @@
require 'date'
require 'json'
require 'stringio'
require 'tempfile'

module ActiveRecordCopy
  # Reads rows back out of a PostgreSQL binary COPY stream.
  #
  # Options:
  # :file::         path of a file holding the COPY data (opened in binary
  #                 mode and closed again once the stream has been consumed)
  # :io::           an already open IO-like object; used when :file is absent
  # :column_types:: Array (by position) or Hash (index => type) of type
  #                 symbols; columns without a type are returned as the raw
  #                 binary String
  #
  # Supported type symbols: :smallint, :int / :integer, :bigint, :float /
  # :double, :boolean, :string / :text / :character, :bytea, :json, :uuid,
  # :uuid_raw, :array / :"integer[]" / :"uuid[]" / :"character[]",
  # :hstore / :hash, :time / :timestamp and :date.
  class Decoder
    # Fixed 11-byte signature that starts every binary COPY stream.
    SIGNATURE = "PGCOPY\n\377\r\n\0".b.freeze
    # 16-bit field count of -1, marking the end of the tuples.
    TRAILER_MARKER = 0xFFFF
    # 32-bit length of -1, marking a NULL value.
    NULL_LENGTH = 0xFFFF_FFFF
    # Signed big-endian directives; PostgreSQL integers, dates and timestamps
    # are all signed.
    PACKED_INT_16 = 's>'.freeze
    PACKED_INT_32 = 'l>'.freeze
    PACKED_INT_64 = 'q>'.freeze
    # Julian day number of 2000-01-01, the origin of PostgreSQL dates.
    POSTGRES_EPOCH_JD = Date.new(2000, 1, 1).jd

    private_constant :SIGNATURE, :TRAILER_MARKER, :NULL_LENGTH,
                     :PACKED_INT_16, :PACKED_INT_32, :PACKED_INT_64,
                     :POSTGRES_EPOCH_JD

    def initialize(options = {})
      # Work on a copy so the caller's hash is not modified when the column
      # types are normalised below.
      @options = options.dup
      @closed = false
      @owns_io = false
      types = @options[:column_types]
      @options[:column_types] =
        if types.is_a?(Array)
          types.each_with_index.each_with_object({}) { |(type, index), map| map[index] = type }
        else
          types || {}
        end
      @io = nil
    end

    # Returns the next row as an Array of decoded values, or nil once the
    # trailer (or the end of the input) has been reached.
    def read_line
      return nil if @closed
      setup_io unless @io

      count_bytes = @io.read(2)
      # A stream that ends without a trailer is treated as finished instead
      # of failing on nil.
      if count_bytes.nil? || count_bytes.bytesize < 2 ||
         count_bytes.unpack(PACKED_UINT_16).first == TRAILER_MARKER
        finish
        return nil
      end

      column_count = count_bytes.unpack(PACKED_UINT_16).first
      Array.new(column_count) do |index|
        field = decode_field(@io)
        type = @options[:column_types][index]
        field && type ? map_field(field, type) : field
      end
    end

    # Yields every remaining row. Without a block an Enumerator is returned.
    def each
      return enum_for(:each) unless block_given?

      loop do
        row = read_line
        break unless row
        yield row
        break if @closed
      end
    end

    # Stops decoding and closes the underlying file if this decoder opened it.
    # An IO passed in through :io is left open for the caller to manage.
    def close
      finish
      nil
    end

    private

    # Marks the decoder as exhausted and releases a file it opened itself.
    def finish
      @closed = true
      @io.close if @owns_io && @io && !@io.closed?
    end

    # Opens the input and consumes the stream header.
    def setup_io
      if @options[:file]
        @io = File.open(@options[:file], 'r:' + ASCII_8BIT_ENCODING)
        @owns_io = true
      elsif !@options[:io].nil?
        @io = @options[:io]
      else
        raise 'NO io present'
      end

      if @io.read(SIGNATURE.bytesize) != SIGNATURE
        finish
        raise 'invalid format'
      end

      @io.read(4) # 32-bit flags field
      # 32-bit length of the header extension area, which is skipped.
      extension_length = @io.read(4).to_s.unpack(PACKED_UINT_32).first.to_i
      @io.read(extension_length) if extension_length > 0
    end

    # Reads one 32-bit big-endian unsigned integer, failing with a clear
    # message when the data stops short.
    def read_uint32(io)
      bytes = io.read(4)
      raise 'unexpected end of COPY data' if bytes.nil? || bytes.bytesize < 4
      bytes.unpack(PACKED_UINT_32).first
    end

    # Returns the raw bytes of the next field, or nil for a NULL field.
    def decode_field(io)
      length = read_uint32(io)
      return nil if length == NULL_LENGTH
      io.read(length)
    end

    # Converts the raw bytes of a non-NULL field according to +type+.
    # Raises RuntimeError for a type symbol that is not supported.
    def map_field(data, type)
      case type
      when :smallint
        data.unpack(PACKED_INT_16).first
      when :int, :integer
        data.unpack(PACKED_INT_32).first
      when :bytea
        data
      when :bigint
        data.unpack(PACKED_INT_64).first
      when :float, :double
        data.unpack(PACKED_FLOAT_64).first
      when :boolean
        data.unpack(PACKED_UINT_8).first == 1
      when :string, :text, :character
        data.force_encoding(UTF_8_ENCODING)
      when :json
        # NOTE(review): JSON.load is meant for trusted input only; consider
        # JSON.parse if the decoded stream can come from an untrusted source.
        JSON.load(data)
      when :uuid
        format_uuid(data.unpack(PACKED_HEX_STRING).first)
      when :uuid_raw
        data.unpack(PACKED_HEX_STRING).first
      when :array, :"integer[]", :"uuid[]", :"character[]"
        decode_array(data)
      when :hstore, :hash
        decode_hstore(data)
      when :time, :timestamp
        # Signed 64-bit count of microseconds relative to 2000-01-01 UTC.
        micros = data.unpack(PACKED_INT_64).first
        Time.at((micros + POSTGRES_EPOCH_TIME) / 1_000_000.0).utc
      when :date
        # Signed 32-bit count of days relative to 2000-01-01.
        Date.jd(data.unpack(PACKED_INT_32).first + POSTGRES_EPOCH_JD)
      else
        raise "Unsupported format #{type}"
      end
    end

    # Formats 32 hex digits in the canonical 8-4-4-4-12 UUID layout.
    def format_uuid(hex)
      "#{hex[0..7]}-#{hex[8..11]}-#{hex[12..15]}-#{hex[16..19]}-#{hex[20..-1]}"
    end

    # Decodes a one-dimensional array of int4, text, varchar or uuid elements.
    # NULL elements are returned as nil; text elements are returned as read,
    # without re-tagging their encoding.
    def decode_array(data)
      io = StringIO.new(data)
      io.read(4) # number of dimensions
      io.read(4) # flags (has-NULL bitmap indicator)
      oid_bytes = io.read(4)
      return [] if oid_bytes.nil?
      element_oid = oid_bytes.unpack(PACKED_UINT_32).first
      # An empty array has no dimensions, so nothing follows the header.
      return [] if io.pos == io.size

      count = read_uint32(io)
      io.read(4) # lower bound of the dimension

      convert =
        case element_oid
        when UUID_TYPE_OID
          ->(bytes) { format_uuid(bytes.unpack(PACKED_HEX_STRING).first) }
        when TEXT_TYPE_OID, VARCHAR_TYPE_OID
          ->(bytes) { bytes }
        when INT_TYPE_OID
          ->(bytes) { bytes.unpack(PACKED_INT_32).first }
        else
          raise "Unsupported Array type #{element_oid}"
        end

      count.times.map do
        length = read_uint32(io)
        length == NULL_LENGTH ? nil : convert.call(io.read(length))
      end
    end

    # Decodes an hstore value into a Hash of UTF-8 strings; NULL values
    # become nil.
    def decode_hstore(data)
      io = StringIO.new(data)
      pair_count = read_uint32(io)
      result = {}

      pair_count.times do
        key = io.read(read_uint32(io)).force_encoding(UTF_8_ENCODING)
        value_length = read_uint32(io)
        value = nil
        unless value_length == NULL_LENGTH
          value = io.read(value_length)
          value = value.force_encoding(UTF_8_ENCODING) unless value.nil?
        end
        result[key] = value
      end

      raise 'remaining hstore bytes!' if io.pos != io.size
      result
    end
  end
end