activerecord-copy 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/.rubocop.yml +40 -0
  4. data/.travis.yml +9 -0
  5. data/CHANGELOG.md +5 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +49 -0
  8. data/README.md +40 -0
  9. data/Rakefile +1 -0
  10. data/activerecord-copy.gemspec +25 -0
  11. data/lib/activerecord-copy.rb +92 -0
  12. data/lib/activerecord-copy/constants.rb +18 -0
  13. data/lib/activerecord-copy/decoder.rb +176 -0
  14. data/lib/activerecord-copy/encode_for_copy.rb +253 -0
  15. data/lib/activerecord-copy/exception.rb +4 -0
  16. data/lib/activerecord-copy/temp_buffer.rb +38 -0
  17. data/lib/activerecord-copy/version.rb +3 -0
  18. data/spec/big_write_spec.rb +17 -0
  19. data/spec/errors_spec.rb +8 -0
  20. data/spec/fixtures/3_col_array.txt +1 -0
  21. data/spec/fixtures/3_col_hstore.dat +0 -0
  22. data/spec/fixtures/3_col_hstore.txt +1 -0
  23. data/spec/fixtures/3_column_array.dat +0 -0
  24. data/spec/fixtures/array_with_two.dat +0 -0
  25. data/spec/fixtures/array_with_two2.dat +0 -0
  26. data/spec/fixtures/big_str_array.dat +0 -0
  27. data/spec/fixtures/big_str_array2.dat +0 -0
  28. data/spec/fixtures/bigint.dat +0 -0
  29. data/spec/fixtures/date.dat +0 -0
  30. data/spec/fixtures/date2.dat +0 -0
  31. data/spec/fixtures/date2000.dat +0 -0
  32. data/spec/fixtures/dates.dat +0 -0
  33. data/spec/fixtures/dates_p924.dat +0 -0
  34. data/spec/fixtures/dates_pg935.dat +0 -0
  35. data/spec/fixtures/empty_uuid.dat +0 -0
  36. data/spec/fixtures/falseclass.dat +0 -0
  37. data/spec/fixtures/float.dat +0 -0
  38. data/spec/fixtures/hstore_utf8.dat +0 -0
  39. data/spec/fixtures/intarray.dat +0 -0
  40. data/spec/fixtures/json.dat +0 -0
  41. data/spec/fixtures/json_array.dat +0 -0
  42. data/spec/fixtures/just_an_array.dat +0 -0
  43. data/spec/fixtures/just_an_array2.dat +0 -0
  44. data/spec/fixtures/multiline_hstore.dat +0 -0
  45. data/spec/fixtures/output.dat +0 -0
  46. data/spec/fixtures/timestamp.dat +0 -0
  47. data/spec/fixtures/timestamp_9.3.dat +0 -0
  48. data/spec/fixtures/timestamp_big.dat +0 -0
  49. data/spec/fixtures/timestamp_rounding.dat +0 -0
  50. data/spec/fixtures/trueclass.dat +0 -0
  51. data/spec/fixtures/utf8.dat +0 -0
  52. data/spec/fixtures/uuid.dat +0 -0
  53. data/spec/fixtures/uuid_array.dat +0 -0
  54. data/spec/multiline_spec.rb +17 -0
  55. data/spec/spec_helper.rb +22 -0
  56. data/spec/verify_data_formats_spec.rb +415 -0
  57. data/spec/verify_decoder_spec.rb +263 -0
  58. metadata +182 -0
@@ -0,0 +1,253 @@
1
+ require 'tempfile'
2
+ require 'stringio'
3
+
4
+ module ActiveRecordCopy
5
+ class EncodeForCopy
6
# Builds an encoder with no output stream yet; the stream is created lazily
# by the first call to #add.
#
# @param options [Hash] encoder options. Keys read in this class:
#   :column_types (Hash of column index => type symbol), :use_tempfile and
#   :skip_unlink (both consulted by #setup_io).
def initialize(options = {})
  @options      = options
  @column_types = options[:column_types] || {}
  @buffer       = TempBuffer.new # per-field scratch space, flushed into @io by #add
  @io           = nil
  @closed       = false
end
13
+
14
# Appends one row to the output stream.
#
# The stream is created on first use. The row is written as a 16-bit column
# count followed by each encoded column; every column is encoded into the
# scratch buffer first and then flushed into the stream.
#
# @param row [Array] column values, in column order
# @return [Array] the row that was passed in
def add(row)
  @io || setup_io
  column_count = [row.size].pack(PACKED_UINT_16)
  @io.write(column_count)
  row.each_with_index do |value, position|
    encode_field(@buffer, value, position)
    unless @buffer.empty?
      @io.write(@buffer.read)
      @buffer.reopen
    end
  end
end
24
+
25
# Finishes the stream: flushes any buffered bytes, writes the 16-bit -1
# trailer and rewinds the stream so it can be read from the start.
#
# Calling it again after a successful close only rewinds the stream; writing
# the trailer a second time would land at position 0 (the stream has been
# rewound) and overwrite the header.
#
# @raise [Exception] when no row was ever added, i.e. no stream exists yet.
#   The encoder is not marked as closed in that case.
def close
  # Check explicitly instead of rescuing every error raised by the write:
  # a genuine IO failure must not be reported as "no rows".
  raise Exception, 'No rows have been added to the encoder!' if @io.nil?
  return @io.rewind if @closed

  @closed = true
  unless @buffer.empty?
    @io.write(@buffer.read)
    @buffer.reopen
  end
  @io.write([-1].pack(PACKED_UINT_16)) # end-of-data trailer
  @io.rewind
end
34
+
35
# Returns the output stream, closing the encoder first if that has not
# happened yet.
#
# @return [Object] the underlying stream held in @io
def get_io
  @closed || close
  @io
end
39
+
40
# Closes and deletes the backing temp file. Does nothing when the stream is
# not a Tempfile (for example the in-memory StringIO).
def remove
  if @io.kind_of?(Tempfile)
    @io.close
    @io.unlink
  end
end
46
+
47
+ private
48
+
49
# Creates the output stream and writes the COPY binary file header.
#
# With `use_tempfile: true` the stream is a Tempfile, which is unlinked
# immediately unless `skip_unlink: true` is given; otherwise it is an
# in-memory StringIO.
def setup_io
  if @options[:use_tempfile] == true
    @io = Tempfile.new('copy_binary', encoding: 'ascii-8bit')
    # The payload is raw bytes: switch off text-mode newline translation,
    # which would otherwise rewrite "\n" on platforms that distinguish
    # text and binary files.
    @io.binmode
    @io.unlink unless @options[:skip_unlink] == true
  else
    @io = StringIO.new
  end
  @io.write("PGCOPY\n\377\r\n\0")                         # signature
  @io.write([0, 0].pack(PACKED_UINT_32 + PACKED_UINT_32)) # two zeroed 32-bit header fields
end
59
+
60
# Writes one length-prefixed value: the byte size of +buf+ as a 32-bit
# integer, followed by +buf+ itself.
#
# @param io [#write] destination
# @param buf [String] already-encoded field bytes
def write_field(io, buf)
  length_prefix = [buf.bytesize].pack(PACKED_UINT_32)
  io.write(length_prefix)
  io.write(buf)
end
64
+
65
# Encodes a single column value into +io+.
#
# Dispatch order: nil is written as a 32-bit -1 length with no payload;
# arrays go to #encode_array unless the column is declared :json/:jsonb;
# a declared column type selects a fixed encoding; anything else is encoded
# according to the Ruby class of the value.
#
# @param io [#write] destination
# @param field [Object] the value to encode
# @param index [Integer] column position, used to look up the declared type
# @param depth [Integer] nesting level, forwarded to #encode_based_on_input
def encode_field(io, field, index, depth = 0)
  # Nil is special: a column of any type may carry it.
  if field.nil?
    io.write([-1].pack(PACKED_UINT_32))
    return
  end

  column_type = @column_types[index]

  if field.is_a?(Array) && column_type != :json && column_type != :jsonb
    encode_array(io, field, index)
    return
  end

  case column_type
  when :bigint   then write_field(io, [field.to_i].pack(PACKED_UINT_64))
  when :integer  then write_field(io, [field.to_i].pack(PACKED_UINT_32))
  when :smallint then write_field(io, [field.to_i].pack(PACKED_UINT_16))
  when :numeric  then encode_numeric(io, field)
  when :float    then write_field(io, [field].pack(PACKED_FLOAT_64))
  when :uuid     then write_field(io, [field.delete('-')].pack(PACKED_HEX_STRING))
  when :inet     then encode_ip_addr(io, IPAddr.new(field))
  when :binary   then write_field(io, field)
  when :json     then write_field(io, field.to_json.encode(UTF_8_ENCODING))
  when :jsonb    then encode_jsonb(io, field)
  else encode_based_on_input(io, field, index, depth)
  end
end
108
+
109
# Encodes a value by its Ruby class; used when the column has no declared
# type that #encode_field handles itself.
#
# Integers are written as 32-bit values, floats as 64-bit, booleans as one
# byte, strings as UTF-8. A Hash is written as a pair count followed by
# length-prefixed keys and values (values are stringified, nil stays nil).
# Time becomes microseconds relative to POSTGRES_EPOCH_TIME and Date becomes
# days since 2000-01-01.
#
# @param io [#write] destination
# @param field [Object] the value to encode
# @param index [Integer] column position, forwarded for nested values
# @param depth [Integer] nesting level; a Hash at depth > 0 is rejected
# @raise [Exception] for a nested Hash or an unsupported class
def encode_based_on_input(io, field, index, depth)
  case field
  when Integer
    write_field(io, [field].pack(PACKED_UINT_32))
  when Float
    write_field(io, [field].pack(PACKED_FLOAT_64))
  when true, false
    write_field(io, [field ? 1 : 0].pack(PACKED_UINT_8))
  when String
    write_field(io, field.encode(UTF_8_ENCODING))
  when Hash
    raise Exception, "Hash's can't contain hashes" if depth > 0

    pairs_io = TempBuffer.new
    pairs_io.write([field.size].pack(PACKED_UINT_32))
    field.each_pair do |key, val|
      write_field(pairs_io, key.to_s.encode(UTF_8_ENCODING))
      text_value = val.nil? ? nil : val.to_s
      encode_field(pairs_io, text_value, index, depth + 1)
    end
    io.write([pairs_io.pos].pack(PACKED_UINT_32)) # size of hstore data
    io.write(pairs_io.string)
  when Time
    microseconds = field.tv_sec * 1_000_000 + field.tv_usec - POSTGRES_EPOCH_TIME
    write_field(io, [microseconds.to_i].pack(PACKED_UINT_64))
  when Date
    days = (field - Date.new(2000, 1, 1)).to_i
    write_field(io, [days].pack(PACKED_UINT_32))
  when IPAddr
    encode_ip_addr(io, field)
  else
    raise Exception, "Unsupported Format: #{field.class.name}"
  end
end
149
+
150
# Encodes a one-dimensional array of strings or integers.
#
# nil elements are dropped before encoding, working on a copy so the
# caller's array is never modified (frozen arrays are therefore accepted).
# An array that is empty after dropping nils is written as a 32-bit -1
# length, the same marker #encode_field uses for nil. The class of the first
# remaining element selects the element type: String (UUID when the column
# is declared :uuid, VARCHAR otherwise) or Integer.
#
# @param io [#write] destination
# @param field [Array] the values to encode
# @param index [Integer] column position, used to look up the declared type
# @raise [Exception] when the first element is neither String nor Integer
def encode_array(io, field, index)
  values = field.compact # non-destructive: leave the caller's array untouched

  if values.empty?
    io.write([-1].pack(PACKED_UINT_32))
    return
  end

  array_io = TempBuffer.new
  case values.first
  when String
    if @column_types[index] == :uuid
      write_array_header(array_io, UUID_TYPE_OID, values.size)
      values.each { |val| write_field(array_io, [val.delete('-')].pack(PACKED_HEX_STRING)) }
    else
      write_array_header(array_io, VARCHAR_TYPE_OID, values.size)
      values.each { |val| write_field(array_io, val.to_s.encode(UTF_8_ENCODING)) }
    end
  when Integer
    write_array_header(array_io, INT_TYPE_OID, values.size)
    values.each { |val| write_field(array_io, [val.to_i].pack(PACKED_UINT_32)) }
  else
    raise Exception, 'Arrays support int or string only'
  end

  io.write([array_io.pos].pack(PACKED_UINT_32)) # total size of the array payload
  io.write(array_io.string)
end

# Writes the five 32-bit header words that precede the array elements.
# NOTE(review): the constant words look like PostgreSQL's binary array layout
# (dimension count, has-null flag, element OID, dimension size, lower
# bound) — confirm against array_recv before relying on these names.
def write_array_header(array_io, element_oid, element_count)
  array_io.write([1].pack(PACKED_UINT_32))             # presumably number of dimensions
  array_io.write([0].pack(PACKED_UINT_32))             # presumably the has-null flag
  array_io.write([element_oid].pack(PACKED_UINT_32))   # element type OID
  array_io.write([element_count].pack(PACKED_UINT_32)) # number of elements
  array_io.write([1].pack(PACKED_UINT_32))             # single dimension only for now
end
205
+
206
# Writes an IP address as a non-CIDR inet value with a full-length mask:
# field size, address family, mask bits, CIDR flag, address length, and
# finally the address bytes in network order.
#
# @param io [#write] destination
# @param ip_addr [IPAddr] the address to encode
def encode_ip_addr(io, ip_addr)
  # family: 3 = PGSQL_AF_INET6, 2 = PGSQL_AF_INET
  family, mask_bits, address_bytes = ip_addr.ipv6? ? [3, 128, 16] : [2, 32, 4]

  io.write([4 + address_bytes].pack(PACKED_UINT_32)) # Field data size
  io.write([family].pack(PACKED_UINT_8))             # Family
  io.write([mask_bits].pack(PACKED_UINT_8))          # Bits
  io.write([0].pack(PACKED_UINT_8))                  # Is CIDR? => No
  io.write([address_bytes].pack(PACKED_UINT_8))      # Address length in bytes
  io.write(ip_addr.hton)
end
222
+
223
# Writes a jsonb value: the field size, a one-byte format version (1), then
# the UTF-8 JSON text produced by +field.to_json+.
#
# @param io [#write] destination
# @param field [#to_json] the value to serialize
def encode_jsonb(io, field)
  json_text = field.to_json.encode(UTF_8_ENCODING)
  io.write([json_text.bytesize + 1].pack(PACKED_UINT_32)) # payload plus version byte
  io.write([1].pack(PACKED_UINT_8)) # JSONB format version 1
  io.write(json_text)
end
229
+
230
NUMERIC_DEC_DIGITS = 4 # NBASE=10000
# Decimal text: optional sign, integer digits, optional fraction, optional exponent.
NUMERIC_LITERAL = /\A([+-]?)(\d*)(?:\.(\d*))?(?:[eE]([+-]?\d+))?\z/

# Writes a numeric value as base-10000 digit groups.
#
# The value is read from +field.to_s+, so Integer, Float, decimal strings and
# exponent notation ("1.0e-05", "0.12345e3") are all accepted. Integer digits
# are grouped in fours from the decimal point leftwards and fraction digits
# from the decimal point rightwards (right-padded with zeros), so every group
# is one base-10000 digit. Leading and trailing all-zero groups are dropped
# and the weight adjusted to match.
#
# Layout written: field size, ndigits, weight (power of 10000 of the first
# group), sign (0x4000 when negative), dscale (count of fraction digits),
# then the groups as 16-bit values.
#
# @param io [#write] destination
# @param field [#to_s] the number to encode
# @raise [Exception] when +field.to_s+ is not a finite decimal number
def encode_numeric(io, field)
  negative, int_part, frac_part = split_numeric_literal(field)
  dscale = frac_part.size

  # Pad both sides to a multiple of NUMERIC_DEC_DIGITS, away from the point.
  int_groups = group_numeric_digits(('0' * (-int_part.size % NUMERIC_DEC_DIGITS)) + int_part)
  frac_groups = group_numeric_digits(frac_part + ('0' * (-frac_part.size % NUMERIC_DEC_DIGITS)))

  weight = int_groups.size - 1
  digits = int_groups + frac_groups
  while digits.first == 0 # each dropped leading group lowers the weight
    digits.shift
    weight -= 1
  end
  digits.pop while digits.last == 0

  if digits.empty? # zero: no digits, no sign
    weight = 0
    negative = false
  end
  sign = negative ? 0x4000 : 0

  io.write([2 * 4 + 2 * digits.size].pack(PACKED_UINT_32)) # Field data size
  io.write([digits.size].pack(PACKED_UINT_16)) # ndigits
  io.write([weight].pack(PACKED_UINT_16)) # weight
  io.write([sign].pack(PACKED_UINT_16)) # sign
  io.write([dscale].pack(PACKED_UINT_16)) # dscale

  digits.each { |d| io.write([d].pack(PACKED_UINT_16)) } # NumericDigits
end

# Splits +field.to_s+ into [negative?, integer digits, fraction digits],
# with any exponent already applied by moving the decimal point.
def split_numeric_literal(field)
  match = NUMERIC_LITERAL.match(field.to_s.strip)
  int_part = match ? match[2] : ''
  frac_part = match ? match[3].to_s : ''
  raise Exception, "Unsupported numeric value: #{field.inspect}" if (int_part + frac_part).empty?

  exponent = match[4].to_i
  if exponent > 0
    # Move the point right: fraction digits migrate to the integer part.
    frac_part = frac_part.ljust(exponent, '0')
    int_part += frac_part[0, exponent]
    frac_part = frac_part[exponent..-1]
  elsif exponent < 0
    # Move the point left: integer digits migrate to the fraction part.
    int_part = int_part.rjust(-exponent, '0')
    frac_part = int_part[exponent..-1] + frac_part
    int_part = int_part[0...exponent]
  end

  [match[1] == '-', int_part, frac_part]
end

# Converts a digit string whose length is a multiple of NUMERIC_DEC_DIGITS
# into an array of base-10000 integers.
def group_numeric_digits(digit_string)
  digit_string.chars.each_slice(NUMERIC_DEC_DIGITS).map { |group| group.join.to_i }
end
252
+ end
253
+ end
@@ -0,0 +1,4 @@
1
module ActiveRecordCopy
  # Error class raised by this library; a StandardError subclass, so a bare
  # `rescue` catches it.
  class Exception < StandardError; end
end
@@ -0,0 +1,38 @@
1
module ActiveRecordCopy
  # Minimal in-memory, append-only byte buffer with a small IO-like surface
  # (write / read / string / pos / rewind / reopen).
  #
  # Everything written is tagged with ASCII_8BIT_ENCODING and appended to one
  # String; #read and #string return that String itself, not a copy.
  class TempBuffer
    def initialize
      @st = blank_buffer
    end

    # @return [Integer] number of bytes currently held
    def size
      @st.bytesize
    end

    # Appends a copy of +st+ re-tagged as raw bytes, so text in any encoding
    # can be mixed with packed binary data. The argument is not modified.
    def write(st)
      @st << st.dup.force_encoding(ASCII_8BIT_ENCODING)
    end

    # No-op: reads always return the whole buffer.
    def rewind
    end

    # Discards the contents and starts over with a fresh, empty buffer that
    # carries the same binary encoding as the initial one. A new String is
    # allocated rather than reusing a string literal, so appends never
    # mutate a literal.
    def reopen
      @st = blank_buffer
    end

    # @return [String] the whole buffer
    def read
      @st
    end

    # @return [Integer] the write position, i.e. the byte size
    def pos
      @st.bytesize
    end

    # @return [String] the whole buffer
    def string
      @st
    end

    # @return [Boolean] true when nothing has been written since creation or
    #   the last #reopen
    def empty?
      @st.empty?
    end

    private

    # Builds the empty backing string used by #initialize and #reopen.
    def blank_buffer
      String.new.force_encoding(ASCII_8BIT_ENCODING)
    end
  end
end
@@ -0,0 +1,3 @@
1
module ActiveRecordCopy
  # Gem version string (frozen).
  VERSION = '1.0.0'.freeze
end
@@ -0,0 +1,17 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'benchmark'
3
+
4
# Smoke/benchmark spec: pushes a large number of rows through the encoder and
# prints the measured time. It asserts nothing beyond "does not raise".
describe 'testing changes with large imports and speed issues' do
  it 'imports lots of data quickly' do
    # The encoder switches to a Tempfile only when :use_tempfile is true;
    # any other key (such as the previous :temp_file) is ignored and the
    # data silently stays in memory.
    encoder = ActiveRecordCopy::EncodeForCopy.new(use_tempfile: true)

    puts Benchmark.measure {
      0.upto(100_000) do
        encoder.add [1, 'text', { a: 1, b: 'asdf' }]
      end
    }

    encoder.close
    _ = encoder.get_io
    encoder.remove # release the temp file handle once the run is over
  end
end
@@ -0,0 +1,8 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Closing an encoder that never received a row must fail with the library's
# own error class.
describe 'throwing errors' do
  it 'raises an error when no rows have been added to the encoder' do
    empty_encoder = ActiveRecordCopy::EncodeForCopy.new

    expect do
      empty_encoder.close
    end.to raise_error(ActiveRecordCopy::Exception)
  end
end
@@ -0,0 +1 @@
1
+ 1 hi {hi,there,rubyist}
Binary file
@@ -0,0 +1 @@
1
+ 1 text "a"=>"1", "b"=>"asdf"
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,17 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Encodes two rows that each carry a hash column and compares the bytes with
# the recorded fixture.
describe 'multiline hstore' do
  it 'encodes multiline hstore data correctly' do
    encoder = ActiveRecordCopy::EncodeForCopy.new
    rows = [
      [1, { a: 1, b: 2 }],
      [2, { a: 1, b: 3 }]
    ]
    rows.each { |row| encoder.add row }
    encoder.close

    io = encoder.get_io
    expected_bytes = filedata('multiline_hstore.dat')
    encoded = io.read
    expect(io.class.name).to eq 'StringIO'
    encoded.force_encoding('ASCII-8BIT')
    # To regenerate a fixture while debugging:
    # File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(encoded) }
    expect(encoded).to eq expected_bytes
  end
end
@@ -0,0 +1,22 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+
4
+ require 'rspec'
5
+ require 'activerecord-copy'
6
+
7
RSpec.configure do |rspec|
  # Placeholder suite-level hook; it currently does nothing.
  rspec.before(:suite) {}
end
11
+
12
# Reads a fixture from spec/fixtures (relative to the working directory) and
# returns its full contents as an ASCII-8BIT string.
#
# @param filename [String] file name inside spec/fixtures
# @return [String] the file contents
def filedata(filename)
  fixture_path = "spec/fixtures/#{filename}"
  File.open(fixture_path, 'r:ASCII-8BIT', &:read)
end
19
+
20
# Opens a fixture from spec/fixtures (relative to the working directory) for
# reading with ASCII-8BIT encoding. The handle is returned open; the caller
# is responsible for closing it.
#
# @param filename [String] file name inside spec/fixtures
# @return [File] the open file
def fileio(filename)
  fixture_path = "spec/fixtures/#{filename}"
  File.new(fixture_path, 'r:ASCII-8BIT')
end