activerecord-copy 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/lib/activerecord-copy/encode_for_copy.rb +125 -20
- data/lib/activerecord-copy/version.rb +1 -1
- data/spec/fixtures/range_test.dat +0 -0
- data/spec/verify_data_formats_spec.rb +23 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 050aa5f637d6f6d6a155e4d9f90c30062d62398e
|
4
|
+
data.tar.gz: b6c02d2fc46f8bfd2021b60ece7f42a19d2822ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ffb2c84b4fa363f904dfd82cccb99116d96227b72b5cd871a6c647057b6fa229ad7f1c2ff3645e3ad1a124c7c83ce25486a0203837fab62a8b02745d186f833
|
7
|
+
data.tar.gz: 851ad2c3cfc6a986dbc184e2672b369cf83d3dadc8b33ee4709ff58f1451c8c86d82be1bd7f3d9ba8c5a8dd66c2f8cf0ee3897f2e422f6b42d0bf9028713eb82
|
data/CHANGELOG.md
CHANGED
(hunks for this file were not captured)
data/lib/activerecord-copy/encode_for_copy.rb
CHANGED
@@ -1,7 +1,23 @@
|
|
1
1
|
require 'tempfile'
|
2
2
|
require 'stringio'
|
3
|
+
require 'ipaddr'
|
3
4
|
|
4
5
|
module ActiveRecordCopy
|
6
|
+
class IntermediateBuffer
|
7
|
+
attr_reader :bytes
|
8
|
+
def initialize
|
9
|
+
@bytes = ''
|
10
|
+
end
|
11
|
+
|
12
|
+
def write(b)
|
13
|
+
@bytes += b
|
14
|
+
end
|
15
|
+
|
16
|
+
def size
|
17
|
+
@bytes.size
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
5
21
|
class EncodeForCopy
|
6
22
|
def initialize(options = {})
|
7
23
|
@options = options
|
@@ -62,19 +78,9 @@ module ActiveRecordCopy
|
|
62
78
|
io.write(buf)
|
63
79
|
end
|
64
80
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
io.write([-1].pack(PACKED_UINT_32))
|
69
|
-
return
|
70
|
-
end
|
71
|
-
|
72
|
-
if field.is_a?(Array) && ![:json, :jsonb].include?(@column_types[index])
|
73
|
-
encode_array(io, field, index)
|
74
|
-
return
|
75
|
-
end
|
76
|
-
|
77
|
-
case @column_types[index]
|
81
|
+
# Primitive types that can also appear in ranges/arrays/etc
|
82
|
+
def write_simple_field(io, field, type)
|
83
|
+
case type
|
78
84
|
when :bigint
|
79
85
|
buf = [field.to_i].pack(PACKED_UINT_64)
|
80
86
|
write_field(io, buf)
|
@@ -89,6 +95,32 @@ module ActiveRecordCopy
|
|
89
95
|
when :float
|
90
96
|
buf = [field].pack(PACKED_FLOAT_64)
|
91
97
|
write_field(io, buf)
|
98
|
+
when :timestamp, :timestamptz
|
99
|
+
buf = [(field.tv_sec * 1_000_000 + field.tv_usec - POSTGRES_EPOCH_TIME).to_i].pack(PACKED_UINT_64)
|
100
|
+
write_field(io, buf)
|
101
|
+
when :date
|
102
|
+
buf = [(field - Date.new(2000, 1, 1)).to_i].pack(PACKED_UINT_32)
|
103
|
+
write_field(io, buf)
|
104
|
+
else
|
105
|
+
raise Exception, "Unsupported simple type: #{type}"
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
def encode_field(io, field, index, depth = 0)
|
110
|
+
# Nil is an exception in that any kind of field type can have a nil value transmitted
|
111
|
+
if field.nil?
|
112
|
+
io.write([-1].pack(PACKED_UINT_32))
|
113
|
+
return
|
114
|
+
end
|
115
|
+
|
116
|
+
if field.is_a?(Array) && ![:json, :jsonb].include?(@column_types[index])
|
117
|
+
encode_array(io, field, index)
|
118
|
+
return
|
119
|
+
end
|
120
|
+
|
121
|
+
case @column_types[index]
|
122
|
+
when :bigint, :integer, :smallint, :numeric, :float
|
123
|
+
write_simple_field(io, field, @column_types[index])
|
92
124
|
when :uuid
|
93
125
|
buf = [field.delete('-')].pack(PACKED_HEX_STRING)
|
94
126
|
write_field(io, buf)
|
@@ -101,6 +133,8 @@ module ActiveRecordCopy
|
|
101
133
|
write_field(io, buf)
|
102
134
|
when :jsonb
|
103
135
|
encode_jsonb(io, field)
|
136
|
+
when :int4range, :int8range, :numrange, :tsrange, :tstzrange, :daterange
|
137
|
+
encode_range(io, field, @column_types[index])
|
104
138
|
else
|
105
139
|
encode_based_on_input(io, field, index, depth)
|
106
140
|
end
|
@@ -135,13 +169,26 @@ module ActiveRecordCopy
|
|
135
169
|
io.write([hash_io.pos].pack(PACKED_UINT_32)) # size of hstore data
|
136
170
|
io.write(hash_io.string)
|
137
171
|
when Time
|
138
|
-
|
139
|
-
write_field(io, buf)
|
172
|
+
write_simple_field(io, field, :timestamp)
|
140
173
|
when Date
|
141
|
-
|
142
|
-
write_field(io, buf)
|
174
|
+
write_simple_field(io, field, :date)
|
143
175
|
when IPAddr
|
144
176
|
encode_ip_addr(io, field)
|
177
|
+
when Range
|
178
|
+
range_type = case field.begin
|
179
|
+
when Integer
|
180
|
+
:int4range
|
181
|
+
when Float
|
182
|
+
:numrange
|
183
|
+
when Time
|
184
|
+
:tstzrange
|
185
|
+
when Date
|
186
|
+
:daterange
|
187
|
+
else
|
188
|
+
raise Exception, "Unsupported range input type #{field.begin.class.name} for index #{index}"
|
189
|
+
end
|
190
|
+
|
191
|
+
encode_range(io, field, range_type)
|
145
192
|
else
|
146
193
|
raise Exception, "Unsupported Format: #{field.class.name}"
|
147
194
|
end
|
@@ -220,6 +267,49 @@ module ActiveRecordCopy
|
|
220
267
|
io.write(ip_addr.hton)
|
221
268
|
end
|
222
269
|
|
270
|
+
# From the Postgres source:
|
271
|
+
# Binary representation: The first byte is the flags, then the lower bound
|
272
|
+
# (if present), then the upper bound (if present). Each bound is represented
|
273
|
+
# by a 4-byte length header and the binary representation of that bound (as
|
274
|
+
# returned by a call to the send function for the subtype).
|
275
|
+
RANGE_LB_INC = 0x02 # lower bound is inclusive
|
276
|
+
RANGE_UB_INC = 0x04 # upper bound is inclusive
|
277
|
+
RANGE_LB_INF = 0x08 # lower bound is -infinity
|
278
|
+
RANGE_UB_INF = 0x10 # upper bound is +infinity
|
279
|
+
def encode_range(io, range, range_type)
|
280
|
+
field_data_type = case range_type
|
281
|
+
when :int4range
|
282
|
+
:integer
|
283
|
+
when :int8range
|
284
|
+
:bigint
|
285
|
+
when :numrange
|
286
|
+
:numeric
|
287
|
+
when :tsrange
|
288
|
+
:timestamp
|
289
|
+
when :tstzrange
|
290
|
+
:timestamptz
|
291
|
+
when :daterange
|
292
|
+
:date
|
293
|
+
else
|
294
|
+
raise Exception, "Unsupported range type: #{range_type}"
|
295
|
+
end
|
296
|
+
flags = 0
|
297
|
+
flags |= RANGE_LB_INC # Ruby ranges always include the lower bound
|
298
|
+
flags |= RANGE_UB_INC unless range.exclude_end?
|
299
|
+
flags |= RANGE_LB_INF if range.begin.respond_to?(:infinite?) && range.begin.infinite?
|
300
|
+
flags |= RANGE_UB_INF if range.end.respond_to?(:infinite?) && range.end.infinite?
|
301
|
+
tmp_io = IntermediateBuffer.new
|
302
|
+
tmp_io.write([flags].pack(PACKED_UINT_8))
|
303
|
+
if range.begin && (!range.begin.respond_to?(:infinite?) || !range.begin.infinite?)
|
304
|
+
write_simple_field(tmp_io, range.begin, field_data_type)
|
305
|
+
end
|
306
|
+
if range.end && (!range.end.respond_to?(:infinite?) || !range.end.infinite?)
|
307
|
+
write_simple_field(tmp_io, range.end, field_data_type)
|
308
|
+
end
|
309
|
+
io.write([tmp_io.size].pack(PACKED_UINT_32))
|
310
|
+
io.write(tmp_io.bytes)
|
311
|
+
end
|
312
|
+
|
223
313
|
def encode_jsonb(io, field)
|
224
314
|
buf = field.to_json.encode(UTF_8_ENCODING)
|
225
315
|
io.write([1 + buf.bytesize].pack(PACKED_UINT_32))
|
@@ -227,7 +317,19 @@ module ActiveRecordCopy
|
|
227
317
|
io.write(buf)
|
228
318
|
end
|
229
319
|
|
230
|
-
|
320
|
+
NUMERIC_NBASE = 10000
|
321
|
+
def base10_to_base10000(intval)
|
322
|
+
digits = []
|
323
|
+
loop do
|
324
|
+
newintval = intval / NUMERIC_NBASE
|
325
|
+
digits << intval - newintval * NUMERIC_NBASE
|
326
|
+
intval = newintval
|
327
|
+
break if intval == 0
|
328
|
+
end
|
329
|
+
digits
|
330
|
+
end
|
331
|
+
|
332
|
+
NUMERIC_DEC_DIGITS = 4
|
231
333
|
def encode_numeric(io, field)
|
232
334
|
float_str = field.to_s
|
233
335
|
digits_base10 = float_str.scan(/\d/).map(&:to_i)
|
@@ -235,8 +337,11 @@ module ActiveRecordCopy
|
|
235
337
|
sign = field < 0.0 ? 0x4000 : 0
|
236
338
|
dscale = digits_base10.size - weight_base10
|
237
339
|
|
238
|
-
|
239
|
-
|
340
|
+
int_part, frac_part = float_str.split('.')
|
341
|
+
frac_part += '0' * (NUMERIC_DEC_DIGITS - frac_part.size % NUMERIC_DEC_DIGITS) # Add trailing zeroes so digit calculations are correct
|
342
|
+
|
343
|
+
digits_before_decpoint = base10_to_base10000(int_part.to_i)
|
344
|
+
digits_after_decpoint = base10_to_base10000(frac_part.to_i).reverse
|
240
345
|
|
241
346
|
weight = digits_before_decpoint.size - 1
|
242
347
|
digits = digits_before_decpoint + digits_after_decpoint
|
data/spec/fixtures/range_test.dat
Binary file
|
data/spec/verify_data_formats_spec.rb
CHANGED
@@ -412,4 +412,27 @@ describe 'generating data' do
|
|
412
412
|
# File.open('spec/fixtures/output.dat', 'w:ASCII-8BIT') {|out| out.write(str) }
|
413
413
|
expect(str).to eq existing_data
|
414
414
|
end
|
415
|
+
|
416
|
+
# CREATE TABLE test(i4r int4range, i8r int8range, nr numrange, tr tsrange, tzr tstzrange, dr daterange);
|
417
|
+
# INSERT INTO test VALUES ('[12, 14)', '[223372033854775802, 223372033854775810)', '[12.5,13.88211]', '[2010-01-01 15:20, 2010-01-01 15:30)', '[2018-05-24 00:00:00+00,)', '[2018-05-24,)');
|
418
|
+
# \copy test TO range_test.dat WITH (FORMAT BINARY);
|
419
|
+
it 'encodes range data correctly' do
|
420
|
+
encoder = ActiveRecordCopy::EncodeForCopy.new(column_types: { 0 => :int4range, 1 => :int8range, 2 => :numrange, 3 => :tsrange, 4 => :tstzrange, 5 => :daterange })
|
421
|
+
encoder.add([
|
422
|
+
12...14,
|
423
|
+
223372033854775802...223372033854775810,
|
424
|
+
12.5..13.88211,
|
425
|
+
Time.parse('2010-01-01 15:20+00')...Time.parse('2010-01-01 15:30+00'),
|
426
|
+
Time.parse('2018-05-24 00:00:00+00')...Float::INFINITY,
|
427
|
+
Date.parse('2018-05-24')...Float::INFINITY
|
428
|
+
])
|
429
|
+
encoder.close
|
430
|
+
io = encoder.get_io
|
431
|
+
existing_data = filedata('range_test.dat')
|
432
|
+
str = io.read
|
433
|
+
expect(io.class.name).to eq 'StringIO'
|
434
|
+
str.force_encoding('ASCII-8BIT')
|
435
|
+
#File.open('spec/fixtures/output.dat', 'w:ASCII-8BIT') {|out| out.write(str) }
|
436
|
+
expect(str).to eq existing_data
|
437
|
+
end
|
415
438
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: activerecord-copy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lukas Fittl
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-05-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -103,6 +103,7 @@ files:
|
|
103
103
|
- spec/fixtures/just_an_array2.dat
|
104
104
|
- spec/fixtures/multiline_hstore.dat
|
105
105
|
- spec/fixtures/output.dat
|
106
|
+
- spec/fixtures/range_test.dat
|
106
107
|
- spec/fixtures/timestamp.dat
|
107
108
|
- spec/fixtures/timestamp_9.3.dat
|
108
109
|
- spec/fixtures/timestamp_big.dat
|
@@ -135,7 +136,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
135
136
|
version: '0'
|
136
137
|
requirements: []
|
137
138
|
rubyforge_project:
|
138
|
-
rubygems_version: 2.
|
139
|
+
rubygems_version: 2.6.13
|
139
140
|
signing_key:
|
140
141
|
specification_version: 4
|
141
142
|
summary: Convenient methods to load data quickly into Postgres
|
@@ -168,6 +169,7 @@ test_files:
|
|
168
169
|
- spec/fixtures/just_an_array2.dat
|
169
170
|
- spec/fixtures/multiline_hstore.dat
|
170
171
|
- spec/fixtures/output.dat
|
172
|
+
- spec/fixtures/range_test.dat
|
171
173
|
- spec/fixtures/timestamp.dat
|
172
174
|
- spec/fixtures/timestamp_9.3.dat
|
173
175
|
- spec/fixtures/timestamp_big.dat
|