activerecord-copy 1.0.0
This diff shows the content of publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in those registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rubocop.yml +40 -0
- data/.travis.yml +9 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +49 -0
- data/README.md +40 -0
- data/Rakefile +1 -0
- data/activerecord-copy.gemspec +25 -0
- data/lib/activerecord-copy.rb +92 -0
- data/lib/activerecord-copy/constants.rb +18 -0
- data/lib/activerecord-copy/decoder.rb +176 -0
- data/lib/activerecord-copy/encode_for_copy.rb +253 -0
- data/lib/activerecord-copy/exception.rb +4 -0
- data/lib/activerecord-copy/temp_buffer.rb +38 -0
- data/lib/activerecord-copy/version.rb +3 -0
- data/spec/big_write_spec.rb +17 -0
- data/spec/errors_spec.rb +8 -0
- data/spec/fixtures/3_col_array.txt +1 -0
- data/spec/fixtures/3_col_hstore.dat +0 -0
- data/spec/fixtures/3_col_hstore.txt +1 -0
- data/spec/fixtures/3_column_array.dat +0 -0
- data/spec/fixtures/array_with_two.dat +0 -0
- data/spec/fixtures/array_with_two2.dat +0 -0
- data/spec/fixtures/big_str_array.dat +0 -0
- data/spec/fixtures/big_str_array2.dat +0 -0
- data/spec/fixtures/bigint.dat +0 -0
- data/spec/fixtures/date.dat +0 -0
- data/spec/fixtures/date2.dat +0 -0
- data/spec/fixtures/date2000.dat +0 -0
- data/spec/fixtures/dates.dat +0 -0
- data/spec/fixtures/dates_p924.dat +0 -0
- data/spec/fixtures/dates_pg935.dat +0 -0
- data/spec/fixtures/empty_uuid.dat +0 -0
- data/spec/fixtures/falseclass.dat +0 -0
- data/spec/fixtures/float.dat +0 -0
- data/spec/fixtures/hstore_utf8.dat +0 -0
- data/spec/fixtures/intarray.dat +0 -0
- data/spec/fixtures/json.dat +0 -0
- data/spec/fixtures/json_array.dat +0 -0
- data/spec/fixtures/just_an_array.dat +0 -0
- data/spec/fixtures/just_an_array2.dat +0 -0
- data/spec/fixtures/multiline_hstore.dat +0 -0
- data/spec/fixtures/output.dat +0 -0
- data/spec/fixtures/timestamp.dat +0 -0
- data/spec/fixtures/timestamp_9.3.dat +0 -0
- data/spec/fixtures/timestamp_big.dat +0 -0
- data/spec/fixtures/timestamp_rounding.dat +0 -0
- data/spec/fixtures/trueclass.dat +0 -0
- data/spec/fixtures/utf8.dat +0 -0
- data/spec/fixtures/uuid.dat +0 -0
- data/spec/fixtures/uuid_array.dat +0 -0
- data/spec/multiline_spec.rb +17 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/verify_data_formats_spec.rb +415 -0
- data/spec/verify_decoder_spec.rb +263 -0
- metadata +182 -0
data/lib/activerecord-copy/encode_for_copy.rb
ADDED
@@ -0,0 +1,253 @@
+require 'tempfile'
+require 'stringio'
+
+module ActiveRecordCopy
+  class EncodeForCopy
+    def initialize(options = {})
+      @options = options
+      @closed = false
+      @column_types = @options[:column_types] || {}
+      @io = nil
+      @buffer = TempBuffer.new
+    end
+
+    def add(row)
+      setup_io unless @io
+      @io.write([row.size].pack(PACKED_UINT_16))
+      row.each_with_index do |col, index|
+        encode_field(@buffer, col, index)
+        next if @buffer.empty?
+        @io.write(@buffer.read)
+        @buffer.reopen
+      end
+    end
+
+    def close
+      @closed = true
+      unless @buffer.empty?
+        @io.write(@buffer.read)
+        @buffer.reopen
+      end
+      @io.write([-1].pack(PACKED_UINT_16)) rescue raise Exception, 'No rows have been added to the encoder!'
+      @io.rewind
+    end
+
+    def get_io
+      close unless @closed
+      @io
+    end
+
+    def remove
+      return unless @io.is_a?(Tempfile)
+
+      @io.close
+      @io.unlink
+    end
+
+    private
+
+    def setup_io
+      if @options[:use_tempfile] == true
+        @io = Tempfile.new('copy_binary', encoding: 'ascii-8bit')
+        @io.unlink unless @options[:skip_unlink] == true
+      else
+        @io = StringIO.new
+      end
+      @io.write("PGCOPY\n\377\r\n\0")
+      @io.write([0, 0].pack(PACKED_UINT_32 + PACKED_UINT_32))
+    end
+
+    def write_field(io, buf)
+      io.write([buf.bytesize].pack(PACKED_UINT_32))
+      io.write(buf)
+    end
+
+    def encode_field(io, field, index, depth = 0)
+      # Nil is an exception in that any kind of field type can have a nil value transmitted
+      if field.nil?
+        io.write([-1].pack(PACKED_UINT_32))
+        return
+      end
+
+      if field.is_a?(Array) && ![:json, :jsonb].include?(@column_types[index])
+        encode_array(io, field, index)
+        return
+      end
+
+      case @column_types[index]
+      when :bigint
+        buf = [field.to_i].pack(PACKED_UINT_64)
+        write_field(io, buf)
+      when :integer
+        buf = [field.to_i].pack(PACKED_UINT_32)
+        write_field(io, buf)
+      when :smallint
+        buf = [field.to_i].pack(PACKED_UINT_16)
+        write_field(io, buf)
+      when :numeric
+        encode_numeric(io, field)
+      when :float
+        buf = [field].pack(PACKED_FLOAT_64)
+        write_field(io, buf)
+      when :uuid
+        buf = [field.delete('-')].pack(PACKED_HEX_STRING)
+        write_field(io, buf)
+      when :inet
+        encode_ip_addr(io, IPAddr.new(field))
+      when :binary
+        write_field(io, field)
+      when :json
+        buf = field.to_json.encode(UTF_8_ENCODING)
+        write_field(io, buf)
+      when :jsonb
+        encode_jsonb(io, field)
+      else
+        encode_based_on_input(io, field, index, depth)
+      end
+    end
+
+    def encode_based_on_input(io, field, index, depth)
+      case field
+      when Integer
+        buf = [field].pack(PACKED_UINT_32)
+        write_field(io, buf)
+      when Float
+        buf = [field].pack(PACKED_FLOAT_64)
+        write_field(io, buf)
+      when true
+        buf = [1].pack(PACKED_UINT_8)
+        write_field(io, buf)
+      when false
+        buf = [0].pack(PACKED_UINT_8)
+        write_field(io, buf)
+      when String
+        buf = field.encode(UTF_8_ENCODING)
+        write_field(io, buf)
+      when Hash
+        raise Exception, "Hash's can't contain hashes" if depth > 0
+        hash_io = TempBuffer.new
+        hash_io.write([field.size].pack(PACKED_UINT_32))
+        field.each_pair do |key, val|
+          buf = key.to_s.encode(UTF_8_ENCODING)
+          write_field(hash_io, buf)
+          encode_field(hash_io, val.nil? ? val : val.to_s, index, depth + 1)
+        end
+        io.write([hash_io.pos].pack(PACKED_UINT_32)) # size of hstore data
+        io.write(hash_io.string)
+      when Time
+        buf = [(field.tv_sec * 1_000_000 + field.tv_usec - POSTGRES_EPOCH_TIME).to_i].pack(PACKED_UINT_64)
+        write_field(io, buf)
+      when Date
+        buf = [(field - Date.new(2000, 1, 1)).to_i].pack(PACKED_UINT_32)
+        write_field(io, buf)
+      when IPAddr
+        encode_ip_addr(io, field)
+      else
+        raise Exception, "Unsupported Format: #{field.class.name}"
+      end
+    end
+
+    def encode_array(io, field, index)
+      array_io = TempBuffer.new
+      field.compact!
+      completed = false
+      case field[0]
+      when String
+        if @column_types[index] == :uuid
+          array_io.write([1].pack(PACKED_UINT_32)) # unknown
+          array_io.write([0].pack(PACKED_UINT_32)) # unknown
+
+          array_io.write([UUID_TYPE_OID].pack(PACKED_UINT_32))
+          array_io.write([field.size].pack(PACKED_UINT_32))
+          array_io.write([1].pack(PACKED_UINT_32)) # forcing single dimension array for now
+
+          field.each do |val|
+            buf = [val.delete('-')].pack(PACKED_HEX_STRING)
+            write_field(array_io, buf)
+          end
+        else
+          array_io.write([1].pack(PACKED_UINT_32)) # unknown
+          array_io.write([0].pack(PACKED_UINT_32)) # unknown
+
+          array_io.write([VARCHAR_TYPE_OID].pack(PACKED_UINT_32))
+          array_io.write([field.size].pack(PACKED_UINT_32))
+          array_io.write([1].pack(PACKED_UINT_32)) # forcing single dimension array for now
+
+          field.each do |val|
+            buf = val.to_s.encode(UTF_8_ENCODING)
+            write_field(array_io, buf)
+          end
+        end
+      when Integer
+        array_io.write([1].pack(PACKED_UINT_32)) # unknown
+        array_io.write([0].pack(PACKED_UINT_32)) # unknown
+
+        array_io.write([INT_TYPE_OID].pack(PACKED_UINT_32))
+        array_io.write([field.size].pack(PACKED_UINT_32))
+        array_io.write([1].pack(PACKED_UINT_32)) # forcing single dimension array for now
+
+        field.each do |val|
+          buf = [val.to_i].pack(PACKED_UINT_32)
+          write_field(array_io, buf)
+        end
+      when nil
+        io.write([-1].pack(PACKED_UINT_32))
+        completed = true
+      else
+        raise Exception, 'Arrays support int or string only'
+      end
+
+      unless completed
+        io.write([array_io.pos].pack(PACKED_UINT_32))
+        io.write(array_io.string)
+      end
+    end
+
+    def encode_ip_addr(io, ip_addr)
+      if ip_addr.ipv6?
+        io.write([4 + 16].pack(PACKED_UINT_32)) # Field data size
+        io.write([3].pack(PACKED_UINT_8)) # Family (PGSQL_AF_INET6)
+        io.write([128].pack(PACKED_UINT_8)) # Bits
+        io.write([0].pack(PACKED_UINT_8)) # Is CIDR? => No
+        io.write([16].pack(PACKED_UINT_8)) # Address length in bytes
+      else
+        io.write([4 + 4].pack(PACKED_UINT_32)) # Field data size
+        io.write([2].pack(PACKED_UINT_8)) # Family (PGSQL_AF_INET)
+        io.write([32].pack(PACKED_UINT_8)) # Bits
+        io.write([0].pack(PACKED_UINT_8)) # Is CIDR? => No
+        io.write([4].pack(PACKED_UINT_8)) # Address length in bytes
+      end
+      io.write(ip_addr.hton)
+    end
+
+    def encode_jsonb(io, field)
+      buf = field.to_json.encode(UTF_8_ENCODING)
+      io.write([1 + buf.bytesize].pack(PACKED_UINT_32))
+      io.write([1].pack(PACKED_UINT_8)) # JSONB format version 1
+      io.write(buf)
+    end
+
+    NUMERIC_DEC_DIGITS = 4 # NBASE=10000
+    def encode_numeric(io, field)
+      float_str = field.to_s
+      digits_base10 = float_str.scan(/\d/).map(&:to_i)
+      weight_base10 = float_str.index('.')
+      sign = field < 0.0 ? 0x4000 : 0
+      dscale = digits_base10.size - weight_base10
+
+      digits_before_decpoint = digits_base10[0..weight_base10].reverse.each_slice(NUMERIC_DEC_DIGITS).map { |d| d.reverse.map(&:to_s).join.to_i }.reverse
+      digits_after_decpoint = digits_base10[weight_base10..-1].each_slice(NUMERIC_DEC_DIGITS).map { |d| d.map(&:to_s).join.to_i }
+
+      weight = digits_before_decpoint.size - 1
+      digits = digits_before_decpoint + digits_after_decpoint
+
+      io.write([2 * 4 + 2 * digits.size].pack(PACKED_UINT_32)) # Field data size
+      io.write([digits.size].pack(PACKED_UINT_16)) # ndigits
+      io.write([weight].pack(PACKED_UINT_16)) # weight
+      io.write([sign].pack(PACKED_UINT_16)) # sign
+      io.write([dscale].pack(PACKED_UINT_16)) # dscale
+
+      digits.each { |d| io.write([d].pack(PACKED_UINT_16)) } # NumericDigits
+    end
+  end
+end
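
For context, the encoder above produces a PostgreSQL binary COPY stream: the PGCOPY header, then for each row a 16-bit column count followed by length-prefixed field values, and finally a -1 trailer written by close. A minimal usage sketch, assuming the pg gem's copy_data/put_copy_data API and a hypothetical users(id bigint, email text) table (the table and connection settings are illustrative, not taken from this package):

    require 'pg'
    require 'activerecord-copy'

    # Column 0 is encoded as bigint; untyped columns fall back to encode_based_on_input.
    encoder = ActiveRecordCopy::EncodeForCopy.new(column_types: { 0 => :bigint })
    encoder.add [1, 'first@example.com']
    encoder.add [2, 'second@example.com']

    conn = PG.connect(dbname: 'app_development')
    conn.copy_data 'COPY users (id, email) FROM STDIN (FORMAT binary)' do
      # get_io closes the encoder (writing the -1 trailer) and rewinds the stream.
      conn.put_copy_data(encoder.get_io.read)
    end
    encoder.remove # unlinks the Tempfile when use_tempfile: true was passed; no-op for StringIO
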
data/lib/activerecord-copy/temp_buffer.rb
ADDED
@@ -0,0 +1,38 @@
+module ActiveRecordCopy
+  class TempBuffer
+    def initialize
+      @st = ''.force_encoding(ASCII_8BIT_ENCODING)
+    end
+
+    def size
+      @st.bytesize
+    end
+
+    def write(st)
+      @st << st.dup.force_encoding(ASCII_8BIT_ENCODING)
+    end
+
+    def rewind
+    end
+
+    def reopen
+      @st = ''
+    end
+
+    def read
+      @st
+    end
+
+    def pos
+      @st.bytesize
+    end
+
+    def string
+      @st
+    end
+
+    def empty?
+      @st.empty?
+    end
+  end
+end
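
TempBuffer is an in-memory byte buffer exposing just the StringIO-like subset the encoder relies on (write, read, pos, string, reopen, empty?), always in ASCII-8BIT. A small hypothetical snippet illustrating that contract (not taken from the specs):

    buf = ActiveRecordCopy::TempBuffer.new
    buf.write([42].pack('N'))   # input is dup'ed and forced to ASCII-8BIT
    buf.pos                     # => 4, bytes written so far (same as size)
    buf.read                    # => the accumulated binary string
    buf.reopen                  # clears the buffer for the next field
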
data/spec/big_write_spec.rb
ADDED
@@ -0,0 +1,17 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+require 'benchmark'
+
+describe 'testing changes with large imports and speed issues' do
+  it 'imports lots of data quickly' do
+    encoder = ActiveRecordCopy::EncodeForCopy.new(temp_file: true)
+
+    puts Benchmark.measure {
+      0.upto(100_000) do
+        encoder.add [1, 'text', { a: 1, b: 'asdf' }]
+      end
+    }
+
+    encoder.close
+    _ = encoder.get_io
+  end
+end
data/spec/errors_spec.rb
ADDED
@@ -0,0 +1,8 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+
+describe 'throwing errors' do
+  it 'raises an error when no rows have been added to the encoder' do
+    encoder = ActiveRecordCopy::EncodeForCopy.new
+    expect { encoder.close }.to raise_error(ActiveRecordCopy::Exception)
+  end
+end
data/spec/fixtures/3_col_array.txt
ADDED
@@ -0,0 +1 @@
+1 hi {hi,there,rubyist}

data/spec/fixtures/3_col_hstore.dat
Binary file (contents not shown)
data/spec/fixtures/3_col_hstore.txt
ADDED
@@ -0,0 +1 @@
+1 text "a"=>"1", "b"=>"asdf"
Binary file entries for the remaining 31 data/spec/fixtures/*.dat fixtures (contents not shown)
data/spec/multiline_spec.rb
ADDED
@@ -0,0 +1,17 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+
+describe 'multiline hstore' do
+  it 'encodes multiline hstore data correctly' do
+    encoder = ActiveRecordCopy::EncodeForCopy.new
+    encoder.add [1, { a: 1, b: 2 }]
+    encoder.add [2, { a: 1, b: 3 }]
+    encoder.close
+    io = encoder.get_io
+    existing_data = filedata('multiline_hstore.dat')
+    str = io.read
+    expect(io.class.name).to eq 'StringIO'
+    str.force_encoding('ASCII-8BIT')
+    # File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
+    expect(str).to eq existing_data
+  end
+end
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,22 @@
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+
+require 'rspec'
+require 'activerecord-copy'
+
+RSpec.configure do |config|
+  config.before(:suite) do
+  end
+end
+
+def filedata(filename)
+  str = nil
+  File.open("spec/fixtures/#{filename}", 'r:ASCII-8BIT') do |io|
+    str = io.read
+  end
+  str
+end
+
+def fileio(filename)
+  File.open("spec/fixtures/#{filename}", 'r:ASCII-8BIT')
+end