activerecord-copy 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.rubocop.yml +40 -0
- data/.travis.yml +9 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +49 -0
- data/README.md +40 -0
- data/Rakefile +1 -0
- data/activerecord-copy.gemspec +25 -0
- data/lib/activerecord-copy.rb +92 -0
- data/lib/activerecord-copy/constants.rb +18 -0
- data/lib/activerecord-copy/decoder.rb +176 -0
- data/lib/activerecord-copy/encode_for_copy.rb +253 -0
- data/lib/activerecord-copy/exception.rb +4 -0
- data/lib/activerecord-copy/temp_buffer.rb +38 -0
- data/lib/activerecord-copy/version.rb +3 -0
- data/spec/big_write_spec.rb +17 -0
- data/spec/errors_spec.rb +8 -0
- data/spec/fixtures/3_col_array.txt +1 -0
- data/spec/fixtures/3_col_hstore.dat +0 -0
- data/spec/fixtures/3_col_hstore.txt +1 -0
- data/spec/fixtures/3_column_array.dat +0 -0
- data/spec/fixtures/array_with_two.dat +0 -0
- data/spec/fixtures/array_with_two2.dat +0 -0
- data/spec/fixtures/big_str_array.dat +0 -0
- data/spec/fixtures/big_str_array2.dat +0 -0
- data/spec/fixtures/bigint.dat +0 -0
- data/spec/fixtures/date.dat +0 -0
- data/spec/fixtures/date2.dat +0 -0
- data/spec/fixtures/date2000.dat +0 -0
- data/spec/fixtures/dates.dat +0 -0
- data/spec/fixtures/dates_p924.dat +0 -0
- data/spec/fixtures/dates_pg935.dat +0 -0
- data/spec/fixtures/empty_uuid.dat +0 -0
- data/spec/fixtures/falseclass.dat +0 -0
- data/spec/fixtures/float.dat +0 -0
- data/spec/fixtures/hstore_utf8.dat +0 -0
- data/spec/fixtures/intarray.dat +0 -0
- data/spec/fixtures/json.dat +0 -0
- data/spec/fixtures/json_array.dat +0 -0
- data/spec/fixtures/just_an_array.dat +0 -0
- data/spec/fixtures/just_an_array2.dat +0 -0
- data/spec/fixtures/multiline_hstore.dat +0 -0
- data/spec/fixtures/output.dat +0 -0
- data/spec/fixtures/timestamp.dat +0 -0
- data/spec/fixtures/timestamp_9.3.dat +0 -0
- data/spec/fixtures/timestamp_big.dat +0 -0
- data/spec/fixtures/timestamp_rounding.dat +0 -0
- data/spec/fixtures/trueclass.dat +0 -0
- data/spec/fixtures/utf8.dat +0 -0
- data/spec/fixtures/uuid.dat +0 -0
- data/spec/fixtures/uuid_array.dat +0 -0
- data/spec/multiline_spec.rb +17 -0
- data/spec/spec_helper.rb +22 -0
- data/spec/verify_data_formats_spec.rb +415 -0
- data/spec/verify_decoder_spec.rb +263 -0
- metadata +182 -0
@@ -0,0 +1,253 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
require 'stringio'
|
3
|
+
|
4
|
+
module ActiveRecordCopy
|
5
|
+
class EncodeForCopy
|
6
|
+
# Creates an encoder whose output stream is opened lazily on the first #add.
#
# options - Hash of settings. Keys read by this class are :column_types
#           (looked up by column index), :use_tempfile and :skip_unlink.
def initialize(options = {})
  @options = options
  @column_types = @options[:column_types] || {}
  @buffer = TempBuffer.new # scratch space each field is encoded into
  @io = nil                # assigned by setup_io
  @closed = false
end
|
13
|
+
|
14
|
+
# Appends one row to the stream.
#
# row - Array of column values. The column count is written first, then
#       every value is encoded into the scratch buffer and flushed to @io.
def add(row)
  @io || setup_io
  column_count = [row.size].pack(PACKED_UINT_16)
  @io.write(column_count)
  row.each_with_index do |value, position|
    encode_field(@buffer, value, position)
    unless @buffer.empty?
      @io.write(@buffer.read)
      @buffer.reopen
    end
  end
end
|
24
|
+
|
25
|
+
# Finishes the stream: flushes any buffered bytes, writes the -1 trailer
# and rewinds the IO so it can be read from the start.
#
# Calling it again only rewinds; the trailer is written once. (Previously a
# second call wrote another trailer at position 0, clobbering the header.)
#
# Raises Exception when no row was ever added (the stream was never opened).
# Genuine write errors now propagate instead of being reported as
# "No rows have been added".
def close
  already_closed = @closed
  @closed = true
  raise Exception, 'No rows have been added to the encoder!' if @io.nil?

  unless already_closed
    unless @buffer.empty?
      @io.write(@buffer.read)
      @buffer.reopen
    end
    @io.write([-1].pack(PACKED_UINT_16)) # end-of-data marker
  end
  @io.rewind
end
|
34
|
+
|
35
|
+
# Returns the output IO, closing the encoder first if that has not happened yet.
def get_io
  @closed || close
  @io
end
|
39
|
+
|
40
|
+
# Closes and unlinks the backing file when the stream is a Tempfile;
# does nothing for any other kind of stream.
def remove
  if @io.is_a?(Tempfile)
    @io.close
    @io.unlink
  end
end
|
46
|
+
|
47
|
+
private
|
48
|
+
|
49
|
+
# Opens the output stream (a Tempfile when :use_tempfile is exactly true,
# otherwise a StringIO) and writes the PGCOPY signature followed by two
# zero 32-bit words.
def setup_io
  wants_tempfile = @options[:use_tempfile] == true
  @io = wants_tempfile ? Tempfile.new('copy_binary', encoding: 'ascii-8bit') : StringIO.new
  # The file is unlinked right away unless the caller asked to keep it.
  @io.unlink if wants_tempfile && @options[:skip_unlink] != true

  signature = "PGCOPY\n\377\r\n\0"
  @io.write(signature)
  zero_words = [0, 0].pack(PACKED_UINT_32 + PACKED_UINT_32)
  @io.write(zero_words)
end
|
59
|
+
|
60
|
+
# Writes buf to io preceded by its byte length packed with PACKED_UINT_32.
def write_field(io, buf)
  length_prefix = [buf.bytesize].pack(PACKED_UINT_32)
  io.write(length_prefix)
  io.write(buf)
end
|
64
|
+
|
65
|
+
# Encodes a single column value onto io.
#
# io    - object responding to #write
# field - the value to encode
# index - column position, used to look up a declared type in @column_types
# depth - nesting level, forwarded to encode_based_on_input
#
# nil is written as a length of -1. Arrays go to encode_array unless the
# column is declared :json/:jsonb. Otherwise the declared column type picks
# the encoding, falling back to the value's Ruby class when none matches.
def encode_field(io, field, index, depth = 0)
  # Nil is an exception in that any kind of field type can have a nil value transmitted
  if field.nil?
    io.write([-1].pack(PACKED_UINT_32))
    return
  end

  column_type = @column_types[index]

  if field.is_a?(Array) && ![:json, :jsonb].include?(column_type)
    encode_array(io, field, index)
    return
  end

  case column_type
  when :bigint
    write_field(io, [field.to_i].pack(PACKED_UINT_64))
  when :integer
    write_field(io, [field.to_i].pack(PACKED_UINT_32))
  when :smallint
    write_field(io, [field.to_i].pack(PACKED_UINT_16))
  when :numeric
    encode_numeric(io, field)
  when :float
    write_field(io, [field].pack(PACKED_FLOAT_64))
  when :uuid
    write_field(io, [field.delete('-')].pack(PACKED_HEX_STRING))
  when :inet
    # IPAddr.new raises when handed an IPAddr, so pass existing ones through.
    encode_ip_addr(io, field.is_a?(IPAddr) ? field : IPAddr.new(field))
  when :binary
    write_field(io, field)
  when :json
    write_field(io, field.to_json.encode(UTF_8_ENCODING))
  when :jsonb
    encode_jsonb(io, field)
  else
    encode_based_on_input(io, field, index, depth)
  end
end
|
108
|
+
|
109
|
+
# Encodes a value by its Ruby class when the column has no declared type
# handled in encode_field.
#
# Raises Exception for a Hash at depth > 0 and for unsupported classes.
def encode_based_on_input(io, field, index, depth)
  case field
  when Integer
    write_field(io, [field].pack(PACKED_UINT_32))
  when Float
    write_field(io, [field].pack(PACKED_FLOAT_64))
  when true, false
    write_field(io, [field ? 1 : 0].pack(PACKED_UINT_8))
  when String
    write_field(io, field.encode(UTF_8_ENCODING))
  when Hash
    raise Exception, "Hash's can't contain hashes" if depth > 0
    pairs_io = TempBuffer.new
    pairs_io.write([field.size].pack(PACKED_UINT_32))
    field.each_pair do |name, value|
      write_field(pairs_io, name.to_s.encode(UTF_8_ENCODING))
      # Values are stringified before encoding; nil stays nil.
      encode_field(pairs_io, value.nil? ? nil : value.to_s, index, depth + 1)
    end
    io.write([pairs_io.pos].pack(PACKED_UINT_32)) # size of hstore data
    io.write(pairs_io.string)
  when Time
    microseconds = field.tv_sec * 1_000_000 + field.tv_usec - POSTGRES_EPOCH_TIME
    write_field(io, [microseconds.to_i].pack(PACKED_UINT_64))
  when Date
    days_since_2000 = (field - Date.new(2000, 1, 1)).to_i
    write_field(io, [days_since_2000].pack(PACKED_UINT_32))
  when IPAddr
    encode_ip_addr(io, field)
  else
    raise Exception, "Unsupported Format: #{field.class.name}"
  end
end
|
149
|
+
|
150
|
+
# Encodes a one-dimensional array of strings, UUID strings or integers.
#
# io    - object responding to #write
# field - Array of values; nil elements are dropped. The caller's array is
#         left untouched (the previous compact! mutated the row passed in).
# index - column position, used to detect a :uuid column
#
# An empty or all-nil array is written as NULL (length -1).
# Raises Exception when the first element is neither a String nor an Integer.
def encode_array(io, field, index)
  values = field.compact

  if values.empty?
    io.write([-1].pack(PACKED_UINT_32))
    return
  end

  # The type of the first element decides the element OID and encoding.
  case values.first
  when String
    if @column_types[index] == :uuid
      type_oid = UUID_TYPE_OID
      pack_element = ->(val) { [val.delete('-')].pack(PACKED_HEX_STRING) }
    else
      type_oid = VARCHAR_TYPE_OID
      pack_element = ->(val) { val.to_s.encode(UTF_8_ENCODING) }
    end
  when Integer
    type_oid = INT_TYPE_OID
    pack_element = ->(val) { [val.to_i].pack(PACKED_UINT_32) }
  else
    raise Exception, 'Arrays support int or string only'
  end

  array_io = TempBuffer.new
  # Header words, per PostgreSQL's binary array layout: number of
  # dimensions (always 1 here), has-null flag (0), element type OID,
  # element count, lower bound (1).
  [1, 0, type_oid, values.size, 1].each do |word|
    array_io.write([word].pack(PACKED_UINT_32))
  end
  values.each { |val| write_field(array_io, pack_element.call(val)) }

  io.write([array_io.pos].pack(PACKED_UINT_32))
  io.write(array_io.string)
end
|
205
|
+
|
206
|
+
# Writes an IPAddr as a length-prefixed field: family byte, bit count,
# CIDR flag (always 0) and address length, followed by the raw address bytes.
def encode_ip_addr(io, ip_addr)
  # family is PGSQL_AF_INET6 (3) or PGSQL_AF_INET (2)
  family, bits, address_bytes = ip_addr.ipv6? ? [3, 128, 16] : [2, 32, 4]

  io.write([4 + address_bytes].pack(PACKED_UINT_32)) # Field data size
  io.write([family].pack(PACKED_UINT_8))             # Family
  io.write([bits].pack(PACKED_UINT_8))               # Bits
  io.write([0].pack(PACKED_UINT_8))                  # Is CIDR? => No
  io.write([address_bytes].pack(PACKED_UINT_8))      # Address length in bytes
  io.write(ip_addr.hton)
end
|
222
|
+
|
223
|
+
# Writes field as a jsonb value: a length prefix, one version byte, then
# the UTF-8 JSON text produced by #to_json.
def encode_jsonb(io, field)
  json_text = field.to_json.encode(UTF_8_ENCODING)
  total_size = 1 + json_text.bytesize # version byte + payload
  io.write([total_size].pack(PACKED_UINT_32))
  io.write([1].pack(PACKED_UINT_8)) # JSONB format version 1
  io.write(json_text)
end
|
229
|
+
|
230
|
+
NUMERIC_DEC_DIGITS = 4 # NBASE=10000

# Writes a numeric value as base-10000 digit groups preceded by the
# ndigits / weight / sign / dscale header.
#
# io    - object responding to #write
# field - anything whose #to_s is a decimal number, optionally signed and
#         optionally with an exponent ("5", "-123.45", "1.0e+20", "0.12345e3")
#
# Fixes over the previous version: the first fractional digit is no longer
# counted in the integer part, a leading "-" no longer shifts the decimal
# point, fractional groups are right-padded to four digits, and values with
# no decimal point or with an exponent are handled.
#
# Raises Exception when the value is not a finite decimal number.
def encode_numeric(io, field)
  match = /\A([+-]?)(\d*)(?:\.(\d*))?(?:[eE]([+-]?\d+))?\z/.match(field.to_s.strip)
  if match.nil? || (match[2].empty? && match[3].to_s.empty?)
    raise Exception, "Unsupported numeric value: #{field.inspect}"
  end

  negative = match[1] == '-'
  int_part = match[2]
  frac_part = match[3].to_s
  exponent = match[4].to_i

  # Apply the exponent by moving the decimal point through the digit strings.
  if exponent > 0
    frac_part = frac_part.ljust(exponent, '0')
    int_part += frac_part[0, exponent]
    frac_part = frac_part[exponent..-1]
  elsif exponent < 0
    int_part = int_part.rjust(-exponent, '0')
    frac_part = int_part[exponent..-1] + frac_part
    int_part = int_part[0...exponent]
  end

  dscale = frac_part.size # decimal digits after the point

  # Pad so both halves split cleanly into groups of NUMERIC_DEC_DIGITS:
  # the integer part on the left, the fraction on the right.
  round_up = ->(n) { (n + NUMERIC_DEC_DIGITS - 1) / NUMERIC_DEC_DIGITS * NUMERIC_DEC_DIGITS }
  int_part = int_part.sub(/\A0+/, '')
  int_part = int_part.rjust(round_up.call(int_part.size), '0')
  frac_padded = frac_part.ljust(round_up.call(frac_part.size), '0')

  group = /\d{#{NUMERIC_DEC_DIGITS}}/
  int_groups = int_part.scan(group).map(&:to_i)
  frac_groups = frac_padded.scan(group).map(&:to_i)

  digits = int_groups + frac_groups
  weight = int_groups.size - 1 # power of NBASE of the first digit group

  # Drop zero groups at either end; each leading one lowers the weight.
  while digits.first == 0
    digits.shift
    weight -= 1
  end
  digits.pop while digits.last == 0

  if digits.empty? # zero: no digits, weight 0, positive sign
    weight = 0
    negative = false
  end
  sign = negative ? 0x4000 : 0

  io.write([2 * 4 + 2 * digits.size].pack(PACKED_UINT_32)) # Field data size
  io.write([digits.size].pack(PACKED_UINT_16)) # ndigits
  io.write([weight].pack(PACKED_UINT_16)) # weight
  io.write([sign].pack(PACKED_UINT_16)) # sign
  io.write([dscale].pack(PACKED_UINT_16)) # dscale

  digits.each { |d| io.write([d].pack(PACKED_UINT_16)) } # NumericDigits
end
|
252
|
+
end
|
253
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module ActiveRecordCopy
  # In-memory, append-only byte buffer offering the small part of the IO
  # interface the encoder uses. Everything written is stored as a
  # binary-tagged copy, so the caller's strings are never modified.
  class TempBuffer
    def initialize
      @st = empty_binary_string
    end

    # Number of bytes currently buffered.
    def size
      @st.bytesize
    end

    # Appends a binary-tagged copy of st to the buffer.
    def write(st)
      @st << st.dup.force_encoding(ASCII_8BIT_ENCODING)
    end

    # No-op, present for IO compatibility.
    def rewind
    end

    # Discards the contents. The replacement string is binary-tagged like
    # the initial one; previously it was a default-encoded literal, so the
    # buffer's encoding could differ after the first flush.
    def reopen
      @st = empty_binary_string
    end

    # Returns the underlying buffer string (not a copy).
    def read
      @st
    end

    # Byte position of the next write, i.e. the buffered byte count.
    def pos
      @st.bytesize
    end

    # Returns the underlying buffer string (not a copy).
    def string
      @st
    end

    def empty?
      @st.empty?
    end

    private

    # Fresh, mutable, binary-tagged empty string.
    def empty_binary_string
      String.new.force_encoding(ASCII_8BIT_ENCODING)
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
require 'benchmark'
|
3
|
+
|
4
|
+
describe 'testing changes with large imports and speed issues' do
  it 'imports lots of data quickly' do
    # The encoder reads :use_tempfile; the previous :temp_file key was
    # ignored, so this spec never exercised the tempfile path.
    encoder = ActiveRecordCopy::EncodeForCopy.new(use_tempfile: true)

    puts Benchmark.measure {
      0.upto(100_000) do
        encoder.add [1, 'text', { a: 1, b: 'asdf' }]
      end
    }

    encoder.close
    io = encoder.get_io
    expect(io).to be_a(Tempfile)
    # Release the file handle now that the tempfile is really in use.
    encoder.remove
  end
end
|
data/spec/errors_spec.rb
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe 'throwing errors' do
  it 'raises an error when no rows have been added to the encoder' do
    # Closing an encoder that never received a row must fail loudly.
    empty_encoder = ActiveRecordCopy::EncodeForCopy.new
    expect do
      empty_encoder.close
    end.to raise_error(ActiveRecordCopy::Exception)
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
1 hi {hi,there,rubyist}
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
1 text "a"=>"1", "b"=>"asdf"
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe 'multiline hstore' do
  it 'encodes multiline hstore data correctly' do
    encoder = ActiveRecordCopy::EncodeForCopy.new
    rows = [[1, { a: 1, b: 2 }], [2, { a: 1, b: 3 }]]
    rows.each { |row| encoder.add row }
    encoder.close
    io = encoder.get_io
    expected = filedata('multiline_hstore.dat')
    actual = io.read
    expect(io.class.name).to eq 'StringIO'
    # Compare as raw bytes, matching how the fixture is read.
    actual.force_encoding('ASCII-8BIT')
    # File.open("spec/fixtures/output.dat", "w:ASCII-8BIT") {|out| out.write(str) }
    expect(actual).to eq expected
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Put the gem's lib directory first on the load path, then the spec directory.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'), spec_dir)
|
3
|
+
|
4
|
+
require 'rspec'
|
5
|
+
require 'activerecord-copy'
|
6
|
+
|
7
|
+
# Suite-wide RSpec configuration; the before(:suite) hook is an empty placeholder.
RSpec.configure { |config| config.before(:suite) {} }
|
11
|
+
|
12
|
+
# Returns the raw bytes of a fixture as an ASCII-8BIT string.
#
# filename - name of a file inside the fixtures directory next to this helper
#
# The path is resolved relative to this file rather than the current
# directory, so specs can be run from anywhere.
def filedata(filename)
  File.binread(File.expand_path("../fixtures/#{filename}", __FILE__))
end
|
19
|
+
|
20
|
+
# Opens a fixture for reading as ASCII-8BIT and returns the File.
# The caller is responsible for closing it.
#
# filename - name of a file inside the fixtures directory next to this helper
#
# The path is resolved relative to this file rather than the current
# directory, so specs can be run from anywhere.
def fileio(filename)
  File.open(File.expand_path("../fixtures/#{filename}", __FILE__), 'r:ASCII-8BIT')
end
|