red-arrow-format 23.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +26 -0
- data/LICENSE.txt +202 -0
- data/NOTICE.txt +2 -0
- data/README.md +61 -0
- data/Rakefile +67 -0
- data/lib/arrow-format/array.rb +476 -0
- data/lib/arrow-format/bitmap.rb +44 -0
- data/lib/arrow-format/error.rb +34 -0
- data/lib/arrow-format/field.rb +33 -0
- data/lib/arrow-format/file-reader.rb +213 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/binary.rb +21 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/binary_view.rb +27 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/block.rb +38 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression.rb +47 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression_method.rb +31 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/bool.rb +20 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/buffer.rb +38 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/compression_type.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/date.rb +36 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/date_unit.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/decimal.rb +48 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_batch.rb +50 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_encoding.rb +64 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_kind.rb +26 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/duration.rb +30 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/endianness.rb +24 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/feature.rb +46 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/field.rb +92 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/field_node.rb +43 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_binary.rb +27 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_list.rb +27 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/floating_point.rb +30 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/footer.rb +74 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/int.rb +33 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/interval.rb +30 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/interval_unit.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/key_value.rb +36 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_binary.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_list.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_list_view.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_utf8.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/list.rb +20 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/list_view.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/map.rb +52 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/message.rb +68 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/message_header.rb +39 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/metadata_version.rb +36 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/null.rb +21 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/precision.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/record_batch.rb +93 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/run_end_encoded.rb +25 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/schema.rb +68 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_compressed_axis.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_index_csx.rb +96 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor.rb +92 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index.rb +29 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_coo.rb +93 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_csf.rb +129 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/struct_.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/tensor.rb +74 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/tensor_dim.rb +38 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/time.rb +51 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/time_unit.rb +24 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/timestamp.rb +152 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/type.rb +55 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/union.rb +44 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/union_mode.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/utf8.rb +21 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/utf8view.rb +27 -0
- data/lib/arrow-format/readable.rb +271 -0
- data/lib/arrow-format/record-batch.rb +36 -0
- data/lib/arrow-format/schema.rb +24 -0
- data/lib/arrow-format/streaming-pull-reader.rb +243 -0
- data/lib/arrow-format/streaming-reader.rb +50 -0
- data/lib/arrow-format/type.rb +704 -0
- data/lib/arrow-format/version.rb +26 -0
- data/lib/arrow-format.rb +20 -0
- data/red-arrow-format.gemspec +57 -0
- metadata +137 -0
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
|
|
2
|
+
# distributed with this work for additional information
|
|
3
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
4
|
+
# to you under the Apache License, Version 2.0 (the
|
|
5
|
+
# "License"); you may not use this file except in compliance
|
|
6
|
+
# with the License. You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing,
|
|
11
|
+
# software distributed under the License is distributed on an
|
|
12
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
13
|
+
# KIND, either express or implied. See the License for the
|
|
14
|
+
# specific language governing permissions and limitations
|
|
15
|
+
# under the License.
|
|
16
|
+
|
|
17
|
+
require "bigdecimal"
|
|
18
|
+
|
|
19
|
+
require_relative "bitmap"
|
|
20
|
+
|
|
21
|
+
module ArrowFormat
  # Base class of the array views defined in this module.
  #
  # An array keeps its type object, its number of slots and an optional
  # validity buffer. Slot +i+ is valid when bit +i % 8+ (least significant
  # bit first) of byte +i / 8+ of the validity buffer is set. A +nil+
  # validity buffer means that every slot is valid.
  class Array
    attr_reader :type
    attr_reader :size
    alias_method :length, :size

    # type: type object of the values.
    # size: number of slots.
    # validity_buffer: buffer read with get_value(:U8, offset), or nil.
    def initialize(type, size, validity_buffer)
      @type = type
      @size = size
      @validity_buffer = validity_buffer
    end

    # Returns true when slot +i+ is valid (always true without a validity
    # buffer).
    def valid?(i)
      return true if @validity_buffer.nil?
      (@validity_buffer.get_value(:U8, i / 8) & (1 << (i % 8))) > 0
    end

    # Returns true when slot +i+ is not valid.
    def null?(i)
      !valid?(i)
    end

    private

    # Replaces, in place, every element of +array+ whose validity bit is
    # clear with nil and returns +array+. Returns +array+ untouched when
    # there is no validity buffer.
    def apply_validity(array)
      return array if @validity_buffer.nil?
      @validity_bitmap ||= Bitmap.new(@validity_buffer, @size)
      @validity_bitmap.each_with_index do |bit, i|
        array[i] = nil if bit.zero?
      end
      array
    end
  end

  # Array without any buffer: every element is nil.
  class NullArray < Array
    def initialize(type, size)
      super(type, size, nil)
    end

    def to_a
      [nil] * @size
    end
  end

  # Array whose values are stored as one bit per slot.
  class BooleanArray < Array
    def initialize(type, size, validity_buffer, values_buffer)
      super(type, size, validity_buffer)
      @values_buffer = values_buffer
    end

    # Returns true/false per slot, nil for invalid slots.
    def to_a
      @values_bitmap ||= Bitmap.new(@values_buffer, @size)
      values = @values_bitmap.each.collect do |bit|
        !bit.zero?
      end
      apply_validity(values)
    end
  end

  # Integer array. The buffer type used for decoding comes from
  # @type.buffer_type.
  class IntArray < Array
    def initialize(type, size, validity_buffer, values_buffer)
      super(type, size, validity_buffer)
      @values_buffer = values_buffer
    end

    def to_a
      apply_validity(@values_buffer.values(@type.buffer_type, 0, @size))
    end
  end

  class Int8Array < IntArray
  end

  class UInt8Array < IntArray
  end

  class Int16Array < IntArray
  end

  class UInt16Array < IntArray
  end

  class Int32Array < IntArray
  end

  class UInt32Array < IntArray
  end

  class Int64Array < IntArray
  end

  class UInt64Array < IntArray
  end

  # Common constructor of the floating point arrays.
  class FloatingPointArray < Array
    def initialize(type, size, validity_buffer, values_buffer)
      super(type, size, validity_buffer)
      @values_buffer = values_buffer
    end
  end

  class Float32Array < FloatingPointArray
    def to_a
      apply_validity(@values_buffer.values(:f32, 0, @size))
    end
  end

  class Float64Array < FloatingPointArray
    def to_a
      apply_validity(@values_buffer.values(:f64, 0, @size))
    end
  end

  # Common constructor of the temporal arrays. The subclasses return the
  # stored integers as is; no conversion to Ruby date/time objects is done
  # here.
  class TemporalArray < Array
    def initialize(type, size, validity_buffer, values_buffer)
      super(type, size, validity_buffer)
      @values_buffer = values_buffer
    end
  end

  class DateArray < TemporalArray
  end

  class Date32Array < DateArray
    def to_a
      apply_validity(@values_buffer.values(:s32, 0, @size))
    end
  end

  class Date64Array < DateArray
    def to_a
      apply_validity(@values_buffer.values(:s64, 0, @size))
    end
  end

  class TimeArray < TemporalArray
  end

  class Time32Array < TimeArray
    def to_a
      apply_validity(@values_buffer.values(:s32, 0, @size))
    end
  end

  class Time64Array < TimeArray
    def to_a
      apply_validity(@values_buffer.values(:s64, 0, @size))
    end
  end

  class TimestampArray < TemporalArray
    def to_a
      apply_validity(@values_buffer.values(:s64, 0, @size))
    end
  end

  class IntervalArray < TemporalArray
  end

  class YearMonthIntervalArray < IntervalArray
    def to_a
      apply_validity(@values_buffer.values(:s32, 0, @size))
    end
  end

  # Each slot is a pair of signed 32-bit integers, returned as [day, time].
  class DayTimeIntervalArray < IntervalArray
    def to_a
      values = @values_buffer.
        each(:s32, 0, @size * 2).
        each_slice(2).
        collect do |(_, day), (_, time)|
        [day, time]
      end
      apply_validity(values)
    end
  end

  # Each slot is decoded as [:s32, :s32, :s64] and returned as a 3-element
  # array.
  class MonthDayNanoIntervalArray < IntervalArray
    def to_a
      buffer_types = [:s32, :s32, :s64]
      value_size = IO::Buffer.size_of(buffer_types)
      values = @size.times.collect do |i|
        offset = value_size * i
        @values_buffer.get_values(buffer_types, offset)
      end
      apply_validity(values)
    end
  end

  class DurationArray < TemporalArray
    def to_a
      apply_validity(@values_buffer.values(:s64, 0, @size))
    end
  end

  # Array of variable size byte sequences: slot i spans
  # offsets[i]...offsets[i + 1] of the values buffer. Subclasses provide
  # the private +buffer_type+ (offset width) and +encoding+ methods.
  class VariableSizeBinaryLayoutArray < Array
    def initialize(type, size, validity_buffer, offsets_buffer, values_buffer)
      super(type, size, validity_buffer)
      @offsets_buffer = offsets_buffer
      @values_buffer = values_buffer
    end

    # Returns one String per slot (nil for invalid slots), tagged with the
    # subclass encoding.
    def to_a
      values = @offsets_buffer.
        each(buffer_type, 0, @size + 1).
        each_cons(2).
        collect do |(_, offset), (_, next_offset)|
        length = next_offset - offset
        @values_buffer.get_string(offset, length, encoding)
      end
      apply_validity(values)
    end
  end

  class BinaryArray < VariableSizeBinaryLayoutArray
    private

    def buffer_type
      :s32 # TODO: big endian support
    end

    def encoding
      Encoding::ASCII_8BIT
    end
  end

  class LargeBinaryArray < VariableSizeBinaryLayoutArray
    private

    def buffer_type
      :s64 # TODO: big endian support
    end

    def encoding
      Encoding::ASCII_8BIT
    end
  end

  class UTF8Array < VariableSizeBinaryLayoutArray
    private

    def buffer_type
      :s32 # TODO: big endian support
    end

    def encoding
      Encoding::UTF_8
    end
  end

  class LargeUTF8Array < VariableSizeBinaryLayoutArray
    private

    def buffer_type
      :s64 # TODO: big endian support
    end

    def encoding
      Encoding::UTF_8
    end
  end

  # Array of byte sequences that all have @type.byte_width bytes.
  class FixedSizeBinaryArray < Array
    def initialize(type, size, validity_buffer, values_buffer)
      super(type, size, validity_buffer)
      @values_buffer = values_buffer
    end

    def to_a
      byte_width = @type.byte_width
      values = 0.step(@size * byte_width - 1, byte_width).collect do |offset|
        @values_buffer.get_string(offset, byte_width)
      end
      apply_validity(values)
    end
  end

  # Decimal array. Each slot is @type.byte_width bytes read as 64-bit
  # words: unsigned words first and a signed most significant word last.
  # @type.scale gives the number of fractional digits.
  class DecimalArray < FixedSizeBinaryArray
    # Returns one BigDecimal per slot, nil for invalid slots.
    def to_a
      byte_width = @type.byte_width
      buffer_types = [:u64] * (byte_width / 8 - 1) + [:s64]
      values = 0.step(@size * byte_width - 1, byte_width).collect do |offset|
        @values_buffer.get_values(buffer_types, offset)
      end
      apply_validity(values).collect do |value|
        if value.nil?
          nil
        else
          BigDecimal(format_value(value))
        end
      end
    end

    private

    # Builds the decimal text of one slot from its 64-bit words.
    #
    # The words are combined into one integer (the signed last word
    # carries the sign), then the scale is applied textually:
    # * negative scale: append -scale zeros
    # * positive scale: insert the decimal point scale digits from the
    #   right, left-padding with zeros when there are not enough digits
    def format_value(components)
      value = 0
      components.reverse_each do |component|
        value = (value << 64) + component
      end
      string = value.to_s
      scale = @type.scale
      if scale < 0
        string << ("0" * -scale)
      elsif scale > 0
        # Skip the leading "-" both when counting digits and when
        # inserting the padding.
        sign_width = value < 0 ? 1 : 0
        n_digits = string.bytesize - sign_width
        if n_digits <= scale
          # All digits are fractional: 5 with scale 3 is "0.005", so
          # exactly (scale - n_digits) zeros follow the decimal point.
          string[sign_width, 0] = "0." + ("0" * (scale - n_digits))
        else
          string[-scale, 0] = "."
        end
      end
      string
    end
  end

  class Decimal128Array < DecimalArray
  end

  class Decimal256Array < DecimalArray
  end

  # Array of variable size lists: slot i is the
  # offsets[i]...offsets[i + 1] slice of the child values. Subclasses
  # provide the private +offset_type+ method.
  class VariableSizeListArray < Array
    def initialize(type, size, validity_buffer, offsets_buffer, child)
      super(type, size, validity_buffer)
      @offsets_buffer = offsets_buffer
      @child = child
    end

    def to_a
      child_values = @child.to_a
      values = @offsets_buffer.
        each(offset_type, 0, @size + 1).
        each_cons(2).
        collect do |(_, offset), (_, next_offset)|
        child_values[offset...next_offset]
      end
      apply_validity(values)
    end
  end

  class ListArray < VariableSizeListArray
    private

    def offset_type
      :s32 # TODO: big endian support
    end
  end

  class LargeListArray < VariableSizeListArray
    private

    def offset_type
      :s64 # TODO: big endian support
    end
  end

  # Array of structs: slot i is the array of the i-th value of every
  # child.
  class StructArray < Array
    def initialize(type, size, validity_buffer, children)
      super(type, size, validity_buffer)
      @children = children
    end

    def to_a
      if @children.empty?
        # One distinct empty array per slot so that mutating one returned
        # element does not affect the others.
        values = @size.times.collect { [] }
      else
        children_values = @children.collect(&:to_a)
        values = children_values[0].zip(*children_values[1..-1])
      end
      apply_validity(values)
    end
  end

  # List of [key, value] entries exposed as one Hash per slot.
  class MapArray < VariableSizeListArray
    def to_a
      super.collect do |entries|
        if entries.nil?
          entries
        else
          hash = {}
          entries.each do |key, value|
            hash[key] = value
          end
          hash
        end
      end
    end

    private

    def offset_type
      :s32 # TODO: big endian support
    end
  end

  # Common constructor of the union arrays. No validity buffer is used:
  # the type codes in the types buffer select the child holding each slot.
  class UnionArray < Array
    def initialize(type, size, types_buffer, children)
      super(type, size, nil)
      @types_buffer = types_buffer
      @children = children
    end
  end

  # Union whose slot i is element offsets[i] of the selected child.
  class DenseUnionArray < UnionArray
    def initialize(type,
                   size,
                   types_buffer,
                   offsets_buffer,
                   children)
      super(type, size, types_buffer, children)
      @offsets_buffer = offsets_buffer
    end

    def to_a
      children_values = @children.collect(&:to_a)
      types = @types_buffer.each(:S8, 0, @size)
      offsets = @offsets_buffer.each(:s32, 0, @size)
      types.zip(offsets).collect do |(_, type), (_, offset)|
        index = @type.resolve_type_index(type)
        children_values[index][offset]
      end
    end
  end

  # Union whose slot i is element i of the selected child.
  class SparseUnionArray < UnionArray
    def to_a
      children_values = @children.collect(&:to_a)
      @types_buffer.each(:S8, 0, @size).with_index.collect do |(_, type), i|
        index = @type.resolve_type_index(type)
        children_values[index][i]
      end
    end
  end

  # Array of indices into a dictionary. The dictionary is an enumerable
  # of chunks; the chunks' values are concatenated before the lookup.
  class DictionaryArray < Array
    def initialize(type, size, validity_buffer, indices_buffer, dictionary)
      super(type, size, validity_buffer)
      @indices_buffer = indices_buffer
      @dictionary = dictionary
    end

    def to_a
      values = []
      @dictionary.each do |dictionary_chunk|
        values.concat(dictionary_chunk.to_a)
      end
      buffer_type = @type.index_type.buffer_type
      indices = apply_validity(@indices_buffer.values(buffer_type, 0, @size))
      indices.collect do |index|
        if index.nil?
          nil
        else
          values[index]
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
|
|
2
|
+
# distributed with this work for additional information
|
|
3
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
4
|
+
# to you under the Apache License, Version 2.0 (the
|
|
5
|
+
# "License"); you may not use this file except in compliance
|
|
6
|
+
# with the License. You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing,
|
|
11
|
+
# software distributed under the License is distributed on an
|
|
12
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
13
|
+
# KIND, either express or implied. See the License for the
|
|
14
|
+
# specific language governing permissions and limitations
|
|
15
|
+
# under the License.
|
|
16
|
+
|
|
17
|
+
module ArrowFormat
  # Enumerable view over the first +n_values+ bits of a buffer.
  #
  # Bits are visited byte by byte, least significant bit first. Each
  # yielded element is the masked byte (value & (1 << i)): 0 when the bit
  # is clear and a non-zero Integer when it is set, so callers test it
  # with zero?.
  class Bitmap
    include Enumerable

    # buffer: buffer read with each(:U8, ...) and get_value(:U8, ...).
    # n_values: number of bits to expose.
    def initialize(buffer, n_values)
      @buffer = buffer
      @n_values = n_values
    end

    # Yields one element per bit, exactly +n_values+ times. Returns an
    # Enumerator when no block is given.
    def each
      return to_enum(__method__) unless block_given?

      # Bytes that are used completely: all 8 bits of each are yielded.
      n_bytes = @n_values / 8
      @buffer.each(:U8, 0, n_bytes) do |_offset, value|
        8.times do |i|
          yield(value & (1 << i))
        end
      end
      # Trailing byte that is only partially used.
      remained_bits = @n_values % 8
      unless remained_bits.zero?
        value = @buffer.get_value(:U8, n_bytes)
        remained_bits.times do |i|
          yield(value & (1 << i))
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
|
|
2
|
+
# distributed with this work for additional information
|
|
3
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
4
|
+
# to you under the Apache License, Version 2.0 (the
|
|
5
|
+
# "License"); you may not use this file except in compliance
|
|
6
|
+
# with the License. You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing,
|
|
11
|
+
# software distributed under the License is distributed on an
|
|
12
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
13
|
+
# KIND, either express or implied. See the License for the
|
|
14
|
+
# specific language governing permissions and limitations
|
|
15
|
+
# under the License.
|
|
16
|
+
|
|
17
|
+
module ArrowFormat
  # Root of the exception classes defined in this module.
  class Error < StandardError; end

  # Error subclass that FileReadError derives from.
  class ReadError < Error; end

  # Read error that keeps the related buffer. The buffer's string form is
  # appended to the message as "<message>: <buffer>".
  class FileReadError < ReadError
    attr_reader :buffer

    def initialize(buffer, message)
      super("#{message}: #{buffer}")
      @buffer = buffer
    end
  end

  # ArrowFormat::TypeError; being defined inside this module it is a
  # different class from Ruby's ::TypeError.
  class TypeError < Error; end
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
|
|
2
|
+
# distributed with this work for additional information
|
|
3
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
4
|
+
# to you under the Apache License, Version 2.0 (the
|
|
5
|
+
# "License"); you may not use this file except in compliance
|
|
6
|
+
# with the License. You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing,
|
|
11
|
+
# software distributed under the License is distributed on an
|
|
12
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
13
|
+
# KIND, either express or implied. See the License for the
|
|
14
|
+
# specific language governing permissions and limitations
|
|
15
|
+
# under the License.
|
|
16
|
+
|
|
17
|
+
module ArrowFormat
  # Value holder for a field: its name, its type, whether it is nullable
  # and its dictionary ID. All values are stored exactly as given.
  class Field
    attr_reader :name, :type, :dictionary_id

    def initialize(name, type, nullable, dictionary_id)
      @dictionary_id = dictionary_id
      @nullable = nullable
      @type = type
      @name = name
    end

    # Returns the +nullable+ value given to the constructor.
    def nullable?
      @nullable
    end
  end
end
|