red-arrow-format 23.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +26 -0
- data/LICENSE.txt +202 -0
- data/NOTICE.txt +2 -0
- data/README.md +61 -0
- data/Rakefile +67 -0
- data/lib/arrow-format/array.rb +476 -0
- data/lib/arrow-format/bitmap.rb +44 -0
- data/lib/arrow-format/error.rb +34 -0
- data/lib/arrow-format/field.rb +33 -0
- data/lib/arrow-format/file-reader.rb +213 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/binary.rb +21 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/binary_view.rb +27 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/block.rb +38 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression.rb +47 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression_method.rb +31 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/bool.rb +20 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/buffer.rb +38 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/compression_type.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/date.rb +36 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/date_unit.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/decimal.rb +48 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_batch.rb +50 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_encoding.rb +64 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_kind.rb +26 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/duration.rb +30 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/endianness.rb +24 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/feature.rb +46 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/field.rb +92 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/field_node.rb +43 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_binary.rb +27 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_list.rb +27 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/floating_point.rb +30 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/footer.rb +74 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/int.rb +33 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/interval.rb +30 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/interval_unit.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/key_value.rb +36 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_binary.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_list.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_list_view.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_utf8.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/list.rb +20 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/list_view.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/map.rb +52 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/message.rb +68 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/message_header.rb +39 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/metadata_version.rb +36 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/null.rb +21 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/precision.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/record_batch.rb +93 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/run_end_encoded.rb +25 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/schema.rb +68 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_compressed_axis.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_index_csx.rb +96 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor.rb +92 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index.rb +29 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_coo.rb +93 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_csf.rb +129 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/struct_.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/tensor.rb +74 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/tensor_dim.rb +38 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/time.rb +51 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/time_unit.rb +24 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/timestamp.rb +152 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/type.rb +55 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/union.rb +44 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/union_mode.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/utf8.rb +21 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/utf8view.rb +27 -0
- data/lib/arrow-format/readable.rb +271 -0
- data/lib/arrow-format/record-batch.rb +36 -0
- data/lib/arrow-format/schema.rb +24 -0
- data/lib/arrow-format/streaming-pull-reader.rb +243 -0
- data/lib/arrow-format/streaming-reader.rb +50 -0
- data/lib/arrow-format/type.rb +704 -0
- data/lib/arrow-format/version.rb +26 -0
- data/lib/arrow-format.rb +20 -0
- data/red-arrow-format.gemspec +57 -0
- metadata +137 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
require_relative "streaming-reader"
|
|
19
|
+
|
|
20
|
+
require_relative "org/apache/arrow/flatbuf/block"
|
|
21
|
+
require_relative "org/apache/arrow/flatbuf/footer"
|
|
22
|
+
|
|
23
|
+
module ArrowFormat
|
|
24
|
+
class FileReader
|
|
25
|
+
include Enumerable
|
|
26
|
+
include Readable
|
|
27
|
+
|
|
28
|
+
MAGIC = "ARROW1".b.freeze
|
|
29
|
+
MAGIC_BUFFER = IO::Buffer.for(MAGIC)
|
|
30
|
+
START_MARKER_SIZE = MAGIC_BUFFER.size
|
|
31
|
+
END_MARKER_SIZE = MAGIC_BUFFER.size
|
|
32
|
+
# <magic number "ARROW1">
|
|
33
|
+
# <empty padding bytes [to 8 byte boundary]>
|
|
34
|
+
STREAMING_FORMAT_START_OFFSET = 8
|
|
35
|
+
CONTINUATION_BUFFER =
|
|
36
|
+
IO::Buffer.for(MessagePullReader::CONTINUATION_STRING)
|
|
37
|
+
FOOTER_SIZE_FORMAT = :s32
|
|
38
|
+
FOOTER_SIZE_SIZE = IO::Buffer.size_of(FOOTER_SIZE_FORMAT)
|
|
39
|
+
|
|
40
|
+
# Wraps +input+ in a buffer and eagerly loads the footer, the schema
# and the dictionaries.
#
# @param input [IO, String, Object] An IO is mapped read-only with
#   IO::Buffer.map, a String is wrapped with IO::Buffer.for, and any
#   other object is used as the buffer as is.
def initialize(input)
  @buffer =
    case input
    when IO
      IO::Buffer.map(input, nil, 0, IO::Buffer::READONLY)
    when String
      IO::Buffer.for(input)
    else
      input
    end

  # The markers are checked before anything is read from the buffer.
  validate

  # read_dictionaries uses both @footer and @schema, so it runs last.
  @footer = read_footer
  @record_batch_blocks = @footer.record_batches
  @schema = read_schema(@footer.schema)
  @dictionaries = read_dictionaries
end
|
|
56
|
+
|
|
57
|
+
# Number of record batch blocks taken from the footer.
def n_record_batches = @record_batch_blocks.size
|
|
60
|
+
|
|
61
|
+
# Reads the record batch stored in the +i+-th record batch block.
#
# @param i [Integer] Index into the record batch blocks taken from
#   the footer.
# @return The value returned by read_record_batch for that block.
# @raise [IndexError] If there is no block at index +i+.
# @raise [FileReadError] If the message stored in the block is not a
#   record batch message.
def read(i)
  block = @record_batch_blocks[i]
  if block.nil?
    # Without this guard a missing block is passed to read_block and
    # surfaces as a NoMethodError on nil.
    raise IndexError,
          "Record batch index is out of range: #{i}: " +
          "the number of record batches: #{@record_batch_blocks.size}"
  end

  fb_message, body = read_block(block)
  fb_header = fb_message.header
  unless fb_header.is_a?(Org::Apache::Arrow::Flatbuf::RecordBatch)
    raise FileReadError.new(@buffer,
                            "Not a record batch message: #{i}: " +
                            fb_header.class.name)
  end
  read_record_batch(fb_header, @schema, body)
end
|
|
71
|
+
|
|
72
|
+
# Yields every record batch in block order.
#
# Without a block, returns an Enumerator whose size is
# n_record_batches.
def each
  unless block_given?
    return to_enum(__method__) { n_record_batches }
  end

  batch_count = @record_batch_blocks.size
  batch_count.times { |index| yield(read(index)) }
end
|
|
79
|
+
|
|
80
|
+
private
|
|
81
|
+
# Checks that the buffer is large enough to hold the leading marker
# area, the footer size and the trailing marker, and that it starts
# and ends with the magic bytes.
#
# @raise [FileReadError] If the buffer is too small or a marker is
#   missing.
def validate
  required_size = [
    STREAMING_FORMAT_START_OFFSET,
    FOOTER_SIZE_SIZE,
    END_MARKER_SIZE,
  ].sum
  actual_size = @buffer.size
  unless actual_size >= required_size
    raise FileReadError.new(@buffer,
                            "Input must be larger than or equal to " +
                            "#{required_size}: #{actual_size}")
  end

  head = @buffer.slice(0, START_MARKER_SIZE)
  raise FileReadError.new(@buffer, "No start marker") unless head == MAGIC_BUFFER

  tail = @buffer.slice(actual_size - END_MARKER_SIZE, END_MARKER_SIZE)
  raise FileReadError.new(@buffer, "No end marker") unless tail == MAGIC_BUFFER
end
|
|
101
|
+
|
|
102
|
+
# Locates and parses the footer.
#
# The footer size is read just before the trailing marker and the
# footer itself is the +footer_size+ bytes that precede it.
#
# @return [Org::Apache::Arrow::Flatbuf::Footer]
# @raise [FileReadError] If the stored footer size is not positive or
#   would make the footer start before STREAMING_FORMAT_START_OFFSET.
def read_footer
  footer_size_offset = @buffer.size - END_MARKER_SIZE - FOOTER_SIZE_SIZE
  footer_size = @buffer.get_value(FOOTER_SIZE_FORMAT, footer_size_offset)

  # The size comes from the file itself, so it is checked before it is
  # used to slice the buffer. A corrupt value is then reported as a
  # FileReadError instead of an error raised by the slice call.
  max_footer_size = footer_size_offset - STREAMING_FORMAT_START_OFFSET
  if footer_size <= 0 || footer_size > max_footer_size
    raise FileReadError.new(@buffer,
                            "Invalid footer size: #{footer_size}: " +
                            "must be in 1..#{max_footer_size}")
  end

  footer_data = @buffer.slice(footer_size_offset - footer_size,
                              footer_size)
  Org::Apache::Arrow::Flatbuf::Footer.new(footer_data)
end
|
|
109
|
+
|
|
110
|
+
# Reads the message stored at +block+ from the file buffer.
#
# The data at block.offset is read in this order: the continuation
# marker, the metadata length, the metadata (parsed as a
# Flatbuf::Message) and then block.body_length bytes of body.
#
# @param block An object that responds to +offset+,
#   +meta_data_length+ and +body_length+.
# @return [Array] +[fb_message, body]+.
# @raise [FileReadError] If the continuation marker is wrong or the
#   stored metadata length disagrees with block.meta_data_length.
def read_block(block)
  block_offset = block.offset
  offset = block_offset

  # If we can report proper error information, we can use
  # MessagePullReader here.
  #
  # message_pull_reader = MessagePullReader.new do |message, body|
  #   return read_record_batch(message.header, @schema, body)
  # end
  # chunk = @buffer.slice(offset,
  #                       MessagePullReader::CONTINUATION_SIZE +
  #                       MessagePullReader::METADATA_LENGTH_SIZE +
  #                       block.meta_data_length +
  #                       block.body_length)
  # message_pull_reader.consume(chunk)

  continuation_size = CONTINUATION_BUFFER.size
  continuation = @buffer.slice(offset, continuation_size)
  unless continuation == CONTINUATION_BUFFER
    # The block offset identifies the failing block. No block index is
    # available in this method.
    raise FileReadError.new(@buffer,
                            "Invalid continuation: " +
                            "offset:#{block_offset}: " +
                            continuation.inspect)
  end
  offset += continuation_size

  metadata_length_type = MessagePullReader::METADATA_LENGTH_TYPE
  metadata_length_size = MessagePullReader::METADATA_LENGTH_SIZE
  metadata_length = @buffer.get_value(metadata_length_type, offset)
  # block.meta_data_length is treated as covering the continuation
  # marker and the length prefix as well as the metadata itself.
  expected_metadata_length =
    block.meta_data_length -
    continuation_size -
    metadata_length_size
  unless metadata_length == expected_metadata_length
    raise FileReadError.new(@buffer,
                            "Invalid metadata length: " +
                            "offset:#{block_offset}: " +
                            "expected:#{expected_metadata_length} " +
                            "actual:#{metadata_length}")
  end
  offset += metadata_length_size

  metadata = @buffer.slice(offset, metadata_length)
  fb_message = Org::Apache::Arrow::Flatbuf::Message.new(metadata)
  offset += metadata_length

  body = @buffer.slice(offset, block.body_length)

  [fb_message, body]
end
|
|
158
|
+
|
|
159
|
+
# Loads every dictionary batch listed in the footer.
#
# @return [Hash, nil] A Hash from dictionary ID to an Array of
#   dictionary chunks: the non delta batch first, followed by its delta
#   batches in block order. Returns nil when the footer has no
#   dictionary blocks.
# @raise [FileReadError] If a block is not a dictionary batch message,
#   a delta batch comes before its non delta batch, an ID has more
#   than one non delta batch, or no dictionary field of the schema
#   uses the batch's ID.
def read_dictionaries
  dictionary_blocks = @footer.dictionaries
  return nil if dictionary_blocks.nil?

  # Only the schema's own fields are indexed here.
  dictionary_fields = {}
  @schema.fields.each do |field|
    next unless field.type.is_a?(DictionaryType)
    dictionary_fields[field.dictionary_id] = field
  end

  dictionaries = {}
  dictionary_blocks.each do |block|
    fb_message, body = read_block(block)
    fb_header = fb_message.header
    unless fb_header.is_a?(Org::Apache::Arrow::Flatbuf::DictionaryBatch)
      raise FileReadError.new(@buffer,
                              "Not a dictionary batch message: " +
                              fb_header.inspect)
    end

    id = fb_header.id
    if fb_header.delta?
      unless dictionaries.key?(id)
        raise FileReadError.new(@buffer,
                                "A delta dictionary batch message " +
                                "must exist after a non delta " +
                                "dictionary batch message: " +
                                fb_header.inspect)
      end
    else
      if dictionaries.key?(id)
        raise FileReadError.new(@buffer,
                                "Multiple non delta dictionary batch " +
                                "messages for the same ID is invalid: " +
                                fb_header.inspect)
      end
    end

    # Without this guard an ID that no indexed field uses fails with a
    # NoMethodError on nil.
    dictionary_field = dictionary_fields[id]
    if dictionary_field.nil?
      raise FileReadError.new(@buffer,
                              "No dictionary field for a dictionary " +
                              "batch message: " +
                              fb_header.inspect)
    end

    # The dictionary values are read as a record batch with a single
    # placeholder column of the dictionary's value type.
    value_type = dictionary_field.type.value_type
    schema = Schema.new([Field.new("dummy", value_type, true, nil)])
    record_batch = read_record_batch(fb_header.data, schema, body)
    if fb_header.delta?
      dictionaries[id] << record_batch.columns[0]
    else
      dictionaries[id] = [record_batch.columns[0]]
    end
  end
  dictionaries
end
|
|
208
|
+
|
|
209
|
+
# Looks up the dictionary chunks loaded for dictionary +id+.
#
# @param id The dictionary ID.
# @return [Array, nil] The chunks stored for +id+, or nil when nothing
#   is stored for it.
def find_dictionary(id)
  # read_dictionaries returns nil when the footer has no dictionary
  # blocks, so there may be nothing to index.
  return nil if @dictionaries.nil?

  @dictionaries[id]
end
|
|
212
|
+
end
|
|
213
|
+
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
|
|
10
|
+
module Org
|
|
11
|
+
module Apache
|
|
12
|
+
module Arrow
|
|
13
|
+
module Flatbuf
|
|
14
|
+
# Opaque binary data.
#
# This table declares no accessors of its own.
class Binary < ::FlatBuffers::Table; end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
|
|
10
|
+
module Org
|
|
11
|
+
module Apache
|
|
12
|
+
module Arrow
|
|
13
|
+
module Flatbuf
|
|
14
|
+
# Logically the same as Binary, but the internal representation uses a
# view struct that contains the string length and either the string's
# entire data inline (for small strings) or an inlined prefix, an index
# of another buffer, and an offset pointing to a slice in that buffer
# (for non-small strings).
#
# Since it uses a variable number of data buffers, each Field with this
# type must have a corresponding entry in `variadicBufferCounts`.
#
# This table declares no accessors of its own.
class BinaryView < ::FlatBuffers::Table; end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //File.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Footer (//File.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
|
|
10
|
+
module Org
|
|
11
|
+
module Apache
|
|
12
|
+
module Arrow
|
|
13
|
+
module Flatbuf
|
|
14
|
+
# Struct with three fields, each read from @view at a fixed position:
# offset (0), meta_data_length (8) and body_length (16).
class Block < ::FlatBuffers::Struct
  # Length of the data (this is aligned so there can be a gap between
  # this and the metadata).
  def body_length
    @view.unpack_long(16)
  end

  # Length of the metadata.
  def meta_data_length
    @view.unpack_int(8)
  end

  # Index to the start of the RecordBlock (note this is past the
  # Message header).
  def offset
    @view.unpack_long(0)
  end
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Message.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
require_relative "../../../apache/arrow/flatbuf/compression_type"
|
|
9
|
+
require_relative "../../../apache/arrow/flatbuf/body_compression_method"
|
|
10
|
+
|
|
11
|
+
module ArrowFormat
|
|
12
|
+
module Org
|
|
13
|
+
module Apache
|
|
14
|
+
module Arrow
|
|
15
|
+
module Flatbuf
|
|
16
|
+
# Optional compression for the memory buffers constituting IPC message
# bodies. Intended for use with RecordBatch but could be used for other
# message types.
class BodyCompression < ::FlatBuffers::Table
  # Compressor library.
  # For LZ4_FRAME, each compressed buffer must consist of a single
  # frame.
  #
  # Returns the CompressionType for the stored value, or the raw value
  # when try_convert returns a falsy result. A missing field is read
  # as 0.
  def codec
    slot = @view.unpack_virtual_offset(4)
    raw_codec = slot.zero? ? 0 : @view.unpack_byte(slot)
    ::ArrowFormat::Org::Apache::Arrow::Flatbuf::CompressionType.try_convert(raw_codec) || raw_codec
  end

  # Indicates the way the record batch body was compressed.
  #
  # Returns the BodyCompressionMethod for the stored value, or the raw
  # value when try_convert returns a falsy result. A missing field is
  # read as 0.
  #
  # NOTE: this accessor has the same name as Ruby's Object#method and
  # replaces it for instances of this class.
  def method
    slot = @view.unpack_virtual_offset(6)
    raw_method = slot.zero? ? 0 : @view.unpack_byte(slot)
    ::ArrowFormat::Org::Apache::Arrow::Flatbuf::BodyCompressionMethod.try_convert(raw_method) || raw_method
  end
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Message.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
|
|
10
|
+
module Org
|
|
11
|
+
module Apache
|
|
12
|
+
module Arrow
|
|
13
|
+
module Flatbuf
|
|
14
|
+
# Provided for forward compatibility in case we need to support
# different strategies for compressing the IPC message body (like
# whole-body compression rather than buffer-level) in the future.
#
# Registers a single value: BUFFER (0).
class BodyCompressionMethod < ::FlatBuffers::Enum
  # Each constituent buffer is first compressed with the indicated
  # compressor, and then written with the uncompressed length in the
  # first 8 bytes as a 64-bit little-endian signed integer followed by
  # the compressed buffer bytes (and then padding as required by the
  # protocol). The uncompressed length may be set to -1 to indicate
  # that the data that follows is not compressed, which can be useful
  # for cases where compression does not yield appreciable savings.
  BUFFER = register("BUFFER", 0)
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
|
|
10
|
+
module Org
|
|
11
|
+
module Apache
|
|
12
|
+
module Arrow
|
|
13
|
+
module Flatbuf
|
|
14
|
+
# Table for the Bool type. It declares no accessors of its own.
class Bool < ::FlatBuffers::Table; end
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
|
|
10
|
+
module Org
|
|
11
|
+
module Apache
|
|
12
|
+
module Arrow
|
|
13
|
+
module Flatbuf
|
|
14
|
+
# ----------------------------------------------------------------------
# A Buffer represents a single contiguous memory segment.
#
# Both fields are read from @view at a fixed position: offset (0) and
# length (8).
class Buffer < ::FlatBuffers::Struct
  # The absolute length (in bytes) of the memory buffer. The memory is
  # found from offset (inclusive) to offset + length (non-inclusive).
  # When building messages using the encapsulated IPC message, padding
  # bytes may be written after a buffer, but such padding bytes do not
  # need to be accounted for in the size here.
  def length
    @view.unpack_long(8)
  end

  # The relative offset into the shared memory page where the bytes for
  # this buffer starts.
  def offset
    @view.unpack_long(0)
  end
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Message.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
|
|
10
|
+
module Org
|
|
11
|
+
module Apache
|
|
12
|
+
module Arrow
|
|
13
|
+
module Flatbuf
|
|
14
|
+
# Enum of compression types: LZ4_FRAME (0) and ZSTD (1).
class CompressionType < ::FlatBuffers::Enum
  LZ4_FRAME = register("LZ4_FRAME", 0)
  ZSTD = register("ZSTD", 1)
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
require_relative "../../../apache/arrow/flatbuf/date_unit"
|
|
9
|
+
|
|
10
|
+
module ArrowFormat
|
|
11
|
+
module Org
|
|
12
|
+
module Apache
|
|
13
|
+
module Arrow
|
|
14
|
+
module Flatbuf
|
|
15
|
+
# Date is either a 32-bit or 64-bit signed integer type representing an
# elapsed time since UNIX epoch (1970-01-01), stored in either of two
# units:
#
# * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch
#   (no leap seconds), where the values are evenly divisible by
#   86400000
# * Days (32 bits) since the UNIX epoch
class Date < ::FlatBuffers::Table
  # Returns the DateUnit for the stored value, or the raw value when
  # try_convert returns a falsy result. A missing field is read as 1.
  def unit
    slot = @view.unpack_virtual_offset(4)
    raw_unit = slot.zero? ? 1 : @view.unpack_short(slot)
    ::ArrowFormat::Org::Apache::Arrow::Flatbuf::DateUnit.try_convert(raw_unit) || raw_unit
  end
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
|
|
10
|
+
module Org
|
|
11
|
+
module Apache
|
|
12
|
+
module Arrow
|
|
13
|
+
module Flatbuf
|
|
14
|
+
# Enum of date units: DAY (0) and MILLISECOND (1).
class DateUnit < ::FlatBuffers::Enum
  DAY = register("DAY", 0)
  MILLISECOND = register("MILLISECOND", 1)
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
|
|
10
|
+
module Org
|
|
11
|
+
module Apache
|
|
12
|
+
module Arrow
|
|
13
|
+
module Flatbuf
|
|
14
|
+
# Exact decimal value represented as an integer value in two's
# complement. Currently 32-bit (4-byte), 64-bit (8-byte),
# 128-bit (16-byte) and 256-bit (32-byte) integers are used.
# The representation uses the endianness indicated in the Schema.
class Decimal < ::FlatBuffers::Table
  # Number of bits per value. The accepted widths are 32, 64, 128 and
  # 256. We use bitWidth for consistency with Int::bitWidth.
  #
  # A missing field is read as 128.
  def bit_width
    slot = @view.unpack_virtual_offset(8)
    slot.zero? ? 128 : @view.unpack_int(slot)
  end

  # Total number of decimal digits. A missing field is read as 0.
  def precision
    slot = @view.unpack_virtual_offset(4)
    slot.zero? ? 0 : @view.unpack_int(slot)
  end

  # Number of digits after the decimal point ".". A missing field is
  # read as 0.
  def scale
    slot = @view.unpack_virtual_offset(6)
    slot.zero? ? 0 : @view.unpack_int(slot)
  end
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Message.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
require_relative "../../../apache/arrow/flatbuf/record_batch"
|
|
9
|
+
|
|
10
|
+
module ArrowFormat
|
|
11
|
+
module Org
|
|
12
|
+
module Apache
|
|
13
|
+
module Arrow
|
|
14
|
+
module Flatbuf
|
|
15
|
+
# For sending dictionary encoding information. Any Field can be
# dictionary-encoded, but in this case none of its children may be
# dictionary-encoded.
# There is one vector / column per dictionary, but that vector / column
# may be spread across multiple dictionary batches by using the isDelta
# flag.
class DictionaryBatch < ::FlatBuffers::Table
  # The RecordBatch table stored in this message, or nil when the field
  # is missing.
  def data
    slot = @view.unpack_virtual_offset(6)
    unless slot.zero?
      @view.unpack_table(::ArrowFormat::Org::Apache::Arrow::Flatbuf::RecordBatch, slot)
    end
  end

  # The dictionary ID. A missing field is read as 0.
  def id
    slot = @view.unpack_virtual_offset(4)
    slot.zero? ? 0 : @view.unpack_long(slot)
  end

  # If isDelta is true the values in the dictionary are to be appended
  # to a dictionary with the indicated id. If isDelta is false this
  # dictionary should replace the existing dictionary.
  #
  # A missing field is read as false.
  def delta?
    slot = @view.unpack_virtual_offset(8)
    slot.zero? ? false : @view.unpack_bool(slot)
  end
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
require_relative "../../../apache/arrow/flatbuf/dictionary_kind"
|
|
9
|
+
require_relative "../../../apache/arrow/flatbuf/int"
|
|
10
|
+
|
|
11
|
+
module ArrowFormat
|
|
12
|
+
module Org
|
|
13
|
+
module Apache
|
|
14
|
+
module Arrow
|
|
15
|
+
module Flatbuf
|
|
16
|
+
# Table accessors for dictionary encoding metadata: the dictionary
# kind, the dictionary ID, the index type and the ordered flag.
class DictionaryEncoding < ::FlatBuffers::Table
  # Returns the DictionaryKind for the stored value, or the raw value
  # when try_convert returns a falsy result. A missing field is read
  # as 0.
  def dictionary_kind
    slot = @view.unpack_virtual_offset(10)
    raw_kind = slot.zero? ? 0 : @view.unpack_short(slot)
    ::ArrowFormat::Org::Apache::Arrow::Flatbuf::DictionaryKind.try_convert(raw_kind) || raw_kind
  end

  # The known dictionary id in the application where this data is used.
  # In the file or streaming formats, the dictionary ids are found in
  # the DictionaryBatch messages.
  #
  # A missing field is read as 0.
  def id
    slot = @view.unpack_virtual_offset(4)
    slot.zero? ? 0 : @view.unpack_long(slot)
  end

  # The dictionary indices are constrained to be non-negative integers.
  # If this field is null, the indices must be signed int32. To
  # maximize cross-language compatibility and performance,
  # implementations are recommended to prefer signed integer types over
  # unsigned integer types and to avoid uint64 indices unless they are
  # required by an application.
  #
  # Returns the Int table, or nil when the field is missing.
  def index_type
    slot = @view.unpack_virtual_offset(6)
    unless slot.zero?
      @view.unpack_table(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int, slot)
    end
  end

  # By default, dictionaries are not ordered, or the order does not
  # have semantic meaning. In some statistical applications,
  # dictionary-encoding is used to represent ordered categorical data,
  # and we provide a way to preserve that metadata here.
  #
  # A missing field is read as false.
  def ordered?
    slot = @view.unpack_virtual_offset(8)
    slot.zero? ? false : @view.unpack_bool(slot)
  end
end
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|