red-arrow-format 23.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +26 -0
- data/LICENSE.txt +202 -0
- data/NOTICE.txt +2 -0
- data/README.md +61 -0
- data/Rakefile +67 -0
- data/lib/arrow-format/array.rb +476 -0
- data/lib/arrow-format/bitmap.rb +44 -0
- data/lib/arrow-format/error.rb +34 -0
- data/lib/arrow-format/field.rb +33 -0
- data/lib/arrow-format/file-reader.rb +213 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/binary.rb +21 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/binary_view.rb +27 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/block.rb +38 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression.rb +47 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression_method.rb +31 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/bool.rb +20 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/buffer.rb +38 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/compression_type.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/date.rb +36 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/date_unit.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/decimal.rb +48 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_batch.rb +50 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_encoding.rb +64 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_kind.rb +26 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/duration.rb +30 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/endianness.rb +24 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/feature.rb +46 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/field.rb +92 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/field_node.rb +43 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_binary.rb +27 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_list.rb +27 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/floating_point.rb +30 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/footer.rb +74 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/int.rb +33 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/interval.rb +30 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/interval_unit.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/key_value.rb +36 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_binary.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_list.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_list_view.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_utf8.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/list.rb +20 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/list_view.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/map.rb +52 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/message.rb +68 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/message_header.rb +39 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/metadata_version.rb +36 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/null.rb +21 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/precision.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/record_batch.rb +93 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/run_end_encoded.rb +25 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/schema.rb +68 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_compressed_axis.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_index_csx.rb +96 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor.rb +92 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index.rb +29 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_coo.rb +93 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_csf.rb +129 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/struct_.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/tensor.rb +74 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/tensor_dim.rb +38 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/time.rb +51 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/time_unit.rb +24 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/timestamp.rb +152 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/type.rb +55 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/union.rb +44 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/union_mode.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/utf8.rb +21 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/utf8view.rb +27 -0
- data/lib/arrow-format/readable.rb +271 -0
- data/lib/arrow-format/record-batch.rb +36 -0
- data/lib/arrow-format/schema.rb +24 -0
- data/lib/arrow-format/streaming-pull-reader.rb +243 -0
- data/lib/arrow-format/streaming-reader.rb +50 -0
- data/lib/arrow-format/type.rb +704 -0
- data/lib/arrow-format/version.rb +26 -0
- data/lib/arrow-format.rb +20 -0
- data/red-arrow-format.gemspec +57 -0
- metadata +137 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
  module Org
    module Apache
      module Arrow
        module Flatbuf
          # ----------------------------------------------------------------------
          # Top-level Type value, enabling extensible type-specific metadata. We can
          # add new logical types to Type without breaking backwards compatibility
          #
          # Every member below is created through `register` with four arguments:
          # the member name, its numeric value, the fully qualified name of a table
          # class, and a path relative to this file.
          #
          # NOTE(review): NONE is registered with the Utf8View class name and path,
          # exactly as emitted by the generator; presumably that pair is never used
          # for NONE - confirm against the Red FlatBuffers generator.
          class Type < ::FlatBuffers::Union
            NONE = register("NONE", 0,
                            "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Utf8View",
                            "../../../apache/arrow/flatbuf/utf8view")
            NULL = register("Null", 1,
                            "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Null",
                            "../../../apache/arrow/flatbuf/null")
            INT = register("Int", 2,
                           "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int",
                           "../../../apache/arrow/flatbuf/int")
            FLOATING_POINT = register("FloatingPoint", 3,
                                      "::ArrowFormat::Org::Apache::Arrow::Flatbuf::FloatingPoint",
                                      "../../../apache/arrow/flatbuf/floating_point")
            BINARY = register("Binary", 4,
                              "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Binary",
                              "../../../apache/arrow/flatbuf/binary")
            UTF8 = register("Utf8", 5,
                            "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Utf8",
                            "../../../apache/arrow/flatbuf/utf8")
            BOOL = register("Bool", 6,
                            "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Bool",
                            "../../../apache/arrow/flatbuf/bool")
            DECIMAL = register("Decimal", 7,
                               "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Decimal",
                               "../../../apache/arrow/flatbuf/decimal")
            DATE = register("Date", 8,
                            "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Date",
                            "../../../apache/arrow/flatbuf/date")
            TIME = register("Time", 9,
                            "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Time",
                            "../../../apache/arrow/flatbuf/time")
            TIMESTAMP = register("Timestamp", 10,
                                 "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Timestamp",
                                 "../../../apache/arrow/flatbuf/timestamp")
            INTERVAL = register("Interval", 11,
                                "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Interval",
                                "../../../apache/arrow/flatbuf/interval")
            LIST = register("List", 12,
                            "::ArrowFormat::Org::Apache::Arrow::Flatbuf::List",
                            "../../../apache/arrow/flatbuf/list")
            STRUCT_ = register("Struct_", 13,
                               "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Struct",
                               "../../../apache/arrow/flatbuf/struct_")
            UNION = register("Union", 14,
                             "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Union",
                             "../../../apache/arrow/flatbuf/union")
            FIXED_SIZE_BINARY = register("FixedSizeBinary", 15,
                                         "::ArrowFormat::Org::Apache::Arrow::Flatbuf::FixedSizeBinary",
                                         "../../../apache/arrow/flatbuf/fixed_size_binary")
            FIXED_SIZE_LIST = register("FixedSizeList", 16,
                                       "::ArrowFormat::Org::Apache::Arrow::Flatbuf::FixedSizeList",
                                       "../../../apache/arrow/flatbuf/fixed_size_list")
            MAP = register("Map", 17,
                           "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Map",
                           "../../../apache/arrow/flatbuf/map")
            DURATION = register("Duration", 18,
                                "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Duration",
                                "../../../apache/arrow/flatbuf/duration")
            LARGE_BINARY = register("LargeBinary", 19,
                                    "::ArrowFormat::Org::Apache::Arrow::Flatbuf::LargeBinary",
                                    "../../../apache/arrow/flatbuf/large_binary")
            LARGE_UTF8 = register("LargeUtf8", 20,
                                  "::ArrowFormat::Org::Apache::Arrow::Flatbuf::LargeUtf8",
                                  "../../../apache/arrow/flatbuf/large_utf8")
            LARGE_LIST = register("LargeList", 21,
                                  "::ArrowFormat::Org::Apache::Arrow::Flatbuf::LargeList",
                                  "../../../apache/arrow/flatbuf/large_list")
            RUN_END_ENCODED = register("RunEndEncoded", 22,
                                       "::ArrowFormat::Org::Apache::Arrow::Flatbuf::RunEndEncoded",
                                       "../../../apache/arrow/flatbuf/run_end_encoded")
            BINARY_VIEW = register("BinaryView", 23,
                                   "::ArrowFormat::Org::Apache::Arrow::Flatbuf::BinaryView",
                                   "../../../apache/arrow/flatbuf/binary_view")
            UTF8VIEW = register("Utf8View", 24,
                                "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Utf8View",
                                "../../../apache/arrow/flatbuf/utf8view")
            LIST_VIEW = register("ListView", 25,
                                 "::ArrowFormat::Org::Apache::Arrow::Flatbuf::ListView",
                                 "../../../apache/arrow/flatbuf/list_view")
            LARGE_LIST_VIEW = register("LargeListView", 26,
                                       "::ArrowFormat::Org::Apache::Arrow::Flatbuf::LargeListView",
                                       "../../../apache/arrow/flatbuf/large_list_view")

            # Loads the file named by @require_path. The method lives in this
            # file so that `require_relative` resolves the path against this
            # file's location.
            def require_table_class
              require_relative @require_path
            end
            private :require_table_class
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
require_relative "../../../apache/arrow/flatbuf/union_mode"
|
|
9
|
+
|
|
10
|
+
module ArrowFormat
  module Org
    module Apache
      module Arrow
        module Flatbuf
          # A union is a complex type with children in Field
          # By default ids in the type vector refer to the offsets in the children
          # optionally typeIds provides an indirection between the child offset and the type id
          # for each child `typeIds[offset]` is the id used in the type vector
          class Union < ::FlatBuffers::Table
            # Reads the union mode from virtual-table slot 4.
            #
            # When the slot is absent (offset 0) the raw value is taken to be 0.
            # Returns whatever UnionMode.try_convert gives for the raw value, or
            # the raw value itself when that result is nil/false.
            def mode
              offset = @view.unpack_virtual_offset(4)
              raw_value = offset.zero? ? 0 : @view.unpack_short(offset)
              ::ArrowFormat::Org::Apache::Arrow::Flatbuf::UnionMode.try_convert(raw_value) || raw_value
            end

            # Reads the type id vector from virtual-table slot 6.
            #
            # Returns nil when the slot is absent (offset 0); otherwise returns
            # the result of unpacking the vector with 4-byte elements, each one
            # decoded with `unpack_int`.
            def type_ids
              offset = @view.unpack_virtual_offset(6)
              return nil if offset.zero?

              @view.unpack_vector(offset, 4) { |element_offset| @view.unpack_int(element_offset) }
            end
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
  module Org
    module Apache
      module Arrow
        module Flatbuf
          # Enumeration of union modes. Each member is created through
          # `register` with its name and numeric value.
          class UnionMode < ::FlatBuffers::Enum
            SPARSE = register("Sparse", 0)
            DENSE  = register("Dense", 1)
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
  module Org
    module Apache
      module Arrow
        module Flatbuf
          # Unicode with UTF-8 encoding
          #
          # The table declares no fields of its own, so the class body is empty.
          class Utf8 < ::FlatBuffers::Table; end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
  module Org
    module Apache
      module Arrow
        module Flatbuf
          # Logically the same as Utf8, but the internal representation uses a view
          # struct that contains the string length and either the string's entire data
          # inline (for small strings) or an inlined prefix, an index of another buffer,
          # and an offset pointing to a slice in that buffer (for non-small strings).
          #
          # Since it uses a variable number of data buffers, each Field with this type
          # must have a corresponding entry in `variadicBufferCounts`.
          #
          # The table declares no fields of its own, so the class body is empty.
          class Utf8View < ::FlatBuffers::Table; end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
require_relative "array"
|
|
19
|
+
require_relative "field"
|
|
20
|
+
require_relative "record-batch"
|
|
21
|
+
require_relative "schema"
|
|
22
|
+
require_relative "type"
|
|
23
|
+
|
|
24
|
+
require_relative "org/apache/arrow/flatbuf/binary"
|
|
25
|
+
require_relative "org/apache/arrow/flatbuf/bool"
|
|
26
|
+
require_relative "org/apache/arrow/flatbuf/date"
|
|
27
|
+
require_relative "org/apache/arrow/flatbuf/date_unit"
|
|
28
|
+
require_relative "org/apache/arrow/flatbuf/decimal"
|
|
29
|
+
require_relative "org/apache/arrow/flatbuf/dictionary_encoding"
|
|
30
|
+
require_relative "org/apache/arrow/flatbuf/dictionary_batch"
|
|
31
|
+
require_relative "org/apache/arrow/flatbuf/duration"
|
|
32
|
+
require_relative "org/apache/arrow/flatbuf/fixed_size_binary"
|
|
33
|
+
require_relative "org/apache/arrow/flatbuf/floating_point"
|
|
34
|
+
require_relative "org/apache/arrow/flatbuf/int"
|
|
35
|
+
require_relative "org/apache/arrow/flatbuf/interval"
|
|
36
|
+
require_relative "org/apache/arrow/flatbuf/interval_unit"
|
|
37
|
+
require_relative "org/apache/arrow/flatbuf/large_binary"
|
|
38
|
+
require_relative "org/apache/arrow/flatbuf/large_list"
|
|
39
|
+
require_relative "org/apache/arrow/flatbuf/large_utf8"
|
|
40
|
+
require_relative "org/apache/arrow/flatbuf/list"
|
|
41
|
+
require_relative "org/apache/arrow/flatbuf/map"
|
|
42
|
+
require_relative "org/apache/arrow/flatbuf/message"
|
|
43
|
+
require_relative "org/apache/arrow/flatbuf/null"
|
|
44
|
+
require_relative "org/apache/arrow/flatbuf/precision"
|
|
45
|
+
require_relative "org/apache/arrow/flatbuf/record_batch"
|
|
46
|
+
require_relative "org/apache/arrow/flatbuf/schema"
|
|
47
|
+
require_relative "org/apache/arrow/flatbuf/struct_"
|
|
48
|
+
require_relative "org/apache/arrow/flatbuf/time"
|
|
49
|
+
require_relative "org/apache/arrow/flatbuf/time_unit"
|
|
50
|
+
require_relative "org/apache/arrow/flatbuf/timestamp"
|
|
51
|
+
require_relative "org/apache/arrow/flatbuf/union"
|
|
52
|
+
require_relative "org/apache/arrow/flatbuf/union_mode"
|
|
53
|
+
require_relative "org/apache/arrow/flatbuf/utf8"
|
|
54
|
+
|
|
55
|
+
module ArrowFormat
|
|
56
|
+
module Readable
|
|
57
|
+
private
|
|
58
|
+
# Builds a Schema from a FlatBuffers schema by converting each of its
# fields with read_field, in order.
def read_schema(fb_schema)
  Schema.new(fb_schema.fields.map { |fb_field| read_field(fb_field) })
end
|
|
64
|
+
|
|
65
|
+
# Converts a FlatBuffers field into a Field.
#
# The FlatBuffers type object is mapped onto the matching type object
# (child fields are converted recursively). When the field carries a
# dictionary, the mapped type becomes the value type of a DictionaryType
# and the dictionary id is recorded on the Field; otherwise the id is nil.
#
# A FlatBuffers type, or a precision/unit/bit-width combination, that no
# branch below matches leaves the mapped type as nil.
def read_field(fb_field)
  flatbuf = Org::Apache::Arrow::Flatbuf
  fb_type = fb_field.type
  type =
    case fb_type
    when flatbuf::Null then NullType.singleton
    when flatbuf::Bool then BooleanType.singleton
    when flatbuf::Int then read_type_int(fb_type)
    when flatbuf::FloatingPoint
      case fb_type.precision
      when flatbuf::Precision::SINGLE then Float32Type.singleton
      when flatbuf::Precision::DOUBLE then Float64Type.singleton
      end
    when flatbuf::Date
      case fb_type.unit
      when flatbuf::DateUnit::DAY then Date32Type.singleton
      when flatbuf::DateUnit::MILLISECOND then Date64Type.singleton
      end
    when flatbuf::Time
      case fb_type.bit_width
      when 32
        case fb_type.unit
        when flatbuf::TimeUnit::SECOND then Time32Type.new(:second)
        when flatbuf::TimeUnit::MILLISECOND then Time32Type.new(:millisecond)
        end
      when 64
        case fb_type.unit
        when flatbuf::TimeUnit::MICROSECOND then Time64Type.new(:microsecond)
        when flatbuf::TimeUnit::NANOSECOND then Time64Type.new(:nanosecond)
        end
      end
    when flatbuf::Timestamp
      # The unit symbol is derived from the enum member's name.
      TimestampType.new(fb_type.unit.name.downcase.to_sym, fb_type.timezone)
    when flatbuf::Interval
      case fb_type.unit
      when flatbuf::IntervalUnit::YEAR_MONTH then YearMonthIntervalType.new
      when flatbuf::IntervalUnit::DAY_TIME then DayTimeIntervalType.new
      when flatbuf::IntervalUnit::MONTH_DAY_NANO then MonthDayNanoIntervalType.new
      end
    when flatbuf::Duration
      DurationType.new(fb_type.unit.name.downcase.to_sym)
    when flatbuf::List
      ListType.new(read_field(fb_field.children[0]))
    when flatbuf::LargeList
      LargeListType.new(read_field(fb_field.children[0]))
    when flatbuf::Struct
      StructType.new(fb_field.children.map { |child| read_field(child) })
    when flatbuf::Union
      children = fb_field.children.map { |child| read_field(child) }
      type_ids = fb_type.type_ids
      case fb_type.mode
      when flatbuf::UnionMode::DENSE then DenseUnionType.new(children, type_ids)
      when flatbuf::UnionMode::SPARSE then SparseUnionType.new(children, type_ids)
      end
    when flatbuf::Map
      MapType.new(read_field(fb_field.children[0]))
    when flatbuf::Binary then BinaryType.singleton
    when flatbuf::LargeBinary then LargeBinaryType.singleton
    when flatbuf::Utf8 then UTF8Type.singleton
    when flatbuf::LargeUtf8 then LargeUTF8Type.singleton
    when flatbuf::FixedSizeBinary
      FixedSizeBinaryType.new(fb_type.byte_width)
    when flatbuf::Decimal
      case fb_type.bit_width
      when 128 then Decimal128Type.new(fb_type.precision, fb_type.scale)
      when 256 then Decimal256Type.new(fb_type.precision, fb_type.scale)
      end
    end

  dictionary_id = nil
  dictionary = fb_field.dictionary
  if dictionary
    dictionary_id = dictionary.id
    index_type = read_type_int(dictionary.index_type)
    # The type mapped above becomes the dictionary's value type.
    type = DictionaryType.new(index_type, type, dictionary.ordered?)
  end

  Field.new(fb_field.name, type, fb_field.nullable?, dictionary_id)
end
|
|
167
|
+
|
|
168
|
+
# Maps a FlatBuffers Int description onto the integer type singleton
# selected by its bit width (8, 16, 32 or 64) and signedness.
#
# Returns nil for any other bit width.
def read_type_int(fb_type)
  case fb_type.bit_width
  when 8 then fb_type.signed? ? Int8Type.singleton : UInt8Type.singleton
  when 16 then fb_type.signed? ? Int16Type.singleton : UInt16Type.singleton
  when 32 then fb_type.signed? ? Int32Type.singleton : UInt32Type.singleton
  when 64 then fb_type.signed? ? Int64Type.singleton : UInt64Type.singleton
  end
end
|
|
196
|
+
|
|
197
|
+
# Builds a RecordBatch from a FlatBuffers record batch.
#
# One column is read per schema field, in order. The same node and
# buffer lists are passed to every read_column call, which removes
# the entries it uses from the front of them.
def read_record_batch(fb_record_batch, schema, body)
  row_count = fb_record_batch.length
  nodes = fb_record_batch.nodes
  buffers = fb_record_batch.buffers
  columns = schema.fields.map { |field| read_column(field, nodes, buffers, body) }
  RecordBatch.new(schema, row_count, columns)
end
|
|
206
|
+
|
|
207
|
+
# Reads one column for +field+, taking what it needs from the front of
# +nodes+ and +buffers+ (both are mutated with `shift`) and slicing the
# referenced ranges out of +body+.
#
# A NullType field uses only a node. Every other field first takes one
# buffer, which is treated as validity data, or as nil when its length
# is zero. The remaining buffers and child columns depend on the type.
# Returns nil when the field's type matches none of the branches.
def read_column(field, nodes, buffers, body)
  length = nodes.shift.length
  type = field.type
  return type.build_array(length) if type.is_a?(NullType)

  # Cuts the byte range described by a buffer entry out of the body.
  slice_body = ->(buffer) { body.slice(buffer.offset, buffer.length) }

  first_buffer = buffers.shift
  validity = first_buffer.length.zero? ? nil : slice_body.call(first_buffer)

  case type
  when BooleanType, NumberType, TemporalType
    values = slice_body.call(buffers.shift)
    type.build_array(length, validity, values)
  when VariableSizeBinaryType
    offsets_buffer = buffers.shift
    values_buffer = buffers.shift
    type.build_array(length,
                     validity,
                     slice_body.call(offsets_buffer),
                     slice_body.call(values_buffer))
  when FixedSizeBinaryType
    values = slice_body.call(buffers.shift)
    type.build_array(length, validity, values)
  when VariableSizeListType
    offsets = slice_body.call(buffers.shift)
    child = read_column(type.child, nodes, buffers, body)
    type.build_array(length, validity, offsets, child)
  when StructType
    children = type.children.map { |child| read_column(child, nodes, buffers, body) }
    type.build_array(length, validity, children)
  when DenseUnionType
    # dense union type doesn't have validity: the first buffer holds the types.
    types = validity
    offsets = slice_body.call(buffers.shift)
    children = type.children.map { |child| read_column(child, nodes, buffers, body) }
    type.build_array(length, types, offsets, children)
  when SparseUnionType
    # sparse union type doesn't have validity: the first buffer holds the types.
    types = validity
    children = type.children.map { |child| read_column(child, nodes, buffers, body) }
    type.build_array(length, types, children)
  when DictionaryType
    indices = slice_body.call(buffers.shift)
    dictionary = find_dictionary(field.dictionary_id)
    type.build_array(length, validity, indices, dictionary)
  end
end
|
|
270
|
+
end
|
|
271
|
+
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
|
|
2
|
+
# distributed with this work for additional information
|
|
3
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
4
|
+
# to you under the Apache License, Version 2.0 (the
|
|
5
|
+
# "License"); you may not use this file except in compliance
|
|
6
|
+
# with the License. You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing,
|
|
11
|
+
# software distributed under the License is distributed on an
|
|
12
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
13
|
+
# KIND, either express or implied. See the License for the
|
|
14
|
+
# specific language governing permissions and limitations
|
|
15
|
+
# under the License.
|
|
16
|
+
|
|
17
|
+
module ArrowFormat
  # A schema, a row count and a list of columns held together.
  class RecordBatch
    attr_reader :schema, :n_rows, :columns

    # Stores the three values as given; nothing is validated or copied.
    def initialize(schema, n_rows, columns)
      @schema = schema
      @n_rows = n_rows
      @columns = columns
    end

    # Returns a Hash that maps each schema field's name to the column at
    # the same position. Fields are paired with columns by `zip`, so a
    # field without a matching column maps to nil.
    def to_h
      @schema.fields.zip(@columns).each_with_object({}) do |(field, column), result|
        result[field.name] = column
      end
    end
  end
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
|
|
2
|
+
# distributed with this work for additional information
|
|
3
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
4
|
+
# to you under the Apache License, Version 2.0 (the
|
|
5
|
+
# "License"); you may not use this file except in compliance
|
|
6
|
+
# with the License. You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing,
|
|
11
|
+
# software distributed under the License is distributed on an
|
|
12
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
13
|
+
# KIND, either express or implied. See the License for the
|
|
14
|
+
# specific language governing permissions and limitations
|
|
15
|
+
# under the License.
|
|
16
|
+
|
|
17
|
+
module ArrowFormat
  # Holds the list of fields it was constructed with.
  class Schema
    # The fields object passed to the constructor, returned as-is.
    attr_reader :fields

    # Stores +fields+ without copying or validating it.
    def initialize(fields)
      @fields = fields
    end
  end
end
|