red-arrow-format 23.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +26 -0
  3. data/LICENSE.txt +202 -0
  4. data/NOTICE.txt +2 -0
  5. data/README.md +61 -0
  6. data/Rakefile +67 -0
  7. data/lib/arrow-format/array.rb +476 -0
  8. data/lib/arrow-format/bitmap.rb +44 -0
  9. data/lib/arrow-format/error.rb +34 -0
  10. data/lib/arrow-format/field.rb +33 -0
  11. data/lib/arrow-format/file-reader.rb +213 -0
  12. data/lib/arrow-format/org/apache/arrow/flatbuf/binary.rb +21 -0
  13. data/lib/arrow-format/org/apache/arrow/flatbuf/binary_view.rb +27 -0
  14. data/lib/arrow-format/org/apache/arrow/flatbuf/block.rb +38 -0
  15. data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression.rb +47 -0
  16. data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression_method.rb +31 -0
  17. data/lib/arrow-format/org/apache/arrow/flatbuf/bool.rb +20 -0
  18. data/lib/arrow-format/org/apache/arrow/flatbuf/buffer.rb +38 -0
  19. data/lib/arrow-format/org/apache/arrow/flatbuf/compression_type.rb +22 -0
  20. data/lib/arrow-format/org/apache/arrow/flatbuf/date.rb +36 -0
  21. data/lib/arrow-format/org/apache/arrow/flatbuf/date_unit.rb +22 -0
  22. data/lib/arrow-format/org/apache/arrow/flatbuf/decimal.rb +48 -0
  23. data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_batch.rb +50 -0
  24. data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_encoding.rb +64 -0
  25. data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_kind.rb +26 -0
  26. data/lib/arrow-format/org/apache/arrow/flatbuf/duration.rb +30 -0
  27. data/lib/arrow-format/org/apache/arrow/flatbuf/endianness.rb +24 -0
  28. data/lib/arrow-format/org/apache/arrow/flatbuf/feature.rb +46 -0
  29. data/lib/arrow-format/org/apache/arrow/flatbuf/field.rb +92 -0
  30. data/lib/arrow-format/org/apache/arrow/flatbuf/field_node.rb +43 -0
  31. data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_binary.rb +27 -0
  32. data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_list.rb +27 -0
  33. data/lib/arrow-format/org/apache/arrow/flatbuf/floating_point.rb +30 -0
  34. data/lib/arrow-format/org/apache/arrow/flatbuf/footer.rb +74 -0
  35. data/lib/arrow-format/org/apache/arrow/flatbuf/int.rb +33 -0
  36. data/lib/arrow-format/org/apache/arrow/flatbuf/interval.rb +30 -0
  37. data/lib/arrow-format/org/apache/arrow/flatbuf/interval_unit.rb +23 -0
  38. data/lib/arrow-format/org/apache/arrow/flatbuf/key_value.rb +36 -0
  39. data/lib/arrow-format/org/apache/arrow/flatbuf/large_binary.rb +22 -0
  40. data/lib/arrow-format/org/apache/arrow/flatbuf/large_list.rb +22 -0
  41. data/lib/arrow-format/org/apache/arrow/flatbuf/large_list_view.rb +22 -0
  42. data/lib/arrow-format/org/apache/arrow/flatbuf/large_utf8.rb +22 -0
  43. data/lib/arrow-format/org/apache/arrow/flatbuf/list.rb +20 -0
  44. data/lib/arrow-format/org/apache/arrow/flatbuf/list_view.rb +23 -0
  45. data/lib/arrow-format/org/apache/arrow/flatbuf/map.rb +52 -0
  46. data/lib/arrow-format/org/apache/arrow/flatbuf/message.rb +68 -0
  47. data/lib/arrow-format/org/apache/arrow/flatbuf/message_header.rb +39 -0
  48. data/lib/arrow-format/org/apache/arrow/flatbuf/metadata_version.rb +36 -0
  49. data/lib/arrow-format/org/apache/arrow/flatbuf/null.rb +21 -0
  50. data/lib/arrow-format/org/apache/arrow/flatbuf/precision.rb +23 -0
  51. data/lib/arrow-format/org/apache/arrow/flatbuf/record_batch.rb +93 -0
  52. data/lib/arrow-format/org/apache/arrow/flatbuf/run_end_encoded.rb +25 -0
  53. data/lib/arrow-format/org/apache/arrow/flatbuf/schema.rb +68 -0
  54. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_compressed_axis.rb +22 -0
  55. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_index_csx.rb +96 -0
  56. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor.rb +92 -0
  57. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index.rb +29 -0
  58. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_coo.rb +93 -0
  59. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_csf.rb +129 -0
  60. data/lib/arrow-format/org/apache/arrow/flatbuf/struct_.rb +23 -0
  61. data/lib/arrow-format/org/apache/arrow/flatbuf/tensor.rb +74 -0
  62. data/lib/arrow-format/org/apache/arrow/flatbuf/tensor_dim.rb +38 -0
  63. data/lib/arrow-format/org/apache/arrow/flatbuf/time.rb +51 -0
  64. data/lib/arrow-format/org/apache/arrow/flatbuf/time_unit.rb +24 -0
  65. data/lib/arrow-format/org/apache/arrow/flatbuf/timestamp.rb +152 -0
  66. data/lib/arrow-format/org/apache/arrow/flatbuf/type.rb +55 -0
  67. data/lib/arrow-format/org/apache/arrow/flatbuf/union.rb +44 -0
  68. data/lib/arrow-format/org/apache/arrow/flatbuf/union_mode.rb +22 -0
  69. data/lib/arrow-format/org/apache/arrow/flatbuf/utf8.rb +21 -0
  70. data/lib/arrow-format/org/apache/arrow/flatbuf/utf8view.rb +27 -0
  71. data/lib/arrow-format/readable.rb +271 -0
  72. data/lib/arrow-format/record-batch.rb +36 -0
  73. data/lib/arrow-format/schema.rb +24 -0
  74. data/lib/arrow-format/streaming-pull-reader.rb +243 -0
  75. data/lib/arrow-format/streaming-reader.rb +50 -0
  76. data/lib/arrow-format/type.rb +704 -0
  77. data/lib/arrow-format/version.rb +26 -0
  78. data/lib/arrow-format.rb +20 -0
  79. data/red-arrow-format.gemspec +57 -0
  80. metadata +137 -0
@@ -0,0 +1,55 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ # ----------------------------------------------------------------------
15
+ # Top-level Type value, enabling extensible type-specific metadata. We can
16
+ # add new logical types to Type without breaking backwards compatibility
17
# Registry of the union members for the Arrow logical type union.
#
# Every constant is created by `register` with four arguments: the
# member name, its numeric tag, the fully-qualified name of the table
# class for that member and the path handed to `require_relative` to
# load that class.
class Type < ::FlatBuffers::Union
  # NOTE(review): tag 0 (NONE) is registered with the Utf8View table and
  # path. This looks like a generator artifact rather than a deliberate
  # mapping -- confirm against the Red FlatBuffers generator.
  NONE = register("NONE", 0, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Utf8View", "../../../apache/arrow/flatbuf/utf8view")
  NULL = register("Null", 1, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Null", "../../../apache/arrow/flatbuf/null")
  INT = register("Int", 2, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int", "../../../apache/arrow/flatbuf/int")
  FLOATING_POINT = register("FloatingPoint", 3, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::FloatingPoint", "../../../apache/arrow/flatbuf/floating_point")
  BINARY = register("Binary", 4, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Binary", "../../../apache/arrow/flatbuf/binary")
  UTF8 = register("Utf8", 5, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Utf8", "../../../apache/arrow/flatbuf/utf8")
  BOOL = register("Bool", 6, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Bool", "../../../apache/arrow/flatbuf/bool")
  DECIMAL = register("Decimal", 7, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Decimal", "../../../apache/arrow/flatbuf/decimal")
  DATE = register("Date", 8, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Date", "../../../apache/arrow/flatbuf/date")
  TIME = register("Time", 9, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Time", "../../../apache/arrow/flatbuf/time")
  TIMESTAMP = register("Timestamp", 10, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Timestamp", "../../../apache/arrow/flatbuf/timestamp")
  INTERVAL = register("Interval", 11, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Interval", "../../../apache/arrow/flatbuf/interval")
  LIST = register("List", 12, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::List", "../../../apache/arrow/flatbuf/list")
  STRUCT_ = register("Struct_", 13, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Struct", "../../../apache/arrow/flatbuf/struct_")
  UNION = register("Union", 14, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Union", "../../../apache/arrow/flatbuf/union")
  FIXED_SIZE_BINARY = register("FixedSizeBinary", 15, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::FixedSizeBinary", "../../../apache/arrow/flatbuf/fixed_size_binary")
  FIXED_SIZE_LIST = register("FixedSizeList", 16, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::FixedSizeList", "../../../apache/arrow/flatbuf/fixed_size_list")
  MAP = register("Map", 17, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Map", "../../../apache/arrow/flatbuf/map")
  DURATION = register("Duration", 18, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Duration", "../../../apache/arrow/flatbuf/duration")
  LARGE_BINARY = register("LargeBinary", 19, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::LargeBinary", "../../../apache/arrow/flatbuf/large_binary")
  LARGE_UTF8 = register("LargeUtf8", 20, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::LargeUtf8", "../../../apache/arrow/flatbuf/large_utf8")
  LARGE_LIST = register("LargeList", 21, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::LargeList", "../../../apache/arrow/flatbuf/large_list")
  RUN_END_ENCODED = register("RunEndEncoded", 22, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::RunEndEncoded", "../../../apache/arrow/flatbuf/run_end_encoded")
  BINARY_VIEW = register("BinaryView", 23, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::BinaryView", "../../../apache/arrow/flatbuf/binary_view")
  UTF8VIEW = register("Utf8View", 24, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Utf8View", "../../../apache/arrow/flatbuf/utf8view")
  LIST_VIEW = register("ListView", 25, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::ListView", "../../../apache/arrow/flatbuf/list_view")
  LARGE_LIST_VIEW = register("LargeListView", 26, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::LargeListView", "../../../apache/arrow/flatbuf/large_list_view")

  # Loads the file named by @require_path. require_relative resolves the
  # path against the file it is written in, i.e. this one.
  def require_table_class
    require_relative @require_path
  end
  private :require_table_class
end
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,44 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+ require_relative "../../../apache/arrow/flatbuf/union_mode"
9
+
10
+ module ArrowFormat
11
+ module Org
12
+ module Apache
13
+ module Arrow
14
+ module Flatbuf
15
+ # A union is a complex type with children in Field
16
+ # By default ids in the type vector refer to the offsets in the children
17
+ # optionally typeIds provides an indirection between the child offset and the type id
18
+ # for each child `typeIds[offset]` is the id used in the type vector
19
class Union < ::FlatBuffers::Table
  # Reads the union mode stored at virtual-table slot 4.
  #
  # An absent field (virtual offset 0) reads as the raw value 0.
  #
  # @return the result of UnionMode.try_convert for the stored value, or
  #   the raw integer itself when try_convert returns nil/false.
  def mode
    offset = @view.unpack_virtual_offset(4)
    raw_mode = offset.zero? ? 0 : @view.unpack_short(offset)
    ::ArrowFormat::Org::Apache::Arrow::Flatbuf::UnionMode.try_convert(raw_mode) || raw_mode
  end

  # Reads the type id vector stored at virtual-table slot 6.
  #
  # Elements are 4 bytes wide and decoded with unpack_int.
  #
  # @return whatever unpack_vector yields for the decoded elements, or
  #   nil when the field is absent (virtual offset 0).
  def type_ids
    offset = @view.unpack_virtual_offset(6)
    return if offset.zero?

    @view.unpack_vector(offset, 4) { |position| @view.unpack_int(position) }
  end
end
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,22 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
# Enum of the two union layouts; Union#mode converts its stored value
# through this class.
class UnionMode < ::FlatBuffers::Enum
  # Registered under the name "Sparse" with value 0.
  SPARSE = register("Sparse", 0)
  # Registered under the name "Dense" with value 1.
  DENSE = register("Dense", 1)
end
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,21 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ # Unicode with UTF-8 encoding
15
class Utf8 < ::FlatBuffers::Table
  # No field accessors are defined for this table.
end
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,27 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ # Logically the same as Utf8, but the internal representation uses a view
15
+ # struct that contains the string length and either the string's entire data
16
+ # inline (for small strings) or an inlined prefix, an index of another buffer,
17
+ # and an offset pointing to a slice in that buffer (for non-small strings).
18
+ #
19
+ # Since it uses a variable number of data buffers, each Field with this type
20
+ # must have a corresponding entry in `variadicBufferCounts`.
21
class Utf8View < ::FlatBuffers::Table
  # No field accessors are defined for this table.
end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,271 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ require_relative "array"
19
+ require_relative "field"
20
+ require_relative "record-batch"
21
+ require_relative "schema"
22
+ require_relative "type"
23
+
24
+ require_relative "org/apache/arrow/flatbuf/binary"
25
+ require_relative "org/apache/arrow/flatbuf/bool"
26
+ require_relative "org/apache/arrow/flatbuf/date"
27
+ require_relative "org/apache/arrow/flatbuf/date_unit"
28
+ require_relative "org/apache/arrow/flatbuf/decimal"
29
+ require_relative "org/apache/arrow/flatbuf/dictionary_encoding"
30
+ require_relative "org/apache/arrow/flatbuf/dictionary_batch"
31
+ require_relative "org/apache/arrow/flatbuf/duration"
32
+ require_relative "org/apache/arrow/flatbuf/fixed_size_binary"
33
+ require_relative "org/apache/arrow/flatbuf/floating_point"
34
+ require_relative "org/apache/arrow/flatbuf/int"
35
+ require_relative "org/apache/arrow/flatbuf/interval"
36
+ require_relative "org/apache/arrow/flatbuf/interval_unit"
37
+ require_relative "org/apache/arrow/flatbuf/large_binary"
38
+ require_relative "org/apache/arrow/flatbuf/large_list"
39
+ require_relative "org/apache/arrow/flatbuf/large_utf8"
40
+ require_relative "org/apache/arrow/flatbuf/list"
41
+ require_relative "org/apache/arrow/flatbuf/map"
42
+ require_relative "org/apache/arrow/flatbuf/message"
43
+ require_relative "org/apache/arrow/flatbuf/null"
44
+ require_relative "org/apache/arrow/flatbuf/precision"
45
+ require_relative "org/apache/arrow/flatbuf/record_batch"
46
+ require_relative "org/apache/arrow/flatbuf/schema"
47
+ require_relative "org/apache/arrow/flatbuf/struct_"
48
+ require_relative "org/apache/arrow/flatbuf/time"
49
+ require_relative "org/apache/arrow/flatbuf/time_unit"
50
+ require_relative "org/apache/arrow/flatbuf/timestamp"
51
+ require_relative "org/apache/arrow/flatbuf/union"
52
+ require_relative "org/apache/arrow/flatbuf/union_mode"
53
+ require_relative "org/apache/arrow/flatbuf/utf8"
54
+
55
+ module ArrowFormat
56
+ module Readable
57
+ private
58
# Builds a Schema from a FlatBuffers schema object.
#
# Each entry of fb_schema.fields is converted with read_field, keeping
# the original order.
#
# @param fb_schema object responding to #fields
# @return [Schema]
def read_schema(fb_schema)
  Schema.new(fb_schema.fields.map { |fb_field| read_field(fb_field) })
end
64
+
65
# Converts a FlatBuffers field into a Field.
#
# The field's FlatBuffers type object is mapped to the matching type
# object by class; nested types (list, large list, struct, union, map)
# convert their children recursively. A type object, precision, unit or
# bit width that is not listed below leaves the resulting type as nil.
#
# When the field carries dictionary metadata, the mapped type becomes the
# value type of a DictionaryType and the dictionary id is recorded on the
# Field; otherwise the dictionary id is nil.
#
# @param fb_field object responding to #type, #children, #dictionary,
#   #name and #nullable?
# @return [Field]
def read_field(fb_field)
  flatbuf = Org::Apache::Arrow::Flatbuf
  fb_type = fb_field.type

  type =
    case fb_type
    when flatbuf::Null then NullType.singleton
    when flatbuf::Bool then BooleanType.singleton
    when flatbuf::Int then read_type_int(fb_type)
    when flatbuf::FloatingPoint
      case fb_type.precision
      when flatbuf::Precision::SINGLE then Float32Type.singleton
      when flatbuf::Precision::DOUBLE then Float64Type.singleton
      end
    when flatbuf::Date
      case fb_type.unit
      when flatbuf::DateUnit::DAY then Date32Type.singleton
      when flatbuf::DateUnit::MILLISECOND then Date64Type.singleton
      end
    when flatbuf::Time
      # Only second/millisecond are accepted for 32 bit and
      # microsecond/nanosecond for 64 bit.
      case fb_type.bit_width
      when 32
        case fb_type.unit
        when flatbuf::TimeUnit::SECOND then Time32Type.new(:second)
        when flatbuf::TimeUnit::MILLISECOND then Time32Type.new(:millisecond)
        end
      when 64
        case fb_type.unit
        when flatbuf::TimeUnit::MICROSECOND then Time64Type.new(:microsecond)
        when flatbuf::TimeUnit::NANOSECOND then Time64Type.new(:nanosecond)
        end
      end
    when flatbuf::Timestamp
      timestamp_unit = fb_type.unit.name.downcase.to_sym
      TimestampType.new(timestamp_unit, fb_type.timezone)
    when flatbuf::Interval
      case fb_type.unit
      when flatbuf::IntervalUnit::YEAR_MONTH then YearMonthIntervalType.new
      when flatbuf::IntervalUnit::DAY_TIME then DayTimeIntervalType.new
      when flatbuf::IntervalUnit::MONTH_DAY_NANO then MonthDayNanoIntervalType.new
      end
    when flatbuf::Duration
      duration_unit = fb_type.unit.name.downcase.to_sym
      DurationType.new(duration_unit)
    when flatbuf::List
      ListType.new(read_field(fb_field.children[0]))
    when flatbuf::LargeList
      LargeListType.new(read_field(fb_field.children[0]))
    when flatbuf::Struct
      StructType.new(fb_field.children.map { |fb_child| read_field(fb_child) })
    when flatbuf::Union
      members = fb_field.children.map { |fb_child| read_field(fb_child) }
      member_ids = fb_type.type_ids
      case fb_type.mode
      when flatbuf::UnionMode::DENSE then DenseUnionType.new(members, member_ids)
      when flatbuf::UnionMode::SPARSE then SparseUnionType.new(members, member_ids)
      end
    when flatbuf::Map
      MapType.new(read_field(fb_field.children[0]))
    when flatbuf::Binary then BinaryType.singleton
    when flatbuf::LargeBinary then LargeBinaryType.singleton
    when flatbuf::Utf8 then UTF8Type.singleton
    when flatbuf::LargeUtf8 then LargeUTF8Type.singleton
    when flatbuf::FixedSizeBinary
      FixedSizeBinaryType.new(fb_type.byte_width)
    when flatbuf::Decimal
      case fb_type.bit_width
      when 128 then Decimal128Type.new(fb_type.precision, fb_type.scale)
      when 256 then Decimal256Type.new(fb_type.precision, fb_type.scale)
      end
    end

  dictionary_id = nil
  fb_dictionary = fb_field.dictionary
  if fb_dictionary
    dictionary_id = fb_dictionary.id
    index_type = read_type_int(fb_dictionary.index_type)
    # The type mapped above becomes the dictionary's value type.
    type = DictionaryType.new(index_type, type, fb_dictionary.ordered?)
  end

  Field.new(fb_field.name, type, fb_field.nullable?, dictionary_id)
end
167
+
168
# Maps a FlatBuffers integer type to the matching integer type singleton.
#
# The bit width selects the size and #signed? selects between the signed
# and unsigned variant.
#
# @param fb_type object responding to #bit_width and #signed?
# @return the IntNType/UIntNType singleton, or nil when the bit width is
#   not 8, 16, 32 or 64.
def read_type_int(fb_type)
  case fb_type.bit_width
  when 8
    fb_type.signed? ? Int8Type.singleton : UInt8Type.singleton
  when 16
    fb_type.signed? ? Int16Type.singleton : UInt16Type.singleton
  when 32
    fb_type.signed? ? Int32Type.singleton : UInt32Type.singleton
  when 64
    fb_type.signed? ? Int64Type.singleton : UInt64Type.singleton
  end
end
196
+
197
# Builds a RecordBatch from a FlatBuffers record batch and its body.
#
# The node and buffer lists are fetched once and the same two objects are
# handed to read_column for every schema field, in schema order.
#
# @param fb_record_batch object responding to #length, #nodes and #buffers
# @param schema object responding to #fields
# @param body passed through to read_column unchanged
# @return [RecordBatch]
def read_record_batch(fb_record_batch, schema, body)
  row_count = fb_record_batch.length
  pending_nodes = fb_record_batch.nodes
  pending_buffers = fb_record_batch.buffers
  columns = schema.fields.map do |field|
    read_column(field, pending_nodes, pending_buffers, body)
  end
  RecordBatch.new(schema, row_count, columns)
end
206
+
207
# Reads one column, consuming its entries from the front of nodes and
# buffers.
#
# One node is always shifted off to obtain the length. A NullType field
# consumes no buffers. For every other type the next buffer is shifted off
# first; a zero-length one is represented as nil, otherwise it is sliced
# out of body. The remaining buffers and child columns are then consumed
# according to the field's type.
#
# @param field object responding to #type (and #dictionary_id for
#   dictionary types)
# @param nodes list of objects responding to #length; mutated via #shift
# @param buffers list of objects responding to #offset and #length;
#   mutated via #shift
# @param body object responding to #slice(offset, length)
# @return the value of type.build_array for the matched type, or nil when
#   the type matches none of the branches below.
def read_column(field, nodes, buffers, body)
  length = nodes.shift.length
  type = field.type
  return type.build_array(length) if type.is_a?(NullType)

  # Slice of body described by one buffer entry.
  slice_of = ->(buffer) { body.slice(buffer.offset, buffer.length) }
  # Child columns, read in order from the same node/buffer lists.
  read_children = lambda do
    type.children.map { |child| read_column(child, nodes, buffers, body) }
  end

  leading_buffer = buffers.shift
  validity = leading_buffer.length.zero? ? nil : slice_of.call(leading_buffer)

  case type
  when BooleanType, NumberType, TemporalType
    type.build_array(length, validity, slice_of.call(buffers.shift))
  when VariableSizeBinaryType
    offsets_buffer = buffers.shift
    values_buffer = buffers.shift
    type.build_array(length,
                     validity,
                     slice_of.call(offsets_buffer),
                     slice_of.call(values_buffer))
  when FixedSizeBinaryType
    type.build_array(length, validity, slice_of.call(buffers.shift))
  when VariableSizeListType
    offsets = slice_of.call(buffers.shift)
    type.build_array(length,
                     validity,
                     offsets,
                     read_column(type.child, nodes, buffers, body))
  when StructType
    type.build_array(length, validity, read_children.call)
  when DenseUnionType
    # Dense unions have no validity; the leading buffer holds the types.
    offsets = slice_of.call(buffers.shift)
    type.build_array(length, validity, offsets, read_children.call)
  when SparseUnionType
    # Sparse unions have no validity; the leading buffer holds the types.
    type.build_array(length, validity, read_children.call)
  when DictionaryType
    indices = slice_of.call(buffers.shift)
    type.build_array(length,
                     validity,
                     indices,
                     find_dictionary(field.dictionary_id))
  end
end
270
+ end
271
+ end
@@ -0,0 +1,36 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
2
+ # distributed with this work for additional information
3
+ # regarding copyright ownership. The ASF licenses this file
4
+ # to you under the Apache License, Version 2.0 (the
5
+ # "License"); you may not use this file except in compliance
6
+ # with the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing,
11
+ # software distributed under the License is distributed on an
12
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13
+ # KIND, either express or implied. See the License for the
14
+ # specific language governing permissions and limitations
15
+ # under the License.
16
+
17
+ module ArrowFormat
18
# A schema together with a row count and one column per schema field.
class RecordBatch
  attr_reader :schema, :n_rows, :columns

  # @param schema object responding to #fields
  # @param n_rows row count, stored as given
  # @param columns column values, expected in schema field order
  def initialize(schema, n_rows, columns)
    @schema, @n_rows, @columns = schema, n_rows, columns
  end

  # Pairs each schema field with the column at the same position.
  #
  # @return [Hash] field name => column. A field without a matching
  #   column maps to nil; a repeated name keeps the last column.
  def to_h
    @schema.fields.zip(@columns).each_with_object({}) do |(field, column), columns_by_name|
      columns_by_name[field.name] = column
    end
  end
end
36
+ end
@@ -0,0 +1,24 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
2
+ # distributed with this work for additional information
3
+ # regarding copyright ownership. The ASF licenses this file
4
+ # to you under the Apache License, Version 2.0 (the
5
+ # "License"); you may not use this file except in compliance
6
+ # with the License. You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing,
11
+ # software distributed under the License is distributed on an
12
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13
+ # KIND, either express or implied. See the License for the
14
+ # specific language governing permissions and limitations
15
+ # under the License.
16
+
17
+ module ArrowFormat
18
# Holds the list of fields it was constructed with.
class Schema
  # The fields object passed to the constructor, returned as-is.
  attr_reader :fields

  # @param fields the fields to expose through #fields
  def initialize(fields)
    @fields = fields
  end
end
24
+ end