red-arrow-format 23.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +26 -0
  3. data/LICENSE.txt +202 -0
  4. data/NOTICE.txt +2 -0
  5. data/README.md +61 -0
  6. data/Rakefile +67 -0
  7. data/lib/arrow-format/array.rb +476 -0
  8. data/lib/arrow-format/bitmap.rb +44 -0
  9. data/lib/arrow-format/error.rb +34 -0
  10. data/lib/arrow-format/field.rb +33 -0
  11. data/lib/arrow-format/file-reader.rb +213 -0
  12. data/lib/arrow-format/org/apache/arrow/flatbuf/binary.rb +21 -0
  13. data/lib/arrow-format/org/apache/arrow/flatbuf/binary_view.rb +27 -0
  14. data/lib/arrow-format/org/apache/arrow/flatbuf/block.rb +38 -0
  15. data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression.rb +47 -0
  16. data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression_method.rb +31 -0
  17. data/lib/arrow-format/org/apache/arrow/flatbuf/bool.rb +20 -0
  18. data/lib/arrow-format/org/apache/arrow/flatbuf/buffer.rb +38 -0
  19. data/lib/arrow-format/org/apache/arrow/flatbuf/compression_type.rb +22 -0
  20. data/lib/arrow-format/org/apache/arrow/flatbuf/date.rb +36 -0
  21. data/lib/arrow-format/org/apache/arrow/flatbuf/date_unit.rb +22 -0
  22. data/lib/arrow-format/org/apache/arrow/flatbuf/decimal.rb +48 -0
  23. data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_batch.rb +50 -0
  24. data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_encoding.rb +64 -0
  25. data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_kind.rb +26 -0
  26. data/lib/arrow-format/org/apache/arrow/flatbuf/duration.rb +30 -0
  27. data/lib/arrow-format/org/apache/arrow/flatbuf/endianness.rb +24 -0
  28. data/lib/arrow-format/org/apache/arrow/flatbuf/feature.rb +46 -0
  29. data/lib/arrow-format/org/apache/arrow/flatbuf/field.rb +92 -0
  30. data/lib/arrow-format/org/apache/arrow/flatbuf/field_node.rb +43 -0
  31. data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_binary.rb +27 -0
  32. data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_list.rb +27 -0
  33. data/lib/arrow-format/org/apache/arrow/flatbuf/floating_point.rb +30 -0
  34. data/lib/arrow-format/org/apache/arrow/flatbuf/footer.rb +74 -0
  35. data/lib/arrow-format/org/apache/arrow/flatbuf/int.rb +33 -0
  36. data/lib/arrow-format/org/apache/arrow/flatbuf/interval.rb +30 -0
  37. data/lib/arrow-format/org/apache/arrow/flatbuf/interval_unit.rb +23 -0
  38. data/lib/arrow-format/org/apache/arrow/flatbuf/key_value.rb +36 -0
  39. data/lib/arrow-format/org/apache/arrow/flatbuf/large_binary.rb +22 -0
  40. data/lib/arrow-format/org/apache/arrow/flatbuf/large_list.rb +22 -0
  41. data/lib/arrow-format/org/apache/arrow/flatbuf/large_list_view.rb +22 -0
  42. data/lib/arrow-format/org/apache/arrow/flatbuf/large_utf8.rb +22 -0
  43. data/lib/arrow-format/org/apache/arrow/flatbuf/list.rb +20 -0
  44. data/lib/arrow-format/org/apache/arrow/flatbuf/list_view.rb +23 -0
  45. data/lib/arrow-format/org/apache/arrow/flatbuf/map.rb +52 -0
  46. data/lib/arrow-format/org/apache/arrow/flatbuf/message.rb +68 -0
  47. data/lib/arrow-format/org/apache/arrow/flatbuf/message_header.rb +39 -0
  48. data/lib/arrow-format/org/apache/arrow/flatbuf/metadata_version.rb +36 -0
  49. data/lib/arrow-format/org/apache/arrow/flatbuf/null.rb +21 -0
  50. data/lib/arrow-format/org/apache/arrow/flatbuf/precision.rb +23 -0
  51. data/lib/arrow-format/org/apache/arrow/flatbuf/record_batch.rb +93 -0
  52. data/lib/arrow-format/org/apache/arrow/flatbuf/run_end_encoded.rb +25 -0
  53. data/lib/arrow-format/org/apache/arrow/flatbuf/schema.rb +68 -0
  54. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_compressed_axis.rb +22 -0
  55. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_index_csx.rb +96 -0
  56. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor.rb +92 -0
  57. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index.rb +29 -0
  58. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_coo.rb +93 -0
  59. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_csf.rb +129 -0
  60. data/lib/arrow-format/org/apache/arrow/flatbuf/struct_.rb +23 -0
  61. data/lib/arrow-format/org/apache/arrow/flatbuf/tensor.rb +74 -0
  62. data/lib/arrow-format/org/apache/arrow/flatbuf/tensor_dim.rb +38 -0
  63. data/lib/arrow-format/org/apache/arrow/flatbuf/time.rb +51 -0
  64. data/lib/arrow-format/org/apache/arrow/flatbuf/time_unit.rb +24 -0
  65. data/lib/arrow-format/org/apache/arrow/flatbuf/timestamp.rb +152 -0
  66. data/lib/arrow-format/org/apache/arrow/flatbuf/type.rb +55 -0
  67. data/lib/arrow-format/org/apache/arrow/flatbuf/union.rb +44 -0
  68. data/lib/arrow-format/org/apache/arrow/flatbuf/union_mode.rb +22 -0
  69. data/lib/arrow-format/org/apache/arrow/flatbuf/utf8.rb +21 -0
  70. data/lib/arrow-format/org/apache/arrow/flatbuf/utf8view.rb +27 -0
  71. data/lib/arrow-format/readable.rb +271 -0
  72. data/lib/arrow-format/record-batch.rb +36 -0
  73. data/lib/arrow-format/schema.rb +24 -0
  74. data/lib/arrow-format/streaming-pull-reader.rb +243 -0
  75. data/lib/arrow-format/streaming-reader.rb +50 -0
  76. data/lib/arrow-format/type.rb +704 -0
  77. data/lib/arrow-format/version.rb +26 -0
  78. data/lib/arrow-format.rb +20 -0
  79. data/red-arrow-format.gemspec +57 -0
  80. metadata +137 -0
@@ -0,0 +1,213 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ require_relative "streaming-reader"
19
+
20
+ require_relative "org/apache/arrow/flatbuf/block"
21
+ require_relative "org/apache/arrow/flatbuf/footer"
22
+
23
+ module ArrowFormat
24
+ class FileReader
25
+ include Enumerable
26
+ include Readable
27
+
28
+ MAGIC = "ARROW1".b.freeze
29
+ MAGIC_BUFFER = IO::Buffer.for(MAGIC)
30
+ START_MARKER_SIZE = MAGIC_BUFFER.size
31
+ END_MARKER_SIZE = MAGIC_BUFFER.size
32
+ # <magic number "ARROW1">
33
+ # <empty padding bytes [to 8 byte boundary]>
34
+ STREAMING_FORMAT_START_OFFSET = 8
35
+ CONTINUATION_BUFFER =
36
+ IO::Buffer.for(MessagePullReader::CONTINUATION_STRING)
37
+ FOOTER_SIZE_FORMAT = :s32
38
+ FOOTER_SIZE_SIZE = IO::Buffer.size_of(FOOTER_SIZE_FORMAT)
39
+
40
# Opens an Arrow IPC file and eagerly loads its footer, schema and
# dictionaries.
#
# +input+ selects how the backing buffer is obtained:
# * IO: mapped read-only with IO::Buffer.map
# * String: wrapped with IO::Buffer.for
# * anything else: used as the buffer as is
#
# The buffer is checked by #validate before anything is decoded.
def initialize(input)
  @buffer =
    case input
    when IO then IO::Buffer.map(input, nil, 0, IO::Buffer::READONLY)
    when String then IO::Buffer.for(input)
    else input
    end

  validate
  @footer = read_footer
  @record_batch_blocks = @footer.record_batches
  @schema = read_schema(@footer.schema)
  @dictionaries = read_dictionaries
end
56
+
57
# Returns the number of record batch blocks held in @record_batch_blocks.
def n_record_batches = @record_batch_blocks.size
60
+
61
# Reads the record batch stored in the +i+-th record batch block.
#
# Raises FileReadError when the message found in that block is not a
# record batch message.
def read(i)
  message, body = read_block(@record_batch_blocks[i])
  header = message.header
  if header.is_a?(Org::Apache::Arrow::Flatbuf::RecordBatch)
    return read_record_batch(header, @schema, body)
  end

  detail = "Not a record batch message: #{i}: " + header.class.name
  raise FileReadError.new(@buffer, detail)
end
71
+
72
# Yields every record batch in block order.
#
# Without a block, returns an Enumerator whose size is computed lazily
# from #n_record_batches.
def each
  unless block_given?
    return enum_for(__method__) { n_record_batches }
  end

  batch_count = @record_batch_blocks.size
  batch_count.times { |index| yield(read(index)) }
end
79
+
80
+ private
81
# Checks that the buffer is large enough to hold the padded start
# marker, the footer size field and the end marker, and that both
# markers equal MAGIC_BUFFER.
#
# Raises FileReadError on the first failed check.
def validate
  buffer_size = @buffer.size
  required_size = [
    STREAMING_FORMAT_START_OFFSET,
    FOOTER_SIZE_SIZE,
    END_MARKER_SIZE,
  ].sum
  if buffer_size < required_size
    message =
      "Input must be larger than or equal to #{required_size}: #{buffer_size}"
    raise FileReadError.new(@buffer, message)
  end

  head = @buffer.slice(0, START_MARKER_SIZE)
  raise FileReadError.new(@buffer, "No start marker") if head != MAGIC_BUFFER

  tail = @buffer.slice(buffer_size - END_MARKER_SIZE, END_MARKER_SIZE)
  raise FileReadError.new(@buffer, "No end marker") if tail != MAGIC_BUFFER
end
101
+
102
# Decodes the footer found at the end of the buffer.
#
# The footer size field sits immediately before the end marker and the
# footer data sits immediately before the size field.
#
# The size is read from the file, so it is validated before slicing:
# it must not be negative and the footer must not reach back into the
# padded start marker.
#
# Returns an Org::Apache::Arrow::Flatbuf::Footer built from the footer
# data. Raises FileReadError when the stored size is out of range.
def read_footer
  footer_size_offset = @buffer.size - END_MARKER_SIZE - FOOTER_SIZE_SIZE
  footer_size = @buffer.get_value(FOOTER_SIZE_FORMAT, footer_size_offset)
  max_footer_size = footer_size_offset - STREAMING_FORMAT_START_OFFSET
  if footer_size.negative? || footer_size > max_footer_size
    raise FileReadError.new(@buffer,
                            "Invalid footer size: " +
                            "expected:0..#{max_footer_size} " +
                            "actual:#{footer_size}")
  end

  footer_data = @buffer.slice(footer_size_offset - footer_size,
                              footer_size)
  Org::Apache::Arrow::Flatbuf::Footer.new(footer_data)
end
109
+
110
# Reads the message that +block+ points to.
#
# Starting at block.offset the buffer is read as:
#   continuation marker, metadata length, metadata, body
#
# block.meta_data_length is treated as covering the continuation
# marker, the metadata length field and the metadata itself, so the
# stored metadata length is checked against it.
#
# Returns [fb_message, body]: the decoded Flatbuf::Message and the
# slice of block.body_length bytes that follows the metadata.
#
# Raises FileReadError when the continuation marker or the metadata
# length does not match. The block is identified by its offset in the
# error message because this method doesn't know the block's index.
def read_block(block)
  block_offset = block.offset
  offset = block_offset

  # If we can report proper error information, we can use
  # MessagePullReader here.
  #
  # message_pull_reader = MessagePullReader.new do |message, body|
  #   return read_record_batch(message.header, @schema, body)
  # end
  # chunk = @buffer.slice(offset,
  #                       MessagePullReader::CONTINUATION_SIZE +
  #                       MessagePullReader::METADATA_LENGTH_SIZE +
  #                       block.meta_data_length +
  #                       block.body_length)
  # message_pull_reader.consume(chunk)

  continuation_size = CONTINUATION_BUFFER.size
  continuation = @buffer.slice(offset, continuation_size)
  unless continuation == CONTINUATION_BUFFER
    raise FileReadError.new(@buffer,
                            "Invalid continuation: " +
                            "offset:#{block_offset}: " +
                            continuation.inspect)
  end
  offset += continuation_size

  metadata_length_type = MessagePullReader::METADATA_LENGTH_TYPE
  metadata_length_size = MessagePullReader::METADATA_LENGTH_SIZE
  metadata_length = @buffer.get_value(metadata_length_type, offset)
  expected_metadata_length =
    block.meta_data_length -
    continuation_size -
    metadata_length_size
  unless metadata_length == expected_metadata_length
    raise FileReadError.new(@buffer,
                            "Invalid metadata length: " +
                            "offset:#{block_offset}: " +
                            "expected:#{expected_metadata_length} " +
                            "actual:#{metadata_length}")
  end
  offset += metadata_length_size

  metadata = @buffer.slice(offset, metadata_length)
  fb_message = Org::Apache::Arrow::Flatbuf::Message.new(metadata)
  offset += metadata_length

  body = @buffer.slice(offset, block.body_length)

  [fb_message, body]
end
158
+
159
# Reads every dictionary batch listed in the footer.
#
# Returns nil when the footer has no dictionaries. Otherwise returns a
# Hash that maps a dictionary ID to an Array of dictionary chunks: the
# first element comes from the non delta batch and each delta batch
# appends one more element.
#
# Raises FileReadError when:
# * a block doesn't contain a dictionary batch message
# * a delta batch appears before the non delta batch of the same ID
# * more than one non delta batch exists for the same ID
# * a batch uses an ID that no dictionary field of the schema declares
def read_dictionaries
  dictionary_blocks = @footer.dictionaries
  return nil if dictionary_blocks.nil?

  # Only the fields directly in the schema are inspected here.
  dictionary_fields = {}
  @schema.fields.each do |field|
    next unless field.type.is_a?(DictionaryType)
    dictionary_fields[field.dictionary_id] = field
  end

  dictionaries = {}
  dictionary_blocks.each do |block|
    fb_message, body = read_block(block)
    fb_header = fb_message.header
    unless fb_header.is_a?(Org::Apache::Arrow::Flatbuf::DictionaryBatch)
      raise FileReadError.new(@buffer,
                              "Not a dictionary batch message: " +
                              fb_header.inspect)
    end

    id = fb_header.id
    delta = fb_header.delta?
    if delta
      unless dictionaries.key?(id)
        raise FileReadError.new(@buffer,
                                "A delta dictionary batch message " +
                                "must exist after a non delta " +
                                "dictionary batch message: " +
                                fb_header.inspect)
      end
    elsif dictionaries.key?(id)
      raise FileReadError.new(@buffer,
                              "Multiple non delta dictionary batch " +
                              "messages for the same ID is invalid: " +
                              fb_header.inspect)
    end

    # Without this check an unknown ID fails with NoMethodError on nil.
    field = dictionary_fields[id]
    if field.nil?
      raise FileReadError.new(@buffer,
                              "No dictionary field for " +
                              "dictionary batch message: " +
                              fb_header.inspect)
    end

    # The dictionary values are decoded as a one column record batch.
    value_type = field.type.value_type
    schema = Schema.new([Field.new("dummy", value_type, true, nil)])
    record_batch = read_record_batch(fb_header.data, schema, body)
    if delta
      dictionaries[id] << record_batch.columns[0]
    else
      dictionaries[id] = [record_batch.columns[0]]
    end
  end
  dictionaries
end
208
+
209
# Returns the dictionary chunks registered for +id+.
#
# Returns nil when no dictionary is registered for +id+. This includes
# the case where @dictionaries itself is nil, which happens when the
# footer has no dictionaries.
def find_dictionary(id)
  return nil if @dictionaries.nil?

  @dictionaries[id]
end
212
+ end
213
+ end
@@ -0,0 +1,21 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ # Opaque binary data
15
+ class Binary < ::FlatBuffers::Table
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,27 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ # Logically the same as Binary, but the internal representation uses a view
15
+ # struct that contains the string length and either the string's entire data
16
+ # inline (for small strings) or an inlined prefix, an index of another buffer,
17
+ # and an offset pointing to a slice in that buffer (for non-small strings).
18
+ #
19
+ # Since it uses a variable number of data buffers, each Field with this type
20
+ # must have a corresponding entry in `variadicBufferCounts`.
21
+ class BinaryView < ::FlatBuffers::Table
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,38 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //File.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Footer (//File.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ class Block < ::FlatBuffers::Struct
15
+ # Length of the data (this is aligned so there can be a gap between this and
16
+ # the metadata).
17
+ def body_length
18
+ field_offset = 16
19
+ @view.unpack_long(field_offset)
20
+ end
21
+
22
+ # Length of the metadata
23
+ def meta_data_length
24
+ field_offset = 8
25
+ @view.unpack_int(field_offset)
26
+ end
27
+
28
+ # Index to the start of the RecordBlock (note this is past the Message header)
29
+ def offset
30
+ field_offset = 0
31
+ @view.unpack_long(field_offset)
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,47 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Message.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+ require_relative "../../../apache/arrow/flatbuf/compression_type"
9
+ require_relative "../../../apache/arrow/flatbuf/body_compression_method"
10
+
11
+ module ArrowFormat
12
+ module Org
13
+ module Apache
14
+ module Arrow
15
+ module Flatbuf
16
+ # Optional compression for the memory buffers constituting IPC message
17
+ # bodies. Intended for use with RecordBatch but could be used for other
18
+ # message types
19
+ class BodyCompression < ::FlatBuffers::Table
20
+ # Compressor library.
21
+ # For LZ4_FRAME, each compressed buffer must consist of a single frame.
22
+ def codec
23
+ field_offset = @view.unpack_virtual_offset(4)
24
+ if field_offset.zero?
25
+ enum_value = 0
26
+ else
27
+ enum_value = @view.unpack_byte(field_offset)
28
+ end
29
+ ::ArrowFormat::Org::Apache::Arrow::Flatbuf::CompressionType.try_convert(enum_value) || enum_value
30
+ end
31
+
32
+ # Indicates the way the record batch body was compressed
33
+ def method
34
+ field_offset = @view.unpack_virtual_offset(6)
35
+ if field_offset.zero?
36
+ enum_value = 0
37
+ else
38
+ enum_value = @view.unpack_byte(field_offset)
39
+ end
40
+ ::ArrowFormat::Org::Apache::Arrow::Flatbuf::BodyCompressionMethod.try_convert(enum_value) || enum_value
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,31 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Message.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ # Provided for forward compatibility in case we need to support different
15
+ # strategies for compressing the IPC message body (like whole-body
16
+ # compression rather than buffer-level) in the future
17
+ class BodyCompressionMethod < ::FlatBuffers::Enum
18
+ # Each constituent buffer is first compressed with the indicated
19
+ # compressor, and then written with the uncompressed length in the first 8
20
+ # bytes as a 64-bit little-endian signed integer followed by the compressed
21
+ # buffer bytes (and then padding as required by the protocol). The
22
+ # uncompressed length may be set to -1 to indicate that the data that
23
+ # follows is not compressed, which can be useful for cases where
24
+ # compression does not yield appreciable savings.
25
+ BUFFER = register("BUFFER", 0)
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,20 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ class Bool < ::FlatBuffers::Table
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,38 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ # ----------------------------------------------------------------------
15
+ # A Buffer represents a single contiguous memory segment
16
+ class Buffer < ::FlatBuffers::Struct
17
+ # The absolute length (in bytes) of the memory buffer. The memory is found
18
+ # from offset (inclusive) to offset + length (non-inclusive). When building
19
+ # messages using the encapsulated IPC message, padding bytes may be written
20
+ # after a buffer, but such padding bytes do not need to be accounted for in
21
+ # the size here.
22
+ def length
23
+ field_offset = 8
24
+ @view.unpack_long(field_offset)
25
+ end
26
+
27
+ # The relative offset into the shared memory page where the bytes for this
28
+ # buffer starts
29
+ def offset
30
+ field_offset = 0
31
+ @view.unpack_long(field_offset)
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,22 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Message.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ class CompressionType < ::FlatBuffers::Enum
15
+ LZ4_FRAME = register("LZ4_FRAME", 0)
16
+ ZSTD = register("ZSTD", 1)
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,36 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+ require_relative "../../../apache/arrow/flatbuf/date_unit"
9
+
10
+ module ArrowFormat
11
+ module Org
12
+ module Apache
13
+ module Arrow
14
+ module Flatbuf
15
+ # Date is either a 32-bit or 64-bit signed integer type representing an
16
+ # elapsed time since UNIX epoch (1970-01-01), stored in either of two units:
17
+ #
18
+ # * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
19
+ # leap seconds), where the values are evenly divisible by 86400000
20
+ # * Days (32 bits) since the UNIX epoch
21
+ class Date < ::FlatBuffers::Table
22
+ def unit
23
+ field_offset = @view.unpack_virtual_offset(4)
24
+ if field_offset.zero?
25
+ enum_value = 1
26
+ else
27
+ enum_value = @view.unpack_short(field_offset)
28
+ end
29
+ ::ArrowFormat::Org::Apache::Arrow::Flatbuf::DateUnit.try_convert(enum_value) || enum_value
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,22 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ class DateUnit < ::FlatBuffers::Enum
15
+ DAY = register("DAY", 0)
16
+ MILLISECOND = register("MILLISECOND", 1)
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,48 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ # Exact decimal value represented as an integer value in two's
15
+ # complement. Currently 32-bit (4-byte), 64-bit (8-byte),
16
+ # 128-bit (16-byte) and 256-bit (32-byte) integers are used.
17
+ # The representation uses the endianness indicated in the Schema.
18
+ class Decimal < ::FlatBuffers::Table
19
+ # Number of bits per value. The accepted widths are 32, 64, 128 and 256.
20
+ # We use bitWidth for consistency with Int::bitWidth.
21
+ def bit_width
22
+ field_offset = @view.unpack_virtual_offset(8)
23
+ return 128 if field_offset.zero?
24
+
25
+ @view.unpack_int(field_offset)
26
+ end
27
+
28
+ # Total number of decimal digits
29
+ def precision
30
+ field_offset = @view.unpack_virtual_offset(4)
31
+ return 0 if field_offset.zero?
32
+
33
+ @view.unpack_int(field_offset)
34
+ end
35
+
36
+ # Number of digits after the decimal point "."
37
+ def scale
38
+ field_offset = @view.unpack_virtual_offset(6)
39
+ return 0 if field_offset.zero?
40
+
41
+ @view.unpack_int(field_offset)
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,50 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Message.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+ require_relative "../../../apache/arrow/flatbuf/record_batch"
9
+
10
+ module ArrowFormat
11
+ module Org
12
+ module Apache
13
+ module Arrow
14
+ module Flatbuf
15
+ # For sending dictionary encoding information. Any Field can be
16
+ # dictionary-encoded, but in this case none of its children may be
17
+ # dictionary-encoded.
18
+ # There is one vector / column per dictionary, but that vector / column
19
+ # may be spread across multiple dictionary batches by using the isDelta
20
+ # flag
21
+ class DictionaryBatch < ::FlatBuffers::Table
22
+ def data
23
+ field_offset = @view.unpack_virtual_offset(6)
24
+ return nil if field_offset.zero?
25
+
26
+ @view.unpack_table(::ArrowFormat::Org::Apache::Arrow::Flatbuf::RecordBatch, field_offset)
27
+ end
28
+
29
+ def id
30
+ field_offset = @view.unpack_virtual_offset(4)
31
+ return 0 if field_offset.zero?
32
+
33
+ @view.unpack_long(field_offset)
34
+ end
35
+
36
+ # If isDelta is true the values in the dictionary are to be appended to a
37
+ # dictionary with the indicated id. If isDelta is false this dictionary
38
+ # should replace the existing dictionary.
39
+ def delta?
40
+ field_offset = @view.unpack_virtual_offset(8)
41
+ return false if field_offset.zero?
42
+
43
+ @view.unpack_bool(field_offset)
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,64 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+ require_relative "../../../apache/arrow/flatbuf/dictionary_kind"
9
+ require_relative "../../../apache/arrow/flatbuf/int"
10
+
11
+ module ArrowFormat
12
+ module Org
13
+ module Apache
14
+ module Arrow
15
+ module Flatbuf
16
+ class DictionaryEncoding < ::FlatBuffers::Table
17
+ def dictionary_kind
18
+ field_offset = @view.unpack_virtual_offset(10)
19
+ if field_offset.zero?
20
+ enum_value = 0
21
+ else
22
+ enum_value = @view.unpack_short(field_offset)
23
+ end
24
+ ::ArrowFormat::Org::Apache::Arrow::Flatbuf::DictionaryKind.try_convert(enum_value) || enum_value
25
+ end
26
+
27
+ # The known dictionary id in the application where this data is used. In
28
+ # the file or streaming formats, the dictionary ids are found in the
29
+ # DictionaryBatch messages
30
+ def id
31
+ field_offset = @view.unpack_virtual_offset(4)
32
+ return 0 if field_offset.zero?
33
+
34
+ @view.unpack_long(field_offset)
35
+ end
36
+
37
+ # The dictionary indices are constrained to be non-negative integers. If
38
+ # this field is null, the indices must be signed int32. To maximize
39
+ # cross-language compatibility and performance, implementations are
40
+ # recommended to prefer signed integer types over unsigned integer types
41
+ # and to avoid uint64 indices unless they are required by an application.
42
+ def index_type
43
+ field_offset = @view.unpack_virtual_offset(6)
44
+ return nil if field_offset.zero?
45
+
46
+ @view.unpack_table(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int, field_offset)
47
+ end
48
+
49
+ # By default, dictionaries are not ordered, or the order does not have
50
+ # semantic meaning. In some statistical, applications, dictionary-encoding
51
+ # is used to represent ordered categorical data, and we provide a way to
52
+ # preserve that metadata here
53
+ def ordered?
54
+ field_offset = @view.unpack_virtual_offset(8)
55
+ return false if field_offset.zero?
56
+
57
+ @view.unpack_bool(field_offset)
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end