red-arrow-format 23.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +26 -0
- data/LICENSE.txt +202 -0
- data/NOTICE.txt +2 -0
- data/README.md +61 -0
- data/Rakefile +67 -0
- data/lib/arrow-format/array.rb +476 -0
- data/lib/arrow-format/bitmap.rb +44 -0
- data/lib/arrow-format/error.rb +34 -0
- data/lib/arrow-format/field.rb +33 -0
- data/lib/arrow-format/file-reader.rb +213 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/binary.rb +21 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/binary_view.rb +27 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/block.rb +38 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression.rb +47 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression_method.rb +31 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/bool.rb +20 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/buffer.rb +38 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/compression_type.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/date.rb +36 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/date_unit.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/decimal.rb +48 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_batch.rb +50 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_encoding.rb +64 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_kind.rb +26 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/duration.rb +30 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/endianness.rb +24 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/feature.rb +46 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/field.rb +92 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/field_node.rb +43 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_binary.rb +27 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_list.rb +27 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/floating_point.rb +30 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/footer.rb +74 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/int.rb +33 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/interval.rb +30 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/interval_unit.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/key_value.rb +36 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_binary.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_list.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_list_view.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/large_utf8.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/list.rb +20 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/list_view.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/map.rb +52 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/message.rb +68 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/message_header.rb +39 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/metadata_version.rb +36 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/null.rb +21 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/precision.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/record_batch.rb +93 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/run_end_encoded.rb +25 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/schema.rb +68 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_compressed_axis.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_index_csx.rb +96 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor.rb +92 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index.rb +29 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_coo.rb +93 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_csf.rb +129 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/struct_.rb +23 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/tensor.rb +74 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/tensor_dim.rb +38 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/time.rb +51 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/time_unit.rb +24 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/timestamp.rb +152 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/type.rb +55 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/union.rb +44 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/union_mode.rb +22 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/utf8.rb +21 -0
- data/lib/arrow-format/org/apache/arrow/flatbuf/utf8view.rb +27 -0
- data/lib/arrow-format/readable.rb +271 -0
- data/lib/arrow-format/record-batch.rb +36 -0
- data/lib/arrow-format/schema.rb +24 -0
- data/lib/arrow-format/streaming-pull-reader.rb +243 -0
- data/lib/arrow-format/streaming-reader.rb +50 -0
- data/lib/arrow-format/type.rb +704 -0
- data/lib/arrow-format/version.rb +26 -0
- data/lib/arrow-format.rb +20 -0
- data/red-arrow-format.gemspec +57 -0
- metadata +137 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //SparseTensor.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
|
|
10
|
+
module Org
|
|
11
|
+
module Apache
|
|
12
|
+
module Arrow
|
|
13
|
+
module Flatbuf
|
|
14
|
+
# Union discriminator for the sparse tensor index kinds. Each constant holds
# the value returned by `register`, which is called with the member name, its
# numeric value, the fully qualified table class name and the path of the
# file that defines that class.
class SparseTensorIndex < ::FlatBuffers::Union
  # NOTE(review): NONE is registered against Utf8View, which looks unrelated
  # to sparse tensor indices -- confirm this is what the generator intends.
  NONE = register(
    "NONE",
    0,
    "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Utf8View",
    "../../../apache/arrow/flatbuf/utf8view"
  )
  SPARSE_TENSOR_INDEX_COO = register(
    "SparseTensorIndexCOO",
    1,
    "::ArrowFormat::Org::Apache::Arrow::Flatbuf::SparseTensorIndexCOO",
    "../../../apache/arrow/flatbuf/sparse_tensor_index_coo"
  )
  SPARSE_MATRIX_INDEX_CSX = register(
    "SparseMatrixIndexCSX",
    2,
    "::ArrowFormat::Org::Apache::Arrow::Flatbuf::SparseMatrixIndexCSX",
    "../../../apache/arrow/flatbuf/sparse_matrix_index_csx"
  )
  SPARSE_TENSOR_INDEX_CSF = register(
    "SparseTensorIndexCSF",
    3,
    "::ArrowFormat::Org::Apache::Arrow::Flatbuf::SparseTensorIndexCSF",
    "../../../apache/arrow/flatbuf/sparse_tensor_index_csf"
  )

  # Loads the file named by @require_path. require_relative resolves against
  # the file containing the call, so the path is taken relative to this file.
  # @require_path is presumably assigned by the base class -- TODO confirm.
  private def require_table_class
    require_relative @require_path
  end
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //SparseTensor.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
require_relative "../../../apache/arrow/flatbuf/buffer"
|
|
9
|
+
require_relative "../../../apache/arrow/flatbuf/int"
|
|
10
|
+
|
|
11
|
+
module ArrowFormat
|
|
12
|
+
module Org
|
|
13
|
+
module Apache
|
|
14
|
+
module Arrow
|
|
15
|
+
module Flatbuf
|
|
16
|
+
# ----------------------------------------------------------------------
|
|
17
|
+
# EXPERIMENTAL: Data structures for sparse tensors
|
|
18
|
+
# Coordinate (COO) format of sparse tensor index.
|
|
19
|
+
#
|
|
20
|
+
# COO's index list is represented as an NxM matrix,
|
|
21
|
+
# where N is the number of non-zero values,
|
|
22
|
+
# and M is the number of dimensions of a sparse tensor.
|
|
23
|
+
#
|
|
24
|
+
# indicesBuffer stores the location and size of the data of this indices
|
|
25
|
+
# matrix. The value type and the stride of the indices matrix is
|
|
26
|
+
# specified in indicesType and indicesStrides fields.
|
|
27
|
+
#
|
|
28
|
+
# For example, let X be a 2x3x4x5 tensor, and it has the following
|
|
29
|
+
# 6 non-zero values:
|
|
30
|
+
# ```text
|
|
31
|
+
# X[0, 1, 2, 0] := 1
|
|
32
|
+
# X[1, 1, 2, 3] := 2
|
|
33
|
+
# X[0, 2, 1, 0] := 3
|
|
34
|
+
# X[0, 1, 3, 0] := 4
|
|
35
|
+
# X[0, 1, 2, 1] := 5
|
|
36
|
+
# X[1, 2, 0, 4] := 6
|
|
37
|
+
# ```
|
|
38
|
+
# In COO format, the index matrix of X is the following 4x6 matrix:
|
|
39
|
+
# ```text
|
|
40
|
+
# [[0, 0, 0, 0, 1, 1],
|
|
41
|
+
# [1, 1, 1, 2, 1, 2],
|
|
42
|
+
# [2, 2, 3, 1, 2, 0],
|
|
43
|
+
# [0, 1, 0, 0, 3, 4]]
|
|
44
|
+
# ```
|
|
45
|
+
# When isCanonical is true, the indices are sorted in lexicographical order
|
|
46
|
+
# (row-major order), and it does not have duplicated entries. Otherwise,
|
|
47
|
+
# the indices may not be sorted, or may have duplicated entries.
|
|
48
|
+
class SparseTensorIndexCOO < ::FlatBuffers::Table
|
|
49
|
+
# The location and size of the indices matrix's data
|
|
50
|
+
# Looks up virtual offset 8. Yields nil when that offset is zero; otherwise
# the Buffer struct unpacked from the view at that offset.
def indices_buffer
  offset = @view.unpack_virtual_offset(8)
  unless offset.zero?
    @view.unpack_struct(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer, offset)
  end
end
|
|
56
|
+
|
|
57
|
+
# Non-negative byte offsets to advance one value cell along each dimension
|
|
58
|
+
# If omitted, default to row-major order (C-like).
|
|
59
|
+
# Looks up virtual offset 6. Yields nil when that offset is zero; otherwise
# the result of unpacking a vector of 8-byte elements, each read as a long.
def indices_strides
  offset = @view.unpack_virtual_offset(6)
  return if offset.zero?

  @view.unpack_vector(offset, 8) { |position| @view.unpack_long(position) }
end
|
|
68
|
+
|
|
69
|
+
# The type of values in indicesBuffer
|
|
70
|
+
# Looks up virtual offset 4. Yields nil when that offset is zero; otherwise
# the Int table unpacked from the view at that offset.
def indices_type
  offset = @view.unpack_virtual_offset(4)
  if offset.zero?
    nil
  else
    @view.unpack_table(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int, offset)
  end
end
|
|
76
|
+
|
|
77
|
+
# This flag is true if and only if the indices matrix is sorted in
|
|
78
|
+
# row-major order, and does not have duplicated entries.
|
|
79
|
+
# This sort order is the same as of Tensorflow's SparseTensor,
|
|
80
|
+
# but it is inverse order of SciPy's canonical coo_matrix
|
|
81
|
+
# (SciPy employs column-major order for its coo_matrix).
|
|
82
|
+
# Looks up virtual offset 10. Yields false when that offset is zero;
# otherwise the boolean unpacked from the view at that offset.
def canonical?
  offset = @view.unpack_virtual_offset(10)
  offset.zero? ? false : @view.unpack_bool(offset)
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //SparseTensor.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
require_relative "../../../apache/arrow/flatbuf/buffer"
|
|
9
|
+
require_relative "../../../apache/arrow/flatbuf/int"
|
|
10
|
+
|
|
11
|
+
module ArrowFormat
|
|
12
|
+
module Org
|
|
13
|
+
module Apache
|
|
14
|
+
module Arrow
|
|
15
|
+
module Flatbuf
|
|
16
|
+
# Compressed Sparse Fiber (CSF) sparse tensor index.
|
|
17
|
+
class SparseTensorIndexCSF < ::FlatBuffers::Table
|
|
18
|
+
# axisOrder stores the sequence in which dimensions were traversed to
|
|
19
|
+
# produce the prefix tree.
|
|
20
|
+
# For example, the axisOrder for the above X is:
|
|
21
|
+
# ```text
|
|
22
|
+
# axisOrder(X) = [0, 1, 2, 3].
|
|
23
|
+
# ```
|
|
24
|
+
# Looks up virtual offset 12. Yields nil when that offset is zero; otherwise
# the result of unpacking a vector of 4-byte elements, each read as an int.
def axis_order
  offset = @view.unpack_virtual_offset(12)
  return if offset.zero?

  @view.unpack_vector(offset, 4) { |position| @view.unpack_int(position) }
end
|
|
33
|
+
|
|
34
|
+
# indicesBuffers stores values of nodes.
|
|
35
|
+
# Each tensor dimension corresponds to a buffer in indicesBuffers.
|
|
36
|
+
# For example, the indicesBuffers for the above X is:
|
|
37
|
+
# ```text
|
|
38
|
+
# indicesBuffer(X) = [
|
|
39
|
+
# [0, 1],
|
|
40
|
+
# [0, 1, 1],
|
|
41
|
+
# [0, 0, 1, 1],
|
|
42
|
+
# [1, 2, 0, 2, 0, 0, 1, 2]
|
|
43
|
+
# ].
|
|
44
|
+
# ```
|
|
45
|
+
# Looks up virtual offset 10. Yields nil when that offset is zero; otherwise
# the result of unpacking a vector of 16-byte elements, each read as a
# Buffer struct.
def indices_buffers
  offset = @view.unpack_virtual_offset(10)
  return if offset.zero?

  @view.unpack_vector(offset, 16) do |position|
    @view.unpack_struct(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer, position)
  end
end
|
|
54
|
+
|
|
55
|
+
# The type of values in indicesBuffers
|
|
56
|
+
# Looks up virtual offset 8. Yields nil when that offset is zero; otherwise
# the Int table unpacked from the view at that offset.
def indices_type
  offset = @view.unpack_virtual_offset(8)
  unless offset.zero?
    @view.unpack_table(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int, offset)
  end
end
|
|
62
|
+
|
|
63
|
+
# indptrBuffers stores the sparsity structure.
|
|
64
|
+
# Each two consecutive dimensions in a tensor correspond to a buffer in
|
|
65
|
+
# indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
|
|
66
|
+
# and `indptrBuffers[dim][i + 1]` signify a range of nodes in
|
|
67
|
+
# `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
|
|
68
|
+
#
|
|
69
|
+
# For example, the indptrBuffers for the above X is:
|
|
70
|
+
# ```text
|
|
71
|
+
# indptrBuffer(X) = [
|
|
72
|
+
# [0, 2, 3],
|
|
73
|
+
# [0, 1, 3, 4],
|
|
74
|
+
# [0, 2, 4, 5, 8]
|
|
75
|
+
# ].
|
|
76
|
+
# ```
|
|
77
|
+
# Looks up virtual offset 6. Yields nil when that offset is zero; otherwise
# the result of unpacking a vector of 16-byte elements, each read as a
# Buffer struct.
def indptr_buffers
  offset = @view.unpack_virtual_offset(6)
  return if offset.zero?

  @view.unpack_vector(offset, 16) do |position|
    @view.unpack_struct(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer, position)
  end
end
|
|
86
|
+
|
|
87
|
+
# CSF is a generalization of compressed sparse row (CSR) index.
|
|
88
|
+
# See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
|
|
89
|
+
#
|
|
90
|
+
# CSF index recursively compresses each dimension of a tensor into a set
|
|
91
|
+
# of prefix trees. Each path from a root to leaf forms one tensor
|
|
92
|
+
# non-zero index. CSF is implemented with two arrays of buffers and one
|
|
93
|
+
# array of integers.
|
|
94
|
+
#
|
|
95
|
+
# For example, let X be a 2x3x4x5 tensor and let it have the following
|
|
96
|
+
# 8 non-zero values:
|
|
97
|
+
# ```text
|
|
98
|
+
# X[0, 0, 0, 1] := 1
|
|
99
|
+
# X[0, 0, 0, 2] := 2
|
|
100
|
+
# X[0, 1, 0, 0] := 3
|
|
101
|
+
# X[0, 1, 0, 2] := 4
|
|
102
|
+
# X[0, 1, 1, 0] := 5
|
|
103
|
+
# X[1, 1, 1, 0] := 6
|
|
104
|
+
# X[1, 1, 1, 1] := 7
|
|
105
|
+
# X[1, 1, 1, 2] := 8
|
|
106
|
+
# ```
|
|
107
|
+
# As a prefix tree this would be represented as:
|
|
108
|
+
# ```text
|
|
109
|
+
# 0 1
|
|
110
|
+
# / \ |
|
|
111
|
+
# 0 1 1
|
|
112
|
+
# / / \ |
|
|
113
|
+
# 0 0 1 1
|
|
114
|
+
# /| /| | /| |
|
|
115
|
+
# 1 2 0 2 0 0 1 2
|
|
116
|
+
# ```
|
|
117
|
+
# The type of values in indptrBuffers
|
|
118
|
+
# Looks up virtual offset 4. Yields nil when that offset is zero; otherwise
# the Int table unpacked from the view at that offset.
def indptr_type
  offset = @view.unpack_virtual_offset(4)
  if offset.zero?
    nil
  else
    @view.unpack_table(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int, offset)
  end
end
|
|
124
|
+
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
|
|
10
|
+
module Org
|
|
11
|
+
module Apache
|
|
12
|
+
module Arrow
|
|
13
|
+
module Flatbuf
|
|
14
|
+
# A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
|
|
15
|
+
# (according to the physical memory layout). We used Struct_ here as
|
|
16
|
+
# Struct is a reserved word in Flatbuffers
|
|
17
|
+
# Table class with no accessors of its own; everything it offers comes from
# ::FlatBuffers::Table.
class Struct < ::FlatBuffers::Table; end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Tensor.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
require_relative "../../../apache/arrow/flatbuf/buffer"
|
|
9
|
+
require_relative "../../../apache/arrow/flatbuf/tensor_dim"
|
|
10
|
+
require_relative "../../../apache/arrow/flatbuf/type"
|
|
11
|
+
|
|
12
|
+
module ArrowFormat
|
|
13
|
+
module Org
|
|
14
|
+
module Apache
|
|
15
|
+
module Arrow
|
|
16
|
+
module Flatbuf
|
|
17
|
+
class Tensor < ::FlatBuffers::Table
|
|
18
|
+
# The location and size of the tensor's data
|
|
19
|
+
# Looks up virtual offset 12. Yields nil when that offset is zero; otherwise
# the Buffer struct unpacked from the view at that offset.
def data
  offset = @view.unpack_virtual_offset(12)
  unless offset.zero?
    @view.unpack_struct(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer, offset)
  end
end
|
|
25
|
+
|
|
26
|
+
# The dimensions of the tensor, optionally named
|
|
27
|
+
# Looks up virtual offset 8. Yields nil when that offset is zero; otherwise
# the result of unpacking a vector of 4-byte elements, each read as a
# TensorDim table.
def shape
  offset = @view.unpack_virtual_offset(8)
  return if offset.zero?

  @view.unpack_vector(offset, 4) do |position|
    @view.unpack_table(::ArrowFormat::Org::Apache::Arrow::Flatbuf::TensorDim, position)
  end
end
|
|
36
|
+
|
|
37
|
+
# Non-negative byte offsets to advance one value cell along each dimension
|
|
38
|
+
# If omitted, default to row-major order (C-like).
|
|
39
|
+
# Looks up virtual offset 10. Yields nil when that offset is zero; otherwise
# the result of unpacking a vector of 8-byte elements, each read as a long.
def strides
  offset = @view.unpack_virtual_offset(10)
  return if offset.zero?

  @view.unpack_vector(offset, 8) { |position| @view.unpack_long(position) }
end
|
|
48
|
+
|
|
49
|
+
# The type of data contained in a value cell. Currently only fixed-width
|
|
50
|
+
# value types are supported, no strings or nested types
|
|
51
|
+
# Resolves the union value stored at virtual offset 6. The discriminator is
# obtained from type_type first; nil is produced when that discriminator is
# nil or when the offset is zero. Otherwise the view unpacks the union using
# the discriminator's table_class.
def type
  union_type = type_type
  return if union_type.nil?

  offset = @view.unpack_virtual_offset(6)
  return if offset.zero?

  @view.unpack_union(union_type.table_class, offset)
end
|
|
59
|
+
|
|
60
|
+
# Reads the union discriminator at virtual offset 4, using 0 when that offset
# is zero. The raw value is passed to Type.try_convert; when that yields a
# falsy result the raw value itself is returned.
def type_type
  offset = @view.unpack_virtual_offset(4)
  raw = offset.zero? ? 0 : @view.unpack_utype(offset)
  converted = ::ArrowFormat::Org::Apache::Arrow::Flatbuf::Type.try_convert(raw)
  converted || raw
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Tensor.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
|
|
10
|
+
module Org
|
|
11
|
+
module Apache
|
|
12
|
+
module Arrow
|
|
13
|
+
module Flatbuf
|
|
14
|
+
# ----------------------------------------------------------------------
|
|
15
|
+
# Data structures for dense tensors
|
|
16
|
+
# Shape data for a single axis in a tensor
|
|
17
|
+
class TensorDim < ::FlatBuffers::Table
|
|
18
|
+
# Name of the dimension, optional
|
|
19
|
+
# Looks up virtual offset 6. Yields nil when that offset is zero; otherwise
# the string unpacked from the view at that offset.
def name
  offset = @view.unpack_virtual_offset(6)
  offset.zero? ? nil : @view.unpack_string(offset)
end
|
|
25
|
+
|
|
26
|
+
# Length of dimension
|
|
27
|
+
# Looks up virtual offset 4. Yields 0 when that offset is zero; otherwise
# the long unpacked from the view at that offset.
def size
  offset = @view.unpack_virtual_offset(4)
  offset.zero? ? 0 : @view.unpack_long(offset)
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
require_relative "../../../apache/arrow/flatbuf/time_unit"
|
|
9
|
+
|
|
10
|
+
module ArrowFormat
|
|
11
|
+
module Org
|
|
12
|
+
module Apache
|
|
13
|
+
module Arrow
|
|
14
|
+
module Flatbuf
|
|
15
|
+
# Time is either a 32-bit or 64-bit signed integer type representing an
|
|
16
|
+
# elapsed time since midnight, stored in either of four units: seconds,
|
|
17
|
+
# milliseconds, microseconds or nanoseconds.
|
|
18
|
+
#
|
|
19
|
+
# The integer `bitWidth` depends on the `unit` and must be one of the following:
|
|
20
|
+
# * SECOND and MILLISECOND: 32 bits
|
|
21
|
+
# * MICROSECOND and NANOSECOND: 64 bits
|
|
22
|
+
#
|
|
23
|
+
# The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds
|
|
24
|
+
# (exclusive), adjusted for the time unit (for example, up to 86400000
|
|
25
|
+
# exclusive for the MILLISECOND unit).
|
|
26
|
+
# This definition doesn't allow for leap seconds. Time values from
|
|
27
|
+
# measurements with leap seconds will need to be corrected when ingesting
|
|
28
|
+
# into Arrow (for example by replacing the value 86400 with 86399).
|
|
29
|
+
class Time < ::FlatBuffers::Table
|
|
30
|
+
# Looks up virtual offset 6. Yields the default of 32 when that offset is
# zero; otherwise the int unpacked from the view at that offset.
def bit_width
  offset = @view.unpack_virtual_offset(6)
  offset.zero? ? 32 : @view.unpack_int(offset)
end
|
|
36
|
+
|
|
37
|
+
# Reads the unit at virtual offset 4 as a short, using 1 when that offset is
# zero. The raw value is passed to TimeUnit.try_convert; when that yields a
# falsy result the raw value itself is returned.
def unit
  offset = @view.unpack_virtual_offset(4)
  raw = offset.zero? ? 1 : @view.unpack_short(offset)
  converted = ::ArrowFormat::Org::Apache::Arrow::Flatbuf::TimeUnit.try_convert(raw)
  converted || raw
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
|
|
9
|
+
module ArrowFormat
|
|
10
|
+
module Org
|
|
11
|
+
module Apache
|
|
12
|
+
module Arrow
|
|
13
|
+
module Flatbuf
|
|
14
|
+
# Enumeration of time units. Each constant holds the value returned by
# `register` for the given member name and numeric value.
class TimeUnit < ::FlatBuffers::Enum
  SECOND      = register("SECOND",      0)
  MILLISECOND = register("MILLISECOND", 1)
  MICROSECOND = register("MICROSECOND", 2)
  NANOSECOND  = register("NANOSECOND",  3)
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# Automatically generated. Don't modify manually.
|
|
2
|
+
#
|
|
3
|
+
# Red FlatBuffers version: 0.0.3
|
|
4
|
+
# Declared by: //Schema.fbs
|
|
5
|
+
# Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
|
|
6
|
+
|
|
7
|
+
require "flatbuffers"
|
|
8
|
+
require_relative "../../../apache/arrow/flatbuf/time_unit"
|
|
9
|
+
|
|
10
|
+
module ArrowFormat
|
|
11
|
+
module Org
|
|
12
|
+
module Apache
|
|
13
|
+
module Arrow
|
|
14
|
+
module Flatbuf
|
|
15
|
+
# Timestamp is a 64-bit signed integer representing an elapsed time since a
|
|
16
|
+
# fixed epoch, stored in either of four units: seconds, milliseconds,
|
|
17
|
+
# microseconds or nanoseconds, and is optionally annotated with a timezone.
|
|
18
|
+
#
|
|
19
|
+
# Timestamp values do not include any leap seconds (in other words, all
|
|
20
|
+
# days are considered 86400 seconds long).
|
|
21
|
+
#
|
|
22
|
+
# Timestamps with a non-empty timezone
|
|
23
|
+
# ------------------------------------
|
|
24
|
+
#
|
|
25
|
+
# If a Timestamp column has a non-empty timezone value, its epoch is
|
|
26
|
+
# 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone
|
|
27
|
+
# (the Unix epoch), regardless of the Timestamp's own timezone.
|
|
28
|
+
#
|
|
29
|
+
# Therefore, timestamp values with a non-empty timezone correspond to
|
|
30
|
+
# physical points in time together with some additional information about
|
|
31
|
+
# how the data was obtained and/or how to display it (the timezone).
|
|
32
|
+
#
|
|
33
|
+
# For example, the timestamp value 0 with the timezone string "Europe/Paris"
|
|
34
|
+
# corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the
|
|
35
|
+
# application may prefer to display it as "January 1st 1970, 01h00" in
|
|
36
|
+
# the Europe/Paris timezone (which is the same physical point in time).
|
|
37
|
+
#
|
|
38
|
+
# One consequence is that timestamp values with a non-empty timezone
|
|
39
|
+
# can be compared and ordered directly, since they all share the same
|
|
40
|
+
# well-known point of reference (the Unix epoch).
|
|
41
|
+
#
|
|
42
|
+
# Timestamps with an unset / empty timezone
|
|
43
|
+
# -----------------------------------------
|
|
44
|
+
#
|
|
45
|
+
# If a Timestamp column has no timezone value, its epoch is
|
|
46
|
+
# 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone.
|
|
47
|
+
#
|
|
48
|
+
# Therefore, timestamp values without a timezone cannot be meaningfully
|
|
49
|
+
# interpreted as physical points in time, but only as calendar / clock
|
|
50
|
+
# indications ("wall clock time") in an unspecified timezone.
|
|
51
|
+
#
|
|
52
|
+
# For example, the timestamp value 0 with an empty timezone string
|
|
53
|
+
# corresponds to "January 1st 1970, 00h00" in an unknown timezone: there
|
|
54
|
+
# is not enough information to interpret it as a well-defined physical
|
|
55
|
+
# point in time.
|
|
56
|
+
#
|
|
57
|
+
# One consequence is that timestamp values without a timezone cannot
|
|
58
|
+
# be reliably compared or ordered, since they may have different points of
|
|
59
|
+
# reference. In particular, it is *not* possible to interpret an unset
|
|
60
|
+
# or empty timezone as the same as "UTC".
|
|
61
|
+
#
|
|
62
|
+
# Conversion between timezones
|
|
63
|
+
# ----------------------------
|
|
64
|
+
#
|
|
65
|
+
# If a Timestamp column has a non-empty timezone, changing the timezone
|
|
66
|
+
# to a different non-empty value is a metadata-only operation:
|
|
67
|
+
# the timestamp values need not change as their point of reference remains
|
|
68
|
+
# the same (the Unix epoch).
|
|
69
|
+
#
|
|
70
|
+
# However, if a Timestamp column has no timezone value, changing it to a
|
|
71
|
+
# non-empty value requires to think about the desired semantics.
|
|
72
|
+
# One possibility is to assume that the original timestamp values are
|
|
73
|
+
# relative to the epoch of the timezone being set; timestamp values should
|
|
74
|
+
# then be adjusted to the Unix epoch (for example, changing the timezone from
|
|
75
|
+
# empty to "Europe/Paris" would require converting the timestamp values
|
|
76
|
+
# from "Europe/Paris" to "UTC", which seems counter-intuitive but is
|
|
77
|
+
# nevertheless correct).
|
|
78
|
+
#
|
|
79
|
+
# Guidelines for encoding data from external libraries
|
|
80
|
+
# ----------------------------------------------------
|
|
81
|
+
#
|
|
82
|
+
# Date & time libraries often have multiple different data types for temporal
|
|
83
|
+
# data. In order to ease interoperability between different implementations the
|
|
84
|
+
# Arrow project has some recommendations for encoding these types into a Timestamp
|
|
85
|
+
# column.
|
|
86
|
+
#
|
|
87
|
+
# An "instant" represents a physical point in time that has no relevant timezone
|
|
88
|
+
# (for example, astronomical data). To encode an instant, use a Timestamp with
|
|
89
|
+
# the timezone string set to "UTC", and make sure the Timestamp values
|
|
90
|
+
# are relative to the UTC epoch (January 1st 1970, midnight).
|
|
91
|
+
#
|
|
92
|
+
# A "zoned date-time" represents a physical point in time annotated with an
|
|
93
|
+
# informative timezone (for example, the timezone in which the data was
|
|
94
|
+
# recorded). To encode a zoned date-time, use a Timestamp with the timezone
|
|
95
|
+
# string set to the name of the timezone, and make sure the Timestamp values
|
|
96
|
+
# are relative to the UTC epoch (January 1st 1970, midnight).
|
|
97
|
+
#
|
|
98
|
+
# (There is some ambiguity between an instant and a zoned date-time with the
|
|
99
|
+
# UTC timezone. Both of these are stored the same in Arrow. Typically,
|
|
100
|
+
# this distinction does not matter. If it does, then an application should
|
|
101
|
+
# use custom metadata or an extension type to distinguish between the two cases.)
|
|
102
|
+
#
|
|
103
|
+
# An "offset date-time" represents a physical point in time combined with an
|
|
104
|
+
# explicit offset from UTC. To encode an offset date-time, use a Timestamp
|
|
105
|
+
# with the timezone string set to the numeric timezone offset string
|
|
106
|
+
# (e.g. "+03:00"), and make sure the Timestamp values are relative to
|
|
107
|
+
# the UTC epoch (January 1st 1970, midnight).
|
|
108
|
+
#
|
|
109
|
+
# A "naive date-time" (also called "local date-time" in some libraries)
|
|
110
|
+
# represents a wall clock time combined with a calendar date, but with
|
|
111
|
+
# no indication of how to map this information to a physical point in time.
|
|
112
|
+
# Naive date-times must be handled with care because of this missing
|
|
113
|
+
# information, and also because daylight saving time (DST) may make
|
|
114
|
+
# some values ambiguous or nonexistent. A naive date-time may be
|
|
115
|
+
# stored as a struct with Date and Time fields. However, it may also be
|
|
116
|
+
# encoded into a Timestamp column with an empty timezone. The timestamp
|
|
117
|
+
# values should be computed "as if" the timezone of the date-time values
|
|
118
|
+
# was UTC; for example, the naive date-time "January 1st 1970, 00h00" would
|
|
119
|
+
# be encoded as timestamp value 0.
|
|
120
|
+
class Timestamp < ::FlatBuffers::Table
|
|
121
|
+
# The timezone is an optional string indicating the name of a timezone,
|
|
122
|
+
# one of:
|
|
123
|
+
#
|
|
124
|
+
# * As used in the Olson timezone database (the "tz database" or
|
|
125
|
+
# "tzdata"), such as "America/New_York".
|
|
126
|
+
# * An absolute timezone offset of the form "+XX:XX" or "-XX:XX",
|
|
127
|
+
# such as "+07:30".
|
|
128
|
+
#
|
|
129
|
+
# Whether a timezone string is present indicates different semantics about
|
|
130
|
+
# the data (see above).
|
|
131
|
+
# Looks up virtual offset 6. Yields nil when that offset is zero; otherwise
# the string unpacked from the view at that offset.
def timezone
  offset = @view.unpack_virtual_offset(6)
  offset.zero? ? nil : @view.unpack_string(offset)
end
|
|
137
|
+
|
|
138
|
+
# Reads the unit at virtual offset 4 as a short, using 0 when that offset is
# zero. The raw value is passed to TimeUnit.try_convert; when that yields a
# falsy result the raw value itself is returned.
def unit
  offset = @view.unpack_virtual_offset(4)
  raw = offset.zero? ? 0 : @view.unpack_short(offset)
  converted = ::ArrowFormat::Org::Apache::Arrow::Flatbuf::TimeUnit.try_convert(raw)
  converted || raw
end
|
|
147
|
+
end
|
|
148
|
+
end
|
|
149
|
+
end
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
end
|