red-arrow-format 23.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +26 -0
  3. data/LICENSE.txt +202 -0
  4. data/NOTICE.txt +2 -0
  5. data/README.md +61 -0
  6. data/Rakefile +67 -0
  7. data/lib/arrow-format/array.rb +476 -0
  8. data/lib/arrow-format/bitmap.rb +44 -0
  9. data/lib/arrow-format/error.rb +34 -0
  10. data/lib/arrow-format/field.rb +33 -0
  11. data/lib/arrow-format/file-reader.rb +213 -0
  12. data/lib/arrow-format/org/apache/arrow/flatbuf/binary.rb +21 -0
  13. data/lib/arrow-format/org/apache/arrow/flatbuf/binary_view.rb +27 -0
  14. data/lib/arrow-format/org/apache/arrow/flatbuf/block.rb +38 -0
  15. data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression.rb +47 -0
  16. data/lib/arrow-format/org/apache/arrow/flatbuf/body_compression_method.rb +31 -0
  17. data/lib/arrow-format/org/apache/arrow/flatbuf/bool.rb +20 -0
  18. data/lib/arrow-format/org/apache/arrow/flatbuf/buffer.rb +38 -0
  19. data/lib/arrow-format/org/apache/arrow/flatbuf/compression_type.rb +22 -0
  20. data/lib/arrow-format/org/apache/arrow/flatbuf/date.rb +36 -0
  21. data/lib/arrow-format/org/apache/arrow/flatbuf/date_unit.rb +22 -0
  22. data/lib/arrow-format/org/apache/arrow/flatbuf/decimal.rb +48 -0
  23. data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_batch.rb +50 -0
  24. data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_encoding.rb +64 -0
  25. data/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_kind.rb +26 -0
  26. data/lib/arrow-format/org/apache/arrow/flatbuf/duration.rb +30 -0
  27. data/lib/arrow-format/org/apache/arrow/flatbuf/endianness.rb +24 -0
  28. data/lib/arrow-format/org/apache/arrow/flatbuf/feature.rb +46 -0
  29. data/lib/arrow-format/org/apache/arrow/flatbuf/field.rb +92 -0
  30. data/lib/arrow-format/org/apache/arrow/flatbuf/field_node.rb +43 -0
  31. data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_binary.rb +27 -0
  32. data/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_list.rb +27 -0
  33. data/lib/arrow-format/org/apache/arrow/flatbuf/floating_point.rb +30 -0
  34. data/lib/arrow-format/org/apache/arrow/flatbuf/footer.rb +74 -0
  35. data/lib/arrow-format/org/apache/arrow/flatbuf/int.rb +33 -0
  36. data/lib/arrow-format/org/apache/arrow/flatbuf/interval.rb +30 -0
  37. data/lib/arrow-format/org/apache/arrow/flatbuf/interval_unit.rb +23 -0
  38. data/lib/arrow-format/org/apache/arrow/flatbuf/key_value.rb +36 -0
  39. data/lib/arrow-format/org/apache/arrow/flatbuf/large_binary.rb +22 -0
  40. data/lib/arrow-format/org/apache/arrow/flatbuf/large_list.rb +22 -0
  41. data/lib/arrow-format/org/apache/arrow/flatbuf/large_list_view.rb +22 -0
  42. data/lib/arrow-format/org/apache/arrow/flatbuf/large_utf8.rb +22 -0
  43. data/lib/arrow-format/org/apache/arrow/flatbuf/list.rb +20 -0
  44. data/lib/arrow-format/org/apache/arrow/flatbuf/list_view.rb +23 -0
  45. data/lib/arrow-format/org/apache/arrow/flatbuf/map.rb +52 -0
  46. data/lib/arrow-format/org/apache/arrow/flatbuf/message.rb +68 -0
  47. data/lib/arrow-format/org/apache/arrow/flatbuf/message_header.rb +39 -0
  48. data/lib/arrow-format/org/apache/arrow/flatbuf/metadata_version.rb +36 -0
  49. data/lib/arrow-format/org/apache/arrow/flatbuf/null.rb +21 -0
  50. data/lib/arrow-format/org/apache/arrow/flatbuf/precision.rb +23 -0
  51. data/lib/arrow-format/org/apache/arrow/flatbuf/record_batch.rb +93 -0
  52. data/lib/arrow-format/org/apache/arrow/flatbuf/run_end_encoded.rb +25 -0
  53. data/lib/arrow-format/org/apache/arrow/flatbuf/schema.rb +68 -0
  54. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_compressed_axis.rb +22 -0
  55. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_index_csx.rb +96 -0
  56. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor.rb +92 -0
  57. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index.rb +29 -0
  58. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_coo.rb +93 -0
  59. data/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_csf.rb +129 -0
  60. data/lib/arrow-format/org/apache/arrow/flatbuf/struct_.rb +23 -0
  61. data/lib/arrow-format/org/apache/arrow/flatbuf/tensor.rb +74 -0
  62. data/lib/arrow-format/org/apache/arrow/flatbuf/tensor_dim.rb +38 -0
  63. data/lib/arrow-format/org/apache/arrow/flatbuf/time.rb +51 -0
  64. data/lib/arrow-format/org/apache/arrow/flatbuf/time_unit.rb +24 -0
  65. data/lib/arrow-format/org/apache/arrow/flatbuf/timestamp.rb +152 -0
  66. data/lib/arrow-format/org/apache/arrow/flatbuf/type.rb +55 -0
  67. data/lib/arrow-format/org/apache/arrow/flatbuf/union.rb +44 -0
  68. data/lib/arrow-format/org/apache/arrow/flatbuf/union_mode.rb +22 -0
  69. data/lib/arrow-format/org/apache/arrow/flatbuf/utf8.rb +21 -0
  70. data/lib/arrow-format/org/apache/arrow/flatbuf/utf8view.rb +27 -0
  71. data/lib/arrow-format/readable.rb +271 -0
  72. data/lib/arrow-format/record-batch.rb +36 -0
  73. data/lib/arrow-format/schema.rb +24 -0
  74. data/lib/arrow-format/streaming-pull-reader.rb +243 -0
  75. data/lib/arrow-format/streaming-reader.rb +50 -0
  76. data/lib/arrow-format/type.rb +704 -0
  77. data/lib/arrow-format/version.rb +26 -0
  78. data/lib/arrow-format.rb +20 -0
  79. data/red-arrow-format.gemspec +57 -0
  80. metadata +137 -0
@@ -0,0 +1,29 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //SparseTensor.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ class SparseTensorIndex < ::FlatBuffers::Union
15
+ NONE = register("NONE", 0, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Utf8View", "../../../apache/arrow/flatbuf/utf8view") # NOTE(review): NONE is registered with the Utf8View class and path; presumably generator output -- confirm it is intended
16
+ SPARSE_TENSOR_INDEX_COO = register("SparseTensorIndexCOO", 1, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::SparseTensorIndexCOO", "../../../apache/arrow/flatbuf/sparse_tensor_index_coo")
17
+ SPARSE_MATRIX_INDEX_CSX = register("SparseMatrixIndexCSX", 2, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::SparseMatrixIndexCSX", "../../../apache/arrow/flatbuf/sparse_matrix_index_csx")
18
+ SPARSE_TENSOR_INDEX_CSF = register("SparseTensorIndexCSF", 3, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::SparseTensorIndexCSF", "../../../apache/arrow/flatbuf/sparse_tensor_index_csf")
19
+
20
+ # Loads the file named by @require_path, resolved relative to this file.
21
+ private def require_table_class
22
+ require_relative @require_path
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,93 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //SparseTensor.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+ require_relative "../../../apache/arrow/flatbuf/buffer"
9
+ require_relative "../../../apache/arrow/flatbuf/int"
10
+
11
+ module ArrowFormat
12
+ module Org
13
+ module Apache
14
+ module Arrow
15
+ module Flatbuf
16
+ # ----------------------------------------------------------------------
17
+ # EXPERIMENTAL: Data structures for sparse tensors
18
+ # Coordinate (COO) format of sparse tensor index.
19
+ #
20
+ # COO's index list is represented as an NxM matrix,
21
+ # where N is the number of non-zero values,
22
+ # and M is the number of dimensions of a sparse tensor.
23
+ #
24
+ # indicesBuffer stores the location and size of the data of this indices
25
+ # matrix. The value type and the stride of the indices matrix are
26
+ # specified in indicesType and indicesStrides fields.
27
+ #
28
+ # For example, let X be a 2x3x4x5 tensor, and it has the following
29
+ # 6 non-zero values:
30
+ # ```text
31
+ # X[0, 1, 2, 0] := 1
32
+ # X[1, 1, 2, 3] := 2
33
+ # X[0, 2, 1, 0] := 3
34
+ # X[0, 1, 3, 0] := 4
35
+ # X[0, 1, 2, 1] := 5
36
+ # X[1, 2, 0, 4] := 6
37
+ # ```
38
+ # In COO format, the index matrix of X is the following 4x6 matrix:
39
+ # ```text
40
+ # [[0, 0, 0, 0, 1, 1],
41
+ # [1, 1, 1, 2, 1, 2],
42
+ # [2, 2, 3, 1, 2, 0],
43
+ # [0, 1, 0, 0, 3, 4]]
44
+ # ```
45
+ # When isCanonical is true, the indices are sorted in lexicographical order
46
+ # (row-major order), and it does not have duplicated entries. Otherwise,
47
+ # the indices may not be sorted, or may have duplicated entries.
48
+ class SparseTensorIndexCOO < ::FlatBuffers::Table
49
+ # The location and size of the indices matrix's data (a Buffer struct); nil when the field's virtual offset is zero.
50
+ def indices_buffer
51
+ field_offset = @view.unpack_virtual_offset(8)
52
+ return nil if field_offset.zero?
53
+
54
+ @view.unpack_struct(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer, field_offset)
55
+ end
56
+
57
+ # Non-negative byte offsets to advance one value cell along each dimension.
58
+ # If omitted, default to row-major order (C-like); this accessor returns nil when the field's virtual offset is zero.
59
+ def indices_strides
60
+ field_offset = @view.unpack_virtual_offset(6)
61
+ return nil if field_offset.zero?
62
+
63
+ element_size = 8
64
+ @view.unpack_vector(field_offset, element_size) do |element_offset|
65
+ @view.unpack_long(element_offset)
66
+ end
67
+ end
68
+
69
+ # The type of values in indicesBuffer (an Int table); nil when the field's virtual offset is zero.
70
+ def indices_type
71
+ field_offset = @view.unpack_virtual_offset(4)
72
+ return nil if field_offset.zero?
73
+
74
+ @view.unpack_table(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int, field_offset)
75
+ end
76
+
77
+ # This flag is true if and only if the indices matrix is sorted in
78
+ # row-major order, and does not have duplicated entries.
79
+ # This sort order is the same as of Tensorflow's SparseTensor,
80
+ # but it is inverse order of SciPy's canonical coo_matrix
81
+ # (SciPy employs column-major order for its coo_matrix). Returns false when the field's virtual offset is zero.
82
+ def canonical?
83
+ field_offset = @view.unpack_virtual_offset(10)
84
+ return false if field_offset.zero?
85
+
86
+ @view.unpack_bool(field_offset)
87
+ end
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
93
+ end
@@ -0,0 +1,129 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //SparseTensor.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+ require_relative "../../../apache/arrow/flatbuf/buffer"
9
+ require_relative "../../../apache/arrow/flatbuf/int"
10
+
11
+ module ArrowFormat
12
+ module Org
13
+ module Apache
14
+ module Arrow
15
+ module Flatbuf
16
+ # Compressed Sparse Fiber (CSF) sparse tensor index.
17
+ class SparseTensorIndexCSF < ::FlatBuffers::Table
18
+ # axisOrder stores the sequence in which dimensions were traversed to
19
+ # produce the prefix tree.
20
+ # For example, the axisOrder for the example X (defined in the indptr_type comment below) is:
21
+ # ```text
22
+ # axisOrder(X) = [0, 1, 2, 3].
23
+ # ```
24
+ def axis_order
25
+ field_offset = @view.unpack_virtual_offset(12)
26
+ return nil if field_offset.zero?
27
+
28
+ element_size = 4
29
+ @view.unpack_vector(field_offset, element_size) do |element_offset|
30
+ @view.unpack_int(element_offset)
31
+ end
32
+ end
33
+
34
+ # indicesBuffers stores values of nodes.
35
+ # Each tensor dimension corresponds to a buffer in indicesBuffers.
36
+ # For example, the indicesBuffers for the example X (defined in the indptr_type comment below) is:
37
+ # ```text
38
+ # indicesBuffer(X) = [
39
+ # [0, 1],
40
+ # [0, 1, 1],
41
+ # [0, 0, 1, 1],
42
+ # [1, 2, 0, 2, 0, 0, 1, 2]
43
+ # ].
44
+ # ```
45
+ def indices_buffers
46
+ field_offset = @view.unpack_virtual_offset(10)
47
+ return nil if field_offset.zero?
48
+
49
+ element_size = 16
50
+ @view.unpack_vector(field_offset, element_size) do |element_offset|
51
+ @view.unpack_struct(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer, element_offset)
52
+ end
53
+ end
54
+
55
+ # The type of values in indicesBuffers (an Int table); nil when the field's virtual offset is zero.
56
+ def indices_type
57
+ field_offset = @view.unpack_virtual_offset(8)
58
+ return nil if field_offset.zero?
59
+
60
+ @view.unpack_table(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int, field_offset)
61
+ end
62
+
63
+ # indptrBuffers stores the sparsity structure.
64
+ # Each two consecutive dimensions in a tensor correspond to a buffer in
65
+ # indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
66
+ # and `indptrBuffers[dim][i + 1]` signify a range of nodes in
67
+ # `indicesBuffers[dim + 1]` that are children of the `indicesBuffers[dim][i]` node.
68
+ #
69
+ # For example, the indptrBuffers for the example X (defined in the indptr_type comment below) is:
70
+ # ```text
71
+ # indptrBuffer(X) = [
72
+ # [0, 2, 3],
73
+ # [0, 1, 3, 4],
74
+ # [0, 2, 4, 5, 8]
75
+ # ].
76
+ # ```
77
+ def indptr_buffers
78
+ field_offset = @view.unpack_virtual_offset(6)
79
+ return nil if field_offset.zero?
80
+
81
+ element_size = 16
82
+ @view.unpack_vector(field_offset, element_size) do |element_offset|
83
+ @view.unpack_struct(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer, element_offset)
84
+ end
85
+ end
86
+
87
+ # CSF is a generalization of compressed sparse row (CSR) index.
88
+ # See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
89
+ #
90
+ # CSF index recursively compresses each dimension of a tensor into a set
91
+ # of prefix trees. Each path from a root to leaf forms one tensor
92
+ # non-zero index. CSF is implemented with two arrays of buffers and one
93
+ # array of integers.
94
+ #
95
+ # For example, let X be a 2x3x4x5 tensor and let it have the following
96
+ # 8 non-zero values:
97
+ # ```text
98
+ # X[0, 0, 0, 1] := 1
99
+ # X[0, 0, 0, 2] := 2
100
+ # X[0, 1, 0, 0] := 3
101
+ # X[0, 1, 0, 2] := 4
102
+ # X[0, 1, 1, 0] := 5
103
+ # X[1, 1, 1, 0] := 6
104
+ # X[1, 1, 1, 1] := 7
105
+ # X[1, 1, 1, 2] := 8
106
+ # ```
107
+ # As a prefix tree this would be represented as:
108
+ # ```text
109
+ # 0 1
110
+ # / \ |
111
+ # 0 1 1
112
+ # / / \ |
113
+ # 0 0 1 1
114
+ # /| /| | /| |
115
+ # 1 2 0 2 0 0 1 2
116
+ # ```
117
+ # The type of values in indptrBuffers (an Int table); nil when the field's virtual offset is zero.
118
+ def indptr_type
119
+ field_offset = @view.unpack_virtual_offset(4)
120
+ return nil if field_offset.zero?
121
+
122
+ @view.unpack_table(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int, field_offset)
123
+ end
124
+ end
125
+ end
126
+ end
127
+ end
128
+ end
129
+ end
@@ -0,0 +1,23 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ # A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
15
+ # (according to the physical memory layout). The schema uses Struct_ because
16
+ # Struct is a reserved word in Flatbuffers; the Ruby class below is named Struct.
17
+ class Struct < ::FlatBuffers::Table
18
+ end
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,74 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Tensor.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+ require_relative "../../../apache/arrow/flatbuf/buffer"
9
+ require_relative "../../../apache/arrow/flatbuf/tensor_dim"
10
+ require_relative "../../../apache/arrow/flatbuf/type"
11
+
12
+ module ArrowFormat
13
+ module Org
14
+ module Apache
15
+ module Arrow
16
+ module Flatbuf
17
+ class Tensor < ::FlatBuffers::Table
18
+ # The location and size of the tensor's data (a Buffer struct); nil when the field's virtual offset is zero.
19
+ def data
20
+ field_offset = @view.unpack_virtual_offset(12)
21
+ return nil if field_offset.zero?
22
+
23
+ @view.unpack_struct(::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer, field_offset)
24
+ end
25
+
26
+ # The dimensions of the tensor, optionally named (TensorDim tables); nil when the field's virtual offset is zero.
27
+ def shape
28
+ field_offset = @view.unpack_virtual_offset(8)
29
+ return nil if field_offset.zero?
30
+
31
+ element_size = 4
32
+ @view.unpack_vector(field_offset, element_size) do |element_offset|
33
+ @view.unpack_table(::ArrowFormat::Org::Apache::Arrow::Flatbuf::TensorDim, element_offset)
34
+ end
35
+ end
36
+
37
+ # Non-negative byte offsets to advance one value cell along each dimension.
38
+ # If omitted, default to row-major order (C-like); this accessor returns nil when the field's virtual offset is zero.
39
+ def strides
40
+ field_offset = @view.unpack_virtual_offset(10)
41
+ return nil if field_offset.zero?
42
+
43
+ element_size = 8
44
+ @view.unpack_vector(field_offset, element_size) do |element_offset|
45
+ @view.unpack_long(element_offset)
46
+ end
47
+ end
48
+
49
+ # The type of data contained in a value cell. Currently only fixed-width
50
+ # value types are supported, no strings or nested types.
51
+ def type
52
+ type = type_type
53
+ return nil if type.nil?
54
+ # NOTE(review): type_type can return a raw integer, which has no table_class -- confirm unknown tags cannot reach the unpack_union call below.
55
+ field_offset = @view.unpack_virtual_offset(6)
56
+ return nil if field_offset.zero?
57
+ @view.unpack_union(type.table_class, field_offset)
58
+ end
59
+
60
+ def type_type # Tag for the type union: Type.try_convert of the stored value (0 when the offset is zero), else the raw value
61
+ field_offset = @view.unpack_virtual_offset(4)
62
+ if field_offset.zero?
63
+ enum_value = 0
64
+ else
65
+ enum_value = @view.unpack_utype(field_offset)
66
+ end
67
+ ::ArrowFormat::Org::Apache::Arrow::Flatbuf::Type.try_convert(enum_value) || enum_value
68
+ end
69
+ end
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,38 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Tensor.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ # ----------------------------------------------------------------------
15
+ # Data structures for dense tensors
16
+ # Shape data for a single axis in a tensor
17
+ class TensorDim < ::FlatBuffers::Table
18
+ # Name of the dimension, optional; nil when the field's virtual offset is zero.
19
+ def name
20
+ field_offset = @view.unpack_virtual_offset(6)
21
+ return nil if field_offset.zero?
22
+
23
+ @view.unpack_string(field_offset)
24
+ end
25
+
26
+ # Length of dimension; 0 when the field's virtual offset is zero.
27
+ def size
28
+ field_offset = @view.unpack_virtual_offset(4)
29
+ return 0 if field_offset.zero?
30
+
31
+ @view.unpack_long(field_offset)
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,51 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+ require_relative "../../../apache/arrow/flatbuf/time_unit"
9
+
10
+ module ArrowFormat
11
+ module Org
12
+ module Apache
13
+ module Arrow
14
+ module Flatbuf
15
+ # Time is either a 32-bit or 64-bit signed integer type representing an
16
+ # elapsed time since midnight, stored in either of four units: seconds,
17
+ # milliseconds, microseconds or nanoseconds.
18
+ #
19
+ # The integer `bitWidth` depends on the `unit` and must be one of the following:
20
+ # * SECOND and MILLISECOND: 32 bits
21
+ # * MICROSECOND and NANOSECOND: 64 bits
22
+ #
23
+ # The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds
24
+ # (exclusive), adjusted for the time unit (for example, up to 86400000
25
+ # exclusive for the MILLISECOND unit).
26
+ # This definition doesn't allow for leap seconds. Time values from
27
+ # measurements with leap seconds will need to be corrected when ingesting
28
+ # into Arrow (for example by replacing the value 86400 with 86399).
29
+ class Time < ::FlatBuffers::Table
30
+ def bit_width # Stored bitWidth value; 32 when the field's virtual offset is zero
31
+ field_offset = @view.unpack_virtual_offset(6)
32
+ return 32 if field_offset.zero?
33
+
34
+ @view.unpack_int(field_offset)
35
+ end
36
+
37
+ def unit # TimeUnit for the stored value (1 when the offset is zero); the raw value if TimeUnit.try_convert gives nothing
38
+ field_offset = @view.unpack_virtual_offset(4)
39
+ if field_offset.zero?
40
+ enum_value = 1
41
+ else
42
+ enum_value = @view.unpack_short(field_offset)
43
+ end
44
+ ::ArrowFormat::Org::Apache::Arrow::Flatbuf::TimeUnit.try_convert(enum_value) || enum_value
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,24 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+
9
+ module ArrowFormat
10
+ module Org
11
+ module Apache
12
+ module Arrow
13
+ module Flatbuf
14
+ class TimeUnit < ::FlatBuffers::Enum # Time units; each member is registered with its name and integer value
15
+ SECOND = register("SECOND", 0)
16
+ MILLISECOND = register("MILLISECOND", 1)
17
+ MICROSECOND = register("MICROSECOND", 2)
18
+ NANOSECOND = register("NANOSECOND", 3)
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,152 @@
1
+ # Automatically generated. Don't modify manually.
2
+ #
3
+ # Red FlatBuffers version: 0.0.3
4
+ # Declared by: //Schema.fbs
5
+ # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs)
6
+
7
+ require "flatbuffers"
8
+ require_relative "../../../apache/arrow/flatbuf/time_unit"
9
+
10
+ module ArrowFormat
11
+ module Org
12
+ module Apache
13
+ module Arrow
14
+ module Flatbuf
15
+ # Timestamp is a 64-bit signed integer representing an elapsed time since a
16
+ # fixed epoch, stored in either of four units: seconds, milliseconds,
17
+ # microseconds or nanoseconds, and is optionally annotated with a timezone.
18
+ #
19
+ # Timestamp values do not include any leap seconds (in other words, all
20
+ # days are considered 86400 seconds long).
21
+ #
22
+ # Timestamps with a non-empty timezone
23
+ # ------------------------------------
24
+ #
25
+ # If a Timestamp column has a non-empty timezone value, its epoch is
26
+ # 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone
27
+ # (the Unix epoch), regardless of the Timestamp's own timezone.
28
+ #
29
+ # Therefore, timestamp values with a non-empty timezone correspond to
30
+ # physical points in time together with some additional information about
31
+ # how the data was obtained and/or how to display it (the timezone).
32
+ #
33
+ # For example, the timestamp value 0 with the timezone string "Europe/Paris"
34
+ # corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the
35
+ # application may prefer to display it as "January 1st 1970, 01h00" in
36
+ # the Europe/Paris timezone (which is the same physical point in time).
37
+ #
38
+ # One consequence is that timestamp values with a non-empty timezone
39
+ # can be compared and ordered directly, since they all share the same
40
+ # well-known point of reference (the Unix epoch).
41
+ #
42
+ # Timestamps with an unset / empty timezone
43
+ # -----------------------------------------
44
+ #
45
+ # If a Timestamp column has no timezone value, its epoch is
46
+ # 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone.
47
+ #
48
+ # Therefore, timestamp values without a timezone cannot be meaningfully
49
+ # interpreted as physical points in time, but only as calendar / clock
50
+ # indications ("wall clock time") in an unspecified timezone.
51
+ #
52
+ # For example, the timestamp value 0 with an empty timezone string
53
+ # corresponds to "January 1st 1970, 00h00" in an unknown timezone: there
54
+ # is not enough information to interpret it as a well-defined physical
55
+ # point in time.
56
+ #
57
+ # One consequence is that timestamp values without a timezone cannot
58
+ # be reliably compared or ordered, since they may have different points of
59
+ # reference. In particular, it is *not* possible to interpret an unset
60
+ # or empty timezone as the same as "UTC".
61
+ #
62
+ # Conversion between timezones
63
+ # ----------------------------
64
+ #
65
+ # If a Timestamp column has a non-empty timezone, changing the timezone
66
+ # to a different non-empty value is a metadata-only operation:
67
+ # the timestamp values need not change as their point of reference remains
68
+ # the same (the Unix epoch).
69
+ #
70
+ # However, if a Timestamp column has no timezone value, changing it to a
71
+ # non-empty value requires thinking about the desired semantics.
72
+ # One possibility is to assume that the original timestamp values are
73
+ # relative to the epoch of the timezone being set; timestamp values should
74
+ # then be adjusted to the Unix epoch (for example, changing the timezone from
75
+ # empty to "Europe/Paris" would require converting the timestamp values
76
+ # from "Europe/Paris" to "UTC", which seems counter-intuitive but is
77
+ # nevertheless correct).
78
+ #
79
+ # Guidelines for encoding data from external libraries
80
+ # ----------------------------------------------------
81
+ #
82
+ # Date & time libraries often have multiple different data types for temporal
83
+ # data. In order to ease interoperability between different implementations the
84
+ # Arrow project has some recommendations for encoding these types into a Timestamp
85
+ # column.
86
+ #
87
+ # An "instant" represents a physical point in time that has no relevant timezone
88
+ # (for example, astronomical data). To encode an instant, use a Timestamp with
89
+ # the timezone string set to "UTC", and make sure the Timestamp values
90
+ # are relative to the UTC epoch (January 1st 1970, midnight).
91
+ #
92
+ # A "zoned date-time" represents a physical point in time annotated with an
93
+ # informative timezone (for example, the timezone in which the data was
94
+ # recorded). To encode a zoned date-time, use a Timestamp with the timezone
95
+ # string set to the name of the timezone, and make sure the Timestamp values
96
+ # are relative to the UTC epoch (January 1st 1970, midnight).
97
+ #
98
+ # (There is some ambiguity between an instant and a zoned date-time with the
99
+ # UTC timezone. Both of these are stored the same in Arrow. Typically,
100
+ # this distinction does not matter. If it does, then an application should
101
+ # use custom metadata or an extension type to distinguish between the two cases.)
102
+ #
103
+ # An "offset date-time" represents a physical point in time combined with an
104
+ # explicit offset from UTC. To encode an offset date-time, use a Timestamp
105
+ # with the timezone string set to the numeric timezone offset string
106
+ # (e.g. "+03:00"), and make sure the Timestamp values are relative to
107
+ # the UTC epoch (January 1st 1970, midnight).
108
+ #
109
+ # A "naive date-time" (also called "local date-time" in some libraries)
110
+ # represents a wall clock time combined with a calendar date, but with
111
+ # no indication of how to map this information to a physical point in time.
112
+ # Naive date-times must be handled with care because of this missing
113
+ # information, and also because daylight saving time (DST) may make
114
+ # some values ambiguous or nonexistent. A naive date-time may be
115
+ # stored as a struct with Date and Time fields. However, it may also be
116
+ # encoded into a Timestamp column with an empty timezone. The timestamp
117
+ # values should be computed "as if" the timezone of the date-time values
118
+ # was UTC; for example, the naive date-time "January 1st 1970, 00h00" would
119
+ # be encoded as timestamp value 0.
120
+ class Timestamp < ::FlatBuffers::Table
121
+ # The timezone is an optional string indicating the name of a timezone,
122
+ # one of:
123
+ #
124
+ # * As used in the Olson timezone database (the "tz database" or
125
+ # "tzdata"), such as "America/New_York".
126
+ # * An absolute timezone offset of the form "+XX:XX" or "-XX:XX",
127
+ # such as "+07:30".
128
+ #
129
+ # Whether a timezone string is present indicates different semantics about
130
+ # the data (see above). Returns nil when the field's virtual offset is zero.
131
+ def timezone
132
+ field_offset = @view.unpack_virtual_offset(6)
133
+ return nil if field_offset.zero?
134
+
135
+ @view.unpack_string(field_offset)
136
+ end
137
+
138
+ def unit # TimeUnit for the stored value (0 when the offset is zero); the raw value if TimeUnit.try_convert gives nothing
139
+ field_offset = @view.unpack_virtual_offset(4)
140
+ if field_offset.zero?
141
+ enum_value = 0
142
+ else
143
+ enum_value = @view.unpack_short(field_offset)
144
+ end
145
+ ::ArrowFormat::Org::Apache::Arrow::Flatbuf::TimeUnit.try_convert(enum_value) || enum_value
146
+ end
147
+ end
148
+ end
149
+ end
150
+ end
151
+ end
152
+ end