red-arrow 3.0.0 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +23 -0
- data/ext/arrow/arrow.cpp +3 -0
- data/ext/arrow/converters.cpp +5 -0
- data/ext/arrow/converters.hpp +126 -0
- data/ext/arrow/extconf.rb +13 -0
- data/ext/arrow/memory-view.cpp +311 -0
- data/ext/arrow/memory-view.hpp +26 -0
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/aggregate-node-options.rb +35 -0
- data/lib/arrow/aggregation.rb +46 -0
- data/lib/arrow/array-builder.rb +5 -0
- data/lib/arrow/array.rb +130 -0
- data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
- data/lib/arrow/buffer.rb +10 -6
- data/lib/arrow/column-containable.rb +100 -1
- data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
- data/lib/arrow/data-type.rb +14 -5
- data/lib/arrow/datum.rb +100 -0
- data/lib/arrow/dense-union-data-type.rb +2 -2
- data/lib/arrow/dictionary-data-type.rb +2 -2
- data/lib/arrow/equal-options.rb +38 -0
- data/lib/arrow/expression.rb +48 -0
- data/lib/arrow/file-system.rb +34 -0
- data/lib/arrow/group.rb +116 -124
- data/lib/arrow/loader.rb +46 -0
- data/lib/arrow/map-array-builder.rb +109 -0
- data/lib/arrow/map-array.rb +26 -0
- data/lib/arrow/map-data-type.rb +89 -0
- data/lib/arrow/path-extension.rb +1 -1
- data/lib/arrow/record-batch-reader.rb +41 -0
- data/lib/arrow/record-batch.rb +0 -2
- data/lib/arrow/scalar.rb +32 -0
- data/lib/arrow/slicer.rb +44 -143
- data/lib/arrow/sort-key.rb +193 -0
- data/lib/arrow/sort-options.rb +109 -0
- data/lib/arrow/source-node-options.rb +32 -0
- data/lib/arrow/sparse-union-data-type.rb +2 -2
- data/lib/arrow/string-dictionary-array-builder.rb +27 -0
- data/lib/arrow/symbol-values-appendable.rb +34 -0
- data/lib/arrow/table-concatenate-options.rb +36 -0
- data/lib/arrow/table-formatter.rb +141 -17
- data/lib/arrow/table-list-formatter.rb +5 -3
- data/lib/arrow/table-loader.rb +41 -3
- data/lib/arrow/table-saver.rb +29 -3
- data/lib/arrow/table-table-formatter.rb +7 -31
- data/lib/arrow/table.rb +34 -40
- data/lib/arrow/time32-data-type.rb +2 -2
- data/lib/arrow/time64-data-type.rb +2 -2
- data/lib/arrow/timestamp-data-type.rb +2 -2
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +2 -1
- data/test/helper.rb +1 -0
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +19 -0
- data/test/raw-records/test-map-array.rb +441 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array-builder.rb +7 -0
- data/test/test-array.rb +154 -0
- data/test/test-binary-dictionary-array-builder.rb +103 -0
- data/test/test-boolean-scalar.rb +26 -0
- data/test/test-csv-loader.rb +8 -8
- data/test/test-decimal128-data-type.rb +2 -2
- data/test/test-expression.rb +40 -0
- data/test/test-float-scalar.rb +46 -0
- data/test/test-function.rb +176 -0
- data/test/test-group.rb +75 -51
- data/test/test-map-array-builder.rb +110 -0
- data/test/test-map-array.rb +33 -0
- data/test/test-map-data-type.rb +36 -0
- data/test/test-memory-view.rb +434 -0
- data/test/test-orc.rb +19 -23
- data/test/test-record-batch-reader.rb +46 -0
- data/test/test-record-batch.rb +42 -0
- data/test/test-slicer.rb +166 -167
- data/test/test-sort-indices.rb +40 -0
- data/test/test-sort-key.rb +81 -0
- data/test/test-sort-options.rb +58 -0
- data/test/test-string-dictionary-array-builder.rb +103 -0
- data/test/test-table.rb +190 -53
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +17 -0
- data/test/values/test-map-array.rb +433 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +73 -6
@@ -0,0 +1,46 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class Aggregation
    # Builds an aggregation from a Hash description.
    #
    # Recognized keys are `:function` (String or Symbol, required),
    # `:input` (required), `:options` and `:output`. The function name
    # gets a `hash_` prefix when it doesn't have one, and a missing
    # `:output` defaults to `"<function without hash_>(<input>)"`.
    #
    # @param value [Object] The value to be converted.
    # @return [Arrow::Aggregation, nil] `nil` when `value` isn't a Hash
    #   or a required key is missing or has an unsupported type.
    #
    # @api private
    def self.try_convert(value)
      return nil unless value.is_a?(Hash)

      name = value[:function]
      name = name.to_s if name.is_a?(Symbol)
      return nil unless name.is_a?(String)

      # TODO: Improve this when we have non hash based aggregate function
      name = "hash_#{name}" unless name.start_with?("hash_")

      options = value[:options]
      input = value[:input]
      return nil if input.nil?

      output = value[:output]
      output = "#{name.sub(/\Ahash_/, "")}(#{input})" if output.nil?

      new(name, options, input, output)
    end
  end
end
|
data/lib/arrow/array-builder.rb
CHANGED
data/lib/arrow/array.rb
CHANGED
@@ -55,6 +55,18 @@ module Arrow
|
|
55
55
|
end
|
56
56
|
end
|
57
57
|
|
58
|
+
# @param other [Arrow::Array] The array to be compared.
|
59
|
+
# @param options [Arrow::EqualOptions, Hash] (nil)
|
60
|
+
# The options to customize how to compare.
|
61
|
+
#
|
62
|
+
# @return [Boolean]
|
63
|
+
# `true` if both of them have the same data, `false` otherwise.
|
64
|
+
#
|
65
|
+
# @since 5.0.0
|
66
|
+
def equal_array?(other, options = nil)
  # Thin wrapper: the comparison itself is done by #equal_options.
  equal_options(other, options)
end
|
69
|
+
|
58
70
|
def each
|
59
71
|
return to_enum(__method__) unless block_given?
|
60
72
|
|
@@ -100,5 +112,123 @@ module Arrow
|
|
100
112
|
is_in_raw(values)
|
101
113
|
end
|
102
114
|
end
|
115
|
+
|
116
|
+
# @api private
|
117
|
+
alias_method :concatenate_raw, :concatenate
|
118
|
+
# Concatenates the given other arrays to the array.
|
119
|
+
#
|
120
|
+
# @param other_arrays [::Array, Arrow::Array] The arrays to be
|
121
|
+
# concatenated.
|
122
|
+
#
|
123
|
+
# Each other array is processed by {#resolve} before they're
|
124
|
+
# concatenated.
|
125
|
+
#
|
126
|
+
# @example Raw Ruby Array
|
127
|
+
# array = Arrow::Int32Array.new([1])
|
128
|
+
# array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
|
129
|
+
#
|
130
|
+
# @example Arrow::Array
|
131
|
+
# array = Arrow::Int32Array.new([1])
|
132
|
+
# array.concatenate(Arrow::Int32Array.new([2, 3]),
|
133
|
+
# Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
|
134
|
+
#
|
135
|
+
# @since 4.0.0
|
136
|
+
def concatenate(*other_arrays)
  # Normalize every argument through #resolve before handing the
  # whole list to the raw concatenation.
  resolved_arrays = other_arrays.map { |candidate| resolve(candidate) }
  concatenate_raw(resolved_arrays)
end
|
142
|
+
|
143
|
+
# Concatenates the given other array to the array.
|
144
|
+
#
|
145
|
+
# If you have multiple arrays to be concatenated, you should use
|
146
|
+
# {#concatenate} to concatenate multiple arrays at once.
|
147
|
+
#
|
148
|
+
# @param other_array [::Array, Arrow::Array] The array to be concatenated.
|
149
|
+
#
|
150
|
+
# `other_array` is processed by {#resolve} before it's
|
151
|
+
# concatenated.
|
152
|
+
#
|
153
|
+
# @example Raw Ruby Array
|
154
|
+
# Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
|
155
|
+
#
|
156
|
+
# @example Arrow::Array
|
157
|
+
# Arrow::Int32Array.new([1]) +
|
158
|
+
# Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
|
159
|
+
#
|
160
|
+
# @since 4.0.0
|
161
|
+
def +(other_array)
  # Single-array convenience form; delegates to #concatenate.
  concatenate(other_array)
end
|
164
|
+
|
165
|
+
# Ensures returning the same data type array from the given array.
|
166
|
+
#
|
167
|
+
# @return [Arrow::Array]
|
168
|
+
#
|
169
|
+
# @overload resolve(other_raw_array)
|
170
|
+
#
|
171
|
+
# @param other_raw_array [::Array] A raw Ruby Array. A new Arrow::Array
|
172
|
+
# is built by `self.class.new`.
|
173
|
+
#
|
174
|
+
# @example Raw Ruby Array
|
175
|
+
# int32_array = Arrow::Int32Array.new([1])
|
176
|
+
# other_array = int32_array.resolve([2, 3, 4])
|
177
|
+
# other_array # => Arrow::Int32Array.new([2, 3, 4])
|
178
|
+
#
|
179
|
+
# @overload resolve(other_array)
|
180
|
+
#
|
181
|
+
# @param other_array [Arrow::Array] Another Arrow::Array.
|
182
|
+
#
|
183
|
+
# If the given other array has the same data type as
|
184
|
+
# `self`, the given other array is returned as-is.
|
185
|
+
#
|
186
|
+
# If the given other array doesn't have the same data type as
|
187
|
+
# `self`, the given other array is casted.
|
188
|
+
#
|
189
|
+
# @example Same data type
|
190
|
+
# int32_array = Arrow::Int32Array.new([1])
|
191
|
+
# other_int32_array = Arrow::Int32Array.new([2, 3, 4])
|
192
|
+
# other_array = int32_array.resolve(other_int32_array)
|
193
|
+
# other_array.object_id == other_int32_array.object_id
|
194
|
+
#
|
195
|
+
# @example Other data type
|
196
|
+
# int32_array = Arrow::Int32Array.new([1])
|
197
|
+
# other_int8_array = Arrow::Int8Array.new([2, 3, 4])
|
198
|
+
# other_array = int32_array.resolve(other_int8_array)
|
199
|
+
# other_array #=> Arrow::Int32Array.new([2, 3, 4])
|
200
|
+
#
|
201
|
+
# @since 4.0.0
|
202
|
+
def resolve(other_array)
  # Returns an array that has the same value data type as this array.
  #
  # * Raw Ruby Array: built with this class's builder class.
  # * Object that responds to #value_data_type: returned as-is when the
  #   data types match, cast otherwise.
  # * Anything else: ArgumentError.
  if other_array.is_a?(::Array)
    builder_class = self.class.builder_class
    if builder_class.nil?
      message =
        "[array][resolve] can't build #{value_data_type} array " +
        "from raw Ruby Array"
      raise ArgumentError, message
    end
    # Some builders take only values, others need the data type too.
    if builder_class.buildable?([other_array])
      builder_class.build(other_array)
    elsif builder_class.buildable?([value_data_type, other_array])
      builder_class.build(value_data_type, other_array)
    else
      message =
        "[array][resolve] need to implement " +
        "a feature that building #{value_data_type} array " +
        "from raw Ruby Array"
      # The original raised the misspelled, undefined constant
      # NotImpelemented, which surfaced as a NameError.
      raise NotImplementedError, message
    end
  elsif other_array.respond_to?(:value_data_type)
    return other_array if value_data_type == other_array.value_data_type
    other_array.cast(value_data_type)
  else
    message =
      "[array][resolve] can't build #{value_data_type} array: " +
      "#{other_array.inspect}"
    raise ArgumentError, message
  end
end
|
103
233
|
end
|
104
234
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class BinaryDictionaryArrayBuilder
    include SymbolValuesAppendable

    # Creates a fresh builder for binary values.
    # NOTE(review): presumably a hook called by SymbolValuesAppendable;
    # confirm against that module.
    #
    # @return [Arrow::BinaryArrayBuilder]
    def create_values_array_builder
      BinaryArrayBuilder.new
    end
    private :create_values_array_builder
  end
end
|
data/lib/arrow/buffer.rb
CHANGED
@@ -17,12 +17,16 @@
|
|
17
17
|
|
18
18
|
module Arrow
  class Buffer
    # Wraps a String in a new buffer.
    #
    # @param value [Object] The value to be converted.
    # @return [Arrow::Buffer, nil] A new buffer for a String,
    #   `nil` for anything else.
    #
    # @api private
    def self.try_convert(value)
      return nil unless value.is_a?(String)

      new(value)
    end
  end
end
|
@@ -27,6 +27,17 @@ module Arrow
|
|
27
27
|
columns.each(&block)
|
28
28
|
end
|
29
29
|
|
30
|
+
# @overload find_column(name)
|
31
|
+
# Find a column that has the given name.
|
32
|
+
#
|
33
|
+
# @param name [String, Symbol] The column name to be found.
|
34
|
+
# @return [Column] The found column.
|
35
|
+
#
|
36
|
+
# @overload find_column(index)
|
37
|
+
# Find the `index`-th column.
|
38
|
+
#
|
39
|
+
# @param index [Integer] The index to be found.
|
40
|
+
# @return [Column] The found column.
|
30
41
|
def find_column(name_or_index)
|
31
42
|
case name_or_index
|
32
43
|
when String, Symbol
|
@@ -40,9 +51,97 @@ module Arrow
|
|
40
51
|
return nil if index < 0 or index >= n_columns
|
41
52
|
Column.new(self, index)
|
42
53
|
else
|
43
|
-
message = "column name or index must be String, Symbol or Integer"
|
54
|
+
message = "column name or index must be String, Symbol or Integer: "
|
55
|
+
message << name_or_index.inspect
|
44
56
|
raise ArgumentError, message
|
45
57
|
end
|
46
58
|
end
|
59
|
+
|
60
|
+
# Selects columns that are selected by `selectors` and/or `block`
|
61
|
+
# and creates a new container only with the selected columns.
|
62
|
+
#
|
63
|
+
# @param selectors [Array<String, Symbol, Integer, Range>]
|
64
|
+
# If a selector is `String`, `Symbol` or `Integer`, the selector
|
65
|
+
# selects a column by {#find_column}.
|
66
|
+
#
|
67
|
+
# If a selector is `Range`, the selector selects columns by `::Array#[]`.
|
68
|
+
# @yield [column] Gives a column to the block to select columns.
|
69
|
+
# This uses `::Array#select`.
|
70
|
+
# @yieldparam column [Column] A target column.
|
71
|
+
# @yieldreturn [Boolean] Whether the given column is selected or not.
|
72
|
+
# @return [self.class] The newly created container that only has selected
|
73
|
+
# columns.
|
74
|
+
def select_columns(*selectors, &block)
  # Builds a new container (via `self.class.new`) from the selected columns.
  #
  # Without selectors, the block alone filters all columns; without
  # selectors and without a block, an Enumerator is returned. With
  # selectors, each one is resolved in order and the block, if given,
  # filters the result afterwards.
  #
  # Raises KeyError for an unknown String/Symbol selector and
  # IndexError for any other selector that finds no column, including
  # a Range that lies completely outside the columns.
  if selectors.empty?
    return to_enum(__method__) unless block_given?
    selected_columns = columns.select(&block)
  else
    selected_columns = []
    selectors.each do |selector|
      if selector.is_a?(Range)
        ranged_columns = columns[selector]
        # ::Array#[] returns nil for a range that starts past the end;
        # report it like an out-of-range Integer instead of letting
        # #concat fail with a TypeError.
        if ranged_columns.nil?
          message =
            "out of index (0..#{n_columns - 1}): " +
            "#{selector.inspect}: #{inspect}"
          raise IndexError, message
        end
        selected_columns.concat(ranged_columns)
      else
        column = find_column(selector)
        if column.nil?
          case selector
          when String, Symbol
            message = "unknown column: #{selector.inspect}: #{inspect}"
            raise KeyError, message
          else
            message =
              "out of index (0..#{n_columns - 1}): " +
              "#{selector.inspect}: #{inspect}"
            raise IndexError, message
          end
        end
        selected_columns << column
      end
    end
    selected_columns = selected_columns.select(&block) if block_given?
  end
  self.class.new(selected_columns)
end
|
104
|
+
|
105
|
+
# @overload [](name)
|
106
|
+
# Find a column that has the given name.
|
107
|
+
#
|
108
|
+
# @param name [String, Symbol] The column name to be found.
|
109
|
+
# @return [Column] The found column.
|
110
|
+
# @see #find_column
|
111
|
+
#
|
112
|
+
# @overload [](index)
|
113
|
+
# Find the `index`-th column.
|
114
|
+
#
|
115
|
+
# @param index [Integer] The index to be found.
|
116
|
+
# @return [Column] The found column.
|
117
|
+
# @see #find_column
|
118
|
+
#
|
119
|
+
# @overload [](range)
|
120
|
+
# Selects columns that are in `range` and creates a new container
|
121
|
+
# only with the selected columns.
|
122
|
+
#
|
123
|
+
# @param range [Range] The range to be selected.
|
124
|
+
# @return [self.class] The newly created container that only has selected
|
125
|
+
# columns.
|
126
|
+
# @see #select_columns
|
127
|
+
#
|
128
|
+
# @overload [](selectors)
|
129
|
+
# Selects columns that are selected by `selectors` and creates a
|
130
|
+
# new container only with the selected columns.
|
131
|
+
#
|
132
|
+
# @param selectors [Array] The selectors that are used to select columns.
|
133
|
+
# @return [self.class] The newly created container that only has selected
|
134
|
+
# columns.
|
135
|
+
# @see #select_columns
|
136
|
+
def [](selector)
  # An Array is splatted into several selectors and a Range is passed
  # as one selector, both to #select_columns; anything else is looked
  # up as a single column.
  if ::Array === selector
    select_columns(*selector)
  elsif Range === selector
    select_columns(selector)
  else
    find_column(selector)
  end
end
|
47
146
|
end
|
48
147
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  # Mix-in that stores the constructor arguments in `@arguments` after
  # running the original constructor.
  # NOTE(review): judging by the module name, the reference is kept so
  # the arguments aren't garbage collected while the object is alive;
  # confirm.
  module ConstructorArgumentsGCGuardable
    def initialize(*arguments)
      super
      @arguments = arguments
    end
  end
end
|
data/lib/arrow/data-type.rb
CHANGED
@@ -18,7 +18,7 @@
|
|
18
18
|
module Arrow
|
19
19
|
class DataType
|
20
20
|
class << self
|
21
|
-
#
|
21
|
+
# Ensure returning suitable {Arrow::DataType}.
|
22
22
|
#
|
23
23
|
# @overload resolve(data_type)
|
24
24
|
#
|
@@ -31,17 +31,21 @@ module Arrow
|
|
31
31
|
#
|
32
32
|
# @overload resolve(name)
|
33
33
|
#
|
34
|
-
# Creates a suitable data type from type name. For
|
35
|
-
# you can create {Arrow::BooleanDataType} from
|
34
|
+
# Creates a suitable data type from the given type name. For
|
35
|
+
# example, you can create {Arrow::BooleanDataType} from
|
36
|
+
# `:boolean`.
|
36
37
|
#
|
37
38
|
# @param name [String, Symbol] The type name of the data type.
|
38
39
|
#
|
40
|
+
# @return [Arrow::DataType] A new suitable data type.
|
41
|
+
#
|
39
42
|
# @example Create a boolean data type
|
40
43
|
# Arrow::DataType.resolve(:boolean)
|
41
44
|
#
|
42
45
|
# @overload resolve(name_with_arguments)
|
43
46
|
#
|
44
|
-
# Creates a suitable data type from type name
|
47
|
+
# Creates a new suitable data type from the given type name
|
48
|
+
# with arguments.
|
45
49
|
#
|
46
50
|
# @param name_with_arguments [::Array<String, ...>]
|
47
51
|
# The type name of the data type as the first element.
|
@@ -51,6 +55,8 @@ module Arrow
|
|
51
55
|
# For example, {Arrow::TimestampDataType} needs unit as
|
52
56
|
# additional information.
|
53
57
|
#
|
58
|
+
# @return [Arrow::DataType] A new suitable data type.
|
59
|
+
#
|
54
60
|
# @example Create a boolean data type
|
55
61
|
# Arrow::DataType.resolve([:boolean])
|
56
62
|
#
|
@@ -59,7 +65,8 @@ module Arrow
|
|
59
65
|
#
|
60
66
|
# @overload resolve(description)
|
61
67
|
#
|
62
|
-
# Creates a suitable data type from data type
|
68
|
+
# Creates a new suitable data type from the given data type
|
69
|
+
# description.
|
63
70
|
#
|
64
71
|
# Data type description is a raw `Hash`. Data type description
|
65
72
|
# must have `:type` value. `:type` is the type of the data type.
|
@@ -74,6 +81,8 @@ module Arrow
|
|
74
81
|
# @option description [String, Symbol] :type The type name of
|
75
82
|
# the data type.
|
76
83
|
#
|
84
|
+
# @return [Arrow::DataType] A new suitable data type.
|
85
|
+
#
|
77
86
|
# @example Create a boolean data type
|
78
87
|
# Arrow::DataType.resolve(type: :boolean)
|
79
88
|
#
|
data/lib/arrow/datum.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class Datum
    class << self
      # Converts an Arrow object or a plain Ruby value to a datum.
      #
      # Arrow tables, arrays, chunked arrays and scalars are wrapped
      # directly. Plain Ruby values are first turned into an array
      # (raw Ruby Array) or into a scalar and then converted again.
      #
      # @param value [Object] The value to be converted.
      # @return [Arrow::Datum, nil] `nil` when `value` can't be converted.
      #
      # @api private
      def try_convert(value)
        case value
        when Table
          TableDatum.new(value)
        when Array
          ArrayDatum.new(value)
        when ChunkedArray
          ChunkedArrayDatum.new(value)
        when Scalar
          ScalarDatum.new(value)
        when ::Array
          ArrayDatum.new(ArrayBuilder.build(value))
        when Integer
          # Pick the narrowest integer scalar that can hold the value.
          # Unsigned ranges are tested first, so only negative values
          # reach the signed types.
          case value
          when (0..((2 ** 8) - 1))
            try_convert(UInt8Scalar.new(value))
          when ((-(2 ** 7))..((2 ** 7) - 1))
            try_convert(Int8Scalar.new(value))
          when (0..((2 ** 16) - 1))
            try_convert(UInt16Scalar.new(value))
          when ((-(2 ** 15))..((2 ** 15) - 1))
            try_convert(Int16Scalar.new(value))
          when (0..((2 ** 32) - 1))
            try_convert(UInt32Scalar.new(value))
          when ((-(2 ** 31))..((2 ** 31) - 1))
            try_convert(Int32Scalar.new(value))
          when (0..((2 ** 64) - 1))
            try_convert(UInt64Scalar.new(value))
          when ((-(2 ** 63))..((2 ** 63) - 1))
            try_convert(Int64Scalar.new(value))
          else
            nil
          end
        when Float
          try_convert(DoubleScalar.new(value))
        when true, false
          try_convert(BooleanScalar.new(value))
        when String
          # ASCII-only or UTF-8 strings become string scalars, anything
          # else becomes binary. The "large" variants are used when the
          # byte size exceeds 2**31 - 1.
          if value.ascii_only? || value.encoding == Encoding::UTF_8
            if value.bytesize <= ((2 ** 31) - 1)
              try_convert(StringScalar.new(value))
            else
              try_convert(LargeStringScalar.new(value))
            end
          else
            if value.bytesize <= ((2 ** 31) - 1)
              try_convert(BinaryScalar.new(value))
            else
              try_convert(LargeBinaryScalar.new(value))
            end
          end
        when Date
          date32_value = (value - Date32ArrayBuilder::UNIX_EPOCH).to_i
          try_convert(Date32Scalar.new(date32_value))
        when Time
          # Arrow::Time: the unit decides between 32 bit and 64 bit.
          case value.unit
          when TimeUnit::SECOND, TimeUnit::MILLI
            data_type = Time32DataType.new(value.unit)
            scalar_class = Time32Scalar
          else
            data_type = Time64DataType.new(value.unit)
            scalar_class = Time64Scalar
          end
          try_convert(scalar_class.new(data_type, value.value))
        when ::Time
          data_type = TimestampDataType.new(:nano)
          timestamp_value = value.to_i * 1_000_000_000 + value.nsec
          try_convert(TimestampScalar.new(data_type, timestamp_value))
        when Decimal128
          # The original branch was a copy of the ::Time branch: it
          # built a timestamp data type and called value.nsec.
          # NOTE(review): no precision/scale is available for a bare
          # Decimal128 here, so this assumes an integer-valued decimal
          # (scale 0) whose precision is its digit count -- confirm.
          precision = [value.to_s.delete("-").size, 1].max
          data_type = Decimal128DataType.new(precision, 0)
          try_convert(Decimal128Scalar.new(data_type, value))
        else
          nil
        end
      end
    end
  end
end
|
@@ -33,7 +33,7 @@ module Arrow
|
|
33
33
|
# @param type_codes [::Array<Integer>] The IDs that indicates
|
34
34
|
# corresponding fields.
|
35
35
|
#
|
36
|
-
# @example Create a dense union data type for {2: visible, 9: count}
|
36
|
+
# @example Create a dense union data type for `{2: visible, 9: count}`
|
37
37
|
# fields = [
|
38
38
|
# Arrow::Field.new("visible", :boolean),
|
39
39
|
# {
|
@@ -57,7 +57,7 @@ module Arrow
|
|
57
57
|
# @option description [::Array<Integer>] :type_codes The IDs
|
58
58
|
# that indicates corresponding fields.
|
59
59
|
#
|
60
|
-
# @example Create a dense union data type for {2: visible, 9: count}
|
60
|
+
# @example Create a dense union data type for `{2: visible, 9: count}`
|
61
61
|
# fields = [
|
62
62
|
# Arrow::Field.new("visible", :boolean),
|
63
63
|
# {
|
@@ -50,7 +50,7 @@ module Arrow
|
|
50
50
|
# @param ordered [Boolean] Whether dictionary contents are
|
51
51
|
# ordered or not.
|
52
52
|
#
|
53
|
-
# @example Create a dictionary data type for {0: "Hello", 1: "World"}
|
53
|
+
# @example Create a dictionary data type for `{0: "Hello", 1: "World"}`
|
54
54
|
# index_data_type = :int8
|
55
55
|
# value_data_type = :string
|
56
56
|
# ordered = true
|
@@ -91,7 +91,7 @@ module Arrow
|
|
91
91
|
# @option description [Boolean] :ordered Whether dictionary
|
92
92
|
# contents are ordered or not.
|
93
93
|
#
|
94
|
-
# @example Create a dictionary data type for {0: "Hello", 1: "World"}
|
94
|
+
# @example Create a dictionary data type for `{0: "Hello", 1: "World"}`
|
95
95
|
# Arrow::DictionaryDataType.new(index_data_type: :int8,
|
96
96
|
# value_data_type: :string,
|
97
97
|
# ordered: true)
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class EqualOptions
    # Builds options from a Hash by calling `<key>=` for every pair.
    #
    # @param value [Object] The value to be converted.
    # @return [Arrow::EqualOptions, nil] `nil` when `value` isn't a Hash
    #   or has a key without a matching setter.
    #
    # @api private
    def self.try_convert(value)
      return nil unless value.is_a?(Hash)

      converted = new
      value.each_pair do |key, option_value|
        writer = "#{key}=".to_sym
        # An unknown key makes the whole conversion fail.
        return nil unless converted.respond_to?(writer)
        converted.__send__(writer, option_value)
      end
      converted
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class Expression
    # Converts a Ruby value to an expression.
    #
    # * Symbol: a field expression for the name.
    # * Ruby Array: a call expression; the first element is the
    #   function name (String or Symbol), the rest are arguments, and a
    #   trailing FunctionOptions is taken as the call options.
    # * Anything else: a literal expression, when Datum.try_convert
    #   accepts the value.
    #
    # @param value [Object] The value to be converted.
    # @return [Arrow::Expression, nil] `nil` when `value` can't be converted.
    #
    # @api private
    def self.try_convert(value)
      return FieldExpression.new(value.to_s) if value.is_a?(Symbol)

      unless value.is_a?(::Array)
        literal = Datum.try_convert(value)
        return literal.nil? ? nil : LiteralExpression.new(literal)
      end

      head, *rest = value
      return nil unless head.is_a?(String) || head.is_a?(Symbol)

      # `rest` is a fresh array, so popping doesn't touch the caller's.
      call_options = rest.last.is_a?(FunctionOptions) ? rest.pop : nil
      CallExpression.new(head.to_s, rest, call_options)
    end
  end
end
|