red-arrow 3.0.0 → 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +23 -0
- data/ext/arrow/arrow.cpp +3 -0
- data/ext/arrow/converters.cpp +5 -0
- data/ext/arrow/converters.hpp +126 -0
- data/ext/arrow/extconf.rb +13 -0
- data/ext/arrow/memory-view.cpp +311 -0
- data/ext/arrow/memory-view.hpp +26 -0
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/aggregate-node-options.rb +35 -0
- data/lib/arrow/aggregation.rb +46 -0
- data/lib/arrow/array-builder.rb +5 -0
- data/lib/arrow/array.rb +130 -0
- data/lib/arrow/binary-dictionary-array-builder.rb +27 -0
- data/lib/arrow/buffer.rb +10 -6
- data/lib/arrow/column-containable.rb +100 -1
- data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
- data/lib/arrow/data-type.rb +14 -5
- data/lib/arrow/datum.rb +100 -0
- data/lib/arrow/dense-union-data-type.rb +2 -2
- data/lib/arrow/dictionary-data-type.rb +2 -2
- data/lib/arrow/equal-options.rb +38 -0
- data/lib/arrow/expression.rb +48 -0
- data/lib/arrow/file-system.rb +34 -0
- data/lib/arrow/group.rb +116 -124
- data/lib/arrow/loader.rb +46 -0
- data/lib/arrow/map-array-builder.rb +109 -0
- data/lib/arrow/map-array.rb +26 -0
- data/lib/arrow/map-data-type.rb +89 -0
- data/lib/arrow/path-extension.rb +1 -1
- data/lib/arrow/record-batch-reader.rb +41 -0
- data/lib/arrow/record-batch.rb +0 -2
- data/lib/arrow/scalar.rb +32 -0
- data/lib/arrow/slicer.rb +44 -143
- data/lib/arrow/sort-key.rb +193 -0
- data/lib/arrow/sort-options.rb +109 -0
- data/lib/arrow/source-node-options.rb +32 -0
- data/lib/arrow/sparse-union-data-type.rb +2 -2
- data/lib/arrow/string-dictionary-array-builder.rb +27 -0
- data/lib/arrow/symbol-values-appendable.rb +34 -0
- data/lib/arrow/table-concatenate-options.rb +36 -0
- data/lib/arrow/table-formatter.rb +141 -17
- data/lib/arrow/table-list-formatter.rb +5 -3
- data/lib/arrow/table-loader.rb +41 -3
- data/lib/arrow/table-saver.rb +29 -3
- data/lib/arrow/table-table-formatter.rb +7 -31
- data/lib/arrow/table.rb +34 -40
- data/lib/arrow/time32-data-type.rb +2 -2
- data/lib/arrow/time64-data-type.rb +2 -2
- data/lib/arrow/timestamp-data-type.rb +2 -2
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +2 -1
- data/test/helper.rb +1 -0
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +19 -0
- data/test/raw-records/test-map-array.rb +441 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array-builder.rb +7 -0
- data/test/test-array.rb +154 -0
- data/test/test-binary-dictionary-array-builder.rb +103 -0
- data/test/test-boolean-scalar.rb +26 -0
- data/test/test-csv-loader.rb +8 -8
- data/test/test-decimal128-data-type.rb +2 -2
- data/test/test-expression.rb +40 -0
- data/test/test-float-scalar.rb +46 -0
- data/test/test-function.rb +176 -0
- data/test/test-group.rb +75 -51
- data/test/test-map-array-builder.rb +110 -0
- data/test/test-map-array.rb +33 -0
- data/test/test-map-data-type.rb +36 -0
- data/test/test-memory-view.rb +434 -0
- data/test/test-orc.rb +19 -23
- data/test/test-record-batch-reader.rb +46 -0
- data/test/test-record-batch.rb +42 -0
- data/test/test-slicer.rb +166 -167
- data/test/test-sort-indices.rb +40 -0
- data/test/test-sort-key.rb +81 -0
- data/test/test-sort-options.rb +58 -0
- data/test/test-string-dictionary-array-builder.rb +103 -0
- data/test/test-table.rb +190 -53
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +17 -0
- data/test/values/test-map-array.rb +433 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +73 -6
@@ -0,0 +1,46 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class Aggregation
    class << self
      # Tries to build an aggregation from a raw Ruby value.
      #
      # Only a `Hash` is convertible. The following keys are read:
      #
      # * `:function`: Required. `String` or `Symbol`. `"hash_"` is
      #   prepended when the name doesn't already start with it.
      # * `:options`: Optional. Passed to the constructor as-is.
      # * `:input`: Required. Passed to the constructor as-is.
      # * `:output`: Optional. Defaults to `"#{function}(#{input})"`
      #   where `function` is the name without the `"hash_"` prefix.
      #
      # @param value [Object] The value to be converted.
      #
      # @return [Arrow::Aggregation, nil] A new aggregation, or `nil`
      #   when `value` isn't a `Hash`, `:function` is neither `String`
      #   nor `Symbol`, or `:input` is missing.
      #
      # @api private
      def try_convert(value)
        return nil unless value.is_a?(Hash)

        function = value[:function]
        function = function.to_s if function.is_a?(Symbol)
        # A missing (nil) function is rejected here too.
        return nil unless function.is_a?(String)

        # TODO: Improve this when we have non hash based aggregate function
        function = "hash_#{function}" unless function.start_with?("hash_")

        input = value[:input]
        return nil if input.nil?

        # The prefix can appear only once at the head, so sub is enough.
        output = value[:output]
        output = "#{function.sub(/\Ahash_/, "")}(#{input})" if output.nil?

        new(function, value[:options], input, output)
      end
    end
  end
end
|
data/lib/arrow/array-builder.rb
CHANGED
data/lib/arrow/array.rb
CHANGED
@@ -55,6 +55,18 @@ module Arrow
|
|
55
55
|
end
|
56
56
|
end
|
57
57
|
|
58
|
+
# @param other [Arrow::Array] The array to be compared.
|
59
|
+
# @param options [Arrow::EqualOptions, Hash] (nil)
|
60
|
+
# The options to customize how to compare.
|
61
|
+
#
|
62
|
+
# @return [Boolean]
|
63
|
+
# `true` if both of them have the same data, `false` otherwise.
|
64
|
+
#
|
65
|
+
# @since 5.0.0
|
66
|
+
def equal_array?(other, options=nil)
  # Thin wrapper: the comparison itself is done by `equal_options`,
  # which receives `other` and `options` unchanged.
  equal_options(other, options)
end
|
69
|
+
|
58
70
|
def each
|
59
71
|
return to_enum(__method__) unless block_given?
|
60
72
|
|
@@ -100,5 +112,123 @@ module Arrow
|
|
100
112
|
is_in_raw(values)
|
101
113
|
end
|
102
114
|
end
|
115
|
+
|
116
|
+
# @api private
|
117
|
+
alias_method :concatenate_raw, :concatenate
|
118
|
+
# Concatenates the given other arrays to the array.
|
119
|
+
#
|
120
|
+
# @param other_arrays [::Array, Arrow::Array] The arrays to be
|
121
|
+
# concatenated.
|
122
|
+
#
|
123
|
+
# Each other array is processed by {#resolve} before they're
|
124
|
+
# concatenated.
|
125
|
+
#
|
126
|
+
# @example Raw Ruby Array
|
127
|
+
# array = Arrow::Int32Array.new([1])
|
128
|
+
# array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
|
129
|
+
#
|
130
|
+
# @example Arrow::Array
|
131
|
+
# array = Arrow::Int32Array.new([1])
|
132
|
+
# array.concatenate(Arrow::Int32Array.new([2, 3]),
|
133
|
+
# Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
|
134
|
+
#
|
135
|
+
# @since 4.0.0
|
136
|
+
def concatenate(*other_arrays)
  # Normalize every argument with #resolve first, then hand all of
  # them to the original (aliased) implementation in one call.
  resolved_arrays = other_arrays.map { |other_array| resolve(other_array) }
  concatenate_raw(resolved_arrays)
end
|
142
|
+
|
143
|
+
# Concatenates the given other array to the array.
|
144
|
+
#
|
145
|
+
# If you have multiple arrays to be concatenated, you should use
|
146
|
+
# {#concatenate} to concatenate multiple arrays at once.
|
147
|
+
#
|
148
|
+
# @param other_array [::Array, Arrow::Array] The array to be concatenated.
|
149
|
+
#
|
150
|
+
# `@other_array` is processed by {#resolve} before it's
|
151
|
+
# concatenated.
|
152
|
+
#
|
153
|
+
# @example Raw Ruby Array
|
154
|
+
# Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
|
155
|
+
#
|
156
|
+
# @example Arrow::Array
|
157
|
+
# Arrow::Int32Array.new([1]) +
|
158
|
+
# Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
|
159
|
+
#
|
160
|
+
# @since 4.0.0
|
161
|
+
def +(other_array)
  # Single-array shortcut for #concatenate.
  concatenate(other_array)
end
|
164
|
+
|
165
|
+
# Ensures returning the same data type array from the given array.
|
166
|
+
#
|
167
|
+
# @return [Arrow::Array]
|
168
|
+
#
|
169
|
+
# @overload resolve(other_raw_array)
|
170
|
+
#
|
171
|
+
# @param other_raw_array [::Array] A raw Ruby Array. A new Arrow::Array
|
172
|
+
# is built by `self.class.new`.
|
173
|
+
#
|
174
|
+
# @example Raw Ruby Array
|
175
|
+
# int32_array = Arrow::Int32Array.new([1])
|
176
|
+
# other_array = int32_array.resolve([2, 3, 4])
|
177
|
+
# other_array # => Arrow::Int32Array.new([2, 3, 4])
|
178
|
+
#
|
179
|
+
# @overload resolve(other_array)
|
180
|
+
#
|
181
|
+
# @param other_array [Arrow::Array] Another Arrow::Array.
|
182
|
+
#
|
183
|
+
# If the given other array has the same data type as
|
184
|
+
# `self`, the given other array is returned as-is.
|
185
|
+
#
|
186
|
+
# If the given other array doesn't have the same data type as
|
187
|
+
# `self`, the given other array is cast.
|
188
|
+
#
|
189
|
+
# @example Same data type
|
190
|
+
# int32_array = Arrow::Int32Array.new([1])
|
191
|
+
# other_int32_array = Arrow::Int32Array.new([2, 3, 4])
|
192
|
+
# other_array = int32_array.resolve(other_int32_array)
|
193
|
+
# other_array.object_id == other_int32_array.object_id
|
194
|
+
#
|
195
|
+
# @example Other data type
|
196
|
+
# int32_array = Arrow::Int32Array.new([1])
|
197
|
+
# other_int8_array = Arrow::Int8Array.new([2, 3, 4])
|
198
|
+
# other_array = int32_array.resolve(other_int8_array)
|
199
|
+
# other_array #=> Arrow::Int32Array.new([2, 3, 4])
|
200
|
+
#
|
201
|
+
# @since 4.0.0
|
202
|
+
def resolve(other_array)
  if other_array.is_a?(::Array)
    # Raw Ruby Array: build a new array with this class's builder.
    builder_class = self.class.builder_class
    if builder_class.nil?
      message =
        "[array][resolve] can't build #{value_data_type} array " +
        "from raw Ruby Array"
      raise ArgumentError, message
    end

    # Try the builder without a data type first, then with
    # `value_data_type` as the leading argument.
    if builder_class.buildable?([other_array])
      builder_class.build(other_array)
    elsif builder_class.buildable?([value_data_type, other_array])
      builder_class.build(value_data_type, other_array)
    else
      message =
        "[array][resolve] need to implement " +
        "a feature that building #{value_data_type} array " +
        "from raw Ruby Array"
      # NotImplementedError is the Ruby built-in; the previous
      # misspelled constant raised NameError instead.
      raise NotImplementedError, message
    end
  elsif other_array.respond_to?(:value_data_type)
    # Same data type: return the given object itself, no copy.
    return other_array if value_data_type == other_array.value_data_type
    other_array.cast(value_data_type)
  else
    message =
      "[array][resolve] can't build #{value_data_type} array: " +
      "#{other_array.inspect}"
    raise ArgumentError, message
  end
end
|
103
233
|
end
|
104
234
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class BinaryDictionaryArrayBuilder
    include SymbolValuesAppendable

    private

    # Returns a new builder for binary values.
    # NOTE(review): presumably a hook called by SymbolValuesAppendable;
    # that module isn't visible here, confirm against it.
    #
    # @return [Arrow::BinaryArrayBuilder]
    def create_values_array_builder
      BinaryArrayBuilder.new
    end
  end
end
|
data/lib/arrow/buffer.rb
CHANGED
@@ -17,12 +17,16 @@
|
|
17
17
|
|
18
18
|
module Arrow
  class Buffer
    class << self
      # Tries to build a buffer from a raw Ruby value.
      #
      # @param value [Object] The value to be converted.
      #
      # @return [Arrow::Buffer, nil] A new buffer created by
      #   `new(value)` when `value` is a `String`, `nil` otherwise.
      #
      # @api private
      def try_convert(value)
        return nil unless value.is_a?(String)

        new(value)
      end
    end
  end
end
|
@@ -27,6 +27,17 @@ module Arrow
|
|
27
27
|
columns.each(&block)
|
28
28
|
end
|
29
29
|
|
30
|
+
# @overload find_column(name)
|
31
|
+
# Find a column that has the given name.
|
32
|
+
#
|
33
|
+
# @param name [String, Symbol] The column name to be found.
|
34
|
+
# @return [Column] The found column.
|
35
|
+
#
|
36
|
+
# @overload find_column(index)
|
37
|
+
# Find the `index`-th column.
|
38
|
+
#
|
39
|
+
# @param index [Integer] The index to be found.
|
40
|
+
# @return [Column] The found column.
|
30
41
|
def find_column(name_or_index)
|
31
42
|
case name_or_index
|
32
43
|
when String, Symbol
|
@@ -40,9 +51,97 @@ module Arrow
|
|
40
51
|
return nil if index < 0 or index >= n_columns
|
41
52
|
Column.new(self, index)
|
42
53
|
else
|
43
|
-
message = "column name or index must be String, Symbol or Integer"
|
54
|
+
message = "column name or index must be String, Symbol or Integer: "
|
55
|
+
message << name_or_index.inspect
|
44
56
|
raise ArgumentError, message
|
45
57
|
end
|
46
58
|
end
|
59
|
+
|
60
|
+
# Selects columns that are selected by `selectors` and/or `block`
|
61
|
+
# and creates a new container only with the selected columns.
|
62
|
+
#
|
63
|
+
# @param selectors [Array<String, Symbol, Integer, Range>]
|
64
|
+
# If a selector is `String`, `Symbol` or `Integer`, the selector
|
65
|
+
# selects a column by {#find_column}.
|
66
|
+
#
|
67
|
+
# If a selector is `Range`, the selector selects columns by `::Array#[]`.
|
68
|
+
# @yield [column] Gives a column to the block to select columns.
|
69
|
+
# This uses `::Array#select`.
|
70
|
+
# @yieldparam column [Column] A target column.
|
71
|
+
# @yieldreturn [Boolean] Whether the given column is selected or not.
|
72
|
+
# @return [self.class] The newly created container that only has selected
|
73
|
+
# columns.
|
74
|
+
def select_columns(*selectors, &block)
  if selectors.empty?
    # Without selectors and without a block there is nothing to
    # filter by, so an enumerator for this method is returned.
    return to_enum(__method__) unless block_given?
    selected_columns = columns.select(&block)
  else
    selected_columns = []
    selectors.each do |selector|
      if selector.is_a?(Range)
        selected_columns.concat(columns[selector])
        next
      end

      column = find_column(selector)
      if column.nil?
        # Name misses are KeyError; everything else that
        # #find_column answers nil for is reported as IndexError.
        case selector
        when String, Symbol
          raise KeyError, "unknown column: #{selector.inspect}: #{inspect}"
        else
          raise IndexError,
                "out of index (0..#{n_columns - 1}): " +
                "#{selector.inspect}: #{inspect}"
        end
      end
      selected_columns << column
    end
    # The block, when given, narrows the columns picked by selectors.
    selected_columns = selected_columns.select(&block) if block_given?
  end
  self.class.new(selected_columns)
end
|
104
|
+
|
105
|
+
# @overload [](name)
|
106
|
+
# Find a column that has the given name.
|
107
|
+
#
|
108
|
+
# @param name [String, Symbol] The column name to be found.
|
109
|
+
# @return [Column] The found column.
|
110
|
+
# @see #find_column
|
111
|
+
#
|
112
|
+
# @overload [](index)
|
113
|
+
# Find the `index`-th column.
|
114
|
+
#
|
115
|
+
# @param index [Integer] The index to be found.
|
116
|
+
# @return [Column] The found column.
|
117
|
+
# @see #find_column
|
118
|
+
#
|
119
|
+
# @overload [](range)
|
120
|
+
# Selects columns that are in `range` and creates a new container
|
121
|
+
# only with the selected columns.
|
122
|
+
#
|
123
|
+
# @param range [Range] The range to be selected.
|
124
|
+
# @return [self.class] The newly created container that only has selected
|
125
|
+
# columns.
|
126
|
+
# @see #select_columns
|
127
|
+
#
|
128
|
+
# @overload [](selectors)
|
129
|
+
# Selects columns that are selected by `selectors` and creates a
|
130
|
+
# new container only with the selected columns.
|
131
|
+
#
|
132
|
+
# @param selectors [Array] The selectors that are used to select columns.
|
133
|
+
# @return [self.class] The newly created container that only has selected
|
134
|
+
# columns.
|
135
|
+
# @see #select_columns
|
136
|
+
def [](selector)
  case selector
  when ::Array
    # Each element of the Array is used as its own selector.
    select_columns(*selector)
  when Range
    select_columns(selector)
  else
    find_column(selector)
  end
end
|
47
146
|
end
|
48
147
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  # Mix-in that remembers the arguments given to the constructor.
  #
  # NOTE(review): judging from the module name, the references are
  # presumably kept so that the arguments aren't garbage collected
  # while the object is alive; confirm against the including classes.
  module ConstructorArgumentsGCGuardable
    # Delegates to the next `initialize` with the same arguments and
    # then stores them in `@arguments`.
    def initialize(*args)
      super
      @arguments = args
    end
  end
end
|
data/lib/arrow/data-type.rb
CHANGED
@@ -18,7 +18,7 @@
|
|
18
18
|
module Arrow
|
19
19
|
class DataType
|
20
20
|
class << self
|
21
|
-
#
|
21
|
+
# Ensure returning suitable {Arrow::DataType}.
|
22
22
|
#
|
23
23
|
# @overload resolve(data_type)
|
24
24
|
#
|
@@ -31,17 +31,21 @@ module Arrow
|
|
31
31
|
#
|
32
32
|
# @overload resolve(name)
|
33
33
|
#
|
34
|
-
# Creates a suitable data type from type name. For
|
35
|
-
# you can create {Arrow::BooleanDataType} from
|
34
|
+
# Creates a suitable data type from the given type name. For
|
35
|
+
# example, you can create {Arrow::BooleanDataType} from
|
36
|
+
# `:boolean`.
|
36
37
|
#
|
37
38
|
# @param name [String, Symbol] The type name of the data type.
|
38
39
|
#
|
40
|
+
# @return [Arrow::DataType] A new suitable data type.
|
41
|
+
#
|
39
42
|
# @example Create a boolean data type
|
40
43
|
# Arrow::DataType.resolve(:boolean)
|
41
44
|
#
|
42
45
|
# @overload resolve(name_with_arguments)
|
43
46
|
#
|
44
|
-
# Creates a suitable data type from type name
|
47
|
+
# Creates a new suitable data type from the given type name
|
48
|
+
# with arguments.
|
45
49
|
#
|
46
50
|
# @param name_with_arguments [::Array<String, ...>]
|
47
51
|
# The type name of the data type as the first element.
|
@@ -51,6 +55,8 @@ module Arrow
|
|
51
55
|
# For example, {Arrow::TimestampDataType} needs unit as
|
52
56
|
# additional information.
|
53
57
|
#
|
58
|
+
# @return [Arrow::DataType] A new suitable data type.
|
59
|
+
#
|
54
60
|
# @example Create a boolean data type
|
55
61
|
# Arrow::DataType.resolve([:boolean])
|
56
62
|
#
|
@@ -59,7 +65,8 @@ module Arrow
|
|
59
65
|
#
|
60
66
|
# @overload resolve(description)
|
61
67
|
#
|
62
|
-
# Creates a suitable data type from data type
|
68
|
+
# Creates a new suitable data type from the given data type
|
69
|
+
# description.
|
63
70
|
#
|
64
71
|
# Data type description is a raw `Hash`. Data type description
|
65
72
|
# must have `:type` value. `:type` is the type of the data type.
|
@@ -74,6 +81,8 @@ module Arrow
|
|
74
81
|
# @option description [String, Symbol] :type The type name of
|
75
82
|
# the data type.
|
76
83
|
#
|
84
|
+
# @return [Arrow::DataType] A new suitable data type.
|
85
|
+
#
|
77
86
|
# @example Create a boolean data type
|
78
87
|
# Arrow::DataType.resolve(type: :boolean)
|
79
88
|
#
|
data/lib/arrow/datum.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class Datum
    class << self
      # Tries to build a datum from the given value.
      #
      # Arrow objects (`Arrow::Table`, `Arrow::Array`,
      # `Arrow::ChunkedArray`, `Arrow::Scalar`) are wrapped by the
      # matching datum class. A raw Ruby `Array` is built into an
      # Arrow array first. Other raw Ruby values (`Integer`, `Float`,
      # `true`/`false`, `String`, `Date`, `Arrow::Time`, `Time`,
      # `Arrow::Decimal128`) are converted to a scalar and then
      # wrapped by `Arrow::ScalarDatum`.
      #
      # @param value [Object] The value to be converted.
      #
      # @return [Arrow::Datum, nil] A new datum, or `nil` when the
      #   value isn't convertible (including integers that fit in
      #   neither a 64-bit unsigned nor a 64-bit signed integer).
      #
      # @api private
      def try_convert(value)
        case value
        when Table
          TableDatum.new(value)
        when Array
          ArrayDatum.new(value)
        when ChunkedArray
          ChunkedArrayDatum.new(value)
        when Scalar
          ScalarDatum.new(value)
        when ::Array
          ArrayDatum.new(ArrayBuilder.build(value))
        when Integer
          scalar = build_integer_scalar(value)
          scalar.nil? ? nil : try_convert(scalar)
        when Float
          try_convert(DoubleScalar.new(value))
        when true, false
          try_convert(BooleanScalar.new(value))
        when String
          try_convert(build_string_scalar(value))
        when Date
          date32_value = (value - Date32ArrayBuilder::UNIX_EPOCH).to_i
          try_convert(Date32Scalar.new(date32_value))
        when Time
          # Arrow::Time, not ::Time: it carries its own unit and value.
          case value.unit
          when TimeUnit::SECOND, TimeUnit::MILLI
            data_type = Time32DataType.new(value.unit)
            scalar_class = Time32Scalar
          else
            data_type = Time64DataType.new(value.unit)
            scalar_class = Time64Scalar
          end
          try_convert(scalar_class.new(data_type, value.value))
        when ::Time
          data_type = TimestampDataType.new(:nano)
          timestamp_value = value.to_i * 1_000_000_000 + value.nsec
          try_convert(TimestampScalar.new(data_type, timestamp_value))
        when Decimal128
          # This branch used to be a copy of the ::Time branch
          # (timestamp data type, value.nsec) and couldn't work.
          # NOTE(review): a Decimal128 value alone doesn't tell its
          # precision and scale; the widest precision (38) with
          # scale 0 is assumed here -- confirm this is the intended
          # default.
          data_type = Decimal128DataType.new(38, 0)
          try_convert(Decimal128Scalar.new(data_type, value))
        else
          nil
        end
      end

      private

      # Picks the narrowest integer scalar for the value. For each
      # bit width the unsigned type is tried before the signed one.
      # Returns nil when even 64 bits aren't enough.
      def build_integer_scalar(value)
        case value
        when (0..((2 ** 8) - 1))
          UInt8Scalar.new(value)
        when ((-(2 ** 7))..((2 ** 7) - 1))
          Int8Scalar.new(value)
        when (0..((2 ** 16) - 1))
          UInt16Scalar.new(value)
        when ((-(2 ** 15))..((2 ** 15) - 1))
          Int16Scalar.new(value)
        when (0..((2 ** 32) - 1))
          UInt32Scalar.new(value)
        when ((-(2 ** 31))..((2 ** 31) - 1))
          Int32Scalar.new(value)
        when (0..((2 ** 64) - 1))
          UInt64Scalar.new(value)
        when ((-(2 ** 63))..((2 ** 63) - 1))
          Int64Scalar.new(value)
        else
          nil
        end
      end

      # ASCII only or UTF-8 encoded data becomes a string scalar,
      # anything else a binary scalar. The "large" variants are used
      # when the byte size is greater than 2**31 - 1.
      def build_string_scalar(value)
        large = value.bytesize > ((2 ** 31) - 1)
        if value.ascii_only? || value.encoding == Encoding::UTF_8
          large ? LargeStringScalar.new(value) : StringScalar.new(value)
        else
          large ? LargeBinaryScalar.new(value) : BinaryScalar.new(value)
        end
      end
    end
  end
end
|
@@ -33,7 +33,7 @@ module Arrow
|
|
33
33
|
# @param type_codes [::Array<Integer>] The IDs that indicates
|
34
34
|
# corresponding fields.
|
35
35
|
#
|
36
|
-
# @example Create a dense union data type for {2: visible, 9: count}
|
36
|
+
# @example Create a dense union data type for `{2: visible, 9: count}`
|
37
37
|
# fields = [
|
38
38
|
# Arrow::Field.new("visible", :boolean),
|
39
39
|
# {
|
@@ -57,7 +57,7 @@ module Arrow
|
|
57
57
|
# @option description [::Array<Integer>] :type_codes The IDs
|
58
58
|
# that indicates corresponding fields.
|
59
59
|
#
|
60
|
-
# @example Create a dense union data type for {2: visible, 9: count}
|
60
|
+
# @example Create a dense union data type for `{2: visible, 9: count}`
|
61
61
|
# fields = [
|
62
62
|
# Arrow::Field.new("visible", :boolean),
|
63
63
|
# {
|
@@ -50,7 +50,7 @@ module Arrow
|
|
50
50
|
# @param ordered [Boolean] Whether dictionary contents are
|
51
51
|
# ordered or not.
|
52
52
|
#
|
53
|
-
# @example Create a dictionary data type for {0: "Hello", 1: "World"}
|
53
|
+
# @example Create a dictionary data type for `{0: "Hello", 1: "World"}`
|
54
54
|
# index_data_type = :int8
|
55
55
|
# value_data_type = :string
|
56
56
|
# ordered = true
|
@@ -91,7 +91,7 @@ module Arrow
|
|
91
91
|
# @option description [Boolean] :ordered Whether dictionary
|
92
92
|
# contents are ordered or not.
|
93
93
|
#
|
94
|
-
# @example Create a dictionary data type for {0: "Hello", 1: "World"}
|
94
|
+
# @example Create a dictionary data type for `{0: "Hello", 1: "World"}`
|
95
95
|
# Arrow::DictionaryDataType.new(index_data_type: :int8,
|
96
96
|
# value_data_type: :string,
|
97
97
|
# ordered: true)
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class EqualOptions
    class << self
      # Tries to build equal options from a raw Ruby value.
      #
      # Only a `Hash` is convertible. Each key is mapped to the
      # `#{key}=` writer of a newly created options object.
      #
      # @param value [Object] The value to be converted.
      #
      # @return [Arrow::EqualOptions, nil] New options, or `nil` when
      #   `value` isn't a `Hash` or has a key without a matching
      #   writer.
      #
      # @api private
      def try_convert(value)
        return nil unless value.is_a?(Hash)

        options = new
        value.each do |name, option_value|
          setter = :"#{name}="
          # One unknown key rejects the whole Hash.
          return nil unless options.respond_to?(setter)
          options.__send__(setter, option_value)
        end
        options
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class Expression
    class << self
      # Tries to build an expression from a raw Ruby value.
      #
      # * `Symbol`: A field expression for the name.
      # * `Array`: A call expression. The first element is the
      #   function name (`String` or `Symbol`), the rest are the
      #   arguments. A trailing `Arrow::FunctionOptions` is taken as
      #   the options instead of as an argument.
      # * Others: A literal expression when the value is convertible
      #   by `Arrow::Datum.try_convert`.
      #
      # @param value [Object] The value to be converted.
      #
      # @return [Arrow::Expression, nil] A new expression, or `nil`
      #   when the value isn't convertible.
      #
      # @api private
      def try_convert(value)
        case value
        when Symbol
          FieldExpression.new(value.to_s)
        when ::Array
          function_name, *arguments = value
          unless function_name.is_a?(String) || function_name.is_a?(Symbol)
            return nil
          end
          # `arguments` is a fresh Array made by the splat above, so
          # popping from it doesn't touch the caller's Array.
          options = nil
          options = arguments.pop if arguments.last.is_a?(FunctionOptions)
          CallExpression.new(function_name.to_s, arguments, options)
        else
          datum = Datum.try_convert(value)
          return nil if datum.nil?
          LiteralExpression.new(datum)
        end
      end
    end
  end
end
|