red-arrow 1.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/arrow/converters.hpp +15 -2
- data/ext/arrow/extconf.rb +14 -3
- data/ext/arrow/raw-records.cpp +1 -0
- data/ext/arrow/values.cpp +1 -0
- data/lib/arrow/array-builder.rb +11 -6
- data/lib/arrow/array.rb +118 -0
- data/lib/arrow/bigdecimal-extension.rb +5 -1
- data/lib/arrow/data-type.rb +14 -5
- data/lib/arrow/decimal128-array-builder.rb +21 -25
- data/lib/arrow/decimal128-data-type.rb +2 -0
- data/lib/arrow/decimal128.rb +18 -0
- data/lib/arrow/decimal256-array-builder.rb +61 -0
- data/lib/arrow/decimal256-array.rb +25 -0
- data/lib/arrow/decimal256-data-type.rb +73 -0
- data/lib/arrow/decimal256.rb +60 -0
- data/lib/arrow/dense-union-data-type.rb +2 -2
- data/lib/arrow/dictionary-data-type.rb +2 -2
- data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
- data/lib/arrow/fixed-size-binary-array.rb +26 -0
- data/lib/arrow/loader.rb +15 -0
- data/lib/arrow/sort-key.rb +193 -0
- data/lib/arrow/sort-options.rb +109 -0
- data/lib/arrow/sparse-union-data-type.rb +2 -2
- data/lib/arrow/time32-data-type.rb +2 -2
- data/lib/arrow/time64-data-type.rb +2 -2
- data/lib/arrow/timestamp-data-type.rb +2 -2
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -0
- data/test/raw-records/test-basic-arrays.rb +17 -0
- data/test/raw-records/test-dense-union-array.rb +14 -0
- data/test/raw-records/test-list-array.rb +20 -0
- data/test/raw-records/test-sparse-union-array.rb +14 -0
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array.rb +122 -2
- data/test/test-bigdecimal.rb +20 -3
- data/test/test-decimal128-array-builder.rb +18 -1
- data/test/test-decimal128-data-type.rb +2 -2
- data/test/test-decimal128.rb +38 -0
- data/test/test-decimal256-array-builder.rb +112 -0
- data/test/test-decimal256-array.rb +38 -0
- data/test/test-decimal256-data-type.rb +31 -0
- data/test/test-decimal256.rb +102 -0
- data/test/test-fixed-size-binary-array-builder.rb +92 -0
- data/test/test-fixed-size-binary-array.rb +36 -0
- data/test/test-orc.rb +19 -23
- data/test/test-sort-indices.rb +40 -0
- data/test/test-sort-key.rb +81 -0
- data/test/test-sort-options.rb +58 -0
- data/test/test-struct-array-builder.rb +8 -8
- data/test/test-struct-array.rb +2 -2
- data/test/values/test-basic-arrays.rb +11 -0
- data/test/values/test-dense-union-array.rb +14 -0
- data/test/values/test-list-array.rb +18 -0
- data/test/values/test-sparse-union-array.rb +14 -0
- data/test/values/test-struct-array.rb +15 -0
- metadata +101 -61
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: df66b2dac421ab800bd00510e4a13e250646d1f1c27541f2ca44cd04be38a3ba
|
4
|
+
data.tar.gz: 3c39ec6a1a3ee48708a28583c125d96f9195e9973f6f2f3707cd923eca915329
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9ebf50ffecfdea79140b158f5aeea0cc3bb63403e405bcbba2b34e288b8575df3df859b8f46eb12e26199e566237016d0e3907acc1d7c0d16e82d638c7bdce2e
|
7
|
+
data.tar.gz: 1ff1389d28db025efd82c87e9f85cfb7e3bce21abe85a1b943ca8536bbc1015b0015c05ab45e5d9a35914034a673da0bbc59f8211bb0b620ee4a7487f5fec413
|
data/ext/arrow/converters.hpp
CHANGED
@@ -212,7 +212,17 @@ namespace red_arrow {
|
|
212
212
|
|
213
213
|
inline VALUE convert(const arrow::Decimal128Array& array,
|
214
214
|
const int64_t i) {
|
215
|
-
|
215
|
+
return convert_decimal(std::move(array.FormatValue(i)));
|
216
|
+
}
|
217
|
+
|
218
|
+
inline VALUE convert(const arrow::Decimal256Array& array,
|
219
|
+
const int64_t i) {
|
220
|
+
return convert_decimal(std::move(array.FormatValue(i)));
|
221
|
+
}
|
222
|
+
|
223
|
+
private:
|
224
|
+
inline VALUE convert_decimal(std::string&& value) {
|
225
|
+
decimal_buffer_ = value;
|
216
226
|
return rb_funcall(rb_cObject,
|
217
227
|
id_BigDecimal,
|
218
228
|
1,
|
@@ -221,7 +231,6 @@ namespace red_arrow {
|
|
221
231
|
rb_ascii8bit_encoding()));
|
222
232
|
}
|
223
233
|
|
224
|
-
private:
|
225
234
|
std::string decimal_buffer_;
|
226
235
|
ListArrayValueConverter* list_array_value_converter_;
|
227
236
|
StructArrayValueConverter* struct_array_value_converter_;
|
@@ -289,6 +298,7 @@ namespace red_arrow {
|
|
289
298
|
VISIT(DenseUnion)
|
290
299
|
VISIT(Dictionary)
|
291
300
|
VISIT(Decimal128)
|
301
|
+
VISIT(Decimal256)
|
292
302
|
// TODO
|
293
303
|
// VISIT(Extension)
|
294
304
|
|
@@ -393,6 +403,7 @@ namespace red_arrow {
|
|
393
403
|
VISIT(DenseUnion)
|
394
404
|
VISIT(Dictionary)
|
395
405
|
VISIT(Decimal128)
|
406
|
+
VISIT(Decimal256)
|
396
407
|
// TODO
|
397
408
|
// VISIT(Extension)
|
398
409
|
|
@@ -485,6 +496,7 @@ namespace red_arrow {
|
|
485
496
|
VISIT(DenseUnion)
|
486
497
|
VISIT(Dictionary)
|
487
498
|
VISIT(Decimal128)
|
499
|
+
VISIT(Decimal256)
|
488
500
|
// TODO
|
489
501
|
// VISIT(Extension)
|
490
502
|
|
@@ -609,6 +621,7 @@ namespace red_arrow {
|
|
609
621
|
VISIT(DenseUnion)
|
610
622
|
VISIT(Dictionary)
|
611
623
|
VISIT(Decimal128)
|
624
|
+
VISIT(Decimal256)
|
612
625
|
// TODO
|
613
626
|
// VISIT(Extension)
|
614
627
|
|
data/ext/arrow/extconf.rb
CHANGED
@@ -16,7 +16,8 @@
|
|
16
16
|
# under the License.
|
17
17
|
|
18
18
|
require "extpp"
|
19
|
-
require "mkmf-gnome2"
|
19
|
+
require "mkmf-gnome"
|
20
|
+
require_relative "../../lib/arrow/version"
|
20
21
|
|
21
22
|
arrow_pkg_config_path = ENV["ARROW_PKG_CONFIG_PATH"]
|
22
23
|
if arrow_pkg_config_path
|
@@ -24,7 +25,12 @@ if arrow_pkg_config_path
|
|
24
25
|
ENV["PKG_CONFIG_PATH"] = pkg_config_paths.join(File::PATH_SEPARATOR)
|
25
26
|
end
|
26
27
|
|
27
|
-
unless required_pkg_config_package("arrow",
|
28
|
+
unless required_pkg_config_package([
|
29
|
+
"arrow",
|
30
|
+
Arrow::Version::MAJOR,
|
31
|
+
Arrow::Version::MINOR,
|
32
|
+
Arrow::Version::MICRO,
|
33
|
+
],
|
28
34
|
debian: "libarrow-dev",
|
29
35
|
redhat: "arrow-devel",
|
30
36
|
homebrew: "apache-arrow",
|
@@ -32,7 +38,12 @@ unless required_pkg_config_package("arrow",
|
|
32
38
|
exit(false)
|
33
39
|
end
|
34
40
|
|
35
|
-
unless required_pkg_config_package("arrow-glib",
|
41
|
+
unless required_pkg_config_package([
|
42
|
+
"arrow-glib",
|
43
|
+
Arrow::Version::MAJOR,
|
44
|
+
Arrow::Version::MINOR,
|
45
|
+
Arrow::Version::MICRO,
|
46
|
+
],
|
36
47
|
debian: "libarrow-glib-dev",
|
37
48
|
redhat: "arrow-glib-devel",
|
38
49
|
homebrew: "apache-arrow-glib",
|
data/ext/arrow/raw-records.cpp
CHANGED
data/ext/arrow/values.cpp
CHANGED
data/lib/arrow/array-builder.rb
CHANGED
@@ -115,6 +115,17 @@ module Arrow
|
|
115
115
|
builder: Date32ArrayBuilder.new,
|
116
116
|
detected: true,
|
117
117
|
}
|
118
|
+
when BigDecimal
|
119
|
+
if value.to_arrow.is_a?(Decimal128)
|
120
|
+
{
|
121
|
+
builder: Decimal128ArrayBuilder.new,
|
122
|
+
}
|
123
|
+
else
|
124
|
+
{
|
125
|
+
builder: Decimal256ArrayBuilder.new,
|
126
|
+
detected: true,
|
127
|
+
}
|
128
|
+
end
|
118
129
|
when ::Array
|
119
130
|
sub_builder_info = nil
|
120
131
|
value.each do |sub_value|
|
@@ -194,11 +205,5 @@ module Arrow
|
|
194
205
|
end
|
195
206
|
end
|
196
207
|
end
|
197
|
-
|
198
|
-
def append_nulls(n)
|
199
|
-
n.times do
|
200
|
-
append_null
|
201
|
-
end
|
202
|
-
end
|
203
208
|
end
|
204
209
|
end
|
data/lib/arrow/array.rb
CHANGED
@@ -100,5 +100,123 @@ module Arrow
|
|
100
100
|
is_in_raw(values)
|
101
101
|
end
|
102
102
|
end
|
103
|
+
|
104
|
+
# @api private
|
105
|
+
alias_method :concatenate_raw, :concatenate
|
106
|
+
# Concatenates the given other arrays to the array.
|
107
|
+
#
|
108
|
+
# @param other_arrays [::Array, Arrow::Array] The arrays to be
|
109
|
+
# concatenated.
|
110
|
+
#
|
111
|
+
# Each other array is processed by {#resolve} before they're
|
112
|
+
# concatenated.
|
113
|
+
#
|
114
|
+
# @example Raw Ruby Array
|
115
|
+
# array = Arrow::Int32Array.new([1])
|
116
|
+
# array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
|
117
|
+
#
|
118
|
+
# @example Arrow::Array
|
119
|
+
# array = Arrow::Int32Array.new([1])
|
120
|
+
# array.concatenate(Arrow::Int32Array.new([2, 3]),
|
121
|
+
# Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
|
122
|
+
#
|
123
|
+
# @since 4.0.0
|
124
|
+
def concatenate(*other_arrays)
|
125
|
+
other_arrays = other_arrays.collect do |other_array|
|
126
|
+
resolve(other_array)
|
127
|
+
end
|
128
|
+
concatenate_raw(other_arrays)
|
129
|
+
end
|
130
|
+
|
131
|
+
# Concatenates the given other array to the array.
|
132
|
+
#
|
133
|
+
# If you have multiple arrays to be concatenated, you should use
|
134
|
+
# {#concatenate} to concatenate multiple arrays at once.
|
135
|
+
#
|
136
|
+
# @param other_array [::Array, Arrow::Array] The array to be concatenated.
|
137
|
+
#
|
138
|
+
# `@other_array` is processed by {#resolve} before it's
|
139
|
+
# concatenated.
|
140
|
+
#
|
141
|
+
# @example Raw Ruby Array
|
142
|
+
# Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
|
143
|
+
#
|
144
|
+
# @example Arrow::Array
|
145
|
+
# Arrow::Int32Array.new([1]) +
|
146
|
+
# Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
|
147
|
+
#
|
148
|
+
# @since 4.0.0
|
149
|
+
def +(other_array)
|
150
|
+
concatenate(other_array)
|
151
|
+
end
|
152
|
+
|
153
|
+
# Ensures returning the same data type array from the given array.
|
154
|
+
#
|
155
|
+
# @return [Arrow::Array]
|
156
|
+
#
|
157
|
+
# @overload resolve(other_raw_array)
|
158
|
+
#
|
159
|
+
# @param other_raw_array [::Array] A raw Ruby Array. A new Arrow::Array
|
160
|
+
# is built by `self.class.new`.
|
161
|
+
#
|
162
|
+
# @example Raw Ruby Array
|
163
|
+
# int32_array = Arrow::Int32Array.new([1])
|
164
|
+
# other_array = int32_array.resolve([2, 3, 4])
|
165
|
+
# other_array # => Arrow::Int32Array.new([2, 3, 4])
|
166
|
+
#
|
167
|
+
# @overload resolve(other_array)
|
168
|
+
#
|
169
|
+
# @param other_array [Arrow::Array] Another Arrow::Array.
|
170
|
+
#
|
171
|
+
# If the given other array is an same data type array of
|
172
|
+
# `self`, the given other array is returned as-is.
|
173
|
+
#
|
174
|
+
# If the given other array isn't an same data type array of
|
175
|
+
# `self`, the given other array is casted.
|
176
|
+
#
|
177
|
+
# @example Same data type
|
178
|
+
# int32_array = Arrow::Int32Array.new([1])
|
179
|
+
# other_int32_array = Arrow::Int32Array.new([2, 3, 4])
|
180
|
+
# other_array = int32_array.resolve(other_int32_array)
|
181
|
+
# other_array.object_id == other_int32_array.object_id
|
182
|
+
#
|
183
|
+
# @example Other data type
|
184
|
+
# int32_array = Arrow::Int32Array.new([1])
|
185
|
+
# other_int8_array = Arrow::Int8Array.new([2, 3, 4])
|
186
|
+
# other_array = int32_array.resolve(other_int32_array)
|
187
|
+
# other_array #=> Arrow::Int32Array.new([2, 3, 4])
|
188
|
+
#
|
189
|
+
# @since 4.0.0
|
190
|
+
def resolve(other_array)
|
191
|
+
if other_array.is_a?(::Array)
|
192
|
+
builder_class = self.class.builder_class
|
193
|
+
if builder_class.nil?
|
194
|
+
message =
|
195
|
+
"[array][resolve] can't build #{value_data_type} array " +
|
196
|
+
"from raw Ruby Array"
|
197
|
+
raise ArgumentError, message
|
198
|
+
end
|
199
|
+
if builder_class.buildable?([other_array])
|
200
|
+
other_array = builder_class.build(other_array)
|
201
|
+
elsif builder_class.buildable?([value_data_type, other_array])
|
202
|
+
other_array = builder_class.build(value_data_type, other_array)
|
203
|
+
else
|
204
|
+
message =
|
205
|
+
"[array][resolve] need to implement " +
|
206
|
+
"a feature that building #{value_data_type} array " +
|
207
|
+
"from raw Ruby Array"
|
208
|
+
raise NotImpelemented, message
|
209
|
+
end
|
210
|
+
other_array
|
211
|
+
elsif other_array.respond_to?(:value_data_type)
|
212
|
+
return other_array if value_data_type == other_array.value_data_type
|
213
|
+
other_array.cast(value_data_type)
|
214
|
+
else
|
215
|
+
message =
|
216
|
+
"[array][resolve] can't build #{value_data_type} array: " +
|
217
|
+
"#{other_array.inspect}"
|
218
|
+
raise ArgumentError, message
|
219
|
+
end
|
220
|
+
end
|
103
221
|
end
|
104
222
|
end
|
data/lib/arrow/data-type.rb
CHANGED
@@ -18,7 +18,7 @@
|
|
18
18
|
module Arrow
|
19
19
|
class DataType
|
20
20
|
class << self
|
21
|
-
#
|
21
|
+
# Ensure returning suitable {Arrow::DataType}.
|
22
22
|
#
|
23
23
|
# @overload resolve(data_type)
|
24
24
|
#
|
@@ -31,17 +31,21 @@ module Arrow
|
|
31
31
|
#
|
32
32
|
# @overload resolve(name)
|
33
33
|
#
|
34
|
-
# Creates a suitable data type from type name. For
|
35
|
-
# you can create {Arrow::BooleanDataType} from
|
34
|
+
# Creates a suitable data type from the given type name. For
|
35
|
+
# example, you can create {Arrow::BooleanDataType} from
|
36
|
+
# `:boolean`.
|
36
37
|
#
|
37
38
|
# @param name [String, Symbol] The type name of the data type.
|
38
39
|
#
|
40
|
+
# @return [Arrow::DataType] A new suitable data type.
|
41
|
+
#
|
39
42
|
# @example Create a boolean data type
|
40
43
|
# Arrow::DataType.resolve(:boolean)
|
41
44
|
#
|
42
45
|
# @overload resolve(name_with_arguments)
|
43
46
|
#
|
44
|
-
# Creates a suitable data type from type name
|
47
|
+
# Creates a new suitable data type from the given type name
|
48
|
+
# with arguments.
|
45
49
|
#
|
46
50
|
# @param name_with_arguments [::Array<String, ...>]
|
47
51
|
# The type name of the data type as the first element.
|
@@ -51,6 +55,8 @@ module Arrow
|
|
51
55
|
# For example, {Arrow::TimestampDataType} needs unit as
|
52
56
|
# additional information.
|
53
57
|
#
|
58
|
+
# @return [Arrow::DataType] A new suitable data type.
|
59
|
+
#
|
54
60
|
# @example Create a boolean data type
|
55
61
|
# Arrow::DataType.resolve([:boolean])
|
56
62
|
#
|
@@ -59,7 +65,8 @@ module Arrow
|
|
59
65
|
#
|
60
66
|
# @overload resolve(description)
|
61
67
|
#
|
62
|
-
# Creates a suitable data type from data type
|
68
|
+
# Creates a new suitable data type from the given data type
|
69
|
+
# description.
|
63
70
|
#
|
64
71
|
# Data type description is a raw `Hash`. Data type description
|
65
72
|
# must have `:type` value. `:type` is the type of the data type.
|
@@ -74,6 +81,8 @@ module Arrow
|
|
74
81
|
# @option description [String, Symbol] :type The type name of
|
75
82
|
# the data type.
|
76
83
|
#
|
84
|
+
# @return [Arrow::DataType] A new suitable data type.
|
85
|
+
#
|
77
86
|
# @example Create a boolean data type
|
78
87
|
# Arrow::DataType.resolve(type: :boolean)
|
79
88
|
#
|
data/lib/arrow/decimal128-array-builder.rb
CHANGED
@@ -26,36 +26,32 @@ module Arrow
|
|
26
26
|
|
27
27
|
alias_method :append_value_raw, :append_value
|
28
28
|
def append_value(value)
|
29
|
-
|
30
|
-
when nil
|
31
|
-
return append_null
|
32
|
-
when String
|
33
|
-
value = Decimal128.new(value)
|
34
|
-
when Float
|
35
|
-
value = Decimal128.new(value.to_s)
|
36
|
-
when BigDecimal
|
37
|
-
value = value.to_arrow
|
38
|
-
end
|
39
|
-
append_value_raw(value)
|
29
|
+
append_value_raw(normalize_value(value))
|
40
30
|
end
|
41
31
|
|
32
|
+
alias_method :append_values_raw, :append_values
|
42
33
|
def append_values(values, is_valids=nil)
|
43
|
-
if
|
44
|
-
|
45
|
-
|
46
|
-
append_value(values[i])
|
47
|
-
else
|
48
|
-
append_null
|
49
|
-
end
|
34
|
+
if values.is_a?(::Array)
|
35
|
+
values = values.collect do |value|
|
36
|
+
normalize_value(value)
|
50
37
|
end
|
38
|
+
append_values_raw(values, is_valids)
|
51
39
|
else
|
52
|
-
values
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
40
|
+
append_values_packed(values, is_valids)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
def normalize_value(value)
|
46
|
+
case value
|
47
|
+
when String
|
48
|
+
Decimal128.new(value)
|
49
|
+
when Float
|
50
|
+
Decimal128.new(value.to_s)
|
51
|
+
when BigDecimal
|
52
|
+
Decimal128.new(value.to_s)
|
53
|
+
else
|
54
|
+
value
|
59
55
|
end
|
60
56
|
end
|
61
57
|
end
|
data/lib/arrow/decimal128.rb
CHANGED
@@ -38,5 +38,23 @@ module Arrow
|
|
38
38
|
to_s_raw
|
39
39
|
end
|
40
40
|
end
|
41
|
+
|
42
|
+
alias_method :abs!, :abs
|
43
|
+
|
44
|
+
# @since 3.0.0
|
45
|
+
def abs
|
46
|
+
copied = dup
|
47
|
+
copied.abs!
|
48
|
+
copied
|
49
|
+
end
|
50
|
+
|
51
|
+
alias_method :negate!, :negate
|
52
|
+
|
53
|
+
# @since 3.0.0
|
54
|
+
def negate
|
55
|
+
copied = dup
|
56
|
+
copied.negate!
|
57
|
+
copied
|
58
|
+
end
|
41
59
|
end
|
42
60
|
end
|