red-arrow 2.0.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/arrow.cpp +3 -0
  3. data/ext/arrow/converters.hpp +15 -2
  4. data/ext/arrow/memory-view.cpp +311 -0
  5. data/ext/arrow/memory-view.hpp +26 -0
  6. data/ext/arrow/raw-records.cpp +1 -0
  7. data/ext/arrow/values.cpp +1 -0
  8. data/lib/arrow/array-builder.rb +11 -6
  9. data/lib/arrow/array.rb +130 -0
  10. data/lib/arrow/bigdecimal-extension.rb +5 -1
  11. data/lib/arrow/buffer.rb +10 -6
  12. data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
  13. data/lib/arrow/data-type.rb +14 -5
  14. data/lib/arrow/datum.rb +98 -0
  15. data/lib/arrow/decimal128-array-builder.rb +21 -25
  16. data/lib/arrow/decimal128-data-type.rb +2 -0
  17. data/lib/arrow/decimal128.rb +18 -0
  18. data/lib/arrow/decimal256-array-builder.rb +61 -0
  19. data/lib/arrow/decimal256-array.rb +25 -0
  20. data/lib/arrow/decimal256-data-type.rb +73 -0
  21. data/lib/arrow/decimal256.rb +60 -0
  22. data/lib/arrow/dense-union-data-type.rb +2 -2
  23. data/lib/arrow/dictionary-data-type.rb +2 -2
  24. data/lib/arrow/equal-options.rb +38 -0
  25. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  26. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  27. data/lib/arrow/loader.rb +46 -0
  28. data/lib/arrow/scalar.rb +32 -0
  29. data/lib/arrow/sort-key.rb +193 -0
  30. data/lib/arrow/sort-options.rb +109 -0
  31. data/lib/arrow/sparse-union-data-type.rb +2 -2
  32. data/lib/arrow/table.rb +2 -2
  33. data/lib/arrow/time32-data-type.rb +2 -2
  34. data/lib/arrow/time64-data-type.rb +2 -2
  35. data/lib/arrow/timestamp-data-type.rb +2 -2
  36. data/lib/arrow/version.rb +1 -1
  37. data/red-arrow.gemspec +3 -1
  38. data/test/helper.rb +1 -0
  39. data/test/raw-records/test-basic-arrays.rb +17 -0
  40. data/test/raw-records/test-dense-union-array.rb +14 -0
  41. data/test/raw-records/test-list-array.rb +20 -0
  42. data/test/raw-records/test-sparse-union-array.rb +14 -0
  43. data/test/raw-records/test-struct-array.rb +15 -0
  44. data/test/test-array.rb +156 -2
  45. data/test/test-bigdecimal.rb +20 -3
  46. data/test/test-boolean-scalar.rb +26 -0
  47. data/test/test-decimal128-array-builder.rb +18 -1
  48. data/test/test-decimal128-data-type.rb +2 -2
  49. data/test/test-decimal128.rb +38 -0
  50. data/test/test-decimal256-array-builder.rb +112 -0
  51. data/test/test-decimal256-array.rb +38 -0
  52. data/test/test-decimal256-data-type.rb +31 -0
  53. data/test/test-decimal256.rb +102 -0
  54. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  55. data/test/test-fixed-size-binary-array.rb +36 -0
  56. data/test/test-float-scalar.rb +46 -0
  57. data/test/test-function.rb +176 -0
  58. data/test/test-memory-view.rb +434 -0
  59. data/test/test-orc.rb +19 -23
  60. data/test/test-sort-indices.rb +40 -0
  61. data/test/test-sort-key.rb +81 -0
  62. data/test/test-sort-options.rb +58 -0
  63. data/test/test-struct-array-builder.rb +8 -8
  64. data/test/test-struct-array.rb +2 -2
  65. data/test/values/test-basic-arrays.rb +11 -0
  66. data/test/values/test-dense-union-array.rb +14 -0
  67. data/test/values/test-list-array.rb +18 -0
  68. data/test/values/test-sparse-union-array.rb +14 -0
  69. data/test/values/test-struct-array.rb +15 -0
  70. metadata +127 -59
data/lib/arrow/array.rb CHANGED
@@ -55,6 +55,18 @@ module Arrow
55
55
  end
56
56
  end
57
57
 
58
+ # @param other [Arrow::Array] The array to be compared.
59
+ # @param options [Arrow::EqualOptions, Hash] (nil)
60
+ # The options to customize how to compare.
61
+ #
62
+ # @return [Boolean]
63
+ # `true` if both of them have the same data, `false` otherwise.
64
+ #
65
+ # @since 5.0.0
66
+ def equal_array?(other, options=nil)
67
+ equal_options(other, options)
68
+ end
69
+
58
70
  def each
59
71
  return to_enum(__method__) unless block_given?
60
72
 
@@ -100,5 +112,123 @@ module Arrow
100
112
  is_in_raw(values)
101
113
  end
102
114
  end
115
+
116
+ # @api private
117
+ alias_method :concatenate_raw, :concatenate
118
+ # Concatenates the given other arrays to the array.
119
+ #
120
+ # @param other_arrays [::Array, Arrow::Array] The arrays to be
121
+ # concatenated.
122
+ #
123
+ # Each other array is processed by {#resolve} before they're
124
+ # concatenated.
125
+ #
126
+ # @example Raw Ruby Array
127
+ # array = Arrow::Int32Array.new([1])
128
+ # array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
129
+ #
130
+ # @example Arrow::Array
131
+ # array = Arrow::Int32Array.new([1])
132
+ # array.concatenate(Arrow::Int32Array.new([2, 3]),
133
+ # Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
134
+ #
135
+ # @since 4.0.0
136
+ def concatenate(*other_arrays)
137
+ other_arrays = other_arrays.collect do |other_array|
138
+ resolve(other_array)
139
+ end
140
+ concatenate_raw(other_arrays)
141
+ end
142
+
143
+ # Concatenates the given other array to the array.
144
+ #
145
+ # If you have multiple arrays to be concatenated, you should use
146
+ # {#concatenate} to concatenate multiple arrays at once.
147
+ #
148
+ # @param other_array [::Array, Arrow::Array] The array to be concatenated.
149
+ #
150
+ # `other_array` is processed by {#resolve} before it's
151
+ # concatenated.
152
+ #
153
+ # @example Raw Ruby Array
154
+ # Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
155
+ #
156
+ # @example Arrow::Array
157
+ # Arrow::Int32Array.new([1]) +
158
+ # Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
159
+ #
160
+ # @since 4.0.0
161
+ def +(other_array)
162
+ concatenate(other_array)
163
+ end
164
+
165
+ # Ensures returning the same data type array from the given array.
166
+ #
167
+ # @return [Arrow::Array]
168
+ #
169
+ # @overload resolve(other_raw_array)
170
+ #
171
+ # @param other_raw_array [::Array] A raw Ruby Array. A new Arrow::Array
172
+ # is built by `self.class.new`.
173
+ #
174
+ # @example Raw Ruby Array
175
+ # int32_array = Arrow::Int32Array.new([1])
176
+ # other_array = int32_array.resolve([2, 3, 4])
177
+ # other_array # => Arrow::Int32Array.new([2, 3, 4])
178
+ #
179
+ # @overload resolve(other_array)
180
+ #
181
+ # @param other_array [Arrow::Array] Another Arrow::Array.
182
+ #
183
+ # If the given other array is a same data type array of
184
+ # `self`, the given other array is returned as-is.
185
+ #
186
+ # If the given other array isn't a same data type array of
187
+ # `self`, the given other array is cast.
188
+ #
189
+ # @example Same data type
190
+ # int32_array = Arrow::Int32Array.new([1])
191
+ # other_int32_array = Arrow::Int32Array.new([2, 3, 4])
192
+ # other_array = int32_array.resolve(other_int32_array)
193
+ # other_array.object_id == other_int32_array.object_id # => true
194
+ #
195
+ # @example Other data type
196
+ # int32_array = Arrow::Int32Array.new([1])
197
+ # other_int8_array = Arrow::Int8Array.new([2, 3, 4])
198
+ # other_array = int32_array.resolve(other_int8_array)
199
+ # other_array #=> Arrow::Int32Array.new([2, 3, 4])
200
+ #
201
+ # @since 4.0.0
202
+ def resolve(other_array)
203
+ if other_array.is_a?(::Array)
204
+ builder_class = self.class.builder_class
205
+ if builder_class.nil?
206
+ message =
207
+ "[array][resolve] can't build #{value_data_type} array " +
208
+ "from raw Ruby Array"
209
+ raise ArgumentError, message
210
+ end
211
+ if builder_class.buildable?([other_array])
212
+ other_array = builder_class.build(other_array)
213
+ elsif builder_class.buildable?([value_data_type, other_array])
214
+ other_array = builder_class.build(value_data_type, other_array)
215
+ else
216
+ message =
217
+ "[array][resolve] need to implement " +
218
+ "a feature that building #{value_data_type} array " +
219
+ "from raw Ruby Array"
220
+ raise NotImpelemented, message
221
+ end
222
+ other_array
223
+ elsif other_array.respond_to?(:value_data_type)
224
+ return other_array if value_data_type == other_array.value_data_type
225
+ other_array.cast(value_data_type)
226
+ else
227
+ message =
228
+ "[array][resolve] can't build #{value_data_type} array: " +
229
+ "#{other_array.inspect}"
230
+ raise ArgumentError, message
231
+ end
232
+ end
103
233
  end
104
234
  end
@@ -19,6 +19,10 @@ require "bigdecimal"
19
19
 
20
20
  class BigDecimal
21
21
  def to_arrow
22
- Arrow::Decimal128.new(to_s)
22
+ if precision <= Arrow::Decimal128DataType::MAX_PRECISION
23
+ Arrow::Decimal128.new(to_s)
24
+ else
25
+ Arrow::Decimal256.new(to_s)
26
+ end
23
27
  end
24
28
  end
data/lib/arrow/buffer.rb CHANGED
@@ -17,12 +17,16 @@
17
17
 
18
18
  module Arrow
19
19
  class Buffer
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
-
23
- def initialize(data)
24
- @data = data
25
- initialize_raw(data)
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when String
25
+ new(value)
26
+ else
27
+ nil
28
+ end
29
+ end
26
30
  end
27
31
  end
28
32
  end
@@ -0,0 +1,25 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ module ConstructorArgumentsGCGuardable
20
+ def initialize(*args)
21
+ super
22
+ @arguments = args
23
+ end
24
+ end
25
+ end
@@ -18,7 +18,7 @@
18
18
  module Arrow
19
19
  class DataType
20
20
  class << self
21
- # Creates a new suitable {Arrow::DataType}.
21
+ # Ensures returning a suitable {Arrow::DataType}.
22
22
  #
23
23
  # @overload resolve(data_type)
24
24
  #
@@ -31,17 +31,21 @@ module Arrow
31
31
  #
32
32
  # @overload resolve(name)
33
33
  #
34
- # Creates a suitable data type from type name. For example,
35
- # you can create {Arrow::BooleanDataType} from `:boolean`.
34
+ # Creates a suitable data type from the given type name. For
35
+ # example, you can create {Arrow::BooleanDataType} from
36
+ # `:boolean`.
36
37
  #
37
38
  # @param name [String, Symbol] The type name of the data type.
38
39
  #
40
+ # @return [Arrow::DataType] A new suitable data type.
41
+ #
39
42
  # @example Create a boolean data type
40
43
  # Arrow::DataType.resolve(:boolean)
41
44
  #
42
45
  # @overload resolve(name_with_arguments)
43
46
  #
44
- # Creates a suitable data type from type name with arguments.
47
+ # Creates a new suitable data type from the given type name
48
+ # with arguments.
45
49
  #
46
50
  # @param name_with_arguments [::Array<String, ...>]
47
51
  # The type name of the data type as the first element.
@@ -51,6 +55,8 @@ module Arrow
51
55
  # For example, {Arrow::TimestampDataType} needs unit as
52
56
  # additional information.
53
57
  #
58
+ # @return [Arrow::DataType] A new suitable data type.
59
+ #
54
60
  # @example Create a boolean data type
55
61
  # Arrow::DataType.resolve([:boolean])
56
62
  #
@@ -59,7 +65,8 @@ module Arrow
59
65
  #
60
66
  # @overload resolve(description)
61
67
  #
62
- # Creates a suitable data type from data type description.
68
+ # Creates a new suitable data type from the given data type
69
+ # description.
63
70
  #
64
71
  # Data type description is a raw `Hash`. Data type description
65
72
  # must have `:type` value. `:type` is the type of the data type.
@@ -74,6 +81,8 @@ module Arrow
74
81
  # @option description [String, Symbol] :type The type name of
75
82
  # the data type.
76
83
  #
84
+ # @return [Arrow::DataType] A new suitable data type.
85
+ #
77
86
  # @example Create a boolean data type
78
87
  # Arrow::DataType.resolve(type: :boolean)
79
88
  #
@@ -0,0 +1,98 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Datum
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Array
25
+ ArrayDatum.new(value)
26
+ when ChunkedArray
27
+ ChunkedArrayDatum.new(value)
28
+ when Scalar
29
+ ScalarDatum.new(value)
30
+ when ::Array
31
+ ArrayDatum.new(ArrayBuilder.build(value))
32
+ when Integer
33
+ case value
34
+ when (0..((2 ** 8) - 1))
35
+ try_convert(UInt8Scalar.new(value))
36
+ when ((-(2 ** 7))..((2 ** 7) - 1))
37
+ try_convert(Int8Scalar.new(value))
38
+ when (0..((2 ** 16) - 1))
39
+ try_convert(UInt16Scalar.new(value))
40
+ when ((-(2 ** 15))..((2 ** 15) - 1))
41
+ try_convert(Int16Scalar.new(value))
42
+ when (0..((2 ** 32) - 1))
43
+ try_convert(UInt32Scalar.new(value))
44
+ when ((-(2 ** 31))..((2 ** 31) - 1))
45
+ try_convert(Int32Scalar.new(value))
46
+ when (0..((2 ** 64) - 1))
47
+ try_convert(UInt64Scalar.new(value))
48
+ when ((-(2 ** 63))..((2 ** 63) - 1))
49
+ try_convert(Int64Scalar.new(value))
50
+ else
51
+ nil
52
+ end
53
+ when Float
54
+ try_convert(DoubleScalar.new(value))
55
+ when true, false
56
+ try_convert(BooleanScalar.new(value))
57
+ when String
58
+ if value.ascii_only? or value.encoding == Encoding::UTF_8
59
+ if value.bytesize <= ((2 ** 31) - 1)
60
+ try_convert(StringScalar.new(value))
61
+ else
62
+ try_convert(LargeStringScalar.new(value))
63
+ end
64
+ else
65
+ if value.bytesize <= ((2 ** 31) - 1)
66
+ try_convert(BinaryScalar.new(value))
67
+ else
68
+ try_convert(LargeBinaryScalar.new(value))
69
+ end
70
+ end
71
+ when Date
72
+ date32_value = (value - Date32ArrayBuilder::UNIX_EPOCH).to_i
73
+ try_convert(Date32Scalar.new(date32_value))
74
+ when Time
75
+ case value.unit
76
+ when TimeUnit::SECOND, TimeUnit::MILLI
77
+ data_type = Time32DataType.new(value.unit)
78
+ scalar_class = Time32Scalar
79
+ else
80
+ data_type = Time64DataType.new(value.unit)
81
+ scalar_class = Time64Scalar
82
+ end
83
+ try_convert(scalar_class.new(data_type, value.value))
84
+ when ::Time
85
+ data_type = TimestampDataType.new(:nano)
86
+ timestamp_value = value.to_i * 1_000_000_000 + value.nsec
87
+ try_convert(TimestampScalar.new(data_type, timestamp_value))
88
+ when Decimal128
89
+ data_type = TimestampDataType.new(:nano)
90
+ timestamp_value = value.to_i * 1_000_000_000 + value.nsec
91
+ try_convert(Decimal128Scalar.new(data_type, timestamp_value))
92
+ else
93
+ nil
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end
@@ -26,36 +26,32 @@ module Arrow
26
26
 
27
27
  alias_method :append_value_raw, :append_value
28
28
  def append_value(value)
29
- case value
30
- when nil
31
- return append_null
32
- when String
33
- value = Decimal128.new(value)
34
- when Float
35
- value = Decimal128.new(value.to_s)
36
- when BigDecimal
37
- value = value.to_arrow
38
- end
39
- append_value_raw(value)
29
+ append_value_raw(normalize_value(value))
40
30
  end
41
31
 
32
+ alias_method :append_values_raw, :append_values
42
33
  def append_values(values, is_valids=nil)
43
- if is_valids
44
- is_valids.each_with_index do |is_valid, i|
45
- if is_valid
46
- append_value(values[i])
47
- else
48
- append_null
49
- end
34
+ if values.is_a?(::Array)
35
+ values = values.collect do |value|
36
+ normalize_value(value)
50
37
  end
38
+ append_values_raw(values, is_valids)
51
39
  else
52
- values.each do |value|
53
- if value.nil?
54
- append_null
55
- else
56
- append_value(value)
57
- end
58
- end
40
+ append_values_packed(values, is_valids)
41
+ end
42
+ end
43
+
44
+ private
45
+ def normalize_value(value)
46
+ case value
47
+ when String
48
+ Decimal128.new(value)
49
+ when Float
50
+ Decimal128.new(value.to_s)
51
+ when BigDecimal
52
+ Decimal128.new(value.to_s)
53
+ else
54
+ value
59
55
  end
60
56
  end
61
57
  end
@@ -17,6 +17,8 @@
17
17
 
18
18
  module Arrow
19
19
  class Decimal128DataType
20
+ MAX_PRECISION = max_precision
21
+
20
22
  alias_method :initialize_raw, :initialize
21
23
  private :initialize_raw
22
24
 
@@ -38,5 +38,23 @@ module Arrow
38
38
  to_s_raw
39
39
  end
40
40
  end
41
+
42
+ alias_method :abs!, :abs
43
+
44
+ # @since 3.0.0
45
+ def abs
46
+ copied = dup
47
+ copied.abs!
48
+ copied
49
+ end
50
+
51
+ alias_method :negate!, :negate
52
+
53
+ # @since 3.0.0
54
+ def negate
55
+ copied = dup
56
+ copied.negate!
57
+ copied
58
+ end
41
59
  end
42
60
  end