red-arrow 2.0.0 → 5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/arrow.cpp +3 -0
  3. data/ext/arrow/converters.hpp +15 -2
  4. data/ext/arrow/memory-view.cpp +311 -0
  5. data/ext/arrow/memory-view.hpp +26 -0
  6. data/ext/arrow/raw-records.cpp +1 -0
  7. data/ext/arrow/values.cpp +1 -0
  8. data/lib/arrow/array-builder.rb +11 -6
  9. data/lib/arrow/array.rb +130 -0
  10. data/lib/arrow/bigdecimal-extension.rb +5 -1
  11. data/lib/arrow/buffer.rb +10 -6
  12. data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
  13. data/lib/arrow/data-type.rb +14 -5
  14. data/lib/arrow/datum.rb +98 -0
  15. data/lib/arrow/decimal128-array-builder.rb +21 -25
  16. data/lib/arrow/decimal128-data-type.rb +2 -0
  17. data/lib/arrow/decimal128.rb +18 -0
  18. data/lib/arrow/decimal256-array-builder.rb +61 -0
  19. data/lib/arrow/decimal256-array.rb +25 -0
  20. data/lib/arrow/decimal256-data-type.rb +73 -0
  21. data/lib/arrow/decimal256.rb +60 -0
  22. data/lib/arrow/dense-union-data-type.rb +2 -2
  23. data/lib/arrow/dictionary-data-type.rb +2 -2
  24. data/lib/arrow/equal-options.rb +38 -0
  25. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  26. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  27. data/lib/arrow/loader.rb +46 -0
  28. data/lib/arrow/scalar.rb +32 -0
  29. data/lib/arrow/sort-key.rb +193 -0
  30. data/lib/arrow/sort-options.rb +109 -0
  31. data/lib/arrow/sparse-union-data-type.rb +2 -2
  32. data/lib/arrow/table.rb +2 -2
  33. data/lib/arrow/time32-data-type.rb +2 -2
  34. data/lib/arrow/time64-data-type.rb +2 -2
  35. data/lib/arrow/timestamp-data-type.rb +2 -2
  36. data/lib/arrow/version.rb +1 -1
  37. data/red-arrow.gemspec +3 -1
  38. data/test/helper.rb +1 -0
  39. data/test/raw-records/test-basic-arrays.rb +17 -0
  40. data/test/raw-records/test-dense-union-array.rb +14 -0
  41. data/test/raw-records/test-list-array.rb +20 -0
  42. data/test/raw-records/test-sparse-union-array.rb +14 -0
  43. data/test/raw-records/test-struct-array.rb +15 -0
  44. data/test/test-array.rb +156 -2
  45. data/test/test-bigdecimal.rb +20 -3
  46. data/test/test-boolean-scalar.rb +26 -0
  47. data/test/test-decimal128-array-builder.rb +18 -1
  48. data/test/test-decimal128-data-type.rb +2 -2
  49. data/test/test-decimal128.rb +38 -0
  50. data/test/test-decimal256-array-builder.rb +112 -0
  51. data/test/test-decimal256-array.rb +38 -0
  52. data/test/test-decimal256-data-type.rb +31 -0
  53. data/test/test-decimal256.rb +102 -0
  54. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  55. data/test/test-fixed-size-binary-array.rb +36 -0
  56. data/test/test-float-scalar.rb +46 -0
  57. data/test/test-function.rb +176 -0
  58. data/test/test-memory-view.rb +434 -0
  59. data/test/test-orc.rb +19 -23
  60. data/test/test-sort-indices.rb +40 -0
  61. data/test/test-sort-key.rb +81 -0
  62. data/test/test-sort-options.rb +58 -0
  63. data/test/test-struct-array-builder.rb +8 -8
  64. data/test/test-struct-array.rb +2 -2
  65. data/test/values/test-basic-arrays.rb +11 -0
  66. data/test/values/test-dense-union-array.rb +14 -0
  67. data/test/values/test-list-array.rb +18 -0
  68. data/test/values/test-sparse-union-array.rb +14 -0
  69. data/test/values/test-struct-array.rb +15 -0
  70. metadata +127 -59
data/lib/arrow/array.rb CHANGED
@@ -55,6 +55,18 @@ module Arrow
55
55
  end
56
56
  end
57
57
 
58
+ # @param other [Arrow::Array] The array to be compared.
59
+ # @param options [Arrow::EqualOptions, Hash] (nil)
60
+ # The options to customize how to compare.
61
+ #
62
+ # @return [Boolean]
63
+ # `true` if both of them have the same data, `false` otherwise.
64
+ #
65
+ # @since 5.0.0
66
+ def equal_array?(other, options=nil)
67
+ equal_options(other, options)
68
+ end
69
+
58
70
  def each
59
71
  return to_enum(__method__) unless block_given?
60
72
 
@@ -100,5 +112,123 @@ module Arrow
100
112
  is_in_raw(values)
101
113
  end
102
114
  end
115
+
116
+ # @api private
117
+ alias_method :concatenate_raw, :concatenate
118
+ # Concatenates the given other arrays to the array.
119
+ #
120
+ # @param other_arrays [::Array, Arrow::Array] The arrays to be
121
+ # concatenated.
122
+ #
123
+ # Each other array is processed by {#resolve} before they're
124
+ # concatenated.
125
+ #
126
+ # @example Raw Ruby Array
127
+ # array = Arrow::Int32Array.new([1])
128
+ # array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
129
+ #
130
+ # @example Arrow::Array
131
+ # array = Arrow::Int32Array.new([1])
132
+ # array.concatenate(Arrow::Int32Array.new([2, 3]),
133
+ # Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
134
+ #
135
+ # @since 4.0.0
136
+ def concatenate(*other_arrays)
137
+ other_arrays = other_arrays.collect do |other_array|
138
+ resolve(other_array)
139
+ end
140
+ concatenate_raw(other_arrays)
141
+ end
142
+
143
+ # Concatenates the given other array to the array.
144
+ #
145
+ # If you have multiple arrays to be concatenated, you should use
146
+ # {#concatenate} to concatenate multiple arrays at once.
147
+ #
148
+ # @param other_array [::Array, Arrow::Array] The array to be concatenated.
149
+ #
150
+ # `other_array` is processed by {#resolve} before it's
151
+ # concatenated.
152
+ #
153
+ # @example Raw Ruby Array
154
+ # Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
155
+ #
156
+ # @example Arrow::Array
157
+ # Arrow::Int32Array.new([1]) +
158
+ # Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
159
+ #
160
+ # @since 4.0.0
161
+ def +(other_array)
162
+ concatenate(other_array)
163
+ end
164
+
165
+ # Ensures returning the same data type array from the given array.
166
+ #
167
+ # @return [Arrow::Array]
168
+ #
169
+ # @overload resolve(other_raw_array)
170
+ #
171
+ # @param other_raw_array [::Array] A raw Ruby Array. A new Arrow::Array
172
+ # is built by `self.class.new`.
173
+ #
174
+ # @example Raw Ruby Array
175
+ # int32_array = Arrow::Int32Array.new([1])
176
+ # other_array = int32_array.resolve([2, 3, 4])
177
+ # other_array # => Arrow::Int32Array.new([2, 3, 4])
178
+ #
179
+ # @overload resolve(other_array)
180
+ #
181
+ # @param other_array [Arrow::Array] Another Arrow::Array.
182
+ #
183
+ # If the given other array has the same data type as
184
+ # `self`, the given other array is returned as-is.
185
+ #
186
+ # If the given other array doesn't have the same data type as
187
+ # `self`, the given other array is cast.
188
+ #
189
+ # @example Same data type
190
+ # int32_array = Arrow::Int32Array.new([1])
191
+ # other_int32_array = Arrow::Int32Array.new([2, 3, 4])
192
+ # other_array = int32_array.resolve(other_int32_array)
193
+ # other_array.object_id == other_int32_array.object_id
194
+ #
195
+ # @example Other data type
196
+ # int32_array = Arrow::Int32Array.new([1])
197
+ # other_int8_array = Arrow::Int8Array.new([2, 3, 4])
198
+ # other_array = int32_array.resolve(other_int8_array)
199
+ # other_array #=> Arrow::Int32Array.new([2, 3, 4])
200
+ #
201
+ # @since 4.0.0
202
+ def resolve(other_array)
203
+ if other_array.is_a?(::Array)
204
+ builder_class = self.class.builder_class
205
+ if builder_class.nil?
206
+ message =
207
+ "[array][resolve] can't build #{value_data_type} array " +
208
+ "from raw Ruby Array"
209
+ raise ArgumentError, message
210
+ end
211
+ if builder_class.buildable?([other_array])
212
+ other_array = builder_class.build(other_array)
213
+ elsif builder_class.buildable?([value_data_type, other_array])
214
+ other_array = builder_class.build(value_data_type, other_array)
215
+ else
216
+ message =
217
+ "[array][resolve] need to implement " +
218
+ "a feature that building #{value_data_type} array " +
219
+ "from raw Ruby Array"
220
+ raise NotImpelemented, message
221
+ end
222
+ other_array
223
+ elsif other_array.respond_to?(:value_data_type)
224
+ return other_array if value_data_type == other_array.value_data_type
225
+ other_array.cast(value_data_type)
226
+ else
227
+ message =
228
+ "[array][resolve] can't build #{value_data_type} array: " +
229
+ "#{other_array.inspect}"
230
+ raise ArgumentError, message
231
+ end
232
+ end
103
233
  end
104
234
  end
@@ -19,6 +19,10 @@ require "bigdecimal"
19
19
 
20
20
  class BigDecimal
21
21
  def to_arrow
22
- Arrow::Decimal128.new(to_s)
22
+ if precision <= Arrow::Decimal128DataType::MAX_PRECISION
23
+ Arrow::Decimal128.new(to_s)
24
+ else
25
+ Arrow::Decimal256.new(to_s)
26
+ end
23
27
  end
24
28
  end
data/lib/arrow/buffer.rb CHANGED
@@ -17,12 +17,16 @@
17
17
 
18
18
  module Arrow
19
19
  class Buffer
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
-
23
- def initialize(data)
24
- @data = data
25
- initialize_raw(data)
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when String
25
+ new(value)
26
+ else
27
+ nil
28
+ end
29
+ end
26
30
  end
27
31
  end
28
32
  end
@@ -0,0 +1,25 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ module ConstructorArgumentsGCGuardable
20
+ def initialize(*args)
21
+ super
22
+ @arguments = args
23
+ end
24
+ end
25
+ end
@@ -18,7 +18,7 @@
18
18
  module Arrow
19
19
  class DataType
20
20
  class << self
21
- # Creates a new suitable {Arrow::DataType}.
21
+ # Ensures returning a suitable {Arrow::DataType}.
22
22
  #
23
23
  # @overload resolve(data_type)
24
24
  #
@@ -31,17 +31,21 @@ module Arrow
31
31
  #
32
32
  # @overload resolve(name)
33
33
  #
34
- # Creates a suitable data type from type name. For example,
35
- # you can create {Arrow::BooleanDataType} from `:boolean`.
34
+ # Creates a suitable data type from the given type name. For
35
+ # example, you can create {Arrow::BooleanDataType} from
36
+ # `:boolean`.
36
37
  #
37
38
  # @param name [String, Symbol] The type name of the data type.
38
39
  #
40
+ # @return [Arrow::DataType] A new suitable data type.
41
+ #
39
42
  # @example Create a boolean data type
40
43
  # Arrow::DataType.resolve(:boolean)
41
44
  #
42
45
  # @overload resolve(name_with_arguments)
43
46
  #
44
- # Creates a suitable data type from type name with arguments.
47
+ # Creates a new suitable data type from the given type name
48
+ # with arguments.
45
49
  #
46
50
  # @param name_with_arguments [::Array<String, ...>]
47
51
  # The type name of the data type as the first element.
@@ -51,6 +55,8 @@ module Arrow
51
55
  # For example, {Arrow::TimestampDataType} needs unit as
52
56
  # additional information.
53
57
  #
58
+ # @return [Arrow::DataType] A new suitable data type.
59
+ #
54
60
  # @example Create a boolean data type
55
61
  # Arrow::DataType.resolve([:boolean])
56
62
  #
@@ -59,7 +65,8 @@ module Arrow
59
65
  #
60
66
  # @overload resolve(description)
61
67
  #
62
- # Creates a suitable data type from data type description.
68
+ # Creates a new suitable data type from the given data type
69
+ # description.
63
70
  #
64
71
  # Data type description is a raw `Hash`. Data type description
65
72
  # must have `:type` value. `:type` is the type of the data type.
@@ -74,6 +81,8 @@ module Arrow
74
81
  # @option description [String, Symbol] :type The type name of
75
82
  # the data type.
76
83
  #
84
+ # @return [Arrow::DataType] A new suitable data type.
85
+ #
77
86
  # @example Create a boolean data type
78
87
  # Arrow::DataType.resolve(type: :boolean)
79
88
  #
@@ -0,0 +1,98 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Datum
20
+ class << self
21
+ # @api private
22
+ def try_convert(value)
23
+ case value
24
+ when Array
25
+ ArrayDatum.new(value)
26
+ when ChunkedArray
27
+ ChunkedArrayDatum.new(value)
28
+ when Scalar
29
+ ScalarDatum.new(value)
30
+ when ::Array
31
+ ArrayDatum.new(ArrayBuilder.build(value))
32
+ when Integer
33
+ case value
34
+ when (0..((2 ** 8) - 1))
35
+ try_convert(UInt8Scalar.new(value))
36
+ when ((-(2 ** 7))..((2 ** 7) - 1))
37
+ try_convert(Int8Scalar.new(value))
38
+ when (0..((2 ** 16) - 1))
39
+ try_convert(UInt16Scalar.new(value))
40
+ when ((-(2 ** 15))..((2 ** 15) - 1))
41
+ try_convert(Int16Scalar.new(value))
42
+ when (0..((2 ** 32) - 1))
43
+ try_convert(UInt32Scalar.new(value))
44
+ when ((-(2 ** 31))..((2 ** 31) - 1))
45
+ try_convert(Int32Scalar.new(value))
46
+ when (0..((2 ** 64) - 1))
47
+ try_convert(UInt64Scalar.new(value))
48
+ when ((-(2 ** 63))..((2 ** 63) - 1))
49
+ try_convert(Int64Scalar.new(value))
50
+ else
51
+ nil
52
+ end
53
+ when Float
54
+ try_convert(DoubleScalar.new(value))
55
+ when true, false
56
+ try_convert(BooleanScalar.new(value))
57
+ when String
58
+ if value.ascii_only? or value.encoding == Encoding::UTF_8
59
+ if value.bytesize <= ((2 ** 31) - 1)
60
+ try_convert(StringScalar.new(value))
61
+ else
62
+ try_convert(LargeStringScalar.new(value))
63
+ end
64
+ else
65
+ if value.bytesize <= ((2 ** 31) - 1)
66
+ try_convert(BinaryScalar.new(value))
67
+ else
68
+ try_convert(LargeBinaryScalar.new(value))
69
+ end
70
+ end
71
+ when Date
72
+ date32_value = (value - Date32ArrayBuilder::UNIX_EPOCH).to_i
73
+ try_convert(Date32Scalar.new(date32_value))
74
+ when Time
75
+ case value.unit
76
+ when TimeUnit::SECOND, TimeUnit::MILLI
77
+ data_type = Time32DataType.new(value.unit)
78
+ scalar_class = Time32Scalar
79
+ else
80
+ data_type = Time64DataType.new(value.unit)
81
+ scalar_class = Time64Scalar
82
+ end
83
+ try_convert(scalar_class.new(data_type, value.value))
84
+ when ::Time
85
+ data_type = TimestampDataType.new(:nano)
86
+ timestamp_value = value.to_i * 1_000_000_000 + value.nsec
87
+ try_convert(TimestampScalar.new(data_type, timestamp_value))
88
+ when Decimal128
89
+ data_type = TimestampDataType.new(:nano)
90
+ timestamp_value = value.to_i * 1_000_000_000 + value.nsec
91
+ try_convert(Decimal128Scalar.new(data_type, timestamp_value))
92
+ else
93
+ nil
94
+ end
95
+ end
96
+ end
97
+ end
98
+ end
@@ -26,36 +26,32 @@ module Arrow
26
26
 
27
27
  alias_method :append_value_raw, :append_value
28
28
  def append_value(value)
29
- case value
30
- when nil
31
- return append_null
32
- when String
33
- value = Decimal128.new(value)
34
- when Float
35
- value = Decimal128.new(value.to_s)
36
- when BigDecimal
37
- value = value.to_arrow
38
- end
39
- append_value_raw(value)
29
+ append_value_raw(normalize_value(value))
40
30
  end
41
31
 
32
+ alias_method :append_values_raw, :append_values
42
33
  def append_values(values, is_valids=nil)
43
- if is_valids
44
- is_valids.each_with_index do |is_valid, i|
45
- if is_valid
46
- append_value(values[i])
47
- else
48
- append_null
49
- end
34
+ if values.is_a?(::Array)
35
+ values = values.collect do |value|
36
+ normalize_value(value)
50
37
  end
38
+ append_values_raw(values, is_valids)
51
39
  else
52
- values.each do |value|
53
- if value.nil?
54
- append_null
55
- else
56
- append_value(value)
57
- end
58
- end
40
+ append_values_packed(values, is_valids)
41
+ end
42
+ end
43
+
44
+ private
45
+ def normalize_value(value)
46
+ case value
47
+ when String
48
+ Decimal128.new(value)
49
+ when Float
50
+ Decimal128.new(value.to_s)
51
+ when BigDecimal
52
+ Decimal128.new(value.to_s)
53
+ else
54
+ value
59
55
  end
60
56
  end
61
57
  end
@@ -17,6 +17,8 @@
17
17
 
18
18
  module Arrow
19
19
  class Decimal128DataType
20
+ MAX_PRECISION = max_precision
21
+
20
22
  alias_method :initialize_raw, :initialize
21
23
  private :initialize_raw
22
24
 
@@ -38,5 +38,23 @@ module Arrow
38
38
  to_s_raw
39
39
  end
40
40
  end
41
+
42
+ alias_method :abs!, :abs
43
+
44
+ # @since 3.0.0
45
+ def abs
46
+ copied = dup
47
+ copied.abs!
48
+ copied
49
+ end
50
+
51
+ alias_method :negate!, :negate
52
+
53
+ # @since 3.0.0
54
+ def negate
55
+ copied = dup
56
+ copied.negate!
57
+ copied
58
+ end
41
59
  end
42
60
  end