red-arrow 1.0.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +15 -2
  3. data/ext/arrow/extconf.rb +14 -3
  4. data/ext/arrow/raw-records.cpp +1 -0
  5. data/ext/arrow/values.cpp +1 -0
  6. data/lib/arrow/array-builder.rb +11 -6
  7. data/lib/arrow/array.rb +118 -0
  8. data/lib/arrow/bigdecimal-extension.rb +5 -1
  9. data/lib/arrow/data-type.rb +14 -5
  10. data/lib/arrow/decimal128-array-builder.rb +21 -25
  11. data/lib/arrow/decimal128-data-type.rb +2 -0
  12. data/lib/arrow/decimal128.rb +18 -0
  13. data/lib/arrow/decimal256-array-builder.rb +61 -0
  14. data/lib/arrow/decimal256-array.rb +25 -0
  15. data/lib/arrow/decimal256-data-type.rb +73 -0
  16. data/lib/arrow/decimal256.rb +60 -0
  17. data/lib/arrow/dense-union-data-type.rb +2 -2
  18. data/lib/arrow/dictionary-data-type.rb +2 -2
  19. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  20. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  21. data/lib/arrow/loader.rb +15 -0
  22. data/lib/arrow/sort-key.rb +193 -0
  23. data/lib/arrow/sort-options.rb +109 -0
  24. data/lib/arrow/sparse-union-data-type.rb +2 -2
  25. data/lib/arrow/time32-data-type.rb +2 -2
  26. data/lib/arrow/time64-data-type.rb +2 -2
  27. data/lib/arrow/timestamp-data-type.rb +2 -2
  28. data/lib/arrow/version.rb +1 -1
  29. data/red-arrow.gemspec +1 -0
  30. data/test/raw-records/test-basic-arrays.rb +17 -0
  31. data/test/raw-records/test-dense-union-array.rb +14 -0
  32. data/test/raw-records/test-list-array.rb +20 -0
  33. data/test/raw-records/test-sparse-union-array.rb +14 -0
  34. data/test/raw-records/test-struct-array.rb +15 -0
  35. data/test/test-array.rb +122 -2
  36. data/test/test-bigdecimal.rb +20 -3
  37. data/test/test-decimal128-array-builder.rb +18 -1
  38. data/test/test-decimal128-data-type.rb +2 -2
  39. data/test/test-decimal128.rb +38 -0
  40. data/test/test-decimal256-array-builder.rb +112 -0
  41. data/test/test-decimal256-array.rb +38 -0
  42. data/test/test-decimal256-data-type.rb +31 -0
  43. data/test/test-decimal256.rb +102 -0
  44. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  45. data/test/test-fixed-size-binary-array.rb +36 -0
  46. data/test/test-orc.rb +19 -23
  47. data/test/test-sort-indices.rb +40 -0
  48. data/test/test-sort-key.rb +81 -0
  49. data/test/test-sort-options.rb +58 -0
  50. data/test/test-struct-array-builder.rb +8 -8
  51. data/test/test-struct-array.rb +2 -2
  52. data/test/values/test-basic-arrays.rb +11 -0
  53. data/test/values/test-dense-union-array.rb +14 -0
  54. data/test/values/test-list-array.rb +18 -0
  55. data/test/values/test-sparse-union-array.rb +14 -0
  56. data/test/values/test-struct-array.rb +15 -0
  57. metadata +101 -61
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a45e003f0a453f175b1dbfc81c1fcf092cbfe964dd43e44d7b16e2087834ef5d
4
- data.tar.gz: 5d1dc1d87a821d1ac4c49603d1d92d723230eb1f8a7cdd540dafe3715a53c12e
3
+ metadata.gz: df66b2dac421ab800bd00510e4a13e250646d1f1c27541f2ca44cd04be38a3ba
4
+ data.tar.gz: 3c39ec6a1a3ee48708a28583c125d96f9195e9973f6f2f3707cd923eca915329
5
5
  SHA512:
6
- metadata.gz: 85329e05ae20268d44a9ffff5fb3278263e5b77deda6f03fda31595b117252b7970fd17328ce2650c8650895a1813a1fb95799d3331a312bce2c276f3e5d55a4
7
- data.tar.gz: fe90b0ff0dfb9b9126765818d54f7d4d203e6c72ab53dfb2680a48dc474fef09b8ed3f0553f3c1fe110d500d625ab2300563c3e4169403326c63c6bf27f33a5f
6
+ metadata.gz: 9ebf50ffecfdea79140b158f5aeea0cc3bb63403e405bcbba2b34e288b8575df3df859b8f46eb12e26199e566237016d0e3907acc1d7c0d16e82d638c7bdce2e
7
+ data.tar.gz: 1ff1389d28db025efd82c87e9f85cfb7e3bce21abe85a1b943ca8536bbc1015b0015c05ab45e5d9a35914034a673da0bbc59f8211bb0b620ee4a7487f5fec413
data/ext/arrow/converters.hpp CHANGED
@@ -212,7 +212,17 @@ namespace red_arrow {
212
212
 
213
213
  inline VALUE convert(const arrow::Decimal128Array& array,
214
214
  const int64_t i) {
215
- decimal_buffer_ = array.FormatValue(i);
215
+ return convert_decimal(std::move(array.FormatValue(i)));
216
+ }
217
+
218
+ inline VALUE convert(const arrow::Decimal256Array& array,
219
+ const int64_t i) {
220
+ return convert_decimal(std::move(array.FormatValue(i)));
221
+ }
222
+
223
+ private:
224
+ inline VALUE convert_decimal(std::string&& value) {
225
+ decimal_buffer_ = value;
216
226
  return rb_funcall(rb_cObject,
217
227
  id_BigDecimal,
218
228
  1,
@@ -221,7 +231,6 @@ namespace red_arrow {
221
231
  rb_ascii8bit_encoding()));
222
232
  }
223
233
 
224
- private:
225
234
  std::string decimal_buffer_;
226
235
  ListArrayValueConverter* list_array_value_converter_;
227
236
  StructArrayValueConverter* struct_array_value_converter_;
@@ -289,6 +298,7 @@ namespace red_arrow {
289
298
  VISIT(DenseUnion)
290
299
  VISIT(Dictionary)
291
300
  VISIT(Decimal128)
301
+ VISIT(Decimal256)
292
302
  // TODO
293
303
  // VISIT(Extension)
294
304
 
@@ -393,6 +403,7 @@ namespace red_arrow {
393
403
  VISIT(DenseUnion)
394
404
  VISIT(Dictionary)
395
405
  VISIT(Decimal128)
406
+ VISIT(Decimal256)
396
407
  // TODO
397
408
  // VISIT(Extension)
398
409
 
@@ -485,6 +496,7 @@ namespace red_arrow {
485
496
  VISIT(DenseUnion)
486
497
  VISIT(Dictionary)
487
498
  VISIT(Decimal128)
499
+ VISIT(Decimal256)
488
500
  // TODO
489
501
  // VISIT(Extension)
490
502
 
@@ -609,6 +621,7 @@ namespace red_arrow {
609
621
  VISIT(DenseUnion)
610
622
  VISIT(Dictionary)
611
623
  VISIT(Decimal128)
624
+ VISIT(Decimal256)
612
625
  // TODO
613
626
  // VISIT(Extension)
614
627
 
data/ext/arrow/extconf.rb CHANGED
@@ -16,7 +16,8 @@
16
16
  # under the License.
17
17
 
18
18
  require "extpp"
19
- require "mkmf-gnome2"
19
+ require "mkmf-gnome"
20
+ require_relative "../../lib/arrow/version"
20
21
 
21
22
  arrow_pkg_config_path = ENV["ARROW_PKG_CONFIG_PATH"]
22
23
  if arrow_pkg_config_path
@@ -24,7 +25,12 @@ if arrow_pkg_config_path
24
25
  ENV["PKG_CONFIG_PATH"] = pkg_config_paths.join(File::PATH_SEPARATOR)
25
26
  end
26
27
 
27
- unless required_pkg_config_package("arrow",
28
+ unless required_pkg_config_package([
29
+ "arrow",
30
+ Arrow::Version::MAJOR,
31
+ Arrow::Version::MINOR,
32
+ Arrow::Version::MICRO,
33
+ ],
28
34
  debian: "libarrow-dev",
29
35
  redhat: "arrow-devel",
30
36
  homebrew: "apache-arrow",
@@ -32,7 +38,12 @@ unless required_pkg_config_package("arrow",
32
38
  exit(false)
33
39
  end
34
40
 
35
- unless required_pkg_config_package("arrow-glib",
41
+ unless required_pkg_config_package([
42
+ "arrow-glib",
43
+ Arrow::Version::MAJOR,
44
+ Arrow::Version::MINOR,
45
+ Arrow::Version::MICRO,
46
+ ],
36
47
  debian: "libarrow-glib-dev",
37
48
  redhat: "arrow-glib-devel",
38
49
  homebrew: "apache-arrow-glib",
data/ext/arrow/raw-records.cpp CHANGED
@@ -104,6 +104,7 @@ namespace red_arrow {
104
104
  VISIT(DenseUnion)
105
105
  VISIT(Dictionary)
106
106
  VISIT(Decimal128)
107
+ VISIT(Decimal256)
107
108
  // TODO
108
109
  // VISIT(Extension)
109
110
 
data/ext/arrow/values.cpp CHANGED
@@ -85,6 +85,7 @@ namespace red_arrow {
85
85
  VISIT(DenseUnion)
86
86
  VISIT(Dictionary)
87
87
  VISIT(Decimal128)
88
+ VISIT(Decimal256)
88
89
  // TODO
89
90
  // VISIT(Extension)
90
91
 
data/lib/arrow/array-builder.rb CHANGED
@@ -115,6 +115,17 @@ module Arrow
115
115
  builder: Date32ArrayBuilder.new,
116
116
  detected: true,
117
117
  }
118
+ when BigDecimal
119
+ if value.to_arrow.is_a?(Decimal128)
120
+ {
121
+ builder: Decimal128ArrayBuilder.new,
122
+ }
123
+ else
124
+ {
125
+ builder: Decimal256ArrayBuilder.new,
126
+ detected: true,
127
+ }
128
+ end
118
129
  when ::Array
119
130
  sub_builder_info = nil
120
131
  value.each do |sub_value|
@@ -194,11 +205,5 @@ module Arrow
194
205
  end
195
206
  end
196
207
  end
197
-
198
- def append_nulls(n)
199
- n.times do
200
- append_null
201
- end
202
- end
203
208
  end
204
209
  end
data/lib/arrow/array.rb CHANGED
@@ -100,5 +100,123 @@ module Arrow
100
100
  is_in_raw(values)
101
101
  end
102
102
  end
103
+
104
+ # @api private
105
+ alias_method :concatenate_raw, :concatenate
106
+ # Concatenates the given other arrays to the array.
107
+ #
108
+ # @param other_arrays [::Array, Arrow::Array] The arrays to be
109
+ # concatenated.
110
+ #
111
+ # Each other array is processed by {#resolve} before they're
112
+ # concatenated.
113
+ #
114
+ # @example Raw Ruby Array
115
+ # array = Arrow::Int32Array.new([1])
116
+ # array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
117
+ #
118
+ # @example Arrow::Array
119
+ # array = Arrow::Int32Array.new([1])
120
+ # array.concatenate(Arrow::Int32Array.new([2, 3]),
121
+ # Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
122
+ #
123
+ # @since 4.0.0
124
+ def concatenate(*other_arrays)
125
+ other_arrays = other_arrays.collect do |other_array|
126
+ resolve(other_array)
127
+ end
128
+ concatenate_raw(other_arrays)
129
+ end
130
+
131
+ # Concatenates the given other array to the array.
132
+ #
133
+ # If you have multiple arrays to be concatenated, you should use
134
+ # {#concatenate} to concatenate multiple arrays at once.
135
+ #
136
+ # @param other_array [::Array, Arrow::Array] The array to be concatenated.
137
+ #
138
+ # `@other_array` is processed by {#resolve} before it's
139
+ # concatenated.
140
+ #
141
+ # @example Raw Ruby Array
142
+ # Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
143
+ #
144
+ # @example Arrow::Array
145
+ # Arrow::Int32Array.new([1]) +
146
+ # Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
147
+ #
148
+ # @since 4.0.0
149
+ def +(other_array)
150
+ concatenate(other_array)
151
+ end
152
+
153
+ # Ensures returning the same data type array from the given array.
154
+ #
155
+ # @return [Arrow::Array]
156
+ #
157
+ # @overload resolve(other_raw_array)
158
+ #
159
+ # @param other_raw_array [::Array] A raw Ruby Array. A new Arrow::Array
160
+ # is built by `self.class.new`.
161
+ #
162
+ # @example Raw Ruby Array
163
+ # int32_array = Arrow::Int32Array.new([1])
164
+ # other_array = int32_array.resolve([2, 3, 4])
165
+ # other_array # => Arrow::Int32Array.new([2, 3, 4])
166
+ #
167
+ # @overload resolve(other_array)
168
+ #
169
+ # @param other_array [Arrow::Array] Another Arrow::Array.
170
+ #
171
+ # If the given other array is an same data type array of
172
+ # `self`, the given other array is returned as-is.
173
+ #
174
+ # If the given other array isn't an same data type array of
175
+ # `self`, the given other array is casted.
176
+ #
177
+ # @example Same data type
178
+ # int32_array = Arrow::Int32Array.new([1])
179
+ # other_int32_array = Arrow::Int32Array.new([2, 3, 4])
180
+ # other_array = int32_array.resolve(other_int32_array)
181
+ # other_array.object_id == other_int32_array.object_id
182
+ #
183
+ # @example Other data type
184
+ # int32_array = Arrow::Int32Array.new([1])
185
+ # other_int8_array = Arrow::Int8Array.new([2, 3, 4])
186
+ # other_array = int32_array.resolve(other_int32_array)
187
+ # other_array #=> Arrow::Int32Array.new([2, 3, 4])
188
+ #
189
+ # @since 4.0.0
190
+ def resolve(other_array)
191
+ if other_array.is_a?(::Array)
192
+ builder_class = self.class.builder_class
193
+ if builder_class.nil?
194
+ message =
195
+ "[array][resolve] can't build #{value_data_type} array " +
196
+ "from raw Ruby Array"
197
+ raise ArgumentError, message
198
+ end
199
+ if builder_class.buildable?([other_array])
200
+ other_array = builder_class.build(other_array)
201
+ elsif builder_class.buildable?([value_data_type, other_array])
202
+ other_array = builder_class.build(value_data_type, other_array)
203
+ else
204
+ message =
205
+ "[array][resolve] need to implement " +
206
+ "a feature that building #{value_data_type} array " +
207
+ "from raw Ruby Array"
208
+ raise NotImpelemented, message
209
+ end
210
+ other_array
211
+ elsif other_array.respond_to?(:value_data_type)
212
+ return other_array if value_data_type == other_array.value_data_type
213
+ other_array.cast(value_data_type)
214
+ else
215
+ message =
216
+ "[array][resolve] can't build #{value_data_type} array: " +
217
+ "#{other_array.inspect}"
218
+ raise ArgumentError, message
219
+ end
220
+ end
103
221
  end
104
222
  end
data/lib/arrow/bigdecimal-extension.rb CHANGED
@@ -19,6 +19,10 @@ require "bigdecimal"
19
19
 
20
20
  class BigDecimal
21
21
  def to_arrow
22
- Arrow::Decimal128.new(to_s)
22
+ if precision <= Arrow::Decimal128DataType::MAX_PRECISION
23
+ Arrow::Decimal128.new(to_s)
24
+ else
25
+ Arrow::Decimal256.new(to_s)
26
+ end
23
27
  end
24
28
  end
data/lib/arrow/data-type.rb CHANGED
@@ -18,7 +18,7 @@
18
18
  module Arrow
19
19
  class DataType
20
20
  class << self
21
- # Creates a new suitable {Arrow::DataType}.
21
+ # Ensure returning suitable {Arrow::DataType}.
22
22
  #
23
23
  # @overload resolve(data_type)
24
24
  #
@@ -31,17 +31,21 @@ module Arrow
31
31
  #
32
32
  # @overload resolve(name)
33
33
  #
34
- # Creates a suitable data type from type name. For example,
35
- # you can create {Arrow::BooleanDataType} from `:boolean`.
34
+ # Creates a suitable data type from the given type name. For
35
+ # example, you can create {Arrow::BooleanDataType} from
36
+ # `:boolean`.
36
37
  #
37
38
  # @param name [String, Symbol] The type name of the data type.
38
39
  #
40
+ # @return [Arrow::DataType] A new suitable data type.
41
+ #
39
42
  # @example Create a boolean data type
40
43
  # Arrow::DataType.resolve(:boolean)
41
44
  #
42
45
  # @overload resolve(name_with_arguments)
43
46
  #
44
- # Creates a suitable data type from type name with arguments.
47
+ # Creates a new suitable data type from the given type name
48
+ # with arguments.
45
49
  #
46
50
  # @param name_with_arguments [::Array<String, ...>]
47
51
  # The type name of the data type as the first element.
@@ -51,6 +55,8 @@ module Arrow
51
55
  # For example, {Arrow::TimestampDataType} needs unit as
52
56
  # additional information.
53
57
  #
58
+ # @return [Arrow::DataType] A new suitable data type.
59
+ #
54
60
  # @example Create a boolean data type
55
61
  # Arrow::DataType.resolve([:boolean])
56
62
  #
@@ -59,7 +65,8 @@ module Arrow
59
65
  #
60
66
  # @overload resolve(description)
61
67
  #
62
- # Creates a suitable data type from data type description.
68
+ # Creates a new suitable data type from the given data type
69
+ # description.
63
70
  #
64
71
  # Data type description is a raw `Hash`. Data type description
65
72
  # must have `:type` value. `:type` is the type of the data type.
@@ -74,6 +81,8 @@ module Arrow
74
81
  # @option description [String, Symbol] :type The type name of
75
82
  # the data type.
76
83
  #
84
+ # @return [Arrow::DataType] A new suitable data type.
85
+ #
77
86
  # @example Create a boolean data type
78
87
  # Arrow::DataType.resolve(type: :boolean)
79
88
  #
data/lib/arrow/decimal128-array-builder.rb CHANGED
@@ -26,36 +26,32 @@ module Arrow
26
26
 
27
27
  alias_method :append_value_raw, :append_value
28
28
  def append_value(value)
29
- case value
30
- when nil
31
- return append_null
32
- when String
33
- value = Decimal128.new(value)
34
- when Float
35
- value = Decimal128.new(value.to_s)
36
- when BigDecimal
37
- value = value.to_arrow
38
- end
39
- append_value_raw(value)
29
+ append_value_raw(normalize_value(value))
40
30
  end
41
31
 
32
+ alias_method :append_values_raw, :append_values
42
33
  def append_values(values, is_valids=nil)
43
- if is_valids
44
- is_valids.each_with_index do |is_valid, i|
45
- if is_valid
46
- append_value(values[i])
47
- else
48
- append_null
49
- end
34
+ if values.is_a?(::Array)
35
+ values = values.collect do |value|
36
+ normalize_value(value)
50
37
  end
38
+ append_values_raw(values, is_valids)
51
39
  else
52
- values.each do |value|
53
- if value.nil?
54
- append_null
55
- else
56
- append_value(value)
57
- end
58
- end
40
+ append_values_packed(values, is_valids)
41
+ end
42
+ end
43
+
44
+ private
45
+ def normalize_value(value)
46
+ case value
47
+ when String
48
+ Decimal128.new(value)
49
+ when Float
50
+ Decimal128.new(value.to_s)
51
+ when BigDecimal
52
+ Decimal128.new(value.to_s)
53
+ else
54
+ value
59
55
  end
60
56
  end
61
57
  end
data/lib/arrow/decimal128-data-type.rb CHANGED
@@ -17,6 +17,8 @@
17
17
 
18
18
  module Arrow
19
19
  class Decimal128DataType
20
+ MAX_PRECISION = max_precision
21
+
20
22
  alias_method :initialize_raw, :initialize
21
23
  private :initialize_raw
22
24
 
data/lib/arrow/decimal128.rb CHANGED
@@ -38,5 +38,23 @@ module Arrow
38
38
  to_s_raw
39
39
  end
40
40
  end
41
+
42
+ alias_method :abs!, :abs
43
+
44
+ # @since 3.0.0
45
+ def abs
46
+ copied = dup
47
+ copied.abs!
48
+ copied
49
+ end
50
+
51
+ alias_method :negate!, :negate
52
+
53
+ # @since 3.0.0
54
+ def negate
55
+ copied = dup
56
+ copied.negate!
57
+ copied
58
+ end
41
59
  end
42
60
  end