red-arrow 1.0.0 → 4.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +15 -2
  3. data/ext/arrow/extconf.rb +14 -3
  4. data/ext/arrow/raw-records.cpp +1 -0
  5. data/ext/arrow/values.cpp +1 -0
  6. data/lib/arrow/array-builder.rb +11 -6
  7. data/lib/arrow/array.rb +118 -0
  8. data/lib/arrow/bigdecimal-extension.rb +5 -1
  9. data/lib/arrow/data-type.rb +14 -5
  10. data/lib/arrow/decimal128-array-builder.rb +21 -25
  11. data/lib/arrow/decimal128-data-type.rb +2 -0
  12. data/lib/arrow/decimal128.rb +18 -0
  13. data/lib/arrow/decimal256-array-builder.rb +61 -0
  14. data/lib/arrow/decimal256-array.rb +25 -0
  15. data/lib/arrow/decimal256-data-type.rb +73 -0
  16. data/lib/arrow/decimal256.rb +60 -0
  17. data/lib/arrow/dense-union-data-type.rb +2 -2
  18. data/lib/arrow/dictionary-data-type.rb +2 -2
  19. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  20. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  21. data/lib/arrow/loader.rb +15 -0
  22. data/lib/arrow/sort-key.rb +193 -0
  23. data/lib/arrow/sort-options.rb +109 -0
  24. data/lib/arrow/sparse-union-data-type.rb +2 -2
  25. data/lib/arrow/time32-data-type.rb +2 -2
  26. data/lib/arrow/time64-data-type.rb +2 -2
  27. data/lib/arrow/timestamp-data-type.rb +2 -2
  28. data/lib/arrow/version.rb +1 -1
  29. data/red-arrow.gemspec +1 -0
  30. data/test/raw-records/test-basic-arrays.rb +17 -0
  31. data/test/raw-records/test-dense-union-array.rb +14 -0
  32. data/test/raw-records/test-list-array.rb +20 -0
  33. data/test/raw-records/test-sparse-union-array.rb +14 -0
  34. data/test/raw-records/test-struct-array.rb +15 -0
  35. data/test/test-array.rb +122 -2
  36. data/test/test-bigdecimal.rb +20 -3
  37. data/test/test-decimal128-array-builder.rb +18 -1
  38. data/test/test-decimal128-data-type.rb +2 -2
  39. data/test/test-decimal128.rb +38 -0
  40. data/test/test-decimal256-array-builder.rb +112 -0
  41. data/test/test-decimal256-array.rb +38 -0
  42. data/test/test-decimal256-data-type.rb +31 -0
  43. data/test/test-decimal256.rb +102 -0
  44. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  45. data/test/test-fixed-size-binary-array.rb +36 -0
  46. data/test/test-orc.rb +19 -23
  47. data/test/test-sort-indices.rb +40 -0
  48. data/test/test-sort-key.rb +81 -0
  49. data/test/test-sort-options.rb +58 -0
  50. data/test/test-struct-array-builder.rb +8 -8
  51. data/test/test-struct-array.rb +2 -2
  52. data/test/values/test-basic-arrays.rb +11 -0
  53. data/test/values/test-dense-union-array.rb +14 -0
  54. data/test/values/test-list-array.rb +18 -0
  55. data/test/values/test-sparse-union-array.rb +14 -0
  56. data/test/values/test-struct-array.rb +15 -0
  57. metadata +101 -61
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a45e003f0a453f175b1dbfc81c1fcf092cbfe964dd43e44d7b16e2087834ef5d
4
- data.tar.gz: 5d1dc1d87a821d1ac4c49603d1d92d723230eb1f8a7cdd540dafe3715a53c12e
3
+ metadata.gz: df66b2dac421ab800bd00510e4a13e250646d1f1c27541f2ca44cd04be38a3ba
4
+ data.tar.gz: 3c39ec6a1a3ee48708a28583c125d96f9195e9973f6f2f3707cd923eca915329
5
5
  SHA512:
6
- metadata.gz: 85329e05ae20268d44a9ffff5fb3278263e5b77deda6f03fda31595b117252b7970fd17328ce2650c8650895a1813a1fb95799d3331a312bce2c276f3e5d55a4
7
- data.tar.gz: fe90b0ff0dfb9b9126765818d54f7d4d203e6c72ab53dfb2680a48dc474fef09b8ed3f0553f3c1fe110d500d625ab2300563c3e4169403326c63c6bf27f33a5f
6
+ metadata.gz: 9ebf50ffecfdea79140b158f5aeea0cc3bb63403e405bcbba2b34e288b8575df3df859b8f46eb12e26199e566237016d0e3907acc1d7c0d16e82d638c7bdce2e
7
+ data.tar.gz: 1ff1389d28db025efd82c87e9f85cfb7e3bce21abe85a1b943ca8536bbc1015b0015c05ab45e5d9a35914034a673da0bbc59f8211bb0b620ee4a7487f5fec413
@@ -212,7 +212,17 @@ namespace red_arrow {
212
212
 
213
213
  inline VALUE convert(const arrow::Decimal128Array& array,
214
214
  const int64_t i) {
215
- decimal_buffer_ = array.FormatValue(i);
215
+ return convert_decimal(std::move(array.FormatValue(i)));
216
+ }
217
+
218
+ inline VALUE convert(const arrow::Decimal256Array& array,
219
+ const int64_t i) {
220
+ return convert_decimal(std::move(array.FormatValue(i)));
221
+ }
222
+
223
+ private:
224
+ inline VALUE convert_decimal(std::string&& value) {
225
+ decimal_buffer_ = value;
216
226
  return rb_funcall(rb_cObject,
217
227
  id_BigDecimal,
218
228
  1,
@@ -221,7 +231,6 @@ namespace red_arrow {
221
231
  rb_ascii8bit_encoding()));
222
232
  }
223
233
 
224
- private:
225
234
  std::string decimal_buffer_;
226
235
  ListArrayValueConverter* list_array_value_converter_;
227
236
  StructArrayValueConverter* struct_array_value_converter_;
@@ -289,6 +298,7 @@ namespace red_arrow {
289
298
  VISIT(DenseUnion)
290
299
  VISIT(Dictionary)
291
300
  VISIT(Decimal128)
301
+ VISIT(Decimal256)
292
302
  // TODO
293
303
  // VISIT(Extension)
294
304
 
@@ -393,6 +403,7 @@ namespace red_arrow {
393
403
  VISIT(DenseUnion)
394
404
  VISIT(Dictionary)
395
405
  VISIT(Decimal128)
406
+ VISIT(Decimal256)
396
407
  // TODO
397
408
  // VISIT(Extension)
398
409
 
@@ -485,6 +496,7 @@ namespace red_arrow {
485
496
  VISIT(DenseUnion)
486
497
  VISIT(Dictionary)
487
498
  VISIT(Decimal128)
499
+ VISIT(Decimal256)
488
500
  // TODO
489
501
  // VISIT(Extension)
490
502
 
@@ -609,6 +621,7 @@ namespace red_arrow {
609
621
  VISIT(DenseUnion)
610
622
  VISIT(Dictionary)
611
623
  VISIT(Decimal128)
624
+ VISIT(Decimal256)
612
625
  // TODO
613
626
  // VISIT(Extension)
614
627
 
data/ext/arrow/extconf.rb CHANGED
@@ -16,7 +16,8 @@
16
16
  # under the License.
17
17
 
18
18
  require "extpp"
19
- require "mkmf-gnome2"
19
+ require "mkmf-gnome"
20
+ require_relative "../../lib/arrow/version"
20
21
 
21
22
  arrow_pkg_config_path = ENV["ARROW_PKG_CONFIG_PATH"]
22
23
  if arrow_pkg_config_path
@@ -24,7 +25,12 @@ if arrow_pkg_config_path
24
25
  ENV["PKG_CONFIG_PATH"] = pkg_config_paths.join(File::PATH_SEPARATOR)
25
26
  end
26
27
 
27
- unless required_pkg_config_package("arrow",
28
+ unless required_pkg_config_package([
29
+ "arrow",
30
+ Arrow::Version::MAJOR,
31
+ Arrow::Version::MINOR,
32
+ Arrow::Version::MICRO,
33
+ ],
28
34
  debian: "libarrow-dev",
29
35
  redhat: "arrow-devel",
30
36
  homebrew: "apache-arrow",
@@ -32,7 +38,12 @@ unless required_pkg_config_package("arrow",
32
38
  exit(false)
33
39
  end
34
40
 
35
- unless required_pkg_config_package("arrow-glib",
41
+ unless required_pkg_config_package([
42
+ "arrow-glib",
43
+ Arrow::Version::MAJOR,
44
+ Arrow::Version::MINOR,
45
+ Arrow::Version::MICRO,
46
+ ],
36
47
  debian: "libarrow-glib-dev",
37
48
  redhat: "arrow-glib-devel",
38
49
  homebrew: "apache-arrow-glib",
@@ -104,6 +104,7 @@ namespace red_arrow {
104
104
  VISIT(DenseUnion)
105
105
  VISIT(Dictionary)
106
106
  VISIT(Decimal128)
107
+ VISIT(Decimal256)
107
108
  // TODO
108
109
  // VISIT(Extension)
109
110
 
data/ext/arrow/values.cpp CHANGED
@@ -85,6 +85,7 @@ namespace red_arrow {
85
85
  VISIT(DenseUnion)
86
86
  VISIT(Dictionary)
87
87
  VISIT(Decimal128)
88
+ VISIT(Decimal256)
88
89
  // TODO
89
90
  // VISIT(Extension)
90
91
 
@@ -115,6 +115,17 @@ module Arrow
115
115
  builder: Date32ArrayBuilder.new,
116
116
  detected: true,
117
117
  }
118
+ when BigDecimal
119
+ if value.to_arrow.is_a?(Decimal128)
120
+ {
121
+ builder: Decimal128ArrayBuilder.new,
122
+ }
123
+ else
124
+ {
125
+ builder: Decimal256ArrayBuilder.new,
126
+ detected: true,
127
+ }
128
+ end
118
129
  when ::Array
119
130
  sub_builder_info = nil
120
131
  value.each do |sub_value|
@@ -194,11 +205,5 @@ module Arrow
194
205
  end
195
206
  end
196
207
  end
197
-
198
- def append_nulls(n)
199
- n.times do
200
- append_null
201
- end
202
- end
203
208
  end
204
209
  end
data/lib/arrow/array.rb CHANGED
@@ -100,5 +100,123 @@ module Arrow
100
100
  is_in_raw(values)
101
101
  end
102
102
  end
103
+
104
+ # @api private
105
+ alias_method :concatenate_raw, :concatenate
106
+ # Concatenates the given other arrays to the array.
107
+ #
108
+ # @param other_arrays [::Array, Arrow::Array] The arrays to be
109
+ # concatenated.
110
+ #
111
+ # Each other array is processed by {#resolve} before they're
112
+ # concatenated.
113
+ #
114
+ # @example Raw Ruby Array
115
+ # array = Arrow::Int32Array.new([1])
116
+ # array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
117
+ #
118
+ # @example Arrow::Array
119
+ # array = Arrow::Int32Array.new([1])
120
+ # array.concatenate(Arrow::Int32Array.new([2, 3]),
121
+ # Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
122
+ #
123
+ # @since 4.0.0
124
+ def concatenate(*other_arrays)
125
+ other_arrays = other_arrays.collect do |other_array|
126
+ resolve(other_array)
127
+ end
128
+ concatenate_raw(other_arrays)
129
+ end
130
+
131
+ # Concatenates the given other array to the array.
132
+ #
133
+ # If you have multiple arrays to be concatenated, you should use
134
+ # {#concatenate} to concatenate multiple arrays at once.
135
+ #
136
+ # @param other_array [::Array, Arrow::Array] The array to be concatenated.
137
+ #
138
+ # `@other_array` is processed by {#resolve} before it's
139
+ # concatenated.
140
+ #
141
+ # @example Raw Ruby Array
142
+ # Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
143
+ #
144
+ # @example Arrow::Array
145
+ # Arrow::Int32Array.new([1]) +
146
+ # Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
147
+ #
148
+ # @since 4.0.0
149
+ def +(other_array)
150
+ concatenate(other_array)
151
+ end
152
+
153
+ # Ensures returning the same data type array from the given array.
154
+ #
155
+ # @return [Arrow::Array]
156
+ #
157
+ # @overload resolve(other_raw_array)
158
+ #
159
+ # @param other_raw_array [::Array] A raw Ruby Array. A new Arrow::Array
160
+ # is built by `self.class.new`.
161
+ #
162
+ # @example Raw Ruby Array
163
+ # int32_array = Arrow::Int32Array.new([1])
164
+ # other_array = int32_array.resolve([2, 3, 4])
165
+ # other_array # => Arrow::Int32Array.new([2, 3, 4])
166
+ #
167
+ # @overload resolve(other_array)
168
+ #
169
+ # @param other_array [Arrow::Array] Another Arrow::Array.
170
+ #
171
+ # If the given other array is an same data type array of
172
+ # `self`, the given other array is returned as-is.
173
+ #
174
+ # If the given other array isn't an same data type array of
175
+ # `self`, the given other array is casted.
176
+ #
177
+ # @example Same data type
178
+ # int32_array = Arrow::Int32Array.new([1])
179
+ # other_int32_array = Arrow::Int32Array.new([2, 3, 4])
180
+ # other_array = int32_array.resolve(other_int32_array)
181
+ # other_array.object_id == other_int32_array.object_id
182
+ #
183
+ # @example Other data type
184
+ # int32_array = Arrow::Int32Array.new([1])
185
+ # other_int8_array = Arrow::Int8Array.new([2, 3, 4])
186
+ # other_array = int32_array.resolve(other_int32_array)
187
+ # other_array #=> Arrow::Int32Array.new([2, 3, 4])
188
+ #
189
+ # @since 4.0.0
190
+ def resolve(other_array)
191
+ if other_array.is_a?(::Array)
192
+ builder_class = self.class.builder_class
193
+ if builder_class.nil?
194
+ message =
195
+ "[array][resolve] can't build #{value_data_type} array " +
196
+ "from raw Ruby Array"
197
+ raise ArgumentError, message
198
+ end
199
+ if builder_class.buildable?([other_array])
200
+ other_array = builder_class.build(other_array)
201
+ elsif builder_class.buildable?([value_data_type, other_array])
202
+ other_array = builder_class.build(value_data_type, other_array)
203
+ else
204
+ message =
205
+ "[array][resolve] need to implement " +
206
+ "a feature that building #{value_data_type} array " +
207
+ "from raw Ruby Array"
208
+ raise NotImpelemented, message
209
+ end
210
+ other_array
211
+ elsif other_array.respond_to?(:value_data_type)
212
+ return other_array if value_data_type == other_array.value_data_type
213
+ other_array.cast(value_data_type)
214
+ else
215
+ message =
216
+ "[array][resolve] can't build #{value_data_type} array: " +
217
+ "#{other_array.inspect}"
218
+ raise ArgumentError, message
219
+ end
220
+ end
103
221
  end
104
222
  end
@@ -19,6 +19,10 @@ require "bigdecimal"
19
19
 
20
20
  class BigDecimal
21
21
  def to_arrow
22
- Arrow::Decimal128.new(to_s)
22
+ if precision <= Arrow::Decimal128DataType::MAX_PRECISION
23
+ Arrow::Decimal128.new(to_s)
24
+ else
25
+ Arrow::Decimal256.new(to_s)
26
+ end
23
27
  end
24
28
  end
@@ -18,7 +18,7 @@
18
18
  module Arrow
19
19
  class DataType
20
20
  class << self
21
- # Creates a new suitable {Arrow::DataType}.
21
+ # Ensure returning suitable {Arrow::DataType}.
22
22
  #
23
23
  # @overload resolve(data_type)
24
24
  #
@@ -31,17 +31,21 @@ module Arrow
31
31
  #
32
32
  # @overload resolve(name)
33
33
  #
34
- # Creates a suitable data type from type name. For example,
35
- # you can create {Arrow::BooleanDataType} from `:boolean`.
34
+ # Creates a suitable data type from the given type name. For
35
+ # example, you can create {Arrow::BooleanDataType} from
36
+ # `:boolean`.
36
37
  #
37
38
  # @param name [String, Symbol] The type name of the data type.
38
39
  #
40
+ # @return [Arrow::DataType] A new suitable data type.
41
+ #
39
42
  # @example Create a boolean data type
40
43
  # Arrow::DataType.resolve(:boolean)
41
44
  #
42
45
  # @overload resolve(name_with_arguments)
43
46
  #
44
- # Creates a suitable data type from type name with arguments.
47
+ # Creates a new suitable data type from the given type name
48
+ # with arguments.
45
49
  #
46
50
  # @param name_with_arguments [::Array<String, ...>]
47
51
  # The type name of the data type as the first element.
@@ -51,6 +55,8 @@ module Arrow
51
55
  # For example, {Arrow::TimestampDataType} needs unit as
52
56
  # additional information.
53
57
  #
58
+ # @return [Arrow::DataType] A new suitable data type.
59
+ #
54
60
  # @example Create a boolean data type
55
61
  # Arrow::DataType.resolve([:boolean])
56
62
  #
@@ -59,7 +65,8 @@ module Arrow
59
65
  #
60
66
  # @overload resolve(description)
61
67
  #
62
- # Creates a suitable data type from data type description.
68
+ # Creates a new suitable data type from the given data type
69
+ # description.
63
70
  #
64
71
  # Data type description is a raw `Hash`. Data type description
65
72
  # must have `:type` value. `:type` is the type of the data type.
@@ -74,6 +81,8 @@ module Arrow
74
81
  # @option description [String, Symbol] :type The type name of
75
82
  # the data type.
76
83
  #
84
+ # @return [Arrow::DataType] A new suitable data type.
85
+ #
77
86
  # @example Create a boolean data type
78
87
  # Arrow::DataType.resolve(type: :boolean)
79
88
  #
@@ -26,36 +26,32 @@ module Arrow
26
26
 
27
27
  alias_method :append_value_raw, :append_value
28
28
  def append_value(value)
29
- case value
30
- when nil
31
- return append_null
32
- when String
33
- value = Decimal128.new(value)
34
- when Float
35
- value = Decimal128.new(value.to_s)
36
- when BigDecimal
37
- value = value.to_arrow
38
- end
39
- append_value_raw(value)
29
+ append_value_raw(normalize_value(value))
40
30
  end
41
31
 
32
+ alias_method :append_values_raw, :append_values
42
33
  def append_values(values, is_valids=nil)
43
- if is_valids
44
- is_valids.each_with_index do |is_valid, i|
45
- if is_valid
46
- append_value(values[i])
47
- else
48
- append_null
49
- end
34
+ if values.is_a?(::Array)
35
+ values = values.collect do |value|
36
+ normalize_value(value)
50
37
  end
38
+ append_values_raw(values, is_valids)
51
39
  else
52
- values.each do |value|
53
- if value.nil?
54
- append_null
55
- else
56
- append_value(value)
57
- end
58
- end
40
+ append_values_packed(values, is_valids)
41
+ end
42
+ end
43
+
44
+ private
45
+ def normalize_value(value)
46
+ case value
47
+ when String
48
+ Decimal128.new(value)
49
+ when Float
50
+ Decimal128.new(value.to_s)
51
+ when BigDecimal
52
+ Decimal128.new(value.to_s)
53
+ else
54
+ value
59
55
  end
60
56
  end
61
57
  end
@@ -17,6 +17,8 @@
17
17
 
18
18
  module Arrow
19
19
  class Decimal128DataType
20
+ MAX_PRECISION = max_precision
21
+
20
22
  alias_method :initialize_raw, :initialize
21
23
  private :initialize_raw
22
24
 
@@ -38,5 +38,23 @@ module Arrow
38
38
  to_s_raw
39
39
  end
40
40
  end
41
+
42
+ alias_method :abs!, :abs
43
+
44
+ # @since 3.0.0
45
+ def abs
46
+ copied = dup
47
+ copied.abs!
48
+ copied
49
+ end
50
+
51
+ alias_method :negate!, :negate
52
+
53
+ # @since 3.0.0
54
+ def negate
55
+ copied = dup
56
+ copied.negate!
57
+ copied
58
+ end
41
59
  end
42
60
  end