red-arrow 0.17.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +75 -32
  3. data/ext/arrow/extconf.rb +14 -3
  4. data/ext/arrow/raw-records.cpp +3 -1
  5. data/ext/arrow/values.cpp +3 -1
  6. data/lib/arrow/array-builder.rb +11 -6
  7. data/lib/arrow/array.rb +118 -0
  8. data/lib/arrow/bigdecimal-extension.rb +5 -1
  9. data/lib/arrow/buffer.rb +28 -0
  10. data/lib/arrow/data-type.rb +14 -5
  11. data/lib/arrow/decimal128-array-builder.rb +21 -25
  12. data/lib/arrow/decimal128-data-type.rb +2 -0
  13. data/lib/arrow/decimal128.rb +18 -0
  14. data/lib/arrow/decimal256-array-builder.rb +61 -0
  15. data/lib/arrow/decimal256-array.rb +25 -0
  16. data/lib/arrow/decimal256-data-type.rb +73 -0
  17. data/lib/arrow/decimal256.rb +60 -0
  18. data/lib/arrow/dense-union-data-type.rb +2 -2
  19. data/lib/arrow/dictionary-array.rb +24 -0
  20. data/lib/arrow/dictionary-data-type.rb +2 -2
  21. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  22. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  23. data/lib/arrow/loader.rb +18 -0
  24. data/lib/arrow/raw-table-converter.rb +47 -0
  25. data/lib/arrow/record-batch-iterator.rb +22 -0
  26. data/lib/arrow/record-batch.rb +9 -1
  27. data/lib/arrow/sort-key.rb +193 -0
  28. data/lib/arrow/sort-options.rb +109 -0
  29. data/lib/arrow/sparse-union-data-type.rb +2 -2
  30. data/lib/arrow/struct-array-builder.rb +13 -7
  31. data/lib/arrow/table-saver.rb +6 -6
  32. data/lib/arrow/table.rb +5 -24
  33. data/lib/arrow/time32-data-type.rb +2 -2
  34. data/lib/arrow/time64-data-type.rb +2 -2
  35. data/lib/arrow/timestamp-data-type.rb +2 -2
  36. data/lib/arrow/version.rb +1 -1
  37. data/red-arrow.gemspec +1 -0
  38. data/test/raw-records/test-basic-arrays.rb +17 -0
  39. data/test/raw-records/test-dense-union-array.rb +15 -34
  40. data/test/raw-records/test-list-array.rb +20 -0
  41. data/test/raw-records/test-sparse-union-array.rb +15 -33
  42. data/test/raw-records/test-struct-array.rb +15 -0
  43. data/test/test-array.rb +122 -2
  44. data/test/test-bigdecimal.rb +20 -3
  45. data/test/test-buffer.rb +11 -0
  46. data/test/test-decimal128-array-builder.rb +18 -1
  47. data/test/test-decimal128-data-type.rb +2 -2
  48. data/test/test-decimal128.rb +38 -0
  49. data/test/test-decimal256-array-builder.rb +112 -0
  50. data/test/test-decimal256-array.rb +38 -0
  51. data/test/test-decimal256-data-type.rb +31 -0
  52. data/test/test-decimal256.rb +102 -0
  53. data/test/test-dense-union-data-type.rb +2 -2
  54. data/test/test-dictionary-array.rb +41 -0
  55. data/test/test-feather.rb +1 -1
  56. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  57. data/test/test-fixed-size-binary-array.rb +36 -0
  58. data/test/test-orc.rb +19 -23
  59. data/test/test-record-batch-iterator.rb +37 -0
  60. data/test/test-record-batch.rb +14 -0
  61. data/test/test-sort-indices.rb +40 -0
  62. data/test/test-sort-key.rb +81 -0
  63. data/test/test-sort-options.rb +58 -0
  64. data/test/test-sparse-union-data-type.rb +2 -2
  65. data/test/test-struct-array-builder.rb +16 -12
  66. data/test/test-struct-array.rb +2 -2
  67. data/test/values/test-basic-arrays.rb +11 -0
  68. data/test/values/test-dense-union-array.rb +15 -34
  69. data/test/values/test-list-array.rb +18 -0
  70. data/test/values/test-sparse-union-array.rb +15 -33
  71. data/test/values/test-struct-array.rb +15 -0
  72. metadata +107 -59
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 661c88599c8cb32f10fbea1e03994a861bb0a14da3617a0124c614428dc8e016
4
- data.tar.gz: b478f0588467345bdce0da3c8a10453767bb0ce9cf64586bbd38411363f4e423
3
+ metadata.gz: f048aad4d5cc38373c1acc3976ba7012292c2b21315be7334704b6abbd4076fa
4
+ data.tar.gz: 426aac582a879286bd8f6ae632f4851bcc00025a08032142f07073fd4c665c62
5
5
  SHA512:
6
- metadata.gz: e2c72551f2211db790cb0627754290a5d1ddf6583d0b14b830ee5922786a5096d1ad0e2b78849f8213b2164a4c2e6803fd6317d65d5a3f36cbf71b3c660bbd51
7
- data.tar.gz: b1271385c4e346627dd58340a369742307fb432ce29a0f2acab501c385c1677c50ed885e3528f5d895bc8a35d5e5a99ddc2ee79357c902cd2236186d6da647d5
6
+ metadata.gz: 5f7ab5ce2884d205b7d498db7f74d50c9c6228a4c27de8638fb921535c71fed28aa69ffdcb55bc27300e8b5995d5f31b0e78a29b2bd0513fcbf60d1514a58f79
7
+ data.tar.gz: b6d28706fb0b4845c2b134e22f4988199e8714bf41e24a73d02b38a6161142ad04c7ebf30185791afb3c3a0b7a02701e92193b6ae55ff1e78089d60f3ca91c9a
@@ -212,7 +212,17 @@ namespace red_arrow {
212
212
 
213
213
  inline VALUE convert(const arrow::Decimal128Array& array,
214
214
  const int64_t i) {
215
- decimal_buffer_ = array.FormatValue(i);
215
+ return convert_decimal(std::move(array.FormatValue(i)));
216
+ }
217
+
218
+ inline VALUE convert(const arrow::Decimal256Array& array,
219
+ const int64_t i) {
220
+ return convert_decimal(std::move(array.FormatValue(i)));
221
+ }
222
+
223
+ private:
224
+ inline VALUE convert_decimal(std::string&& value) {
225
+ decimal_buffer_ = value;
216
226
  return rb_funcall(rb_cObject,
217
227
  id_BigDecimal,
218
228
  1,
@@ -221,7 +231,6 @@ namespace red_arrow {
221
231
  rb_ascii8bit_encoding()));
222
232
  }
223
233
 
224
- private:
225
234
  std::string decimal_buffer_;
226
235
  ListArrayValueConverter* list_array_value_converter_;
227
236
  StructArrayValueConverter* struct_array_value_converter_;
@@ -285,9 +294,11 @@ namespace red_arrow {
285
294
  // VISIT(Interval)
286
295
  VISIT(List)
287
296
  VISIT(Struct)
288
- VISIT(Union)
297
+ VISIT(SparseUnion)
298
+ VISIT(DenseUnion)
289
299
  VISIT(Dictionary)
290
300
  VISIT(Decimal128)
301
+ VISIT(Decimal256)
291
302
  // TODO
292
303
  // VISIT(Extension)
293
304
 
@@ -339,9 +350,9 @@ namespace red_arrow {
339
350
  index_ = index;
340
351
  result_ = rb_hash_new();
341
352
  const auto struct_type = array.struct_type();
342
- const auto n = struct_type->num_children();
353
+ const auto n = struct_type->num_fields();
343
354
  for (int i = 0; i < n; ++i) {
344
- const auto field_type = struct_type->child(i).get();
355
+ const auto field_type = struct_type->field(i).get();
345
356
  const auto& field_name = field_type->name();
346
357
  auto key_keep = key_;
347
358
  key_ = rb_utf8_str_new(field_name.data(), field_name.length());
@@ -388,9 +399,11 @@ namespace red_arrow {
388
399
  // VISIT(Interval)
389
400
  VISIT(List)
390
401
  VISIT(Struct)
391
- VISIT(Union)
402
+ VISIT(SparseUnion)
403
+ VISIT(DenseUnion)
392
404
  VISIT(Dictionary)
393
405
  VISIT(Decimal128)
406
+ VISIT(Decimal256)
394
407
  // TODO
395
408
  // VISIT(Extension)
396
409
 
@@ -432,10 +445,10 @@ namespace red_arrow {
432
445
  index_ = index;
433
446
  switch (array.mode()) {
434
447
  case arrow::UnionMode::SPARSE:
435
- convert_sparse(array);
448
+ convert_sparse(static_cast<const arrow::SparseUnionArray&>(array));
436
449
  break;
437
450
  case arrow::UnionMode::DENSE:
438
- convert_dense(array);
451
+ convert_dense(static_cast<const arrow::DenseUnionArray&>(array));
439
452
  break;
440
453
  default:
441
454
  rb_raise(rb_eArgError, "Invalid union mode");
@@ -479,9 +492,11 @@ namespace red_arrow {
479
492
  // VISIT(Interval)
480
493
  VISIT(List)
481
494
  VISIT(Struct)
482
- VISIT(Union)
495
+ VISIT(SparseUnion)
496
+ VISIT(DenseUnion)
483
497
  VISIT(Dictionary)
484
498
  VISIT(Decimal128)
499
+ VISIT(Decimal256)
485
500
  // TODO
486
501
  // VISIT(Extension)
487
502
 
@@ -501,14 +516,14 @@ namespace red_arrow {
501
516
  result_ = result;
502
517
  }
503
518
 
504
- uint8_t compute_child_index(const arrow::UnionArray& array,
519
+ uint8_t compute_field_index(const arrow::UnionArray& array,
505
520
  arrow::UnionType* type,
506
521
  const char* tag) {
507
522
  const auto type_code = array.raw_type_codes()[index_];
508
523
  if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
509
- const auto child_id = type->child_ids()[type_code];
510
- if (child_id >= 0) {
511
- return child_id;
524
+ const auto field_id = type->child_ids()[type_code];
525
+ if (field_id >= 0) {
526
+ return field_id;
512
527
  }
513
528
  }
514
529
  check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
@@ -516,33 +531,33 @@ namespace red_arrow {
516
531
  return 0;
517
532
  }
518
533
 
519
- void convert_sparse(const arrow::UnionArray& array) {
534
+ void convert_sparse(const arrow::SparseUnionArray& array) {
520
535
  const auto type =
521
536
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
522
537
  const auto tag = "[raw-records][union-sparse-array]";
523
- const auto child_index = compute_child_index(array, type, tag);
524
- const auto child_field = type->child(child_index).get();
525
- const auto& field_name = child_field->name();
538
+ const auto index = compute_field_index(array, type, tag);
539
+ const auto field = type->field(index).get();
540
+ const auto& field_name = field->name();
526
541
  const auto field_name_keep = field_name_;
527
542
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
528
- const auto child_array = array.child(child_index).get();
529
- check_status(child_array->Accept(this), tag);
543
+ const auto field_array = array.field(index).get();
544
+ check_status(field_array->Accept(this), tag);
530
545
  field_name_ = field_name_keep;
531
546
  }
532
547
 
533
- void convert_dense(const arrow::UnionArray& array) {
548
+ void convert_dense(const arrow::DenseUnionArray& array) {
534
549
  const auto type =
535
550
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
536
551
  const auto tag = "[raw-records][union-dense-array]";
537
- const auto child_index = compute_child_index(array, type, tag);
538
- const auto child_field = type->child(child_index).get();
539
- const auto& field_name = child_field->name();
552
+ const auto index = compute_field_index(array, type, tag);
553
+ const auto field = type->field(index).get();
554
+ const auto& field_name = field->name();
540
555
  const auto field_name_keep = field_name_;
541
556
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
542
- const auto child_array = array.child(child_index);
557
+ const auto field_array = array.field(index);
543
558
  const auto index_keep = index_;
544
559
  index_ = array.value_offset(index_);
545
- check_status(child_array->Accept(this), tag);
560
+ check_status(field_array->Accept(this), tag);
546
561
  index_ = index_keep;
547
562
  field_name_ = field_name_keep;
548
563
  }
@@ -557,30 +572,58 @@ namespace red_arrow {
557
572
  public:
558
573
  explicit DictionaryArrayValueConverter(ArrayValueConverter* converter)
559
574
  : array_value_converter_(converter),
560
- index_(0),
575
+ value_index_(0),
561
576
  result_(Qnil) {
562
577
  }
563
578
 
564
579
  VALUE convert(const arrow::DictionaryArray& array,
565
580
  const int64_t index) {
566
- index_ = index;
567
- auto indices = array.indices().get();
568
- check_status(indices->Accept(this),
581
+ value_index_ = array.GetValueIndex(index);
582
+ auto dictionary = array.dictionary().get();
583
+ check_status(dictionary->Accept(this),
569
584
  "[raw-records][dictionary-array]");
570
585
  return result_;
571
586
  }
572
587
 
573
- // TODO: Convert to real value.
574
588
  #define VISIT(TYPE) \
575
589
  arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
576
- result_ = convert_value(array, index_); \
590
+ result_ = convert_value(array, value_index_); \
577
591
  return arrow::Status::OK(); \
578
592
  }
579
593
 
594
+ VISIT(Null)
595
+ VISIT(Boolean)
580
596
  VISIT(Int8)
581
597
  VISIT(Int16)
582
598
  VISIT(Int32)
583
599
  VISIT(Int64)
600
+ VISIT(UInt8)
601
+ VISIT(UInt16)
602
+ VISIT(UInt32)
603
+ VISIT(UInt64)
604
+ // TODO
605
+ // VISIT(HalfFloat)
606
+ VISIT(Float)
607
+ VISIT(Double)
608
+ VISIT(Binary)
609
+ VISIT(String)
610
+ VISIT(FixedSizeBinary)
611
+ VISIT(Date32)
612
+ VISIT(Date64)
613
+ VISIT(Time32)
614
+ VISIT(Time64)
615
+ VISIT(Timestamp)
616
+ // TODO
617
+ // VISIT(Interval)
618
+ VISIT(List)
619
+ VISIT(Struct)
620
+ VISIT(SparseUnion)
621
+ VISIT(DenseUnion)
622
+ VISIT(Dictionary)
623
+ VISIT(Decimal128)
624
+ VISIT(Decimal256)
625
+ // TODO
626
+ // VISIT(Extension)
584
627
 
585
628
  #undef VISIT
586
629
 
@@ -592,7 +635,7 @@ namespace red_arrow {
592
635
  }
593
636
 
594
637
  ArrayValueConverter* array_value_converter_;
595
- int64_t index_;
638
+ int64_t value_index_;
596
639
  VALUE result_;
597
640
  };
598
641
 
data/ext/arrow/extconf.rb CHANGED
@@ -16,7 +16,8 @@
16
16
  # under the License.
17
17
 
18
18
  require "extpp"
19
- require "mkmf-gnome2"
19
+ require "mkmf-gnome"
20
+ require_relative "../../lib/arrow/version"
20
21
 
21
22
  arrow_pkg_config_path = ENV["ARROW_PKG_CONFIG_PATH"]
22
23
  if arrow_pkg_config_path
@@ -24,7 +25,12 @@ if arrow_pkg_config_path
24
25
  ENV["PKG_CONFIG_PATH"] = pkg_config_paths.join(File::PATH_SEPARATOR)
25
26
  end
26
27
 
27
- unless required_pkg_config_package("arrow",
28
+ unless required_pkg_config_package([
29
+ "arrow",
30
+ Arrow::Version::MAJOR,
31
+ Arrow::Version::MINOR,
32
+ Arrow::Version::MICRO,
33
+ ],
28
34
  debian: "libarrow-dev",
29
35
  redhat: "arrow-devel",
30
36
  homebrew: "apache-arrow",
@@ -32,7 +38,12 @@ unless required_pkg_config_package("arrow",
32
38
  exit(false)
33
39
  end
34
40
 
35
- unless required_pkg_config_package("arrow-glib",
41
+ unless required_pkg_config_package([
42
+ "arrow-glib",
43
+ Arrow::Version::MAJOR,
44
+ Arrow::Version::MINOR,
45
+ Arrow::Version::MICRO,
46
+ ],
36
47
  debian: "libarrow-glib-dev",
37
48
  redhat: "arrow-glib-devel",
38
49
  homebrew: "apache-arrow-glib",
@@ -100,9 +100,11 @@ namespace red_arrow {
100
100
  // VISIT(Interval)
101
101
  VISIT(List)
102
102
  VISIT(Struct)
103
- VISIT(Union)
103
+ VISIT(SparseUnion)
104
+ VISIT(DenseUnion)
104
105
  VISIT(Dictionary)
105
106
  VISIT(Decimal128)
107
+ VISIT(Decimal256)
106
108
  // TODO
107
109
  // VISIT(Extension)
108
110
 
data/ext/arrow/values.cpp CHANGED
@@ -81,9 +81,11 @@ namespace red_arrow {
81
81
  // VISIT(Interval)
82
82
  VISIT(List)
83
83
  VISIT(Struct)
84
- VISIT(Union)
84
+ VISIT(SparseUnion)
85
+ VISIT(DenseUnion)
85
86
  VISIT(Dictionary)
86
87
  VISIT(Decimal128)
88
+ VISIT(Decimal256)
87
89
  // TODO
88
90
  // VISIT(Extension)
89
91
 
@@ -115,6 +115,17 @@ module Arrow
115
115
  builder: Date32ArrayBuilder.new,
116
116
  detected: true,
117
117
  }
118
+ when BigDecimal
119
+ if value.to_arrow.is_a?(Decimal128)
120
+ {
121
+ builder: Decimal128ArrayBuilder.new,
122
+ }
123
+ else
124
+ {
125
+ builder: Decimal256ArrayBuilder.new,
126
+ detected: true,
127
+ }
128
+ end
118
129
  when ::Array
119
130
  sub_builder_info = nil
120
131
  value.each do |sub_value|
@@ -194,11 +205,5 @@ module Arrow
194
205
  end
195
206
  end
196
207
  end
197
-
198
- def append_nulls(n)
199
- n.times do
200
- append_null
201
- end
202
- end
203
208
  end
204
209
  end
data/lib/arrow/array.rb CHANGED
@@ -100,5 +100,123 @@ module Arrow
100
100
  is_in_raw(values)
101
101
  end
102
102
  end
103
+
104
+ # @api private
105
+ alias_method :concatenate_raw, :concatenate
106
+ # Concatenates the given other arrays to the array.
107
+ #
108
+ # @param other_arrays [::Array, Arrow::Array] The arrays to be
109
+ # concatenated.
110
+ #
111
+ # Each other array is processed by {#resolve} before they're
112
+ # concatenated.
113
+ #
114
+ # @example Raw Ruby Array
115
+ # array = Arrow::Int32Array.new([1])
116
+ # array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
117
+ #
118
+ # @example Arrow::Array
119
+ # array = Arrow::Int32Array.new([1])
120
+ # array.concatenate(Arrow::Int32Array.new([2, 3]),
121
+ # Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
122
+ #
123
+ # @since 4.0.0
124
+ def concatenate(*other_arrays)
125
+ other_arrays = other_arrays.collect do |other_array|
126
+ resolve(other_array)
127
+ end
128
+ concatenate_raw(other_arrays)
129
+ end
130
+
131
+ # Concatenates the given other array to the array.
132
+ #
133
+ # If you have multiple arrays to be concatenated, you should use
134
+ # {#concatenate} to concatenate multiple arrays at once.
135
+ #
136
+ # @param other_array [::Array, Arrow::Array] The array to be concatenated.
137
+ #
138
+ # `@other_array` is processed by {#resolve} before it's
139
+ # concatenated.
140
+ #
141
+ # @example Raw Ruby Array
142
+ # Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
143
+ #
144
+ # @example Arrow::Array
145
+ # Arrow::Int32Array.new([1]) +
146
+ # Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
147
+ #
148
+ # @since 4.0.0
149
+ def +(other_array)
150
+ concatenate(other_array)
151
+ end
152
+
153
+ # Ensures returning the same data type array from the given array.
154
+ #
155
+ # @return [Arrow::Array]
156
+ #
157
+ # @overload resolve(other_raw_array)
158
+ #
159
+ # @param other_raw_array [::Array] A raw Ruby Array. A new Arrow::Array
160
+ # is built by `self.class.new`.
161
+ #
162
+ # @example Raw Ruby Array
163
+ # int32_array = Arrow::Int32Array.new([1])
164
+ # other_array = int32_array.resolve([2, 3, 4])
165
+ # other_array # => Arrow::Int32Array.new([2, 3, 4])
166
+ #
167
+ # @overload resolve(other_array)
168
+ #
169
+ # @param other_array [Arrow::Array] Another Arrow::Array.
170
+ #
171
+ # If the given other array is an same data type array of
172
+ # `self`, the given other array is returned as-is.
173
+ #
174
+ # If the given other array isn't an same data type array of
175
+ # `self`, the given other array is casted.
176
+ #
177
+ # @example Same data type
178
+ # int32_array = Arrow::Int32Array.new([1])
179
+ # other_int32_array = Arrow::Int32Array.new([2, 3, 4])
180
+ # other_array = int32_array.resolve(other_int32_array)
181
+ # other_array.object_id == other_int32_array.object_id
182
+ #
183
+ # @example Other data type
184
+ # int32_array = Arrow::Int32Array.new([1])
185
+ # other_int8_array = Arrow::Int8Array.new([2, 3, 4])
186
+ # other_array = int32_array.resolve(other_int32_array)
187
+ # other_array #=> Arrow::Int32Array.new([2, 3, 4])
188
+ #
189
+ # @since 4.0.0
190
+ def resolve(other_array)
191
+ if other_array.is_a?(::Array)
192
+ builder_class = self.class.builder_class
193
+ if builder_class.nil?
194
+ message =
195
+ "[array][resolve] can't build #{value_data_type} array " +
196
+ "from raw Ruby Array"
197
+ raise ArgumentError, message
198
+ end
199
+ if builder_class.buildable?([other_array])
200
+ other_array = builder_class.build(other_array)
201
+ elsif builder_class.buildable?([value_data_type, other_array])
202
+ other_array = builder_class.build(value_data_type, other_array)
203
+ else
204
+ message =
205
+ "[array][resolve] need to implement " +
206
+ "a feature that building #{value_data_type} array " +
207
+ "from raw Ruby Array"
208
+ raise NotImpelemented, message
209
+ end
210
+ other_array
211
+ elsif other_array.respond_to?(:value_data_type)
212
+ return other_array if value_data_type == other_array.value_data_type
213
+ other_array.cast(value_data_type)
214
+ else
215
+ message =
216
+ "[array][resolve] can't build #{value_data_type} array: " +
217
+ "#{other_array.inspect}"
218
+ raise ArgumentError, message
219
+ end
220
+ end
103
221
  end
104
222
  end