red-arrow 0.17.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +75 -32
  3. data/ext/arrow/extconf.rb +14 -3
  4. data/ext/arrow/raw-records.cpp +3 -1
  5. data/ext/arrow/values.cpp +3 -1
  6. data/lib/arrow/array-builder.rb +11 -6
  7. data/lib/arrow/array.rb +118 -0
  8. data/lib/arrow/bigdecimal-extension.rb +5 -1
  9. data/lib/arrow/buffer.rb +28 -0
  10. data/lib/arrow/data-type.rb +14 -5
  11. data/lib/arrow/decimal128-array-builder.rb +21 -25
  12. data/lib/arrow/decimal128-data-type.rb +2 -0
  13. data/lib/arrow/decimal128.rb +18 -0
  14. data/lib/arrow/decimal256-array-builder.rb +61 -0
  15. data/lib/arrow/decimal256-array.rb +25 -0
  16. data/lib/arrow/decimal256-data-type.rb +73 -0
  17. data/lib/arrow/decimal256.rb +60 -0
  18. data/lib/arrow/dense-union-data-type.rb +2 -2
  19. data/lib/arrow/dictionary-array.rb +24 -0
  20. data/lib/arrow/dictionary-data-type.rb +2 -2
  21. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  22. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  23. data/lib/arrow/loader.rb +18 -0
  24. data/lib/arrow/raw-table-converter.rb +47 -0
  25. data/lib/arrow/record-batch-iterator.rb +22 -0
  26. data/lib/arrow/record-batch.rb +9 -1
  27. data/lib/arrow/sort-key.rb +193 -0
  28. data/lib/arrow/sort-options.rb +109 -0
  29. data/lib/arrow/sparse-union-data-type.rb +2 -2
  30. data/lib/arrow/struct-array-builder.rb +13 -7
  31. data/lib/arrow/table-saver.rb +6 -6
  32. data/lib/arrow/table.rb +5 -24
  33. data/lib/arrow/time32-data-type.rb +2 -2
  34. data/lib/arrow/time64-data-type.rb +2 -2
  35. data/lib/arrow/timestamp-data-type.rb +2 -2
  36. data/lib/arrow/version.rb +1 -1
  37. data/red-arrow.gemspec +1 -0
  38. data/test/raw-records/test-basic-arrays.rb +17 -0
  39. data/test/raw-records/test-dense-union-array.rb +15 -34
  40. data/test/raw-records/test-list-array.rb +20 -0
  41. data/test/raw-records/test-sparse-union-array.rb +15 -33
  42. data/test/raw-records/test-struct-array.rb +15 -0
  43. data/test/test-array.rb +122 -2
  44. data/test/test-bigdecimal.rb +20 -3
  45. data/test/test-buffer.rb +11 -0
  46. data/test/test-decimal128-array-builder.rb +18 -1
  47. data/test/test-decimal128-data-type.rb +2 -2
  48. data/test/test-decimal128.rb +38 -0
  49. data/test/test-decimal256-array-builder.rb +112 -0
  50. data/test/test-decimal256-array.rb +38 -0
  51. data/test/test-decimal256-data-type.rb +31 -0
  52. data/test/test-decimal256.rb +102 -0
  53. data/test/test-dense-union-data-type.rb +2 -2
  54. data/test/test-dictionary-array.rb +41 -0
  55. data/test/test-feather.rb +1 -1
  56. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  57. data/test/test-fixed-size-binary-array.rb +36 -0
  58. data/test/test-orc.rb +19 -23
  59. data/test/test-record-batch-iterator.rb +37 -0
  60. data/test/test-record-batch.rb +14 -0
  61. data/test/test-sort-indices.rb +40 -0
  62. data/test/test-sort-key.rb +81 -0
  63. data/test/test-sort-options.rb +58 -0
  64. data/test/test-sparse-union-data-type.rb +2 -2
  65. data/test/test-struct-array-builder.rb +16 -12
  66. data/test/test-struct-array.rb +2 -2
  67. data/test/values/test-basic-arrays.rb +11 -0
  68. data/test/values/test-dense-union-array.rb +15 -34
  69. data/test/values/test-list-array.rb +18 -0
  70. data/test/values/test-sparse-union-array.rb +15 -33
  71. data/test/values/test-struct-array.rb +15 -0
  72. metadata +107 -59
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 661c88599c8cb32f10fbea1e03994a861bb0a14da3617a0124c614428dc8e016
4
- data.tar.gz: b478f0588467345bdce0da3c8a10453767bb0ce9cf64586bbd38411363f4e423
3
+ metadata.gz: f048aad4d5cc38373c1acc3976ba7012292c2b21315be7334704b6abbd4076fa
4
+ data.tar.gz: 426aac582a879286bd8f6ae632f4851bcc00025a08032142f07073fd4c665c62
5
5
  SHA512:
6
- metadata.gz: e2c72551f2211db790cb0627754290a5d1ddf6583d0b14b830ee5922786a5096d1ad0e2b78849f8213b2164a4c2e6803fd6317d65d5a3f36cbf71b3c660bbd51
7
- data.tar.gz: b1271385c4e346627dd58340a369742307fb432ce29a0f2acab501c385c1677c50ed885e3528f5d895bc8a35d5e5a99ddc2ee79357c902cd2236186d6da647d5
6
+ metadata.gz: 5f7ab5ce2884d205b7d498db7f74d50c9c6228a4c27de8638fb921535c71fed28aa69ffdcb55bc27300e8b5995d5f31b0e78a29b2bd0513fcbf60d1514a58f79
7
+ data.tar.gz: b6d28706fb0b4845c2b134e22f4988199e8714bf41e24a73d02b38a6161142ad04c7ebf30185791afb3c3a0b7a02701e92193b6ae55ff1e78089d60f3ca91c9a
data/ext/arrow/converters.hpp CHANGED
@@ -212,7 +212,17 @@ namespace red_arrow {
212
212
 
213
213
  inline VALUE convert(const arrow::Decimal128Array& array,
214
214
  const int64_t i) {
215
- decimal_buffer_ = array.FormatValue(i);
215
+ return convert_decimal(std::move(array.FormatValue(i)));
216
+ }
217
+
218
+ inline VALUE convert(const arrow::Decimal256Array& array,
219
+ const int64_t i) {
220
+ return convert_decimal(std::move(array.FormatValue(i)));
221
+ }
222
+
223
+ private:
224
+ inline VALUE convert_decimal(std::string&& value) {
225
+ decimal_buffer_ = value;
216
226
  return rb_funcall(rb_cObject,
217
227
  id_BigDecimal,
218
228
  1,
@@ -221,7 +231,6 @@ namespace red_arrow {
221
231
  rb_ascii8bit_encoding()));
222
232
  }
223
233
 
224
- private:
225
234
  std::string decimal_buffer_;
226
235
  ListArrayValueConverter* list_array_value_converter_;
227
236
  StructArrayValueConverter* struct_array_value_converter_;
@@ -285,9 +294,11 @@ namespace red_arrow {
285
294
  // VISIT(Interval)
286
295
  VISIT(List)
287
296
  VISIT(Struct)
288
- VISIT(Union)
297
+ VISIT(SparseUnion)
298
+ VISIT(DenseUnion)
289
299
  VISIT(Dictionary)
290
300
  VISIT(Decimal128)
301
+ VISIT(Decimal256)
291
302
  // TODO
292
303
  // VISIT(Extension)
293
304
 
@@ -339,9 +350,9 @@ namespace red_arrow {
339
350
  index_ = index;
340
351
  result_ = rb_hash_new();
341
352
  const auto struct_type = array.struct_type();
342
- const auto n = struct_type->num_children();
353
+ const auto n = struct_type->num_fields();
343
354
  for (int i = 0; i < n; ++i) {
344
- const auto field_type = struct_type->child(i).get();
355
+ const auto field_type = struct_type->field(i).get();
345
356
  const auto& field_name = field_type->name();
346
357
  auto key_keep = key_;
347
358
  key_ = rb_utf8_str_new(field_name.data(), field_name.length());
@@ -388,9 +399,11 @@ namespace red_arrow {
388
399
  // VISIT(Interval)
389
400
  VISIT(List)
390
401
  VISIT(Struct)
391
- VISIT(Union)
402
+ VISIT(SparseUnion)
403
+ VISIT(DenseUnion)
392
404
  VISIT(Dictionary)
393
405
  VISIT(Decimal128)
406
+ VISIT(Decimal256)
394
407
  // TODO
395
408
  // VISIT(Extension)
396
409
 
@@ -432,10 +445,10 @@ namespace red_arrow {
432
445
  index_ = index;
433
446
  switch (array.mode()) {
434
447
  case arrow::UnionMode::SPARSE:
435
- convert_sparse(array);
448
+ convert_sparse(static_cast<const arrow::SparseUnionArray&>(array));
436
449
  break;
437
450
  case arrow::UnionMode::DENSE:
438
- convert_dense(array);
451
+ convert_dense(static_cast<const arrow::DenseUnionArray&>(array));
439
452
  break;
440
453
  default:
441
454
  rb_raise(rb_eArgError, "Invalid union mode");
@@ -479,9 +492,11 @@ namespace red_arrow {
479
492
  // VISIT(Interval)
480
493
  VISIT(List)
481
494
  VISIT(Struct)
482
- VISIT(Union)
495
+ VISIT(SparseUnion)
496
+ VISIT(DenseUnion)
483
497
  VISIT(Dictionary)
484
498
  VISIT(Decimal128)
499
+ VISIT(Decimal256)
485
500
  // TODO
486
501
  // VISIT(Extension)
487
502
 
@@ -501,14 +516,14 @@ namespace red_arrow {
501
516
  result_ = result;
502
517
  }
503
518
 
504
- uint8_t compute_child_index(const arrow::UnionArray& array,
519
+ uint8_t compute_field_index(const arrow::UnionArray& array,
505
520
  arrow::UnionType* type,
506
521
  const char* tag) {
507
522
  const auto type_code = array.raw_type_codes()[index_];
508
523
  if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
509
- const auto child_id = type->child_ids()[type_code];
510
- if (child_id >= 0) {
511
- return child_id;
524
+ const auto field_id = type->child_ids()[type_code];
525
+ if (field_id >= 0) {
526
+ return field_id;
512
527
  }
513
528
  }
514
529
  check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
@@ -516,33 +531,33 @@ namespace red_arrow {
516
531
  return 0;
517
532
  }
518
533
 
519
- void convert_sparse(const arrow::UnionArray& array) {
534
+ void convert_sparse(const arrow::SparseUnionArray& array) {
520
535
  const auto type =
521
536
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
522
537
  const auto tag = "[raw-records][union-sparse-array]";
523
- const auto child_index = compute_child_index(array, type, tag);
524
- const auto child_field = type->child(child_index).get();
525
- const auto& field_name = child_field->name();
538
+ const auto index = compute_field_index(array, type, tag);
539
+ const auto field = type->field(index).get();
540
+ const auto& field_name = field->name();
526
541
  const auto field_name_keep = field_name_;
527
542
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
528
- const auto child_array = array.child(child_index).get();
529
- check_status(child_array->Accept(this), tag);
543
+ const auto field_array = array.field(index).get();
544
+ check_status(field_array->Accept(this), tag);
530
545
  field_name_ = field_name_keep;
531
546
  }
532
547
 
533
- void convert_dense(const arrow::UnionArray& array) {
548
+ void convert_dense(const arrow::DenseUnionArray& array) {
534
549
  const auto type =
535
550
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
536
551
  const auto tag = "[raw-records][union-dense-array]";
537
- const auto child_index = compute_child_index(array, type, tag);
538
- const auto child_field = type->child(child_index).get();
539
- const auto& field_name = child_field->name();
552
+ const auto index = compute_field_index(array, type, tag);
553
+ const auto field = type->field(index).get();
554
+ const auto& field_name = field->name();
540
555
  const auto field_name_keep = field_name_;
541
556
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
542
- const auto child_array = array.child(child_index);
557
+ const auto field_array = array.field(index);
543
558
  const auto index_keep = index_;
544
559
  index_ = array.value_offset(index_);
545
- check_status(child_array->Accept(this), tag);
560
+ check_status(field_array->Accept(this), tag);
546
561
  index_ = index_keep;
547
562
  field_name_ = field_name_keep;
548
563
  }
@@ -557,30 +572,58 @@ namespace red_arrow {
557
572
  public:
558
573
  explicit DictionaryArrayValueConverter(ArrayValueConverter* converter)
559
574
  : array_value_converter_(converter),
560
- index_(0),
575
+ value_index_(0),
561
576
  result_(Qnil) {
562
577
  }
563
578
 
564
579
  VALUE convert(const arrow::DictionaryArray& array,
565
580
  const int64_t index) {
566
- index_ = index;
567
- auto indices = array.indices().get();
568
- check_status(indices->Accept(this),
581
+ value_index_ = array.GetValueIndex(index);
582
+ auto dictionary = array.dictionary().get();
583
+ check_status(dictionary->Accept(this),
569
584
  "[raw-records][dictionary-array]");
570
585
  return result_;
571
586
  }
572
587
 
573
- // TODO: Convert to real value.
574
588
  #define VISIT(TYPE) \
575
589
  arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
576
- result_ = convert_value(array, index_); \
590
+ result_ = convert_value(array, value_index_); \
577
591
  return arrow::Status::OK(); \
578
592
  }
579
593
 
594
+ VISIT(Null)
595
+ VISIT(Boolean)
580
596
  VISIT(Int8)
581
597
  VISIT(Int16)
582
598
  VISIT(Int32)
583
599
  VISIT(Int64)
600
+ VISIT(UInt8)
601
+ VISIT(UInt16)
602
+ VISIT(UInt32)
603
+ VISIT(UInt64)
604
+ // TODO
605
+ // VISIT(HalfFloat)
606
+ VISIT(Float)
607
+ VISIT(Double)
608
+ VISIT(Binary)
609
+ VISIT(String)
610
+ VISIT(FixedSizeBinary)
611
+ VISIT(Date32)
612
+ VISIT(Date64)
613
+ VISIT(Time32)
614
+ VISIT(Time64)
615
+ VISIT(Timestamp)
616
+ // TODO
617
+ // VISIT(Interval)
618
+ VISIT(List)
619
+ VISIT(Struct)
620
+ VISIT(SparseUnion)
621
+ VISIT(DenseUnion)
622
+ VISIT(Dictionary)
623
+ VISIT(Decimal128)
624
+ VISIT(Decimal256)
625
+ // TODO
626
+ // VISIT(Extension)
584
627
 
585
628
  #undef VISIT
586
629
 
@@ -592,7 +635,7 @@ namespace red_arrow {
592
635
  }
593
636
 
594
637
  ArrayValueConverter* array_value_converter_;
595
- int64_t index_;
638
+ int64_t value_index_;
596
639
  VALUE result_;
597
640
  };
598
641
 
data/ext/arrow/extconf.rb CHANGED
@@ -16,7 +16,8 @@
16
16
  # under the License.
17
17
 
18
18
  require "extpp"
19
- require "mkmf-gnome2"
19
+ require "mkmf-gnome"
20
+ require_relative "../../lib/arrow/version"
20
21
 
21
22
  arrow_pkg_config_path = ENV["ARROW_PKG_CONFIG_PATH"]
22
23
  if arrow_pkg_config_path
@@ -24,7 +25,12 @@ if arrow_pkg_config_path
24
25
  ENV["PKG_CONFIG_PATH"] = pkg_config_paths.join(File::PATH_SEPARATOR)
25
26
  end
26
27
 
27
- unless required_pkg_config_package("arrow",
28
+ unless required_pkg_config_package([
29
+ "arrow",
30
+ Arrow::Version::MAJOR,
31
+ Arrow::Version::MINOR,
32
+ Arrow::Version::MICRO,
33
+ ],
28
34
  debian: "libarrow-dev",
29
35
  redhat: "arrow-devel",
30
36
  homebrew: "apache-arrow",
@@ -32,7 +38,12 @@ unless required_pkg_config_package("arrow",
32
38
  exit(false)
33
39
  end
34
40
 
35
- unless required_pkg_config_package("arrow-glib",
41
+ unless required_pkg_config_package([
42
+ "arrow-glib",
43
+ Arrow::Version::MAJOR,
44
+ Arrow::Version::MINOR,
45
+ Arrow::Version::MICRO,
46
+ ],
36
47
  debian: "libarrow-glib-dev",
37
48
  redhat: "arrow-glib-devel",
38
49
  homebrew: "apache-arrow-glib",
data/ext/arrow/raw-records.cpp CHANGED
@@ -100,9 +100,11 @@ namespace red_arrow {
100
100
  // VISIT(Interval)
101
101
  VISIT(List)
102
102
  VISIT(Struct)
103
- VISIT(Union)
103
+ VISIT(SparseUnion)
104
+ VISIT(DenseUnion)
104
105
  VISIT(Dictionary)
105
106
  VISIT(Decimal128)
107
+ VISIT(Decimal256)
106
108
  // TODO
107
109
  // VISIT(Extension)
108
110
 
data/ext/arrow/values.cpp CHANGED
@@ -81,9 +81,11 @@ namespace red_arrow {
81
81
  // VISIT(Interval)
82
82
  VISIT(List)
83
83
  VISIT(Struct)
84
- VISIT(Union)
84
+ VISIT(SparseUnion)
85
+ VISIT(DenseUnion)
85
86
  VISIT(Dictionary)
86
87
  VISIT(Decimal128)
88
+ VISIT(Decimal256)
87
89
  // TODO
88
90
  // VISIT(Extension)
89
91
 
data/lib/arrow/array-builder.rb CHANGED
@@ -115,6 +115,17 @@ module Arrow
115
115
  builder: Date32ArrayBuilder.new,
116
116
  detected: true,
117
117
  }
118
+ when BigDecimal
119
+ if value.to_arrow.is_a?(Decimal128)
120
+ {
121
+ builder: Decimal128ArrayBuilder.new,
122
+ }
123
+ else
124
+ {
125
+ builder: Decimal256ArrayBuilder.new,
126
+ detected: true,
127
+ }
128
+ end
118
129
  when ::Array
119
130
  sub_builder_info = nil
120
131
  value.each do |sub_value|
@@ -194,11 +205,5 @@ module Arrow
194
205
  end
195
206
  end
196
207
  end
197
-
198
- def append_nulls(n)
199
- n.times do
200
- append_null
201
- end
202
- end
203
208
  end
204
209
  end
data/lib/arrow/array.rb CHANGED
@@ -100,5 +100,123 @@ module Arrow
100
100
  is_in_raw(values)
101
101
  end
102
102
  end
103
+
104
+ # @api private
105
+ alias_method :concatenate_raw, :concatenate
106
+ # Concatenates the given other arrays to the array.
107
+ #
108
+ # @param other_arrays [::Array, Arrow::Array] The arrays to be
109
+ # concatenated.
110
+ #
111
+ # Each other array is processed by {#resolve} before they're
112
+ # concatenated.
113
+ #
114
+ # @example Raw Ruby Array
115
+ # array = Arrow::Int32Array.new([1])
116
+ # array.concatenate([2, 3], [4]) # => Arrow::Int32Array.new([1, 2, 3, 4])
117
+ #
118
+ # @example Arrow::Array
119
+ # array = Arrow::Int32Array.new([1])
120
+ # array.concatenate(Arrow::Int32Array.new([2, 3]),
121
+ # Arrow::Int8Array.new([4])) # => Arrow::Int32Array.new([1, 2, 3, 4])
122
+ #
123
+ # @since 4.0.0
124
+ def concatenate(*other_arrays)
125
+ other_arrays = other_arrays.collect do |other_array|
126
+ resolve(other_array)
127
+ end
128
+ concatenate_raw(other_arrays)
129
+ end
130
+
131
+ # Concatenates the given other array to the array.
132
+ #
133
+ # If you have multiple arrays to be concatenated, you should use
134
+ # {#concatenate} to concatenate multiple arrays at once.
135
+ #
136
+ # @param other_array [::Array, Arrow::Array] The array to be concatenated.
137
+ #
138
+ # `@other_array` is processed by {#resolve} before it's
139
+ # concatenated.
140
+ #
141
+ # @example Raw Ruby Array
142
+ # Arrow::Int32Array.new([1]) + [2, 3] # => Arrow::Int32Array.new([1, 2, 3])
143
+ #
144
+ # @example Arrow::Array
145
+ # Arrow::Int32Array.new([1]) +
146
+ # Arrow::Int32Array.new([2, 3]) # => Arrow::Int32Array.new([1, 2, 3])
147
+ #
148
+ # @since 4.0.0
149
+ def +(other_array)
150
+ concatenate(other_array)
151
+ end
152
+
153
+ # Ensures returning the same data type array from the given array.
154
+ #
155
+ # @return [Arrow::Array]
156
+ #
157
+ # @overload resolve(other_raw_array)
158
+ #
159
+ # @param other_raw_array [::Array] A raw Ruby Array. A new Arrow::Array
160
+ # is built by `self.class.new`.
161
+ #
162
+ # @example Raw Ruby Array
163
+ # int32_array = Arrow::Int32Array.new([1])
164
+ # other_array = int32_array.resolve([2, 3, 4])
165
+ # other_array # => Arrow::Int32Array.new([2, 3, 4])
166
+ #
167
+ # @overload resolve(other_array)
168
+ #
169
+ # @param other_array [Arrow::Array] Another Arrow::Array.
170
+ #
171
+ # If the given other array is an same data type array of
172
+ # `self`, the given other array is returned as-is.
173
+ #
174
+ # If the given other array isn't an same data type array of
175
+ # `self`, the given other array is casted.
176
+ #
177
+ # @example Same data type
178
+ # int32_array = Arrow::Int32Array.new([1])
179
+ # other_int32_array = Arrow::Int32Array.new([2, 3, 4])
180
+ # other_array = int32_array.resolve(other_int32_array)
181
+ # other_array.object_id == other_int32_array.object_id
182
+ #
183
+ # @example Other data type
184
+ # int32_array = Arrow::Int32Array.new([1])
185
+ # other_int8_array = Arrow::Int8Array.new([2, 3, 4])
186
+ # other_array = int32_array.resolve(other_int32_array)
187
+ # other_array #=> Arrow::Int32Array.new([2, 3, 4])
188
+ #
189
+ # @since 4.0.0
190
+ def resolve(other_array)
191
+ if other_array.is_a?(::Array)
192
+ builder_class = self.class.builder_class
193
+ if builder_class.nil?
194
+ message =
195
+ "[array][resolve] can't build #{value_data_type} array " +
196
+ "from raw Ruby Array"
197
+ raise ArgumentError, message
198
+ end
199
+ if builder_class.buildable?([other_array])
200
+ other_array = builder_class.build(other_array)
201
+ elsif builder_class.buildable?([value_data_type, other_array])
202
+ other_array = builder_class.build(value_data_type, other_array)
203
+ else
204
+ message =
205
+ "[array][resolve] need to implement " +
206
+ "a feature that building #{value_data_type} array " +
207
+ "from raw Ruby Array"
208
+ raise NotImpelemented, message
209
+ end
210
+ other_array
211
+ elsif other_array.respond_to?(:value_data_type)
212
+ return other_array if value_data_type == other_array.value_data_type
213
+ other_array.cast(value_data_type)
214
+ else
215
+ message =
216
+ "[array][resolve] can't build #{value_data_type} array: " +
217
+ "#{other_array.inspect}"
218
+ raise ArgumentError, message
219
+ end
220
+ end
103
221
  end
104
222
  end