red-arrow 0.17.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +75 -32
  3. data/ext/arrow/extconf.rb +14 -3
  4. data/ext/arrow/raw-records.cpp +3 -1
  5. data/ext/arrow/values.cpp +3 -1
  6. data/lib/arrow/array-builder.rb +11 -6
  7. data/lib/arrow/bigdecimal-extension.rb +5 -1
  8. data/lib/arrow/buffer.rb +28 -0
  9. data/lib/arrow/decimal128-array-builder.rb +21 -25
  10. data/lib/arrow/decimal128-data-type.rb +2 -0
  11. data/lib/arrow/decimal128.rb +18 -0
  12. data/lib/arrow/decimal256-array-builder.rb +61 -0
  13. data/lib/arrow/decimal256-array.rb +25 -0
  14. data/lib/arrow/decimal256-data-type.rb +73 -0
  15. data/lib/arrow/decimal256.rb +60 -0
  16. data/lib/arrow/dictionary-array.rb +24 -0
  17. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  18. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  19. data/lib/arrow/loader.rb +16 -0
  20. data/lib/arrow/raw-table-converter.rb +47 -0
  21. data/lib/arrow/record-batch-iterator.rb +22 -0
  22. data/lib/arrow/record-batch.rb +9 -1
  23. data/lib/arrow/struct-array-builder.rb +13 -7
  24. data/lib/arrow/table-saver.rb +6 -6
  25. data/lib/arrow/table.rb +5 -24
  26. data/lib/arrow/version.rb +1 -1
  27. data/red-arrow.gemspec +1 -0
  28. data/test/raw-records/test-basic-arrays.rb +17 -0
  29. data/test/raw-records/test-dense-union-array.rb +15 -34
  30. data/test/raw-records/test-list-array.rb +20 -0
  31. data/test/raw-records/test-sparse-union-array.rb +15 -33
  32. data/test/raw-records/test-struct-array.rb +15 -0
  33. data/test/test-array.rb +2 -2
  34. data/test/test-bigdecimal.rb +20 -3
  35. data/test/test-buffer.rb +11 -0
  36. data/test/test-decimal128-array-builder.rb +18 -1
  37. data/test/test-decimal128.rb +38 -0
  38. data/test/test-decimal256-array-builder.rb +112 -0
  39. data/test/test-decimal256-array.rb +38 -0
  40. data/test/test-decimal256-data-type.rb +31 -0
  41. data/test/test-decimal256.rb +102 -0
  42. data/test/test-dense-union-data-type.rb +2 -2
  43. data/test/test-dictionary-array.rb +41 -0
  44. data/test/test-feather.rb +1 -1
  45. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  46. data/test/test-fixed-size-binary-array.rb +36 -0
  47. data/test/test-record-batch-iterator.rb +37 -0
  48. data/test/test-record-batch.rb +14 -0
  49. data/test/test-sparse-union-data-type.rb +2 -2
  50. data/test/test-struct-array-builder.rb +16 -12
  51. data/test/test-struct-array.rb +2 -2
  52. data/test/values/test-basic-arrays.rb +11 -0
  53. data/test/values/test-dense-union-array.rb +15 -34
  54. data/test/values/test-list-array.rb +18 -0
  55. data/test/values/test-sparse-union-array.rb +15 -33
  56. data/test/values/test-struct-array.rb +15 -0
  57. metadata +96 -56
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ba4a00fdcc27e2e75985b0d0da37eff70530aef9431d5234da72517313ebad55
4
- data.tar.gz: 50fb004195b273ec5cbf3e9fa7c92fcb3a7755f010e2ddd469d84c7c27a104df
3
+ metadata.gz: a70c1505c294a8f74c992ec0f2ab5d651647e45dd178db8523f5b5c01e64a541
4
+ data.tar.gz: dad979599033104a25d5be3d05c57e552c803f9c8002d66b757866c425937ce9
5
5
  SHA512:
6
- metadata.gz: 0d0e852a7d0ea89c03ac752e779a1bd5b0ec6a81a10b6b457d66857f2dacb5e445dc41f5254e8985f3d7b8ec6328cedea92bd22758b84a7e935278374e51fcd3
7
- data.tar.gz: 4aa5f3c817f2ef1a475ed7f272f37f495d4f9c66f07a98381a04a6b16ee9b803904f2c0982048003b0bbe6038369d33b3632b6cb437d849d25179b504233e142
6
+ metadata.gz: b2363ba6468985a3d237cbb9cec8b2ccb5031aae94153b8f263916e811524e8a5326f408867a843b7077f624050fabcde93520b26e6f8ceb1dd094aaa0068ed6
7
+ data.tar.gz: 797b89062dfd212d92ca957b8e51c6dc8acf16ab51e5a60ecb0cbf31dd979c288f2afb71274b2235a24d31da344cfce93b40830d6dc6edd000ce0aa607ce8cde
@@ -212,7 +212,17 @@ namespace red_arrow {
212
212
 
213
213
  inline VALUE convert(const arrow::Decimal128Array& array,
214
214
  const int64_t i) {
215
- decimal_buffer_ = array.FormatValue(i);
215
+ return convert_decimal(std::move(array.FormatValue(i)));
216
+ }
217
+
218
+ inline VALUE convert(const arrow::Decimal256Array& array,
219
+ const int64_t i) {
220
+ return convert_decimal(std::move(array.FormatValue(i)));
221
+ }
222
+
223
+ private:
224
+ inline VALUE convert_decimal(std::string&& value) {
225
+ decimal_buffer_ = value;
216
226
  return rb_funcall(rb_cObject,
217
227
  id_BigDecimal,
218
228
  1,
@@ -221,7 +231,6 @@ namespace red_arrow {
221
231
  rb_ascii8bit_encoding()));
222
232
  }
223
233
 
224
- private:
225
234
  std::string decimal_buffer_;
226
235
  ListArrayValueConverter* list_array_value_converter_;
227
236
  StructArrayValueConverter* struct_array_value_converter_;
@@ -285,9 +294,11 @@ namespace red_arrow {
285
294
  // VISIT(Interval)
286
295
  VISIT(List)
287
296
  VISIT(Struct)
288
- VISIT(Union)
297
+ VISIT(SparseUnion)
298
+ VISIT(DenseUnion)
289
299
  VISIT(Dictionary)
290
300
  VISIT(Decimal128)
301
+ VISIT(Decimal256)
291
302
  // TODO
292
303
  // VISIT(Extension)
293
304
 
@@ -339,9 +350,9 @@ namespace red_arrow {
339
350
  index_ = index;
340
351
  result_ = rb_hash_new();
341
352
  const auto struct_type = array.struct_type();
342
- const auto n = struct_type->num_children();
353
+ const auto n = struct_type->num_fields();
343
354
  for (int i = 0; i < n; ++i) {
344
- const auto field_type = struct_type->child(i).get();
355
+ const auto field_type = struct_type->field(i).get();
345
356
  const auto& field_name = field_type->name();
346
357
  auto key_keep = key_;
347
358
  key_ = rb_utf8_str_new(field_name.data(), field_name.length());
@@ -388,9 +399,11 @@ namespace red_arrow {
388
399
  // VISIT(Interval)
389
400
  VISIT(List)
390
401
  VISIT(Struct)
391
- VISIT(Union)
402
+ VISIT(SparseUnion)
403
+ VISIT(DenseUnion)
392
404
  VISIT(Dictionary)
393
405
  VISIT(Decimal128)
406
+ VISIT(Decimal256)
394
407
  // TODO
395
408
  // VISIT(Extension)
396
409
 
@@ -432,10 +445,10 @@ namespace red_arrow {
432
445
  index_ = index;
433
446
  switch (array.mode()) {
434
447
  case arrow::UnionMode::SPARSE:
435
- convert_sparse(array);
448
+ convert_sparse(static_cast<const arrow::SparseUnionArray&>(array));
436
449
  break;
437
450
  case arrow::UnionMode::DENSE:
438
- convert_dense(array);
451
+ convert_dense(static_cast<const arrow::DenseUnionArray&>(array));
439
452
  break;
440
453
  default:
441
454
  rb_raise(rb_eArgError, "Invalid union mode");
@@ -479,9 +492,11 @@ namespace red_arrow {
479
492
  // VISIT(Interval)
480
493
  VISIT(List)
481
494
  VISIT(Struct)
482
- VISIT(Union)
495
+ VISIT(SparseUnion)
496
+ VISIT(DenseUnion)
483
497
  VISIT(Dictionary)
484
498
  VISIT(Decimal128)
499
+ VISIT(Decimal256)
485
500
  // TODO
486
501
  // VISIT(Extension)
487
502
 
@@ -501,14 +516,14 @@ namespace red_arrow {
501
516
  result_ = result;
502
517
  }
503
518
 
504
- uint8_t compute_child_index(const arrow::UnionArray& array,
519
+ uint8_t compute_field_index(const arrow::UnionArray& array,
505
520
  arrow::UnionType* type,
506
521
  const char* tag) {
507
522
  const auto type_code = array.raw_type_codes()[index_];
508
523
  if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
509
- const auto child_id = type->child_ids()[type_code];
510
- if (child_id >= 0) {
511
- return child_id;
524
+ const auto field_id = type->child_ids()[type_code];
525
+ if (field_id >= 0) {
526
+ return field_id;
512
527
  }
513
528
  }
514
529
  check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
@@ -516,33 +531,33 @@ namespace red_arrow {
516
531
  return 0;
517
532
  }
518
533
 
519
- void convert_sparse(const arrow::UnionArray& array) {
534
+ void convert_sparse(const arrow::SparseUnionArray& array) {
520
535
  const auto type =
521
536
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
522
537
  const auto tag = "[raw-records][union-sparse-array]";
523
- const auto child_index = compute_child_index(array, type, tag);
524
- const auto child_field = type->child(child_index).get();
525
- const auto& field_name = child_field->name();
538
+ const auto index = compute_field_index(array, type, tag);
539
+ const auto field = type->field(index).get();
540
+ const auto& field_name = field->name();
526
541
  const auto field_name_keep = field_name_;
527
542
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
528
- const auto child_array = array.child(child_index).get();
529
- check_status(child_array->Accept(this), tag);
543
+ const auto field_array = array.field(index).get();
544
+ check_status(field_array->Accept(this), tag);
530
545
  field_name_ = field_name_keep;
531
546
  }
532
547
 
533
- void convert_dense(const arrow::UnionArray& array) {
548
+ void convert_dense(const arrow::DenseUnionArray& array) {
534
549
  const auto type =
535
550
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
536
551
  const auto tag = "[raw-records][union-dense-array]";
537
- const auto child_index = compute_child_index(array, type, tag);
538
- const auto child_field = type->child(child_index).get();
539
- const auto& field_name = child_field->name();
552
+ const auto index = compute_field_index(array, type, tag);
553
+ const auto field = type->field(index).get();
554
+ const auto& field_name = field->name();
540
555
  const auto field_name_keep = field_name_;
541
556
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
542
- const auto child_array = array.child(child_index);
557
+ const auto field_array = array.field(index);
543
558
  const auto index_keep = index_;
544
559
  index_ = array.value_offset(index_);
545
- check_status(child_array->Accept(this), tag);
560
+ check_status(field_array->Accept(this), tag);
546
561
  index_ = index_keep;
547
562
  field_name_ = field_name_keep;
548
563
  }
@@ -557,30 +572,58 @@ namespace red_arrow {
557
572
  public:
558
573
  explicit DictionaryArrayValueConverter(ArrayValueConverter* converter)
559
574
  : array_value_converter_(converter),
560
- index_(0),
575
+ value_index_(0),
561
576
  result_(Qnil) {
562
577
  }
563
578
 
564
579
  VALUE convert(const arrow::DictionaryArray& array,
565
580
  const int64_t index) {
566
- index_ = index;
567
- auto indices = array.indices().get();
568
- check_status(indices->Accept(this),
581
+ value_index_ = array.GetValueIndex(index);
582
+ auto dictionary = array.dictionary().get();
583
+ check_status(dictionary->Accept(this),
569
584
  "[raw-records][dictionary-array]");
570
585
  return result_;
571
586
  }
572
587
 
573
- // TODO: Convert to real value.
574
588
  #define VISIT(TYPE) \
575
589
  arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
576
- result_ = convert_value(array, index_); \
590
+ result_ = convert_value(array, value_index_); \
577
591
  return arrow::Status::OK(); \
578
592
  }
579
593
 
594
+ VISIT(Null)
595
+ VISIT(Boolean)
580
596
  VISIT(Int8)
581
597
  VISIT(Int16)
582
598
  VISIT(Int32)
583
599
  VISIT(Int64)
600
+ VISIT(UInt8)
601
+ VISIT(UInt16)
602
+ VISIT(UInt32)
603
+ VISIT(UInt64)
604
+ // TODO
605
+ // VISIT(HalfFloat)
606
+ VISIT(Float)
607
+ VISIT(Double)
608
+ VISIT(Binary)
609
+ VISIT(String)
610
+ VISIT(FixedSizeBinary)
611
+ VISIT(Date32)
612
+ VISIT(Date64)
613
+ VISIT(Time32)
614
+ VISIT(Time64)
615
+ VISIT(Timestamp)
616
+ // TODO
617
+ // VISIT(Interval)
618
+ VISIT(List)
619
+ VISIT(Struct)
620
+ VISIT(SparseUnion)
621
+ VISIT(DenseUnion)
622
+ VISIT(Dictionary)
623
+ VISIT(Decimal128)
624
+ VISIT(Decimal256)
625
+ // TODO
626
+ // VISIT(Extension)
584
627
 
585
628
  #undef VISIT
586
629
 
@@ -592,7 +635,7 @@ namespace red_arrow {
592
635
  }
593
636
 
594
637
  ArrayValueConverter* array_value_converter_;
595
- int64_t index_;
638
+ int64_t value_index_;
596
639
  VALUE result_;
597
640
  };
598
641
 
@@ -16,7 +16,8 @@
16
16
  # under the License.
17
17
 
18
18
  require "extpp"
19
- require "mkmf-gnome2"
19
+ require "mkmf-gnome"
20
+ require_relative "../../lib/arrow/version"
20
21
 
21
22
  arrow_pkg_config_path = ENV["ARROW_PKG_CONFIG_PATH"]
22
23
  if arrow_pkg_config_path
@@ -24,7 +25,12 @@ if arrow_pkg_config_path
24
25
  ENV["PKG_CONFIG_PATH"] = pkg_config_paths.join(File::PATH_SEPARATOR)
25
26
  end
26
27
 
27
- unless required_pkg_config_package("arrow",
28
+ unless required_pkg_config_package([
29
+ "arrow",
30
+ Arrow::Version::MAJOR,
31
+ Arrow::Version::MINOR,
32
+ Arrow::Version::MICRO,
33
+ ],
28
34
  debian: "libarrow-dev",
29
35
  redhat: "arrow-devel",
30
36
  homebrew: "apache-arrow",
@@ -32,7 +38,12 @@ unless required_pkg_config_package("arrow",
32
38
  exit(false)
33
39
  end
34
40
 
35
- unless required_pkg_config_package("arrow-glib",
41
+ unless required_pkg_config_package([
42
+ "arrow-glib",
43
+ Arrow::Version::MAJOR,
44
+ Arrow::Version::MINOR,
45
+ Arrow::Version::MICRO,
46
+ ],
36
47
  debian: "libarrow-glib-dev",
37
48
  redhat: "arrow-glib-devel",
38
49
  homebrew: "apache-arrow-glib",
@@ -100,9 +100,11 @@ namespace red_arrow {
100
100
  // VISIT(Interval)
101
101
  VISIT(List)
102
102
  VISIT(Struct)
103
- VISIT(Union)
103
+ VISIT(SparseUnion)
104
+ VISIT(DenseUnion)
104
105
  VISIT(Dictionary)
105
106
  VISIT(Decimal128)
107
+ VISIT(Decimal256)
106
108
  // TODO
107
109
  // VISIT(Extension)
108
110
 
@@ -81,9 +81,11 @@ namespace red_arrow {
81
81
  // VISIT(Interval)
82
82
  VISIT(List)
83
83
  VISIT(Struct)
84
- VISIT(Union)
84
+ VISIT(SparseUnion)
85
+ VISIT(DenseUnion)
85
86
  VISIT(Dictionary)
86
87
  VISIT(Decimal128)
88
+ VISIT(Decimal256)
87
89
  // TODO
88
90
  // VISIT(Extension)
89
91
 
@@ -115,6 +115,17 @@ module Arrow
115
115
  builder: Date32ArrayBuilder.new,
116
116
  detected: true,
117
117
  }
118
+ when BigDecimal
119
+ if value.to_arrow.is_a?(Decimal128)
120
+ {
121
+ builder: Decimal128ArrayBuilder.new,
122
+ }
123
+ else
124
+ {
125
+ builder: Decimal256ArrayBuilder.new,
126
+ detected: true,
127
+ }
128
+ end
118
129
  when ::Array
119
130
  sub_builder_info = nil
120
131
  value.each do |sub_value|
@@ -194,11 +205,5 @@ module Arrow
194
205
  end
195
206
  end
196
207
  end
197
-
198
- def append_nulls(n)
199
- n.times do
200
- append_null
201
- end
202
- end
203
208
  end
204
209
  end
@@ -19,6 +19,10 @@ require "bigdecimal"
19
19
 
20
20
  class BigDecimal
21
21
  def to_arrow
22
- Arrow::Decimal128.new(to_s)
22
+ if precision <= Arrow::Decimal128DataType::MAX_PRECISION
23
+ Arrow::Decimal128.new(to_s)
24
+ else
25
+ Arrow::Decimal256.new(to_s)
26
+ end
23
27
  end
24
28
  end
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Buffer
20
+ alias_method :initialize_raw, :initialize
21
+ private :initialize_raw
22
+
23
+ def initialize(data)
24
+ @data = data
25
+ initialize_raw(data)
26
+ end
27
+ end
28
+ end
@@ -26,36 +26,32 @@ module Arrow
26
26
 
27
27
  alias_method :append_value_raw, :append_value
28
28
  def append_value(value)
29
- case value
30
- when nil
31
- return append_null
32
- when String
33
- value = Decimal128.new(value)
34
- when Float
35
- value = Decimal128.new(value.to_s)
36
- when BigDecimal
37
- value = value.to_arrow
38
- end
39
- append_value_raw(value)
29
+ append_value_raw(normalize_value(value))
40
30
  end
41
31
 
32
+ alias_method :append_values_raw, :append_values
42
33
  def append_values(values, is_valids=nil)
43
- if is_valids
44
- is_valids.each_with_index do |is_valid, i|
45
- if is_valid
46
- append_value(values[i])
47
- else
48
- append_null
49
- end
34
+ if values.is_a?(::Array)
35
+ values = values.collect do |value|
36
+ normalize_value(value)
50
37
  end
38
+ append_values_raw(values, is_valids)
51
39
  else
52
- values.each do |value|
53
- if value.nil?
54
- append_null
55
- else
56
- append_value(value)
57
- end
58
- end
40
+ append_values_packed(values, is_valids)
41
+ end
42
+ end
43
+
44
+ private
45
+ def normalize_value(value)
46
+ case value
47
+ when String
48
+ Decimal128.new(value)
49
+ when Float
50
+ Decimal128.new(value.to_s)
51
+ when BigDecimal
52
+ Decimal128.new(value.to_s)
53
+ else
54
+ value
59
55
  end
60
56
  end
61
57
  end
@@ -17,6 +17,8 @@
17
17
 
18
18
  module Arrow
19
19
  class Decimal128DataType
20
+ MAX_PRECISION = max_precision
21
+
20
22
  alias_method :initialize_raw, :initialize
21
23
  private :initialize_raw
22
24
 
@@ -38,5 +38,23 @@ module Arrow
38
38
  to_s_raw
39
39
  end
40
40
  end
41
+
42
+ alias_method :abs!, :abs
43
+
44
+ # @since 3.0.0
45
+ def abs
46
+ copied = dup
47
+ copied.abs!
48
+ copied
49
+ end
50
+
51
+ alias_method :negate!, :negate
52
+
53
+ # @since 3.0.0
54
+ def negate
55
+ copied = dup
56
+ copied.negate!
57
+ copied
58
+ end
41
59
  end
42
60
  end