red-arrow 0.17.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/converters.hpp +75 -32
  3. data/ext/arrow/extconf.rb +14 -3
  4. data/ext/arrow/raw-records.cpp +3 -1
  5. data/ext/arrow/values.cpp +3 -1
  6. data/lib/arrow/array-builder.rb +11 -6
  7. data/lib/arrow/bigdecimal-extension.rb +5 -1
  8. data/lib/arrow/buffer.rb +28 -0
  9. data/lib/arrow/decimal128-array-builder.rb +21 -25
  10. data/lib/arrow/decimal128-data-type.rb +2 -0
  11. data/lib/arrow/decimal128.rb +18 -0
  12. data/lib/arrow/decimal256-array-builder.rb +61 -0
  13. data/lib/arrow/decimal256-array.rb +25 -0
  14. data/lib/arrow/decimal256-data-type.rb +73 -0
  15. data/lib/arrow/decimal256.rb +60 -0
  16. data/lib/arrow/dictionary-array.rb +24 -0
  17. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  18. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  19. data/lib/arrow/loader.rb +16 -0
  20. data/lib/arrow/raw-table-converter.rb +47 -0
  21. data/lib/arrow/record-batch-iterator.rb +22 -0
  22. data/lib/arrow/record-batch.rb +9 -1
  23. data/lib/arrow/struct-array-builder.rb +13 -7
  24. data/lib/arrow/table-saver.rb +6 -6
  25. data/lib/arrow/table.rb +5 -24
  26. data/lib/arrow/version.rb +1 -1
  27. data/red-arrow.gemspec +1 -0
  28. data/test/raw-records/test-basic-arrays.rb +17 -0
  29. data/test/raw-records/test-dense-union-array.rb +15 -34
  30. data/test/raw-records/test-list-array.rb +20 -0
  31. data/test/raw-records/test-sparse-union-array.rb +15 -33
  32. data/test/raw-records/test-struct-array.rb +15 -0
  33. data/test/test-array.rb +2 -2
  34. data/test/test-bigdecimal.rb +20 -3
  35. data/test/test-buffer.rb +11 -0
  36. data/test/test-decimal128-array-builder.rb +18 -1
  37. data/test/test-decimal128.rb +38 -0
  38. data/test/test-decimal256-array-builder.rb +112 -0
  39. data/test/test-decimal256-array.rb +38 -0
  40. data/test/test-decimal256-data-type.rb +31 -0
  41. data/test/test-decimal256.rb +102 -0
  42. data/test/test-dense-union-data-type.rb +2 -2
  43. data/test/test-dictionary-array.rb +41 -0
  44. data/test/test-feather.rb +1 -1
  45. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  46. data/test/test-fixed-size-binary-array.rb +36 -0
  47. data/test/test-record-batch-iterator.rb +37 -0
  48. data/test/test-record-batch.rb +14 -0
  49. data/test/test-sparse-union-data-type.rb +2 -2
  50. data/test/test-struct-array-builder.rb +16 -12
  51. data/test/test-struct-array.rb +2 -2
  52. data/test/values/test-basic-arrays.rb +11 -0
  53. data/test/values/test-dense-union-array.rb +15 -34
  54. data/test/values/test-list-array.rb +18 -0
  55. data/test/values/test-sparse-union-array.rb +15 -33
  56. data/test/values/test-struct-array.rb +15 -0
  57. metadata +96 -56
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ba4a00fdcc27e2e75985b0d0da37eff70530aef9431d5234da72517313ebad55
4
- data.tar.gz: 50fb004195b273ec5cbf3e9fa7c92fcb3a7755f010e2ddd469d84c7c27a104df
3
+ metadata.gz: a70c1505c294a8f74c992ec0f2ab5d651647e45dd178db8523f5b5c01e64a541
4
+ data.tar.gz: dad979599033104a25d5be3d05c57e552c803f9c8002d66b757866c425937ce9
5
5
  SHA512:
6
- metadata.gz: 0d0e852a7d0ea89c03ac752e779a1bd5b0ec6a81a10b6b457d66857f2dacb5e445dc41f5254e8985f3d7b8ec6328cedea92bd22758b84a7e935278374e51fcd3
7
- data.tar.gz: 4aa5f3c817f2ef1a475ed7f272f37f495d4f9c66f07a98381a04a6b16ee9b803904f2c0982048003b0bbe6038369d33b3632b6cb437d849d25179b504233e142
6
+ metadata.gz: b2363ba6468985a3d237cbb9cec8b2ccb5031aae94153b8f263916e811524e8a5326f408867a843b7077f624050fabcde93520b26e6f8ceb1dd094aaa0068ed6
7
+ data.tar.gz: 797b89062dfd212d92ca957b8e51c6dc8acf16ab51e5a60ecb0cbf31dd979c288f2afb71274b2235a24d31da344cfce93b40830d6dc6edd000ce0aa607ce8cde
@@ -212,7 +212,17 @@ namespace red_arrow {
212
212
 
213
213
  inline VALUE convert(const arrow::Decimal128Array& array,
214
214
  const int64_t i) {
215
- decimal_buffer_ = array.FormatValue(i);
215
+ return convert_decimal(std::move(array.FormatValue(i)));
216
+ }
217
+
218
+ inline VALUE convert(const arrow::Decimal256Array& array,
219
+ const int64_t i) {
220
+ return convert_decimal(std::move(array.FormatValue(i)));
221
+ }
222
+
223
+ private:
224
+ inline VALUE convert_decimal(std::string&& value) {
225
+ decimal_buffer_ = value;
216
226
  return rb_funcall(rb_cObject,
217
227
  id_BigDecimal,
218
228
  1,
@@ -221,7 +231,6 @@ namespace red_arrow {
221
231
  rb_ascii8bit_encoding()));
222
232
  }
223
233
 
224
- private:
225
234
  std::string decimal_buffer_;
226
235
  ListArrayValueConverter* list_array_value_converter_;
227
236
  StructArrayValueConverter* struct_array_value_converter_;
@@ -285,9 +294,11 @@ namespace red_arrow {
285
294
  // VISIT(Interval)
286
295
  VISIT(List)
287
296
  VISIT(Struct)
288
- VISIT(Union)
297
+ VISIT(SparseUnion)
298
+ VISIT(DenseUnion)
289
299
  VISIT(Dictionary)
290
300
  VISIT(Decimal128)
301
+ VISIT(Decimal256)
291
302
  // TODO
292
303
  // VISIT(Extension)
293
304
 
@@ -339,9 +350,9 @@ namespace red_arrow {
339
350
  index_ = index;
340
351
  result_ = rb_hash_new();
341
352
  const auto struct_type = array.struct_type();
342
- const auto n = struct_type->num_children();
353
+ const auto n = struct_type->num_fields();
343
354
  for (int i = 0; i < n; ++i) {
344
- const auto field_type = struct_type->child(i).get();
355
+ const auto field_type = struct_type->field(i).get();
345
356
  const auto& field_name = field_type->name();
346
357
  auto key_keep = key_;
347
358
  key_ = rb_utf8_str_new(field_name.data(), field_name.length());
@@ -388,9 +399,11 @@ namespace red_arrow {
388
399
  // VISIT(Interval)
389
400
  VISIT(List)
390
401
  VISIT(Struct)
391
- VISIT(Union)
402
+ VISIT(SparseUnion)
403
+ VISIT(DenseUnion)
392
404
  VISIT(Dictionary)
393
405
  VISIT(Decimal128)
406
+ VISIT(Decimal256)
394
407
  // TODO
395
408
  // VISIT(Extension)
396
409
 
@@ -432,10 +445,10 @@ namespace red_arrow {
432
445
  index_ = index;
433
446
  switch (array.mode()) {
434
447
  case arrow::UnionMode::SPARSE:
435
- convert_sparse(array);
448
+ convert_sparse(static_cast<const arrow::SparseUnionArray&>(array));
436
449
  break;
437
450
  case arrow::UnionMode::DENSE:
438
- convert_dense(array);
451
+ convert_dense(static_cast<const arrow::DenseUnionArray&>(array));
439
452
  break;
440
453
  default:
441
454
  rb_raise(rb_eArgError, "Invalid union mode");
@@ -479,9 +492,11 @@ namespace red_arrow {
479
492
  // VISIT(Interval)
480
493
  VISIT(List)
481
494
  VISIT(Struct)
482
- VISIT(Union)
495
+ VISIT(SparseUnion)
496
+ VISIT(DenseUnion)
483
497
  VISIT(Dictionary)
484
498
  VISIT(Decimal128)
499
+ VISIT(Decimal256)
485
500
  // TODO
486
501
  // VISIT(Extension)
487
502
 
@@ -501,14 +516,14 @@ namespace red_arrow {
501
516
  result_ = result;
502
517
  }
503
518
 
504
- uint8_t compute_child_index(const arrow::UnionArray& array,
519
+ uint8_t compute_field_index(const arrow::UnionArray& array,
505
520
  arrow::UnionType* type,
506
521
  const char* tag) {
507
522
  const auto type_code = array.raw_type_codes()[index_];
508
523
  if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
509
- const auto child_id = type->child_ids()[type_code];
510
- if (child_id >= 0) {
511
- return child_id;
524
+ const auto field_id = type->child_ids()[type_code];
525
+ if (field_id >= 0) {
526
+ return field_id;
512
527
  }
513
528
  }
514
529
  check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
@@ -516,33 +531,33 @@ namespace red_arrow {
516
531
  return 0;
517
532
  }
518
533
 
519
- void convert_sparse(const arrow::UnionArray& array) {
534
+ void convert_sparse(const arrow::SparseUnionArray& array) {
520
535
  const auto type =
521
536
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
522
537
  const auto tag = "[raw-records][union-sparse-array]";
523
- const auto child_index = compute_child_index(array, type, tag);
524
- const auto child_field = type->child(child_index).get();
525
- const auto& field_name = child_field->name();
538
+ const auto index = compute_field_index(array, type, tag);
539
+ const auto field = type->field(index).get();
540
+ const auto& field_name = field->name();
526
541
  const auto field_name_keep = field_name_;
527
542
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
528
- const auto child_array = array.child(child_index).get();
529
- check_status(child_array->Accept(this), tag);
543
+ const auto field_array = array.field(index).get();
544
+ check_status(field_array->Accept(this), tag);
530
545
  field_name_ = field_name_keep;
531
546
  }
532
547
 
533
- void convert_dense(const arrow::UnionArray& array) {
548
+ void convert_dense(const arrow::DenseUnionArray& array) {
534
549
  const auto type =
535
550
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
536
551
  const auto tag = "[raw-records][union-dense-array]";
537
- const auto child_index = compute_child_index(array, type, tag);
538
- const auto child_field = type->child(child_index).get();
539
- const auto& field_name = child_field->name();
552
+ const auto index = compute_field_index(array, type, tag);
553
+ const auto field = type->field(index).get();
554
+ const auto& field_name = field->name();
540
555
  const auto field_name_keep = field_name_;
541
556
  field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
542
- const auto child_array = array.child(child_index);
557
+ const auto field_array = array.field(index);
543
558
  const auto index_keep = index_;
544
559
  index_ = array.value_offset(index_);
545
- check_status(child_array->Accept(this), tag);
560
+ check_status(field_array->Accept(this), tag);
546
561
  index_ = index_keep;
547
562
  field_name_ = field_name_keep;
548
563
  }
@@ -557,30 +572,58 @@ namespace red_arrow {
557
572
  public:
558
573
  explicit DictionaryArrayValueConverter(ArrayValueConverter* converter)
559
574
  : array_value_converter_(converter),
560
- index_(0),
575
+ value_index_(0),
561
576
  result_(Qnil) {
562
577
  }
563
578
 
564
579
  VALUE convert(const arrow::DictionaryArray& array,
565
580
  const int64_t index) {
566
- index_ = index;
567
- auto indices = array.indices().get();
568
- check_status(indices->Accept(this),
581
+ value_index_ = array.GetValueIndex(index);
582
+ auto dictionary = array.dictionary().get();
583
+ check_status(dictionary->Accept(this),
569
584
  "[raw-records][dictionary-array]");
570
585
  return result_;
571
586
  }
572
587
 
573
- // TODO: Convert to real value.
574
588
  #define VISIT(TYPE) \
575
589
  arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
576
- result_ = convert_value(array, index_); \
590
+ result_ = convert_value(array, value_index_); \
577
591
  return arrow::Status::OK(); \
578
592
  }
579
593
 
594
+ VISIT(Null)
595
+ VISIT(Boolean)
580
596
  VISIT(Int8)
581
597
  VISIT(Int16)
582
598
  VISIT(Int32)
583
599
  VISIT(Int64)
600
+ VISIT(UInt8)
601
+ VISIT(UInt16)
602
+ VISIT(UInt32)
603
+ VISIT(UInt64)
604
+ // TODO
605
+ // VISIT(HalfFloat)
606
+ VISIT(Float)
607
+ VISIT(Double)
608
+ VISIT(Binary)
609
+ VISIT(String)
610
+ VISIT(FixedSizeBinary)
611
+ VISIT(Date32)
612
+ VISIT(Date64)
613
+ VISIT(Time32)
614
+ VISIT(Time64)
615
+ VISIT(Timestamp)
616
+ // TODO
617
+ // VISIT(Interval)
618
+ VISIT(List)
619
+ VISIT(Struct)
620
+ VISIT(SparseUnion)
621
+ VISIT(DenseUnion)
622
+ VISIT(Dictionary)
623
+ VISIT(Decimal128)
624
+ VISIT(Decimal256)
625
+ // TODO
626
+ // VISIT(Extension)
584
627
 
585
628
  #undef VISIT
586
629
 
@@ -592,7 +635,7 @@ namespace red_arrow {
592
635
  }
593
636
 
594
637
  ArrayValueConverter* array_value_converter_;
595
- int64_t index_;
638
+ int64_t value_index_;
596
639
  VALUE result_;
597
640
  };
598
641
 
@@ -16,7 +16,8 @@
16
16
  # under the License.
17
17
 
18
18
  require "extpp"
19
- require "mkmf-gnome2"
19
+ require "mkmf-gnome"
20
+ require_relative "../../lib/arrow/version"
20
21
 
21
22
  arrow_pkg_config_path = ENV["ARROW_PKG_CONFIG_PATH"]
22
23
  if arrow_pkg_config_path
@@ -24,7 +25,12 @@ if arrow_pkg_config_path
24
25
  ENV["PKG_CONFIG_PATH"] = pkg_config_paths.join(File::PATH_SEPARATOR)
25
26
  end
26
27
 
27
- unless required_pkg_config_package("arrow",
28
+ unless required_pkg_config_package([
29
+ "arrow",
30
+ Arrow::Version::MAJOR,
31
+ Arrow::Version::MINOR,
32
+ Arrow::Version::MICRO,
33
+ ],
28
34
  debian: "libarrow-dev",
29
35
  redhat: "arrow-devel",
30
36
  homebrew: "apache-arrow",
@@ -32,7 +38,12 @@ unless required_pkg_config_package("arrow",
32
38
  exit(false)
33
39
  end
34
40
 
35
- unless required_pkg_config_package("arrow-glib",
41
+ unless required_pkg_config_package([
42
+ "arrow-glib",
43
+ Arrow::Version::MAJOR,
44
+ Arrow::Version::MINOR,
45
+ Arrow::Version::MICRO,
46
+ ],
36
47
  debian: "libarrow-glib-dev",
37
48
  redhat: "arrow-glib-devel",
38
49
  homebrew: "apache-arrow-glib",
@@ -100,9 +100,11 @@ namespace red_arrow {
100
100
  // VISIT(Interval)
101
101
  VISIT(List)
102
102
  VISIT(Struct)
103
- VISIT(Union)
103
+ VISIT(SparseUnion)
104
+ VISIT(DenseUnion)
104
105
  VISIT(Dictionary)
105
106
  VISIT(Decimal128)
107
+ VISIT(Decimal256)
106
108
  // TODO
107
109
  // VISIT(Extension)
108
110
 
@@ -81,9 +81,11 @@ namespace red_arrow {
81
81
  // VISIT(Interval)
82
82
  VISIT(List)
83
83
  VISIT(Struct)
84
- VISIT(Union)
84
+ VISIT(SparseUnion)
85
+ VISIT(DenseUnion)
85
86
  VISIT(Dictionary)
86
87
  VISIT(Decimal128)
88
+ VISIT(Decimal256)
87
89
  // TODO
88
90
  // VISIT(Extension)
89
91
 
@@ -115,6 +115,17 @@ module Arrow
115
115
  builder: Date32ArrayBuilder.new,
116
116
  detected: true,
117
117
  }
118
+ when BigDecimal
119
+ if value.to_arrow.is_a?(Decimal128)
120
+ {
121
+ builder: Decimal128ArrayBuilder.new,
122
+ }
123
+ else
124
+ {
125
+ builder: Decimal256ArrayBuilder.new,
126
+ detected: true,
127
+ }
128
+ end
118
129
  when ::Array
119
130
  sub_builder_info = nil
120
131
  value.each do |sub_value|
@@ -194,11 +205,5 @@ module Arrow
194
205
  end
195
206
  end
196
207
  end
197
-
198
- def append_nulls(n)
199
- n.times do
200
- append_null
201
- end
202
- end
203
208
  end
204
209
  end
@@ -19,6 +19,10 @@ require "bigdecimal"
19
19
 
20
20
  class BigDecimal
21
21
  def to_arrow
22
- Arrow::Decimal128.new(to_s)
22
+ if precision <= Arrow::Decimal128DataType::MAX_PRECISION
23
+ Arrow::Decimal128.new(to_s)
24
+ else
25
+ Arrow::Decimal256.new(to_s)
26
+ end
23
27
  end
24
28
  end
@@ -0,0 +1,28 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ module Arrow
19
+ class Buffer
20
+ alias_method :initialize_raw, :initialize
21
+ private :initialize_raw
22
+
23
+ def initialize(data)
24
+ @data = data
25
+ initialize_raw(data)
26
+ end
27
+ end
28
+ end
@@ -26,36 +26,32 @@ module Arrow
26
26
 
27
27
  alias_method :append_value_raw, :append_value
28
28
  def append_value(value)
29
- case value
30
- when nil
31
- return append_null
32
- when String
33
- value = Decimal128.new(value)
34
- when Float
35
- value = Decimal128.new(value.to_s)
36
- when BigDecimal
37
- value = value.to_arrow
38
- end
39
- append_value_raw(value)
29
+ append_value_raw(normalize_value(value))
40
30
  end
41
31
 
32
+ alias_method :append_values_raw, :append_values
42
33
  def append_values(values, is_valids=nil)
43
- if is_valids
44
- is_valids.each_with_index do |is_valid, i|
45
- if is_valid
46
- append_value(values[i])
47
- else
48
- append_null
49
- end
34
+ if values.is_a?(::Array)
35
+ values = values.collect do |value|
36
+ normalize_value(value)
50
37
  end
38
+ append_values_raw(values, is_valids)
51
39
  else
52
- values.each do |value|
53
- if value.nil?
54
- append_null
55
- else
56
- append_value(value)
57
- end
58
- end
40
+ append_values_packed(values, is_valids)
41
+ end
42
+ end
43
+
44
+ private
45
+ def normalize_value(value)
46
+ case value
47
+ when String
48
+ Decimal128.new(value)
49
+ when Float
50
+ Decimal128.new(value.to_s)
51
+ when BigDecimal
52
+ Decimal128.new(value.to_s)
53
+ else
54
+ value
59
55
  end
60
56
  end
61
57
  end
@@ -17,6 +17,8 @@
17
17
 
18
18
  module Arrow
19
19
  class Decimal128DataType
20
+ MAX_PRECISION = max_precision
21
+
20
22
  alias_method :initialize_raw, :initialize
21
23
  private :initialize_raw
22
24
 
@@ -38,5 +38,23 @@ module Arrow
38
38
  to_s_raw
39
39
  end
40
40
  end
41
+
42
+ alias_method :abs!, :abs
43
+
44
+ # @since 3.0.0
45
+ def abs
46
+ copied = dup
47
+ copied.abs!
48
+ copied
49
+ end
50
+
51
+ alias_method :negate!, :negate
52
+
53
+ # @since 3.0.0
54
+ def negate
55
+ copied = dup
56
+ copied.negate!
57
+ copied
58
+ end
41
59
  end
42
60
  end