red-arrow 8.0.0 → 24.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +15 -7
  3. data/ext/arrow/arrow.cpp +67 -0
  4. data/ext/arrow/converters.cpp +10 -0
  5. data/ext/arrow/converters.hpp +310 -46
  6. data/ext/arrow/extconf.rb +41 -22
  7. data/ext/arrow/raw-records.cpp +165 -2
  8. data/ext/arrow/red-arrow.hpp +2 -0
  9. data/ext/arrow/values.cpp +6 -2
  10. data/lib/arrow/array-builder.rb +89 -14
  11. data/{test/test-time32-data-type.rb → lib/arrow/array-computable.rb} +24 -16
  12. data/{test/test-buffer.rb → lib/arrow/array-statistics.rb} +19 -24
  13. data/lib/arrow/array.rb +40 -4
  14. data/lib/arrow/chunked-array.rb +56 -1
  15. data/lib/arrow/column-containable.rb +9 -0
  16. data/lib/arrow/column.rb +49 -4
  17. data/{test/test-tensor.rb → lib/arrow/csv-write-options.rb} +28 -31
  18. data/lib/arrow/data-type.rb +17 -3
  19. data/lib/arrow/decimal128-array-builder.rb +16 -6
  20. data/lib/arrow/decimal128.rb +14 -0
  21. data/lib/arrow/decimal256-array-builder.rb +16 -6
  22. data/lib/arrow/decimal256.rb +14 -0
  23. data/{test/test-float-scalar.rb → lib/arrow/dense-union-array-builder.rb} +27 -24
  24. data/{test/test-boolean-scalar.rb → lib/arrow/dense-union-array.rb} +7 -7
  25. data/lib/arrow/duration-array-builder.rb +27 -0
  26. data/lib/arrow/duration-array.rb +24 -0
  27. data/lib/arrow/duration-data-type.rb +32 -0
  28. data/lib/arrow/expression.rb +6 -2
  29. data/lib/arrow/field-containable.rb +1 -1
  30. data/lib/arrow/field.rb +44 -3
  31. data/lib/arrow/fixed-size-list-array-builder.rb +29 -0
  32. data/lib/arrow/fixed-size-list-data-type.rb +118 -0
  33. data/lib/arrow/function.rb +0 -1
  34. data/lib/arrow/half-float-array-builder.rb +32 -0
  35. data/lib/arrow/half-float-array.rb +24 -0
  36. data/lib/arrow/half-float.rb +118 -0
  37. data/{test/helper/fixture.rb → lib/arrow/input-referable.rb} +7 -6
  38. data/lib/arrow/jruby/array-builder.rb +114 -0
  39. data/lib/arrow/jruby/array.rb +109 -0
  40. data/lib/arrow/jruby/chunked-array.rb +36 -0
  41. data/lib/arrow/jruby/compression-type.rb +26 -0
  42. data/lib/arrow/jruby/csv-read-options.rb +32 -0
  43. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  44. data/lib/arrow/jruby/decimal128.rb +28 -0
  45. data/lib/arrow/jruby/decimal256.rb +28 -0
  46. data/{test/fixture/float-integer.csv → lib/arrow/jruby/error.rb} +7 -4
  47. data/lib/arrow/jruby/file-system.rb +24 -0
  48. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  49. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  50. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  51. data/{test/fixture/integer-float.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  52. data/lib/arrow/jruby/sort-options.rb +24 -0
  53. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  54. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  55. data/lib/arrow/jruby/writable.rb +24 -0
  56. data/lib/arrow/jruby.rb +52 -0
  57. data/{test/test-date32-array.rb → lib/arrow/large-list-array-builder.rb} +10 -5
  58. data/lib/arrow/large-list-data-type.rb +83 -0
  59. data/lib/arrow/libraries.rb +140 -0
  60. data/lib/arrow/list-array-builder.rb +1 -68
  61. data/lib/arrow/list-data-type.rb +3 -38
  62. data/{test/test-dictionary-array.rb → lib/arrow/list-field-resolvable.rb} +26 -17
  63. data/lib/arrow/list-slice-options.rb +76 -0
  64. data/lib/arrow/list-values-appendable.rb +88 -0
  65. data/lib/arrow/loader.rb +15 -96
  66. data/{test/test-decimal128-array.rb → lib/arrow/make-struct-options.rb} +18 -18
  67. data/lib/arrow/raw-table-converter.rb +10 -3
  68. data/lib/arrow/raw-tensor-converter.rb +89 -0
  69. data/lib/arrow/record-batch-file-reader.rb +2 -0
  70. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  71. data/lib/arrow/record-batch.rb +6 -2
  72. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +5 -4
  73. data/lib/arrow/scalar.rb +67 -0
  74. data/lib/arrow/slicer.rb +61 -0
  75. data/lib/arrow/sort-key.rb +3 -3
  76. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  77. data/lib/arrow/sparse-union-array.rb +26 -0
  78. data/lib/arrow/stream-decoder.rb +29 -0
  79. data/{test/test-decimal256-data-type.rb → lib/arrow/stream-listener.rb} +25 -9
  80. data/lib/arrow/string-array-builder.rb +30 -0
  81. data/lib/arrow/struct-array-builder.rb +0 -5
  82. data/lib/arrow/table-formatter.rb +38 -8
  83. data/lib/arrow/table-list-formatter.rb +3 -3
  84. data/lib/arrow/table-loader.rb +11 -5
  85. data/lib/arrow/table-saver.rb +4 -3
  86. data/lib/arrow/table-table-formatter.rb +7 -0
  87. data/lib/arrow/table.rb +180 -33
  88. data/lib/arrow/tensor.rb +144 -0
  89. data/lib/arrow/time-unit.rb +31 -0
  90. data/lib/arrow/time32-array-builder.rb +2 -14
  91. data/lib/arrow/time32-data-type.rb +9 -38
  92. data/lib/arrow/time64-array-builder.rb +2 -14
  93. data/lib/arrow/time64-data-type.rb +9 -38
  94. data/lib/arrow/timestamp-array-builder.rb +3 -15
  95. data/lib/arrow/timestamp-data-type.rb +9 -34
  96. data/{test/test-date64-array.rb → lib/arrow/timestamp-parser.rb} +14 -6
  97. data/lib/arrow/union-array-builder.rb +59 -0
  98. data/lib/arrow/union-array.rb +26 -0
  99. data/lib/arrow/version.rb +1 -1
  100. data/lib/arrow.rb +2 -7
  101. data/red-arrow.gemspec +74 -11
  102. metadata +85 -210
  103. data/test/fixture/TestOrcFile.test1.orc +0 -0
  104. data/test/fixture/with-header-float.csv +0 -20
  105. data/test/fixture/with-header.csv +0 -20
  106. data/test/fixture/without-header-float.csv +0 -19
  107. data/test/fixture/without-header.csv +0 -19
  108. data/test/helper/omittable.rb +0 -36
  109. data/test/helper.rb +0 -30
  110. data/test/raw-records/test-basic-arrays.rb +0 -395
  111. data/test/raw-records/test-dense-union-array.rb +0 -521
  112. data/test/raw-records/test-list-array.rb +0 -610
  113. data/test/raw-records/test-map-array.rb +0 -478
  114. data/test/raw-records/test-multiple-columns.rb +0 -65
  115. data/test/raw-records/test-sparse-union-array.rb +0 -511
  116. data/test/raw-records/test-struct-array.rb +0 -515
  117. data/test/raw-records/test-table.rb +0 -47
  118. data/test/run-test.rb +0 -71
  119. data/test/test-array-builder.rb +0 -136
  120. data/test/test-array.rb +0 -325
  121. data/test/test-bigdecimal.rb +0 -40
  122. data/test/test-binary-dictionary-array-builder.rb +0 -103
  123. data/test/test-chunked-array.rb +0 -183
  124. data/test/test-column.rb +0 -92
  125. data/test/test-csv-loader.rb +0 -250
  126. data/test/test-data-type.rb +0 -83
  127. data/test/test-decimal128-array-builder.rb +0 -112
  128. data/test/test-decimal128-data-type.rb +0 -31
  129. data/test/test-decimal128.rb +0 -102
  130. data/test/test-decimal256-array-builder.rb +0 -112
  131. data/test/test-decimal256-array.rb +0 -38
  132. data/test/test-decimal256.rb +0 -102
  133. data/test/test-dense-union-data-type.rb +0 -41
  134. data/test/test-dictionary-data-type.rb +0 -40
  135. data/test/test-expression.rb +0 -40
  136. data/test/test-feather.rb +0 -49
  137. data/test/test-field.rb +0 -91
  138. data/test/test-file-output-stream.rb +0 -54
  139. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  140. data/test/test-fixed-size-binary-array.rb +0 -36
  141. data/test/test-function.rb +0 -210
  142. data/test/test-group.rb +0 -180
  143. data/test/test-list-array-builder.rb +0 -79
  144. data/test/test-list-array.rb +0 -32
  145. data/test/test-list-data-type.rb +0 -69
  146. data/test/test-map-array-builder.rb +0 -110
  147. data/test/test-map-array.rb +0 -33
  148. data/test/test-memory-view.rb +0 -434
  149. data/test/test-orc.rb +0 -173
  150. data/test/test-record-batch-builder.rb +0 -125
  151. data/test/test-record-batch-file-reader.rb +0 -115
  152. data/test/test-record-batch-iterator.rb +0 -37
  153. data/test/test-record-batch-reader.rb +0 -46
  154. data/test/test-record-batch.rb +0 -182
  155. data/test/test-schema.rb +0 -134
  156. data/test/test-slicer.rb +0 -487
  157. data/test/test-sort-indices.rb +0 -40
  158. data/test/test-sort-key.rb +0 -81
  159. data/test/test-sort-options.rb +0 -58
  160. data/test/test-sparse-union-data-type.rb +0 -41
  161. data/test/test-string-dictionary-array-builder.rb +0 -103
  162. data/test/test-struct-array-builder.rb +0 -184
  163. data/test/test-struct-array.rb +0 -94
  164. data/test/test-struct-data-type.rb +0 -112
  165. data/test/test-table.rb +0 -1123
  166. data/test/test-time.rb +0 -288
  167. data/test/test-time32-array.rb +0 -81
  168. data/test/test-time64-array.rb +0 -81
  169. data/test/test-time64-data-type.rb +0 -42
  170. data/test/test-timestamp-array.rb +0 -45
  171. data/test/test-timestamp-data-type.rb +0 -42
  172. data/test/values/test-basic-arrays.rb +0 -325
  173. data/test/values/test-dense-union-array.rb +0 -509
  174. data/test/values/test-dictionary-array.rb +0 -295
  175. data/test/values/test-list-array.rb +0 -571
  176. data/test/values/test-map-array.rb +0 -466
  177. data/test/values/test-sparse-union-array.rb +0 -500
  178. data/test/values/test-struct-array.rb +0 -512
@@ -28,6 +28,8 @@
28
28
 
29
29
  namespace red_arrow {
30
30
  class ListArrayValueConverter;
31
+ class LargeListArrayValueConverter;
32
+ class FixedSizeListArrayValueConverter;
31
33
  class StructArrayValueConverter;
32
34
  class MapArrayValueConverter;
33
35
  class UnionArrayValueConverter;
@@ -38,6 +40,8 @@ namespace red_arrow {
38
40
  ArrayValueConverter()
39
41
  : decimal_buffer_(),
40
42
  list_array_value_converter_(nullptr),
43
+ large_list_array_value_converter_(nullptr),
44
+ fixed_size_list_array_value_converter_(nullptr),
41
45
  struct_array_value_converter_(nullptr),
42
46
  map_array_value_converter_(nullptr),
43
47
  union_array_value_converter_(nullptr),
@@ -45,11 +49,15 @@ namespace red_arrow {
45
49
  }
46
50
 
47
51
  inline void set_sub_value_converters(ListArrayValueConverter* list_array_value_converter,
52
+ LargeListArrayValueConverter* large_list_array_value_converter,
53
+ FixedSizeListArrayValueConverter* fixed_size_list_array_value_converter,
48
54
  StructArrayValueConverter* struct_array_value_converter,
49
55
  MapArrayValueConverter* map_array_value_converter,
50
56
  UnionArrayValueConverter* union_array_value_converter,
51
57
  DictionaryArrayValueConverter* dictionary_array_value_converter) {
52
58
  list_array_value_converter_ = list_array_value_converter;
59
+ large_list_array_value_converter_ = large_list_array_value_converter;
60
+ fixed_size_list_array_value_converter_ = fixed_size_list_array_value_converter;
53
61
  struct_array_value_converter_ = struct_array_value_converter;
54
62
  map_array_value_converter_ = map_array_value_converter;
55
63
  union_array_value_converter_ = union_array_value_converter;
@@ -106,10 +114,34 @@ namespace red_arrow {
106
114
  return ULL2NUM(array.Value(i));
107
115
  }
108
116
 
109
- // TODO
110
- // inline VALUE convert(const arrow::HalfFloatArray& array,
111
- // const int64_t i) {
112
- // }
117
+ inline VALUE convert(const arrow::HalfFloatArray& array,
118
+ const int64_t i) {
119
+ const auto value = array.Value(i);
120
+ // | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
121
+ constexpr auto exponent_n_bits = 5;
122
+ static const auto exponent_mask =
123
+ static_cast<uint32_t>(std::pow(2.0, exponent_n_bits) - 1);
124
+ constexpr auto exponent_bias = 15;
125
+ constexpr auto fraction_n_bits = 10;
126
+ static const auto fraction_mask =
127
+ static_cast<uint32_t>(std::pow(2.0, fraction_n_bits)) - 1;
128
+ static const auto fraction_denominator = std::pow(2.0, fraction_n_bits);
129
+ const auto sign = value >> (exponent_n_bits + fraction_n_bits);
130
+ const auto exponent = (value >> fraction_n_bits) & exponent_mask;
131
+ const auto fraction = value & fraction_mask;
132
+ if (exponent == exponent_mask) {
133
+ if (sign == 0) {
134
+ return DBL2NUM(HUGE_VAL);
135
+ } else {
136
+ return DBL2NUM(-HUGE_VAL);
137
+ }
138
+ } else {
139
+ const auto implicit_fraction = (exponent == 0) ? 0 : 1;
140
+ return DBL2NUM(((sign == 0) ? 1 : -1) *
141
+ std::pow(2.0, exponent - exponent_bias) *
142
+ (implicit_fraction + fraction / fraction_denominator));
143
+ }
144
+ }
113
145
 
114
146
  inline VALUE convert(const arrow::FloatArray& array,
115
147
  const int64_t i) {
@@ -125,7 +157,15 @@ namespace red_arrow {
125
157
  const int64_t i) {
126
158
  int32_t length;
127
159
  const auto value = array.GetValue(i, &length);
128
- // TODO: encoding support
160
+ return rb_enc_str_new(reinterpret_cast<const char*>(value),
161
+ length,
162
+ rb_ascii8bit_encoding());
163
+ }
164
+
165
+ inline VALUE convert(const arrow::LargeBinaryArray& array,
166
+ const int64_t i) {
167
+ int64_t length;
168
+ const auto value = array.GetValue(i, &length);
129
169
  return rb_enc_str_new(reinterpret_cast<const char*>(value),
130
170
  length,
131
171
  rb_ascii8bit_encoding());
@@ -139,6 +179,14 @@ namespace red_arrow {
139
179
  length);
140
180
  }
141
181
 
182
+ inline VALUE convert(const arrow::LargeStringArray& array,
183
+ const int64_t i) {
184
+ int64_t length;
185
+ const auto value = array.GetValue(i, &length);
186
+ return rb_utf8_str_new(reinterpret_cast<const char*>(value),
187
+ length);
188
+ }
189
+
142
190
  inline VALUE convert(const arrow::FixedSizeBinaryArray& array,
143
191
  const int64_t i) {
144
192
  return rb_enc_str_new(reinterpret_cast<const char*>(array.Value(i)),
@@ -197,11 +245,6 @@ namespace red_arrow {
197
245
  return rb_time_num_new(sec, Qnil);
198
246
  }
199
247
 
200
- // TODO
201
- // inline VALUE convert(const arrow::IntervalArray& array,
202
- // const int64_t i) {
203
- // };
204
-
205
248
  inline VALUE convert(const arrow::MonthIntervalArray& array,
206
249
  const int64_t i) {
207
250
  return INT2NUM(array.Value(i));
@@ -236,9 +279,20 @@ namespace red_arrow {
236
279
  return value;
237
280
  }
238
281
 
282
+ inline VALUE convert(const arrow::DurationArray& array,
283
+ const int64_t i) {
284
+ return LL2NUM(array.Value(i));
285
+ }
286
+
239
287
  VALUE convert(const arrow::ListArray& array,
240
288
  const int64_t i);
241
289
 
290
+ VALUE convert(const arrow::LargeListArray& array,
291
+ const int64_t i);
292
+
293
+ VALUE convert(const arrow::FixedSizeListArray& array,
294
+ const int64_t i);
295
+
242
296
  VALUE convert(const arrow::StructArray& array,
243
297
  const int64_t i);
244
298
 
@@ -274,6 +328,8 @@ namespace red_arrow {
274
328
 
275
329
  std::string decimal_buffer_;
276
330
  ListArrayValueConverter* list_array_value_converter_;
331
+ LargeListArrayValueConverter* large_list_array_value_converter_;
332
+ FixedSizeListArrayValueConverter* fixed_size_list_array_value_converter_;
277
333
  StructArrayValueConverter* struct_array_value_converter_;
278
334
  MapArrayValueConverter* map_array_value_converter_;
279
335
  UnionArrayValueConverter* union_array_value_converter_;
@@ -320,8 +376,209 @@ namespace red_arrow {
320
376
  VISIT(UInt16)
321
377
  VISIT(UInt32)
322
378
  VISIT(UInt64)
379
+ VISIT(HalfFloat)
380
+ VISIT(Float)
381
+ VISIT(Double)
382
+ VISIT(Binary)
383
+ VISIT(String)
384
+ VISIT(FixedSizeBinary)
385
+ VISIT(Date32)
386
+ VISIT(Date64)
387
+ VISIT(Time32)
388
+ VISIT(Time64)
389
+ VISIT(Timestamp)
390
+ VISIT(MonthInterval)
391
+ VISIT(DayTimeInterval)
392
+ VISIT(MonthDayNanoInterval)
393
+ VISIT(Duration)
394
+ VISIT(List)
395
+ VISIT(LargeList)
396
+ VISIT(FixedSizeList)
397
+ VISIT(Struct)
398
+ VISIT(Map)
399
+ VISIT(SparseUnion)
400
+ VISIT(DenseUnion)
401
+ VISIT(Dictionary)
402
+ VISIT(Decimal128)
403
+ VISIT(Decimal256)
404
+ // TODO
405
+ // VISIT(Extension)
406
+
407
+ #undef VISIT
408
+
409
+ private:
410
+ template <typename ArrayType>
411
+ inline VALUE convert_value(const ArrayType& array,
412
+ const int64_t i) {
413
+ return array_value_converter_->convert(array, i);
414
+ }
415
+
416
+ template <typename ArrayType>
417
+ arrow::Status visit_value(const ArrayType& array) {
418
+ if (array.null_count() > 0) {
419
+ for (int64_t i = 0; i < length_; ++i) {
420
+ auto value = Qnil;
421
+ if (!array.IsNull(i + offset_)) {
422
+ value = convert_value(array, i + offset_);
423
+ }
424
+ rb_ary_push(result_, value);
425
+ }
426
+ } else {
427
+ for (int64_t i = 0; i < length_; ++i) {
428
+ rb_ary_push(result_, convert_value(array, i + offset_));
429
+ }
430
+ }
431
+ return arrow::Status::OK();
432
+ }
433
+
434
+ ArrayValueConverter* array_value_converter_;
435
+ int32_t offset_;
436
+ int32_t length_;
437
+ VALUE result_;
438
+ };
439
+
440
+ class LargeListArrayValueConverter : public arrow::ArrayVisitor {
441
+ public:
442
+ explicit LargeListArrayValueConverter(ArrayValueConverter* converter)
443
+ : array_value_converter_(converter),
444
+ offset_(0),
445
+ length_(0),
446
+ result_(Qnil) {}
447
+
448
+ VALUE convert(const arrow::LargeListArray& array, const int64_t index) {
449
+ auto values = array.values().get();
450
+ auto offset_keep = offset_;
451
+ auto length_keep = length_;
452
+ offset_ = array.value_offset(index);
453
+ length_ = array.value_length(index);
454
+ auto result_keep = result_;
455
+ result_ = rb_ary_new_capa(length_);
456
+ check_status(values->Accept(this),
457
+ "[raw-records][large-list-array]");
458
+ offset_ = offset_keep;
459
+ length_ = length_keep;
460
+ auto result_return = result_;
461
+ result_ = result_keep;
462
+ return result_return;
463
+ }
464
+
465
+ #define VISIT(TYPE) \
466
+ arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
467
+ return visit_value(array); \
468
+ }
469
+
470
+ VISIT(Null)
471
+ VISIT(Boolean)
472
+ VISIT(Int8)
473
+ VISIT(Int16)
474
+ VISIT(Int32)
475
+ VISIT(Int64)
476
+ VISIT(UInt8)
477
+ VISIT(UInt16)
478
+ VISIT(UInt32)
479
+ VISIT(UInt64)
480
+ VISIT(HalfFloat)
481
+ VISIT(Float)
482
+ VISIT(Double)
483
+ VISIT(Binary)
484
+ VISIT(String)
485
+ VISIT(FixedSizeBinary)
486
+ VISIT(Date32)
487
+ VISIT(Date64)
488
+ VISIT(Time32)
489
+ VISIT(Time64)
490
+ VISIT(Timestamp)
491
+ VISIT(MonthInterval)
492
+ VISIT(DayTimeInterval)
493
+ VISIT(MonthDayNanoInterval)
494
+ VISIT(Duration)
495
+ VISIT(List)
496
+ VISIT(LargeList)
497
+ VISIT(FixedSizeList)
498
+ VISIT(Struct)
499
+ VISIT(Map)
500
+ VISIT(SparseUnion)
501
+ VISIT(DenseUnion)
502
+ VISIT(Dictionary)
503
+ VISIT(Decimal128)
504
+ VISIT(Decimal256)
323
505
  // TODO
324
- // VISIT(HalfFloat)
506
+ // VISIT(Extension)
507
+
508
+ #undef VISIT
509
+
510
+ private:
511
+ template <typename ArrayType>
512
+ inline VALUE convert_value(const ArrayType& array,
513
+ const int64_t i) {
514
+ return array_value_converter_->convert(array, i);
515
+ }
516
+
517
+ template <typename ArrayType>
518
+ arrow::Status visit_value(const ArrayType& array) {
519
+ if (array.null_count() > 0) {
520
+ for (int64_t i = 0; i < length_; ++i) {
521
+ auto value = Qnil;
522
+ if (!array.IsNull(i + offset_)) {
523
+ value = convert_value(array, i + offset_);
524
+ }
525
+ rb_ary_push(result_, value);
526
+ }
527
+ } else {
528
+ for (int64_t i = 0; i < length_; ++i) {
529
+ rb_ary_push(result_, convert_value(array, i + offset_));
530
+ }
531
+ }
532
+ return arrow::Status::OK();
533
+ }
534
+
535
+ ArrayValueConverter* array_value_converter_;
536
+ int32_t offset_;
537
+ int32_t length_;
538
+ VALUE result_;
539
+ };
540
+
541
+ class FixedSizeListArrayValueConverter : public arrow::ArrayVisitor {
542
+ public:
543
+ explicit FixedSizeListArrayValueConverter(ArrayValueConverter* converter)
544
+ : array_value_converter_(converter),
545
+ offset_(0),
546
+ length_(0),
547
+ result_(Qnil) {}
548
+
549
+ VALUE convert(const arrow::FixedSizeListArray& array, const int64_t index) {
550
+ auto values = array.values().get();
551
+ auto offset_keep = offset_;
552
+ auto length_keep = length_;
553
+ offset_ = array.value_offset(index);
554
+ length_ = array.value_length(index);
555
+ auto result_keep = result_;
556
+ result_ = rb_ary_new_capa(length_);
557
+ check_status(values->Accept(this),
558
+ "[raw-records][fixed-size-list-array]");
559
+ offset_ = offset_keep;
560
+ length_ = length_keep;
561
+ auto result_return = result_;
562
+ result_ = result_keep;
563
+ return result_return;
564
+ }
565
+
566
+ #define VISIT(TYPE) \
567
+ arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
568
+ return visit_value(array); \
569
+ }
570
+
571
+ VISIT(Null)
572
+ VISIT(Boolean)
573
+ VISIT(Int8)
574
+ VISIT(Int16)
575
+ VISIT(Int32)
576
+ VISIT(Int64)
577
+ VISIT(UInt8)
578
+ VISIT(UInt16)
579
+ VISIT(UInt32)
580
+ VISIT(UInt64)
581
+ VISIT(HalfFloat)
325
582
  VISIT(Float)
326
583
  VISIT(Double)
327
584
  VISIT(Binary)
@@ -335,7 +592,10 @@ namespace red_arrow {
335
592
  VISIT(MonthInterval)
336
593
  VISIT(DayTimeInterval)
337
594
  VISIT(MonthDayNanoInterval)
595
+ VISIT(Duration)
338
596
  VISIT(List)
597
+ VISIT(LargeList)
598
+ VISIT(FixedSizeList)
339
599
  VISIT(Struct)
340
600
  VISIT(Map)
341
601
  VISIT(SparseUnion)
@@ -427,8 +687,7 @@ namespace red_arrow {
427
687
  VISIT(UInt16)
428
688
  VISIT(UInt32)
429
689
  VISIT(UInt64)
430
- // TODO
431
- // VISIT(HalfFloat)
690
+ VISIT(HalfFloat)
432
691
  VISIT(Float)
433
692
  VISIT(Double)
434
693
  VISIT(Binary)
@@ -442,7 +701,10 @@ namespace red_arrow {
442
701
  VISIT(MonthInterval)
443
702
  VISIT(DayTimeInterval)
444
703
  VISIT(MonthDayNanoInterval)
704
+ VISIT(Duration)
445
705
  VISIT(List)
706
+ VISIT(LargeList)
707
+ VISIT(FixedSizeList)
446
708
  VISIT(Struct)
447
709
  VISIT(Map)
448
710
  VISIT(SparseUnion)
@@ -530,8 +792,7 @@ namespace red_arrow {
530
792
  VISIT(UInt16)
531
793
  VISIT(UInt32)
532
794
  VISIT(UInt64)
533
- // TODO
534
- // VISIT(HalfFloat)
795
+ VISIT(HalfFloat)
535
796
  VISIT(Float)
536
797
  VISIT(Double)
537
798
  VISIT(Binary)
@@ -545,7 +806,10 @@ namespace red_arrow {
545
806
  VISIT(MonthInterval)
546
807
  VISIT(DayTimeInterval)
547
808
  VISIT(MonthDayNanoInterval)
809
+ VISIT(Duration)
548
810
  VISIT(List)
811
+ VISIT(LargeList)
812
+ VISIT(FixedSizeList)
549
813
  VISIT(Struct)
550
814
  VISIT(Map)
551
815
  VISIT(SparseUnion)
@@ -634,8 +898,7 @@ namespace red_arrow {
634
898
  VISIT(UInt16)
635
899
  VISIT(UInt32)
636
900
  VISIT(UInt64)
637
- // TODO
638
- // VISIT(HalfFloat)
901
+ VISIT(HalfFloat)
639
902
  VISIT(Float)
640
903
  VISIT(Double)
641
904
  VISIT(Binary)
@@ -649,7 +912,10 @@ namespace red_arrow {
649
912
  VISIT(MonthInterval)
650
913
  VISIT(DayTimeInterval)
651
914
  VISIT(MonthDayNanoInterval)
915
+ VISIT(Duration)
652
916
  VISIT(List)
917
+ VISIT(LargeList)
918
+ VISIT(FixedSizeList)
653
919
  VISIT(Struct)
654
920
  VISIT(Map)
655
921
  VISIT(SparseUnion)
@@ -665,25 +931,21 @@ namespace red_arrow {
665
931
  private:
666
932
  template <typename ArrayType>
667
933
  inline void convert_value(const ArrayType& array) {
668
- auto result = rb_hash_new();
669
934
  if (array.IsNull(index_)) {
670
- rb_hash_aset(result, field_name_, Qnil);
935
+ result_ = RUBY_Qnil;
671
936
  } else {
672
- rb_hash_aset(result,
673
- field_name_,
674
- array_value_converter_->convert(array, index_));
937
+ result_ = array_value_converter_->convert(array, index_);
675
938
  }
676
- result_ = result;
677
939
  }
678
940
 
679
- uint8_t compute_field_index(const arrow::UnionArray& array,
680
- arrow::UnionType* type,
681
- const char* tag) {
941
+ int8_t compute_child_id(const arrow::UnionArray& array,
942
+ arrow::UnionType* type,
943
+ const char* tag) {
682
944
  const auto type_code = array.raw_type_codes()[index_];
683
945
  if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
684
- const auto field_id = type->child_ids()[type_code];
685
- if (field_id >= 0) {
686
- return field_id;
946
+ const auto child_id = type->child_ids()[type_code];
947
+ if (child_id >= 0) {
948
+ return child_id;
687
949
  }
688
950
  }
689
951
  check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
@@ -695,36 +957,25 @@ namespace red_arrow {
695
957
  const auto type =
696
958
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
697
959
  const auto tag = "[raw-records][union-sparse-array]";
698
- const auto index = compute_field_index(array, type, tag);
699
- const auto field = type->field(index).get();
700
- const auto& field_name = field->name();
701
- const auto field_name_keep = field_name_;
702
- field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
703
- const auto field_array = array.field(index).get();
960
+ const auto child_id = compute_child_id(array, type, tag);
961
+ const auto field_array = array.field(child_id).get();
704
962
  check_status(field_array->Accept(this), tag);
705
- field_name_ = field_name_keep;
706
963
  }
707
964
 
708
965
  void convert_dense(const arrow::DenseUnionArray& array) {
709
966
  const auto type =
710
967
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
711
968
  const auto tag = "[raw-records][union-dense-array]";
712
- const auto index = compute_field_index(array, type, tag);
713
- const auto field = type->field(index).get();
714
- const auto& field_name = field->name();
715
- const auto field_name_keep = field_name_;
716
- field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
717
- const auto field_array = array.field(index);
969
+ const auto child_id = compute_child_id(array, type, tag);
970
+ const auto field_array = array.field(child_id);
718
971
  const auto index_keep = index_;
719
972
  index_ = array.value_offset(index_);
720
973
  check_status(field_array->Accept(this), tag);
721
974
  index_ = index_keep;
722
- field_name_ = field_name_keep;
723
975
  }
724
976
 
725
977
  ArrayValueConverter* array_value_converter_;
726
978
  int64_t index_;
727
- VALUE field_name_;
728
979
  VALUE result_;
729
980
  };
730
981
 
@@ -761,19 +1012,26 @@ namespace red_arrow {
761
1012
  VISIT(UInt16)
762
1013
  VISIT(UInt32)
763
1014
  VISIT(UInt64)
764
- // TODO
765
- // VISIT(HalfFloat)
1015
+ VISIT(HalfFloat)
766
1016
  VISIT(Float)
767
1017
  VISIT(Double)
768
1018
  VISIT(Binary)
1019
+ VISIT(LargeBinary)
769
1020
  VISIT(String)
1021
+ VISIT(LargeString)
770
1022
  VISIT(FixedSizeBinary)
771
1023
  VISIT(Date32)
772
1024
  VISIT(Date64)
773
1025
  VISIT(Time32)
774
1026
  VISIT(Time64)
775
1027
  VISIT(Timestamp)
1028
+ VISIT(MonthInterval)
1029
+ VISIT(DayTimeInterval)
1030
+ VISIT(MonthDayNanoInterval)
1031
+ VISIT(Duration)
776
1032
  VISIT(List)
1033
+ VISIT(LargeList)
1034
+ VISIT(FixedSizeList)
777
1035
  VISIT(Struct)
778
1036
  VISIT(Map)
779
1037
  VISIT(SparseUnion)
@@ -803,12 +1061,16 @@ namespace red_arrow {
803
1061
  explicit Converter()
804
1062
  : array_value_converter_(),
805
1063
  list_array_value_converter_(&array_value_converter_),
1064
+ large_list_array_value_converter_(&array_value_converter_),
1065
+ fixed_size_list_array_value_converter_(&array_value_converter_),
806
1066
  struct_array_value_converter_(&array_value_converter_),
807
1067
  map_array_value_converter_(&array_value_converter_),
808
1068
  union_array_value_converter_(&array_value_converter_),
809
1069
  dictionary_array_value_converter_(&array_value_converter_) {
810
1070
  array_value_converter_.
811
1071
  set_sub_value_converters(&list_array_value_converter_,
1072
+ &large_list_array_value_converter_,
1073
+ &fixed_size_list_array_value_converter_,
812
1074
  &struct_array_value_converter_,
813
1075
  &map_array_value_converter_,
814
1076
  &union_array_value_converter_,
@@ -823,6 +1085,8 @@ namespace red_arrow {
823
1085
 
824
1086
  ArrayValueConverter array_value_converter_;
825
1087
  ListArrayValueConverter list_array_value_converter_;
1088
+ LargeListArrayValueConverter large_list_array_value_converter_;
1089
+ FixedSizeListArrayValueConverter fixed_size_list_array_value_converter_;
826
1090
  StructArrayValueConverter struct_array_value_converter_;
827
1091
  MapArrayValueConverter map_array_value_converter_;
828
1092
  UnionArrayValueConverter union_array_value_converter_;
data/ext/arrow/extconf.rb CHANGED
@@ -38,32 +38,37 @@ checking_for(checking_message("Homebrew")) do
38
38
  end
39
39
  end
40
40
 
41
- unless required_pkg_config_package([
42
- "arrow",
43
- Arrow::Version::MAJOR,
44
- Arrow::Version::MINOR,
45
- Arrow::Version::MICRO,
46
- ],
47
- debian: "libarrow-dev",
48
- redhat: "arrow-devel",
49
- homebrew: "apache-arrow",
50
- msys2: "arrow")
51
- exit(false)
41
+ unless PKGConfig.have_package("arrow", Arrow::Version::MAJOR)
42
+ raise <<-MESSAGE
43
+ Apache Arrow C++ >= #{Arrow::Version::MAJOR} isn't found.
44
+ You can install it automatically by enabling rubygems-requirements-system.
45
+ See https://github.com/ruby-gnome/rubygems-requirements-system/ how to enable it.
46
+ MESSAGE
52
47
  end
53
48
 
54
- unless required_pkg_config_package([
55
- "arrow-glib",
56
- Arrow::Version::MAJOR,
57
- Arrow::Version::MINOR,
58
- Arrow::Version::MICRO,
59
- ],
60
- debian: "libarrow-glib-dev",
61
- redhat: "arrow-glib-devel",
62
- homebrew: "apache-arrow-glib",
63
- msys2: "arrow")
64
- exit(false)
49
+ unless PKGConfig.have_package("arrow-glib",
50
+ Arrow::Version::MAJOR,
51
+ Arrow::Version::MINOR,
52
+ Arrow::Version::MICRO)
53
+ version = [
54
+ Arrow::Version::MAJOR,
55
+ Arrow::Version::MINOR,
56
+ Arrow::Version::MICRO,
57
+ ].join(".")
58
+ raise <<-MESSAGE
59
+ Apache Arrow GLib >= #{version} isn't found.
60
+ You can install it automatically by enabling rubygems-requirements-system.
61
+ See https://github.com/ruby-gnome/rubygems-requirements-system/ how to enable it.
62
+ MESSAGE
65
63
  end
66
64
 
65
+ # Old re2.pc (e.g. re2.pc on Ubuntu 20.04) may add -std=c++11. It
66
+ # causes a build error because Apache Arrow C++ requires C++17 or
67
+ # later.
68
+ #
69
+ # We can remove this when we drop support for Ubuntu 20.04.
70
+ $CXXFLAGS.gsub!("-std=c++11", "")
71
+
67
72
  [
68
73
  ["glib2", "ext/glib2"],
69
74
  ].each do |name, relative_source_dir|
@@ -73,4 +78,18 @@ end
73
78
  add_depend_package_path(name, source_dir, build_dir)
74
79
  end
75
80
 
81
+ case RUBY_PLATFORM
82
+ when /darwin/
83
+ symbols_in_external_bundles = [
84
+ "_rbgerr_gerror2exception",
85
+ "_rbgobj_instance_from_ruby_object",
86
+ ]
87
+ symbols_in_external_bundles.each do |symbol|
88
+ $DLDFLAGS << " -Wl,-U,#{symbol}"
89
+ end
90
+ mmacosx_version_min = "-mmacosx-version-min=12.0"
91
+ $CFLAGS << " #{mmacosx_version_min}"
92
+ $CXXFLAGS << " #{mmacosx_version_min}"
93
+ end
94
+
76
95
  create_makefile("arrow")