red-arrow 10.0.0 → 16.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/ext/arrow/arrow.cpp +31 -0
  4. data/ext/arrow/converters.hpp +45 -41
  5. data/ext/arrow/extconf.rb +16 -4
  6. data/ext/arrow/raw-records.cpp +155 -2
  7. data/ext/arrow/red-arrow.hpp +2 -0
  8. data/ext/arrow/values.cpp +1 -2
  9. data/lib/arrow/array-computable.rb +13 -0
  10. data/lib/arrow/array.rb +6 -1
  11. data/lib/arrow/chunked-array.rb +35 -1
  12. data/lib/arrow/column-containable.rb +9 -0
  13. data/lib/arrow/column.rb +1 -0
  14. data/lib/arrow/data-type.rb +9 -0
  15. data/lib/arrow/dense-union-array-builder.rb +49 -0
  16. data/lib/arrow/dense-union-array.rb +26 -0
  17. data/lib/arrow/expression.rb +6 -2
  18. data/lib/arrow/function.rb +0 -1
  19. data/lib/arrow/half-float-array-builder.rb +32 -0
  20. data/lib/arrow/half-float-array.rb +24 -0
  21. data/lib/arrow/half-float.rb +118 -0
  22. data/lib/arrow/input-referable.rb +29 -0
  23. data/lib/arrow/loader.rb +11 -0
  24. data/lib/arrow/raw-table-converter.rb +7 -5
  25. data/lib/arrow/record-batch-file-reader.rb +2 -0
  26. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  27. data/lib/arrow/record-batch.rb +6 -2
  28. data/lib/arrow/scalar.rb +67 -0
  29. data/lib/arrow/slicer.rb +61 -0
  30. data/lib/arrow/sort-key.rb +3 -3
  31. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  32. data/lib/arrow/sparse-union-array.rb +26 -0
  33. data/lib/arrow/struct-array-builder.rb +0 -5
  34. data/lib/arrow/table-loader.rb +11 -5
  35. data/lib/arrow/table-saver.rb +1 -0
  36. data/lib/arrow/table.rb +180 -33
  37. data/lib/arrow/tensor.rb +4 -0
  38. data/lib/arrow/timestamp-parser.rb +33 -0
  39. data/lib/arrow/union-array-builder.rb +59 -0
  40. data/lib/arrow/version.rb +1 -1
  41. data/red-arrow.gemspec +2 -1
  42. data/test/each-raw-record/test-basic-arrays.rb +411 -0
  43. data/test/each-raw-record/test-dense-union-array.rb +566 -0
  44. data/test/each-raw-record/test-dictionary-array.rb +341 -0
  45. data/test/each-raw-record/test-list-array.rb +628 -0
  46. data/test/each-raw-record/test-map-array.rb +507 -0
  47. data/test/each-raw-record/test-multiple-columns.rb +72 -0
  48. data/test/each-raw-record/test-sparse-union-array.rb +528 -0
  49. data/test/each-raw-record/test-struct-array.rb +529 -0
  50. data/test/each-raw-record/test-table.rb +47 -0
  51. data/test/helper/omittable.rb +13 -0
  52. data/test/helper.rb +1 -0
  53. data/test/raw-records/test-basic-arrays.rb +11 -1
  54. data/test/raw-records/test-dense-union-array.rb +90 -45
  55. data/test/raw-records/test-list-array.rb +28 -10
  56. data/test/raw-records/test-map-array.rb +39 -10
  57. data/test/raw-records/test-sparse-union-array.rb +86 -41
  58. data/test/raw-records/test-struct-array.rb +22 -8
  59. data/test/test-array.rb +7 -0
  60. data/test/test-chunked-array.rb +9 -0
  61. data/test/test-csv-loader.rb +39 -0
  62. data/test/test-data-type.rb +2 -1
  63. data/test/test-dense-union-array.rb +42 -0
  64. data/test/test-dense-union-data-type.rb +1 -1
  65. data/test/test-expression.rb +11 -0
  66. data/test/test-function.rb +7 -7
  67. data/test/test-group.rb +58 -58
  68. data/test/test-half-float-array.rb +43 -0
  69. data/test/test-half-float.rb +130 -0
  70. data/test/test-ractor.rb +34 -0
  71. data/test/test-record-batch-file-reader.rb +21 -0
  72. data/test/test-record-batch-stream-reader.rb +129 -0
  73. data/test/test-scalar.rb +65 -0
  74. data/test/test-slicer.rb +194 -129
  75. data/test/test-sparse-union-array.rb +38 -0
  76. data/test/test-table.rb +356 -40
  77. data/test/values/test-basic-arrays.rb +10 -0
  78. data/test/values/test-dense-union-array.rb +88 -45
  79. data/test/values/test-list-array.rb +26 -10
  80. data/test/values/test-map-array.rb +33 -10
  81. data/test/values/test-sparse-union-array.rb +84 -41
  82. data/test/values/test-struct-array.rb +20 -8
  83. metadata +62 -9
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dc23de72fe60f7f6b71d45d4ec9019b0066da0de1aaef7992af984fa3ee14db3
4
- data.tar.gz: 5383f405645f394c8af4b35ce537a82a54b207823a1debf755d09ef645bcbe15
3
+ metadata.gz: 021e0bb1714a4dbaf54622b66dcea24682f78d2045497a8109148620f6090e83
4
+ data.tar.gz: 8c61eedc5f5f94011b3c7a37aed40583c8b94f36e2e91f86a1d8c76fbe768a5c
5
5
  SHA512:
6
- metadata.gz: cf9d0c9274059ab09a91af39573c5e4b1a902ca486038e61aff882441d725592bc4143821ba82607979ff09ea7cc9e5220efda1f12478e28b2cf9a9c89cff2d2
7
- data.tar.gz: 4225c79d4eeb6770175624ef0eb4e90e263715a1d0b817bf34898ad179a85325f0d15e8fca6697d94870b6147bb965ab1644170fdae52eab07c37396214e5549
6
+ metadata.gz: 73e30cc705260b29ab70be6f98ad30075a5d6c07878c4af25b1650d48d9d7b5df6a7884eaf0db98f4e20cfa8ca0e3a25c49c625fe64f5c8aa4bd529dd10c9b8c
7
+ data.tar.gz: 30c65035dcf46283e7e64533604c16dcba2dd1b54e00bb8b7dfd10e9ecb2c8440f32a5cfc6adf24bcd7bee5a159aeba110d180f1bdd2d5746f28f66cabb696a8
data/README.md CHANGED
@@ -25,9 +25,9 @@ Red Arrow is the Ruby bindings of Apache Arrow. Red Arrow is based on GObject In
25
25
 
26
26
  [GObject Introspection](https://wiki.gnome.org/action/show/Projects/GObjectIntrospection) is middleware for creating language bindings of C libraries. GObject Introspection can generate language bindings automatically at runtime.
27
27
 
28
- Red Arrow uses [Apache Arrow GLib](https://github.com/apache/arrow/tree/master/c_glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Arrow.
28
+ Red Arrow uses [Apache Arrow GLib](https://github.com/apache/arrow/tree/main/c_glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Arrow.
29
29
 
30
- Apache Arrow GLib is a C wrapper for [Apache Arrow C++](https://github.com/apache/arrow/tree/master/cpp). GObject Introspection can't use Apache Arrow C++ directly. Apache Arrow GLib is a bridge between Apache Arrow C++ and GObject Introspection.
30
+ Apache Arrow GLib is a C wrapper for [Apache Arrow C++](https://github.com/apache/arrow/tree/main/cpp). GObject Introspection can't use Apache Arrow C++ directly. Apache Arrow GLib is a bridge between Apache Arrow C++ and GObject Introspection.
31
31
 
32
32
  The gobject-introspection gem provides Ruby bindings of GObject Introspection. Red Arrow uses GObject Introspection via the gobject-introspection gem.
33
33
 
@@ -56,7 +56,7 @@ table.save("/dev/shm/data-processed.arrow")
56
56
  Note that you need to install Apache Arrow C++/GLib at main before preparing Red Arrow. See also:
57
57
 
58
58
  * For Apache Arrow C++: https://arrow.apache.org/docs/developers/cpp/building.html
59
- * For Apache Arrow GLib: https://github.com/apache/arrow/blob/master/c_glib/README.md
59
+ * For Apache Arrow GLib: https://github.com/apache/arrow/blob/main/c_glib/README.md
60
60
 
61
61
  ```console
62
62
  $ cd ruby/red-arrow
data/ext/arrow/arrow.cpp CHANGED
@@ -43,6 +43,26 @@ namespace red_arrow {
43
43
  VALUE month;
44
44
  VALUE nanosecond;
45
45
  }
46
+
47
+ void
48
+ record_batch_reader_mark(gpointer object)
49
+ {
50
+ auto reader = GARROW_RECORD_BATCH_READER(object);
51
+ auto sources = garrow_record_batch_reader_get_sources(reader);
52
+ for (auto source = sources; sources; sources = g_list_next(sources)) {
53
+ rbgobj_gc_mark_instance(source->data);
54
+ }
55
+ }
56
+
57
+ void
58
+ execute_plan_mark(gpointer object)
59
+ {
60
+ auto plan = GARROW_EXECUTE_PLAN(object);
61
+ auto nodes = garrow_execute_plan_get_nodes(plan);
62
+ for (auto node = nodes; nodes; nodes = g_list_next(nodes)) {
63
+ rbgobj_gc_mark_instance(node->data);
64
+ }
65
+ }
46
66
  }
47
67
 
48
68
  extern "C" void Init_arrow() {
@@ -62,11 +82,17 @@ extern "C" void Init_arrow() {
62
82
  rb_define_method(cArrowRecordBatch, "raw_records",
63
83
  reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_raw_records),
64
84
  0);
85
+ rb_define_method(cArrowRecordBatch, "each_raw_record",
86
+ reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_each_raw_record),
87
+ 0);
65
88
 
66
89
  auto cArrowTable = rb_const_get_at(mArrow, rb_intern("Table"));
67
90
  rb_define_method(cArrowTable, "raw_records",
68
91
  reinterpret_cast<rb::RawMethod>(red_arrow::table_raw_records),
69
92
  0);
93
+ rb_define_method(cArrowTable, "each_raw_record",
94
+ reinterpret_cast<rb::RawMethod>(red_arrow::table_each_raw_record),
95
+ 0);
70
96
 
71
97
  red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date"));
72
98
 
@@ -93,4 +119,9 @@ extern "C" void Init_arrow() {
93
119
  red_arrow::symbols::millisecond = ID2SYM(rb_intern("millisecond"));
94
120
  red_arrow::symbols::month = ID2SYM(rb_intern("month"));
95
121
  red_arrow::symbols::nanosecond = ID2SYM(rb_intern("nanosecond"));
122
+
123
+ rbgobj_register_mark_func(GARROW_TYPE_RECORD_BATCH_READER,
124
+ red_arrow::record_batch_reader_mark);
125
+ rbgobj_register_mark_func(GARROW_TYPE_EXECUTE_PLAN,
126
+ red_arrow::execute_plan_mark);
96
127
  }
@@ -106,10 +106,34 @@ namespace red_arrow {
106
106
  return ULL2NUM(array.Value(i));
107
107
  }
108
108
 
109
- // TODO
110
- // inline VALUE convert(const arrow::HalfFloatArray& array,
111
- // const int64_t i) {
112
- // }
109
+ inline VALUE convert(const arrow::HalfFloatArray& array,
110
+ const int64_t i) {
111
+ const auto value = array.Value(i);
112
+ // | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
113
+ constexpr auto exponent_n_bits = 5;
114
+ static const auto exponent_mask =
115
+ static_cast<uint32_t>(std::pow(2.0, exponent_n_bits) - 1);
116
+ constexpr auto exponent_bias = 15;
117
+ constexpr auto fraction_n_bits = 10;
118
+ static const auto fraction_mask =
119
+ static_cast<uint32_t>(std::pow(2.0, fraction_n_bits)) - 1;
120
+ static const auto fraction_denominator = std::pow(2.0, fraction_n_bits);
121
+ const auto sign = value >> (exponent_n_bits + fraction_n_bits);
122
+ const auto exponent = (value >> fraction_n_bits) & exponent_mask;
123
+ const auto fraction = value & fraction_mask;
124
+ if (exponent == exponent_mask) {
125
+ if (sign == 0) {
126
+ return DBL2NUM(HUGE_VAL);
127
+ } else {
128
+ return DBL2NUM(-HUGE_VAL);
129
+ }
130
+ } else {
131
+ const auto implicit_fraction = (exponent == 0) ? 0 : 1;
132
+ return DBL2NUM(((sign == 0) ? 1 : -1) *
133
+ std::pow(2.0, exponent - exponent_bias) *
134
+ (implicit_fraction + fraction / fraction_denominator));
135
+ }
136
+ }
113
137
 
114
138
  inline VALUE convert(const arrow::FloatArray& array,
115
139
  const int64_t i) {
@@ -320,8 +344,7 @@ namespace red_arrow {
320
344
  VISIT(UInt16)
321
345
  VISIT(UInt32)
322
346
  VISIT(UInt64)
323
- // TODO
324
- // VISIT(HalfFloat)
347
+ VISIT(HalfFloat)
325
348
  VISIT(Float)
326
349
  VISIT(Double)
327
350
  VISIT(Binary)
@@ -427,8 +450,7 @@ namespace red_arrow {
427
450
  VISIT(UInt16)
428
451
  VISIT(UInt32)
429
452
  VISIT(UInt64)
430
- // TODO
431
- // VISIT(HalfFloat)
453
+ VISIT(HalfFloat)
432
454
  VISIT(Float)
433
455
  VISIT(Double)
434
456
  VISIT(Binary)
@@ -530,8 +552,7 @@ namespace red_arrow {
530
552
  VISIT(UInt16)
531
553
  VISIT(UInt32)
532
554
  VISIT(UInt64)
533
- // TODO
534
- // VISIT(HalfFloat)
555
+ VISIT(HalfFloat)
535
556
  VISIT(Float)
536
557
  VISIT(Double)
537
558
  VISIT(Binary)
@@ -634,8 +655,7 @@ namespace red_arrow {
634
655
  VISIT(UInt16)
635
656
  VISIT(UInt32)
636
657
  VISIT(UInt64)
637
- // TODO
638
- // VISIT(HalfFloat)
658
+ VISIT(HalfFloat)
639
659
  VISIT(Float)
640
660
  VISIT(Double)
641
661
  VISIT(Binary)
@@ -665,25 +685,21 @@ namespace red_arrow {
665
685
  private:
666
686
  template <typename ArrayType>
667
687
  inline void convert_value(const ArrayType& array) {
668
- auto result = rb_hash_new();
669
688
  if (array.IsNull(index_)) {
670
- rb_hash_aset(result, field_name_, Qnil);
689
+ result_ = RUBY_Qnil;
671
690
  } else {
672
- rb_hash_aset(result,
673
- field_name_,
674
- array_value_converter_->convert(array, index_));
691
+ result_ = array_value_converter_->convert(array, index_);
675
692
  }
676
- result_ = result;
677
693
  }
678
694
 
679
- uint8_t compute_field_index(const arrow::UnionArray& array,
680
- arrow::UnionType* type,
681
- const char* tag) {
695
+ int8_t compute_child_id(const arrow::UnionArray& array,
696
+ arrow::UnionType* type,
697
+ const char* tag) {
682
698
  const auto type_code = array.raw_type_codes()[index_];
683
699
  if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
684
- const auto field_id = type->child_ids()[type_code];
685
- if (field_id >= 0) {
686
- return field_id;
700
+ const auto child_id = type->child_ids()[type_code];
701
+ if (child_id >= 0) {
702
+ return child_id;
687
703
  }
688
704
  }
689
705
  check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
@@ -695,36 +711,25 @@ namespace red_arrow {
695
711
  const auto type =
696
712
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
697
713
  const auto tag = "[raw-records][union-sparse-array]";
698
- const auto index = compute_field_index(array, type, tag);
699
- const auto field = type->field(index).get();
700
- const auto& field_name = field->name();
701
- const auto field_name_keep = field_name_;
702
- field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
703
- const auto field_array = array.field(index).get();
714
+ const auto child_id = compute_child_id(array, type, tag);
715
+ const auto field_array = array.field(child_id).get();
704
716
  check_status(field_array->Accept(this), tag);
705
- field_name_ = field_name_keep;
706
717
  }
707
718
 
708
719
  void convert_dense(const arrow::DenseUnionArray& array) {
709
720
  const auto type =
710
721
  std::static_pointer_cast<arrow::UnionType>(array.type()).get();
711
722
  const auto tag = "[raw-records][union-dense-array]";
712
- const auto index = compute_field_index(array, type, tag);
713
- const auto field = type->field(index).get();
714
- const auto& field_name = field->name();
715
- const auto field_name_keep = field_name_;
716
- field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
717
- const auto field_array = array.field(index);
723
+ const auto child_id = compute_child_id(array, type, tag);
724
+ const auto field_array = array.field(child_id);
718
725
  const auto index_keep = index_;
719
726
  index_ = array.value_offset(index_);
720
727
  check_status(field_array->Accept(this), tag);
721
728
  index_ = index_keep;
722
- field_name_ = field_name_keep;
723
729
  }
724
730
 
725
731
  ArrayValueConverter* array_value_converter_;
726
732
  int64_t index_;
727
- VALUE field_name_;
728
733
  VALUE result_;
729
734
  };
730
735
 
@@ -761,8 +766,7 @@ namespace red_arrow {
761
766
  VISIT(UInt16)
762
767
  VISIT(UInt32)
763
768
  VISIT(UInt64)
764
- // TODO
765
- // VISIT(HalfFloat)
769
+ VISIT(HalfFloat)
766
770
  VISIT(Float)
767
771
  VISIT(Double)
768
772
  VISIT(Binary)
data/ext/arrow/extconf.rb CHANGED
@@ -38,14 +38,11 @@ checking_for(checking_message("Homebrew")) do
38
38
  end
39
39
  end
40
40
 
41
- $CXXFLAGS += " -std=c++17 "
42
-
43
41
  unless required_pkg_config_package([
44
42
  "arrow",
45
43
  Arrow::Version::MAJOR,
46
- Arrow::Version::MINOR,
47
- Arrow::Version::MICRO,
48
44
  ],
45
+ conda: "libarrow",
49
46
  debian: "libarrow-dev",
50
47
  fedora: "libarrow-devel",
51
48
  homebrew: "apache-arrow",
@@ -60,6 +57,7 @@ unless required_pkg_config_package([
60
57
  Arrow::Version::MINOR,
61
58
  Arrow::Version::MICRO,
62
59
  ],
60
+ conda: "arrow-c-glib",
63
61
  debian: "libarrow-glib-dev",
64
62
  fedora: "libarrow-glib-devel",
65
63
  homebrew: "apache-arrow-glib",
@@ -77,4 +75,18 @@ end
77
75
  add_depend_package_path(name, source_dir, build_dir)
78
76
  end
79
77
 
78
# macOS-only build flags.
if RUBY_PLATFORM =~ /darwin/
  # Symbols that live in external bundles: each one is passed to the linker
  # with "-U" so that it may stay undefined in this extension.
  %w[
    _rbgerr_gerror2exception
    _rbgobj_instance_from_ruby_object
  ].each do |external_symbol|
    $DLDFLAGS << " -Wl,-U,#{external_symbol}"
  end

  # Use the same minimum macOS version for both C and C++ sources.
  minimum_macos_flag = "-mmacosx-version-min=10.15"
  [$CFLAGS, $CXXFLAGS].each do |flags|
    flags << " #{minimum_macos_flag}"
  end
end
91
+
80
92
  create_makefile("arrow")
@@ -84,8 +84,7 @@ namespace red_arrow {
84
84
  VISIT(UInt16)
85
85
  VISIT(UInt32)
86
86
  VISIT(UInt64)
87
- // TODO
88
- // VISIT(HalfFloat)
87
+ VISIT(HalfFloat)
89
88
  VISIT(Float)
90
89
  VISIT(Double)
91
90
  VISIT(Binary)
@@ -145,6 +144,128 @@ namespace red_arrow {
145
144
  // The number of columns.
146
145
  const int n_columns_;
147
146
  };
147
+
148
+ class RawRecordsProducer : private Converter, public arrow::ArrayVisitor {
149
+ public:
150
+ explicit RawRecordsProducer()
151
+ : Converter(),
152
+ record_(Qnil),
153
+ column_index_(0),
154
+ row_offset_(0) {
155
+ }
156
+
157
+ void produce(const arrow::RecordBatch& record_batch) {
158
+ rb::protect([&] {
159
+ const auto n_columns = record_batch.num_columns();
160
+ const auto n_rows = record_batch.num_rows();
161
+ for (int64_t i = 0; i < n_rows; ++i) {
162
+ record_ = rb_ary_new_capa(n_columns);
163
+ row_offset_ = i;
164
+ for (int i = 0; i < n_columns; ++i) {
165
+ const auto array = record_batch.column(i).get();
166
+ column_index_ = i;
167
+ check_status(array->Accept(this),
168
+ "[record-batch][each-raw-record]");
169
+ }
170
+ rb_yield(record_);
171
+ }
172
+ return Qnil;
173
+ });
174
+ }
175
+
176
+ void produce(const arrow::Table& table) {
177
+ rb::protect([&] {
178
+ const auto n_columns = table.num_columns();
179
+ const auto n_rows = table.num_rows();
180
+ std::vector<int> chunk_indexes(n_columns);
181
+ std::vector<int64_t> row_offsets(n_columns);
182
+ for (int64_t i_row = 0; i_row < n_rows; ++i_row) {
183
+ record_ = rb_ary_new_capa(n_columns);
184
+ for (int i_column = 0; i_column < n_columns; ++i_column) {
185
+ column_index_ = i_column;
186
+ const auto chunked_array = table.column(i_column).get();
187
+ auto& chunk_index = chunk_indexes[i_column];
188
+ auto& row_offset = row_offsets[i_column];
189
+ auto array = chunked_array->chunk(chunk_index).get();
190
+ while (array->length() == row_offset) {
191
+ ++chunk_index;
192
+ row_offset = 0;
193
+ array = chunked_array->chunk(chunk_index).get();
194
+ }
195
+ row_offset_ = row_offset;
196
+ check_status(array->Accept(this),
197
+ "[table][each-raw-record]");
198
+ ++row_offset;
199
+ }
200
+ rb_yield(record_);
201
+ }
202
+
203
+ return Qnil;
204
+ });
205
+ }
206
+
207
+ #define VISIT(TYPE) \
208
+ arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
209
+ convert(array); \
210
+ return arrow::Status::OK(); \
211
+ }
212
+
213
+ VISIT(Null)
214
+ VISIT(Boolean)
215
+ VISIT(Int8)
216
+ VISIT(Int16)
217
+ VISIT(Int32)
218
+ VISIT(Int64)
219
+ VISIT(UInt8)
220
+ VISIT(UInt16)
221
+ VISIT(UInt32)
222
+ VISIT(UInt64)
223
+ VISIT(HalfFloat)
224
+ VISIT(Float)
225
+ VISIT(Double)
226
+ VISIT(Binary)
227
+ VISIT(String)
228
+ VISIT(FixedSizeBinary)
229
+ VISIT(Date32)
230
+ VISIT(Date64)
231
+ VISIT(Time32)
232
+ VISIT(Time64)
233
+ VISIT(Timestamp)
234
+ VISIT(MonthInterval)
235
+ VISIT(DayTimeInterval)
236
+ VISIT(MonthDayNanoInterval)
237
+ VISIT(List)
238
+ VISIT(Struct)
239
+ VISIT(Map)
240
+ VISIT(SparseUnion)
241
+ VISIT(DenseUnion)
242
+ VISIT(Dictionary)
243
+ VISIT(Decimal128)
244
+ VISIT(Decimal256)
245
+ // TODO
246
+ // VISIT(Extension)
247
+
248
+ #undef VISIT
249
+
250
+ private:
251
+ template <typename ArrayType>
252
+ void convert(const ArrayType& array) {
253
+ auto value = Qnil;
254
+ if (!array.IsNull(row_offset_)) {
255
+ value = convert_value(array, row_offset_);
256
+ }
257
+ rb_ary_store(record_, column_index_, value);
258
+ }
259
+
260
+ // Destination for converted record.
261
+ VALUE record_;
262
+
263
+ // The current column index.
264
+ int column_index_;
265
+
266
+ // The current row offset.
267
+ int64_t row_offset_;
268
+ };
148
269
  }
149
270
 
150
271
  VALUE
@@ -182,4 +303,36 @@ namespace red_arrow {
182
303
 
183
304
  return records;
184
305
  }
306
+
307
+ VALUE
308
+ record_batch_each_raw_record(VALUE rb_record_batch) {
309
+ auto garrow_record_batch = GARROW_RECORD_BATCH(RVAL2GOBJ(rb_record_batch));
310
+ auto record_batch = garrow_record_batch_get_raw(garrow_record_batch).get();
311
+ RETURN_SIZED_ENUMERATOR(rb_record_batch, 0, nullptr, record_batch->num_rows());
312
+
313
+ try {
314
+ RawRecordsProducer producer;
315
+ producer.produce(*record_batch);
316
+ } catch (rb::State& state) {
317
+ state.jump();
318
+ }
319
+
320
+ return Qnil;
321
+ }
322
+
323
+ VALUE
324
+ table_each_raw_record(VALUE rb_table) {
325
+ auto garrow_table = GARROW_TABLE(RVAL2GOBJ(rb_table));
326
+ auto table = garrow_table_get_raw(garrow_table).get();
327
+ RETURN_SIZED_ENUMERATOR(rb_table, 0, nullptr, table->num_rows());
328
+
329
+ try {
330
+ RawRecordsProducer producer;
331
+ producer.produce(*table);
332
+ } catch (rb::State& state) {
333
+ state.jump();
334
+ }
335
+
336
+ return Qnil;
337
+ }
185
338
  }
@@ -59,6 +59,8 @@ namespace red_arrow {
59
59
 
60
60
  VALUE record_batch_raw_records(VALUE obj);
61
61
  VALUE table_raw_records(VALUE obj);
62
+ VALUE record_batch_each_raw_record(VALUE obj);
63
+ VALUE table_each_raw_record(VALUE obj);
62
64
 
63
65
  inline VALUE time_unit_to_scale(const arrow::TimeUnit::type unit) {
64
66
  switch (unit) {
data/ext/arrow/values.cpp CHANGED
@@ -65,8 +65,7 @@ namespace red_arrow {
65
65
  VISIT(UInt16)
66
66
  VISIT(UInt32)
67
67
  VISIT(UInt64)
68
- // TODO
69
- // VISIT(HalfFloat)
68
+ VISIT(HalfFloat)
70
69
  VISIT(Float)
71
70
  VISIT(Double)
72
71
  VISIT(Binary)
@@ -29,6 +29,19 @@ module Arrow
29
29
  unique.values
30
30
  end
31
31
 
32
# Looks up the position of the first element that equals the given value.
#
# The value is converted with `Scalar.resolve` for this array's value data
# type and then passed to the "index" compute function.
#
# @param value [Object] The value to search for.
#
# @return [Integer] The index of the first occurrence of the value when
#   it is found, -1 when it is not found.
#
# @since 12.0.0
def index(value)
  target = Scalar.resolve(value, value_data_type)
  found = compute("index", options: {value: target})
  found.value
end
44
+
32
45
  private
33
46
  def compute(name, options: nil)
34
47
  Function.find(name).execute([self], options).value
data/lib/arrow/array.rb CHANGED
@@ -22,6 +22,7 @@ module Arrow
22
22
  include ArrayComputable
23
23
  include GenericFilterable
24
24
  include GenericTakeable
25
+ include InputReferable
25
26
 
26
27
  class << self
27
28
  def new(*args)
@@ -115,6 +116,10 @@ module Arrow
115
116
  self
116
117
  end
117
118
 
119
# @return [ChunkedArray] A chunked array that has this array as its
#   only chunk.
def to_arrow_chunked_array
  only_chunk = [self]
  ChunkedArray.new(only_chunk)
end
122
+
118
123
  alias_method :value_data_type_raw, :value_data_type
119
124
  def value_data_type
120
125
  @value_data_type ||= value_data_type_raw
@@ -245,7 +250,7 @@ module Arrow
245
250
  "[array][resolve] need to implement " +
246
251
  "a feature that building #{value_data_type} array " +
247
252
  "from raw Ruby Array"
248
- raise NotImpelemented, message
253
+ raise NotImplemented, message
249
254
  end
250
255
  other_array
251
256
  elsif other_array.respond_to?(:value_data_type)
@@ -22,6 +22,31 @@ module Arrow
22
22
  include ArrayComputable
23
23
  include GenericFilterable
24
24
  include GenericTakeable
25
+ include InputReferable
26
+
27
# Freezes this object.
#
# `chunks` is called first while the object is still unfrozen so that its
# cached value is already in place before freezing.
def freeze
  # Ensure caching
  chunks unless frozen?
  super
end
34
+
35
# @return [self] This object itself; no conversion is performed.
def to_arrow
  self
end
38
+
39
# Converts this chunked array to a single array.
#
# @return [Object] The result of `combine` when there is at least one
#   chunk, otherwise an empty array built by the value data type.
def to_arrow_array
  # With no chunks, build an empty array of the right type instead of
  # combining.
  return value_data_type.build_array([]) if n_chunks.zero?

  combine
end
46
+
47
# @return [self] This object itself; it is already a chunked array.
def to_arrow_chunked_array
  self
end
25
50
 
26
51
  alias_method :size, :n_rows
27
52
  unless method_defined?(:length)
@@ -30,7 +55,16 @@ module Arrow
30
55
 
31
56
  alias_method :chunks_raw, :chunks
32
57
  def chunks
33
- @chunks ||= chunks_raw
58
+ @chunks ||= chunks_raw.tap do |_chunks|
59
+ _chunks.each do |chunk|
60
+ share_input(chunk)
61
+ end
62
+ end
63
+ end
64
+
65
+ alias_method :get_chunk_raw, :get_chunk
66
+ def get_chunk(i)
67
+ chunks[i]
34
68
  end
35
69
 
36
70
  def null?(i)
@@ -143,5 +143,14 @@ module Arrow
143
143
  find_column(selector)
144
144
  end
145
145
  end
146
+
147
# Returns the names of all columns in this object.
#
# The names are computed from `columns` on the first call and cached.
#
# @return [::Array<String>] The column names.
#
# @since 11.0.0
def column_names
  @column_names ||= columns.map { |column| column.name }
end
146
155
  end
147
156
  end
data/lib/arrow/column.rb CHANGED
@@ -27,6 +27,7 @@ module Arrow
27
27
  @index = index
28
28
  @field = @container.schema[@index]
29
29
  @data = @container.get_column_data(@index)
30
+ @container.share_input(@data)
30
31
  end
31
32
 
32
33
  def name
@@ -199,5 +199,14 @@ module Arrow
199
199
  args.unshift(self) unless builder_class.buildable?(args)
200
200
  builder_class.build(*args)
201
201
  end
202
+
203
# Finds the scalar class that corresponds to this data type.
#
# The class name is derived from this data type's class name by replacing
# the trailing "DataType" with "Scalar".
#
# @return [Class] A corresponding {Arrow::Scalar} class for this data type.
#
# @since 12.0.0
def scalar_class
  data_type_class_name = self.class.name
  scalar_class_name = data_type_class_name.sub(/DataType\z/, "") + "Scalar"
  ::Arrow.const_get(scalar_class_name)
end
202
211
  end
203
212
  end
@@ -0,0 +1,49 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class DenseUnionArrayBuilder
    alias_method :append_value_raw, :append_value

    # Appends a union record.
    #
    # @param value [nil, Hash] The union record value.
    #
    #   If this is `nil`, a null is appended.
    #
    #   If this is a `Hash`, its first key selects the child via
    #   `child_infos` and its first value is appended to that child's
    #   builder.
    #
    # @since 12.0.0
    def append_value(value)
      return append_null if value.nil?

      child_key = value.keys.first
      child = child_infos[child_key]
      # Record which child is used first, then append the actual value to
      # that child's builder.
      append_value_raw(child[:id])
      child[:builder].append(value.values.first)
    end
  end
end