red-arrow 0.14.1 → 0.15.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of red-arrow might be problematic. Click here for more details.

Files changed (61)
  1. checksums.yaml +4 -4
  2. data/ext/arrow/arrow.cpp +34 -0
  3. data/ext/arrow/converters.cpp +42 -0
  4. data/ext/arrow/converters.hpp +626 -0
  5. data/ext/arrow/raw-records.cpp +6 -625
  6. data/ext/arrow/red-arrow.hpp +37 -3
  7. data/ext/arrow/values.cpp +154 -0
  8. data/lib/arrow/array-builder.rb +24 -1
  9. data/lib/arrow/array.rb +9 -0
  10. data/lib/arrow/chunked-array.rb +5 -0
  11. data/lib/arrow/column-containable.rb +48 -0
  12. data/lib/arrow/column.rb +36 -10
  13. data/lib/arrow/csv-loader.rb +2 -2
  14. data/lib/arrow/data-type.rb +22 -5
  15. data/lib/arrow/date64-array-builder.rb +2 -2
  16. data/lib/arrow/date64-array.rb +1 -1
  17. data/lib/arrow/decimal128-array.rb +24 -0
  18. data/lib/arrow/field-containable.rb +3 -0
  19. data/lib/arrow/group.rb +10 -13
  20. data/lib/arrow/loader.rb +20 -1
  21. data/lib/arrow/record-batch.rb +6 -4
  22. data/lib/arrow/record-containable.rb +0 -35
  23. data/lib/arrow/record.rb +12 -9
  24. data/lib/arrow/slicer.rb +2 -2
  25. data/lib/arrow/struct-array-builder.rb +1 -7
  26. data/lib/arrow/struct-array.rb +13 -11
  27. data/lib/arrow/table-loader.rb +3 -9
  28. data/lib/arrow/table-table-formatter.rb +2 -2
  29. data/lib/arrow/table.rb +61 -24
  30. data/lib/arrow/time.rb +159 -0
  31. data/lib/arrow/time32-array-builder.rb +49 -0
  32. data/lib/arrow/time32-array.rb +28 -0
  33. data/lib/arrow/time64-array-builder.rb +49 -0
  34. data/lib/arrow/time64-array.rb +28 -0
  35. data/lib/arrow/timestamp-array-builder.rb +20 -1
  36. data/lib/arrow/timestamp-array.rb +10 -22
  37. data/lib/arrow/version.rb +1 -1
  38. data/red-arrow.gemspec +1 -1
  39. data/test/raw-records/test-basic-arrays.rb +16 -8
  40. data/test/raw-records/test-dense-union-array.rb +12 -5
  41. data/test/raw-records/test-list-array.rb +21 -9
  42. data/test/raw-records/test-sparse-union-array.rb +13 -5
  43. data/test/raw-records/test-struct-array.rb +11 -4
  44. data/test/test-column.rb +56 -31
  45. data/test/test-decimal128-array-builder.rb +11 -11
  46. data/test/test-decimal128-array.rb +4 -4
  47. data/test/test-slicer.rb +1 -3
  48. data/test/test-struct-array-builder.rb +4 -4
  49. data/test/test-struct-array.rb +4 -4
  50. data/test/test-table.rb +17 -8
  51. data/test/test-time.rb +288 -0
  52. data/test/test-time32-array.rb +81 -0
  53. data/test/test-time64-array.rb +81 -0
  54. data/test/values/test-basic-arrays.rb +284 -0
  55. data/test/values/test-dense-union-array.rb +487 -0
  56. data/test/values/test-list-array.rb +497 -0
  57. data/test/values/test-sparse-union-array.rb +477 -0
  58. data/test/values/test-struct-array.rb +452 -0
  59. metadata +78 -54
  60. data/lib/arrow/struct.rb +0 -79
  61. data/test/test-struct.rb +0 -81
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5f7bdcaca942fb8a25fa1764179d518cf33a1fea5972c6029ecd752a74a91fad
4
- data.tar.gz: c780051fb0429542c4ac329e79e9d814743149e414d81e06d31a59db9145a8d7
3
+ metadata.gz: 65ddeae926378c460b0945ff6949ecbf3ee911611cdcb95bf21f3cf3765efe6d
4
+ data.tar.gz: 193bce59b05b836fb5a5d6d8b650ea9adf441bf04beda8a6083633692c796957
5
5
  SHA512:
6
- metadata.gz: 57d3983f2f2d6e26dd3568e97eeec6f85489f8addd7faa3a26eded35ba4d1c4716c794c32d0ed1eec996c7334f9e1fa5b622416e22d2021d88be3cd891a0a7f4
7
- data.tar.gz: a32cf91310e2ed8f3db1e9bec1e790da52bbba27f4cc9313a1b5ee89e560049bb72c66f7a779c7ba9326231e4aad46a15adfb91a6f92dbba75999d36669db24c
6
+ metadata.gz: b502a0bc9f65b24d04d9f5ee1f58e7eb5db27e885d756158c249fba8be843af7f1ef12c234110cc7136d67f5e998ceb726a7ecf8688b653ba84ec207f7308e78
7
+ data.tar.gz: 1d8f8798f582fdb8b4e0e566d52407b3303391c4c43deef9471dc0011c9a377f83fca308b230709ef0b332ce83892a1cd967096531b4701382d4ed40066b216c
data/ext/arrow/arrow.cpp CHANGED
@@ -23,17 +23,38 @@
23
23
 
24
24
  namespace red_arrow {
25
25
  VALUE cDate;
26
+
27
+ VALUE cArrowTime;
28
+
29
+ VALUE ArrowTimeUnitSECOND;
30
+ VALUE ArrowTimeUnitMILLI;
31
+ VALUE ArrowTimeUnitMICRO;
32
+ VALUE ArrowTimeUnitNANO;
33
+
26
34
  ID id_BigDecimal;
27
35
  ID id_jd;
36
+ ID id_new;
28
37
  ID id_to_datetime;
29
38
  }
30
39
 
31
40
  extern "C" void Init_arrow() {
32
41
  auto mArrow = rb_const_get_at(rb_cObject, rb_intern("Arrow"));
42
+
43
+ auto cArrowArray = rb_const_get_at(mArrow, rb_intern("Array"));
44
+ rb_define_method(cArrowArray, "values",
45
+ reinterpret_cast<rb::RawMethod>(red_arrow::array_values),
46
+ 0);
47
+
48
+ auto cArrowChunkedArray = rb_const_get_at(mArrow, rb_intern("ChunkedArray"));
49
+ rb_define_method(cArrowChunkedArray, "values",
50
+ reinterpret_cast<rb::RawMethod>(red_arrow::chunked_array_values),
51
+ 0);
52
+
33
53
  auto cArrowRecordBatch = rb_const_get_at(mArrow, rb_intern("RecordBatch"));
34
54
  rb_define_method(cArrowRecordBatch, "raw_records",
35
55
  reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_raw_records),
36
56
  0);
57
+
37
58
  auto cArrowTable = rb_const_get_at(mArrow, rb_intern("Table"));
38
59
  rb_define_method(cArrowTable, "raw_records",
39
60
  reinterpret_cast<rb::RawMethod>(red_arrow::table_raw_records),
@@ -41,7 +62,20 @@ extern "C" void Init_arrow() {
41
62
 
42
63
  red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date"));
43
64
 
65
+ red_arrow::cArrowTime = rb_const_get_at(mArrow, rb_intern("Time"));
66
+
67
+ auto cArrowTimeUnit = rb_const_get_at(mArrow, rb_intern("TimeUnit"));
68
+ red_arrow::ArrowTimeUnitSECOND =
69
+ rb_const_get_at(cArrowTimeUnit, rb_intern("SECOND"));
70
+ red_arrow::ArrowTimeUnitMILLI =
71
+ rb_const_get_at(cArrowTimeUnit, rb_intern("MILLI"));
72
+ red_arrow::ArrowTimeUnitMICRO =
73
+ rb_const_get_at(cArrowTimeUnit, rb_intern("MICRO"));
74
+ red_arrow::ArrowTimeUnitNANO =
75
+ rb_const_get_at(cArrowTimeUnit, rb_intern("NANO"));
76
+
44
77
  red_arrow::id_BigDecimal = rb_intern("BigDecimal");
45
78
  red_arrow::id_jd = rb_intern("jd");
79
+ red_arrow::id_new = rb_intern("new");
46
80
  red_arrow::id_to_datetime = rb_intern("to_datetime");
47
81
  }
data/ext/arrow/converters.cpp ADDED
@@ -0,0 +1,42 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include "converters.hpp"
21
+
22
+ namespace red_arrow {
23
+ VALUE ArrayValueConverter::convert(const arrow::ListArray& array,
24
+ const int64_t i) {
25
+ return list_array_value_converter_->convert(array, i);
26
+ }
27
+
28
+ VALUE ArrayValueConverter::convert(const arrow::StructArray& array,
29
+ const int64_t i) {
30
+ return struct_array_value_converter_->convert(array, i);
31
+ }
32
+
33
+ VALUE ArrayValueConverter::convert(const arrow::UnionArray& array,
34
+ const int64_t i) {
35
+ return union_array_value_converter_->convert(array, i);
36
+ }
37
+
38
+ VALUE ArrayValueConverter::convert(const arrow::DictionaryArray& array,
39
+ const int64_t i) {
40
+ return dictionary_array_value_converter_->convert(array, i);
41
+ }
42
+ }
data/ext/arrow/converters.hpp ADDED
@@ -0,0 +1,626 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include "red-arrow.hpp"
21
+
22
+ #include <ruby.hpp>
23
+ #include <ruby/encoding.h>
24
+
25
+ #include <arrow-glib/error.hpp>
26
+
27
+ #include <arrow/util/logging.h>
28
+
29
+ namespace red_arrow {
30
+ class ListArrayValueConverter;
31
+ class StructArrayValueConverter;
32
+ class UnionArrayValueConverter;
33
+ class DictionaryArrayValueConverter;
34
+
35
+ class ArrayValueConverter {
36
+ public:
37
+ ArrayValueConverter()
38
+ : decimal_buffer_(),
39
+ list_array_value_converter_(nullptr),
40
+ struct_array_value_converter_(nullptr),
41
+ union_array_value_converter_(nullptr),
42
+ dictionary_array_value_converter_(nullptr) {
43
+ }
44
+
45
+ inline void set_sub_value_converters(ListArrayValueConverter* list_array_value_converter,
46
+ StructArrayValueConverter* struct_array_value_converter,
47
+ UnionArrayValueConverter* union_array_value_converter,
48
+ DictionaryArrayValueConverter* dictionary_array_value_converter) {
49
+ list_array_value_converter_ = list_array_value_converter;
50
+ struct_array_value_converter_ = struct_array_value_converter;
51
+ union_array_value_converter_ = union_array_value_converter;
52
+ dictionary_array_value_converter_ = dictionary_array_value_converter;
53
+ }
54
+
55
+ inline VALUE convert(const arrow::NullArray& array,
56
+ const int64_t i) {
57
+ return Qnil;
58
+ }
59
+
60
+ inline VALUE convert(const arrow::BooleanArray& array,
61
+ const int64_t i) {
62
+ return array.Value(i) ? Qtrue : Qfalse;
63
+ }
64
+
65
+ inline VALUE convert(const arrow::Int8Array& array,
66
+ const int64_t i) {
67
+ return INT2NUM(array.Value(i));
68
+ }
69
+
70
+ inline VALUE convert(const arrow::Int16Array& array,
71
+ const int64_t i) {
72
+ return INT2NUM(array.Value(i));
73
+ }
74
+
75
+ inline VALUE convert(const arrow::Int32Array& array,
76
+ const int64_t i) {
77
+ return INT2NUM(array.Value(i));
78
+ }
79
+
80
+ inline VALUE convert(const arrow::Int64Array& array,
81
+ const int64_t i) {
82
+ return LL2NUM(array.Value(i));
83
+ }
84
+
85
+ inline VALUE convert(const arrow::UInt8Array& array,
86
+ const int64_t i) {
87
+ return UINT2NUM(array.Value(i));
88
+ }
89
+
90
+ inline VALUE convert(const arrow::UInt16Array& array,
91
+ const int64_t i) {
92
+ return UINT2NUM(array.Value(i));
93
+ }
94
+
95
+ inline VALUE convert(const arrow::UInt32Array& array,
96
+ const int64_t i) {
97
+ return UINT2NUM(array.Value(i));
98
+ }
99
+
100
+ inline VALUE convert(const arrow::UInt64Array& array,
101
+ const int64_t i) {
102
+ return ULL2NUM(array.Value(i));
103
+ }
104
+
105
+ // TODO
106
+ // inline VALUE convert(const arrow::HalfFloatArray& array,
107
+ // const int64_t i) {
108
+ // }
109
+
110
+ inline VALUE convert(const arrow::FloatArray& array,
111
+ const int64_t i) {
112
+ return DBL2NUM(array.Value(i));
113
+ }
114
+
115
+ inline VALUE convert(const arrow::DoubleArray& array,
116
+ const int64_t i) {
117
+ return DBL2NUM(array.Value(i));
118
+ }
119
+
120
+ inline VALUE convert(const arrow::BinaryArray& array,
121
+ const int64_t i) {
122
+ int32_t length;
123
+ const auto value = array.GetValue(i, &length);
124
+ // TODO: encoding support
125
+ return rb_enc_str_new(reinterpret_cast<const char*>(value),
126
+ length,
127
+ rb_ascii8bit_encoding());
128
+ }
129
+
130
+ inline VALUE convert(const arrow::StringArray& array,
131
+ const int64_t i) {
132
+ int32_t length;
133
+ const auto value = array.GetValue(i, &length);
134
+ return rb_utf8_str_new(reinterpret_cast<const char*>(value),
135
+ length);
136
+ }
137
+
138
+ inline VALUE convert(const arrow::FixedSizeBinaryArray& array,
139
+ const int64_t i) {
140
+ return rb_enc_str_new(reinterpret_cast<const char*>(array.Value(i)),
141
+ array.byte_width(),
142
+ rb_ascii8bit_encoding());
143
+ }
144
+
145
+ constexpr static int32_t JULIAN_DATE_UNIX_EPOCH = 2440588;
146
+ inline VALUE convert(const arrow::Date32Array& array,
147
+ const int64_t i) {
148
+ const auto value = array.Value(i);
149
+ const auto days_in_julian = value + JULIAN_DATE_UNIX_EPOCH;
150
+ return rb_funcall(cDate, id_jd, 1, LONG2NUM(days_in_julian));
151
+ }
152
+
153
+ inline VALUE convert(const arrow::Date64Array& array,
154
+ const int64_t i) {
155
+ const auto value = array.Value(i);
156
+ auto msec = LL2NUM(value);
157
+ auto sec = rb_rational_new(msec, INT2NUM(1000));
158
+ auto time_value = rb_time_num_new(sec, Qnil);
159
+ return rb_funcall(time_value, id_to_datetime, 0, 0);
160
+ }
161
+
162
+ inline VALUE convert(const arrow::Time32Array& array,
163
+ const int64_t i) {
164
+ const auto type =
165
+ arrow::internal::checked_cast<const arrow::Time32Type*>(array.type().get());
166
+ const auto value = array.Value(i);
167
+ return rb_funcall(red_arrow::cArrowTime,
168
+ id_new,
169
+ 2,
170
+ time_unit_to_enum(type->unit()),
171
+ INT2NUM(value));
172
+ }
173
+
174
+ inline VALUE convert(const arrow::Time64Array& array,
175
+ const int64_t i) {
176
+ const auto type =
177
+ arrow::internal::checked_cast<const arrow::Time64Type*>(array.type().get());
178
+ const auto value = array.Value(i);
179
+ return rb_funcall(red_arrow::cArrowTime,
180
+ id_new,
181
+ 2,
182
+ time_unit_to_enum(type->unit()),
183
+ LL2NUM(value));
184
+ }
185
+
186
+ inline VALUE convert(const arrow::TimestampArray& array,
187
+ const int64_t i) {
188
+ const auto type =
189
+ arrow::internal::checked_cast<const arrow::TimestampType*>(array.type().get());
190
+ auto scale = time_unit_to_scale(type->unit());
191
+ auto value = array.Value(i);
192
+ auto sec = rb_rational_new(LL2NUM(value), scale);
193
+ return rb_time_num_new(sec, Qnil);
194
+ }
195
+
196
+ // TODO
197
+ // inline VALUE convert(const arrow::IntervalArray& array,
198
+ // const int64_t i) {
199
+ // };
200
+
201
+ VALUE convert(const arrow::ListArray& array,
202
+ const int64_t i);
203
+
204
+ VALUE convert(const arrow::StructArray& array,
205
+ const int64_t i);
206
+
207
+ VALUE convert(const arrow::UnionArray& array,
208
+ const int64_t i);
209
+
210
+ VALUE convert(const arrow::DictionaryArray& array,
211
+ const int64_t i);
212
+
213
+ inline VALUE convert(const arrow::Decimal128Array& array,
214
+ const int64_t i) {
215
+ decimal_buffer_ = array.FormatValue(i);
216
+ return rb_funcall(rb_cObject,
217
+ id_BigDecimal,
218
+ 1,
219
+ rb_enc_str_new(decimal_buffer_.data(),
220
+ decimal_buffer_.length(),
221
+ rb_ascii8bit_encoding()));
222
+ }
223
+
224
+ private:
225
+ std::string decimal_buffer_;
226
+ ListArrayValueConverter* list_array_value_converter_;
227
+ StructArrayValueConverter* struct_array_value_converter_;
228
+ UnionArrayValueConverter* union_array_value_converter_;
229
+ DictionaryArrayValueConverter* dictionary_array_value_converter_;
230
+ };
231
+
232
+ class ListArrayValueConverter : public arrow::ArrayVisitor {
233
+ public:
234
+ explicit ListArrayValueConverter(ArrayValueConverter* converter)
235
+ : array_value_converter_(converter),
236
+ offset_(0),
237
+ length_(0),
238
+ result_(Qnil) {}
239
+
240
+ VALUE convert(const arrow::ListArray& array, const int64_t index) {
241
+ auto values = array.values().get();
242
+ auto offset_keep = offset_;
243
+ auto length_keep = length_;
244
+ offset_ = array.value_offset(index);
245
+ length_ = array.value_length(index);
246
+ auto result_keep = result_;
247
+ result_ = rb_ary_new_capa(length_);
248
+ check_status(values->Accept(this),
249
+ "[raw-records][list-array]");
250
+ offset_ = offset_keep;
251
+ length_ = length_keep;
252
+ auto result_return = result_;
253
+ result_ = result_keep;
254
+ return result_return;
255
+ }
256
+
257
+ #define VISIT(TYPE) \
258
+ arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
259
+ return visit_value(array); \
260
+ }
261
+
262
+ VISIT(Null)
263
+ VISIT(Boolean)
264
+ VISIT(Int8)
265
+ VISIT(Int16)
266
+ VISIT(Int32)
267
+ VISIT(Int64)
268
+ VISIT(UInt8)
269
+ VISIT(UInt16)
270
+ VISIT(UInt32)
271
+ VISIT(UInt64)
272
+ // TODO
273
+ // VISIT(HalfFloat)
274
+ VISIT(Float)
275
+ VISIT(Double)
276
+ VISIT(Binary)
277
+ VISIT(String)
278
+ VISIT(FixedSizeBinary)
279
+ VISIT(Date32)
280
+ VISIT(Date64)
281
+ VISIT(Time32)
282
+ VISIT(Time64)
283
+ VISIT(Timestamp)
284
+ // TODO
285
+ // VISIT(Interval)
286
+ VISIT(List)
287
+ VISIT(Struct)
288
+ VISIT(Union)
289
+ VISIT(Dictionary)
290
+ VISIT(Decimal128)
291
+ // TODO
292
+ // VISIT(Extension)
293
+
294
+ #undef VISIT
295
+
296
+ private:
297
+ template <typename ArrayType>
298
+ inline VALUE convert_value(const ArrayType& array,
299
+ const int64_t i) {
300
+ return array_value_converter_->convert(array, i);
301
+ }
302
+
303
+ template <typename ArrayType>
304
+ arrow::Status visit_value(const ArrayType& array) {
305
+ if (array.null_count() > 0) {
306
+ for (int64_t i = 0; i < length_; ++i) {
307
+ auto value = Qnil;
308
+ if (!array.IsNull(i + offset_)) {
309
+ value = convert_value(array, i + offset_);
310
+ }
311
+ rb_ary_push(result_, value);
312
+ }
313
+ } else {
314
+ for (int64_t i = 0; i < length_; ++i) {
315
+ rb_ary_push(result_, convert_value(array, i + offset_));
316
+ }
317
+ }
318
+ return arrow::Status::OK();
319
+ }
320
+
321
+ ArrayValueConverter* array_value_converter_;
322
+ int32_t offset_;
323
+ int32_t length_;
324
+ VALUE result_;
325
+ };
326
+
327
+ class StructArrayValueConverter : public arrow::ArrayVisitor {
328
+ public:
329
+ explicit StructArrayValueConverter(ArrayValueConverter* converter)
330
+ : array_value_converter_(converter),
331
+ key_(Qnil),
332
+ index_(0),
333
+ result_(Qnil) {}
334
+
335
+ VALUE convert(const arrow::StructArray& array,
336
+ const int64_t index) {
337
+ auto index_keep = index_;
338
+ auto result_keep = result_;
339
+ index_ = index;
340
+ result_ = rb_hash_new();
341
+ const auto struct_type = array.struct_type();
342
+ const auto n = struct_type->num_children();
343
+ for (int i = 0; i < n; ++i) {
344
+ const auto field_type = struct_type->child(i).get();
345
+ const auto& field_name = field_type->name();
346
+ auto key_keep = key_;
347
+ key_ = rb_utf8_str_new(field_name.data(), field_name.length());
348
+ const auto field_array = array.field(i).get();
349
+ check_status(field_array->Accept(this),
350
+ "[raw-records][struct-array]");
351
+ key_ = key_keep;
352
+ }
353
+ auto result_return = result_;
354
+ result_ = result_keep;
355
+ index_ = index_keep;
356
+ return result_return;
357
+ }
358
+
359
+ #define VISIT(TYPE) \
360
+ arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
361
+ fill_field(array); \
362
+ return arrow::Status::OK(); \
363
+ }
364
+
365
+ VISIT(Null)
366
+ VISIT(Boolean)
367
+ VISIT(Int8)
368
+ VISIT(Int16)
369
+ VISIT(Int32)
370
+ VISIT(Int64)
371
+ VISIT(UInt8)
372
+ VISIT(UInt16)
373
+ VISIT(UInt32)
374
+ VISIT(UInt64)
375
+ // TODO
376
+ // VISIT(HalfFloat)
377
+ VISIT(Float)
378
+ VISIT(Double)
379
+ VISIT(Binary)
380
+ VISIT(String)
381
+ VISIT(FixedSizeBinary)
382
+ VISIT(Date32)
383
+ VISIT(Date64)
384
+ VISIT(Time32)
385
+ VISIT(Time64)
386
+ VISIT(Timestamp)
387
+ // TODO
388
+ // VISIT(Interval)
389
+ VISIT(List)
390
+ VISIT(Struct)
391
+ VISIT(Union)
392
+ VISIT(Dictionary)
393
+ VISIT(Decimal128)
394
+ // TODO
395
+ // VISIT(Extension)
396
+
397
+ #undef VISIT
398
+
399
+ private:
400
+ template <typename ArrayType>
401
+ inline VALUE convert_value(const ArrayType& array,
402
+ const int64_t i) {
403
+ return array_value_converter_->convert(array, i);
404
+ }
405
+
406
+ template <typename ArrayType>
407
+ void fill_field(const ArrayType& array) {
408
+ if (array.IsNull(index_)) {
409
+ rb_hash_aset(result_, key_, Qnil);
410
+ } else {
411
+ rb_hash_aset(result_, key_, convert_value(array, index_));
412
+ }
413
+ }
414
+
415
+ ArrayValueConverter* array_value_converter_;
416
+ VALUE key_;
417
+ int64_t index_;
418
+ VALUE result_;
419
+ };
420
+
421
+ class UnionArrayValueConverter : public arrow::ArrayVisitor {
422
+ public:
423
+ explicit UnionArrayValueConverter(ArrayValueConverter* converter)
424
+ : array_value_converter_(converter),
425
+ index_(0),
426
+ result_(Qnil) {}
427
+
428
+ VALUE convert(const arrow::UnionArray& array,
429
+ const int64_t index) {
430
+ const auto index_keep = index_;
431
+ const auto result_keep = result_;
432
+ index_ = index;
433
+ switch (array.mode()) {
434
+ case arrow::UnionMode::SPARSE:
435
+ convert_sparse(array);
436
+ break;
437
+ case arrow::UnionMode::DENSE:
438
+ convert_dense(array);
439
+ break;
440
+ default:
441
+ rb_raise(rb_eArgError, "Invalid union mode");
442
+ break;
443
+ }
444
+ auto result_return = result_;
445
+ index_ = index_keep;
446
+ result_ = result_keep;
447
+ return result_return;
448
+ }
449
+
450
+ #define VISIT(TYPE) \
451
+ arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
452
+ convert_value(array); \
453
+ return arrow::Status::OK(); \
454
+ }
455
+
456
+ VISIT(Null)
457
+ VISIT(Boolean)
458
+ VISIT(Int8)
459
+ VISIT(Int16)
460
+ VISIT(Int32)
461
+ VISIT(Int64)
462
+ VISIT(UInt8)
463
+ VISIT(UInt16)
464
+ VISIT(UInt32)
465
+ VISIT(UInt64)
466
+ // TODO
467
+ // VISIT(HalfFloat)
468
+ VISIT(Float)
469
+ VISIT(Double)
470
+ VISIT(Binary)
471
+ VISIT(String)
472
+ VISIT(FixedSizeBinary)
473
+ VISIT(Date32)
474
+ VISIT(Date64)
475
+ VISIT(Time32)
476
+ VISIT(Time64)
477
+ VISIT(Timestamp)
478
+ // TODO
479
+ // VISIT(Interval)
480
+ VISIT(List)
481
+ VISIT(Struct)
482
+ VISIT(Union)
483
+ VISIT(Dictionary)
484
+ VISIT(Decimal128)
485
+ // TODO
486
+ // VISIT(Extension)
487
+
488
+ #undef VISIT
489
+
490
+ private:
491
+ template <typename ArrayType>
492
+ inline void convert_value(const ArrayType& array) {
493
+ auto result = rb_hash_new();
494
+ if (array.IsNull(index_)) {
495
+ rb_hash_aset(result, field_name_, Qnil);
496
+ } else {
497
+ rb_hash_aset(result,
498
+ field_name_,
499
+ array_value_converter_->convert(array, index_));
500
+ }
501
+ result_ = result;
502
+ }
503
+
504
+ uint8_t compute_child_index(const arrow::UnionArray& array,
505
+ arrow::UnionType* type,
506
+ const char* tag) {
507
+ const auto type_id = array.raw_type_ids()[index_];
508
+ const auto& type_codes = type->type_codes();
509
+ for (uint8_t i = 0; i < type_codes.size(); ++i) {
510
+ if (type_codes[i] == type_id) {
511
+ return i;
512
+ }
513
+ }
514
+ check_status(arrow::Status::Invalid("Unknown type ID: ", type_id),
515
+ tag);
516
+ return 0;
517
+ }
518
+
519
+ void convert_sparse(const arrow::UnionArray& array) {
520
+ const auto type =
521
+ std::static_pointer_cast<arrow::UnionType>(array.type()).get();
522
+ const auto tag = "[raw-records][union-sparse-array]";
523
+ const auto child_index = compute_child_index(array, type, tag);
524
+ const auto child_field = type->child(child_index).get();
525
+ const auto& field_name = child_field->name();
526
+ const auto field_name_keep = field_name_;
527
+ field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
528
+ const auto child_array = array.child(child_index).get();
529
+ check_status(child_array->Accept(this), tag);
530
+ field_name_ = field_name_keep;
531
+ }
532
+
533
+ void convert_dense(const arrow::UnionArray& array) {
534
+ const auto type =
535
+ std::static_pointer_cast<arrow::UnionType>(array.type()).get();
536
+ const auto tag = "[raw-records][union-dense-array]";
537
+ const auto child_index = compute_child_index(array, type, tag);
538
+ const auto child_field = type->child(child_index).get();
539
+ const auto& field_name = child_field->name();
540
+ const auto field_name_keep = field_name_;
541
+ field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
542
+ const auto child_array = array.child(child_index);
543
+ const auto index_keep = index_;
544
+ index_ = array.value_offset(index_);
545
+ check_status(child_array->Accept(this), tag);
546
+ index_ = index_keep;
547
+ field_name_ = field_name_keep;
548
+ }
549
+
550
+ ArrayValueConverter* array_value_converter_;
551
+ int64_t index_;
552
+ VALUE field_name_;
553
+ VALUE result_;
554
+ };
555
+
556
+ class DictionaryArrayValueConverter : public arrow::ArrayVisitor {
557
+ public:
558
+ explicit DictionaryArrayValueConverter(ArrayValueConverter* converter)
559
+ : array_value_converter_(converter),
560
+ index_(0),
561
+ result_(Qnil) {
562
+ }
563
+
564
+ VALUE convert(const arrow::DictionaryArray& array,
565
+ const int64_t index) {
566
+ index_ = index;
567
+ auto indices = array.indices().get();
568
+ check_status(indices->Accept(this),
569
+ "[raw-records][dictionary-array]");
570
+ return result_;
571
+ }
572
+
573
+ // TODO: Convert to real value.
574
+ #define VISIT(TYPE) \
575
+ arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
576
+ result_ = convert_value(array, index_); \
577
+ return arrow::Status::OK(); \
578
+ }
579
+
580
+ VISIT(Int8)
581
+ VISIT(Int16)
582
+ VISIT(Int32)
583
+ VISIT(Int64)
584
+
585
+ #undef VISIT
586
+
587
+ private:
588
+ template <typename ArrayType>
589
+ inline VALUE convert_value(const ArrayType& array,
590
+ const int64_t i) {
591
+ return array_value_converter_->convert(array, i);
592
+ }
593
+
594
+ ArrayValueConverter* array_value_converter_;
595
+ int64_t index_;
596
+ VALUE result_;
597
+ };
598
+
599
+ class Converter {
600
+ public:
601
+ explicit Converter()
602
+ : array_value_converter_(),
603
+ list_array_value_converter_(&array_value_converter_),
604
+ struct_array_value_converter_(&array_value_converter_),
605
+ union_array_value_converter_(&array_value_converter_),
606
+ dictionary_array_value_converter_(&array_value_converter_) {
607
+ array_value_converter_.
608
+ set_sub_value_converters(&list_array_value_converter_,
609
+ &struct_array_value_converter_,
610
+ &union_array_value_converter_,
611
+ &dictionary_array_value_converter_);
612
+ }
613
+
614
+ template <typename ArrayType>
615
+ inline VALUE convert_value(const ArrayType& array,
616
+ const int64_t i) {
617
+ return array_value_converter_.convert(array, i);
618
+ }
619
+
620
+ ArrayValueConverter array_value_converter_;
621
+ ListArrayValueConverter list_array_value_converter_;
622
+ StructArrayValueConverter struct_array_value_converter_;
623
+ UnionArrayValueConverter union_array_value_converter_;
624
+ DictionaryArrayValueConverter dictionary_array_value_converter_;
625
+ };
626
+ }