red-arrow 8.0.0 → 24.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -7
- data/ext/arrow/arrow.cpp +67 -0
- data/ext/arrow/converters.cpp +10 -0
- data/ext/arrow/converters.hpp +310 -46
- data/ext/arrow/extconf.rb +41 -22
- data/ext/arrow/raw-records.cpp +165 -2
- data/ext/arrow/red-arrow.hpp +2 -0
- data/ext/arrow/values.cpp +6 -2
- data/lib/arrow/array-builder.rb +89 -14
- data/{test/test-time32-data-type.rb → lib/arrow/array-computable.rb} +24 -16
- data/{test/test-buffer.rb → lib/arrow/array-statistics.rb} +19 -24
- data/lib/arrow/array.rb +40 -4
- data/lib/arrow/chunked-array.rb +56 -1
- data/lib/arrow/column-containable.rb +9 -0
- data/lib/arrow/column.rb +49 -4
- data/{test/test-tensor.rb → lib/arrow/csv-write-options.rb} +28 -31
- data/lib/arrow/data-type.rb +17 -3
- data/lib/arrow/decimal128-array-builder.rb +16 -6
- data/lib/arrow/decimal128.rb +14 -0
- data/lib/arrow/decimal256-array-builder.rb +16 -6
- data/lib/arrow/decimal256.rb +14 -0
- data/{test/test-float-scalar.rb → lib/arrow/dense-union-array-builder.rb} +27 -24
- data/{test/test-boolean-scalar.rb → lib/arrow/dense-union-array.rb} +7 -7
- data/lib/arrow/duration-array-builder.rb +27 -0
- data/lib/arrow/duration-array.rb +24 -0
- data/lib/arrow/duration-data-type.rb +32 -0
- data/lib/arrow/expression.rb +6 -2
- data/lib/arrow/field-containable.rb +1 -1
- data/lib/arrow/field.rb +44 -3
- data/lib/arrow/fixed-size-list-array-builder.rb +29 -0
- data/lib/arrow/fixed-size-list-data-type.rb +118 -0
- data/lib/arrow/function.rb +0 -1
- data/lib/arrow/half-float-array-builder.rb +32 -0
- data/lib/arrow/half-float-array.rb +24 -0
- data/lib/arrow/half-float.rb +118 -0
- data/{test/helper/fixture.rb → lib/arrow/input-referable.rb} +7 -6
- data/lib/arrow/jruby/array-builder.rb +114 -0
- data/lib/arrow/jruby/array.rb +109 -0
- data/lib/arrow/jruby/chunked-array.rb +36 -0
- data/lib/arrow/jruby/compression-type.rb +26 -0
- data/lib/arrow/jruby/csv-read-options.rb +32 -0
- data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
- data/lib/arrow/jruby/decimal128.rb +28 -0
- data/lib/arrow/jruby/decimal256.rb +28 -0
- data/{test/fixture/float-integer.csv → lib/arrow/jruby/error.rb} +7 -4
- data/lib/arrow/jruby/file-system.rb +24 -0
- data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
- data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
- data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/record-batch.rb} +8 -4
- data/{test/fixture/integer-float.csv → lib/arrow/jruby/sort-key.rb} +8 -4
- data/lib/arrow/jruby/sort-options.rb +24 -0
- data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
- data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
- data/lib/arrow/jruby/writable.rb +24 -0
- data/lib/arrow/jruby.rb +52 -0
- data/{test/test-date32-array.rb → lib/arrow/large-list-array-builder.rb} +10 -5
- data/lib/arrow/large-list-data-type.rb +83 -0
- data/lib/arrow/libraries.rb +140 -0
- data/lib/arrow/list-array-builder.rb +1 -68
- data/lib/arrow/list-data-type.rb +3 -38
- data/{test/test-dictionary-array.rb → lib/arrow/list-field-resolvable.rb} +26 -17
- data/lib/arrow/list-slice-options.rb +76 -0
- data/lib/arrow/list-values-appendable.rb +88 -0
- data/lib/arrow/loader.rb +15 -96
- data/{test/test-decimal128-array.rb → lib/arrow/make-struct-options.rb} +18 -18
- data/lib/arrow/raw-table-converter.rb +10 -3
- data/lib/arrow/raw-tensor-converter.rb +89 -0
- data/lib/arrow/record-batch-file-reader.rb +2 -0
- data/lib/arrow/record-batch-stream-reader.rb +2 -0
- data/lib/arrow/record-batch.rb +6 -2
- data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +5 -4
- data/lib/arrow/scalar.rb +67 -0
- data/lib/arrow/slicer.rb +61 -0
- data/lib/arrow/sort-key.rb +3 -3
- data/lib/arrow/sparse-union-array-builder.rb +56 -0
- data/lib/arrow/sparse-union-array.rb +26 -0
- data/lib/arrow/stream-decoder.rb +29 -0
- data/{test/test-decimal256-data-type.rb → lib/arrow/stream-listener.rb} +25 -9
- data/lib/arrow/string-array-builder.rb +30 -0
- data/lib/arrow/struct-array-builder.rb +0 -5
- data/lib/arrow/table-formatter.rb +38 -8
- data/lib/arrow/table-list-formatter.rb +3 -3
- data/lib/arrow/table-loader.rb +11 -5
- data/lib/arrow/table-saver.rb +4 -3
- data/lib/arrow/table-table-formatter.rb +7 -0
- data/lib/arrow/table.rb +180 -33
- data/lib/arrow/tensor.rb +144 -0
- data/lib/arrow/time-unit.rb +31 -0
- data/lib/arrow/time32-array-builder.rb +2 -14
- data/lib/arrow/time32-data-type.rb +9 -38
- data/lib/arrow/time64-array-builder.rb +2 -14
- data/lib/arrow/time64-data-type.rb +9 -38
- data/lib/arrow/timestamp-array-builder.rb +3 -15
- data/lib/arrow/timestamp-data-type.rb +9 -34
- data/{test/test-date64-array.rb → lib/arrow/timestamp-parser.rb} +14 -6
- data/lib/arrow/union-array-builder.rb +59 -0
- data/lib/arrow/union-array.rb +26 -0
- data/lib/arrow/version.rb +1 -1
- data/lib/arrow.rb +2 -7
- data/red-arrow.gemspec +74 -11
- metadata +85 -210
- data/test/fixture/TestOrcFile.test1.orc +0 -0
- data/test/fixture/with-header-float.csv +0 -20
- data/test/fixture/with-header.csv +0 -20
- data/test/fixture/without-header-float.csv +0 -19
- data/test/fixture/without-header.csv +0 -19
- data/test/helper/omittable.rb +0 -36
- data/test/helper.rb +0 -30
- data/test/raw-records/test-basic-arrays.rb +0 -395
- data/test/raw-records/test-dense-union-array.rb +0 -521
- data/test/raw-records/test-list-array.rb +0 -610
- data/test/raw-records/test-map-array.rb +0 -478
- data/test/raw-records/test-multiple-columns.rb +0 -65
- data/test/raw-records/test-sparse-union-array.rb +0 -511
- data/test/raw-records/test-struct-array.rb +0 -515
- data/test/raw-records/test-table.rb +0 -47
- data/test/run-test.rb +0 -71
- data/test/test-array-builder.rb +0 -136
- data/test/test-array.rb +0 -325
- data/test/test-bigdecimal.rb +0 -40
- data/test/test-binary-dictionary-array-builder.rb +0 -103
- data/test/test-chunked-array.rb +0 -183
- data/test/test-column.rb +0 -92
- data/test/test-csv-loader.rb +0 -250
- data/test/test-data-type.rb +0 -83
- data/test/test-decimal128-array-builder.rb +0 -112
- data/test/test-decimal128-data-type.rb +0 -31
- data/test/test-decimal128.rb +0 -102
- data/test/test-decimal256-array-builder.rb +0 -112
- data/test/test-decimal256-array.rb +0 -38
- data/test/test-decimal256.rb +0 -102
- data/test/test-dense-union-data-type.rb +0 -41
- data/test/test-dictionary-data-type.rb +0 -40
- data/test/test-expression.rb +0 -40
- data/test/test-feather.rb +0 -49
- data/test/test-field.rb +0 -91
- data/test/test-file-output-stream.rb +0 -54
- data/test/test-fixed-size-binary-array-builder.rb +0 -92
- data/test/test-fixed-size-binary-array.rb +0 -36
- data/test/test-function.rb +0 -210
- data/test/test-group.rb +0 -180
- data/test/test-list-array-builder.rb +0 -79
- data/test/test-list-array.rb +0 -32
- data/test/test-list-data-type.rb +0 -69
- data/test/test-map-array-builder.rb +0 -110
- data/test/test-map-array.rb +0 -33
- data/test/test-memory-view.rb +0 -434
- data/test/test-orc.rb +0 -173
- data/test/test-record-batch-builder.rb +0 -125
- data/test/test-record-batch-file-reader.rb +0 -115
- data/test/test-record-batch-iterator.rb +0 -37
- data/test/test-record-batch-reader.rb +0 -46
- data/test/test-record-batch.rb +0 -182
- data/test/test-schema.rb +0 -134
- data/test/test-slicer.rb +0 -487
- data/test/test-sort-indices.rb +0 -40
- data/test/test-sort-key.rb +0 -81
- data/test/test-sort-options.rb +0 -58
- data/test/test-sparse-union-data-type.rb +0 -41
- data/test/test-string-dictionary-array-builder.rb +0 -103
- data/test/test-struct-array-builder.rb +0 -184
- data/test/test-struct-array.rb +0 -94
- data/test/test-struct-data-type.rb +0 -112
- data/test/test-table.rb +0 -1123
- data/test/test-time.rb +0 -288
- data/test/test-time32-array.rb +0 -81
- data/test/test-time64-array.rb +0 -81
- data/test/test-time64-data-type.rb +0 -42
- data/test/test-timestamp-array.rb +0 -45
- data/test/test-timestamp-data-type.rb +0 -42
- data/test/values/test-basic-arrays.rb +0 -325
- data/test/values/test-dense-union-array.rb +0 -509
- data/test/values/test-dictionary-array.rb +0 -295
- data/test/values/test-list-array.rb +0 -571
- data/test/values/test-map-array.rb +0 -466
- data/test/values/test-sparse-union-array.rb +0 -500
- data/test/values/test-struct-array.rb +0 -512
data/ext/arrow/converters.hpp
CHANGED
|
@@ -28,6 +28,8 @@
|
|
|
28
28
|
|
|
29
29
|
namespace red_arrow {
|
|
30
30
|
class ListArrayValueConverter;
|
|
31
|
+
class LargeListArrayValueConverter;
|
|
32
|
+
class FixedSizeListArrayValueConverter;
|
|
31
33
|
class StructArrayValueConverter;
|
|
32
34
|
class MapArrayValueConverter;
|
|
33
35
|
class UnionArrayValueConverter;
|
|
@@ -38,6 +40,8 @@ namespace red_arrow {
|
|
|
38
40
|
ArrayValueConverter()
|
|
39
41
|
: decimal_buffer_(),
|
|
40
42
|
list_array_value_converter_(nullptr),
|
|
43
|
+
large_list_array_value_converter_(nullptr),
|
|
44
|
+
fixed_size_list_array_value_converter_(nullptr),
|
|
41
45
|
struct_array_value_converter_(nullptr),
|
|
42
46
|
map_array_value_converter_(nullptr),
|
|
43
47
|
union_array_value_converter_(nullptr),
|
|
@@ -45,11 +49,15 @@ namespace red_arrow {
|
|
|
45
49
|
}
|
|
46
50
|
|
|
47
51
|
inline void set_sub_value_converters(ListArrayValueConverter* list_array_value_converter,
|
|
52
|
+
LargeListArrayValueConverter* large_list_array_value_converter,
|
|
53
|
+
FixedSizeListArrayValueConverter* fixed_size_list_array_value_converter,
|
|
48
54
|
StructArrayValueConverter* struct_array_value_converter,
|
|
49
55
|
MapArrayValueConverter* map_array_value_converter,
|
|
50
56
|
UnionArrayValueConverter* union_array_value_converter,
|
|
51
57
|
DictionaryArrayValueConverter* dictionary_array_value_converter) {
|
|
52
58
|
list_array_value_converter_ = list_array_value_converter;
|
|
59
|
+
large_list_array_value_converter_ = large_list_array_value_converter;
|
|
60
|
+
fixed_size_list_array_value_converter_ = fixed_size_list_array_value_converter;
|
|
53
61
|
struct_array_value_converter_ = struct_array_value_converter;
|
|
54
62
|
map_array_value_converter_ = map_array_value_converter;
|
|
55
63
|
union_array_value_converter_ = union_array_value_converter;
|
|
@@ -106,10 +114,34 @@ namespace red_arrow {
|
|
|
106
114
|
return ULL2NUM(array.Value(i));
|
|
107
115
|
}
|
|
108
116
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
117
|
+
inline VALUE convert(const arrow::HalfFloatArray& array,
|
|
118
|
+
const int64_t i) {
|
|
119
|
+
const auto value = array.Value(i);
|
|
120
|
+
// | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
|
|
121
|
+
constexpr auto exponent_n_bits = 5;
|
|
122
|
+
static const auto exponent_mask =
|
|
123
|
+
static_cast<uint32_t>(std::pow(2.0, exponent_n_bits) - 1);
|
|
124
|
+
constexpr auto exponent_bias = 15;
|
|
125
|
+
constexpr auto fraction_n_bits = 10;
|
|
126
|
+
static const auto fraction_mask =
|
|
127
|
+
static_cast<uint32_t>(std::pow(2.0, fraction_n_bits)) - 1;
|
|
128
|
+
static const auto fraction_denominator = std::pow(2.0, fraction_n_bits);
|
|
129
|
+
const auto sign = value >> (exponent_n_bits + fraction_n_bits);
|
|
130
|
+
const auto exponent = (value >> fraction_n_bits) & exponent_mask;
|
|
131
|
+
const auto fraction = value & fraction_mask;
|
|
132
|
+
if (exponent == exponent_mask) {
|
|
133
|
+
if (sign == 0) {
|
|
134
|
+
return DBL2NUM(HUGE_VAL);
|
|
135
|
+
} else {
|
|
136
|
+
return DBL2NUM(-HUGE_VAL);
|
|
137
|
+
}
|
|
138
|
+
} else {
|
|
139
|
+
const auto implicit_fraction = (exponent == 0) ? 0 : 1;
|
|
140
|
+
return DBL2NUM(((sign == 0) ? 1 : -1) *
|
|
141
|
+
std::pow(2.0, exponent - exponent_bias) *
|
|
142
|
+
(implicit_fraction + fraction / fraction_denominator));
|
|
143
|
+
}
|
|
144
|
+
}
|
|
113
145
|
|
|
114
146
|
inline VALUE convert(const arrow::FloatArray& array,
|
|
115
147
|
const int64_t i) {
|
|
@@ -125,7 +157,15 @@ namespace red_arrow {
|
|
|
125
157
|
const int64_t i) {
|
|
126
158
|
int32_t length;
|
|
127
159
|
const auto value = array.GetValue(i, &length);
|
|
128
|
-
|
|
160
|
+
return rb_enc_str_new(reinterpret_cast<const char*>(value),
|
|
161
|
+
length,
|
|
162
|
+
rb_ascii8bit_encoding());
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
inline VALUE convert(const arrow::LargeBinaryArray& array,
|
|
166
|
+
const int64_t i) {
|
|
167
|
+
int64_t length;
|
|
168
|
+
const auto value = array.GetValue(i, &length);
|
|
129
169
|
return rb_enc_str_new(reinterpret_cast<const char*>(value),
|
|
130
170
|
length,
|
|
131
171
|
rb_ascii8bit_encoding());
|
|
@@ -139,6 +179,14 @@ namespace red_arrow {
|
|
|
139
179
|
length);
|
|
140
180
|
}
|
|
141
181
|
|
|
182
|
+
inline VALUE convert(const arrow::LargeStringArray& array,
|
|
183
|
+
const int64_t i) {
|
|
184
|
+
int64_t length;
|
|
185
|
+
const auto value = array.GetValue(i, &length);
|
|
186
|
+
return rb_utf8_str_new(reinterpret_cast<const char*>(value),
|
|
187
|
+
length);
|
|
188
|
+
}
|
|
189
|
+
|
|
142
190
|
inline VALUE convert(const arrow::FixedSizeBinaryArray& array,
|
|
143
191
|
const int64_t i) {
|
|
144
192
|
return rb_enc_str_new(reinterpret_cast<const char*>(array.Value(i)),
|
|
@@ -197,11 +245,6 @@ namespace red_arrow {
|
|
|
197
245
|
return rb_time_num_new(sec, Qnil);
|
|
198
246
|
}
|
|
199
247
|
|
|
200
|
-
// TODO
|
|
201
|
-
// inline VALUE convert(const arrow::IntervalArray& array,
|
|
202
|
-
// const int64_t i) {
|
|
203
|
-
// };
|
|
204
|
-
|
|
205
248
|
inline VALUE convert(const arrow::MonthIntervalArray& array,
|
|
206
249
|
const int64_t i) {
|
|
207
250
|
return INT2NUM(array.Value(i));
|
|
@@ -236,9 +279,20 @@ namespace red_arrow {
|
|
|
236
279
|
return value;
|
|
237
280
|
}
|
|
238
281
|
|
|
282
|
+
inline VALUE convert(const arrow::DurationArray& array,
|
|
283
|
+
const int64_t i) {
|
|
284
|
+
return LL2NUM(array.Value(i));
|
|
285
|
+
}
|
|
286
|
+
|
|
239
287
|
VALUE convert(const arrow::ListArray& array,
|
|
240
288
|
const int64_t i);
|
|
241
289
|
|
|
290
|
+
VALUE convert(const arrow::LargeListArray& array,
|
|
291
|
+
const int64_t i);
|
|
292
|
+
|
|
293
|
+
VALUE convert(const arrow::FixedSizeListArray& array,
|
|
294
|
+
const int64_t i);
|
|
295
|
+
|
|
242
296
|
VALUE convert(const arrow::StructArray& array,
|
|
243
297
|
const int64_t i);
|
|
244
298
|
|
|
@@ -274,6 +328,8 @@ namespace red_arrow {
|
|
|
274
328
|
|
|
275
329
|
std::string decimal_buffer_;
|
|
276
330
|
ListArrayValueConverter* list_array_value_converter_;
|
|
331
|
+
LargeListArrayValueConverter* large_list_array_value_converter_;
|
|
332
|
+
FixedSizeListArrayValueConverter* fixed_size_list_array_value_converter_;
|
|
277
333
|
StructArrayValueConverter* struct_array_value_converter_;
|
|
278
334
|
MapArrayValueConverter* map_array_value_converter_;
|
|
279
335
|
UnionArrayValueConverter* union_array_value_converter_;
|
|
@@ -320,8 +376,209 @@ namespace red_arrow {
|
|
|
320
376
|
VISIT(UInt16)
|
|
321
377
|
VISIT(UInt32)
|
|
322
378
|
VISIT(UInt64)
|
|
379
|
+
VISIT(HalfFloat)
|
|
380
|
+
VISIT(Float)
|
|
381
|
+
VISIT(Double)
|
|
382
|
+
VISIT(Binary)
|
|
383
|
+
VISIT(String)
|
|
384
|
+
VISIT(FixedSizeBinary)
|
|
385
|
+
VISIT(Date32)
|
|
386
|
+
VISIT(Date64)
|
|
387
|
+
VISIT(Time32)
|
|
388
|
+
VISIT(Time64)
|
|
389
|
+
VISIT(Timestamp)
|
|
390
|
+
VISIT(MonthInterval)
|
|
391
|
+
VISIT(DayTimeInterval)
|
|
392
|
+
VISIT(MonthDayNanoInterval)
|
|
393
|
+
VISIT(Duration)
|
|
394
|
+
VISIT(List)
|
|
395
|
+
VISIT(LargeList)
|
|
396
|
+
VISIT(FixedSizeList)
|
|
397
|
+
VISIT(Struct)
|
|
398
|
+
VISIT(Map)
|
|
399
|
+
VISIT(SparseUnion)
|
|
400
|
+
VISIT(DenseUnion)
|
|
401
|
+
VISIT(Dictionary)
|
|
402
|
+
VISIT(Decimal128)
|
|
403
|
+
VISIT(Decimal256)
|
|
404
|
+
// TODO
|
|
405
|
+
// VISIT(Extension)
|
|
406
|
+
|
|
407
|
+
#undef VISIT
|
|
408
|
+
|
|
409
|
+
private:
|
|
410
|
+
template <typename ArrayType>
|
|
411
|
+
inline VALUE convert_value(const ArrayType& array,
|
|
412
|
+
const int64_t i) {
|
|
413
|
+
return array_value_converter_->convert(array, i);
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
template <typename ArrayType>
|
|
417
|
+
arrow::Status visit_value(const ArrayType& array) {
|
|
418
|
+
if (array.null_count() > 0) {
|
|
419
|
+
for (int64_t i = 0; i < length_; ++i) {
|
|
420
|
+
auto value = Qnil;
|
|
421
|
+
if (!array.IsNull(i + offset_)) {
|
|
422
|
+
value = convert_value(array, i + offset_);
|
|
423
|
+
}
|
|
424
|
+
rb_ary_push(result_, value);
|
|
425
|
+
}
|
|
426
|
+
} else {
|
|
427
|
+
for (int64_t i = 0; i < length_; ++i) {
|
|
428
|
+
rb_ary_push(result_, convert_value(array, i + offset_));
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
return arrow::Status::OK();
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
ArrayValueConverter* array_value_converter_;
|
|
435
|
+
int32_t offset_;
|
|
436
|
+
int32_t length_;
|
|
437
|
+
VALUE result_;
|
|
438
|
+
};
|
|
439
|
+
|
|
440
|
+
class LargeListArrayValueConverter : public arrow::ArrayVisitor {
|
|
441
|
+
public:
|
|
442
|
+
explicit LargeListArrayValueConverter(ArrayValueConverter* converter)
|
|
443
|
+
: array_value_converter_(converter),
|
|
444
|
+
offset_(0),
|
|
445
|
+
length_(0),
|
|
446
|
+
result_(Qnil) {}
|
|
447
|
+
|
|
448
|
+
VALUE convert(const arrow::LargeListArray& array, const int64_t index) {
|
|
449
|
+
auto values = array.values().get();
|
|
450
|
+
auto offset_keep = offset_;
|
|
451
|
+
auto length_keep = length_;
|
|
452
|
+
offset_ = array.value_offset(index);
|
|
453
|
+
length_ = array.value_length(index);
|
|
454
|
+
auto result_keep = result_;
|
|
455
|
+
result_ = rb_ary_new_capa(length_);
|
|
456
|
+
check_status(values->Accept(this),
|
|
457
|
+
"[raw-records][large-list-array]");
|
|
458
|
+
offset_ = offset_keep;
|
|
459
|
+
length_ = length_keep;
|
|
460
|
+
auto result_return = result_;
|
|
461
|
+
result_ = result_keep;
|
|
462
|
+
return result_return;
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
#define VISIT(TYPE) \
|
|
466
|
+
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
|
|
467
|
+
return visit_value(array); \
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
VISIT(Null)
|
|
471
|
+
VISIT(Boolean)
|
|
472
|
+
VISIT(Int8)
|
|
473
|
+
VISIT(Int16)
|
|
474
|
+
VISIT(Int32)
|
|
475
|
+
VISIT(Int64)
|
|
476
|
+
VISIT(UInt8)
|
|
477
|
+
VISIT(UInt16)
|
|
478
|
+
VISIT(UInt32)
|
|
479
|
+
VISIT(UInt64)
|
|
480
|
+
VISIT(HalfFloat)
|
|
481
|
+
VISIT(Float)
|
|
482
|
+
VISIT(Double)
|
|
483
|
+
VISIT(Binary)
|
|
484
|
+
VISIT(String)
|
|
485
|
+
VISIT(FixedSizeBinary)
|
|
486
|
+
VISIT(Date32)
|
|
487
|
+
VISIT(Date64)
|
|
488
|
+
VISIT(Time32)
|
|
489
|
+
VISIT(Time64)
|
|
490
|
+
VISIT(Timestamp)
|
|
491
|
+
VISIT(MonthInterval)
|
|
492
|
+
VISIT(DayTimeInterval)
|
|
493
|
+
VISIT(MonthDayNanoInterval)
|
|
494
|
+
VISIT(Duration)
|
|
495
|
+
VISIT(List)
|
|
496
|
+
VISIT(LargeList)
|
|
497
|
+
VISIT(FixedSizeList)
|
|
498
|
+
VISIT(Struct)
|
|
499
|
+
VISIT(Map)
|
|
500
|
+
VISIT(SparseUnion)
|
|
501
|
+
VISIT(DenseUnion)
|
|
502
|
+
VISIT(Dictionary)
|
|
503
|
+
VISIT(Decimal128)
|
|
504
|
+
VISIT(Decimal256)
|
|
323
505
|
// TODO
|
|
324
|
-
// VISIT(
|
|
506
|
+
// VISIT(Extension)
|
|
507
|
+
|
|
508
|
+
#undef VISIT
|
|
509
|
+
|
|
510
|
+
private:
|
|
511
|
+
template <typename ArrayType>
|
|
512
|
+
inline VALUE convert_value(const ArrayType& array,
|
|
513
|
+
const int64_t i) {
|
|
514
|
+
return array_value_converter_->convert(array, i);
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
template <typename ArrayType>
|
|
518
|
+
arrow::Status visit_value(const ArrayType& array) {
|
|
519
|
+
if (array.null_count() > 0) {
|
|
520
|
+
for (int64_t i = 0; i < length_; ++i) {
|
|
521
|
+
auto value = Qnil;
|
|
522
|
+
if (!array.IsNull(i + offset_)) {
|
|
523
|
+
value = convert_value(array, i + offset_);
|
|
524
|
+
}
|
|
525
|
+
rb_ary_push(result_, value);
|
|
526
|
+
}
|
|
527
|
+
} else {
|
|
528
|
+
for (int64_t i = 0; i < length_; ++i) {
|
|
529
|
+
rb_ary_push(result_, convert_value(array, i + offset_));
|
|
530
|
+
}
|
|
531
|
+
}
|
|
532
|
+
return arrow::Status::OK();
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
ArrayValueConverter* array_value_converter_;
|
|
536
|
+
int32_t offset_;
|
|
537
|
+
int32_t length_;
|
|
538
|
+
VALUE result_;
|
|
539
|
+
};
|
|
540
|
+
|
|
541
|
+
class FixedSizeListArrayValueConverter : public arrow::ArrayVisitor {
|
|
542
|
+
public:
|
|
543
|
+
explicit FixedSizeListArrayValueConverter(ArrayValueConverter* converter)
|
|
544
|
+
: array_value_converter_(converter),
|
|
545
|
+
offset_(0),
|
|
546
|
+
length_(0),
|
|
547
|
+
result_(Qnil) {}
|
|
548
|
+
|
|
549
|
+
VALUE convert(const arrow::FixedSizeListArray& array, const int64_t index) {
|
|
550
|
+
auto values = array.values().get();
|
|
551
|
+
auto offset_keep = offset_;
|
|
552
|
+
auto length_keep = length_;
|
|
553
|
+
offset_ = array.value_offset(index);
|
|
554
|
+
length_ = array.value_length(index);
|
|
555
|
+
auto result_keep = result_;
|
|
556
|
+
result_ = rb_ary_new_capa(length_);
|
|
557
|
+
check_status(values->Accept(this),
|
|
558
|
+
"[raw-records][fixed-size-list-array]");
|
|
559
|
+
offset_ = offset_keep;
|
|
560
|
+
length_ = length_keep;
|
|
561
|
+
auto result_return = result_;
|
|
562
|
+
result_ = result_keep;
|
|
563
|
+
return result_return;
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
#define VISIT(TYPE) \
|
|
567
|
+
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
|
|
568
|
+
return visit_value(array); \
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
VISIT(Null)
|
|
572
|
+
VISIT(Boolean)
|
|
573
|
+
VISIT(Int8)
|
|
574
|
+
VISIT(Int16)
|
|
575
|
+
VISIT(Int32)
|
|
576
|
+
VISIT(Int64)
|
|
577
|
+
VISIT(UInt8)
|
|
578
|
+
VISIT(UInt16)
|
|
579
|
+
VISIT(UInt32)
|
|
580
|
+
VISIT(UInt64)
|
|
581
|
+
VISIT(HalfFloat)
|
|
325
582
|
VISIT(Float)
|
|
326
583
|
VISIT(Double)
|
|
327
584
|
VISIT(Binary)
|
|
@@ -335,7 +592,10 @@ namespace red_arrow {
|
|
|
335
592
|
VISIT(MonthInterval)
|
|
336
593
|
VISIT(DayTimeInterval)
|
|
337
594
|
VISIT(MonthDayNanoInterval)
|
|
595
|
+
VISIT(Duration)
|
|
338
596
|
VISIT(List)
|
|
597
|
+
VISIT(LargeList)
|
|
598
|
+
VISIT(FixedSizeList)
|
|
339
599
|
VISIT(Struct)
|
|
340
600
|
VISIT(Map)
|
|
341
601
|
VISIT(SparseUnion)
|
|
@@ -427,8 +687,7 @@ namespace red_arrow {
|
|
|
427
687
|
VISIT(UInt16)
|
|
428
688
|
VISIT(UInt32)
|
|
429
689
|
VISIT(UInt64)
|
|
430
|
-
|
|
431
|
-
// VISIT(HalfFloat)
|
|
690
|
+
VISIT(HalfFloat)
|
|
432
691
|
VISIT(Float)
|
|
433
692
|
VISIT(Double)
|
|
434
693
|
VISIT(Binary)
|
|
@@ -442,7 +701,10 @@ namespace red_arrow {
|
|
|
442
701
|
VISIT(MonthInterval)
|
|
443
702
|
VISIT(DayTimeInterval)
|
|
444
703
|
VISIT(MonthDayNanoInterval)
|
|
704
|
+
VISIT(Duration)
|
|
445
705
|
VISIT(List)
|
|
706
|
+
VISIT(LargeList)
|
|
707
|
+
VISIT(FixedSizeList)
|
|
446
708
|
VISIT(Struct)
|
|
447
709
|
VISIT(Map)
|
|
448
710
|
VISIT(SparseUnion)
|
|
@@ -530,8 +792,7 @@ namespace red_arrow {
|
|
|
530
792
|
VISIT(UInt16)
|
|
531
793
|
VISIT(UInt32)
|
|
532
794
|
VISIT(UInt64)
|
|
533
|
-
|
|
534
|
-
// VISIT(HalfFloat)
|
|
795
|
+
VISIT(HalfFloat)
|
|
535
796
|
VISIT(Float)
|
|
536
797
|
VISIT(Double)
|
|
537
798
|
VISIT(Binary)
|
|
@@ -545,7 +806,10 @@ namespace red_arrow {
|
|
|
545
806
|
VISIT(MonthInterval)
|
|
546
807
|
VISIT(DayTimeInterval)
|
|
547
808
|
VISIT(MonthDayNanoInterval)
|
|
809
|
+
VISIT(Duration)
|
|
548
810
|
VISIT(List)
|
|
811
|
+
VISIT(LargeList)
|
|
812
|
+
VISIT(FixedSizeList)
|
|
549
813
|
VISIT(Struct)
|
|
550
814
|
VISIT(Map)
|
|
551
815
|
VISIT(SparseUnion)
|
|
@@ -634,8 +898,7 @@ namespace red_arrow {
|
|
|
634
898
|
VISIT(UInt16)
|
|
635
899
|
VISIT(UInt32)
|
|
636
900
|
VISIT(UInt64)
|
|
637
|
-
|
|
638
|
-
// VISIT(HalfFloat)
|
|
901
|
+
VISIT(HalfFloat)
|
|
639
902
|
VISIT(Float)
|
|
640
903
|
VISIT(Double)
|
|
641
904
|
VISIT(Binary)
|
|
@@ -649,7 +912,10 @@ namespace red_arrow {
|
|
|
649
912
|
VISIT(MonthInterval)
|
|
650
913
|
VISIT(DayTimeInterval)
|
|
651
914
|
VISIT(MonthDayNanoInterval)
|
|
915
|
+
VISIT(Duration)
|
|
652
916
|
VISIT(List)
|
|
917
|
+
VISIT(LargeList)
|
|
918
|
+
VISIT(FixedSizeList)
|
|
653
919
|
VISIT(Struct)
|
|
654
920
|
VISIT(Map)
|
|
655
921
|
VISIT(SparseUnion)
|
|
@@ -665,25 +931,21 @@ namespace red_arrow {
|
|
|
665
931
|
private:
|
|
666
932
|
template <typename ArrayType>
|
|
667
933
|
inline void convert_value(const ArrayType& array) {
|
|
668
|
-
auto result = rb_hash_new();
|
|
669
934
|
if (array.IsNull(index_)) {
|
|
670
|
-
|
|
935
|
+
result_ = RUBY_Qnil;
|
|
671
936
|
} else {
|
|
672
|
-
|
|
673
|
-
field_name_,
|
|
674
|
-
array_value_converter_->convert(array, index_));
|
|
937
|
+
result_ = array_value_converter_->convert(array, index_);
|
|
675
938
|
}
|
|
676
|
-
result_ = result;
|
|
677
939
|
}
|
|
678
940
|
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
941
|
+
int8_t compute_child_id(const arrow::UnionArray& array,
|
|
942
|
+
arrow::UnionType* type,
|
|
943
|
+
const char* tag) {
|
|
682
944
|
const auto type_code = array.raw_type_codes()[index_];
|
|
683
945
|
if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
|
|
684
|
-
const auto
|
|
685
|
-
if (
|
|
686
|
-
return
|
|
946
|
+
const auto child_id = type->child_ids()[type_code];
|
|
947
|
+
if (child_id >= 0) {
|
|
948
|
+
return child_id;
|
|
687
949
|
}
|
|
688
950
|
}
|
|
689
951
|
check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
|
|
@@ -695,36 +957,25 @@ namespace red_arrow {
|
|
|
695
957
|
const auto type =
|
|
696
958
|
std::static_pointer_cast<arrow::UnionType>(array.type()).get();
|
|
697
959
|
const auto tag = "[raw-records][union-sparse-array]";
|
|
698
|
-
const auto
|
|
699
|
-
const auto
|
|
700
|
-
const auto& field_name = field->name();
|
|
701
|
-
const auto field_name_keep = field_name_;
|
|
702
|
-
field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
|
|
703
|
-
const auto field_array = array.field(index).get();
|
|
960
|
+
const auto child_id = compute_child_id(array, type, tag);
|
|
961
|
+
const auto field_array = array.field(child_id).get();
|
|
704
962
|
check_status(field_array->Accept(this), tag);
|
|
705
|
-
field_name_ = field_name_keep;
|
|
706
963
|
}
|
|
707
964
|
|
|
708
965
|
void convert_dense(const arrow::DenseUnionArray& array) {
|
|
709
966
|
const auto type =
|
|
710
967
|
std::static_pointer_cast<arrow::UnionType>(array.type()).get();
|
|
711
968
|
const auto tag = "[raw-records][union-dense-array]";
|
|
712
|
-
const auto
|
|
713
|
-
const auto
|
|
714
|
-
const auto& field_name = field->name();
|
|
715
|
-
const auto field_name_keep = field_name_;
|
|
716
|
-
field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
|
|
717
|
-
const auto field_array = array.field(index);
|
|
969
|
+
const auto child_id = compute_child_id(array, type, tag);
|
|
970
|
+
const auto field_array = array.field(child_id);
|
|
718
971
|
const auto index_keep = index_;
|
|
719
972
|
index_ = array.value_offset(index_);
|
|
720
973
|
check_status(field_array->Accept(this), tag);
|
|
721
974
|
index_ = index_keep;
|
|
722
|
-
field_name_ = field_name_keep;
|
|
723
975
|
}
|
|
724
976
|
|
|
725
977
|
ArrayValueConverter* array_value_converter_;
|
|
726
978
|
int64_t index_;
|
|
727
|
-
VALUE field_name_;
|
|
728
979
|
VALUE result_;
|
|
729
980
|
};
|
|
730
981
|
|
|
@@ -761,19 +1012,26 @@ namespace red_arrow {
|
|
|
761
1012
|
VISIT(UInt16)
|
|
762
1013
|
VISIT(UInt32)
|
|
763
1014
|
VISIT(UInt64)
|
|
764
|
-
|
|
765
|
-
// VISIT(HalfFloat)
|
|
1015
|
+
VISIT(HalfFloat)
|
|
766
1016
|
VISIT(Float)
|
|
767
1017
|
VISIT(Double)
|
|
768
1018
|
VISIT(Binary)
|
|
1019
|
+
VISIT(LargeBinary)
|
|
769
1020
|
VISIT(String)
|
|
1021
|
+
VISIT(LargeString)
|
|
770
1022
|
VISIT(FixedSizeBinary)
|
|
771
1023
|
VISIT(Date32)
|
|
772
1024
|
VISIT(Date64)
|
|
773
1025
|
VISIT(Time32)
|
|
774
1026
|
VISIT(Time64)
|
|
775
1027
|
VISIT(Timestamp)
|
|
1028
|
+
VISIT(MonthInterval)
|
|
1029
|
+
VISIT(DayTimeInterval)
|
|
1030
|
+
VISIT(MonthDayNanoInterval)
|
|
1031
|
+
VISIT(Duration)
|
|
776
1032
|
VISIT(List)
|
|
1033
|
+
VISIT(LargeList)
|
|
1034
|
+
VISIT(FixedSizeList)
|
|
777
1035
|
VISIT(Struct)
|
|
778
1036
|
VISIT(Map)
|
|
779
1037
|
VISIT(SparseUnion)
|
|
@@ -803,12 +1061,16 @@ namespace red_arrow {
|
|
|
803
1061
|
explicit Converter()
|
|
804
1062
|
: array_value_converter_(),
|
|
805
1063
|
list_array_value_converter_(&array_value_converter_),
|
|
1064
|
+
large_list_array_value_converter_(&array_value_converter_),
|
|
1065
|
+
fixed_size_list_array_value_converter_(&array_value_converter_),
|
|
806
1066
|
struct_array_value_converter_(&array_value_converter_),
|
|
807
1067
|
map_array_value_converter_(&array_value_converter_),
|
|
808
1068
|
union_array_value_converter_(&array_value_converter_),
|
|
809
1069
|
dictionary_array_value_converter_(&array_value_converter_) {
|
|
810
1070
|
array_value_converter_.
|
|
811
1071
|
set_sub_value_converters(&list_array_value_converter_,
|
|
1072
|
+
&large_list_array_value_converter_,
|
|
1073
|
+
&fixed_size_list_array_value_converter_,
|
|
812
1074
|
&struct_array_value_converter_,
|
|
813
1075
|
&map_array_value_converter_,
|
|
814
1076
|
&union_array_value_converter_,
|
|
@@ -823,6 +1085,8 @@ namespace red_arrow {
|
|
|
823
1085
|
|
|
824
1086
|
ArrayValueConverter array_value_converter_;
|
|
825
1087
|
ListArrayValueConverter list_array_value_converter_;
|
|
1088
|
+
LargeListArrayValueConverter large_list_array_value_converter_;
|
|
1089
|
+
FixedSizeListArrayValueConverter fixed_size_list_array_value_converter_;
|
|
826
1090
|
StructArrayValueConverter struct_array_value_converter_;
|
|
827
1091
|
MapArrayValueConverter map_array_value_converter_;
|
|
828
1092
|
UnionArrayValueConverter union_array_value_converter_;
|
data/ext/arrow/extconf.rb
CHANGED
|
@@ -38,32 +38,37 @@ checking_for(checking_message("Homebrew")) do
|
|
|
38
38
|
end
|
|
39
39
|
end
|
|
40
40
|
|
|
41
|
-
unless
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
debian: "libarrow-dev",
|
|
48
|
-
redhat: "arrow-devel",
|
|
49
|
-
homebrew: "apache-arrow",
|
|
50
|
-
msys2: "arrow")
|
|
51
|
-
exit(false)
|
|
41
|
+
unless PKGConfig.have_package("arrow", Arrow::Version::MAJOR)
|
|
42
|
+
raise <<-MESSAGE
|
|
43
|
+
Apache Arrow C++ >= #{Arrow::Version::MAJOR} isn't found.
|
|
44
|
+
You can install it automatically by enabling rubygems-requirements-system.
|
|
45
|
+
See https://github.com/ruby-gnome/rubygems-requirements-system/ how to enable it.
|
|
46
|
+
MESSAGE
|
|
52
47
|
end
|
|
53
48
|
|
|
54
|
-
unless
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
49
|
+
unless PKGConfig.have_package("arrow-glib",
|
|
50
|
+
Arrow::Version::MAJOR,
|
|
51
|
+
Arrow::Version::MINOR,
|
|
52
|
+
Arrow::Version::MICRO)
|
|
53
|
+
version = [
|
|
54
|
+
Arrow::Version::MAJOR,
|
|
55
|
+
Arrow::Version::MINOR,
|
|
56
|
+
Arrow::Version::MICRO,
|
|
57
|
+
].join(".")
|
|
58
|
+
raise <<-MESSAGE
|
|
59
|
+
Apache Arrow GLib >= #{version} isn't found.
|
|
60
|
+
You can install it automatically by enabling rubygems-requirements-system.
|
|
61
|
+
See https://github.com/ruby-gnome/rubygems-requirements-system/ how to enable it.
|
|
62
|
+
MESSAGE
|
|
65
63
|
end
|
|
66
64
|
|
|
65
|
+
# Old re2.pc (e.g. re2.pc on Ubuntu 20.04) may add -std=c++11. It
|
|
66
|
+
# causes a build error because Apache Arrow C++ requires C++17 or
|
|
67
|
+
# later.
|
|
68
|
+
#
|
|
69
|
+
# We can remove this when we drop support for Ubuntu 20.04.
|
|
70
|
+
$CXXFLAGS.gsub!("-std=c++11", "")
|
|
71
|
+
|
|
67
72
|
[
|
|
68
73
|
["glib2", "ext/glib2"],
|
|
69
74
|
].each do |name, relative_source_dir|
|
|
@@ -73,4 +78,18 @@ end
|
|
|
73
78
|
add_depend_package_path(name, source_dir, build_dir)
|
|
74
79
|
end
|
|
75
80
|
|
|
81
|
+
case RUBY_PLATFORM
|
|
82
|
+
when /darwin/
|
|
83
|
+
symbols_in_external_bundles = [
|
|
84
|
+
"_rbgerr_gerror2exception",
|
|
85
|
+
"_rbgobj_instance_from_ruby_object",
|
|
86
|
+
]
|
|
87
|
+
symbols_in_external_bundles.each do |symbol|
|
|
88
|
+
$DLDFLAGS << " -Wl,-U,#{symbol}"
|
|
89
|
+
end
|
|
90
|
+
mmacosx_version_min = "-mmacosx-version-min=12.0"
|
|
91
|
+
$CFLAGS << " #{mmacosx_version_min}"
|
|
92
|
+
$CXXFLAGS << " #{mmacosx_version_min}"
|
|
93
|
+
end
|
|
94
|
+
|
|
76
95
|
create_makefile("arrow")
|