red-arrow 0.14.1 → 0.15.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of red-arrow might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/ext/arrow/arrow.cpp +34 -0
- data/ext/arrow/converters.cpp +42 -0
- data/ext/arrow/converters.hpp +626 -0
- data/ext/arrow/raw-records.cpp +6 -625
- data/ext/arrow/red-arrow.hpp +37 -3
- data/ext/arrow/values.cpp +154 -0
- data/lib/arrow/array-builder.rb +24 -1
- data/lib/arrow/array.rb +9 -0
- data/lib/arrow/chunked-array.rb +5 -0
- data/lib/arrow/column-containable.rb +48 -0
- data/lib/arrow/column.rb +36 -10
- data/lib/arrow/csv-loader.rb +2 -2
- data/lib/arrow/data-type.rb +22 -5
- data/lib/arrow/date64-array-builder.rb +2 -2
- data/lib/arrow/date64-array.rb +1 -1
- data/lib/arrow/decimal128-array.rb +24 -0
- data/lib/arrow/field-containable.rb +3 -0
- data/lib/arrow/group.rb +10 -13
- data/lib/arrow/loader.rb +20 -1
- data/lib/arrow/record-batch.rb +6 -4
- data/lib/arrow/record-containable.rb +0 -35
- data/lib/arrow/record.rb +12 -9
- data/lib/arrow/slicer.rb +2 -2
- data/lib/arrow/struct-array-builder.rb +1 -7
- data/lib/arrow/struct-array.rb +13 -11
- data/lib/arrow/table-loader.rb +3 -9
- data/lib/arrow/table-table-formatter.rb +2 -2
- data/lib/arrow/table.rb +61 -24
- data/lib/arrow/time.rb +159 -0
- data/lib/arrow/time32-array-builder.rb +49 -0
- data/lib/arrow/time32-array.rb +28 -0
- data/lib/arrow/time64-array-builder.rb +49 -0
- data/lib/arrow/time64-array.rb +28 -0
- data/lib/arrow/timestamp-array-builder.rb +20 -1
- data/lib/arrow/timestamp-array.rb +10 -22
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -1
- data/test/raw-records/test-basic-arrays.rb +16 -8
- data/test/raw-records/test-dense-union-array.rb +12 -5
- data/test/raw-records/test-list-array.rb +21 -9
- data/test/raw-records/test-sparse-union-array.rb +13 -5
- data/test/raw-records/test-struct-array.rb +11 -4
- data/test/test-column.rb +56 -31
- data/test/test-decimal128-array-builder.rb +11 -11
- data/test/test-decimal128-array.rb +4 -4
- data/test/test-slicer.rb +1 -3
- data/test/test-struct-array-builder.rb +4 -4
- data/test/test-struct-array.rb +4 -4
- data/test/test-table.rb +17 -8
- data/test/test-time.rb +288 -0
- data/test/test-time32-array.rb +81 -0
- data/test/test-time64-array.rb +81 -0
- data/test/values/test-basic-arrays.rb +284 -0
- data/test/values/test-dense-union-array.rb +487 -0
- data/test/values/test-list-array.rb +497 -0
- data/test/values/test-sparse-union-array.rb +477 -0
- data/test/values/test-struct-array.rb +452 -0
- metadata +78 -54
- data/lib/arrow/struct.rb +0 -79
- data/test/test-struct.rb +0 -81
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 65ddeae926378c460b0945ff6949ecbf3ee911611cdcb95bf21f3cf3765efe6d
|
4
|
+
data.tar.gz: 193bce59b05b836fb5a5d6d8b650ea9adf441bf04beda8a6083633692c796957
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b502a0bc9f65b24d04d9f5ee1f58e7eb5db27e885d756158c249fba8be843af7f1ef12c234110cc7136d67f5e998ceb726a7ecf8688b653ba84ec207f7308e78
|
7
|
+
data.tar.gz: 1d8f8798f582fdb8b4e0e566d52407b3303391c4c43deef9471dc0011c9a377f83fca308b230709ef0b332ce83892a1cd967096531b4701382d4ed40066b216c
|
data/ext/arrow/arrow.cpp
CHANGED
@@ -23,17 +23,38 @@
|
|
23
23
|
|
24
24
|
namespace red_arrow {
|
25
25
|
VALUE cDate;
|
26
|
+
|
27
|
+
VALUE cArrowTime;
|
28
|
+
|
29
|
+
VALUE ArrowTimeUnitSECOND;
|
30
|
+
VALUE ArrowTimeUnitMILLI;
|
31
|
+
VALUE ArrowTimeUnitMICRO;
|
32
|
+
VALUE ArrowTimeUnitNANO;
|
33
|
+
|
26
34
|
ID id_BigDecimal;
|
27
35
|
ID id_jd;
|
36
|
+
ID id_new;
|
28
37
|
ID id_to_datetime;
|
29
38
|
}
|
30
39
|
|
31
40
|
extern "C" void Init_arrow() {
|
32
41
|
auto mArrow = rb_const_get_at(rb_cObject, rb_intern("Arrow"));
|
42
|
+
|
43
|
+
auto cArrowArray = rb_const_get_at(mArrow, rb_intern("Array"));
|
44
|
+
rb_define_method(cArrowArray, "values",
|
45
|
+
reinterpret_cast<rb::RawMethod>(red_arrow::array_values),
|
46
|
+
0);
|
47
|
+
|
48
|
+
auto cArrowChunkedArray = rb_const_get_at(mArrow, rb_intern("ChunkedArray"));
|
49
|
+
rb_define_method(cArrowChunkedArray, "values",
|
50
|
+
reinterpret_cast<rb::RawMethod>(red_arrow::chunked_array_values),
|
51
|
+
0);
|
52
|
+
|
33
53
|
auto cArrowRecordBatch = rb_const_get_at(mArrow, rb_intern("RecordBatch"));
|
34
54
|
rb_define_method(cArrowRecordBatch, "raw_records",
|
35
55
|
reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_raw_records),
|
36
56
|
0);
|
57
|
+
|
37
58
|
auto cArrowTable = rb_const_get_at(mArrow, rb_intern("Table"));
|
38
59
|
rb_define_method(cArrowTable, "raw_records",
|
39
60
|
reinterpret_cast<rb::RawMethod>(red_arrow::table_raw_records),
|
@@ -41,7 +62,20 @@ extern "C" void Init_arrow() {
|
|
41
62
|
|
42
63
|
red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date"));
|
43
64
|
|
65
|
+
red_arrow::cArrowTime = rb_const_get_at(mArrow, rb_intern("Time"));
|
66
|
+
|
67
|
+
auto cArrowTimeUnit = rb_const_get_at(mArrow, rb_intern("TimeUnit"));
|
68
|
+
red_arrow::ArrowTimeUnitSECOND =
|
69
|
+
rb_const_get_at(cArrowTimeUnit, rb_intern("SECOND"));
|
70
|
+
red_arrow::ArrowTimeUnitMILLI =
|
71
|
+
rb_const_get_at(cArrowTimeUnit, rb_intern("MILLI"));
|
72
|
+
red_arrow::ArrowTimeUnitMICRO =
|
73
|
+
rb_const_get_at(cArrowTimeUnit, rb_intern("MICRO"));
|
74
|
+
red_arrow::ArrowTimeUnitNANO =
|
75
|
+
rb_const_get_at(cArrowTimeUnit, rb_intern("NANO"));
|
76
|
+
|
44
77
|
red_arrow::id_BigDecimal = rb_intern("BigDecimal");
|
45
78
|
red_arrow::id_jd = rb_intern("jd");
|
79
|
+
red_arrow::id_new = rb_intern("new");
|
46
80
|
red_arrow::id_to_datetime = rb_intern("to_datetime");
|
47
81
|
}
|
@@ -0,0 +1,42 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include "converters.hpp"
|
21
|
+
|
22
|
+
namespace red_arrow {
|
23
|
+
VALUE ArrayValueConverter::convert(const arrow::ListArray& array,
|
24
|
+
const int64_t i) {
|
25
|
+
return list_array_value_converter_->convert(array, i);
|
26
|
+
}
|
27
|
+
|
28
|
+
VALUE ArrayValueConverter::convert(const arrow::StructArray& array,
|
29
|
+
const int64_t i) {
|
30
|
+
return struct_array_value_converter_->convert(array, i);
|
31
|
+
}
|
32
|
+
|
33
|
+
VALUE ArrayValueConverter::convert(const arrow::UnionArray& array,
|
34
|
+
const int64_t i) {
|
35
|
+
return union_array_value_converter_->convert(array, i);
|
36
|
+
}
|
37
|
+
|
38
|
+
VALUE ArrayValueConverter::convert(const arrow::DictionaryArray& array,
|
39
|
+
const int64_t i) {
|
40
|
+
return dictionary_array_value_converter_->convert(array, i);
|
41
|
+
}
|
42
|
+
}
|
@@ -0,0 +1,626 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include "red-arrow.hpp"
|
21
|
+
|
22
|
+
#include <ruby.hpp>
|
23
|
+
#include <ruby/encoding.h>
|
24
|
+
|
25
|
+
#include <arrow-glib/error.hpp>
|
26
|
+
|
27
|
+
#include <arrow/util/logging.h>
|
28
|
+
|
29
|
+
namespace red_arrow {
|
30
|
+
class ListArrayValueConverter;
|
31
|
+
class StructArrayValueConverter;
|
32
|
+
class UnionArrayValueConverter;
|
33
|
+
class DictionaryArrayValueConverter;
|
34
|
+
|
35
|
+
class ArrayValueConverter {
|
36
|
+
public:
|
37
|
+
ArrayValueConverter()
|
38
|
+
: decimal_buffer_(),
|
39
|
+
list_array_value_converter_(nullptr),
|
40
|
+
struct_array_value_converter_(nullptr),
|
41
|
+
union_array_value_converter_(nullptr),
|
42
|
+
dictionary_array_value_converter_(nullptr) {
|
43
|
+
}
|
44
|
+
|
45
|
+
inline void set_sub_value_converters(ListArrayValueConverter* list_array_value_converter,
|
46
|
+
StructArrayValueConverter* struct_array_value_converter,
|
47
|
+
UnionArrayValueConverter* union_array_value_converter,
|
48
|
+
DictionaryArrayValueConverter* dictionary_array_value_converter) {
|
49
|
+
list_array_value_converter_ = list_array_value_converter;
|
50
|
+
struct_array_value_converter_ = struct_array_value_converter;
|
51
|
+
union_array_value_converter_ = union_array_value_converter;
|
52
|
+
dictionary_array_value_converter_ = dictionary_array_value_converter;
|
53
|
+
}
|
54
|
+
|
55
|
+
inline VALUE convert(const arrow::NullArray& array,
|
56
|
+
const int64_t i) {
|
57
|
+
return Qnil;
|
58
|
+
}
|
59
|
+
|
60
|
+
inline VALUE convert(const arrow::BooleanArray& array,
|
61
|
+
const int64_t i) {
|
62
|
+
return array.Value(i) ? Qtrue : Qfalse;
|
63
|
+
}
|
64
|
+
|
65
|
+
inline VALUE convert(const arrow::Int8Array& array,
|
66
|
+
const int64_t i) {
|
67
|
+
return INT2NUM(array.Value(i));
|
68
|
+
}
|
69
|
+
|
70
|
+
inline VALUE convert(const arrow::Int16Array& array,
|
71
|
+
const int64_t i) {
|
72
|
+
return INT2NUM(array.Value(i));
|
73
|
+
}
|
74
|
+
|
75
|
+
inline VALUE convert(const arrow::Int32Array& array,
|
76
|
+
const int64_t i) {
|
77
|
+
return INT2NUM(array.Value(i));
|
78
|
+
}
|
79
|
+
|
80
|
+
inline VALUE convert(const arrow::Int64Array& array,
|
81
|
+
const int64_t i) {
|
82
|
+
return LL2NUM(array.Value(i));
|
83
|
+
}
|
84
|
+
|
85
|
+
inline VALUE convert(const arrow::UInt8Array& array,
|
86
|
+
const int64_t i) {
|
87
|
+
return UINT2NUM(array.Value(i));
|
88
|
+
}
|
89
|
+
|
90
|
+
inline VALUE convert(const arrow::UInt16Array& array,
|
91
|
+
const int64_t i) {
|
92
|
+
return UINT2NUM(array.Value(i));
|
93
|
+
}
|
94
|
+
|
95
|
+
inline VALUE convert(const arrow::UInt32Array& array,
|
96
|
+
const int64_t i) {
|
97
|
+
return UINT2NUM(array.Value(i));
|
98
|
+
}
|
99
|
+
|
100
|
+
inline VALUE convert(const arrow::UInt64Array& array,
|
101
|
+
const int64_t i) {
|
102
|
+
return ULL2NUM(array.Value(i));
|
103
|
+
}
|
104
|
+
|
105
|
+
// TODO
|
106
|
+
// inline VALUE convert(const arrow::HalfFloatArray& array,
|
107
|
+
// const int64_t i) {
|
108
|
+
// }
|
109
|
+
|
110
|
+
inline VALUE convert(const arrow::FloatArray& array,
|
111
|
+
const int64_t i) {
|
112
|
+
return DBL2NUM(array.Value(i));
|
113
|
+
}
|
114
|
+
|
115
|
+
inline VALUE convert(const arrow::DoubleArray& array,
|
116
|
+
const int64_t i) {
|
117
|
+
return DBL2NUM(array.Value(i));
|
118
|
+
}
|
119
|
+
|
120
|
+
inline VALUE convert(const arrow::BinaryArray& array,
|
121
|
+
const int64_t i) {
|
122
|
+
int32_t length;
|
123
|
+
const auto value = array.GetValue(i, &length);
|
124
|
+
// TODO: encoding support
|
125
|
+
return rb_enc_str_new(reinterpret_cast<const char*>(value),
|
126
|
+
length,
|
127
|
+
rb_ascii8bit_encoding());
|
128
|
+
}
|
129
|
+
|
130
|
+
inline VALUE convert(const arrow::StringArray& array,
|
131
|
+
const int64_t i) {
|
132
|
+
int32_t length;
|
133
|
+
const auto value = array.GetValue(i, &length);
|
134
|
+
return rb_utf8_str_new(reinterpret_cast<const char*>(value),
|
135
|
+
length);
|
136
|
+
}
|
137
|
+
|
138
|
+
inline VALUE convert(const arrow::FixedSizeBinaryArray& array,
|
139
|
+
const int64_t i) {
|
140
|
+
return rb_enc_str_new(reinterpret_cast<const char*>(array.Value(i)),
|
141
|
+
array.byte_width(),
|
142
|
+
rb_ascii8bit_encoding());
|
143
|
+
}
|
144
|
+
|
145
|
+
constexpr static int32_t JULIAN_DATE_UNIX_EPOCH = 2440588;
|
146
|
+
inline VALUE convert(const arrow::Date32Array& array,
|
147
|
+
const int64_t i) {
|
148
|
+
const auto value = array.Value(i);
|
149
|
+
const auto days_in_julian = value + JULIAN_DATE_UNIX_EPOCH;
|
150
|
+
return rb_funcall(cDate, id_jd, 1, LONG2NUM(days_in_julian));
|
151
|
+
}
|
152
|
+
|
153
|
+
inline VALUE convert(const arrow::Date64Array& array,
|
154
|
+
const int64_t i) {
|
155
|
+
const auto value = array.Value(i);
|
156
|
+
auto msec = LL2NUM(value);
|
157
|
+
auto sec = rb_rational_new(msec, INT2NUM(1000));
|
158
|
+
auto time_value = rb_time_num_new(sec, Qnil);
|
159
|
+
return rb_funcall(time_value, id_to_datetime, 0, 0);
|
160
|
+
}
|
161
|
+
|
162
|
+
inline VALUE convert(const arrow::Time32Array& array,
|
163
|
+
const int64_t i) {
|
164
|
+
const auto type =
|
165
|
+
arrow::internal::checked_cast<const arrow::Time32Type*>(array.type().get());
|
166
|
+
const auto value = array.Value(i);
|
167
|
+
return rb_funcall(red_arrow::cArrowTime,
|
168
|
+
id_new,
|
169
|
+
2,
|
170
|
+
time_unit_to_enum(type->unit()),
|
171
|
+
INT2NUM(value));
|
172
|
+
}
|
173
|
+
|
174
|
+
inline VALUE convert(const arrow::Time64Array& array,
|
175
|
+
const int64_t i) {
|
176
|
+
const auto type =
|
177
|
+
arrow::internal::checked_cast<const arrow::Time64Type*>(array.type().get());
|
178
|
+
const auto value = array.Value(i);
|
179
|
+
return rb_funcall(red_arrow::cArrowTime,
|
180
|
+
id_new,
|
181
|
+
2,
|
182
|
+
time_unit_to_enum(type->unit()),
|
183
|
+
LL2NUM(value));
|
184
|
+
}
|
185
|
+
|
186
|
+
inline VALUE convert(const arrow::TimestampArray& array,
|
187
|
+
const int64_t i) {
|
188
|
+
const auto type =
|
189
|
+
arrow::internal::checked_cast<const arrow::TimestampType*>(array.type().get());
|
190
|
+
auto scale = time_unit_to_scale(type->unit());
|
191
|
+
auto value = array.Value(i);
|
192
|
+
auto sec = rb_rational_new(LL2NUM(value), scale);
|
193
|
+
return rb_time_num_new(sec, Qnil);
|
194
|
+
}
|
195
|
+
|
196
|
+
// TODO
|
197
|
+
// inline VALUE convert(const arrow::IntervalArray& array,
|
198
|
+
// const int64_t i) {
|
199
|
+
// };
|
200
|
+
|
201
|
+
VALUE convert(const arrow::ListArray& array,
|
202
|
+
const int64_t i);
|
203
|
+
|
204
|
+
VALUE convert(const arrow::StructArray& array,
|
205
|
+
const int64_t i);
|
206
|
+
|
207
|
+
VALUE convert(const arrow::UnionArray& array,
|
208
|
+
const int64_t i);
|
209
|
+
|
210
|
+
VALUE convert(const arrow::DictionaryArray& array,
|
211
|
+
const int64_t i);
|
212
|
+
|
213
|
+
inline VALUE convert(const arrow::Decimal128Array& array,
|
214
|
+
const int64_t i) {
|
215
|
+
decimal_buffer_ = array.FormatValue(i);
|
216
|
+
return rb_funcall(rb_cObject,
|
217
|
+
id_BigDecimal,
|
218
|
+
1,
|
219
|
+
rb_enc_str_new(decimal_buffer_.data(),
|
220
|
+
decimal_buffer_.length(),
|
221
|
+
rb_ascii8bit_encoding()));
|
222
|
+
}
|
223
|
+
|
224
|
+
private:
|
225
|
+
std::string decimal_buffer_;
|
226
|
+
ListArrayValueConverter* list_array_value_converter_;
|
227
|
+
StructArrayValueConverter* struct_array_value_converter_;
|
228
|
+
UnionArrayValueConverter* union_array_value_converter_;
|
229
|
+
DictionaryArrayValueConverter* dictionary_array_value_converter_;
|
230
|
+
};
|
231
|
+
|
232
|
+
class ListArrayValueConverter : public arrow::ArrayVisitor {
|
233
|
+
public:
|
234
|
+
explicit ListArrayValueConverter(ArrayValueConverter* converter)
|
235
|
+
: array_value_converter_(converter),
|
236
|
+
offset_(0),
|
237
|
+
length_(0),
|
238
|
+
result_(Qnil) {}
|
239
|
+
|
240
|
+
VALUE convert(const arrow::ListArray& array, const int64_t index) {
|
241
|
+
auto values = array.values().get();
|
242
|
+
auto offset_keep = offset_;
|
243
|
+
auto length_keep = length_;
|
244
|
+
offset_ = array.value_offset(index);
|
245
|
+
length_ = array.value_length(index);
|
246
|
+
auto result_keep = result_;
|
247
|
+
result_ = rb_ary_new_capa(length_);
|
248
|
+
check_status(values->Accept(this),
|
249
|
+
"[raw-records][list-array]");
|
250
|
+
offset_ = offset_keep;
|
251
|
+
length_ = length_keep;
|
252
|
+
auto result_return = result_;
|
253
|
+
result_ = result_keep;
|
254
|
+
return result_return;
|
255
|
+
}
|
256
|
+
|
257
|
+
#define VISIT(TYPE) \
|
258
|
+
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
|
259
|
+
return visit_value(array); \
|
260
|
+
}
|
261
|
+
|
262
|
+
VISIT(Null)
|
263
|
+
VISIT(Boolean)
|
264
|
+
VISIT(Int8)
|
265
|
+
VISIT(Int16)
|
266
|
+
VISIT(Int32)
|
267
|
+
VISIT(Int64)
|
268
|
+
VISIT(UInt8)
|
269
|
+
VISIT(UInt16)
|
270
|
+
VISIT(UInt32)
|
271
|
+
VISIT(UInt64)
|
272
|
+
// TODO
|
273
|
+
// VISIT(HalfFloat)
|
274
|
+
VISIT(Float)
|
275
|
+
VISIT(Double)
|
276
|
+
VISIT(Binary)
|
277
|
+
VISIT(String)
|
278
|
+
VISIT(FixedSizeBinary)
|
279
|
+
VISIT(Date32)
|
280
|
+
VISIT(Date64)
|
281
|
+
VISIT(Time32)
|
282
|
+
VISIT(Time64)
|
283
|
+
VISIT(Timestamp)
|
284
|
+
// TODO
|
285
|
+
// VISIT(Interval)
|
286
|
+
VISIT(List)
|
287
|
+
VISIT(Struct)
|
288
|
+
VISIT(Union)
|
289
|
+
VISIT(Dictionary)
|
290
|
+
VISIT(Decimal128)
|
291
|
+
// TODO
|
292
|
+
// VISIT(Extension)
|
293
|
+
|
294
|
+
#undef VISIT
|
295
|
+
|
296
|
+
private:
|
297
|
+
template <typename ArrayType>
|
298
|
+
inline VALUE convert_value(const ArrayType& array,
|
299
|
+
const int64_t i) {
|
300
|
+
return array_value_converter_->convert(array, i);
|
301
|
+
}
|
302
|
+
|
303
|
+
template <typename ArrayType>
|
304
|
+
arrow::Status visit_value(const ArrayType& array) {
|
305
|
+
if (array.null_count() > 0) {
|
306
|
+
for (int64_t i = 0; i < length_; ++i) {
|
307
|
+
auto value = Qnil;
|
308
|
+
if (!array.IsNull(i + offset_)) {
|
309
|
+
value = convert_value(array, i + offset_);
|
310
|
+
}
|
311
|
+
rb_ary_push(result_, value);
|
312
|
+
}
|
313
|
+
} else {
|
314
|
+
for (int64_t i = 0; i < length_; ++i) {
|
315
|
+
rb_ary_push(result_, convert_value(array, i + offset_));
|
316
|
+
}
|
317
|
+
}
|
318
|
+
return arrow::Status::OK();
|
319
|
+
}
|
320
|
+
|
321
|
+
ArrayValueConverter* array_value_converter_;
|
322
|
+
int32_t offset_;
|
323
|
+
int32_t length_;
|
324
|
+
VALUE result_;
|
325
|
+
};
|
326
|
+
|
327
|
+
class StructArrayValueConverter : public arrow::ArrayVisitor {
|
328
|
+
public:
|
329
|
+
explicit StructArrayValueConverter(ArrayValueConverter* converter)
|
330
|
+
: array_value_converter_(converter),
|
331
|
+
key_(Qnil),
|
332
|
+
index_(0),
|
333
|
+
result_(Qnil) {}
|
334
|
+
|
335
|
+
VALUE convert(const arrow::StructArray& array,
|
336
|
+
const int64_t index) {
|
337
|
+
auto index_keep = index_;
|
338
|
+
auto result_keep = result_;
|
339
|
+
index_ = index;
|
340
|
+
result_ = rb_hash_new();
|
341
|
+
const auto struct_type = array.struct_type();
|
342
|
+
const auto n = struct_type->num_children();
|
343
|
+
for (int i = 0; i < n; ++i) {
|
344
|
+
const auto field_type = struct_type->child(i).get();
|
345
|
+
const auto& field_name = field_type->name();
|
346
|
+
auto key_keep = key_;
|
347
|
+
key_ = rb_utf8_str_new(field_name.data(), field_name.length());
|
348
|
+
const auto field_array = array.field(i).get();
|
349
|
+
check_status(field_array->Accept(this),
|
350
|
+
"[raw-records][struct-array]");
|
351
|
+
key_ = key_keep;
|
352
|
+
}
|
353
|
+
auto result_return = result_;
|
354
|
+
result_ = result_keep;
|
355
|
+
index_ = index_keep;
|
356
|
+
return result_return;
|
357
|
+
}
|
358
|
+
|
359
|
+
#define VISIT(TYPE) \
|
360
|
+
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
|
361
|
+
fill_field(array); \
|
362
|
+
return arrow::Status::OK(); \
|
363
|
+
}
|
364
|
+
|
365
|
+
VISIT(Null)
|
366
|
+
VISIT(Boolean)
|
367
|
+
VISIT(Int8)
|
368
|
+
VISIT(Int16)
|
369
|
+
VISIT(Int32)
|
370
|
+
VISIT(Int64)
|
371
|
+
VISIT(UInt8)
|
372
|
+
VISIT(UInt16)
|
373
|
+
VISIT(UInt32)
|
374
|
+
VISIT(UInt64)
|
375
|
+
// TODO
|
376
|
+
// VISIT(HalfFloat)
|
377
|
+
VISIT(Float)
|
378
|
+
VISIT(Double)
|
379
|
+
VISIT(Binary)
|
380
|
+
VISIT(String)
|
381
|
+
VISIT(FixedSizeBinary)
|
382
|
+
VISIT(Date32)
|
383
|
+
VISIT(Date64)
|
384
|
+
VISIT(Time32)
|
385
|
+
VISIT(Time64)
|
386
|
+
VISIT(Timestamp)
|
387
|
+
// TODO
|
388
|
+
// VISIT(Interval)
|
389
|
+
VISIT(List)
|
390
|
+
VISIT(Struct)
|
391
|
+
VISIT(Union)
|
392
|
+
VISIT(Dictionary)
|
393
|
+
VISIT(Decimal128)
|
394
|
+
// TODO
|
395
|
+
// VISIT(Extension)
|
396
|
+
|
397
|
+
#undef VISIT
|
398
|
+
|
399
|
+
private:
|
400
|
+
template <typename ArrayType>
|
401
|
+
inline VALUE convert_value(const ArrayType& array,
|
402
|
+
const int64_t i) {
|
403
|
+
return array_value_converter_->convert(array, i);
|
404
|
+
}
|
405
|
+
|
406
|
+
template <typename ArrayType>
|
407
|
+
void fill_field(const ArrayType& array) {
|
408
|
+
if (array.IsNull(index_)) {
|
409
|
+
rb_hash_aset(result_, key_, Qnil);
|
410
|
+
} else {
|
411
|
+
rb_hash_aset(result_, key_, convert_value(array, index_));
|
412
|
+
}
|
413
|
+
}
|
414
|
+
|
415
|
+
ArrayValueConverter* array_value_converter_;
|
416
|
+
VALUE key_;
|
417
|
+
int64_t index_;
|
418
|
+
VALUE result_;
|
419
|
+
};
|
420
|
+
|
421
|
+
class UnionArrayValueConverter : public arrow::ArrayVisitor {
|
422
|
+
public:
|
423
|
+
explicit UnionArrayValueConverter(ArrayValueConverter* converter)
|
424
|
+
: array_value_converter_(converter),
|
425
|
+
index_(0),
|
426
|
+
result_(Qnil) {}
|
427
|
+
|
428
|
+
VALUE convert(const arrow::UnionArray& array,
|
429
|
+
const int64_t index) {
|
430
|
+
const auto index_keep = index_;
|
431
|
+
const auto result_keep = result_;
|
432
|
+
index_ = index;
|
433
|
+
switch (array.mode()) {
|
434
|
+
case arrow::UnionMode::SPARSE:
|
435
|
+
convert_sparse(array);
|
436
|
+
break;
|
437
|
+
case arrow::UnionMode::DENSE:
|
438
|
+
convert_dense(array);
|
439
|
+
break;
|
440
|
+
default:
|
441
|
+
rb_raise(rb_eArgError, "Invalid union mode");
|
442
|
+
break;
|
443
|
+
}
|
444
|
+
auto result_return = result_;
|
445
|
+
index_ = index_keep;
|
446
|
+
result_ = result_keep;
|
447
|
+
return result_return;
|
448
|
+
}
|
449
|
+
|
450
|
+
#define VISIT(TYPE) \
|
451
|
+
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
|
452
|
+
convert_value(array); \
|
453
|
+
return arrow::Status::OK(); \
|
454
|
+
}
|
455
|
+
|
456
|
+
VISIT(Null)
|
457
|
+
VISIT(Boolean)
|
458
|
+
VISIT(Int8)
|
459
|
+
VISIT(Int16)
|
460
|
+
VISIT(Int32)
|
461
|
+
VISIT(Int64)
|
462
|
+
VISIT(UInt8)
|
463
|
+
VISIT(UInt16)
|
464
|
+
VISIT(UInt32)
|
465
|
+
VISIT(UInt64)
|
466
|
+
// TODO
|
467
|
+
// VISIT(HalfFloat)
|
468
|
+
VISIT(Float)
|
469
|
+
VISIT(Double)
|
470
|
+
VISIT(Binary)
|
471
|
+
VISIT(String)
|
472
|
+
VISIT(FixedSizeBinary)
|
473
|
+
VISIT(Date32)
|
474
|
+
VISIT(Date64)
|
475
|
+
VISIT(Time32)
|
476
|
+
VISIT(Time64)
|
477
|
+
VISIT(Timestamp)
|
478
|
+
// TODO
|
479
|
+
// VISIT(Interval)
|
480
|
+
VISIT(List)
|
481
|
+
VISIT(Struct)
|
482
|
+
VISIT(Union)
|
483
|
+
VISIT(Dictionary)
|
484
|
+
VISIT(Decimal128)
|
485
|
+
// TODO
|
486
|
+
// VISIT(Extension)
|
487
|
+
|
488
|
+
#undef VISIT
|
489
|
+
|
490
|
+
private:
|
491
|
+
template <typename ArrayType>
|
492
|
+
inline void convert_value(const ArrayType& array) {
|
493
|
+
auto result = rb_hash_new();
|
494
|
+
if (array.IsNull(index_)) {
|
495
|
+
rb_hash_aset(result, field_name_, Qnil);
|
496
|
+
} else {
|
497
|
+
rb_hash_aset(result,
|
498
|
+
field_name_,
|
499
|
+
array_value_converter_->convert(array, index_));
|
500
|
+
}
|
501
|
+
result_ = result;
|
502
|
+
}
|
503
|
+
|
504
|
+
uint8_t compute_child_index(const arrow::UnionArray& array,
|
505
|
+
arrow::UnionType* type,
|
506
|
+
const char* tag) {
|
507
|
+
const auto type_id = array.raw_type_ids()[index_];
|
508
|
+
const auto& type_codes = type->type_codes();
|
509
|
+
for (uint8_t i = 0; i < type_codes.size(); ++i) {
|
510
|
+
if (type_codes[i] == type_id) {
|
511
|
+
return i;
|
512
|
+
}
|
513
|
+
}
|
514
|
+
check_status(arrow::Status::Invalid("Unknown type ID: ", type_id),
|
515
|
+
tag);
|
516
|
+
return 0;
|
517
|
+
}
|
518
|
+
|
519
|
+
void convert_sparse(const arrow::UnionArray& array) {
|
520
|
+
const auto type =
|
521
|
+
std::static_pointer_cast<arrow::UnionType>(array.type()).get();
|
522
|
+
const auto tag = "[raw-records][union-sparse-array]";
|
523
|
+
const auto child_index = compute_child_index(array, type, tag);
|
524
|
+
const auto child_field = type->child(child_index).get();
|
525
|
+
const auto& field_name = child_field->name();
|
526
|
+
const auto field_name_keep = field_name_;
|
527
|
+
field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
|
528
|
+
const auto child_array = array.child(child_index).get();
|
529
|
+
check_status(child_array->Accept(this), tag);
|
530
|
+
field_name_ = field_name_keep;
|
531
|
+
}
|
532
|
+
|
533
|
+
void convert_dense(const arrow::UnionArray& array) {
|
534
|
+
const auto type =
|
535
|
+
std::static_pointer_cast<arrow::UnionType>(array.type()).get();
|
536
|
+
const auto tag = "[raw-records][union-dense-array]";
|
537
|
+
const auto child_index = compute_child_index(array, type, tag);
|
538
|
+
const auto child_field = type->child(child_index).get();
|
539
|
+
const auto& field_name = child_field->name();
|
540
|
+
const auto field_name_keep = field_name_;
|
541
|
+
field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
|
542
|
+
const auto child_array = array.child(child_index);
|
543
|
+
const auto index_keep = index_;
|
544
|
+
index_ = array.value_offset(index_);
|
545
|
+
check_status(child_array->Accept(this), tag);
|
546
|
+
index_ = index_keep;
|
547
|
+
field_name_ = field_name_keep;
|
548
|
+
}
|
549
|
+
|
550
|
+
ArrayValueConverter* array_value_converter_;
|
551
|
+
int64_t index_;
|
552
|
+
VALUE field_name_;
|
553
|
+
VALUE result_;
|
554
|
+
};
|
555
|
+
|
556
|
+
class DictionaryArrayValueConverter : public arrow::ArrayVisitor {
|
557
|
+
public:
|
558
|
+
explicit DictionaryArrayValueConverter(ArrayValueConverter* converter)
|
559
|
+
: array_value_converter_(converter),
|
560
|
+
index_(0),
|
561
|
+
result_(Qnil) {
|
562
|
+
}
|
563
|
+
|
564
|
+
VALUE convert(const arrow::DictionaryArray& array,
|
565
|
+
const int64_t index) {
|
566
|
+
index_ = index;
|
567
|
+
auto indices = array.indices().get();
|
568
|
+
check_status(indices->Accept(this),
|
569
|
+
"[raw-records][dictionary-array]");
|
570
|
+
return result_;
|
571
|
+
}
|
572
|
+
|
573
|
+
// TODO: Convert to real value.
|
574
|
+
#define VISIT(TYPE) \
|
575
|
+
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
|
576
|
+
result_ = convert_value(array, index_); \
|
577
|
+
return arrow::Status::OK(); \
|
578
|
+
}
|
579
|
+
|
580
|
+
VISIT(Int8)
|
581
|
+
VISIT(Int16)
|
582
|
+
VISIT(Int32)
|
583
|
+
VISIT(Int64)
|
584
|
+
|
585
|
+
#undef VISIT
|
586
|
+
|
587
|
+
private:
|
588
|
+
template <typename ArrayType>
|
589
|
+
inline VALUE convert_value(const ArrayType& array,
|
590
|
+
const int64_t i) {
|
591
|
+
return array_value_converter_->convert(array, i);
|
592
|
+
}
|
593
|
+
|
594
|
+
ArrayValueConverter* array_value_converter_;
|
595
|
+
int64_t index_;
|
596
|
+
VALUE result_;
|
597
|
+
};
|
598
|
+
|
599
|
+
class Converter {
|
600
|
+
public:
|
601
|
+
explicit Converter()
|
602
|
+
: array_value_converter_(),
|
603
|
+
list_array_value_converter_(&array_value_converter_),
|
604
|
+
struct_array_value_converter_(&array_value_converter_),
|
605
|
+
union_array_value_converter_(&array_value_converter_),
|
606
|
+
dictionary_array_value_converter_(&array_value_converter_) {
|
607
|
+
array_value_converter_.
|
608
|
+
set_sub_value_converters(&list_array_value_converter_,
|
609
|
+
&struct_array_value_converter_,
|
610
|
+
&union_array_value_converter_,
|
611
|
+
&dictionary_array_value_converter_);
|
612
|
+
}
|
613
|
+
|
614
|
+
template <typename ArrayType>
|
615
|
+
inline VALUE convert_value(const ArrayType& array,
|
616
|
+
const int64_t i) {
|
617
|
+
return array_value_converter_.convert(array, i);
|
618
|
+
}
|
619
|
+
|
620
|
+
ArrayValueConverter array_value_converter_;
|
621
|
+
ListArrayValueConverter list_array_value_converter_;
|
622
|
+
StructArrayValueConverter struct_array_value_converter_;
|
623
|
+
UnionArrayValueConverter union_array_value_converter_;
|
624
|
+
DictionaryArrayValueConverter dictionary_array_value_converter_;
|
625
|
+
};
|
626
|
+
}
|