red-arrow 0.14.1 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of red-arrow might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/ext/arrow/arrow.cpp +34 -0
- data/ext/arrow/converters.cpp +42 -0
- data/ext/arrow/converters.hpp +626 -0
- data/ext/arrow/raw-records.cpp +6 -625
- data/ext/arrow/red-arrow.hpp +37 -3
- data/ext/arrow/values.cpp +154 -0
- data/lib/arrow/array-builder.rb +24 -1
- data/lib/arrow/array.rb +9 -0
- data/lib/arrow/chunked-array.rb +5 -0
- data/lib/arrow/column-containable.rb +48 -0
- data/lib/arrow/column.rb +36 -10
- data/lib/arrow/csv-loader.rb +2 -2
- data/lib/arrow/data-type.rb +22 -5
- data/lib/arrow/date64-array-builder.rb +2 -2
- data/lib/arrow/date64-array.rb +1 -1
- data/lib/arrow/decimal128-array.rb +24 -0
- data/lib/arrow/field-containable.rb +3 -0
- data/lib/arrow/group.rb +10 -13
- data/lib/arrow/loader.rb +20 -1
- data/lib/arrow/record-batch.rb +6 -4
- data/lib/arrow/record-containable.rb +0 -35
- data/lib/arrow/record.rb +12 -9
- data/lib/arrow/slicer.rb +2 -2
- data/lib/arrow/struct-array-builder.rb +1 -7
- data/lib/arrow/struct-array.rb +13 -11
- data/lib/arrow/table-loader.rb +3 -9
- data/lib/arrow/table-table-formatter.rb +2 -2
- data/lib/arrow/table.rb +61 -24
- data/lib/arrow/time.rb +159 -0
- data/lib/arrow/time32-array-builder.rb +49 -0
- data/lib/arrow/time32-array.rb +28 -0
- data/lib/arrow/time64-array-builder.rb +49 -0
- data/lib/arrow/time64-array.rb +28 -0
- data/lib/arrow/timestamp-array-builder.rb +20 -1
- data/lib/arrow/timestamp-array.rb +10 -22
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -1
- data/test/raw-records/test-basic-arrays.rb +16 -8
- data/test/raw-records/test-dense-union-array.rb +12 -5
- data/test/raw-records/test-list-array.rb +21 -9
- data/test/raw-records/test-sparse-union-array.rb +13 -5
- data/test/raw-records/test-struct-array.rb +11 -4
- data/test/test-column.rb +56 -31
- data/test/test-decimal128-array-builder.rb +11 -11
- data/test/test-decimal128-array.rb +4 -4
- data/test/test-slicer.rb +1 -3
- data/test/test-struct-array-builder.rb +4 -4
- data/test/test-struct-array.rb +4 -4
- data/test/test-table.rb +17 -8
- data/test/test-time.rb +288 -0
- data/test/test-time32-array.rb +81 -0
- data/test/test-time64-array.rb +81 -0
- data/test/values/test-basic-arrays.rb +284 -0
- data/test/values/test-dense-union-array.rb +487 -0
- data/test/values/test-list-array.rb +497 -0
- data/test/values/test-sparse-union-array.rb +477 -0
- data/test/values/test-struct-array.rb +452 -0
- metadata +78 -54
- data/lib/arrow/struct.rb +0 -79
- data/test/test-struct.rb +0 -81
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 65ddeae926378c460b0945ff6949ecbf3ee911611cdcb95bf21f3cf3765efe6d
|
4
|
+
data.tar.gz: 193bce59b05b836fb5a5d6d8b650ea9adf441bf04beda8a6083633692c796957
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b502a0bc9f65b24d04d9f5ee1f58e7eb5db27e885d756158c249fba8be843af7f1ef12c234110cc7136d67f5e998ceb726a7ecf8688b653ba84ec207f7308e78
|
7
|
+
data.tar.gz: 1d8f8798f582fdb8b4e0e566d52407b3303391c4c43deef9471dc0011c9a377f83fca308b230709ef0b332ce83892a1cd967096531b4701382d4ed40066b216c
|
data/ext/arrow/arrow.cpp
CHANGED
@@ -23,17 +23,38 @@
|
|
23
23
|
|
24
24
|
namespace red_arrow {
|
25
25
|
VALUE cDate;
|
26
|
+
|
27
|
+
VALUE cArrowTime;
|
28
|
+
|
29
|
+
VALUE ArrowTimeUnitSECOND;
|
30
|
+
VALUE ArrowTimeUnitMILLI;
|
31
|
+
VALUE ArrowTimeUnitMICRO;
|
32
|
+
VALUE ArrowTimeUnitNANO;
|
33
|
+
|
26
34
|
ID id_BigDecimal;
|
27
35
|
ID id_jd;
|
36
|
+
ID id_new;
|
28
37
|
ID id_to_datetime;
|
29
38
|
}
|
30
39
|
|
31
40
|
extern "C" void Init_arrow() {
|
32
41
|
auto mArrow = rb_const_get_at(rb_cObject, rb_intern("Arrow"));
|
42
|
+
|
43
|
+
auto cArrowArray = rb_const_get_at(mArrow, rb_intern("Array"));
|
44
|
+
rb_define_method(cArrowArray, "values",
|
45
|
+
reinterpret_cast<rb::RawMethod>(red_arrow::array_values),
|
46
|
+
0);
|
47
|
+
|
48
|
+
auto cArrowChunkedArray = rb_const_get_at(mArrow, rb_intern("ChunkedArray"));
|
49
|
+
rb_define_method(cArrowChunkedArray, "values",
|
50
|
+
reinterpret_cast<rb::RawMethod>(red_arrow::chunked_array_values),
|
51
|
+
0);
|
52
|
+
|
33
53
|
auto cArrowRecordBatch = rb_const_get_at(mArrow, rb_intern("RecordBatch"));
|
34
54
|
rb_define_method(cArrowRecordBatch, "raw_records",
|
35
55
|
reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_raw_records),
|
36
56
|
0);
|
57
|
+
|
37
58
|
auto cArrowTable = rb_const_get_at(mArrow, rb_intern("Table"));
|
38
59
|
rb_define_method(cArrowTable, "raw_records",
|
39
60
|
reinterpret_cast<rb::RawMethod>(red_arrow::table_raw_records),
|
@@ -41,7 +62,20 @@ extern "C" void Init_arrow() {
|
|
41
62
|
|
42
63
|
red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date"));
|
43
64
|
|
65
|
+
red_arrow::cArrowTime = rb_const_get_at(mArrow, rb_intern("Time"));
|
66
|
+
|
67
|
+
auto cArrowTimeUnit = rb_const_get_at(mArrow, rb_intern("TimeUnit"));
|
68
|
+
red_arrow::ArrowTimeUnitSECOND =
|
69
|
+
rb_const_get_at(cArrowTimeUnit, rb_intern("SECOND"));
|
70
|
+
red_arrow::ArrowTimeUnitMILLI =
|
71
|
+
rb_const_get_at(cArrowTimeUnit, rb_intern("MILLI"));
|
72
|
+
red_arrow::ArrowTimeUnitMICRO =
|
73
|
+
rb_const_get_at(cArrowTimeUnit, rb_intern("MICRO"));
|
74
|
+
red_arrow::ArrowTimeUnitNANO =
|
75
|
+
rb_const_get_at(cArrowTimeUnit, rb_intern("NANO"));
|
76
|
+
|
44
77
|
red_arrow::id_BigDecimal = rb_intern("BigDecimal");
|
45
78
|
red_arrow::id_jd = rb_intern("jd");
|
79
|
+
red_arrow::id_new = rb_intern("new");
|
46
80
|
red_arrow::id_to_datetime = rb_intern("to_datetime");
|
47
81
|
}
|
data/ext/arrow/converters.cpp
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include "converters.hpp"
|
21
|
+
|
22
|
+
namespace red_arrow {
|
23
|
+
VALUE ArrayValueConverter::convert(const arrow::ListArray& array,
|
24
|
+
const int64_t i) {
|
25
|
+
return list_array_value_converter_->convert(array, i);
|
26
|
+
}
|
27
|
+
|
28
|
+
VALUE ArrayValueConverter::convert(const arrow::StructArray& array,
|
29
|
+
const int64_t i) {
|
30
|
+
return struct_array_value_converter_->convert(array, i);
|
31
|
+
}
|
32
|
+
|
33
|
+
VALUE ArrayValueConverter::convert(const arrow::UnionArray& array,
|
34
|
+
const int64_t i) {
|
35
|
+
return union_array_value_converter_->convert(array, i);
|
36
|
+
}
|
37
|
+
|
38
|
+
VALUE ArrayValueConverter::convert(const arrow::DictionaryArray& array,
|
39
|
+
const int64_t i) {
|
40
|
+
return dictionary_array_value_converter_->convert(array, i);
|
41
|
+
}
|
42
|
+
}
|
data/ext/arrow/converters.hpp
ADDED
@@ -0,0 +1,626 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include "red-arrow.hpp"
|
21
|
+
|
22
|
+
#include <ruby.hpp>
|
23
|
+
#include <ruby/encoding.h>
|
24
|
+
|
25
|
+
#include <arrow-glib/error.hpp>
|
26
|
+
|
27
|
+
#include <arrow/util/logging.h>
|
28
|
+
|
29
|
+
namespace red_arrow {
|
30
|
+
class ListArrayValueConverter;
|
31
|
+
class StructArrayValueConverter;
|
32
|
+
class UnionArrayValueConverter;
|
33
|
+
class DictionaryArrayValueConverter;
|
34
|
+
|
35
|
+
class ArrayValueConverter {
|
36
|
+
public:
|
37
|
+
ArrayValueConverter()
|
38
|
+
: decimal_buffer_(),
|
39
|
+
list_array_value_converter_(nullptr),
|
40
|
+
struct_array_value_converter_(nullptr),
|
41
|
+
union_array_value_converter_(nullptr),
|
42
|
+
dictionary_array_value_converter_(nullptr) {
|
43
|
+
}
|
44
|
+
|
45
|
+
inline void set_sub_value_converters(ListArrayValueConverter* list_array_value_converter,
|
46
|
+
StructArrayValueConverter* struct_array_value_converter,
|
47
|
+
UnionArrayValueConverter* union_array_value_converter,
|
48
|
+
DictionaryArrayValueConverter* dictionary_array_value_converter) {
|
49
|
+
list_array_value_converter_ = list_array_value_converter;
|
50
|
+
struct_array_value_converter_ = struct_array_value_converter;
|
51
|
+
union_array_value_converter_ = union_array_value_converter;
|
52
|
+
dictionary_array_value_converter_ = dictionary_array_value_converter;
|
53
|
+
}
|
54
|
+
|
55
|
+
inline VALUE convert(const arrow::NullArray& array,
|
56
|
+
const int64_t i) {
|
57
|
+
return Qnil;
|
58
|
+
}
|
59
|
+
|
60
|
+
inline VALUE convert(const arrow::BooleanArray& array,
|
61
|
+
const int64_t i) {
|
62
|
+
return array.Value(i) ? Qtrue : Qfalse;
|
63
|
+
}
|
64
|
+
|
65
|
+
inline VALUE convert(const arrow::Int8Array& array,
|
66
|
+
const int64_t i) {
|
67
|
+
return INT2NUM(array.Value(i));
|
68
|
+
}
|
69
|
+
|
70
|
+
inline VALUE convert(const arrow::Int16Array& array,
|
71
|
+
const int64_t i) {
|
72
|
+
return INT2NUM(array.Value(i));
|
73
|
+
}
|
74
|
+
|
75
|
+
inline VALUE convert(const arrow::Int32Array& array,
|
76
|
+
const int64_t i) {
|
77
|
+
return INT2NUM(array.Value(i));
|
78
|
+
}
|
79
|
+
|
80
|
+
inline VALUE convert(const arrow::Int64Array& array,
|
81
|
+
const int64_t i) {
|
82
|
+
return LL2NUM(array.Value(i));
|
83
|
+
}
|
84
|
+
|
85
|
+
inline VALUE convert(const arrow::UInt8Array& array,
|
86
|
+
const int64_t i) {
|
87
|
+
return UINT2NUM(array.Value(i));
|
88
|
+
}
|
89
|
+
|
90
|
+
inline VALUE convert(const arrow::UInt16Array& array,
|
91
|
+
const int64_t i) {
|
92
|
+
return UINT2NUM(array.Value(i));
|
93
|
+
}
|
94
|
+
|
95
|
+
inline VALUE convert(const arrow::UInt32Array& array,
|
96
|
+
const int64_t i) {
|
97
|
+
return UINT2NUM(array.Value(i));
|
98
|
+
}
|
99
|
+
|
100
|
+
inline VALUE convert(const arrow::UInt64Array& array,
|
101
|
+
const int64_t i) {
|
102
|
+
return ULL2NUM(array.Value(i));
|
103
|
+
}
|
104
|
+
|
105
|
+
// TODO
|
106
|
+
// inline VALUE convert(const arrow::HalfFloatArray& array,
|
107
|
+
// const int64_t i) {
|
108
|
+
// }
|
109
|
+
|
110
|
+
inline VALUE convert(const arrow::FloatArray& array,
|
111
|
+
const int64_t i) {
|
112
|
+
return DBL2NUM(array.Value(i));
|
113
|
+
}
|
114
|
+
|
115
|
+
inline VALUE convert(const arrow::DoubleArray& array,
|
116
|
+
const int64_t i) {
|
117
|
+
return DBL2NUM(array.Value(i));
|
118
|
+
}
|
119
|
+
|
120
|
+
inline VALUE convert(const arrow::BinaryArray& array,
|
121
|
+
const int64_t i) {
|
122
|
+
int32_t length;
|
123
|
+
const auto value = array.GetValue(i, &length);
|
124
|
+
// TODO: encoding support
|
125
|
+
return rb_enc_str_new(reinterpret_cast<const char*>(value),
|
126
|
+
length,
|
127
|
+
rb_ascii8bit_encoding());
|
128
|
+
}
|
129
|
+
|
130
|
+
inline VALUE convert(const arrow::StringArray& array,
|
131
|
+
const int64_t i) {
|
132
|
+
int32_t length;
|
133
|
+
const auto value = array.GetValue(i, &length);
|
134
|
+
return rb_utf8_str_new(reinterpret_cast<const char*>(value),
|
135
|
+
length);
|
136
|
+
}
|
137
|
+
|
138
|
+
inline VALUE convert(const arrow::FixedSizeBinaryArray& array,
|
139
|
+
const int64_t i) {
|
140
|
+
return rb_enc_str_new(reinterpret_cast<const char*>(array.Value(i)),
|
141
|
+
array.byte_width(),
|
142
|
+
rb_ascii8bit_encoding());
|
143
|
+
}
|
144
|
+
|
145
|
+
constexpr static int32_t JULIAN_DATE_UNIX_EPOCH = 2440588;
|
146
|
+
inline VALUE convert(const arrow::Date32Array& array,
|
147
|
+
const int64_t i) {
|
148
|
+
const auto value = array.Value(i);
|
149
|
+
const auto days_in_julian = value + JULIAN_DATE_UNIX_EPOCH;
|
150
|
+
return rb_funcall(cDate, id_jd, 1, LONG2NUM(days_in_julian));
|
151
|
+
}
|
152
|
+
|
153
|
+
inline VALUE convert(const arrow::Date64Array& array,
|
154
|
+
const int64_t i) {
|
155
|
+
const auto value = array.Value(i);
|
156
|
+
auto msec = LL2NUM(value);
|
157
|
+
auto sec = rb_rational_new(msec, INT2NUM(1000));
|
158
|
+
auto time_value = rb_time_num_new(sec, Qnil);
|
159
|
+
return rb_funcall(time_value, id_to_datetime, 0, 0);
|
160
|
+
}
|
161
|
+
|
162
|
+
inline VALUE convert(const arrow::Time32Array& array,
|
163
|
+
const int64_t i) {
|
164
|
+
const auto type =
|
165
|
+
arrow::internal::checked_cast<const arrow::Time32Type*>(array.type().get());
|
166
|
+
const auto value = array.Value(i);
|
167
|
+
return rb_funcall(red_arrow::cArrowTime,
|
168
|
+
id_new,
|
169
|
+
2,
|
170
|
+
time_unit_to_enum(type->unit()),
|
171
|
+
INT2NUM(value));
|
172
|
+
}
|
173
|
+
|
174
|
+
inline VALUE convert(const arrow::Time64Array& array,
|
175
|
+
const int64_t i) {
|
176
|
+
const auto type =
|
177
|
+
arrow::internal::checked_cast<const arrow::Time64Type*>(array.type().get());
|
178
|
+
const auto value = array.Value(i);
|
179
|
+
return rb_funcall(red_arrow::cArrowTime,
|
180
|
+
id_new,
|
181
|
+
2,
|
182
|
+
time_unit_to_enum(type->unit()),
|
183
|
+
LL2NUM(value));
|
184
|
+
}
|
185
|
+
|
186
|
+
inline VALUE convert(const arrow::TimestampArray& array,
|
187
|
+
const int64_t i) {
|
188
|
+
const auto type =
|
189
|
+
arrow::internal::checked_cast<const arrow::TimestampType*>(array.type().get());
|
190
|
+
auto scale = time_unit_to_scale(type->unit());
|
191
|
+
auto value = array.Value(i);
|
192
|
+
auto sec = rb_rational_new(LL2NUM(value), scale);
|
193
|
+
return rb_time_num_new(sec, Qnil);
|
194
|
+
}
|
195
|
+
|
196
|
+
// TODO
|
197
|
+
// inline VALUE convert(const arrow::IntervalArray& array,
|
198
|
+
// const int64_t i) {
|
199
|
+
// };
|
200
|
+
|
201
|
+
VALUE convert(const arrow::ListArray& array,
|
202
|
+
const int64_t i);
|
203
|
+
|
204
|
+
VALUE convert(const arrow::StructArray& array,
|
205
|
+
const int64_t i);
|
206
|
+
|
207
|
+
VALUE convert(const arrow::UnionArray& array,
|
208
|
+
const int64_t i);
|
209
|
+
|
210
|
+
VALUE convert(const arrow::DictionaryArray& array,
|
211
|
+
const int64_t i);
|
212
|
+
|
213
|
+
inline VALUE convert(const arrow::Decimal128Array& array,
|
214
|
+
const int64_t i) {
|
215
|
+
decimal_buffer_ = array.FormatValue(i);
|
216
|
+
return rb_funcall(rb_cObject,
|
217
|
+
id_BigDecimal,
|
218
|
+
1,
|
219
|
+
rb_enc_str_new(decimal_buffer_.data(),
|
220
|
+
decimal_buffer_.length(),
|
221
|
+
rb_ascii8bit_encoding()));
|
222
|
+
}
|
223
|
+
|
224
|
+
private:
|
225
|
+
std::string decimal_buffer_;
|
226
|
+
ListArrayValueConverter* list_array_value_converter_;
|
227
|
+
StructArrayValueConverter* struct_array_value_converter_;
|
228
|
+
UnionArrayValueConverter* union_array_value_converter_;
|
229
|
+
DictionaryArrayValueConverter* dictionary_array_value_converter_;
|
230
|
+
};
|
231
|
+
|
232
|
+
class ListArrayValueConverter : public arrow::ArrayVisitor {
|
233
|
+
public:
|
234
|
+
explicit ListArrayValueConverter(ArrayValueConverter* converter)
|
235
|
+
: array_value_converter_(converter),
|
236
|
+
offset_(0),
|
237
|
+
length_(0),
|
238
|
+
result_(Qnil) {}
|
239
|
+
|
240
|
+
VALUE convert(const arrow::ListArray& array, const int64_t index) {
|
241
|
+
auto values = array.values().get();
|
242
|
+
auto offset_keep = offset_;
|
243
|
+
auto length_keep = length_;
|
244
|
+
offset_ = array.value_offset(index);
|
245
|
+
length_ = array.value_length(index);
|
246
|
+
auto result_keep = result_;
|
247
|
+
result_ = rb_ary_new_capa(length_);
|
248
|
+
check_status(values->Accept(this),
|
249
|
+
"[raw-records][list-array]");
|
250
|
+
offset_ = offset_keep;
|
251
|
+
length_ = length_keep;
|
252
|
+
auto result_return = result_;
|
253
|
+
result_ = result_keep;
|
254
|
+
return result_return;
|
255
|
+
}
|
256
|
+
|
257
|
+
#define VISIT(TYPE) \
|
258
|
+
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
|
259
|
+
return visit_value(array); \
|
260
|
+
}
|
261
|
+
|
262
|
+
VISIT(Null)
|
263
|
+
VISIT(Boolean)
|
264
|
+
VISIT(Int8)
|
265
|
+
VISIT(Int16)
|
266
|
+
VISIT(Int32)
|
267
|
+
VISIT(Int64)
|
268
|
+
VISIT(UInt8)
|
269
|
+
VISIT(UInt16)
|
270
|
+
VISIT(UInt32)
|
271
|
+
VISIT(UInt64)
|
272
|
+
// TODO
|
273
|
+
// VISIT(HalfFloat)
|
274
|
+
VISIT(Float)
|
275
|
+
VISIT(Double)
|
276
|
+
VISIT(Binary)
|
277
|
+
VISIT(String)
|
278
|
+
VISIT(FixedSizeBinary)
|
279
|
+
VISIT(Date32)
|
280
|
+
VISIT(Date64)
|
281
|
+
VISIT(Time32)
|
282
|
+
VISIT(Time64)
|
283
|
+
VISIT(Timestamp)
|
284
|
+
// TODO
|
285
|
+
// VISIT(Interval)
|
286
|
+
VISIT(List)
|
287
|
+
VISIT(Struct)
|
288
|
+
VISIT(Union)
|
289
|
+
VISIT(Dictionary)
|
290
|
+
VISIT(Decimal128)
|
291
|
+
// TODO
|
292
|
+
// VISIT(Extension)
|
293
|
+
|
294
|
+
#undef VISIT
|
295
|
+
|
296
|
+
private:
|
297
|
+
template <typename ArrayType>
|
298
|
+
inline VALUE convert_value(const ArrayType& array,
|
299
|
+
const int64_t i) {
|
300
|
+
return array_value_converter_->convert(array, i);
|
301
|
+
}
|
302
|
+
|
303
|
+
template <typename ArrayType>
|
304
|
+
arrow::Status visit_value(const ArrayType& array) {
|
305
|
+
if (array.null_count() > 0) {
|
306
|
+
for (int64_t i = 0; i < length_; ++i) {
|
307
|
+
auto value = Qnil;
|
308
|
+
if (!array.IsNull(i + offset_)) {
|
309
|
+
value = convert_value(array, i + offset_);
|
310
|
+
}
|
311
|
+
rb_ary_push(result_, value);
|
312
|
+
}
|
313
|
+
} else {
|
314
|
+
for (int64_t i = 0; i < length_; ++i) {
|
315
|
+
rb_ary_push(result_, convert_value(array, i + offset_));
|
316
|
+
}
|
317
|
+
}
|
318
|
+
return arrow::Status::OK();
|
319
|
+
}
|
320
|
+
|
321
|
+
ArrayValueConverter* array_value_converter_;
|
322
|
+
int32_t offset_;
|
323
|
+
int32_t length_;
|
324
|
+
VALUE result_;
|
325
|
+
};
|
326
|
+
|
327
|
+
class StructArrayValueConverter : public arrow::ArrayVisitor {
|
328
|
+
public:
|
329
|
+
explicit StructArrayValueConverter(ArrayValueConverter* converter)
|
330
|
+
: array_value_converter_(converter),
|
331
|
+
key_(Qnil),
|
332
|
+
index_(0),
|
333
|
+
result_(Qnil) {}
|
334
|
+
|
335
|
+
VALUE convert(const arrow::StructArray& array,
|
336
|
+
const int64_t index) {
|
337
|
+
auto index_keep = index_;
|
338
|
+
auto result_keep = result_;
|
339
|
+
index_ = index;
|
340
|
+
result_ = rb_hash_new();
|
341
|
+
const auto struct_type = array.struct_type();
|
342
|
+
const auto n = struct_type->num_children();
|
343
|
+
for (int i = 0; i < n; ++i) {
|
344
|
+
const auto field_type = struct_type->child(i).get();
|
345
|
+
const auto& field_name = field_type->name();
|
346
|
+
auto key_keep = key_;
|
347
|
+
key_ = rb_utf8_str_new(field_name.data(), field_name.length());
|
348
|
+
const auto field_array = array.field(i).get();
|
349
|
+
check_status(field_array->Accept(this),
|
350
|
+
"[raw-records][struct-array]");
|
351
|
+
key_ = key_keep;
|
352
|
+
}
|
353
|
+
auto result_return = result_;
|
354
|
+
result_ = result_keep;
|
355
|
+
index_ = index_keep;
|
356
|
+
return result_return;
|
357
|
+
}
|
358
|
+
|
359
|
+
#define VISIT(TYPE) \
|
360
|
+
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
|
361
|
+
fill_field(array); \
|
362
|
+
return arrow::Status::OK(); \
|
363
|
+
}
|
364
|
+
|
365
|
+
VISIT(Null)
|
366
|
+
VISIT(Boolean)
|
367
|
+
VISIT(Int8)
|
368
|
+
VISIT(Int16)
|
369
|
+
VISIT(Int32)
|
370
|
+
VISIT(Int64)
|
371
|
+
VISIT(UInt8)
|
372
|
+
VISIT(UInt16)
|
373
|
+
VISIT(UInt32)
|
374
|
+
VISIT(UInt64)
|
375
|
+
// TODO
|
376
|
+
// VISIT(HalfFloat)
|
377
|
+
VISIT(Float)
|
378
|
+
VISIT(Double)
|
379
|
+
VISIT(Binary)
|
380
|
+
VISIT(String)
|
381
|
+
VISIT(FixedSizeBinary)
|
382
|
+
VISIT(Date32)
|
383
|
+
VISIT(Date64)
|
384
|
+
VISIT(Time32)
|
385
|
+
VISIT(Time64)
|
386
|
+
VISIT(Timestamp)
|
387
|
+
// TODO
|
388
|
+
// VISIT(Interval)
|
389
|
+
VISIT(List)
|
390
|
+
VISIT(Struct)
|
391
|
+
VISIT(Union)
|
392
|
+
VISIT(Dictionary)
|
393
|
+
VISIT(Decimal128)
|
394
|
+
// TODO
|
395
|
+
// VISIT(Extension)
|
396
|
+
|
397
|
+
#undef VISIT
|
398
|
+
|
399
|
+
private:
|
400
|
+
template <typename ArrayType>
|
401
|
+
inline VALUE convert_value(const ArrayType& array,
|
402
|
+
const int64_t i) {
|
403
|
+
return array_value_converter_->convert(array, i);
|
404
|
+
}
|
405
|
+
|
406
|
+
template <typename ArrayType>
|
407
|
+
void fill_field(const ArrayType& array) {
|
408
|
+
if (array.IsNull(index_)) {
|
409
|
+
rb_hash_aset(result_, key_, Qnil);
|
410
|
+
} else {
|
411
|
+
rb_hash_aset(result_, key_, convert_value(array, index_));
|
412
|
+
}
|
413
|
+
}
|
414
|
+
|
415
|
+
ArrayValueConverter* array_value_converter_;
|
416
|
+
VALUE key_;
|
417
|
+
int64_t index_;
|
418
|
+
VALUE result_;
|
419
|
+
};
|
420
|
+
|
421
|
+
class UnionArrayValueConverter : public arrow::ArrayVisitor {
|
422
|
+
public:
|
423
|
+
explicit UnionArrayValueConverter(ArrayValueConverter* converter)
|
424
|
+
: array_value_converter_(converter),
|
425
|
+
index_(0),
|
426
|
+
result_(Qnil) {}
|
427
|
+
|
428
|
+
VALUE convert(const arrow::UnionArray& array,
|
429
|
+
const int64_t index) {
|
430
|
+
const auto index_keep = index_;
|
431
|
+
const auto result_keep = result_;
|
432
|
+
index_ = index;
|
433
|
+
switch (array.mode()) {
|
434
|
+
case arrow::UnionMode::SPARSE:
|
435
|
+
convert_sparse(array);
|
436
|
+
break;
|
437
|
+
case arrow::UnionMode::DENSE:
|
438
|
+
convert_dense(array);
|
439
|
+
break;
|
440
|
+
default:
|
441
|
+
rb_raise(rb_eArgError, "Invalid union mode");
|
442
|
+
break;
|
443
|
+
}
|
444
|
+
auto result_return = result_;
|
445
|
+
index_ = index_keep;
|
446
|
+
result_ = result_keep;
|
447
|
+
return result_return;
|
448
|
+
}
|
449
|
+
|
450
|
+
#define VISIT(TYPE) \
|
451
|
+
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
|
452
|
+
convert_value(array); \
|
453
|
+
return arrow::Status::OK(); \
|
454
|
+
}
|
455
|
+
|
456
|
+
VISIT(Null)
|
457
|
+
VISIT(Boolean)
|
458
|
+
VISIT(Int8)
|
459
|
+
VISIT(Int16)
|
460
|
+
VISIT(Int32)
|
461
|
+
VISIT(Int64)
|
462
|
+
VISIT(UInt8)
|
463
|
+
VISIT(UInt16)
|
464
|
+
VISIT(UInt32)
|
465
|
+
VISIT(UInt64)
|
466
|
+
// TODO
|
467
|
+
// VISIT(HalfFloat)
|
468
|
+
VISIT(Float)
|
469
|
+
VISIT(Double)
|
470
|
+
VISIT(Binary)
|
471
|
+
VISIT(String)
|
472
|
+
VISIT(FixedSizeBinary)
|
473
|
+
VISIT(Date32)
|
474
|
+
VISIT(Date64)
|
475
|
+
VISIT(Time32)
|
476
|
+
VISIT(Time64)
|
477
|
+
VISIT(Timestamp)
|
478
|
+
// TODO
|
479
|
+
// VISIT(Interval)
|
480
|
+
VISIT(List)
|
481
|
+
VISIT(Struct)
|
482
|
+
VISIT(Union)
|
483
|
+
VISIT(Dictionary)
|
484
|
+
VISIT(Decimal128)
|
485
|
+
// TODO
|
486
|
+
// VISIT(Extension)
|
487
|
+
|
488
|
+
#undef VISIT
|
489
|
+
|
490
|
+
private:
|
491
|
+
template <typename ArrayType>
|
492
|
+
inline void convert_value(const ArrayType& array) {
|
493
|
+
auto result = rb_hash_new();
|
494
|
+
if (array.IsNull(index_)) {
|
495
|
+
rb_hash_aset(result, field_name_, Qnil);
|
496
|
+
} else {
|
497
|
+
rb_hash_aset(result,
|
498
|
+
field_name_,
|
499
|
+
array_value_converter_->convert(array, index_));
|
500
|
+
}
|
501
|
+
result_ = result;
|
502
|
+
}
|
503
|
+
|
504
|
+
uint8_t compute_child_index(const arrow::UnionArray& array,
|
505
|
+
arrow::UnionType* type,
|
506
|
+
const char* tag) {
|
507
|
+
const auto type_id = array.raw_type_ids()[index_];
|
508
|
+
const auto& type_codes = type->type_codes();
|
509
|
+
for (uint8_t i = 0; i < type_codes.size(); ++i) {
|
510
|
+
if (type_codes[i] == type_id) {
|
511
|
+
return i;
|
512
|
+
}
|
513
|
+
}
|
514
|
+
check_status(arrow::Status::Invalid("Unknown type ID: ", type_id),
|
515
|
+
tag);
|
516
|
+
return 0;
|
517
|
+
}
|
518
|
+
|
519
|
+
void convert_sparse(const arrow::UnionArray& array) {
|
520
|
+
const auto type =
|
521
|
+
std::static_pointer_cast<arrow::UnionType>(array.type()).get();
|
522
|
+
const auto tag = "[raw-records][union-sparse-array]";
|
523
|
+
const auto child_index = compute_child_index(array, type, tag);
|
524
|
+
const auto child_field = type->child(child_index).get();
|
525
|
+
const auto& field_name = child_field->name();
|
526
|
+
const auto field_name_keep = field_name_;
|
527
|
+
field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
|
528
|
+
const auto child_array = array.child(child_index).get();
|
529
|
+
check_status(child_array->Accept(this), tag);
|
530
|
+
field_name_ = field_name_keep;
|
531
|
+
}
|
532
|
+
|
533
|
+
void convert_dense(const arrow::UnionArray& array) {
|
534
|
+
const auto type =
|
535
|
+
std::static_pointer_cast<arrow::UnionType>(array.type()).get();
|
536
|
+
const auto tag = "[raw-records][union-dense-array]";
|
537
|
+
const auto child_index = compute_child_index(array, type, tag);
|
538
|
+
const auto child_field = type->child(child_index).get();
|
539
|
+
const auto& field_name = child_field->name();
|
540
|
+
const auto field_name_keep = field_name_;
|
541
|
+
field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
|
542
|
+
const auto child_array = array.child(child_index);
|
543
|
+
const auto index_keep = index_;
|
544
|
+
index_ = array.value_offset(index_);
|
545
|
+
check_status(child_array->Accept(this), tag);
|
546
|
+
index_ = index_keep;
|
547
|
+
field_name_ = field_name_keep;
|
548
|
+
}
|
549
|
+
|
550
|
+
ArrayValueConverter* array_value_converter_;
|
551
|
+
int64_t index_;
|
552
|
+
VALUE field_name_;
|
553
|
+
VALUE result_;
|
554
|
+
};
|
555
|
+
|
556
|
+
class DictionaryArrayValueConverter : public arrow::ArrayVisitor {
|
557
|
+
public:
|
558
|
+
explicit DictionaryArrayValueConverter(ArrayValueConverter* converter)
|
559
|
+
: array_value_converter_(converter),
|
560
|
+
index_(0),
|
561
|
+
result_(Qnil) {
|
562
|
+
}
|
563
|
+
|
564
|
+
VALUE convert(const arrow::DictionaryArray& array,
|
565
|
+
const int64_t index) {
|
566
|
+
index_ = index;
|
567
|
+
auto indices = array.indices().get();
|
568
|
+
check_status(indices->Accept(this),
|
569
|
+
"[raw-records][dictionary-array]");
|
570
|
+
return result_;
|
571
|
+
}
|
572
|
+
|
573
|
+
// TODO: Convert to real value.
|
574
|
+
#define VISIT(TYPE) \
|
575
|
+
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
|
576
|
+
result_ = convert_value(array, index_); \
|
577
|
+
return arrow::Status::OK(); \
|
578
|
+
}
|
579
|
+
|
580
|
+
VISIT(Int8)
|
581
|
+
VISIT(Int16)
|
582
|
+
VISIT(Int32)
|
583
|
+
VISIT(Int64)
|
584
|
+
|
585
|
+
#undef VISIT
|
586
|
+
|
587
|
+
private:
|
588
|
+
template <typename ArrayType>
|
589
|
+
inline VALUE convert_value(const ArrayType& array,
|
590
|
+
const int64_t i) {
|
591
|
+
return array_value_converter_->convert(array, i);
|
592
|
+
}
|
593
|
+
|
594
|
+
ArrayValueConverter* array_value_converter_;
|
595
|
+
int64_t index_;
|
596
|
+
VALUE result_;
|
597
|
+
};
|
598
|
+
|
599
|
+
class Converter {
|
600
|
+
public:
|
601
|
+
explicit Converter()
|
602
|
+
: array_value_converter_(),
|
603
|
+
list_array_value_converter_(&array_value_converter_),
|
604
|
+
struct_array_value_converter_(&array_value_converter_),
|
605
|
+
union_array_value_converter_(&array_value_converter_),
|
606
|
+
dictionary_array_value_converter_(&array_value_converter_) {
|
607
|
+
array_value_converter_.
|
608
|
+
set_sub_value_converters(&list_array_value_converter_,
|
609
|
+
&struct_array_value_converter_,
|
610
|
+
&union_array_value_converter_,
|
611
|
+
&dictionary_array_value_converter_);
|
612
|
+
}
|
613
|
+
|
614
|
+
template <typename ArrayType>
|
615
|
+
inline VALUE convert_value(const ArrayType& array,
|
616
|
+
const int64_t i) {
|
617
|
+
return array_value_converter_.convert(array, i);
|
618
|
+
}
|
619
|
+
|
620
|
+
ArrayValueConverter array_value_converter_;
|
621
|
+
ListArrayValueConverter list_array_value_converter_;
|
622
|
+
StructArrayValueConverter struct_array_value_converter_;
|
623
|
+
UnionArrayValueConverter union_array_value_converter_;
|
624
|
+
DictionaryArrayValueConverter dictionary_array_value_converter_;
|
625
|
+
};
|
626
|
+
}
|