red-arrow 8.0.0 → 24.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -7
- data/ext/arrow/arrow.cpp +67 -0
- data/ext/arrow/converters.cpp +10 -0
- data/ext/arrow/converters.hpp +310 -46
- data/ext/arrow/extconf.rb +41 -22
- data/ext/arrow/raw-records.cpp +165 -2
- data/ext/arrow/red-arrow.hpp +2 -0
- data/ext/arrow/values.cpp +6 -2
- data/lib/arrow/array-builder.rb +89 -14
- data/{test/test-time32-data-type.rb → lib/arrow/array-computable.rb} +24 -16
- data/{test/test-buffer.rb → lib/arrow/array-statistics.rb} +19 -24
- data/lib/arrow/array.rb +40 -4
- data/lib/arrow/chunked-array.rb +56 -1
- data/lib/arrow/column-containable.rb +9 -0
- data/lib/arrow/column.rb +49 -4
- data/{test/test-tensor.rb → lib/arrow/csv-write-options.rb} +28 -31
- data/lib/arrow/data-type.rb +17 -3
- data/lib/arrow/decimal128-array-builder.rb +16 -6
- data/lib/arrow/decimal128.rb +14 -0
- data/lib/arrow/decimal256-array-builder.rb +16 -6
- data/lib/arrow/decimal256.rb +14 -0
- data/{test/test-float-scalar.rb → lib/arrow/dense-union-array-builder.rb} +27 -24
- data/{test/test-boolean-scalar.rb → lib/arrow/dense-union-array.rb} +7 -7
- data/lib/arrow/duration-array-builder.rb +27 -0
- data/lib/arrow/duration-array.rb +24 -0
- data/lib/arrow/duration-data-type.rb +32 -0
- data/lib/arrow/expression.rb +6 -2
- data/lib/arrow/field-containable.rb +1 -1
- data/lib/arrow/field.rb +44 -3
- data/lib/arrow/fixed-size-list-array-builder.rb +29 -0
- data/lib/arrow/fixed-size-list-data-type.rb +118 -0
- data/lib/arrow/function.rb +0 -1
- data/lib/arrow/half-float-array-builder.rb +32 -0
- data/lib/arrow/half-float-array.rb +24 -0
- data/lib/arrow/half-float.rb +118 -0
- data/{test/helper/fixture.rb → lib/arrow/input-referable.rb} +7 -6
- data/lib/arrow/jruby/array-builder.rb +114 -0
- data/lib/arrow/jruby/array.rb +109 -0
- data/lib/arrow/jruby/chunked-array.rb +36 -0
- data/lib/arrow/jruby/compression-type.rb +26 -0
- data/lib/arrow/jruby/csv-read-options.rb +32 -0
- data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
- data/lib/arrow/jruby/decimal128.rb +28 -0
- data/lib/arrow/jruby/decimal256.rb +28 -0
- data/{test/fixture/float-integer.csv → lib/arrow/jruby/error.rb} +7 -4
- data/lib/arrow/jruby/file-system.rb +24 -0
- data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
- data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
- data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/record-batch.rb} +8 -4
- data/{test/fixture/integer-float.csv → lib/arrow/jruby/sort-key.rb} +8 -4
- data/lib/arrow/jruby/sort-options.rb +24 -0
- data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
- data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
- data/lib/arrow/jruby/writable.rb +24 -0
- data/lib/arrow/jruby.rb +52 -0
- data/{test/test-date32-array.rb → lib/arrow/large-list-array-builder.rb} +10 -5
- data/lib/arrow/large-list-data-type.rb +83 -0
- data/lib/arrow/libraries.rb +140 -0
- data/lib/arrow/list-array-builder.rb +1 -68
- data/lib/arrow/list-data-type.rb +3 -38
- data/{test/test-dictionary-array.rb → lib/arrow/list-field-resolvable.rb} +26 -17
- data/lib/arrow/list-slice-options.rb +76 -0
- data/lib/arrow/list-values-appendable.rb +88 -0
- data/lib/arrow/loader.rb +15 -96
- data/{test/test-decimal128-array.rb → lib/arrow/make-struct-options.rb} +18 -18
- data/lib/arrow/raw-table-converter.rb +10 -3
- data/lib/arrow/raw-tensor-converter.rb +89 -0
- data/lib/arrow/record-batch-file-reader.rb +2 -0
- data/lib/arrow/record-batch-stream-reader.rb +2 -0
- data/lib/arrow/record-batch.rb +6 -2
- data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +5 -4
- data/lib/arrow/scalar.rb +67 -0
- data/lib/arrow/slicer.rb +61 -0
- data/lib/arrow/sort-key.rb +3 -3
- data/lib/arrow/sparse-union-array-builder.rb +56 -0
- data/lib/arrow/sparse-union-array.rb +26 -0
- data/lib/arrow/stream-decoder.rb +29 -0
- data/{test/test-decimal256-data-type.rb → lib/arrow/stream-listener.rb} +25 -9
- data/lib/arrow/string-array-builder.rb +30 -0
- data/lib/arrow/struct-array-builder.rb +0 -5
- data/lib/arrow/table-formatter.rb +38 -8
- data/lib/arrow/table-list-formatter.rb +3 -3
- data/lib/arrow/table-loader.rb +11 -5
- data/lib/arrow/table-saver.rb +4 -3
- data/lib/arrow/table-table-formatter.rb +7 -0
- data/lib/arrow/table.rb +180 -33
- data/lib/arrow/tensor.rb +144 -0
- data/lib/arrow/time-unit.rb +31 -0
- data/lib/arrow/time32-array-builder.rb +2 -14
- data/lib/arrow/time32-data-type.rb +9 -38
- data/lib/arrow/time64-array-builder.rb +2 -14
- data/lib/arrow/time64-data-type.rb +9 -38
- data/lib/arrow/timestamp-array-builder.rb +3 -15
- data/lib/arrow/timestamp-data-type.rb +9 -34
- data/{test/test-date64-array.rb → lib/arrow/timestamp-parser.rb} +14 -6
- data/lib/arrow/union-array-builder.rb +59 -0
- data/lib/arrow/union-array.rb +26 -0
- data/lib/arrow/version.rb +1 -1
- data/lib/arrow.rb +2 -7
- data/red-arrow.gemspec +74 -11
- metadata +85 -210
- data/test/fixture/TestOrcFile.test1.orc +0 -0
- data/test/fixture/with-header-float.csv +0 -20
- data/test/fixture/with-header.csv +0 -20
- data/test/fixture/without-header-float.csv +0 -19
- data/test/fixture/without-header.csv +0 -19
- data/test/helper/omittable.rb +0 -36
- data/test/helper.rb +0 -30
- data/test/raw-records/test-basic-arrays.rb +0 -395
- data/test/raw-records/test-dense-union-array.rb +0 -521
- data/test/raw-records/test-list-array.rb +0 -610
- data/test/raw-records/test-map-array.rb +0 -478
- data/test/raw-records/test-multiple-columns.rb +0 -65
- data/test/raw-records/test-sparse-union-array.rb +0 -511
- data/test/raw-records/test-struct-array.rb +0 -515
- data/test/raw-records/test-table.rb +0 -47
- data/test/run-test.rb +0 -71
- data/test/test-array-builder.rb +0 -136
- data/test/test-array.rb +0 -325
- data/test/test-bigdecimal.rb +0 -40
- data/test/test-binary-dictionary-array-builder.rb +0 -103
- data/test/test-chunked-array.rb +0 -183
- data/test/test-column.rb +0 -92
- data/test/test-csv-loader.rb +0 -250
- data/test/test-data-type.rb +0 -83
- data/test/test-decimal128-array-builder.rb +0 -112
- data/test/test-decimal128-data-type.rb +0 -31
- data/test/test-decimal128.rb +0 -102
- data/test/test-decimal256-array-builder.rb +0 -112
- data/test/test-decimal256-array.rb +0 -38
- data/test/test-decimal256.rb +0 -102
- data/test/test-dense-union-data-type.rb +0 -41
- data/test/test-dictionary-data-type.rb +0 -40
- data/test/test-expression.rb +0 -40
- data/test/test-feather.rb +0 -49
- data/test/test-field.rb +0 -91
- data/test/test-file-output-stream.rb +0 -54
- data/test/test-fixed-size-binary-array-builder.rb +0 -92
- data/test/test-fixed-size-binary-array.rb +0 -36
- data/test/test-function.rb +0 -210
- data/test/test-group.rb +0 -180
- data/test/test-list-array-builder.rb +0 -79
- data/test/test-list-array.rb +0 -32
- data/test/test-list-data-type.rb +0 -69
- data/test/test-map-array-builder.rb +0 -110
- data/test/test-map-array.rb +0 -33
- data/test/test-memory-view.rb +0 -434
- data/test/test-orc.rb +0 -173
- data/test/test-record-batch-builder.rb +0 -125
- data/test/test-record-batch-file-reader.rb +0 -115
- data/test/test-record-batch-iterator.rb +0 -37
- data/test/test-record-batch-reader.rb +0 -46
- data/test/test-record-batch.rb +0 -182
- data/test/test-schema.rb +0 -134
- data/test/test-slicer.rb +0 -487
- data/test/test-sort-indices.rb +0 -40
- data/test/test-sort-key.rb +0 -81
- data/test/test-sort-options.rb +0 -58
- data/test/test-sparse-union-data-type.rb +0 -41
- data/test/test-string-dictionary-array-builder.rb +0 -103
- data/test/test-struct-array-builder.rb +0 -184
- data/test/test-struct-array.rb +0 -94
- data/test/test-struct-data-type.rb +0 -112
- data/test/test-table.rb +0 -1123
- data/test/test-time.rb +0 -288
- data/test/test-time32-array.rb +0 -81
- data/test/test-time64-array.rb +0 -81
- data/test/test-time64-data-type.rb +0 -42
- data/test/test-timestamp-array.rb +0 -45
- data/test/test-timestamp-data-type.rb +0 -42
- data/test/values/test-basic-arrays.rb +0 -325
- data/test/values/test-dense-union-array.rb +0 -509
- data/test/values/test-dictionary-array.rb +0 -295
- data/test/values/test-list-array.rb +0 -571
- data/test/values/test-map-array.rb +0 -466
- data/test/values/test-sparse-union-array.rb +0 -500
- data/test/values/test-struct-array.rb +0 -512
data/ext/arrow/raw-records.cpp
CHANGED
|
@@ -84,12 +84,13 @@ namespace red_arrow {
|
|
|
84
84
|
VISIT(UInt16)
|
|
85
85
|
VISIT(UInt32)
|
|
86
86
|
VISIT(UInt64)
|
|
87
|
-
|
|
88
|
-
// VISIT(HalfFloat)
|
|
87
|
+
VISIT(HalfFloat)
|
|
89
88
|
VISIT(Float)
|
|
90
89
|
VISIT(Double)
|
|
91
90
|
VISIT(Binary)
|
|
91
|
+
VISIT(LargeBinary)
|
|
92
92
|
VISIT(String)
|
|
93
|
+
VISIT(LargeString)
|
|
93
94
|
VISIT(FixedSizeBinary)
|
|
94
95
|
VISIT(Date32)
|
|
95
96
|
VISIT(Date64)
|
|
@@ -99,7 +100,10 @@ namespace red_arrow {
|
|
|
99
100
|
VISIT(MonthInterval)
|
|
100
101
|
VISIT(DayTimeInterval)
|
|
101
102
|
VISIT(MonthDayNanoInterval)
|
|
103
|
+
VISIT(Duration)
|
|
102
104
|
VISIT(List)
|
|
105
|
+
VISIT(LargeList)
|
|
106
|
+
VISIT(FixedSizeList)
|
|
103
107
|
VISIT(Struct)
|
|
104
108
|
VISIT(Map)
|
|
105
109
|
VISIT(SparseUnion)
|
|
@@ -145,6 +149,133 @@ namespace red_arrow {
|
|
|
145
149
|
// The number of columns.
|
|
146
150
|
const int n_columns_;
|
|
147
151
|
};
|
|
152
|
+
|
|
153
|
+
class RawRecordsProducer : private Converter, public arrow::ArrayVisitor {
|
|
154
|
+
public:
|
|
155
|
+
explicit RawRecordsProducer()
|
|
156
|
+
: Converter(),
|
|
157
|
+
record_(Qnil),
|
|
158
|
+
column_index_(0),
|
|
159
|
+
row_offset_(0) {
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
void produce(const arrow::RecordBatch& record_batch) {
|
|
163
|
+
rb::protect([&] {
|
|
164
|
+
const auto n_columns = record_batch.num_columns();
|
|
165
|
+
const auto n_rows = record_batch.num_rows();
|
|
166
|
+
for (int64_t i = 0; i < n_rows; ++i) {
|
|
167
|
+
record_ = rb_ary_new_capa(n_columns);
|
|
168
|
+
row_offset_ = i;
|
|
169
|
+
for (int i = 0; i < n_columns; ++i) {
|
|
170
|
+
const auto array = record_batch.column(i).get();
|
|
171
|
+
column_index_ = i;
|
|
172
|
+
check_status(array->Accept(this),
|
|
173
|
+
"[record-batch][each-raw-record]");
|
|
174
|
+
}
|
|
175
|
+
rb_yield(record_);
|
|
176
|
+
}
|
|
177
|
+
return Qnil;
|
|
178
|
+
});
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
void produce(const arrow::Table& table) {
|
|
182
|
+
rb::protect([&] {
|
|
183
|
+
const auto n_columns = table.num_columns();
|
|
184
|
+
const auto n_rows = table.num_rows();
|
|
185
|
+
std::vector<int> chunk_indexes(n_columns);
|
|
186
|
+
std::vector<int64_t> row_offsets(n_columns);
|
|
187
|
+
for (int64_t i_row = 0; i_row < n_rows; ++i_row) {
|
|
188
|
+
record_ = rb_ary_new_capa(n_columns);
|
|
189
|
+
for (int i_column = 0; i_column < n_columns; ++i_column) {
|
|
190
|
+
column_index_ = i_column;
|
|
191
|
+
const auto chunked_array = table.column(i_column).get();
|
|
192
|
+
auto& chunk_index = chunk_indexes[i_column];
|
|
193
|
+
auto& row_offset = row_offsets[i_column];
|
|
194
|
+
auto array = chunked_array->chunk(chunk_index).get();
|
|
195
|
+
while (array->length() == row_offset) {
|
|
196
|
+
++chunk_index;
|
|
197
|
+
row_offset = 0;
|
|
198
|
+
array = chunked_array->chunk(chunk_index).get();
|
|
199
|
+
}
|
|
200
|
+
row_offset_ = row_offset;
|
|
201
|
+
check_status(array->Accept(this),
|
|
202
|
+
"[table][each-raw-record]");
|
|
203
|
+
++row_offset;
|
|
204
|
+
}
|
|
205
|
+
rb_yield(record_);
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
return Qnil;
|
|
209
|
+
});
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
#define VISIT(TYPE) \
|
|
213
|
+
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
|
|
214
|
+
convert(array); \
|
|
215
|
+
return arrow::Status::OK(); \
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
VISIT(Null)
|
|
219
|
+
VISIT(Boolean)
|
|
220
|
+
VISIT(Int8)
|
|
221
|
+
VISIT(Int16)
|
|
222
|
+
VISIT(Int32)
|
|
223
|
+
VISIT(Int64)
|
|
224
|
+
VISIT(UInt8)
|
|
225
|
+
VISIT(UInt16)
|
|
226
|
+
VISIT(UInt32)
|
|
227
|
+
VISIT(UInt64)
|
|
228
|
+
VISIT(HalfFloat)
|
|
229
|
+
VISIT(Float)
|
|
230
|
+
VISIT(Double)
|
|
231
|
+
VISIT(Binary)
|
|
232
|
+
VISIT(LargeBinary)
|
|
233
|
+
VISIT(String)
|
|
234
|
+
VISIT(LargeString)
|
|
235
|
+
VISIT(FixedSizeBinary)
|
|
236
|
+
VISIT(Date32)
|
|
237
|
+
VISIT(Date64)
|
|
238
|
+
VISIT(Time32)
|
|
239
|
+
VISIT(Time64)
|
|
240
|
+
VISIT(Timestamp)
|
|
241
|
+
VISIT(MonthInterval)
|
|
242
|
+
VISIT(DayTimeInterval)
|
|
243
|
+
VISIT(MonthDayNanoInterval)
|
|
244
|
+
VISIT(Duration)
|
|
245
|
+
VISIT(List)
|
|
246
|
+
VISIT(LargeList)
|
|
247
|
+
VISIT(FixedSizeList)
|
|
248
|
+
VISIT(Struct)
|
|
249
|
+
VISIT(Map)
|
|
250
|
+
VISIT(SparseUnion)
|
|
251
|
+
VISIT(DenseUnion)
|
|
252
|
+
VISIT(Dictionary)
|
|
253
|
+
VISIT(Decimal128)
|
|
254
|
+
VISIT(Decimal256)
|
|
255
|
+
// TODO
|
|
256
|
+
// VISIT(Extension)
|
|
257
|
+
|
|
258
|
+
#undef VISIT
|
|
259
|
+
|
|
260
|
+
private:
|
|
261
|
+
template <typename ArrayType>
|
|
262
|
+
void convert(const ArrayType& array) {
|
|
263
|
+
auto value = Qnil;
|
|
264
|
+
if (!array.IsNull(row_offset_)) {
|
|
265
|
+
value = convert_value(array, row_offset_);
|
|
266
|
+
}
|
|
267
|
+
rb_ary_store(record_, column_index_, value);
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
// Destination for converted record.
|
|
271
|
+
VALUE record_;
|
|
272
|
+
|
|
273
|
+
// The current column index.
|
|
274
|
+
int column_index_;
|
|
275
|
+
|
|
276
|
+
// The current row offset.
|
|
277
|
+
int64_t row_offset_;
|
|
278
|
+
};
|
|
148
279
|
}
|
|
149
280
|
|
|
150
281
|
VALUE
|
|
@@ -182,4 +313,36 @@ namespace red_arrow {
|
|
|
182
313
|
|
|
183
314
|
return records;
|
|
184
315
|
}
|
|
316
|
+
|
|
317
|
+
VALUE
|
|
318
|
+
record_batch_each_raw_record(VALUE rb_record_batch) {
|
|
319
|
+
auto garrow_record_batch = GARROW_RECORD_BATCH(RVAL2GOBJ(rb_record_batch));
|
|
320
|
+
auto record_batch = garrow_record_batch_get_raw(garrow_record_batch).get();
|
|
321
|
+
RETURN_SIZED_ENUMERATOR(rb_record_batch, 0, nullptr, record_batch->num_rows());
|
|
322
|
+
|
|
323
|
+
try {
|
|
324
|
+
RawRecordsProducer producer;
|
|
325
|
+
producer.produce(*record_batch);
|
|
326
|
+
} catch (rb::State& state) {
|
|
327
|
+
state.jump();
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
return Qnil;
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
VALUE
|
|
334
|
+
table_each_raw_record(VALUE rb_table) {
|
|
335
|
+
auto garrow_table = GARROW_TABLE(RVAL2GOBJ(rb_table));
|
|
336
|
+
auto table = garrow_table_get_raw(garrow_table).get();
|
|
337
|
+
RETURN_SIZED_ENUMERATOR(rb_table, 0, nullptr, table->num_rows());
|
|
338
|
+
|
|
339
|
+
try {
|
|
340
|
+
RawRecordsProducer producer;
|
|
341
|
+
producer.produce(*table);
|
|
342
|
+
} catch (rb::State& state) {
|
|
343
|
+
state.jump();
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
return Qnil;
|
|
347
|
+
}
|
|
185
348
|
}
|
data/ext/arrow/red-arrow.hpp
CHANGED
|
@@ -59,6 +59,8 @@ namespace red_arrow {
|
|
|
59
59
|
|
|
60
60
|
VALUE record_batch_raw_records(VALUE obj);
|
|
61
61
|
VALUE table_raw_records(VALUE obj);
|
|
62
|
+
VALUE record_batch_each_raw_record(VALUE obj);
|
|
63
|
+
VALUE table_each_raw_record(VALUE obj);
|
|
62
64
|
|
|
63
65
|
inline VALUE time_unit_to_scale(const arrow::TimeUnit::type unit) {
|
|
64
66
|
switch (unit) {
|
data/ext/arrow/values.cpp
CHANGED
|
@@ -65,12 +65,13 @@ namespace red_arrow {
|
|
|
65
65
|
VISIT(UInt16)
|
|
66
66
|
VISIT(UInt32)
|
|
67
67
|
VISIT(UInt64)
|
|
68
|
-
|
|
69
|
-
// VISIT(HalfFloat)
|
|
68
|
+
VISIT(HalfFloat)
|
|
70
69
|
VISIT(Float)
|
|
71
70
|
VISIT(Double)
|
|
72
71
|
VISIT(Binary)
|
|
72
|
+
VISIT(LargeBinary)
|
|
73
73
|
VISIT(String)
|
|
74
|
+
VISIT(LargeString)
|
|
74
75
|
VISIT(FixedSizeBinary)
|
|
75
76
|
VISIT(Date32)
|
|
76
77
|
VISIT(Date64)
|
|
@@ -80,7 +81,10 @@ namespace red_arrow {
|
|
|
80
81
|
VISIT(MonthInterval)
|
|
81
82
|
VISIT(DayTimeInterval)
|
|
82
83
|
VISIT(MonthDayNanoInterval)
|
|
84
|
+
VISIT(Duration)
|
|
83
85
|
VISIT(List)
|
|
86
|
+
VISIT(LargeList)
|
|
87
|
+
VISIT(FixedSizeList)
|
|
84
88
|
VISIT(Struct)
|
|
85
89
|
VISIT(Map)
|
|
86
90
|
VISIT(SparseUnion)
|
data/lib/arrow/array-builder.rb
CHANGED
|
@@ -33,6 +33,11 @@ module Arrow
|
|
|
33
33
|
end
|
|
34
34
|
if builder_info
|
|
35
35
|
builder = builder_info[:builder]
|
|
36
|
+
if builder.nil? and builder_info[:builder_type]
|
|
37
|
+
builder = create_builder(builder_info)
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
if builder
|
|
36
41
|
builder.build(values)
|
|
37
42
|
else
|
|
38
43
|
Arrow::StringArray.new(values)
|
|
@@ -69,14 +74,23 @@ module Arrow
|
|
|
69
74
|
detected: true,
|
|
70
75
|
}
|
|
71
76
|
when Integer
|
|
72
|
-
|
|
77
|
+
builder_info ||= {}
|
|
78
|
+
min = builder_info[:min] || value
|
|
79
|
+
max = builder_info[:max] || value
|
|
80
|
+
min = value if value < min
|
|
81
|
+
max = value if value > max
|
|
82
|
+
|
|
83
|
+
if builder_info[:builder_type] == :int || value < 0
|
|
73
84
|
{
|
|
74
|
-
|
|
75
|
-
|
|
85
|
+
builder_type: :int,
|
|
86
|
+
min: min,
|
|
87
|
+
max: max,
|
|
76
88
|
}
|
|
77
89
|
else
|
|
78
90
|
{
|
|
79
|
-
|
|
91
|
+
builder_type: :uint,
|
|
92
|
+
min: min,
|
|
93
|
+
max: max,
|
|
80
94
|
}
|
|
81
95
|
end
|
|
82
96
|
when Time
|
|
@@ -121,28 +135,44 @@ module Arrow
|
|
|
121
135
|
detected: true,
|
|
122
136
|
}
|
|
123
137
|
when BigDecimal
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
builder: Decimal128ArrayBuilder.new,
|
|
127
|
-
}
|
|
128
|
-
else
|
|
138
|
+
builder_info ||= {}
|
|
139
|
+
if builder_info[:builder] or value.nan? or value.infinite?
|
|
129
140
|
{
|
|
130
|
-
builder:
|
|
141
|
+
builder: StringArrayBuilder.new,
|
|
131
142
|
detected: true,
|
|
132
143
|
}
|
|
144
|
+
else
|
|
145
|
+
precision = [builder_info[:precision] || 0, value.precision].max
|
|
146
|
+
scale = [builder_info[:scale] || 0, value.scale].max
|
|
147
|
+
if precision <= Decimal128DataType::MAX_PRECISION
|
|
148
|
+
{
|
|
149
|
+
builder_type: :decimal128,
|
|
150
|
+
precision: precision,
|
|
151
|
+
scale: scale,
|
|
152
|
+
}
|
|
153
|
+
else
|
|
154
|
+
{
|
|
155
|
+
builder_type: :decimal256,
|
|
156
|
+
precision: precision,
|
|
157
|
+
scale: scale,
|
|
158
|
+
}
|
|
159
|
+
end
|
|
133
160
|
end
|
|
134
161
|
when ::Array
|
|
135
|
-
sub_builder_info =
|
|
162
|
+
sub_builder_info = builder_info && builder_info[:value_builder_info]
|
|
136
163
|
value.each do |sub_value|
|
|
137
164
|
sub_builder_info = detect_builder_info(sub_value, sub_builder_info)
|
|
138
165
|
break if sub_builder_info and sub_builder_info[:detected]
|
|
139
166
|
end
|
|
140
|
-
if sub_builder_info
|
|
141
|
-
|
|
167
|
+
if sub_builder_info
|
|
168
|
+
sub_builder = sub_builder_info[:builder] || create_builder(sub_builder_info)
|
|
169
|
+
return sub_builder_info unless sub_builder
|
|
170
|
+
sub_value_data_type = sub_builder.value_data_type
|
|
142
171
|
field = Field.new("item", sub_value_data_type)
|
|
143
172
|
{
|
|
144
173
|
builder: ListArrayBuilder.new(ListDataType.new(field)),
|
|
145
|
-
|
|
174
|
+
value_builder_info: sub_builder_info,
|
|
175
|
+
detected: sub_builder_info[:detected],
|
|
146
176
|
}
|
|
147
177
|
else
|
|
148
178
|
builder_info
|
|
@@ -154,6 +184,51 @@ module Arrow
|
|
|
154
184
|
}
|
|
155
185
|
end
|
|
156
186
|
end
|
|
187
|
+
|
|
188
|
+
def create_builder(builder_info)
|
|
189
|
+
builder_type = builder_info[:builder_type]
|
|
190
|
+
case builder_type
|
|
191
|
+
when :decimal128
|
|
192
|
+
data_type = Decimal128DataType.new(builder_info[:precision],
|
|
193
|
+
builder_info[:scale])
|
|
194
|
+
Decimal128ArrayBuilder.new(data_type)
|
|
195
|
+
when :decimal256
|
|
196
|
+
data_type = Decimal256DataType.new(builder_info[:precision],
|
|
197
|
+
builder_info[:scale])
|
|
198
|
+
Decimal256ArrayBuilder.new(data_type)
|
|
199
|
+
when :int
|
|
200
|
+
min = builder_info[:min]
|
|
201
|
+
max = builder_info[:max]
|
|
202
|
+
|
|
203
|
+
if GLib::MININT8 <= min && max <= GLib::MAXINT8
|
|
204
|
+
Int8ArrayBuilder.new
|
|
205
|
+
elsif GLib::MININT16 <= min && max <= GLib::MAXINT16
|
|
206
|
+
Int16ArrayBuilder.new
|
|
207
|
+
elsif GLib::MININT32 <= min && max <= GLib::MAXINT32
|
|
208
|
+
Int32ArrayBuilder.new
|
|
209
|
+
elsif GLib::MININT64 <= min && max <= GLib::MAXINT64
|
|
210
|
+
Int64ArrayBuilder.new
|
|
211
|
+
else
|
|
212
|
+
StringArrayBuilder.new
|
|
213
|
+
end
|
|
214
|
+
when :uint
|
|
215
|
+
max = builder_info[:max]
|
|
216
|
+
|
|
217
|
+
if max <= GLib::MAXUINT8
|
|
218
|
+
UInt8ArrayBuilder.new
|
|
219
|
+
elsif max <= GLib::MAXUINT16
|
|
220
|
+
UInt16ArrayBuilder.new
|
|
221
|
+
elsif max <= GLib::MAXUINT32
|
|
222
|
+
UInt32ArrayBuilder.new
|
|
223
|
+
elsif max <= GLib::MAXUINT64
|
|
224
|
+
UInt64ArrayBuilder.new
|
|
225
|
+
else
|
|
226
|
+
StringArrayBuilder.new
|
|
227
|
+
end
|
|
228
|
+
else
|
|
229
|
+
nil
|
|
230
|
+
end
|
|
231
|
+
end
|
|
157
232
|
end
|
|
158
233
|
|
|
159
234
|
def build(values)
|
|
@@ -15,28 +15,36 @@
|
|
|
15
15
|
# specific language governing permissions and limitations
|
|
16
16
|
# under the License.
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
Arrow::Time32DataType.new(Arrow::TimeUnit::MILLI).to_s)
|
|
18
|
+
module Arrow
|
|
19
|
+
module ArrayComputable
|
|
20
|
+
def min(options: nil)
|
|
21
|
+
compute("min", options: options).value
|
|
23
22
|
end
|
|
24
23
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
Arrow::Time32DataType.new(:milli).to_s)
|
|
24
|
+
def max(options: nil)
|
|
25
|
+
compute("max", options: options).value
|
|
28
26
|
end
|
|
29
27
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
assert_equal("time32[ms]",
|
|
33
|
-
data_type.to_s)
|
|
28
|
+
def uniq
|
|
29
|
+
unique.values
|
|
34
30
|
end
|
|
35
31
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
32
|
+
# Finds the index of the first occurrence of a given value.
|
|
33
|
+
#
|
|
34
|
+
# @param value [Object] The value to be compared.
|
|
35
|
+
#
|
|
36
|
+
# @return [Integer] The index of the first occurrence of a given
|
|
37
|
+
# value on found, -1 on not found.
|
|
38
|
+
#
|
|
39
|
+
# @since 12.0.0
|
|
40
|
+
def index(value)
|
|
41
|
+
value = Scalar.resolve(value, value_data_type)
|
|
42
|
+
compute("index", options: {value: value}).value
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
private
|
|
46
|
+
def compute(name, options: nil)
|
|
47
|
+
Function.find(name).execute([self], options).value
|
|
40
48
|
end
|
|
41
49
|
end
|
|
42
50
|
end
|
|
@@ -15,33 +15,28 @@
|
|
|
15
15
|
# specific language governing permissions and limitations
|
|
16
16
|
# under the License.
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
sub_test_case("instance methods") do
|
|
31
|
-
def setup
|
|
32
|
-
@buffer = Arrow::Buffer.new("Hello")
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
sub_test_case("#==") do
|
|
36
|
-
test("Arrow::Buffer") do
|
|
37
|
-
assert do
|
|
38
|
-
@buffer == @buffer
|
|
18
|
+
module Arrow
|
|
19
|
+
class ArrayStatistics
|
|
20
|
+
if method_defined?(:null_count_exact)
|
|
21
|
+
alias_method :null_count_raw, :null_count
|
|
22
|
+
def null_count
|
|
23
|
+
return nil unless has_null_count?
|
|
24
|
+
if null_count_exact?
|
|
25
|
+
null_count_exact
|
|
26
|
+
else
|
|
27
|
+
null_count_approximate
|
|
39
28
|
end
|
|
40
29
|
end
|
|
30
|
+
end
|
|
41
31
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
32
|
+
if method_defined?(:distinct_count_exact)
|
|
33
|
+
alias_method :distinct_count_raw, :distinct_count
|
|
34
|
+
def distinct_count
|
|
35
|
+
return nil unless has_distinct_count?
|
|
36
|
+
if distinct_count_exact?
|
|
37
|
+
distinct_count_exact
|
|
38
|
+
else
|
|
39
|
+
distinct_count_approximate
|
|
45
40
|
end
|
|
46
41
|
end
|
|
47
42
|
end
|
data/lib/arrow/array.rb
CHANGED
|
@@ -18,8 +18,11 @@
|
|
|
18
18
|
module Arrow
|
|
19
19
|
class Array
|
|
20
20
|
include Enumerable
|
|
21
|
+
|
|
22
|
+
include ArrayComputable
|
|
21
23
|
include GenericFilterable
|
|
22
24
|
include GenericTakeable
|
|
25
|
+
include InputReferable
|
|
23
26
|
|
|
24
27
|
class << self
|
|
25
28
|
def new(*args)
|
|
@@ -30,9 +33,32 @@ module Arrow
|
|
|
30
33
|
end
|
|
31
34
|
|
|
32
35
|
def builder_class
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
+
local_name = name.split("::").last
|
|
37
|
+
builder_class_name = "#{local_name}Builder"
|
|
38
|
+
return nil unless Arrow.const_defined?(builder_class_name)
|
|
39
|
+
Arrow.const_get(builder_class_name)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# @api private
|
|
43
|
+
def try_convert(value)
|
|
44
|
+
case value
|
|
45
|
+
when ::Array
|
|
46
|
+
begin
|
|
47
|
+
new(value)
|
|
48
|
+
rescue ArgumentError
|
|
49
|
+
nil
|
|
50
|
+
end
|
|
51
|
+
else
|
|
52
|
+
if value.respond_to?(:to_arrow_array)
|
|
53
|
+
begin
|
|
54
|
+
value.to_arrow_array
|
|
55
|
+
rescue RangeError
|
|
56
|
+
nil
|
|
57
|
+
end
|
|
58
|
+
else
|
|
59
|
+
nil
|
|
60
|
+
end
|
|
61
|
+
end
|
|
36
62
|
end
|
|
37
63
|
end
|
|
38
64
|
|
|
@@ -67,6 +93,8 @@ module Arrow
|
|
|
67
93
|
equal_options(other, options)
|
|
68
94
|
end
|
|
69
95
|
|
|
96
|
+
alias_method :size, :length
|
|
97
|
+
|
|
70
98
|
def each
|
|
71
99
|
return to_enum(__method__) unless block_given?
|
|
72
100
|
|
|
@@ -87,6 +115,14 @@ module Arrow
|
|
|
87
115
|
self
|
|
88
116
|
end
|
|
89
117
|
|
|
118
|
+
def to_arrow_array
|
|
119
|
+
self
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
def to_arrow_chunked_array
|
|
123
|
+
ChunkedArray.new([self])
|
|
124
|
+
end
|
|
125
|
+
|
|
90
126
|
alias_method :value_data_type_raw, :value_data_type
|
|
91
127
|
def value_data_type
|
|
92
128
|
@value_data_type ||= value_data_type_raw
|
|
@@ -217,7 +253,7 @@ module Arrow
|
|
|
217
253
|
"[array][resolve] need to implement " +
|
|
218
254
|
"a feature that building #{value_data_type} array " +
|
|
219
255
|
"from raw Ruby Array"
|
|
220
|
-
raise
|
|
256
|
+
raise NotImplementedError, message
|
|
221
257
|
end
|
|
222
258
|
other_array
|
|
223
259
|
elsif other_array.respond_to?(:value_data_type)
|
data/lib/arrow/chunked-array.rb
CHANGED
|
@@ -18,8 +18,35 @@
|
|
|
18
18
|
module Arrow
|
|
19
19
|
class ChunkedArray
|
|
20
20
|
include Enumerable
|
|
21
|
+
|
|
22
|
+
include ArrayComputable
|
|
21
23
|
include GenericFilterable
|
|
22
24
|
include GenericTakeable
|
|
25
|
+
include InputReferable
|
|
26
|
+
|
|
27
|
+
def freeze
|
|
28
|
+
unless frozen?
|
|
29
|
+
# Ensure caching
|
|
30
|
+
chunks
|
|
31
|
+
end
|
|
32
|
+
super
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def to_arrow
|
|
36
|
+
self
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def to_arrow_array
|
|
40
|
+
if n_chunks.zero?
|
|
41
|
+
value_data_type.build_array([])
|
|
42
|
+
else
|
|
43
|
+
combine
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def to_arrow_chunked_array
|
|
48
|
+
self
|
|
49
|
+
end
|
|
23
50
|
|
|
24
51
|
alias_method :size, :n_rows
|
|
25
52
|
unless method_defined?(:length)
|
|
@@ -28,7 +55,16 @@ module Arrow
|
|
|
28
55
|
|
|
29
56
|
alias_method :chunks_raw, :chunks
|
|
30
57
|
def chunks
|
|
31
|
-
@chunks ||= chunks_raw
|
|
58
|
+
@chunks ||= chunks_raw.tap do |_chunks|
|
|
59
|
+
_chunks.each do |chunk|
|
|
60
|
+
share_input(chunk)
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
alias_method :get_chunk_raw, :get_chunk
|
|
66
|
+
def get_chunk(i)
|
|
67
|
+
chunks[i]
|
|
32
68
|
end
|
|
33
69
|
|
|
34
70
|
def null?(i)
|
|
@@ -87,5 +123,24 @@ module Arrow
|
|
|
87
123
|
first_chunk.class.new(to_a)
|
|
88
124
|
end
|
|
89
125
|
end
|
|
126
|
+
|
|
127
|
+
def count(options: nil)
|
|
128
|
+
compute("count", options: options).value
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
def sum(options: nil)
|
|
132
|
+
compute("sum", options: options).value
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def unique
|
|
136
|
+
compute("unique")
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
def cast(target_data_type, options: nil)
|
|
140
|
+
casted_chunks = chunks.collect do |chunk|
|
|
141
|
+
chunk.cast(target_data_type, options)
|
|
142
|
+
end
|
|
143
|
+
self.class.new(casted_chunks)
|
|
144
|
+
end
|
|
90
145
|
end
|
|
91
146
|
end
|
|
@@ -143,5 +143,14 @@ module Arrow
|
|
|
143
143
|
find_column(selector)
|
|
144
144
|
end
|
|
145
145
|
end
|
|
146
|
+
|
|
147
|
+
# Return column names in this object.
|
|
148
|
+
#
|
|
149
|
+
# @return [::Array<String>] column names.
|
|
150
|
+
#
|
|
151
|
+
# @since 11.0.0
|
|
152
|
+
def column_names
|
|
153
|
+
@column_names ||= columns.collect(&:name)
|
|
154
|
+
end
|
|
146
155
|
end
|
|
147
156
|
end
|