red-arrow 10.0.0 → 16.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/ext/arrow/arrow.cpp +31 -0
- data/ext/arrow/converters.hpp +45 -41
- data/ext/arrow/extconf.rb +16 -4
- data/ext/arrow/raw-records.cpp +155 -2
- data/ext/arrow/red-arrow.hpp +2 -0
- data/ext/arrow/values.cpp +1 -2
- data/lib/arrow/array-computable.rb +13 -0
- data/lib/arrow/array.rb +6 -1
- data/lib/arrow/chunked-array.rb +35 -1
- data/lib/arrow/column-containable.rb +9 -0
- data/lib/arrow/column.rb +1 -0
- data/lib/arrow/data-type.rb +9 -0
- data/lib/arrow/dense-union-array-builder.rb +49 -0
- data/lib/arrow/dense-union-array.rb +26 -0
- data/lib/arrow/expression.rb +6 -2
- data/lib/arrow/function.rb +0 -1
- data/lib/arrow/half-float-array-builder.rb +32 -0
- data/lib/arrow/half-float-array.rb +24 -0
- data/lib/arrow/half-float.rb +118 -0
- data/lib/arrow/input-referable.rb +29 -0
- data/lib/arrow/loader.rb +11 -0
- data/lib/arrow/raw-table-converter.rb +7 -5
- data/lib/arrow/record-batch-file-reader.rb +2 -0
- data/lib/arrow/record-batch-stream-reader.rb +2 -0
- data/lib/arrow/record-batch.rb +6 -2
- data/lib/arrow/scalar.rb +67 -0
- data/lib/arrow/slicer.rb +61 -0
- data/lib/arrow/sort-key.rb +3 -3
- data/lib/arrow/sparse-union-array-builder.rb +56 -0
- data/lib/arrow/sparse-union-array.rb +26 -0
- data/lib/arrow/struct-array-builder.rb +0 -5
- data/lib/arrow/table-loader.rb +11 -5
- data/lib/arrow/table-saver.rb +1 -0
- data/lib/arrow/table.rb +180 -33
- data/lib/arrow/tensor.rb +4 -0
- data/lib/arrow/timestamp-parser.rb +33 -0
- data/lib/arrow/union-array-builder.rb +59 -0
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +2 -1
- data/test/each-raw-record/test-basic-arrays.rb +411 -0
- data/test/each-raw-record/test-dense-union-array.rb +566 -0
- data/test/each-raw-record/test-dictionary-array.rb +341 -0
- data/test/each-raw-record/test-list-array.rb +628 -0
- data/test/each-raw-record/test-map-array.rb +507 -0
- data/test/each-raw-record/test-multiple-columns.rb +72 -0
- data/test/each-raw-record/test-sparse-union-array.rb +528 -0
- data/test/each-raw-record/test-struct-array.rb +529 -0
- data/test/each-raw-record/test-table.rb +47 -0
- data/test/helper/omittable.rb +13 -0
- data/test/helper.rb +1 -0
- data/test/raw-records/test-basic-arrays.rb +11 -1
- data/test/raw-records/test-dense-union-array.rb +90 -45
- data/test/raw-records/test-list-array.rb +28 -10
- data/test/raw-records/test-map-array.rb +39 -10
- data/test/raw-records/test-sparse-union-array.rb +86 -41
- data/test/raw-records/test-struct-array.rb +22 -8
- data/test/test-array.rb +7 -0
- data/test/test-chunked-array.rb +9 -0
- data/test/test-csv-loader.rb +39 -0
- data/test/test-data-type.rb +2 -1
- data/test/test-dense-union-array.rb +42 -0
- data/test/test-dense-union-data-type.rb +1 -1
- data/test/test-expression.rb +11 -0
- data/test/test-function.rb +7 -7
- data/test/test-group.rb +58 -58
- data/test/test-half-float-array.rb +43 -0
- data/test/test-half-float.rb +130 -0
- data/test/test-ractor.rb +34 -0
- data/test/test-record-batch-file-reader.rb +21 -0
- data/test/test-record-batch-stream-reader.rb +129 -0
- data/test/test-scalar.rb +65 -0
- data/test/test-slicer.rb +194 -129
- data/test/test-sparse-union-array.rb +38 -0
- data/test/test-table.rb +356 -40
- data/test/values/test-basic-arrays.rb +10 -0
- data/test/values/test-dense-union-array.rb +88 -45
- data/test/values/test-list-array.rb +26 -10
- data/test/values/test-map-array.rb +33 -10
- data/test/values/test-sparse-union-array.rb +84 -41
- data/test/values/test-struct-array.rb +20 -8
- metadata +62 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 021e0bb1714a4dbaf54622b66dcea24682f78d2045497a8109148620f6090e83
|
4
|
+
data.tar.gz: 8c61eedc5f5f94011b3c7a37aed40583c8b94f36e2e91f86a1d8c76fbe768a5c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 73e30cc705260b29ab70be6f98ad30075a5d6c07878c4af25b1650d48d9d7b5df6a7884eaf0db98f4e20cfa8ca0e3a25c49c625fe64f5c8aa4bd529dd10c9b8c
|
7
|
+
data.tar.gz: 30c65035dcf46283e7e64533604c16dcba2dd1b54e00bb8b7dfd10e9ecb2c8440f32a5cfc6adf24bcd7bee5a159aeba110d180f1bdd2d5746f28f66cabb696a8
|
data/README.md
CHANGED
@@ -25,9 +25,9 @@ Red Arrow is the Ruby bindings of Apache Arrow. Red Arrow is based on GObject In
|
|
25
25
|
|
26
26
|
[GObject Introspection](https://wiki.gnome.org/action/show/Projects/GObjectIntrospection) is middleware for language bindings of C libraries. GObject Introspection can generate language bindings automatically at runtime.
|
27
27
|
|
28
|
-
Red Arrow uses [Apache Arrow GLib](https://github.com/apache/arrow/tree/
|
28
|
+
Red Arrow uses [Apache Arrow GLib](https://github.com/apache/arrow/tree/main/c_glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Arrow.
|
29
29
|
|
30
|
-
Apache Arrow GLib is a C wrapper for [Apache Arrow C++](https://github.com/apache/arrow/tree/
|
30
|
+
Apache Arrow GLib is a C wrapper for [Apache Arrow C++](https://github.com/apache/arrow/tree/main/cpp). GObject Introspection can't use Apache Arrow C++ directly. Apache Arrow GLib is a bridge between Apache Arrow C++ and GObject Introspection.
|
31
31
|
|
32
32
|
The gobject-introspection gem provides Ruby bindings for GObject Introspection. Red Arrow uses GObject Introspection via the gobject-introspection gem.
|
33
33
|
|
@@ -56,7 +56,7 @@ table.save("/dev/shm/data-processed.arrow")
|
|
56
56
|
Note that you need to install Apache Arrow C++/GLib built from the `main` branch before preparing Red Arrow. See also:
|
57
57
|
|
58
58
|
* For Apache Arrow C++: https://arrow.apache.org/docs/developers/cpp/building.html
|
59
|
-
* For Apache Arrow GLib: https://github.com/apache/arrow/blob/
|
59
|
+
* For Apache Arrow GLib: https://github.com/apache/arrow/blob/main/c_glib/README.md
|
60
60
|
|
61
61
|
```console
|
62
62
|
$ cd ruby/red-arrow
|
data/ext/arrow/arrow.cpp
CHANGED
@@ -43,6 +43,26 @@ namespace red_arrow {
|
|
43
43
|
VALUE month;
|
44
44
|
VALUE nanosecond;
|
45
45
|
}
|
46
|
+
|
47
|
+
void
|
48
|
+
record_batch_reader_mark(gpointer object)
|
49
|
+
{
|
50
|
+
auto reader = GARROW_RECORD_BATCH_READER(object);
|
51
|
+
auto sources = garrow_record_batch_reader_get_sources(reader);
|
52
|
+
for (auto source = sources; sources; sources = g_list_next(sources)) {
|
53
|
+
rbgobj_gc_mark_instance(source->data);
|
54
|
+
}
|
55
|
+
}
|
56
|
+
|
57
|
+
void
|
58
|
+
execute_plan_mark(gpointer object)
|
59
|
+
{
|
60
|
+
auto plan = GARROW_EXECUTE_PLAN(object);
|
61
|
+
auto nodes = garrow_execute_plan_get_nodes(plan);
|
62
|
+
for (auto node = nodes; nodes; nodes = g_list_next(nodes)) {
|
63
|
+
rbgobj_gc_mark_instance(node->data);
|
64
|
+
}
|
65
|
+
}
|
46
66
|
}
|
47
67
|
|
48
68
|
extern "C" void Init_arrow() {
|
@@ -62,11 +82,17 @@ extern "C" void Init_arrow() {
|
|
62
82
|
rb_define_method(cArrowRecordBatch, "raw_records",
|
63
83
|
reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_raw_records),
|
64
84
|
0);
|
85
|
+
rb_define_method(cArrowRecordBatch, "each_raw_record",
|
86
|
+
reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_each_raw_record),
|
87
|
+
0);
|
65
88
|
|
66
89
|
auto cArrowTable = rb_const_get_at(mArrow, rb_intern("Table"));
|
67
90
|
rb_define_method(cArrowTable, "raw_records",
|
68
91
|
reinterpret_cast<rb::RawMethod>(red_arrow::table_raw_records),
|
69
92
|
0);
|
93
|
+
rb_define_method(cArrowTable, "each_raw_record",
|
94
|
+
reinterpret_cast<rb::RawMethod>(red_arrow::table_each_raw_record),
|
95
|
+
0);
|
70
96
|
|
71
97
|
red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date"));
|
72
98
|
|
@@ -93,4 +119,9 @@ extern "C" void Init_arrow() {
|
|
93
119
|
red_arrow::symbols::millisecond = ID2SYM(rb_intern("millisecond"));
|
94
120
|
red_arrow::symbols::month = ID2SYM(rb_intern("month"));
|
95
121
|
red_arrow::symbols::nanosecond = ID2SYM(rb_intern("nanosecond"));
|
122
|
+
|
123
|
+
rbgobj_register_mark_func(GARROW_TYPE_RECORD_BATCH_READER,
|
124
|
+
red_arrow::record_batch_reader_mark);
|
125
|
+
rbgobj_register_mark_func(GARROW_TYPE_EXECUTE_PLAN,
|
126
|
+
red_arrow::execute_plan_mark);
|
96
127
|
}
|
data/ext/arrow/converters.hpp
CHANGED
@@ -106,10 +106,34 @@ namespace red_arrow {
|
|
106
106
|
return ULL2NUM(array.Value(i));
|
107
107
|
}
|
108
108
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
109
|
+
// Converts the i-th half-precision value of array to a Ruby Float.
//
// The raw 16-bit value is decoded by hand using this layout:
//   | sign (1 bit) | exponent (5 bit) | fraction (10 bit) |
//
// array: the half float array to read from.
// i: the index of the element to convert.
//
// Returns a Ruby Float: +/-Infinity or NaN for an all-ones exponent,
// otherwise the decoded finite value (subnormals and signed zero
// included).
inline VALUE convert(const arrow::HalfFloatArray& array,
                     const int64_t i) {
  const auto value = array.Value(i);
  // All bit fields are kept as signed int on purpose: the unbiased
  // exponent (exponent - exponent_bias) is negative for magnitudes
  // below 1.0 and must not wrap around as it would with unsigned
  // arithmetic.
  constexpr int exponent_n_bits = 5;
  constexpr int exponent_mask = (1 << exponent_n_bits) - 1;
  constexpr int exponent_bias = 15;
  constexpr int fraction_n_bits = 10;
  constexpr int fraction_mask = (1 << fraction_n_bits) - 1;
  constexpr double fraction_denominator = 1 << fraction_n_bits;
  const int sign = value >> (exponent_n_bits + fraction_n_bits);
  const int exponent = (value >> fraction_n_bits) & exponent_mask;
  const int fraction = value & fraction_mask;
  const double signed_unit = (sign == 0) ? 1.0 : -1.0;
  if (exponent == exponent_mask) {
    // All-ones exponent: a zero fraction is infinity, anything else
    // is NaN.
    if (fraction != 0) {
      return DBL2NUM(std::nan(""));
    }
    return DBL2NUM(signed_unit * HUGE_VAL);
  }
  if (exponent == 0) {
    // Zero or subnormal: no implicit leading 1 and the scale is fixed
    // at 2^(1 - bias).
    return DBL2NUM(signed_unit *
                   std::ldexp(fraction / fraction_denominator,
                              1 - exponent_bias));
  }
  // Normal number: implicit leading 1.
  return DBL2NUM(signed_unit *
                 std::ldexp(1.0 + fraction / fraction_denominator,
                            exponent - exponent_bias));
}
|
113
137
|
|
114
138
|
inline VALUE convert(const arrow::FloatArray& array,
|
115
139
|
const int64_t i) {
|
@@ -320,8 +344,7 @@ namespace red_arrow {
|
|
320
344
|
VISIT(UInt16)
|
321
345
|
VISIT(UInt32)
|
322
346
|
VISIT(UInt64)
|
323
|
-
|
324
|
-
// VISIT(HalfFloat)
|
347
|
+
VISIT(HalfFloat)
|
325
348
|
VISIT(Float)
|
326
349
|
VISIT(Double)
|
327
350
|
VISIT(Binary)
|
@@ -427,8 +450,7 @@ namespace red_arrow {
|
|
427
450
|
VISIT(UInt16)
|
428
451
|
VISIT(UInt32)
|
429
452
|
VISIT(UInt64)
|
430
|
-
|
431
|
-
// VISIT(HalfFloat)
|
453
|
+
VISIT(HalfFloat)
|
432
454
|
VISIT(Float)
|
433
455
|
VISIT(Double)
|
434
456
|
VISIT(Binary)
|
@@ -530,8 +552,7 @@ namespace red_arrow {
|
|
530
552
|
VISIT(UInt16)
|
531
553
|
VISIT(UInt32)
|
532
554
|
VISIT(UInt64)
|
533
|
-
|
534
|
-
// VISIT(HalfFloat)
|
555
|
+
VISIT(HalfFloat)
|
535
556
|
VISIT(Float)
|
536
557
|
VISIT(Double)
|
537
558
|
VISIT(Binary)
|
@@ -634,8 +655,7 @@ namespace red_arrow {
|
|
634
655
|
VISIT(UInt16)
|
635
656
|
VISIT(UInt32)
|
636
657
|
VISIT(UInt64)
|
637
|
-
|
638
|
-
// VISIT(HalfFloat)
|
658
|
+
VISIT(HalfFloat)
|
639
659
|
VISIT(Float)
|
640
660
|
VISIT(Double)
|
641
661
|
VISIT(Binary)
|
@@ -665,25 +685,21 @@ namespace red_arrow {
|
|
665
685
|
private:
|
666
686
|
template <typename ArrayType>
|
667
687
|
inline void convert_value(const ArrayType& array) {
|
668
|
-
auto result = rb_hash_new();
|
669
688
|
if (array.IsNull(index_)) {
|
670
|
-
|
689
|
+
result_ = RUBY_Qnil;
|
671
690
|
} else {
|
672
|
-
|
673
|
-
field_name_,
|
674
|
-
array_value_converter_->convert(array, index_));
|
691
|
+
result_ = array_value_converter_->convert(array, index_);
|
675
692
|
}
|
676
|
-
result_ = result;
|
677
693
|
}
|
678
694
|
|
679
|
-
|
680
|
-
|
681
|
-
|
695
|
+
int8_t compute_child_id(const arrow::UnionArray& array,
|
696
|
+
arrow::UnionType* type,
|
697
|
+
const char* tag) {
|
682
698
|
const auto type_code = array.raw_type_codes()[index_];
|
683
699
|
if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
|
684
|
-
const auto
|
685
|
-
if (
|
686
|
-
return
|
700
|
+
const auto child_id = type->child_ids()[type_code];
|
701
|
+
if (child_id >= 0) {
|
702
|
+
return child_id;
|
687
703
|
}
|
688
704
|
}
|
689
705
|
check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
|
@@ -695,36 +711,25 @@ namespace red_arrow {
|
|
695
711
|
const auto type =
|
696
712
|
std::static_pointer_cast<arrow::UnionType>(array.type()).get();
|
697
713
|
const auto tag = "[raw-records][union-sparse-array]";
|
698
|
-
const auto
|
699
|
-
const auto
|
700
|
-
const auto& field_name = field->name();
|
701
|
-
const auto field_name_keep = field_name_;
|
702
|
-
field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
|
703
|
-
const auto field_array = array.field(index).get();
|
714
|
+
const auto child_id = compute_child_id(array, type, tag);
|
715
|
+
const auto field_array = array.field(child_id).get();
|
704
716
|
check_status(field_array->Accept(this), tag);
|
705
|
-
field_name_ = field_name_keep;
|
706
717
|
}
|
707
718
|
|
708
719
|
// Converts the element at index_ of a dense union array.
//
// The child array is selected through compute_child_id(). While that
// child is visited, index_ temporarily holds the offset returned by
// array.value_offset(); the previous index_ is put back afterwards.
void convert_dense(const arrow::DenseUnionArray& array) {
  const auto tag = "[raw-records][union-dense-array]";
  const auto type =
    std::static_pointer_cast<arrow::UnionType>(array.type()).get();
  const auto child_id = compute_child_id(array, type, tag);
  const auto child_array = array.field(child_id);
  const auto outer_index = index_;
  index_ = array.value_offset(outer_index);
  check_status(child_array->Accept(this), tag);
  index_ = outer_index;
}
|
724
730
|
|
725
731
|
ArrayValueConverter* array_value_converter_;
|
726
732
|
int64_t index_;
|
727
|
-
VALUE field_name_;
|
728
733
|
VALUE result_;
|
729
734
|
};
|
730
735
|
|
@@ -761,8 +766,7 @@ namespace red_arrow {
|
|
761
766
|
VISIT(UInt16)
|
762
767
|
VISIT(UInt32)
|
763
768
|
VISIT(UInt64)
|
764
|
-
|
765
|
-
// VISIT(HalfFloat)
|
769
|
+
VISIT(HalfFloat)
|
766
770
|
VISIT(Float)
|
767
771
|
VISIT(Double)
|
768
772
|
VISIT(Binary)
|
data/ext/arrow/extconf.rb
CHANGED
@@ -38,14 +38,11 @@ checking_for(checking_message("Homebrew")) do
|
|
38
38
|
end
|
39
39
|
end
|
40
40
|
|
41
|
-
$CXXFLAGS += " -std=c++17 "
|
42
|
-
|
43
41
|
unless required_pkg_config_package([
|
44
42
|
"arrow",
|
45
43
|
Arrow::Version::MAJOR,
|
46
|
-
Arrow::Version::MINOR,
|
47
|
-
Arrow::Version::MICRO,
|
48
44
|
],
|
45
|
+
conda: "libarrow",
|
49
46
|
debian: "libarrow-dev",
|
50
47
|
fedora: "libarrow-devel",
|
51
48
|
homebrew: "apache-arrow",
|
@@ -60,6 +57,7 @@ unless required_pkg_config_package([
|
|
60
57
|
Arrow::Version::MINOR,
|
61
58
|
Arrow::Version::MICRO,
|
62
59
|
],
|
60
|
+
conda: "arrow-c-glib",
|
63
61
|
debian: "libarrow-glib-dev",
|
64
62
|
fedora: "libarrow-glib-devel",
|
65
63
|
homebrew: "apache-arrow-glib",
|
@@ -77,4 +75,18 @@ end
|
|
77
75
|
add_depend_package_path(name, source_dir, build_dir)
|
78
76
|
end
|
79
77
|
|
78
|
+
# macOS only: adjust linker and compiler flags.
if /darwin/.match?(RUBY_PLATFORM)
  # Each listed symbol is handed to the linker with "-U".
  # NOTE(review): presumably these are resolved at load time from other
  # extension bundles, as the original variable name suggested - confirm.
  %w[
    _rbgerr_gerror2exception
    _rbgobj_instance_from_ruby_object
  ].each do |external_symbol|
    $DLDFLAGS << " -Wl,-U,#{external_symbol}"
  end

  # The same minimum macOS version flag goes to both the C and the C++
  # compiler flags.
  min_version_flag = "-mmacosx-version-min=10.15"
  $CFLAGS << " #{min_version_flag}"
  $CXXFLAGS << " #{min_version_flag}"
end
|
91
|
+
|
80
92
|
create_makefile("arrow")
|
data/ext/arrow/raw-records.cpp
CHANGED
@@ -84,8 +84,7 @@ namespace red_arrow {
|
|
84
84
|
VISIT(UInt16)
|
85
85
|
VISIT(UInt32)
|
86
86
|
VISIT(UInt64)
|
87
|
-
|
88
|
-
// VISIT(HalfFloat)
|
87
|
+
VISIT(HalfFloat)
|
89
88
|
VISIT(Float)
|
90
89
|
VISIT(Double)
|
91
90
|
VISIT(Binary)
|
@@ -145,6 +144,128 @@ namespace red_arrow {
|
|
145
144
|
// The number of columns.
|
146
145
|
const int n_columns_;
|
147
146
|
};
|
147
|
+
|
148
|
+
class RawRecordsProducer : private Converter, public arrow::ArrayVisitor {
|
149
|
+
public:
|
150
|
+
explicit RawRecordsProducer()
|
151
|
+
: Converter(),
|
152
|
+
record_(Qnil),
|
153
|
+
column_index_(0),
|
154
|
+
row_offset_(0) {
|
155
|
+
}
|
156
|
+
|
157
|
+
void produce(const arrow::RecordBatch& record_batch) {
|
158
|
+
rb::protect([&] {
|
159
|
+
const auto n_columns = record_batch.num_columns();
|
160
|
+
const auto n_rows = record_batch.num_rows();
|
161
|
+
for (int64_t i = 0; i < n_rows; ++i) {
|
162
|
+
record_ = rb_ary_new_capa(n_columns);
|
163
|
+
row_offset_ = i;
|
164
|
+
for (int i = 0; i < n_columns; ++i) {
|
165
|
+
const auto array = record_batch.column(i).get();
|
166
|
+
column_index_ = i;
|
167
|
+
check_status(array->Accept(this),
|
168
|
+
"[record-batch][each-raw-record]");
|
169
|
+
}
|
170
|
+
rb_yield(record_);
|
171
|
+
}
|
172
|
+
return Qnil;
|
173
|
+
});
|
174
|
+
}
|
175
|
+
|
176
|
+
void produce(const arrow::Table& table) {
|
177
|
+
rb::protect([&] {
|
178
|
+
const auto n_columns = table.num_columns();
|
179
|
+
const auto n_rows = table.num_rows();
|
180
|
+
std::vector<int> chunk_indexes(n_columns);
|
181
|
+
std::vector<int64_t> row_offsets(n_columns);
|
182
|
+
for (int64_t i_row = 0; i_row < n_rows; ++i_row) {
|
183
|
+
record_ = rb_ary_new_capa(n_columns);
|
184
|
+
for (int i_column = 0; i_column < n_columns; ++i_column) {
|
185
|
+
column_index_ = i_column;
|
186
|
+
const auto chunked_array = table.column(i_column).get();
|
187
|
+
auto& chunk_index = chunk_indexes[i_column];
|
188
|
+
auto& row_offset = row_offsets[i_column];
|
189
|
+
auto array = chunked_array->chunk(chunk_index).get();
|
190
|
+
while (array->length() == row_offset) {
|
191
|
+
++chunk_index;
|
192
|
+
row_offset = 0;
|
193
|
+
array = chunked_array->chunk(chunk_index).get();
|
194
|
+
}
|
195
|
+
row_offset_ = row_offset;
|
196
|
+
check_status(array->Accept(this),
|
197
|
+
"[table][each-raw-record]");
|
198
|
+
++row_offset;
|
199
|
+
}
|
200
|
+
rb_yield(record_);
|
201
|
+
}
|
202
|
+
|
203
|
+
return Qnil;
|
204
|
+
});
|
205
|
+
}
|
206
|
+
|
207
|
+
#define VISIT(TYPE) \
|
208
|
+
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
|
209
|
+
convert(array); \
|
210
|
+
return arrow::Status::OK(); \
|
211
|
+
}
|
212
|
+
|
213
|
+
VISIT(Null)
|
214
|
+
VISIT(Boolean)
|
215
|
+
VISIT(Int8)
|
216
|
+
VISIT(Int16)
|
217
|
+
VISIT(Int32)
|
218
|
+
VISIT(Int64)
|
219
|
+
VISIT(UInt8)
|
220
|
+
VISIT(UInt16)
|
221
|
+
VISIT(UInt32)
|
222
|
+
VISIT(UInt64)
|
223
|
+
VISIT(HalfFloat)
|
224
|
+
VISIT(Float)
|
225
|
+
VISIT(Double)
|
226
|
+
VISIT(Binary)
|
227
|
+
VISIT(String)
|
228
|
+
VISIT(FixedSizeBinary)
|
229
|
+
VISIT(Date32)
|
230
|
+
VISIT(Date64)
|
231
|
+
VISIT(Time32)
|
232
|
+
VISIT(Time64)
|
233
|
+
VISIT(Timestamp)
|
234
|
+
VISIT(MonthInterval)
|
235
|
+
VISIT(DayTimeInterval)
|
236
|
+
VISIT(MonthDayNanoInterval)
|
237
|
+
VISIT(List)
|
238
|
+
VISIT(Struct)
|
239
|
+
VISIT(Map)
|
240
|
+
VISIT(SparseUnion)
|
241
|
+
VISIT(DenseUnion)
|
242
|
+
VISIT(Dictionary)
|
243
|
+
VISIT(Decimal128)
|
244
|
+
VISIT(Decimal256)
|
245
|
+
// TODO
|
246
|
+
// VISIT(Extension)
|
247
|
+
|
248
|
+
#undef VISIT
|
249
|
+
|
250
|
+
private:
|
251
|
+
template <typename ArrayType>
|
252
|
+
void convert(const ArrayType& array) {
|
253
|
+
auto value = Qnil;
|
254
|
+
if (!array.IsNull(row_offset_)) {
|
255
|
+
value = convert_value(array, row_offset_);
|
256
|
+
}
|
257
|
+
rb_ary_store(record_, column_index_, value);
|
258
|
+
}
|
259
|
+
|
260
|
+
// Destination for converted record.
|
261
|
+
VALUE record_;
|
262
|
+
|
263
|
+
// The current column index.
|
264
|
+
int column_index_;
|
265
|
+
|
266
|
+
// The current row offset.
|
267
|
+
int64_t row_offset_;
|
268
|
+
};
|
148
269
|
}
|
149
270
|
|
150
271
|
VALUE
|
@@ -182,4 +303,36 @@ namespace red_arrow {
|
|
182
303
|
|
183
304
|
return records;
|
184
305
|
}
|
306
|
+
|
307
|
+
// Enumerator size callback for Arrow::RecordBatch#each_raw_record.
//
// Called by Ruby when #size is requested on the Enumerator that
// each_raw_record returns without a block. Returns the number of rows
// of the receiver as a Ruby Integer.
static VALUE
record_batch_each_raw_record_size(VALUE rb_record_batch, VALUE, VALUE)
{
  auto garrow_record_batch = GARROW_RECORD_BATCH(RVAL2GOBJ(rb_record_batch));
  auto record_batch = garrow_record_batch_get_raw(garrow_record_batch).get();
  return LL2NUM(record_batch->num_rows());
}

// Implements Arrow::RecordBatch#each_raw_record.
//
// Without a block, returns a sized Enumerator. With a block, yields
// every row through RawRecordsProducer and returns nil. A Ruby
// exception or non-local exit captured as rb::State is re-raised with
// state.jump().
VALUE
record_batch_each_raw_record(VALUE rb_record_batch) {
  // RETURN_SIZED_ENUMERATOR() takes a size *function*; an integer row
  // count passed here would be reinterpreted as a function pointer.
  RETURN_SIZED_ENUMERATOR(rb_record_batch,
                          0,
                          nullptr,
                          record_batch_each_raw_record_size);

  auto garrow_record_batch = GARROW_RECORD_BATCH(RVAL2GOBJ(rb_record_batch));
  auto record_batch = garrow_record_batch_get_raw(garrow_record_batch).get();

  try {
    RawRecordsProducer producer;
    producer.produce(*record_batch);
  } catch (rb::State& state) {
    state.jump();
  }

  return Qnil;
}
|
322
|
+
|
323
|
+
// Enumerator size callback for Arrow::Table#each_raw_record.
//
// Called by Ruby when #size is requested on the Enumerator that
// each_raw_record returns without a block. Returns the number of rows
// of the receiver as a Ruby Integer.
static VALUE
table_each_raw_record_size(VALUE rb_table, VALUE, VALUE)
{
  auto garrow_table = GARROW_TABLE(RVAL2GOBJ(rb_table));
  auto table = garrow_table_get_raw(garrow_table).get();
  return LL2NUM(table->num_rows());
}

// Implements Arrow::Table#each_raw_record.
//
// Without a block, returns a sized Enumerator. With a block, yields
// every row through RawRecordsProducer and returns nil. A Ruby
// exception or non-local exit captured as rb::State is re-raised with
// state.jump().
VALUE
table_each_raw_record(VALUE rb_table) {
  // RETURN_SIZED_ENUMERATOR() takes a size *function*; an integer row
  // count passed here would be reinterpreted as a function pointer.
  RETURN_SIZED_ENUMERATOR(rb_table,
                          0,
                          nullptr,
                          table_each_raw_record_size);

  auto garrow_table = GARROW_TABLE(RVAL2GOBJ(rb_table));
  auto table = garrow_table_get_raw(garrow_table).get();

  try {
    RawRecordsProducer producer;
    producer.produce(*table);
  } catch (rb::State& state) {
    state.jump();
  }

  return Qnil;
}
|
185
338
|
}
|
data/ext/arrow/red-arrow.hpp
CHANGED
@@ -59,6 +59,8 @@ namespace red_arrow {
|
|
59
59
|
|
60
60
|
VALUE record_batch_raw_records(VALUE obj);
|
61
61
|
VALUE table_raw_records(VALUE obj);
|
62
|
+
VALUE record_batch_each_raw_record(VALUE obj);
|
63
|
+
VALUE table_each_raw_record(VALUE obj);
|
62
64
|
|
63
65
|
inline VALUE time_unit_to_scale(const arrow::TimeUnit::type unit) {
|
64
66
|
switch (unit) {
|
data/ext/arrow/values.cpp
CHANGED
@@ -29,6 +29,19 @@ module Arrow
|
|
29
29
|
unique.values
|
30
30
|
end
|
31
31
|
|
32
|
+
# Finds the index of the first occurrence of a given value.
#
# @param value [Object] The value to look for. It is resolved to a
#   scalar of this object's value data type before the search.
#
# @return [Integer] The index of the first occurrence of the given
#   value if found, -1 otherwise.
#
# @since 12.0.0
def index(value)
  target = Scalar.resolve(value, value_data_type)
  found = compute("index", options: {value: target})
  found.value
end
|
44
|
+
|
32
45
|
private
|
33
46
|
def compute(name, options: nil)
|
34
47
|
Function.find(name).execute([self], options).value
|
data/lib/arrow/array.rb
CHANGED
@@ -22,6 +22,7 @@ module Arrow
|
|
22
22
|
include ArrayComputable
|
23
23
|
include GenericFilterable
|
24
24
|
include GenericTakeable
|
25
|
+
include InputReferable
|
25
26
|
|
26
27
|
class << self
|
27
28
|
def new(*args)
|
@@ -115,6 +116,10 @@ module Arrow
|
|
115
116
|
self
|
116
117
|
end
|
117
118
|
|
119
|
+
# @return [ChunkedArray] A new chunked array whose only chunk is this
#   array.
def to_arrow_chunked_array
  single_chunk = [self]
  ChunkedArray.new(single_chunk)
end
|
122
|
+
|
118
123
|
alias_method :value_data_type_raw, :value_data_type
|
119
124
|
def value_data_type
|
120
125
|
@value_data_type ||= value_data_type_raw
|
@@ -245,7 +250,7 @@ module Arrow
|
|
245
250
|
"[array][resolve] need to implement " +
|
246
251
|
"a feature that building #{value_data_type} array " +
|
247
252
|
"from raw Ruby Array"
|
248
|
-
raise
|
253
|
+
raise NotImplemented, message
|
249
254
|
end
|
250
255
|
other_array
|
251
256
|
elsif other_array.respond_to?(:value_data_type)
|
data/lib/arrow/chunked-array.rb
CHANGED
@@ -22,6 +22,31 @@ module Arrow
|
|
22
22
|
include ArrayComputable
|
23
23
|
include GenericFilterable
|
24
24
|
include GenericTakeable
|
25
|
+
include InputReferable
|
26
|
+
|
27
|
+
# Freezes this object.
#
# The chunks are loaded once before the first freeze so that the cached
# value already exists when the object becomes frozen.
def freeze
  # Ensure caching
  chunks unless frozen?
  super
end
|
34
|
+
|
35
|
+
# @return [self] This object as is; no conversion is performed.
def to_arrow
  self
end
|
38
|
+
|
39
|
+
# Converts this chunked array to a single array.
#
# @return [Object] An empty array built by the value data type when
#   there are no chunks, otherwise the result of `combine`.
def to_arrow_array
  return value_data_type.build_array([]) if n_chunks.zero?

  combine
end
|
46
|
+
|
47
|
+
# @return [self] This object as is; it is already a chunked array.
def to_arrow_chunked_array
  self
end
|
25
50
|
|
26
51
|
alias_method :size, :n_rows
|
27
52
|
unless method_defined?(:length)
|
@@ -30,7 +55,16 @@ module Arrow
|
|
30
55
|
|
31
56
|
alias_method :chunks_raw, :chunks
|
32
57
|
# Returns the chunks of this chunked array.
#
# The raw chunks are fetched once, each of them is passed to
# `share_input`, and the resulting list is cached for later calls.
def chunks
  @chunks ||= begin
    raw_chunks = chunks_raw
    raw_chunks.each { |chunk| share_input(chunk) }
    raw_chunks
  end
end
|
64
|
+
|
65
|
+
alias_method :get_chunk_raw, :get_chunk
|
66
|
+
# Returns the i-th chunk, looked up in the cached list from `chunks`.
#
# @param i [Integer] The index of the chunk.
def get_chunk(i)
  cached_chunks = chunks
  cached_chunks[i]
end
|
35
69
|
|
36
70
|
def null?(i)
|
@@ -143,5 +143,14 @@ module Arrow
|
|
143
143
|
find_column(selector)
|
144
144
|
end
|
145
145
|
end
|
146
|
+
|
147
|
+
# Returns the names of the columns in this object.
#
# The list is computed once from `columns` and cached.
#
# @return [::Array<String>] column names.
#
# @since 11.0.0
def column_names
  @column_names ||= columns.map { |column| column.name }
end
|
146
155
|
end
|
147
156
|
end
|
data/lib/arrow/column.rb
CHANGED
data/lib/arrow/data-type.rb
CHANGED
@@ -199,5 +199,14 @@ module Arrow
|
|
199
199
|
args.unshift(self) unless builder_class.buildable?(args)
|
200
200
|
builder_class.build(*args)
|
201
201
|
end
|
202
|
+
|
203
|
+
# @return [Class] The {Arrow::Scalar} class that corresponds to this
#   data type. It is looked up by replacing the trailing "DataType" of
#   this class's name with "Scalar".
#
# @since 12.0.0
def scalar_class
  data_type_name = self.class.name
  scalar_name = data_type_name.sub(/DataType\z/, "") + "Scalar"
  ::Arrow.const_get(scalar_name)
end
|
202
211
|
end
|
203
212
|
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
  class DenseUnionArrayBuilder
    alias_method :append_value_raw, :append_value

    # Appends a union record.
    #
    # @param value [nil, Hash] The union record value.
    #
    #   If this is `nil`, the union record is null.
    #
    #   If this is `Hash`, its first key selects the child field and
    #   the matching value is appended to that child's builder.
    #
    # @since 12.0.0
    def append_value(value)
      return append_null if value.nil?

      key, child_value = value.first
      child_info = child_infos[key]
      append_value_raw(child_info[:id])
      child_info[:builder].append(child_value)
    end
  end
end
|