red-arrow 8.0.0 → 24.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +15 -7
- data/ext/arrow/arrow.cpp +67 -0
- data/ext/arrow/converters.cpp +10 -0
- data/ext/arrow/converters.hpp +310 -46
- data/ext/arrow/extconf.rb +41 -22
- data/ext/arrow/raw-records.cpp +165 -2
- data/ext/arrow/red-arrow.hpp +2 -0
- data/ext/arrow/values.cpp +6 -2
- data/lib/arrow/array-builder.rb +89 -14
- data/{test/test-time32-data-type.rb → lib/arrow/array-computable.rb} +24 -16
- data/{test/test-buffer.rb → lib/arrow/array-statistics.rb} +19 -24
- data/lib/arrow/array.rb +40 -4
- data/lib/arrow/chunked-array.rb +56 -1
- data/lib/arrow/column-containable.rb +9 -0
- data/lib/arrow/column.rb +49 -4
- data/{test/test-tensor.rb → lib/arrow/csv-write-options.rb} +28 -31
- data/lib/arrow/data-type.rb +17 -3
- data/lib/arrow/decimal128-array-builder.rb +16 -6
- data/lib/arrow/decimal128.rb +14 -0
- data/lib/arrow/decimal256-array-builder.rb +16 -6
- data/lib/arrow/decimal256.rb +14 -0
- data/{test/test-float-scalar.rb → lib/arrow/dense-union-array-builder.rb} +27 -24
- data/{test/test-boolean-scalar.rb → lib/arrow/dense-union-array.rb} +7 -7
- data/lib/arrow/duration-array-builder.rb +27 -0
- data/lib/arrow/duration-array.rb +24 -0
- data/lib/arrow/duration-data-type.rb +32 -0
- data/lib/arrow/expression.rb +6 -2
- data/lib/arrow/field-containable.rb +1 -1
- data/lib/arrow/field.rb +44 -3
- data/lib/arrow/fixed-size-list-array-builder.rb +29 -0
- data/lib/arrow/fixed-size-list-data-type.rb +118 -0
- data/lib/arrow/function.rb +0 -1
- data/lib/arrow/half-float-array-builder.rb +32 -0
- data/lib/arrow/half-float-array.rb +24 -0
- data/lib/arrow/half-float.rb +118 -0
- data/{test/helper/fixture.rb → lib/arrow/input-referable.rb} +7 -6
- data/lib/arrow/jruby/array-builder.rb +114 -0
- data/lib/arrow/jruby/array.rb +109 -0
- data/lib/arrow/jruby/chunked-array.rb +36 -0
- data/lib/arrow/jruby/compression-type.rb +26 -0
- data/lib/arrow/jruby/csv-read-options.rb +32 -0
- data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
- data/lib/arrow/jruby/decimal128.rb +28 -0
- data/lib/arrow/jruby/decimal256.rb +28 -0
- data/{test/fixture/float-integer.csv → lib/arrow/jruby/error.rb} +7 -4
- data/lib/arrow/jruby/file-system.rb +24 -0
- data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
- data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
- data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/record-batch.rb} +8 -4
- data/{test/fixture/integer-float.csv → lib/arrow/jruby/sort-key.rb} +8 -4
- data/lib/arrow/jruby/sort-options.rb +24 -0
- data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
- data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
- data/lib/arrow/jruby/writable.rb +24 -0
- data/lib/arrow/jruby.rb +52 -0
- data/{test/test-date32-array.rb → lib/arrow/large-list-array-builder.rb} +10 -5
- data/lib/arrow/large-list-data-type.rb +83 -0
- data/lib/arrow/libraries.rb +140 -0
- data/lib/arrow/list-array-builder.rb +1 -68
- data/lib/arrow/list-data-type.rb +3 -38
- data/{test/test-dictionary-array.rb → lib/arrow/list-field-resolvable.rb} +26 -17
- data/lib/arrow/list-slice-options.rb +76 -0
- data/lib/arrow/list-values-appendable.rb +88 -0
- data/lib/arrow/loader.rb +15 -96
- data/{test/test-decimal128-array.rb → lib/arrow/make-struct-options.rb} +18 -18
- data/lib/arrow/raw-table-converter.rb +10 -3
- data/lib/arrow/raw-tensor-converter.rb +89 -0
- data/lib/arrow/record-batch-file-reader.rb +2 -0
- data/lib/arrow/record-batch-stream-reader.rb +2 -0
- data/lib/arrow/record-batch.rb +6 -2
- data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +5 -4
- data/lib/arrow/scalar.rb +67 -0
- data/lib/arrow/slicer.rb +61 -0
- data/lib/arrow/sort-key.rb +3 -3
- data/lib/arrow/sparse-union-array-builder.rb +56 -0
- data/lib/arrow/sparse-union-array.rb +26 -0
- data/lib/arrow/stream-decoder.rb +29 -0
- data/{test/test-decimal256-data-type.rb → lib/arrow/stream-listener.rb} +25 -9
- data/lib/arrow/string-array-builder.rb +30 -0
- data/lib/arrow/struct-array-builder.rb +0 -5
- data/lib/arrow/table-formatter.rb +38 -8
- data/lib/arrow/table-list-formatter.rb +3 -3
- data/lib/arrow/table-loader.rb +11 -5
- data/lib/arrow/table-saver.rb +4 -3
- data/lib/arrow/table-table-formatter.rb +7 -0
- data/lib/arrow/table.rb +180 -33
- data/lib/arrow/tensor.rb +144 -0
- data/lib/arrow/time-unit.rb +31 -0
- data/lib/arrow/time32-array-builder.rb +2 -14
- data/lib/arrow/time32-data-type.rb +9 -38
- data/lib/arrow/time64-array-builder.rb +2 -14
- data/lib/arrow/time64-data-type.rb +9 -38
- data/lib/arrow/timestamp-array-builder.rb +3 -15
- data/lib/arrow/timestamp-data-type.rb +9 -34
- data/{test/test-date64-array.rb → lib/arrow/timestamp-parser.rb} +14 -6
- data/lib/arrow/union-array-builder.rb +59 -0
- data/lib/arrow/union-array.rb +26 -0
- data/lib/arrow/version.rb +1 -1
- data/lib/arrow.rb +2 -7
- data/red-arrow.gemspec +74 -11
- metadata +85 -210
- data/test/fixture/TestOrcFile.test1.orc +0 -0
- data/test/fixture/with-header-float.csv +0 -20
- data/test/fixture/with-header.csv +0 -20
- data/test/fixture/without-header-float.csv +0 -19
- data/test/fixture/without-header.csv +0 -19
- data/test/helper/omittable.rb +0 -36
- data/test/helper.rb +0 -30
- data/test/raw-records/test-basic-arrays.rb +0 -395
- data/test/raw-records/test-dense-union-array.rb +0 -521
- data/test/raw-records/test-list-array.rb +0 -610
- data/test/raw-records/test-map-array.rb +0 -478
- data/test/raw-records/test-multiple-columns.rb +0 -65
- data/test/raw-records/test-sparse-union-array.rb +0 -511
- data/test/raw-records/test-struct-array.rb +0 -515
- data/test/raw-records/test-table.rb +0 -47
- data/test/run-test.rb +0 -71
- data/test/test-array-builder.rb +0 -136
- data/test/test-array.rb +0 -325
- data/test/test-bigdecimal.rb +0 -40
- data/test/test-binary-dictionary-array-builder.rb +0 -103
- data/test/test-chunked-array.rb +0 -183
- data/test/test-column.rb +0 -92
- data/test/test-csv-loader.rb +0 -250
- data/test/test-data-type.rb +0 -83
- data/test/test-decimal128-array-builder.rb +0 -112
- data/test/test-decimal128-data-type.rb +0 -31
- data/test/test-decimal128.rb +0 -102
- data/test/test-decimal256-array-builder.rb +0 -112
- data/test/test-decimal256-array.rb +0 -38
- data/test/test-decimal256.rb +0 -102
- data/test/test-dense-union-data-type.rb +0 -41
- data/test/test-dictionary-data-type.rb +0 -40
- data/test/test-expression.rb +0 -40
- data/test/test-feather.rb +0 -49
- data/test/test-field.rb +0 -91
- data/test/test-file-output-stream.rb +0 -54
- data/test/test-fixed-size-binary-array-builder.rb +0 -92
- data/test/test-fixed-size-binary-array.rb +0 -36
- data/test/test-function.rb +0 -210
- data/test/test-group.rb +0 -180
- data/test/test-list-array-builder.rb +0 -79
- data/test/test-list-array.rb +0 -32
- data/test/test-list-data-type.rb +0 -69
- data/test/test-map-array-builder.rb +0 -110
- data/test/test-map-array.rb +0 -33
- data/test/test-memory-view.rb +0 -434
- data/test/test-orc.rb +0 -173
- data/test/test-record-batch-builder.rb +0 -125
- data/test/test-record-batch-file-reader.rb +0 -115
- data/test/test-record-batch-iterator.rb +0 -37
- data/test/test-record-batch-reader.rb +0 -46
- data/test/test-record-batch.rb +0 -182
- data/test/test-schema.rb +0 -134
- data/test/test-slicer.rb +0 -487
- data/test/test-sort-indices.rb +0 -40
- data/test/test-sort-key.rb +0 -81
- data/test/test-sort-options.rb +0 -58
- data/test/test-sparse-union-data-type.rb +0 -41
- data/test/test-string-dictionary-array-builder.rb +0 -103
- data/test/test-struct-array-builder.rb +0 -184
- data/test/test-struct-array.rb +0 -94
- data/test/test-struct-data-type.rb +0 -112
- data/test/test-table.rb +0 -1123
- data/test/test-time.rb +0 -288
- data/test/test-time32-array.rb +0 -81
- data/test/test-time64-array.rb +0 -81
- data/test/test-time64-data-type.rb +0 -42
- data/test/test-timestamp-array.rb +0 -45
- data/test/test-timestamp-data-type.rb +0 -42
- data/test/values/test-basic-arrays.rb +0 -325
- data/test/values/test-dense-union-array.rb +0 -509
- data/test/values/test-dictionary-array.rb +0 -295
- data/test/values/test-list-array.rb +0 -571
- data/test/values/test-map-array.rb +0 -466
- data/test/values/test-sparse-union-array.rb +0 -500
- data/test/values/test-struct-array.rb +0 -512
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e8464767793fb8efd382689c8ca939af17fdb052f462885efb0461d07d905d18
|
|
4
|
+
data.tar.gz: e6d037e1400af61e90438eaa255c172eaf850b136ea7f8262b6acbd2b41c8126
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0cfab03a5b046c67d892db13cf1cd8d988fd0da9e61e950983dc2c1662c9b25a11ad8fb2e27ca68c7cd34814f208a4a51eda0d689e6aa9c514b82c026797ac7e
|
|
7
|
+
data.tar.gz: 22370cbfb633c8efd7d3ef8e8131001ece304d1c2e79e29bbda7eae4095dea0486f79a9253fbf8d586c11389bd2f3c8b38841f90733cb1dfb4787c2a9327fa15
|
data/README.md
CHANGED
|
@@ -25,20 +25,28 @@ Red Arrow is the Ruby bindings of Apache Arrow. Red Arrow is based on GObject In
|
|
|
25
25
|
|
|
26
26
|
[GObject Introspection](https://wiki.gnome.org/action/show/Projects/GObjectIntrospection) is middleware for language bindings of C libraries. GObject Introspection can generate language bindings automatically at runtime.
|
|
27
27
|
|
|
28
|
-
Red Arrow uses [Apache Arrow GLib](https://github.com/apache/arrow/tree/
|
|
28
|
+
Red Arrow uses [Apache Arrow GLib](https://github.com/apache/arrow/tree/main/c_glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Arrow.
|
|
29
29
|
|
|
30
|
-
Apache Arrow GLib is a C wrapper for [Apache Arrow C++](https://github.com/apache/arrow/tree/
|
|
30
|
+
Apache Arrow GLib is a C wrapper for [Apache Arrow C++](https://github.com/apache/arrow/tree/main/cpp). GObject Introspection can't use Apache Arrow C++ directly. Apache Arrow GLib is a bridge between Apache Arrow C++ and GObject Introspection.
|
|
31
31
|
|
|
32
32
|
The gobject-introspection gem provides Ruby bindings for GObject Introspection. Red Arrow uses GObject Introspection via the gobject-introspection gem.
|
|
33
33
|
|
|
34
34
|
## Install
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
You need to install Apache Arrow GLib to install Red Arrow. You can automate it by enabling [rubygems-requirements-system](https://github.com/ruby-gnome/rubygems-requirements-system/). If you want to install Apache Arrow GLib manually, see [Apache Arrow install document](https://arrow.apache.org/install/) for details.
|
|
37
37
|
|
|
38
|
-
|
|
38
|
+
If you want to install Red Arrow with Bundler, add the following to your `Gemfile`:
|
|
39
|
+
|
|
40
|
+
```ruby
|
|
41
|
+
plugin "rubygems-requirements-system"
|
|
42
|
+
|
|
43
|
+
gem "red-arrow"
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
If you want to install Red Arrow by RubyGems, you can use the following command line:
|
|
39
47
|
|
|
40
48
|
```console
|
|
41
|
-
|
|
49
|
+
$ gem install rubygems-requirements-system red-arrow
|
|
42
50
|
```
|
|
43
51
|
|
|
44
52
|
## Usage
|
|
@@ -56,7 +64,7 @@ table.save("/dev/shm/data-processed.arrow")
|
|
|
56
64
|
Note that you need to install Apache Arrow C++/GLib at master before preparing Red Arrow. See also:
|
|
57
65
|
|
|
58
66
|
* For Apache Arrow C++: https://arrow.apache.org/docs/developers/cpp/building.html
|
|
59
|
-
* For Apache Arrow GLib: https://github.com/apache/arrow/blob/
|
|
67
|
+
* For Apache Arrow GLib: https://github.com/apache/arrow/blob/main/c_glib/README.md
|
|
60
68
|
|
|
61
69
|
```console
|
|
62
70
|
$ cd ruby/red-arrow
|
|
@@ -72,4 +80,4 @@ $ bundle install
|
|
|
72
80
|
$ brew install apache-arrow --head
|
|
73
81
|
$ brew install apache-arrow-glib --head
|
|
74
82
|
$ bundle exec rake test
|
|
75
|
-
```
|
|
83
|
+
```
|
data/ext/arrow/arrow.cpp
CHANGED
|
@@ -43,6 +43,56 @@ namespace red_arrow {
|
|
|
43
43
|
VALUE month;
|
|
44
44
|
VALUE nanosecond;
|
|
45
45
|
}
|
|
46
|
+
|
|
47
|
+
void
|
|
48
|
+
record_batch_reader_mark(gpointer object)
|
|
49
|
+
{
|
|
50
|
+
auto reader = GARROW_RECORD_BATCH_READER(object);
|
|
51
|
+
auto sources = garrow_record_batch_reader_get_sources(reader);
|
|
52
|
+
for (auto source = sources; sources; sources = g_list_next(sources)) {
|
|
53
|
+
rbgobj_gc_mark_instance(source->data);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
void
|
|
58
|
+
execute_plan_mark(gpointer object)
|
|
59
|
+
{
|
|
60
|
+
auto plan = GARROW_EXECUTE_PLAN(object);
|
|
61
|
+
auto nodes = garrow_execute_plan_get_nodes(plan);
|
|
62
|
+
for (auto node = nodes; node; node = g_list_next(node)) {
|
|
63
|
+
rbgobj_gc_mark_instance(node->data);
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
void
|
|
68
|
+
call_expression_mark(gpointer object)
|
|
69
|
+
{
|
|
70
|
+
auto expression = GARROW_CALL_EXPRESSION(object);
|
|
71
|
+
auto arguments = garrow_call_expression_get_arguments(expression);
|
|
72
|
+
for (auto argument = arguments; argument; argument = g_list_next(argument)) {
|
|
73
|
+
rbgobj_gc_mark_instance(argument->data);
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
void
|
|
78
|
+
aggregate_node_options_mark(gpointer object)
|
|
79
|
+
{
|
|
80
|
+
auto options = GARROW_AGGREGATE_NODE_OPTIONS(object);
|
|
81
|
+
auto aggregations = garrow_aggregate_node_options_get_aggregations(options);
|
|
82
|
+
for (auto aggregation = aggregations; aggregation; aggregation = g_list_next(aggregation)) {
|
|
83
|
+
rbgobj_gc_mark_instance(aggregation->data);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
void
|
|
88
|
+
project_node_options_mark(gpointer object)
|
|
89
|
+
{
|
|
90
|
+
auto options = GARROW_PROJECT_NODE_OPTIONS(object);
|
|
91
|
+
auto expressions = garrow_project_node_options_get_expressions(options);
|
|
92
|
+
for (auto expression = expressions; expression; expression = g_list_next(expression)) {
|
|
93
|
+
rbgobj_gc_mark_instance(expression->data);
|
|
94
|
+
}
|
|
95
|
+
}
|
|
46
96
|
}
|
|
47
97
|
|
|
48
98
|
extern "C" void Init_arrow() {
|
|
@@ -62,11 +112,17 @@ extern "C" void Init_arrow() {
|
|
|
62
112
|
rb_define_method(cArrowRecordBatch, "raw_records",
|
|
63
113
|
reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_raw_records),
|
|
64
114
|
0);
|
|
115
|
+
rb_define_method(cArrowRecordBatch, "each_raw_record",
|
|
116
|
+
reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_each_raw_record),
|
|
117
|
+
0);
|
|
65
118
|
|
|
66
119
|
auto cArrowTable = rb_const_get_at(mArrow, rb_intern("Table"));
|
|
67
120
|
rb_define_method(cArrowTable, "raw_records",
|
|
68
121
|
reinterpret_cast<rb::RawMethod>(red_arrow::table_raw_records),
|
|
69
122
|
0);
|
|
123
|
+
rb_define_method(cArrowTable, "each_raw_record",
|
|
124
|
+
reinterpret_cast<rb::RawMethod>(red_arrow::table_each_raw_record),
|
|
125
|
+
0);
|
|
70
126
|
|
|
71
127
|
red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date"));
|
|
72
128
|
|
|
@@ -93,4 +149,15 @@ extern "C" void Init_arrow() {
|
|
|
93
149
|
red_arrow::symbols::millisecond = ID2SYM(rb_intern("millisecond"));
|
|
94
150
|
red_arrow::symbols::month = ID2SYM(rb_intern("month"));
|
|
95
151
|
red_arrow::symbols::nanosecond = ID2SYM(rb_intern("nanosecond"));
|
|
152
|
+
|
|
153
|
+
rbgobj_register_mark_func(GARROW_TYPE_RECORD_BATCH_READER,
|
|
154
|
+
red_arrow::record_batch_reader_mark);
|
|
155
|
+
rbgobj_register_mark_func(GARROW_TYPE_EXECUTE_PLAN,
|
|
156
|
+
red_arrow::execute_plan_mark);
|
|
157
|
+
rbgobj_register_mark_func(GARROW_TYPE_CALL_EXPRESSION,
|
|
158
|
+
red_arrow::call_expression_mark);
|
|
159
|
+
rbgobj_register_mark_func(GARROW_TYPE_AGGREGATE_NODE_OPTIONS,
|
|
160
|
+
red_arrow::aggregate_node_options_mark);
|
|
161
|
+
rbgobj_register_mark_func(GARROW_TYPE_PROJECT_NODE_OPTIONS,
|
|
162
|
+
red_arrow::project_node_options_mark);
|
|
96
163
|
}
|
data/ext/arrow/converters.cpp
CHANGED
|
@@ -25,6 +25,16 @@ namespace red_arrow {
|
|
|
25
25
|
return list_array_value_converter_->convert(array, i);
|
|
26
26
|
}
|
|
27
27
|
|
|
28
|
+
VALUE ArrayValueConverter::convert(const arrow::LargeListArray& array,
|
|
29
|
+
const int64_t i) {
|
|
30
|
+
return large_list_array_value_converter_->convert(array, i);
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
VALUE ArrayValueConverter::convert(const arrow::FixedSizeListArray& array,
|
|
34
|
+
const int64_t i) {
|
|
35
|
+
return fixed_size_list_array_value_converter_->convert(array, i);
|
|
36
|
+
}
|
|
37
|
+
|
|
28
38
|
VALUE ArrayValueConverter::convert(const arrow::StructArray& array,
|
|
29
39
|
const int64_t i) {
|
|
30
40
|
return struct_array_value_converter_->convert(array, i);
|