red-arrow 8.0.0 → 24.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +15 -7
  3. data/ext/arrow/arrow.cpp +67 -0
  4. data/ext/arrow/converters.cpp +10 -0
  5. data/ext/arrow/converters.hpp +310 -46
  6. data/ext/arrow/extconf.rb +41 -22
  7. data/ext/arrow/raw-records.cpp +165 -2
  8. data/ext/arrow/red-arrow.hpp +2 -0
  9. data/ext/arrow/values.cpp +6 -2
  10. data/lib/arrow/array-builder.rb +89 -14
  11. data/{test/test-time32-data-type.rb → lib/arrow/array-computable.rb} +24 -16
  12. data/{test/test-buffer.rb → lib/arrow/array-statistics.rb} +19 -24
  13. data/lib/arrow/array.rb +40 -4
  14. data/lib/arrow/chunked-array.rb +56 -1
  15. data/lib/arrow/column-containable.rb +9 -0
  16. data/lib/arrow/column.rb +49 -4
  17. data/{test/test-tensor.rb → lib/arrow/csv-write-options.rb} +28 -31
  18. data/lib/arrow/data-type.rb +17 -3
  19. data/lib/arrow/decimal128-array-builder.rb +16 -6
  20. data/lib/arrow/decimal128.rb +14 -0
  21. data/lib/arrow/decimal256-array-builder.rb +16 -6
  22. data/lib/arrow/decimal256.rb +14 -0
  23. data/{test/test-float-scalar.rb → lib/arrow/dense-union-array-builder.rb} +27 -24
  24. data/{test/test-boolean-scalar.rb → lib/arrow/dense-union-array.rb} +7 -7
  25. data/lib/arrow/duration-array-builder.rb +27 -0
  26. data/lib/arrow/duration-array.rb +24 -0
  27. data/lib/arrow/duration-data-type.rb +32 -0
  28. data/lib/arrow/expression.rb +6 -2
  29. data/lib/arrow/field-containable.rb +1 -1
  30. data/lib/arrow/field.rb +44 -3
  31. data/lib/arrow/fixed-size-list-array-builder.rb +29 -0
  32. data/lib/arrow/fixed-size-list-data-type.rb +118 -0
  33. data/lib/arrow/function.rb +0 -1
  34. data/lib/arrow/half-float-array-builder.rb +32 -0
  35. data/lib/arrow/half-float-array.rb +24 -0
  36. data/lib/arrow/half-float.rb +118 -0
  37. data/{test/helper/fixture.rb → lib/arrow/input-referable.rb} +7 -6
  38. data/lib/arrow/jruby/array-builder.rb +114 -0
  39. data/lib/arrow/jruby/array.rb +109 -0
  40. data/lib/arrow/jruby/chunked-array.rb +36 -0
  41. data/lib/arrow/jruby/compression-type.rb +26 -0
  42. data/lib/arrow/jruby/csv-read-options.rb +32 -0
  43. data/{test/test-map-data-type.rb → lib/arrow/jruby/data-type.rb} +24 -12
  44. data/lib/arrow/jruby/decimal128.rb +28 -0
  45. data/lib/arrow/jruby/decimal256.rb +28 -0
  46. data/{test/fixture/float-integer.csv → lib/arrow/jruby/error.rb} +7 -4
  47. data/lib/arrow/jruby/file-system.rb +24 -0
  48. data/{test/test-null-array.rb → lib/arrow/jruby/function.rb} +5 -4
  49. data/lib/arrow/jruby/record-batch-iterator.rb +24 -0
  50. data/{test/fixture/null-with-double-quote.csv → lib/arrow/jruby/record-batch.rb} +8 -4
  51. data/{test/fixture/integer-float.csv → lib/arrow/jruby/sort-key.rb} +8 -4
  52. data/lib/arrow/jruby/sort-options.rb +24 -0
  53. data/lib/arrow/jruby/stream-listener-raw.rb +25 -0
  54. data/{test/test-rolling-window.rb → lib/arrow/jruby/table.rb} +19 -19
  55. data/lib/arrow/jruby/writable.rb +24 -0
  56. data/lib/arrow/jruby.rb +52 -0
  57. data/{test/test-date32-array.rb → lib/arrow/large-list-array-builder.rb} +10 -5
  58. data/lib/arrow/large-list-data-type.rb +83 -0
  59. data/lib/arrow/libraries.rb +140 -0
  60. data/lib/arrow/list-array-builder.rb +1 -68
  61. data/lib/arrow/list-data-type.rb +3 -38
  62. data/{test/test-dictionary-array.rb → lib/arrow/list-field-resolvable.rb} +26 -17
  63. data/lib/arrow/list-slice-options.rb +76 -0
  64. data/lib/arrow/list-values-appendable.rb +88 -0
  65. data/lib/arrow/loader.rb +15 -96
  66. data/{test/test-decimal128-array.rb → lib/arrow/make-struct-options.rb} +18 -18
  67. data/lib/arrow/raw-table-converter.rb +10 -3
  68. data/lib/arrow/raw-tensor-converter.rb +89 -0
  69. data/lib/arrow/record-batch-file-reader.rb +2 -0
  70. data/lib/arrow/record-batch-stream-reader.rb +2 -0
  71. data/lib/arrow/record-batch.rb +6 -2
  72. data/{test/fixture/null-without-double-quote.csv → lib/arrow/ruby.rb} +5 -4
  73. data/lib/arrow/scalar.rb +67 -0
  74. data/lib/arrow/slicer.rb +61 -0
  75. data/lib/arrow/sort-key.rb +3 -3
  76. data/lib/arrow/sparse-union-array-builder.rb +56 -0
  77. data/lib/arrow/sparse-union-array.rb +26 -0
  78. data/lib/arrow/stream-decoder.rb +29 -0
  79. data/{test/test-decimal256-data-type.rb → lib/arrow/stream-listener.rb} +25 -9
  80. data/lib/arrow/string-array-builder.rb +30 -0
  81. data/lib/arrow/struct-array-builder.rb +0 -5
  82. data/lib/arrow/table-formatter.rb +38 -8
  83. data/lib/arrow/table-list-formatter.rb +3 -3
  84. data/lib/arrow/table-loader.rb +11 -5
  85. data/lib/arrow/table-saver.rb +4 -3
  86. data/lib/arrow/table-table-formatter.rb +7 -0
  87. data/lib/arrow/table.rb +180 -33
  88. data/lib/arrow/tensor.rb +144 -0
  89. data/lib/arrow/time-unit.rb +31 -0
  90. data/lib/arrow/time32-array-builder.rb +2 -14
  91. data/lib/arrow/time32-data-type.rb +9 -38
  92. data/lib/arrow/time64-array-builder.rb +2 -14
  93. data/lib/arrow/time64-data-type.rb +9 -38
  94. data/lib/arrow/timestamp-array-builder.rb +3 -15
  95. data/lib/arrow/timestamp-data-type.rb +9 -34
  96. data/{test/test-date64-array.rb → lib/arrow/timestamp-parser.rb} +14 -6
  97. data/lib/arrow/union-array-builder.rb +59 -0
  98. data/lib/arrow/union-array.rb +26 -0
  99. data/lib/arrow/version.rb +1 -1
  100. data/lib/arrow.rb +2 -7
  101. data/red-arrow.gemspec +74 -11
  102. metadata +85 -210
  103. data/test/fixture/TestOrcFile.test1.orc +0 -0
  104. data/test/fixture/with-header-float.csv +0 -20
  105. data/test/fixture/with-header.csv +0 -20
  106. data/test/fixture/without-header-float.csv +0 -19
  107. data/test/fixture/without-header.csv +0 -19
  108. data/test/helper/omittable.rb +0 -36
  109. data/test/helper.rb +0 -30
  110. data/test/raw-records/test-basic-arrays.rb +0 -395
  111. data/test/raw-records/test-dense-union-array.rb +0 -521
  112. data/test/raw-records/test-list-array.rb +0 -610
  113. data/test/raw-records/test-map-array.rb +0 -478
  114. data/test/raw-records/test-multiple-columns.rb +0 -65
  115. data/test/raw-records/test-sparse-union-array.rb +0 -511
  116. data/test/raw-records/test-struct-array.rb +0 -515
  117. data/test/raw-records/test-table.rb +0 -47
  118. data/test/run-test.rb +0 -71
  119. data/test/test-array-builder.rb +0 -136
  120. data/test/test-array.rb +0 -325
  121. data/test/test-bigdecimal.rb +0 -40
  122. data/test/test-binary-dictionary-array-builder.rb +0 -103
  123. data/test/test-chunked-array.rb +0 -183
  124. data/test/test-column.rb +0 -92
  125. data/test/test-csv-loader.rb +0 -250
  126. data/test/test-data-type.rb +0 -83
  127. data/test/test-decimal128-array-builder.rb +0 -112
  128. data/test/test-decimal128-data-type.rb +0 -31
  129. data/test/test-decimal128.rb +0 -102
  130. data/test/test-decimal256-array-builder.rb +0 -112
  131. data/test/test-decimal256-array.rb +0 -38
  132. data/test/test-decimal256.rb +0 -102
  133. data/test/test-dense-union-data-type.rb +0 -41
  134. data/test/test-dictionary-data-type.rb +0 -40
  135. data/test/test-expression.rb +0 -40
  136. data/test/test-feather.rb +0 -49
  137. data/test/test-field.rb +0 -91
  138. data/test/test-file-output-stream.rb +0 -54
  139. data/test/test-fixed-size-binary-array-builder.rb +0 -92
  140. data/test/test-fixed-size-binary-array.rb +0 -36
  141. data/test/test-function.rb +0 -210
  142. data/test/test-group.rb +0 -180
  143. data/test/test-list-array-builder.rb +0 -79
  144. data/test/test-list-array.rb +0 -32
  145. data/test/test-list-data-type.rb +0 -69
  146. data/test/test-map-array-builder.rb +0 -110
  147. data/test/test-map-array.rb +0 -33
  148. data/test/test-memory-view.rb +0 -434
  149. data/test/test-orc.rb +0 -173
  150. data/test/test-record-batch-builder.rb +0 -125
  151. data/test/test-record-batch-file-reader.rb +0 -115
  152. data/test/test-record-batch-iterator.rb +0 -37
  153. data/test/test-record-batch-reader.rb +0 -46
  154. data/test/test-record-batch.rb +0 -182
  155. data/test/test-schema.rb +0 -134
  156. data/test/test-slicer.rb +0 -487
  157. data/test/test-sort-indices.rb +0 -40
  158. data/test/test-sort-key.rb +0 -81
  159. data/test/test-sort-options.rb +0 -58
  160. data/test/test-sparse-union-data-type.rb +0 -41
  161. data/test/test-string-dictionary-array-builder.rb +0 -103
  162. data/test/test-struct-array-builder.rb +0 -184
  163. data/test/test-struct-array.rb +0 -94
  164. data/test/test-struct-data-type.rb +0 -112
  165. data/test/test-table.rb +0 -1123
  166. data/test/test-time.rb +0 -288
  167. data/test/test-time32-array.rb +0 -81
  168. data/test/test-time64-array.rb +0 -81
  169. data/test/test-time64-data-type.rb +0 -42
  170. data/test/test-timestamp-array.rb +0 -45
  171. data/test/test-timestamp-data-type.rb +0 -42
  172. data/test/values/test-basic-arrays.rb +0 -325
  173. data/test/values/test-dense-union-array.rb +0 -509
  174. data/test/values/test-dictionary-array.rb +0 -295
  175. data/test/values/test-list-array.rb +0 -571
  176. data/test/values/test-map-array.rb +0 -466
  177. data/test/values/test-sparse-union-array.rb +0 -500
  178. data/test/values/test-struct-array.rb +0 -512
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f43d3875f2e48876cd47533124a9fcf9559e263e63e3954c65fedd36b4e59744
4
- data.tar.gz: 2548f661536d3288f4ad8bb25d6587a78ee51d810a85a7d533ccdaf595439a12
3
+ metadata.gz: e8464767793fb8efd382689c8ca939af17fdb052f462885efb0461d07d905d18
4
+ data.tar.gz: e6d037e1400af61e90438eaa255c172eaf850b136ea7f8262b6acbd2b41c8126
5
5
  SHA512:
6
- metadata.gz: caeffbc164f14db7b056263f1ecd449afe2feec3ccbf7b883c147d415199c9ddc6c20398b00c34884248c103fbb7c55aa225719d8b8e75ac7f49337ee89d557e
7
- data.tar.gz: ae5a793e06b63bd80e3499f794193f47f72ff7510a558d2d0e61314068d5bd3a1986e5083bda749b00b73a4615901beb320521eeb360576b93fcb9a1e9d9510a
6
+ metadata.gz: 0cfab03a5b046c67d892db13cf1cd8d988fd0da9e61e950983dc2c1662c9b25a11ad8fb2e27ca68c7cd34814f208a4a51eda0d689e6aa9c514b82c026797ac7e
7
+ data.tar.gz: 22370cbfb633c8efd7d3ef8e8131001ece304d1c2e79e29bbda7eae4095dea0486f79a9253fbf8d586c11389bd2f3c8b38841f90733cb1dfb4787c2a9327fa15
data/README.md CHANGED
@@ -25,20 +25,28 @@ Red Arrow is the Ruby bindings of Apache Arrow. Red Arrow is based on GObject In
25
25
 
26
26
  [GObject Introspection](https://wiki.gnome.org/action/show/Projects/GObjectIntrospection) is a middleware for language bindings of C library. GObject Introspection can generate language bindings automatically at runtime.
27
27
 
28
- Red Arrow uses [Apache Arrow GLib](https://github.com/apache/arrow/tree/master/c_glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Arrow.
28
+ Red Arrow uses [Apache Arrow GLib](https://github.com/apache/arrow/tree/main/c_glib) and [gobject-introspection gem](https://rubygems.org/gems/gobject-introspection) to generate Ruby bindings of Apache Arrow.
29
29
 
30
- Apache Arrow GLib is a C wrapper for [Apache Arrow C++](https://github.com/apache/arrow/tree/master/cpp). GObject Introspection can't use Apache Arrow C++ directly. Apache Arrow GLib is a bridge between Apache Arrow C++ and GObject Introspection.
30
+ Apache Arrow GLib is a C wrapper for [Apache Arrow C++](https://github.com/apache/arrow/tree/main/cpp). GObject Introspection can't use Apache Arrow C++ directly. Apache Arrow GLib is a bridge between Apache Arrow C++ and GObject Introspection.
31
31
 
32
32
  gobject-introspection gem is a Ruby bindings of GObject Introspection. Red Arrow uses GObject Introspection via gobject-introspection gem.
33
33
 
34
34
  ## Install
35
35
 
36
- Install Apache Arrow GLib before install Red Arrow. See [Apache Arrow install document](https://arrow.apache.org/install/) for details.
36
+ You need to install Apache Arrow GLib to install Red Arrow. You can automate it by enabling [rubygems-requirements-system](https://github.com/ruby-gnome/rubygems-requirements-system/). If you want to install Apache Arrow GLib manually, see [Apache Arrow install document](https://arrow.apache.org/install/) for details.
37
37
 
38
- Install Red Arrow after you install Apache Arrow GLib:
38
+ If you want to install Red Arrow by Bundler, you can add the followings to your `Gemfile`:
39
+
40
+ ```ruby
41
+ plugin "rubygems-requirements-system"
42
+
43
+ gem "red-arrow"
44
+ ```
45
+
46
+ If you want to install Red Arrow by RubyGems, you can use the following command line:
39
47
 
40
48
  ```console
41
- % gem install red-arrow
49
+ $ gem install rubygems-requirements-system red-arrow
42
50
  ```
43
51
 
44
52
  ## Usage
@@ -56,7 +64,7 @@ table.save("/dev/shm/data-processed.arrow")
56
64
  Note that you need to install Apache Arrow C++/GLib at master before preparing Red Arrow. See also:
57
65
 
58
66
  * For Apache Arrow C++: https://arrow.apache.org/docs/developers/cpp/building.html
59
- * For Apache Arrow GLib: https://github.com/apache/arrow/blob/master/c_glib/README.md
67
+ * For Apache Arrow GLib: https://github.com/apache/arrow/blob/main/c_glib/README.md
60
68
 
61
69
  ```console
62
70
  $ cd ruby/red-arrow
@@ -72,4 +80,4 @@ $ bundle install
72
80
  $ brew install apache-arrow --head
73
81
  $ brew install apache-arrow-glib --head
74
82
  $ bundle exec rake test
75
- ```
83
+ ```
data/ext/arrow/arrow.cpp CHANGED
@@ -43,6 +43,56 @@ namespace red_arrow {
43
43
  VALUE month;
44
44
  VALUE nanosecond;
45
45
  }
46
+
47
+ void
48
+ record_batch_reader_mark(gpointer object)
49
+ {
50
+ auto reader = GARROW_RECORD_BATCH_READER(object);
51
+ auto sources = garrow_record_batch_reader_get_sources(reader);
52
+ for (auto source = sources; sources; sources = g_list_next(sources)) {
53
+ rbgobj_gc_mark_instance(source->data);
54
+ }
55
+ }
56
+
57
+ void
58
+ execute_plan_mark(gpointer object)
59
+ {
60
+ auto plan = GARROW_EXECUTE_PLAN(object);
61
+ auto nodes = garrow_execute_plan_get_nodes(plan);
62
+ for (auto node = nodes; node; node = g_list_next(node)) {
63
+ rbgobj_gc_mark_instance(node->data);
64
+ }
65
+ }
66
+
67
+ void
68
+ call_expression_mark(gpointer object)
69
+ {
70
+ auto expression = GARROW_CALL_EXPRESSION(object);
71
+ auto arguments = garrow_call_expression_get_arguments(expression);
72
+ for (auto argument = arguments; argument; argument = g_list_next(argument)) {
73
+ rbgobj_gc_mark_instance(argument->data);
74
+ }
75
+ }
76
+
77
+ void
78
+ aggregate_node_options_mark(gpointer object)
79
+ {
80
+ auto options = GARROW_AGGREGATE_NODE_OPTIONS(object);
81
+ auto aggregations = garrow_aggregate_node_options_get_aggregations(options);
82
+ for (auto aggregation = aggregations; aggregation; aggregation = g_list_next(aggregation)) {
83
+ rbgobj_gc_mark_instance(aggregation->data);
84
+ }
85
+ }
86
+
87
+ void
88
+ project_node_options_mark(gpointer object)
89
+ {
90
+ auto options = GARROW_PROJECT_NODE_OPTIONS(object);
91
+ auto expressions = garrow_project_node_options_get_expressions(options);
92
+ for (auto expression = expressions; expression; expression = g_list_next(expression)) {
93
+ rbgobj_gc_mark_instance(expression->data);
94
+ }
95
+ }
46
96
  }
47
97
 
48
98
  extern "C" void Init_arrow() {
@@ -62,11 +112,17 @@ extern "C" void Init_arrow() {
62
112
  rb_define_method(cArrowRecordBatch, "raw_records",
63
113
  reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_raw_records),
64
114
  0);
115
+ rb_define_method(cArrowRecordBatch, "each_raw_record",
116
+ reinterpret_cast<rb::RawMethod>(red_arrow::record_batch_each_raw_record),
117
+ 0);
65
118
 
66
119
  auto cArrowTable = rb_const_get_at(mArrow, rb_intern("Table"));
67
120
  rb_define_method(cArrowTable, "raw_records",
68
121
  reinterpret_cast<rb::RawMethod>(red_arrow::table_raw_records),
69
122
  0);
123
+ rb_define_method(cArrowTable, "each_raw_record",
124
+ reinterpret_cast<rb::RawMethod>(red_arrow::table_each_raw_record),
125
+ 0);
70
126
 
71
127
  red_arrow::cDate = rb_const_get(rb_cObject, rb_intern("Date"));
72
128
 
@@ -93,4 +149,15 @@ extern "C" void Init_arrow() {
93
149
  red_arrow::symbols::millisecond = ID2SYM(rb_intern("millisecond"));
94
150
  red_arrow::symbols::month = ID2SYM(rb_intern("month"));
95
151
  red_arrow::symbols::nanosecond = ID2SYM(rb_intern("nanosecond"));
152
+
153
+ rbgobj_register_mark_func(GARROW_TYPE_RECORD_BATCH_READER,
154
+ red_arrow::record_batch_reader_mark);
155
+ rbgobj_register_mark_func(GARROW_TYPE_EXECUTE_PLAN,
156
+ red_arrow::execute_plan_mark);
157
+ rbgobj_register_mark_func(GARROW_TYPE_CALL_EXPRESSION,
158
+ red_arrow::call_expression_mark);
159
+ rbgobj_register_mark_func(GARROW_TYPE_AGGREGATE_NODE_OPTIONS,
160
+ red_arrow::aggregate_node_options_mark);
161
+ rbgobj_register_mark_func(GARROW_TYPE_PROJECT_NODE_OPTIONS,
162
+ red_arrow::project_node_options_mark);
96
163
  }
@@ -25,6 +25,16 @@ namespace red_arrow {
25
25
  return list_array_value_converter_->convert(array, i);
26
26
  }
27
27
 
28
+ VALUE ArrayValueConverter::convert(const arrow::LargeListArray& array,
29
+ const int64_t i) {
30
+ return large_list_array_value_converter_->convert(array, i);
31
+ }
32
+
33
+ VALUE ArrayValueConverter::convert(const arrow::FixedSizeListArray& array,
34
+ const int64_t i) {
35
+ return fixed_size_list_array_value_converter_->convert(array, i);
36
+ }
37
+
28
38
  VALUE ArrayValueConverter::convert(const arrow::StructArray& array,
29
39
  const int64_t i) {
30
40
  return struct_array_value_converter_->convert(array, i);