red-arrow 2.0.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/arrow.cpp +3 -0
  3. data/ext/arrow/converters.hpp +15 -2
  4. data/ext/arrow/memory-view.cpp +311 -0
  5. data/ext/arrow/memory-view.hpp +26 -0
  6. data/ext/arrow/raw-records.cpp +1 -0
  7. data/ext/arrow/values.cpp +1 -0
  8. data/lib/arrow/array-builder.rb +11 -6
  9. data/lib/arrow/array.rb +130 -0
  10. data/lib/arrow/bigdecimal-extension.rb +5 -1
  11. data/lib/arrow/buffer.rb +10 -6
  12. data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
  13. data/lib/arrow/data-type.rb +14 -5
  14. data/lib/arrow/datum.rb +98 -0
  15. data/lib/arrow/decimal128-array-builder.rb +21 -25
  16. data/lib/arrow/decimal128-data-type.rb +2 -0
  17. data/lib/arrow/decimal128.rb +18 -0
  18. data/lib/arrow/decimal256-array-builder.rb +61 -0
  19. data/lib/arrow/decimal256-array.rb +25 -0
  20. data/lib/arrow/decimal256-data-type.rb +73 -0
  21. data/lib/arrow/decimal256.rb +60 -0
  22. data/lib/arrow/dense-union-data-type.rb +2 -2
  23. data/lib/arrow/dictionary-data-type.rb +2 -2
  24. data/lib/arrow/equal-options.rb +38 -0
  25. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  26. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  27. data/lib/arrow/loader.rb +46 -0
  28. data/lib/arrow/scalar.rb +32 -0
  29. data/lib/arrow/sort-key.rb +193 -0
  30. data/lib/arrow/sort-options.rb +109 -0
  31. data/lib/arrow/sparse-union-data-type.rb +2 -2
  32. data/lib/arrow/table.rb +2 -2
  33. data/lib/arrow/time32-data-type.rb +2 -2
  34. data/lib/arrow/time64-data-type.rb +2 -2
  35. data/lib/arrow/timestamp-data-type.rb +2 -2
  36. data/lib/arrow/version.rb +1 -1
  37. data/red-arrow.gemspec +3 -1
  38. data/test/helper.rb +1 -0
  39. data/test/raw-records/test-basic-arrays.rb +17 -0
  40. data/test/raw-records/test-dense-union-array.rb +14 -0
  41. data/test/raw-records/test-list-array.rb +20 -0
  42. data/test/raw-records/test-sparse-union-array.rb +14 -0
  43. data/test/raw-records/test-struct-array.rb +15 -0
  44. data/test/test-array.rb +156 -2
  45. data/test/test-bigdecimal.rb +20 -3
  46. data/test/test-boolean-scalar.rb +26 -0
  47. data/test/test-decimal128-array-builder.rb +18 -1
  48. data/test/test-decimal128-data-type.rb +2 -2
  49. data/test/test-decimal128.rb +38 -0
  50. data/test/test-decimal256-array-builder.rb +112 -0
  51. data/test/test-decimal256-array.rb +38 -0
  52. data/test/test-decimal256-data-type.rb +31 -0
  53. data/test/test-decimal256.rb +102 -0
  54. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  55. data/test/test-fixed-size-binary-array.rb +36 -0
  56. data/test/test-float-scalar.rb +46 -0
  57. data/test/test-function.rb +176 -0
  58. data/test/test-memory-view.rb +434 -0
  59. data/test/test-orc.rb +19 -23
  60. data/test/test-sort-indices.rb +40 -0
  61. data/test/test-sort-key.rb +81 -0
  62. data/test/test-sort-options.rb +58 -0
  63. data/test/test-struct-array-builder.rb +8 -8
  64. data/test/test-struct-array.rb +2 -2
  65. data/test/values/test-basic-arrays.rb +11 -0
  66. data/test/values/test-dense-union-array.rb +14 -0
  67. data/test/values/test-list-array.rb +18 -0
  68. data/test/values/test-sparse-union-array.rb +14 -0
  69. data/test/values/test-struct-array.rb +15 -0
  70. metadata +127 -59
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 171c7e5854ed116c8324153272827255b18f1083d5ae38866903b3cb0faa2977
4
- data.tar.gz: 86c21238d83c5e4a8d114464eb8281dca5ddaff7cb6fb3e08b858a67082d4514
3
+ metadata.gz: dcc31d7ec789dfa277963fda5b3710e7eec2451b6fbe425282638e48e684b95f
4
+ data.tar.gz: 00bfbf2e0357861f7247c42c4faae6a72d9adfca745ad1b3298f549997847158
5
5
  SHA512:
6
- metadata.gz: f46692362251101f0d18782e755dd3e2453a168ddefc564a623cc6f16a336d974fca83079faa63198b201e88e3fc678552bd69adca86bf743daf5ef7cd432ff2
7
- data.tar.gz: 887b098b1d9b832a9197a9ef08e7be63e2d12b1772c45054e02b582d47bc8382c6d73645fce3ea2982d9b1f73454a78fe8372a7b01879cb1d865b971167bc2ad
6
+ metadata.gz: 8e60e9a4ce4d379f7f1d9387154ae4d9fe4ec91ffe304647966d33d52e380950cc0a8a5f025164466526feb5198638139bb7564f2066fd93b7c21dcd697e07ca
7
+ data.tar.gz: 140a8f087eeb42baae703eee677c97261c6b30317c0f72ce49231a117b2e32f34175bad71f913bdd6f940abd8097c77f968b27780cecbfca062be60ac87d09ec
data/ext/arrow/arrow.cpp CHANGED
@@ -18,6 +18,7 @@
18
18
  */
19
19
 
20
20
  #include "red-arrow.hpp"
21
+ #include "memory-view.hpp"
21
22
 
22
23
  #include <ruby.hpp>
23
24
 
@@ -78,4 +79,6 @@ extern "C" void Init_arrow() {
78
79
  red_arrow::id_jd = rb_intern("jd");
79
80
  red_arrow::id_new = rb_intern("new");
80
81
  red_arrow::id_to_datetime = rb_intern("to_datetime");
82
+
83
+ red_arrow::memory_view::init(mArrow);
81
84
  }
data/ext/arrow/converters.hpp CHANGED
@@ -212,7 +212,17 @@ namespace red_arrow {
212
212
 
213
213
  inline VALUE convert(const arrow::Decimal128Array& array,
214
214
  const int64_t i) {
215
- decimal_buffer_ = array.FormatValue(i);
215
+ return convert_decimal(std::move(array.FormatValue(i)));
216
+ }
217
+
218
+ inline VALUE convert(const arrow::Decimal256Array& array,
219
+ const int64_t i) {
220
+ return convert_decimal(std::move(array.FormatValue(i)));
221
+ }
222
+
223
+ private:
224
+ inline VALUE convert_decimal(std::string&& value) {
225
+ decimal_buffer_ = value;
216
226
  return rb_funcall(rb_cObject,
217
227
  id_BigDecimal,
218
228
  1,
@@ -221,7 +231,6 @@ namespace red_arrow {
221
231
  rb_ascii8bit_encoding()));
222
232
  }
223
233
 
224
- private:
225
234
  std::string decimal_buffer_;
226
235
  ListArrayValueConverter* list_array_value_converter_;
227
236
  StructArrayValueConverter* struct_array_value_converter_;
@@ -289,6 +298,7 @@ namespace red_arrow {
289
298
  VISIT(DenseUnion)
290
299
  VISIT(Dictionary)
291
300
  VISIT(Decimal128)
301
+ VISIT(Decimal256)
292
302
  // TODO
293
303
  // VISIT(Extension)
294
304
 
@@ -393,6 +403,7 @@ namespace red_arrow {
393
403
  VISIT(DenseUnion)
394
404
  VISIT(Dictionary)
395
405
  VISIT(Decimal128)
406
+ VISIT(Decimal256)
396
407
  // TODO
397
408
  // VISIT(Extension)
398
409
 
@@ -485,6 +496,7 @@ namespace red_arrow {
485
496
  VISIT(DenseUnion)
486
497
  VISIT(Dictionary)
487
498
  VISIT(Decimal128)
499
+ VISIT(Decimal256)
488
500
  // TODO
489
501
  // VISIT(Extension)
490
502
 
@@ -609,6 +621,7 @@ namespace red_arrow {
609
621
  VISIT(DenseUnion)
610
622
  VISIT(Dictionary)
611
623
  VISIT(Decimal128)
624
+ VISIT(Decimal256)
612
625
  // TODO
613
626
  // VISIT(Extension)
614
627
 
data/ext/arrow/memory-view.cpp ADDED
@@ -0,0 +1,311 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include "memory-view.hpp"
21
+
22
+ #include <arrow-glib/arrow-glib.hpp>
23
+ #include <rbgobject.h>
24
+
25
+ #include <ruby/version.h>
26
+
27
+ #if RUBY_API_VERSION_MAJOR >= 3
28
+ # define HAVE_MEMORY_VIEW
29
+ # define private memory_view_private
30
+ # include <ruby/memory_view.h>
31
+ # undef private
32
+ #endif
33
+
34
+ #include <sstream>
35
+
36
+ namespace red_arrow {
37
+ namespace memory_view {
38
+ #ifdef HAVE_MEMORY_VIEW
39
+ // This is workaround for the following rb_memory_view_t problems
40
+ // in C++:
41
+ //
42
+ // * Can't use "private" as member name
43
+ // * Can't assign a value to "rb_memory_view_t::private"
44
+ //
45
+ // This has compatible layout with rb_memory_view_t.
46
+ struct memory_view {
47
+ VALUE obj;
48
+ void *data;
49
+ ssize_t byte_size;
50
+ bool readonly;
51
+ const char *format;
52
+ ssize_t item_size;
53
+ struct {
54
+ const rb_memory_view_item_component_t *components;
55
+ size_t length;
56
+ } item_desc;
57
+ ssize_t ndim;
58
+ const ssize_t *shape;
59
+ const ssize_t *strides;
60
+ const ssize_t *sub_offsets;
61
+ void *private_data;
62
+ };
63
+
64
+ struct PrivateData {
65
+ std::string format;
66
+ };
67
+
68
+ class PrimitiveArrayGetter : public arrow::ArrayVisitor {
69
+ public:
70
+ explicit PrimitiveArrayGetter(memory_view *view)
71
+ : view_(view) {
72
+ }
73
+
74
+ arrow::Status Visit(const arrow::BooleanArray& array) override {
75
+ fill(static_cast<const arrow::Array&>(array));
76
+ // Memory view doesn't support bit stream. We use one byte
77
+ // for 8 elements. Users can't calculate the number of
78
+ // elements from memory view but it's limitation of memory view.
79
+ #ifdef ARROW_LITTLE_ENDIAN
80
+ view_->format = "b8";
81
+ #else
82
+ view_->format = "B8";
83
+ #endif
84
+ view_->item_size = 1;
85
+ view_->byte_size = (array.length() + 7) / 8;
86
+ return arrow::Status::OK();
87
+ }
88
+
89
+ arrow::Status Visit(const arrow::Int8Array& array) override {
90
+ fill(static_cast<const arrow::Array&>(array));
91
+ view_->format = "c";
92
+ return arrow::Status::OK();
93
+ }
94
+
95
+ arrow::Status Visit(const arrow::Int16Array& array) override {
96
+ fill(static_cast<const arrow::Array&>(array));
97
+ view_->format = "s";
98
+ return arrow::Status::OK();
99
+ }
100
+
101
+ arrow::Status Visit(const arrow::Int32Array& array) override {
102
+ fill(static_cast<const arrow::Array&>(array));
103
+ view_->format = "l";
104
+ return arrow::Status::OK();
105
+ }
106
+
107
+ arrow::Status Visit(const arrow::Int64Array& array) override {
108
+ fill(static_cast<const arrow::Array&>(array));
109
+ view_->format = "q";
110
+ return arrow::Status::OK();
111
+ }
112
+
113
+ arrow::Status Visit(const arrow::UInt8Array& array) override {
114
+ fill(static_cast<const arrow::Array&>(array));
115
+ view_->format = "C";
116
+ return arrow::Status::OK();
117
+ }
118
+
119
+ arrow::Status Visit(const arrow::UInt16Array& array) override {
120
+ fill(static_cast<const arrow::Array&>(array));
121
+ view_->format = "S";
122
+ return arrow::Status::OK();
123
+ }
124
+
125
+ arrow::Status Visit(const arrow::UInt32Array& array) override {
126
+ fill(static_cast<const arrow::Array&>(array));
127
+ view_->format = "L";
128
+ return arrow::Status::OK();
129
+ }
130
+
131
+ arrow::Status Visit(const arrow::UInt64Array& array) override {
132
+ fill(static_cast<const arrow::Array&>(array));
133
+ view_->format = "Q";
134
+ return arrow::Status::OK();
135
+ }
136
+
137
+ arrow::Status Visit(const arrow::FloatArray& array) override {
138
+ fill(static_cast<const arrow::Array&>(array));
139
+ view_->format = "f";
140
+ return arrow::Status::OK();
141
+ }
142
+
143
+ arrow::Status Visit(const arrow::DoubleArray& array) override {
144
+ fill(static_cast<const arrow::Array&>(array));
145
+ view_->format = "d";
146
+ return arrow::Status::OK();
147
+ }
148
+
149
+ arrow::Status Visit(const arrow::FixedSizeBinaryArray& array) override {
150
+ fill(static_cast<const arrow::Array&>(array));
151
+ auto priv = static_cast<PrivateData *>(view_->private_data);
152
+ const auto type =
153
+ std::static_pointer_cast<const arrow::FixedSizeBinaryType>(
154
+ array.type());
155
+ std::ostringstream output;
156
+ output << "C" << type->byte_width();
157
+ priv->format = output.str();
158
+ view_->format = priv->format.c_str();
159
+ return arrow::Status::OK();
160
+ }
161
+
162
+ arrow::Status Visit(const arrow::Date32Array& array) override {
163
+ fill(static_cast<const arrow::Array&>(array));
164
+ view_->format = "l";
165
+ return arrow::Status::OK();
166
+ }
167
+
168
+ arrow::Status Visit(const arrow::Date64Array& array) override {
169
+ fill(static_cast<const arrow::Array&>(array));
170
+ view_->format = "q";
171
+ return arrow::Status::OK();
172
+ }
173
+
174
+ arrow::Status Visit(const arrow::Time32Array& array) override {
175
+ fill(static_cast<const arrow::Array&>(array));
176
+ view_->format = "l";
177
+ return arrow::Status::OK();
178
+ }
179
+
180
+ arrow::Status Visit(const arrow::Time64Array& array) override {
181
+ fill(static_cast<const arrow::Array&>(array));
182
+ view_->format = "q";
183
+ return arrow::Status::OK();
184
+ }
185
+
186
+ arrow::Status Visit(const arrow::TimestampArray& array) override {
187
+ fill(static_cast<const arrow::Array&>(array));
188
+ view_->format = "q";
189
+ return arrow::Status::OK();
190
+ }
191
+
192
+ arrow::Status Visit(const arrow::Decimal128Array& array) override {
193
+ fill(static_cast<const arrow::Array&>(array));
194
+ view_->format = "q2";
195
+ return arrow::Status::OK();
196
+ }
197
+
198
+ arrow::Status Visit(const arrow::Decimal256Array& array) override {
199
+ fill(static_cast<const arrow::Array&>(array));
200
+ view_->format = "q4";
201
+ return arrow::Status::OK();
202
+ }
203
+
204
+ private:
205
+ void fill(const arrow::Array& array) {
206
+ const auto array_data = array.data();
207
+ const auto data = array_data->GetValuesSafe<uint8_t>(1);
208
+ view_->data = const_cast<void *>(reinterpret_cast<const void *>(data));
209
+ const auto type =
210
+ std::static_pointer_cast<const arrow::FixedWidthType>(array.type());
211
+ view_->item_size = type->bit_width() / 8;
212
+ view_->byte_size = view_->item_size * array.length();
213
+ }
214
+
215
+ memory_view *view_;
216
+ };
217
+
218
+ bool primitive_array_get(VALUE obj, rb_memory_view_t *view, int flags) {
219
+ if (flags != RUBY_MEMORY_VIEW_SIMPLE) {
220
+ return false;
221
+ }
222
+ auto view_ = reinterpret_cast<memory_view *>(view);
223
+ view_->obj = obj;
224
+ view_->private_data = new PrivateData();
225
+ auto array = GARROW_ARRAY(RVAL2GOBJ(obj));
226
+ auto arrow_array = garrow_array_get_raw(array);
227
+ PrimitiveArrayGetter getter(view_);
228
+ auto status = arrow_array->Accept(&getter);
229
+ if (!status.ok()) {
230
+ return false;
231
+ }
232
+ view_->readonly = true;
233
+ view_->ndim = 1;
234
+ view_->shape = NULL;
235
+ view_->strides = NULL;
236
+ view_->sub_offsets = NULL;
237
+ return true;
238
+ }
239
+
240
+ bool primitive_array_release(VALUE obj, rb_memory_view_t *view) {
241
+ auto view_ = reinterpret_cast<memory_view *>(view);
242
+ delete static_cast<PrivateData *>(view_->private_data);
243
+ return true;
244
+ }
245
+
246
+ bool primitive_array_available_p(VALUE obj) {
247
+ return true;
248
+ }
249
+
250
+ rb_memory_view_entry_t primitive_array_entry = {
251
+ primitive_array_get,
252
+ primitive_array_release,
253
+ primitive_array_available_p,
254
+ };
255
+
256
+ bool buffer_get(VALUE obj, rb_memory_view_t *view, int flags) {
257
+ if (flags != RUBY_MEMORY_VIEW_SIMPLE) {
258
+ return false;
259
+ }
260
+ auto view_ = reinterpret_cast<memory_view *>(view);
261
+ view_->obj = obj;
262
+ auto buffer = GARROW_BUFFER(RVAL2GOBJ(obj));
263
+ auto arrow_buffer = garrow_buffer_get_raw(buffer);
264
+ view_->data =
265
+ const_cast<void *>(reinterpret_cast<const void *>(arrow_buffer->data()));
266
+ // Memory view doesn't support bit stream. We use one byte
267
+ // for 8 elements. Users can't calculate the number of
268
+ // elements from memory view but it's limitation of memory view.
269
+ #ifdef ARROW_LITTLE_ENDIAN
270
+ view_->format = "b8";
271
+ #else
272
+ view_->format = "B8";
273
+ #endif
274
+ view_->item_size = 1;
275
+ view_->byte_size = arrow_buffer->size();
276
+ view_->readonly = true;
277
+ view_->ndim = 1;
278
+ view_->shape = NULL;
279
+ view_->strides = NULL;
280
+ view_->sub_offsets = NULL;
281
+ return true;
282
+ }
283
+
284
+ bool buffer_release(VALUE obj, rb_memory_view_t *view) {
285
+ return true;
286
+ }
287
+
288
+ bool buffer_available_p(VALUE obj) {
289
+ return true;
290
+ }
291
+
292
+ rb_memory_view_entry_t buffer_entry = {
293
+ buffer_get,
294
+ buffer_release,
295
+ buffer_available_p,
296
+ };
297
+ #endif
298
+
299
+ void init(VALUE mArrow) {
300
+ #ifdef HAVE_MEMORY_VIEW
301
+ auto cPrimitiveArray =
302
+ rb_const_get_at(mArrow, rb_intern("PrimitiveArray"));
303
+ rb_memory_view_register(cPrimitiveArray,
304
+ &(red_arrow::memory_view::primitive_array_entry));
305
+
306
+ auto cBuffer = rb_const_get_at(mArrow, rb_intern("Buffer"));
307
+ rb_memory_view_register(cBuffer, &(red_arrow::memory_view::buffer_entry));
308
+ #endif
309
+ }
310
+ }
311
+ }
data/ext/arrow/memory-view.hpp ADDED
@@ -0,0 +1,26 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <ruby.hpp>
21
+
22
+ namespace red_arrow {
23
+ namespace memory_view {
24
+ void init(VALUE mArrow);
25
+ }
26
+ }
data/ext/arrow/raw-records.cpp CHANGED
@@ -104,6 +104,7 @@ namespace red_arrow {
104
104
  VISIT(DenseUnion)
105
105
  VISIT(Dictionary)
106
106
  VISIT(Decimal128)
107
+ VISIT(Decimal256)
107
108
  // TODO
108
109
  // VISIT(Extension)
109
110
 
data/ext/arrow/values.cpp CHANGED
@@ -85,6 +85,7 @@ namespace red_arrow {
85
85
  VISIT(DenseUnion)
86
86
  VISIT(Dictionary)
87
87
  VISIT(Decimal128)
88
+ VISIT(Decimal256)
88
89
  // TODO
89
90
  // VISIT(Extension)
90
91
 
data/lib/arrow/array-builder.rb CHANGED
@@ -115,6 +115,17 @@ module Arrow
115
115
  builder: Date32ArrayBuilder.new,
116
116
  detected: true,
117
117
  }
118
+ when BigDecimal
119
+ if value.to_arrow.is_a?(Decimal128)
120
+ {
121
+ builder: Decimal128ArrayBuilder.new,
122
+ }
123
+ else
124
+ {
125
+ builder: Decimal256ArrayBuilder.new,
126
+ detected: true,
127
+ }
128
+ end
118
129
  when ::Array
119
130
  sub_builder_info = nil
120
131
  value.each do |sub_value|
@@ -194,11 +205,5 @@ module Arrow
194
205
  end
195
206
  end
196
207
  end
197
-
198
- def append_nulls(n)
199
- n.times do
200
- append_null
201
- end
202
- end
203
208
  end
204
209
  end