red-arrow 2.0.0 → 5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/ext/arrow/arrow.cpp +3 -0
  3. data/ext/arrow/converters.hpp +15 -2
  4. data/ext/arrow/memory-view.cpp +311 -0
  5. data/ext/arrow/memory-view.hpp +26 -0
  6. data/ext/arrow/raw-records.cpp +1 -0
  7. data/ext/arrow/values.cpp +1 -0
  8. data/lib/arrow/array-builder.rb +11 -6
  9. data/lib/arrow/array.rb +130 -0
  10. data/lib/arrow/bigdecimal-extension.rb +5 -1
  11. data/lib/arrow/buffer.rb +10 -6
  12. data/lib/arrow/constructor-arguments-gc-guardable.rb +25 -0
  13. data/lib/arrow/data-type.rb +14 -5
  14. data/lib/arrow/datum.rb +98 -0
  15. data/lib/arrow/decimal128-array-builder.rb +21 -25
  16. data/lib/arrow/decimal128-data-type.rb +2 -0
  17. data/lib/arrow/decimal128.rb +18 -0
  18. data/lib/arrow/decimal256-array-builder.rb +61 -0
  19. data/lib/arrow/decimal256-array.rb +25 -0
  20. data/lib/arrow/decimal256-data-type.rb +73 -0
  21. data/lib/arrow/decimal256.rb +60 -0
  22. data/lib/arrow/dense-union-data-type.rb +2 -2
  23. data/lib/arrow/dictionary-data-type.rb +2 -2
  24. data/lib/arrow/equal-options.rb +38 -0
  25. data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
  26. data/lib/arrow/fixed-size-binary-array.rb +26 -0
  27. data/lib/arrow/loader.rb +46 -0
  28. data/lib/arrow/scalar.rb +32 -0
  29. data/lib/arrow/sort-key.rb +193 -0
  30. data/lib/arrow/sort-options.rb +109 -0
  31. data/lib/arrow/sparse-union-data-type.rb +2 -2
  32. data/lib/arrow/table.rb +2 -2
  33. data/lib/arrow/time32-data-type.rb +2 -2
  34. data/lib/arrow/time64-data-type.rb +2 -2
  35. data/lib/arrow/timestamp-data-type.rb +2 -2
  36. data/lib/arrow/version.rb +1 -1
  37. data/red-arrow.gemspec +3 -1
  38. data/test/helper.rb +1 -0
  39. data/test/raw-records/test-basic-arrays.rb +17 -0
  40. data/test/raw-records/test-dense-union-array.rb +14 -0
  41. data/test/raw-records/test-list-array.rb +20 -0
  42. data/test/raw-records/test-sparse-union-array.rb +14 -0
  43. data/test/raw-records/test-struct-array.rb +15 -0
  44. data/test/test-array.rb +156 -2
  45. data/test/test-bigdecimal.rb +20 -3
  46. data/test/test-boolean-scalar.rb +26 -0
  47. data/test/test-decimal128-array-builder.rb +18 -1
  48. data/test/test-decimal128-data-type.rb +2 -2
  49. data/test/test-decimal128.rb +38 -0
  50. data/test/test-decimal256-array-builder.rb +112 -0
  51. data/test/test-decimal256-array.rb +38 -0
  52. data/test/test-decimal256-data-type.rb +31 -0
  53. data/test/test-decimal256.rb +102 -0
  54. data/test/test-fixed-size-binary-array-builder.rb +92 -0
  55. data/test/test-fixed-size-binary-array.rb +36 -0
  56. data/test/test-float-scalar.rb +46 -0
  57. data/test/test-function.rb +176 -0
  58. data/test/test-memory-view.rb +434 -0
  59. data/test/test-orc.rb +19 -23
  60. data/test/test-sort-indices.rb +40 -0
  61. data/test/test-sort-key.rb +81 -0
  62. data/test/test-sort-options.rb +58 -0
  63. data/test/test-struct-array-builder.rb +8 -8
  64. data/test/test-struct-array.rb +2 -2
  65. data/test/values/test-basic-arrays.rb +11 -0
  66. data/test/values/test-dense-union-array.rb +14 -0
  67. data/test/values/test-list-array.rb +18 -0
  68. data/test/values/test-sparse-union-array.rb +14 -0
  69. data/test/values/test-struct-array.rb +15 -0
  70. metadata +127 -59
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 171c7e5854ed116c8324153272827255b18f1083d5ae38866903b3cb0faa2977
4
- data.tar.gz: 86c21238d83c5e4a8d114464eb8281dca5ddaff7cb6fb3e08b858a67082d4514
3
+ metadata.gz: dcc31d7ec789dfa277963fda5b3710e7eec2451b6fbe425282638e48e684b95f
4
+ data.tar.gz: 00bfbf2e0357861f7247c42c4faae6a72d9adfca745ad1b3298f549997847158
5
5
  SHA512:
6
- metadata.gz: f46692362251101f0d18782e755dd3e2453a168ddefc564a623cc6f16a336d974fca83079faa63198b201e88e3fc678552bd69adca86bf743daf5ef7cd432ff2
7
- data.tar.gz: 887b098b1d9b832a9197a9ef08e7be63e2d12b1772c45054e02b582d47bc8382c6d73645fce3ea2982d9b1f73454a78fe8372a7b01879cb1d865b971167bc2ad
6
+ metadata.gz: 8e60e9a4ce4d379f7f1d9387154ae4d9fe4ec91ffe304647966d33d52e380950cc0a8a5f025164466526feb5198638139bb7564f2066fd93b7c21dcd697e07ca
7
+ data.tar.gz: 140a8f087eeb42baae703eee677c97261c6b30317c0f72ce49231a117b2e32f34175bad71f913bdd6f940abd8097c77f968b27780cecbfca062be60ac87d09ec
data/ext/arrow/arrow.cpp CHANGED
@@ -18,6 +18,7 @@
18
18
  */
19
19
 
20
20
  #include "red-arrow.hpp"
21
+ #include "memory-view.hpp"
21
22
 
22
23
  #include <ruby.hpp>
23
24
 
@@ -78,4 +79,6 @@ extern "C" void Init_arrow() {
78
79
  red_arrow::id_jd = rb_intern("jd");
79
80
  red_arrow::id_new = rb_intern("new");
80
81
  red_arrow::id_to_datetime = rb_intern("to_datetime");
82
+
83
+ red_arrow::memory_view::init(mArrow);
81
84
  }
@@ -212,7 +212,17 @@ namespace red_arrow {
212
212
 
213
213
  inline VALUE convert(const arrow::Decimal128Array& array,
214
214
  const int64_t i) {
215
- decimal_buffer_ = array.FormatValue(i);
215
+ return convert_decimal(std::move(array.FormatValue(i)));
216
+ }
217
+
218
+ inline VALUE convert(const arrow::Decimal256Array& array,
219
+ const int64_t i) {
220
+ return convert_decimal(std::move(array.FormatValue(i)));
221
+ }
222
+
223
+ private:
224
+ inline VALUE convert_decimal(std::string&& value) {
225
+ decimal_buffer_ = value;
216
226
  return rb_funcall(rb_cObject,
217
227
  id_BigDecimal,
218
228
  1,
@@ -221,7 +231,6 @@ namespace red_arrow {
221
231
  rb_ascii8bit_encoding()));
222
232
  }
223
233
 
224
- private:
225
234
  std::string decimal_buffer_;
226
235
  ListArrayValueConverter* list_array_value_converter_;
227
236
  StructArrayValueConverter* struct_array_value_converter_;
@@ -289,6 +298,7 @@ namespace red_arrow {
289
298
  VISIT(DenseUnion)
290
299
  VISIT(Dictionary)
291
300
  VISIT(Decimal128)
301
+ VISIT(Decimal256)
292
302
  // TODO
293
303
  // VISIT(Extension)
294
304
 
@@ -393,6 +403,7 @@ namespace red_arrow {
393
403
  VISIT(DenseUnion)
394
404
  VISIT(Dictionary)
395
405
  VISIT(Decimal128)
406
+ VISIT(Decimal256)
396
407
  // TODO
397
408
  // VISIT(Extension)
398
409
 
@@ -485,6 +496,7 @@ namespace red_arrow {
485
496
  VISIT(DenseUnion)
486
497
  VISIT(Dictionary)
487
498
  VISIT(Decimal128)
499
+ VISIT(Decimal256)
488
500
  // TODO
489
501
  // VISIT(Extension)
490
502
 
@@ -609,6 +621,7 @@ namespace red_arrow {
609
621
  VISIT(DenseUnion)
610
622
  VISIT(Dictionary)
611
623
  VISIT(Decimal128)
624
+ VISIT(Decimal256)
612
625
  // TODO
613
626
  // VISIT(Extension)
614
627
 
@@ -0,0 +1,311 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include "memory-view.hpp"
21
+
22
+ #include <arrow-glib/arrow-glib.hpp>
23
+ #include <rbgobject.h>
24
+
25
+ #include <ruby/version.h>
26
+
27
+ #if RUBY_API_VERSION_MAJOR >= 3
28
+ # define HAVE_MEMORY_VIEW
29
+ # define private memory_view_private
30
+ # include <ruby/memory_view.h>
31
+ # undef private
32
+ #endif
33
+
34
+ #include <sstream>
35
+
36
+ namespace red_arrow {
37
+ namespace memory_view {
38
+ #ifdef HAVE_MEMORY_VIEW
39
+ // This is a workaround for the following rb_memory_view_t problems
40
+ // in C++:
41
+ //
42
+ // * Can't use "private" as member name
43
+ // * Can't assign a value to "rb_memory_view_t::private"
44
+ //
45
+ // This has compatible layout with rb_memory_view_t.
46
+ struct memory_view {
47
+ VALUE obj;
48
+ void *data;
49
+ ssize_t byte_size;
50
+ bool readonly;
51
+ const char *format;
52
+ ssize_t item_size;
53
+ struct {
54
+ const rb_memory_view_item_component_t *components;
55
+ size_t length;
56
+ } item_desc;
57
+ ssize_t ndim;
58
+ const ssize_t *shape;
59
+ const ssize_t *strides;
60
+ const ssize_t *sub_offsets;
61
+ void *private_data;
62
+ };
63
+
64
// Per-view state attached to memory_view::private_data.
//
// It is allocated in primitive_array_get() and deleted in
// primitive_array_release(). It owns the format string that is built at
// run time for fixed size binary arrays, so the "const char *" stored in
// the view stays valid for as long as this object lives.
struct PrivateData {
  std::string format;
};
67
+
68
+ class PrimitiveArrayGetter : public arrow::ArrayVisitor {
69
+ public:
70
+ explicit PrimitiveArrayGetter(memory_view *view)
71
+ : view_(view) {
72
+ }
73
+
74
+ arrow::Status Visit(const arrow::BooleanArray& array) override {
75
+ fill(static_cast<const arrow::Array&>(array));
76
+ // Memory view doesn't support bit stream. We use one byte
77
+ // for 8 elements. Users can't calculate the number of
78
+ // elements from memory view but it's limitation of memory view.
79
+ #ifdef ARROW_LITTLE_ENDIAN
80
+ view_->format = "b8";
81
+ #else
82
+ view_->format = "B8";
83
+ #endif
84
+ view_->item_size = 1;
85
+ view_->byte_size = (array.length() + 7) / 8;
86
+ return arrow::Status::OK();
87
+ }
88
+
89
+ arrow::Status Visit(const arrow::Int8Array& array) override {
90
+ fill(static_cast<const arrow::Array&>(array));
91
+ view_->format = "c";
92
+ return arrow::Status::OK();
93
+ }
94
+
95
+ arrow::Status Visit(const arrow::Int16Array& array) override {
96
+ fill(static_cast<const arrow::Array&>(array));
97
+ view_->format = "s";
98
+ return arrow::Status::OK();
99
+ }
100
+
101
+ arrow::Status Visit(const arrow::Int32Array& array) override {
102
+ fill(static_cast<const arrow::Array&>(array));
103
+ view_->format = "l";
104
+ return arrow::Status::OK();
105
+ }
106
+
107
+ arrow::Status Visit(const arrow::Int64Array& array) override {
108
+ fill(static_cast<const arrow::Array&>(array));
109
+ view_->format = "q";
110
+ return arrow::Status::OK();
111
+ }
112
+
113
+ arrow::Status Visit(const arrow::UInt8Array& array) override {
114
+ fill(static_cast<const arrow::Array&>(array));
115
+ view_->format = "C";
116
+ return arrow::Status::OK();
117
+ }
118
+
119
+ arrow::Status Visit(const arrow::UInt16Array& array) override {
120
+ fill(static_cast<const arrow::Array&>(array));
121
+ view_->format = "S";
122
+ return arrow::Status::OK();
123
+ }
124
+
125
+ arrow::Status Visit(const arrow::UInt32Array& array) override {
126
+ fill(static_cast<const arrow::Array&>(array));
127
+ view_->format = "L";
128
+ return arrow::Status::OK();
129
+ }
130
+
131
+ arrow::Status Visit(const arrow::UInt64Array& array) override {
132
+ fill(static_cast<const arrow::Array&>(array));
133
+ view_->format = "Q";
134
+ return arrow::Status::OK();
135
+ }
136
+
137
+ arrow::Status Visit(const arrow::FloatArray& array) override {
138
+ fill(static_cast<const arrow::Array&>(array));
139
+ view_->format = "f";
140
+ return arrow::Status::OK();
141
+ }
142
+
143
+ arrow::Status Visit(const arrow::DoubleArray& array) override {
144
+ fill(static_cast<const arrow::Array&>(array));
145
+ view_->format = "d";
146
+ return arrow::Status::OK();
147
+ }
148
+
149
+ arrow::Status Visit(const arrow::FixedSizeBinaryArray& array) override {
150
+ fill(static_cast<const arrow::Array&>(array));
151
+ auto priv = static_cast<PrivateData *>(view_->private_data);
152
+ const auto type =
153
+ std::static_pointer_cast<const arrow::FixedSizeBinaryType>(
154
+ array.type());
155
+ std::ostringstream output;
156
+ output << "C" << type->byte_width();
157
+ priv->format = output.str();
158
+ view_->format = priv->format.c_str();
159
+ return arrow::Status::OK();
160
+ }
161
+
162
+ arrow::Status Visit(const arrow::Date32Array& array) override {
163
+ fill(static_cast<const arrow::Array&>(array));
164
+ view_->format = "l";
165
+ return arrow::Status::OK();
166
+ }
167
+
168
+ arrow::Status Visit(const arrow::Date64Array& array) override {
169
+ fill(static_cast<const arrow::Array&>(array));
170
+ view_->format = "q";
171
+ return arrow::Status::OK();
172
+ }
173
+
174
+ arrow::Status Visit(const arrow::Time32Array& array) override {
175
+ fill(static_cast<const arrow::Array&>(array));
176
+ view_->format = "l";
177
+ return arrow::Status::OK();
178
+ }
179
+
180
+ arrow::Status Visit(const arrow::Time64Array& array) override {
181
+ fill(static_cast<const arrow::Array&>(array));
182
+ view_->format = "q";
183
+ return arrow::Status::OK();
184
+ }
185
+
186
+ arrow::Status Visit(const arrow::TimestampArray& array) override {
187
+ fill(static_cast<const arrow::Array&>(array));
188
+ view_->format = "q";
189
+ return arrow::Status::OK();
190
+ }
191
+
192
+ arrow::Status Visit(const arrow::Decimal128Array& array) override {
193
+ fill(static_cast<const arrow::Array&>(array));
194
+ view_->format = "q2";
195
+ return arrow::Status::OK();
196
+ }
197
+
198
+ arrow::Status Visit(const arrow::Decimal256Array& array) override {
199
+ fill(static_cast<const arrow::Array&>(array));
200
+ view_->format = "q4";
201
+ return arrow::Status::OK();
202
+ }
203
+
204
+ private:
205
+ void fill(const arrow::Array& array) {
206
+ const auto array_data = array.data();
207
+ const auto data = array_data->GetValuesSafe<uint8_t>(1);
208
+ view_->data = const_cast<void *>(reinterpret_cast<const void *>(data));
209
+ const auto type =
210
+ std::static_pointer_cast<const arrow::FixedWidthType>(array.type());
211
+ view_->item_size = type->bit_width() / 8;
212
+ view_->byte_size = view_->item_size * array.length();
213
+ }
214
+
215
+ memory_view *view_;
216
+ };
217
+
218
+ bool primitive_array_get(VALUE obj, rb_memory_view_t *view, int flags) {
219
+ if (flags != RUBY_MEMORY_VIEW_SIMPLE) {
220
+ return false;
221
+ }
222
+ auto view_ = reinterpret_cast<memory_view *>(view);
223
+ view_->obj = obj;
224
+ view_->private_data = new PrivateData();
225
+ auto array = GARROW_ARRAY(RVAL2GOBJ(obj));
226
+ auto arrow_array = garrow_array_get_raw(array);
227
+ PrimitiveArrayGetter getter(view_);
228
+ auto status = arrow_array->Accept(&getter);
229
+ if (!status.ok()) {
230
+ return false;
231
+ }
232
+ view_->readonly = true;
233
+ view_->ndim = 1;
234
+ view_->shape = NULL;
235
+ view_->strides = NULL;
236
+ view_->sub_offsets = NULL;
237
+ return true;
238
+ }
239
+
240
+ bool primitive_array_release(VALUE obj, rb_memory_view_t *view) {
241
+ auto view_ = reinterpret_cast<memory_view *>(view);
242
+ delete static_cast<PrivateData *>(view_->private_data);
243
+ return true;
244
+ }
245
+
246
+ bool primitive_array_available_p(VALUE obj) {
247
+ return true;
248
+ }
249
+
250
+ rb_memory_view_entry_t primitive_array_entry = {
251
+ primitive_array_get,
252
+ primitive_array_release,
253
+ primitive_array_available_p,
254
+ };
255
+
256
+ bool buffer_get(VALUE obj, rb_memory_view_t *view, int flags) {
257
+ if (flags != RUBY_MEMORY_VIEW_SIMPLE) {
258
+ return false;
259
+ }
260
+ auto view_ = reinterpret_cast<memory_view *>(view);
261
+ view_->obj = obj;
262
+ auto buffer = GARROW_BUFFER(RVAL2GOBJ(obj));
263
+ auto arrow_buffer = garrow_buffer_get_raw(buffer);
264
+ view_->data =
265
+ const_cast<void *>(reinterpret_cast<const void *>(arrow_buffer->data()));
266
+ // Memory view doesn't support bit stream. We use one byte
267
+ // for 8 elements. Users can't calculate the number of
268
+ // elements from memory view but it's limitation of memory view.
269
+ #ifdef ARROW_LITTLE_ENDIAN
270
+ view_->format = "b8";
271
+ #else
272
+ view_->format = "B8";
273
+ #endif
274
+ view_->item_size = 1;
275
+ view_->byte_size = arrow_buffer->size();
276
+ view_->readonly = true;
277
+ view_->ndim = 1;
278
+ view_->shape = NULL;
279
+ view_->strides = NULL;
280
+ view_->sub_offsets = NULL;
281
+ return true;
282
+ }
283
+
284
+ bool buffer_release(VALUE obj, rb_memory_view_t *view) {
285
+ return true;
286
+ }
287
+
288
+ bool buffer_available_p(VALUE obj) {
289
+ return true;
290
+ }
291
+
292
+ rb_memory_view_entry_t buffer_entry = {
293
+ buffer_get,
294
+ buffer_release,
295
+ buffer_available_p,
296
+ };
297
+ #endif
298
+
299
+ void init(VALUE mArrow) {
300
+ #ifdef HAVE_MEMORY_VIEW
301
+ auto cPrimitiveArray =
302
+ rb_const_get_at(mArrow, rb_intern("PrimitiveArray"));
303
+ rb_memory_view_register(cPrimitiveArray,
304
+ &(red_arrow::memory_view::primitive_array_entry));
305
+
306
+ auto cBuffer = rb_const_get_at(mArrow, rb_intern("Buffer"));
307
+ rb_memory_view_register(cBuffer, &(red_arrow::memory_view::buffer_entry));
308
+ #endif
309
+ }
310
+ }
311
+ }
@@ -0,0 +1,26 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <ruby.hpp>
21
+
22
+ namespace red_arrow {
23
+ namespace memory_view {
24
+ void init(VALUE mArrow);
25
+ }
26
+ }
@@ -104,6 +104,7 @@ namespace red_arrow {
104
104
  VISIT(DenseUnion)
105
105
  VISIT(Dictionary)
106
106
  VISIT(Decimal128)
107
+ VISIT(Decimal256)
107
108
  // TODO
108
109
  // VISIT(Extension)
109
110
 
data/ext/arrow/values.cpp CHANGED
@@ -85,6 +85,7 @@ namespace red_arrow {
85
85
  VISIT(DenseUnion)
86
86
  VISIT(Dictionary)
87
87
  VISIT(Decimal128)
88
+ VISIT(Decimal256)
88
89
  // TODO
89
90
  // VISIT(Extension)
90
91
 
@@ -115,6 +115,17 @@ module Arrow
115
115
  builder: Date32ArrayBuilder.new,
116
116
  detected: true,
117
117
  }
118
+ when BigDecimal
119
+ if value.to_arrow.is_a?(Decimal128)
120
+ {
121
+ builder: Decimal128ArrayBuilder.new,
122
+ }
123
+ else
124
+ {
125
+ builder: Decimal256ArrayBuilder.new,
126
+ detected: true,
127
+ }
128
+ end
118
129
  when ::Array
119
130
  sub_builder_info = nil
120
131
  value.each do |sub_value|
@@ -194,11 +205,5 @@ module Arrow
194
205
  end
195
206
  end
196
207
  end
197
-
198
- def append_nulls(n)
199
- n.times do
200
- append_null
201
- end
202
- end
203
208
  end
204
209
  end