red-arrow 4.0.1 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: df66b2dac421ab800bd00510e4a13e250646d1f1c27541f2ca44cd04be38a3ba
4
- data.tar.gz: 3c39ec6a1a3ee48708a28583c125d96f9195e9973f6f2f3707cd923eca915329
3
+ metadata.gz: dcc31d7ec789dfa277963fda5b3710e7eec2451b6fbe425282638e48e684b95f
4
+ data.tar.gz: 00bfbf2e0357861f7247c42c4faae6a72d9adfca745ad1b3298f549997847158
5
5
  SHA512:
6
- metadata.gz: 9ebf50ffecfdea79140b158f5aeea0cc3bb63403e405bcbba2b34e288b8575df3df859b8f46eb12e26199e566237016d0e3907acc1d7c0d16e82d638c7bdce2e
7
- data.tar.gz: 1ff1389d28db025efd82c87e9f85cfb7e3bce21abe85a1b943ca8536bbc1015b0015c05ab45e5d9a35914034a673da0bbc59f8211bb0b620ee4a7487f5fec413
6
+ metadata.gz: 8e60e9a4ce4d379f7f1d9387154ae4d9fe4ec91ffe304647966d33d52e380950cc0a8a5f025164466526feb5198638139bb7564f2066fd93b7c21dcd697e07ca
7
+ data.tar.gz: 140a8f087eeb42baae703eee677c97261c6b30317c0f72ce49231a117b2e32f34175bad71f913bdd6f940abd8097c77f968b27780cecbfca062be60ac87d09ec
data/ext/arrow/arrow.cpp CHANGED
@@ -18,6 +18,7 @@
18
18
  */
19
19
 
20
20
  #include "red-arrow.hpp"
21
+ #include "memory-view.hpp"
21
22
 
22
23
  #include <ruby.hpp>
23
24
 
@@ -78,4 +79,6 @@ extern "C" void Init_arrow() {
78
79
  red_arrow::id_jd = rb_intern("jd");
79
80
  red_arrow::id_new = rb_intern("new");
80
81
  red_arrow::id_to_datetime = rb_intern("to_datetime");
82
+
83
+ red_arrow::memory_view::init(mArrow);
81
84
  }
@@ -0,0 +1,311 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include "memory-view.hpp"
21
+
22
+ #include <arrow-glib/arrow-glib.hpp>
23
+ #include <rbgobject.h>
24
+
25
+ #include <ruby/version.h>
26
+
27
+ #if RUBY_API_VERSION_MAJOR >= 3
28
+ # define HAVE_MEMORY_VIEW
29
+ # define private memory_view_private
30
+ # include <ruby/memory_view.h>
31
+ # undef private
32
+ #endif
33
+
34
+ #include <sstream>
35
+
36
+ namespace red_arrow {
37
+ namespace memory_view {
38
+ #ifdef HAVE_MEMORY_VIEW
39
+ // This is workaround for the following rb_memory_view_t problems
40
+ // in C++:
41
+ //
42
+ // * Can't use "private" as member name
43
+ // * Can't assign a value to "rb_memory_view_t::private"
44
+ //
45
+ // This has compatible layout with rb_memory_view_t.
46
+ struct memory_view {
47
+ VALUE obj;
48
+ void *data;
49
+ ssize_t byte_size;
50
+ bool readonly;
51
+ const char *format;
52
+ ssize_t item_size;
53
+ struct {
54
+ const rb_memory_view_item_component_t *components;
55
+ size_t length;
56
+ } item_desc;
57
+ ssize_t ndim;
58
+ const ssize_t *shape;
59
+ const ssize_t *strides;
60
+ const ssize_t *sub_offsets;
61
+ void *private_data;
62
+ };
63
+
64
// Per-view state attached to memory_view::private_data. It owns format
// strings that are built at runtime so that the "format" pointer stored
// in the view stays valid for as long as this object is alive.
struct PrivateData {
  std::string format{};
};
67
+
68
+ class PrimitiveArrayGetter : public arrow::ArrayVisitor {
69
+ public:
70
+ explicit PrimitiveArrayGetter(memory_view *view)
71
+ : view_(view) {
72
+ }
73
+
74
+ arrow::Status Visit(const arrow::BooleanArray& array) override {
75
+ fill(static_cast<const arrow::Array&>(array));
76
+ // Memory view doesn't support bit stream. We use one byte
77
+ // for 8 elements. Users can't calculate the number of
78
+ // elements from memory view but it's limitation of memory view.
79
+ #ifdef ARROW_LITTLE_ENDIAN
80
+ view_->format = "b8";
81
+ #else
82
+ view_->format = "B8";
83
+ #endif
84
+ view_->item_size = 1;
85
+ view_->byte_size = (array.length() + 7) / 8;
86
+ return arrow::Status::OK();
87
+ }
88
+
89
+ arrow::Status Visit(const arrow::Int8Array& array) override {
90
+ fill(static_cast<const arrow::Array&>(array));
91
+ view_->format = "c";
92
+ return arrow::Status::OK();
93
+ }
94
+
95
+ arrow::Status Visit(const arrow::Int16Array& array) override {
96
+ fill(static_cast<const arrow::Array&>(array));
97
+ view_->format = "s";
98
+ return arrow::Status::OK();
99
+ }
100
+
101
+ arrow::Status Visit(const arrow::Int32Array& array) override {
102
+ fill(static_cast<const arrow::Array&>(array));
103
+ view_->format = "l";
104
+ return arrow::Status::OK();
105
+ }
106
+
107
+ arrow::Status Visit(const arrow::Int64Array& array) override {
108
+ fill(static_cast<const arrow::Array&>(array));
109
+ view_->format = "q";
110
+ return arrow::Status::OK();
111
+ }
112
+
113
+ arrow::Status Visit(const arrow::UInt8Array& array) override {
114
+ fill(static_cast<const arrow::Array&>(array));
115
+ view_->format = "C";
116
+ return arrow::Status::OK();
117
+ }
118
+
119
+ arrow::Status Visit(const arrow::UInt16Array& array) override {
120
+ fill(static_cast<const arrow::Array&>(array));
121
+ view_->format = "S";
122
+ return arrow::Status::OK();
123
+ }
124
+
125
+ arrow::Status Visit(const arrow::UInt32Array& array) override {
126
+ fill(static_cast<const arrow::Array&>(array));
127
+ view_->format = "L";
128
+ return arrow::Status::OK();
129
+ }
130
+
131
+ arrow::Status Visit(const arrow::UInt64Array& array) override {
132
+ fill(static_cast<const arrow::Array&>(array));
133
+ view_->format = "Q";
134
+ return arrow::Status::OK();
135
+ }
136
+
137
+ arrow::Status Visit(const arrow::FloatArray& array) override {
138
+ fill(static_cast<const arrow::Array&>(array));
139
+ view_->format = "f";
140
+ return arrow::Status::OK();
141
+ }
142
+
143
+ arrow::Status Visit(const arrow::DoubleArray& array) override {
144
+ fill(static_cast<const arrow::Array&>(array));
145
+ view_->format = "d";
146
+ return arrow::Status::OK();
147
+ }
148
+
149
+ arrow::Status Visit(const arrow::FixedSizeBinaryArray& array) override {
150
+ fill(static_cast<const arrow::Array&>(array));
151
+ auto priv = static_cast<PrivateData *>(view_->private_data);
152
+ const auto type =
153
+ std::static_pointer_cast<const arrow::FixedSizeBinaryType>(
154
+ array.type());
155
+ std::ostringstream output;
156
+ output << "C" << type->byte_width();
157
+ priv->format = output.str();
158
+ view_->format = priv->format.c_str();
159
+ return arrow::Status::OK();
160
+ }
161
+
162
+ arrow::Status Visit(const arrow::Date32Array& array) override {
163
+ fill(static_cast<const arrow::Array&>(array));
164
+ view_->format = "l";
165
+ return arrow::Status::OK();
166
+ }
167
+
168
+ arrow::Status Visit(const arrow::Date64Array& array) override {
169
+ fill(static_cast<const arrow::Array&>(array));
170
+ view_->format = "q";
171
+ return arrow::Status::OK();
172
+ }
173
+
174
+ arrow::Status Visit(const arrow::Time32Array& array) override {
175
+ fill(static_cast<const arrow::Array&>(array));
176
+ view_->format = "l";
177
+ return arrow::Status::OK();
178
+ }
179
+
180
+ arrow::Status Visit(const arrow::Time64Array& array) override {
181
+ fill(static_cast<const arrow::Array&>(array));
182
+ view_->format = "q";
183
+ return arrow::Status::OK();
184
+ }
185
+
186
+ arrow::Status Visit(const arrow::TimestampArray& array) override {
187
+ fill(static_cast<const arrow::Array&>(array));
188
+ view_->format = "q";
189
+ return arrow::Status::OK();
190
+ }
191
+
192
+ arrow::Status Visit(const arrow::Decimal128Array& array) override {
193
+ fill(static_cast<const arrow::Array&>(array));
194
+ view_->format = "q2";
195
+ return arrow::Status::OK();
196
+ }
197
+
198
+ arrow::Status Visit(const arrow::Decimal256Array& array) override {
199
+ fill(static_cast<const arrow::Array&>(array));
200
+ view_->format = "q4";
201
+ return arrow::Status::OK();
202
+ }
203
+
204
+ private:
205
+ void fill(const arrow::Array& array) {
206
+ const auto array_data = array.data();
207
+ const auto data = array_data->GetValuesSafe<uint8_t>(1);
208
+ view_->data = const_cast<void *>(reinterpret_cast<const void *>(data));
209
+ const auto type =
210
+ std::static_pointer_cast<const arrow::FixedWidthType>(array.type());
211
+ view_->item_size = type->bit_width() / 8;
212
+ view_->byte_size = view_->item_size * array.length();
213
+ }
214
+
215
+ memory_view *view_;
216
+ };
217
+
218
+ bool primitive_array_get(VALUE obj, rb_memory_view_t *view, int flags) {
219
+ if (flags != RUBY_MEMORY_VIEW_SIMPLE) {
220
+ return false;
221
+ }
222
+ auto view_ = reinterpret_cast<memory_view *>(view);
223
+ view_->obj = obj;
224
+ view_->private_data = new PrivateData();
225
+ auto array = GARROW_ARRAY(RVAL2GOBJ(obj));
226
+ auto arrow_array = garrow_array_get_raw(array);
227
+ PrimitiveArrayGetter getter(view_);
228
+ auto status = arrow_array->Accept(&getter);
229
+ if (!status.ok()) {
230
+ return false;
231
+ }
232
+ view_->readonly = true;
233
+ view_->ndim = 1;
234
+ view_->shape = NULL;
235
+ view_->strides = NULL;
236
+ view_->sub_offsets = NULL;
237
+ return true;
238
+ }
239
+
240
+ bool primitive_array_release(VALUE obj, rb_memory_view_t *view) {
241
+ auto view_ = reinterpret_cast<memory_view *>(view);
242
+ delete static_cast<PrivateData *>(view_->private_data);
243
+ return true;
244
+ }
245
+
246
+ bool primitive_array_available_p(VALUE obj) {
247
+ return true;
248
+ }
249
+
250
+ rb_memory_view_entry_t primitive_array_entry = {
251
+ primitive_array_get,
252
+ primitive_array_release,
253
+ primitive_array_available_p,
254
+ };
255
+
256
+ bool buffer_get(VALUE obj, rb_memory_view_t *view, int flags) {
257
+ if (flags != RUBY_MEMORY_VIEW_SIMPLE) {
258
+ return false;
259
+ }
260
+ auto view_ = reinterpret_cast<memory_view *>(view);
261
+ view_->obj = obj;
262
+ auto buffer = GARROW_BUFFER(RVAL2GOBJ(obj));
263
+ auto arrow_buffer = garrow_buffer_get_raw(buffer);
264
+ view_->data =
265
+ const_cast<void *>(reinterpret_cast<const void *>(arrow_buffer->data()));
266
+ // Memory view doesn't support bit stream. We use one byte
267
+ // for 8 elements. Users can't calculate the number of
268
+ // elements from memory view but it's limitation of memory view.
269
+ #ifdef ARROW_LITTLE_ENDIAN
270
+ view_->format = "b8";
271
+ #else
272
+ view_->format = "B8";
273
+ #endif
274
+ view_->item_size = 1;
275
+ view_->byte_size = arrow_buffer->size();
276
+ view_->readonly = true;
277
+ view_->ndim = 1;
278
+ view_->shape = NULL;
279
+ view_->strides = NULL;
280
+ view_->sub_offsets = NULL;
281
+ return true;
282
+ }
283
+
284
+ bool buffer_release(VALUE obj, rb_memory_view_t *view) {
285
+ return true;
286
+ }
287
+
288
+ bool buffer_available_p(VALUE obj) {
289
+ return true;
290
+ }
291
+
292
+ rb_memory_view_entry_t buffer_entry = {
293
+ buffer_get,
294
+ buffer_release,
295
+ buffer_available_p,
296
+ };
297
+ #endif
298
+
299
+ void init(VALUE mArrow) {
300
+ #ifdef HAVE_MEMORY_VIEW
301
+ auto cPrimitiveArray =
302
+ rb_const_get_at(mArrow, rb_intern("PrimitiveArray"));
303
+ rb_memory_view_register(cPrimitiveArray,
304
+ &(red_arrow::memory_view::primitive_array_entry));
305
+
306
+ auto cBuffer = rb_const_get_at(mArrow, rb_intern("Buffer"));
307
+ rb_memory_view_register(cBuffer, &(red_arrow::memory_view::buffer_entry));
308
+ #endif
309
+ }
310
+ }
311
+ }
@@ -0,0 +1,26 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <ruby.hpp>
21
+
22
+ namespace red_arrow {
23
+ namespace memory_view {
24
+ void init(VALUE mArrow);
25
+ }
26
+ }
data/lib/arrow/array.rb CHANGED
@@ -55,6 +55,18 @@ module Arrow
55
55
  end
56
56
  end
57
57
 
58
# Compares this array with another array.
#
# @param other [Arrow::Array] The array to be compared.
# @param options [Arrow::EqualOptions, Hash] (nil)
#   The options that customize how the arrays are compared.
#
# @return [Boolean]
#   `true` if both of them have the same data, `false` otherwise.
#
# @since 5.0.0
def equal_array?(other, options=nil)
  return equal_options(other, options)
end
69
+
58
70
  def each
59
71
  return to_enum(__method__) unless block_given?
60
72
 
data/lib/arrow/buffer.rb CHANGED
@@ -17,12 +17,16 @@
17
17
 
18
18
  module Arrow
19
19
  class Buffer
20
- alias_method :initialize_raw, :initialize
21
- private :initialize_raw
22
-
23
- def initialize(data)
24
- @data = data
25
- initialize_raw(data)
20
+ class << self
21
# Builds a buffer from a String; any other kind of value is not
# converted and yields nil.
#
# @api private
def try_convert(value)
  String === value ? new(value) : nil
end
26
30
  end
27
31
  end
28
32
  end
@@ -0,0 +1,25 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  # Mixin for classes whose constructor arguments must stay referenced
  # by the created object.
  #
  # The constructor forwards every argument unchanged to the next
  # initialize in the ancestor chain and then stores the arguments in
  # @arguments. Presumably this keeps them from being garbage collected
  # while the object is alive, as the module name suggests.
  module ConstructorArgumentsGCGuardable
    def initialize(*arguments)
      super
      @arguments = arguments
    end
  end
end
@@ -0,0 +1,98 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
module Arrow
  class Datum
    class << self
      # Tries to wrap a value in a datum.
      #
      # Arrow arrays, chunked arrays and scalars are wrapped directly.
      # Plain Ruby values are first converted to an Arrow array or scalar:
      #
      # * ::Array: an array built by ArrayBuilder.build
      # * Integer: the first scalar type whose range contains the value,
      #   trying UInt8, Int8, UInt16, Int16, UInt32, Int32, UInt64 and
      #   Int64 in this order
      # * Float: a double scalar
      # * true/false: a boolean scalar
      # * String: a string scalar for ASCII-only or UTF-8 strings, a
      #   binary scalar otherwise; the "large" variants are used when the
      #   byte size exceeds 2 ** 31 - 1
      # * Date: a date32 scalar
      # * Arrow::Time: a time32 or time64 scalar depending on the unit
      # * ::Time: a nanosecond timestamp scalar
      # * Arrow::Decimal128: a decimal128 scalar
      #
      # @return [Arrow::Datum, nil]
      #   The datum, or `nil` if the value can't be converted.
      #
      # @api private
      def try_convert(value)
        case value
        when Array
          ArrayDatum.new(value)
        when ChunkedArray
          ChunkedArrayDatum.new(value)
        when Scalar
          ScalarDatum.new(value)
        when ::Array
          ArrayDatum.new(ArrayBuilder.build(value))
        when Integer
          # The order matters: the narrowest type wins and unsigned types
          # are preferred for non-negative values.
          case value
          when (0..((2 ** 8) - 1))
            try_convert(UInt8Scalar.new(value))
          when ((-(2 ** 7))..((2 ** 7) - 1))
            try_convert(Int8Scalar.new(value))
          when (0..((2 ** 16) - 1))
            try_convert(UInt16Scalar.new(value))
          when ((-(2 ** 15))..((2 ** 15) - 1))
            try_convert(Int16Scalar.new(value))
          when (0..((2 ** 32) - 1))
            try_convert(UInt32Scalar.new(value))
          when ((-(2 ** 31))..((2 ** 31) - 1))
            try_convert(Int32Scalar.new(value))
          when (0..((2 ** 64) - 1))
            try_convert(UInt64Scalar.new(value))
          when ((-(2 ** 63))..((2 ** 63) - 1))
            try_convert(Int64Scalar.new(value))
          else
            nil
          end
        when Float
          try_convert(DoubleScalar.new(value))
        when true, false
          try_convert(BooleanScalar.new(value))
        when String
          if value.ascii_only? or value.encoding == Encoding::UTF_8
            if value.bytesize <= ((2 ** 31) - 1)
              try_convert(StringScalar.new(value))
            else
              try_convert(LargeStringScalar.new(value))
            end
          else
            if value.bytesize <= ((2 ** 31) - 1)
              try_convert(BinaryScalar.new(value))
            else
              try_convert(LargeBinaryScalar.new(value))
            end
          end
        when Date
          date32_value = (value - Date32ArrayBuilder::UNIX_EPOCH).to_i
          try_convert(Date32Scalar.new(date32_value))
        when Time
          # This is Arrow::Time; plain Ruby times are handled below.
          case value.unit
          when TimeUnit::SECOND, TimeUnit::MILLI
            data_type = Time32DataType.new(value.unit)
            scalar_class = Time32Scalar
          else
            data_type = Time64DataType.new(value.unit)
            scalar_class = Time64Scalar
          end
          try_convert(scalar_class.new(data_type, value.value))
        when ::Time
          data_type = TimestampDataType.new(:nano)
          timestamp_value = value.to_i * 1_000_000_000 + value.nsec
          try_convert(TimestampScalar.new(data_type, timestamp_value))
        when Decimal128
          # A decimal scalar needs a decimal data type and the decimal
          # itself; building a timestamp type and value here (as for
          # ::Time) can never work for a decimal.
          #
          # NOTE(review): the decimal value isn't asked for a precision
          # or scale here, so the widest decimal128 precision (38) and a
          # scale of 0 are assumed -- confirm that this is the intended
          # interpretation.
          data_type = Decimal128DataType.new(38, 0)
          try_convert(Decimal128Scalar.new(data_type, value))
        else
          nil
        end
      end
    end
  end
end