red-arrow 0.17.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/arrow/converters.hpp +75 -32
- data/ext/arrow/extconf.rb +14 -3
- data/ext/arrow/raw-records.cpp +3 -1
- data/ext/arrow/values.cpp +3 -1
- data/lib/arrow/array-builder.rb +11 -6
- data/lib/arrow/bigdecimal-extension.rb +5 -1
- data/lib/arrow/buffer.rb +28 -0
- data/lib/arrow/decimal128-array-builder.rb +21 -25
- data/lib/arrow/decimal128-data-type.rb +2 -0
- data/lib/arrow/decimal128.rb +18 -0
- data/lib/arrow/decimal256-array-builder.rb +61 -0
- data/lib/arrow/decimal256-array.rb +25 -0
- data/lib/arrow/decimal256-data-type.rb +73 -0
- data/lib/arrow/decimal256.rb +60 -0
- data/lib/arrow/dictionary-array.rb +24 -0
- data/lib/arrow/fixed-size-binary-array-builder.rb +38 -0
- data/lib/arrow/fixed-size-binary-array.rb +26 -0
- data/lib/arrow/loader.rb +16 -0
- data/lib/arrow/raw-table-converter.rb +47 -0
- data/lib/arrow/record-batch-iterator.rb +22 -0
- data/lib/arrow/record-batch.rb +9 -1
- data/lib/arrow/struct-array-builder.rb +13 -7
- data/lib/arrow/table-saver.rb +6 -6
- data/lib/arrow/table.rb +5 -24
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -0
- data/test/raw-records/test-basic-arrays.rb +17 -0
- data/test/raw-records/test-dense-union-array.rb +15 -34
- data/test/raw-records/test-list-array.rb +20 -0
- data/test/raw-records/test-sparse-union-array.rb +15 -33
- data/test/raw-records/test-struct-array.rb +15 -0
- data/test/test-array.rb +2 -2
- data/test/test-bigdecimal.rb +20 -3
- data/test/test-buffer.rb +11 -0
- data/test/test-decimal128-array-builder.rb +18 -1
- data/test/test-decimal128.rb +38 -0
- data/test/test-decimal256-array-builder.rb +112 -0
- data/test/test-decimal256-array.rb +38 -0
- data/test/test-decimal256-data-type.rb +31 -0
- data/test/test-decimal256.rb +102 -0
- data/test/test-dense-union-data-type.rb +2 -2
- data/test/test-dictionary-array.rb +41 -0
- data/test/test-feather.rb +1 -1
- data/test/test-fixed-size-binary-array-builder.rb +92 -0
- data/test/test-fixed-size-binary-array.rb +36 -0
- data/test/test-record-batch-iterator.rb +37 -0
- data/test/test-record-batch.rb +14 -0
- data/test/test-sparse-union-data-type.rb +2 -2
- data/test/test-struct-array-builder.rb +16 -12
- data/test/test-struct-array.rb +2 -2
- data/test/values/test-basic-arrays.rb +11 -0
- data/test/values/test-dense-union-array.rb +15 -34
- data/test/values/test-list-array.rb +18 -0
- data/test/values/test-sparse-union-array.rb +15 -33
- data/test/values/test-struct-array.rb +15 -0
- metadata +96 -56
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a70c1505c294a8f74c992ec0f2ab5d651647e45dd178db8523f5b5c01e64a541
|
4
|
+
data.tar.gz: dad979599033104a25d5be3d05c57e552c803f9c8002d66b757866c425937ce9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b2363ba6468985a3d237cbb9cec8b2ccb5031aae94153b8f263916e811524e8a5326f408867a843b7077f624050fabcde93520b26e6f8ceb1dd094aaa0068ed6
|
7
|
+
data.tar.gz: 797b89062dfd212d92ca957b8e51c6dc8acf16ab51e5a60ecb0cbf31dd979c288f2afb71274b2235a24d31da344cfce93b40830d6dc6edd000ce0aa607ce8cde
|
data/ext/arrow/converters.hpp
CHANGED
@@ -212,7 +212,17 @@ namespace red_arrow {
|
|
212
212
|
|
213
213
|
inline VALUE convert(const arrow::Decimal128Array& array,
|
214
214
|
const int64_t i) {
|
215
|
-
|
215
|
+
return convert_decimal(std::move(array.FormatValue(i)));
|
216
|
+
}
|
217
|
+
|
218
|
+
inline VALUE convert(const arrow::Decimal256Array& array,
|
219
|
+
const int64_t i) {
|
220
|
+
return convert_decimal(std::move(array.FormatValue(i)));
|
221
|
+
}
|
222
|
+
|
223
|
+
private:
|
224
|
+
inline VALUE convert_decimal(std::string&& value) {
|
225
|
+
decimal_buffer_ = value;
|
216
226
|
return rb_funcall(rb_cObject,
|
217
227
|
id_BigDecimal,
|
218
228
|
1,
|
@@ -221,7 +231,6 @@ namespace red_arrow {
|
|
221
231
|
rb_ascii8bit_encoding()));
|
222
232
|
}
|
223
233
|
|
224
|
-
private:
|
225
234
|
std::string decimal_buffer_;
|
226
235
|
ListArrayValueConverter* list_array_value_converter_;
|
227
236
|
StructArrayValueConverter* struct_array_value_converter_;
|
@@ -285,9 +294,11 @@ namespace red_arrow {
|
|
285
294
|
// VISIT(Interval)
|
286
295
|
VISIT(List)
|
287
296
|
VISIT(Struct)
|
288
|
-
VISIT(
|
297
|
+
VISIT(SparseUnion)
|
298
|
+
VISIT(DenseUnion)
|
289
299
|
VISIT(Dictionary)
|
290
300
|
VISIT(Decimal128)
|
301
|
+
VISIT(Decimal256)
|
291
302
|
// TODO
|
292
303
|
// VISIT(Extension)
|
293
304
|
|
@@ -339,9 +350,9 @@ namespace red_arrow {
|
|
339
350
|
index_ = index;
|
340
351
|
result_ = rb_hash_new();
|
341
352
|
const auto struct_type = array.struct_type();
|
342
|
-
const auto n = struct_type->
|
353
|
+
const auto n = struct_type->num_fields();
|
343
354
|
for (int i = 0; i < n; ++i) {
|
344
|
-
const auto field_type = struct_type->
|
355
|
+
const auto field_type = struct_type->field(i).get();
|
345
356
|
const auto& field_name = field_type->name();
|
346
357
|
auto key_keep = key_;
|
347
358
|
key_ = rb_utf8_str_new(field_name.data(), field_name.length());
|
@@ -388,9 +399,11 @@ namespace red_arrow {
|
|
388
399
|
// VISIT(Interval)
|
389
400
|
VISIT(List)
|
390
401
|
VISIT(Struct)
|
391
|
-
VISIT(
|
402
|
+
VISIT(SparseUnion)
|
403
|
+
VISIT(DenseUnion)
|
392
404
|
VISIT(Dictionary)
|
393
405
|
VISIT(Decimal128)
|
406
|
+
VISIT(Decimal256)
|
394
407
|
// TODO
|
395
408
|
// VISIT(Extension)
|
396
409
|
|
@@ -432,10 +445,10 @@ namespace red_arrow {
|
|
432
445
|
index_ = index;
|
433
446
|
switch (array.mode()) {
|
434
447
|
case arrow::UnionMode::SPARSE:
|
435
|
-
convert_sparse(array);
|
448
|
+
convert_sparse(static_cast<const arrow::SparseUnionArray&>(array));
|
436
449
|
break;
|
437
450
|
case arrow::UnionMode::DENSE:
|
438
|
-
convert_dense(array);
|
451
|
+
convert_dense(static_cast<const arrow::DenseUnionArray&>(array));
|
439
452
|
break;
|
440
453
|
default:
|
441
454
|
rb_raise(rb_eArgError, "Invalid union mode");
|
@@ -479,9 +492,11 @@ namespace red_arrow {
|
|
479
492
|
// VISIT(Interval)
|
480
493
|
VISIT(List)
|
481
494
|
VISIT(Struct)
|
482
|
-
VISIT(
|
495
|
+
VISIT(SparseUnion)
|
496
|
+
VISIT(DenseUnion)
|
483
497
|
VISIT(Dictionary)
|
484
498
|
VISIT(Decimal128)
|
499
|
+
VISIT(Decimal256)
|
485
500
|
// TODO
|
486
501
|
// VISIT(Extension)
|
487
502
|
|
@@ -501,14 +516,14 @@ namespace red_arrow {
|
|
501
516
|
result_ = result;
|
502
517
|
}
|
503
518
|
|
504
|
-
uint8_t
|
519
|
+
uint8_t compute_field_index(const arrow::UnionArray& array,
|
505
520
|
arrow::UnionType* type,
|
506
521
|
const char* tag) {
|
507
522
|
const auto type_code = array.raw_type_codes()[index_];
|
508
523
|
if (type_code >= 0 && type_code <= arrow::UnionType::kMaxTypeCode) {
|
509
|
-
const auto
|
510
|
-
if (
|
511
|
-
return
|
524
|
+
const auto field_id = type->child_ids()[type_code];
|
525
|
+
if (field_id >= 0) {
|
526
|
+
return field_id;
|
512
527
|
}
|
513
528
|
}
|
514
529
|
check_status(arrow::Status::Invalid("Unknown type ID: ", type_code),
|
@@ -516,33 +531,33 @@ namespace red_arrow {
|
|
516
531
|
return 0;
|
517
532
|
}
|
518
533
|
|
519
|
-
void convert_sparse(const arrow::
|
534
|
+
void convert_sparse(const arrow::SparseUnionArray& array) {
|
520
535
|
const auto type =
|
521
536
|
std::static_pointer_cast<arrow::UnionType>(array.type()).get();
|
522
537
|
const auto tag = "[raw-records][union-sparse-array]";
|
523
|
-
const auto
|
524
|
-
const auto
|
525
|
-
const auto& field_name =
|
538
|
+
const auto index = compute_field_index(array, type, tag);
|
539
|
+
const auto field = type->field(index).get();
|
540
|
+
const auto& field_name = field->name();
|
526
541
|
const auto field_name_keep = field_name_;
|
527
542
|
field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
|
528
|
-
const auto
|
529
|
-
check_status(
|
543
|
+
const auto field_array = array.field(index).get();
|
544
|
+
check_status(field_array->Accept(this), tag);
|
530
545
|
field_name_ = field_name_keep;
|
531
546
|
}
|
532
547
|
|
533
|
-
void convert_dense(const arrow::
|
548
|
+
void convert_dense(const arrow::DenseUnionArray& array) {
|
534
549
|
const auto type =
|
535
550
|
std::static_pointer_cast<arrow::UnionType>(array.type()).get();
|
536
551
|
const auto tag = "[raw-records][union-dense-array]";
|
537
|
-
const auto
|
538
|
-
const auto
|
539
|
-
const auto& field_name =
|
552
|
+
const auto index = compute_field_index(array, type, tag);
|
553
|
+
const auto field = type->field(index).get();
|
554
|
+
const auto& field_name = field->name();
|
540
555
|
const auto field_name_keep = field_name_;
|
541
556
|
field_name_ = rb_utf8_str_new(field_name.data(), field_name.length());
|
542
|
-
const auto
|
557
|
+
const auto field_array = array.field(index);
|
543
558
|
const auto index_keep = index_;
|
544
559
|
index_ = array.value_offset(index_);
|
545
|
-
check_status(
|
560
|
+
check_status(field_array->Accept(this), tag);
|
546
561
|
index_ = index_keep;
|
547
562
|
field_name_ = field_name_keep;
|
548
563
|
}
|
@@ -557,30 +572,58 @@ namespace red_arrow {
|
|
557
572
|
public:
|
558
573
|
explicit DictionaryArrayValueConverter(ArrayValueConverter* converter)
|
559
574
|
: array_value_converter_(converter),
|
560
|
-
|
575
|
+
value_index_(0),
|
561
576
|
result_(Qnil) {
|
562
577
|
}
|
563
578
|
|
564
579
|
VALUE convert(const arrow::DictionaryArray& array,
|
565
580
|
const int64_t index) {
|
566
|
-
|
567
|
-
auto
|
568
|
-
check_status(
|
581
|
+
value_index_ = array.GetValueIndex(index);
|
582
|
+
auto dictionary = array.dictionary().get();
|
583
|
+
check_status(dictionary->Accept(this),
|
569
584
|
"[raw-records][dictionary-array]");
|
570
585
|
return result_;
|
571
586
|
}
|
572
587
|
|
573
|
-
// TODO: Convert to real value.
|
574
588
|
#define VISIT(TYPE) \
|
575
589
|
arrow::Status Visit(const arrow::TYPE ## Array& array) override { \
|
576
|
-
result_ = convert_value(array,
|
590
|
+
result_ = convert_value(array, value_index_); \
|
577
591
|
return arrow::Status::OK(); \
|
578
592
|
}
|
579
593
|
|
594
|
+
VISIT(Null)
|
595
|
+
VISIT(Boolean)
|
580
596
|
VISIT(Int8)
|
581
597
|
VISIT(Int16)
|
582
598
|
VISIT(Int32)
|
583
599
|
VISIT(Int64)
|
600
|
+
VISIT(UInt8)
|
601
|
+
VISIT(UInt16)
|
602
|
+
VISIT(UInt32)
|
603
|
+
VISIT(UInt64)
|
604
|
+
// TODO
|
605
|
+
// VISIT(HalfFloat)
|
606
|
+
VISIT(Float)
|
607
|
+
VISIT(Double)
|
608
|
+
VISIT(Binary)
|
609
|
+
VISIT(String)
|
610
|
+
VISIT(FixedSizeBinary)
|
611
|
+
VISIT(Date32)
|
612
|
+
VISIT(Date64)
|
613
|
+
VISIT(Time32)
|
614
|
+
VISIT(Time64)
|
615
|
+
VISIT(Timestamp)
|
616
|
+
// TODO
|
617
|
+
// VISIT(Interval)
|
618
|
+
VISIT(List)
|
619
|
+
VISIT(Struct)
|
620
|
+
VISIT(SparseUnion)
|
621
|
+
VISIT(DenseUnion)
|
622
|
+
VISIT(Dictionary)
|
623
|
+
VISIT(Decimal128)
|
624
|
+
VISIT(Decimal256)
|
625
|
+
// TODO
|
626
|
+
// VISIT(Extension)
|
584
627
|
|
585
628
|
#undef VISIT
|
586
629
|
|
@@ -592,7 +635,7 @@ namespace red_arrow {
|
|
592
635
|
}
|
593
636
|
|
594
637
|
ArrayValueConverter* array_value_converter_;
|
595
|
-
int64_t
|
638
|
+
int64_t value_index_;
|
596
639
|
VALUE result_;
|
597
640
|
};
|
598
641
|
|
data/ext/arrow/extconf.rb
CHANGED
@@ -16,7 +16,8 @@
|
|
16
16
|
# under the License.
|
17
17
|
|
18
18
|
require "extpp"
|
19
|
-
require "mkmf-
|
19
|
+
require "mkmf-gnome"
|
20
|
+
require_relative "../../lib/arrow/version"
|
20
21
|
|
21
22
|
arrow_pkg_config_path = ENV["ARROW_PKG_CONFIG_PATH"]
|
22
23
|
if arrow_pkg_config_path
|
@@ -24,7 +25,12 @@ if arrow_pkg_config_path
|
|
24
25
|
ENV["PKG_CONFIG_PATH"] = pkg_config_paths.join(File::PATH_SEPARATOR)
|
25
26
|
end
|
26
27
|
|
27
|
-
unless required_pkg_config_package(
|
28
|
+
unless required_pkg_config_package([
|
29
|
+
"arrow",
|
30
|
+
Arrow::Version::MAJOR,
|
31
|
+
Arrow::Version::MINOR,
|
32
|
+
Arrow::Version::MICRO,
|
33
|
+
],
|
28
34
|
debian: "libarrow-dev",
|
29
35
|
redhat: "arrow-devel",
|
30
36
|
homebrew: "apache-arrow",
|
@@ -32,7 +38,12 @@ unless required_pkg_config_package("arrow",
|
|
32
38
|
exit(false)
|
33
39
|
end
|
34
40
|
|
35
|
-
unless required_pkg_config_package(
|
41
|
+
unless required_pkg_config_package([
|
42
|
+
"arrow-glib",
|
43
|
+
Arrow::Version::MAJOR,
|
44
|
+
Arrow::Version::MINOR,
|
45
|
+
Arrow::Version::MICRO,
|
46
|
+
],
|
36
47
|
debian: "libarrow-glib-dev",
|
37
48
|
redhat: "arrow-glib-devel",
|
38
49
|
homebrew: "apache-arrow-glib",
|
data/ext/arrow/raw-records.cpp
CHANGED
@@ -100,9 +100,11 @@ namespace red_arrow {
|
|
100
100
|
// VISIT(Interval)
|
101
101
|
VISIT(List)
|
102
102
|
VISIT(Struct)
|
103
|
-
VISIT(
|
103
|
+
VISIT(SparseUnion)
|
104
|
+
VISIT(DenseUnion)
|
104
105
|
VISIT(Dictionary)
|
105
106
|
VISIT(Decimal128)
|
107
|
+
VISIT(Decimal256)
|
106
108
|
// TODO
|
107
109
|
// VISIT(Extension)
|
108
110
|
|
data/ext/arrow/values.cpp
CHANGED
data/lib/arrow/array-builder.rb
CHANGED
@@ -115,6 +115,17 @@ module Arrow
|
|
115
115
|
builder: Date32ArrayBuilder.new,
|
116
116
|
detected: true,
|
117
117
|
}
|
118
|
+
when BigDecimal
|
119
|
+
if value.to_arrow.is_a?(Decimal128)
|
120
|
+
{
|
121
|
+
builder: Decimal128ArrayBuilder.new,
|
122
|
+
}
|
123
|
+
else
|
124
|
+
{
|
125
|
+
builder: Decimal256ArrayBuilder.new,
|
126
|
+
detected: true,
|
127
|
+
}
|
128
|
+
end
|
118
129
|
when ::Array
|
119
130
|
sub_builder_info = nil
|
120
131
|
value.each do |sub_value|
|
@@ -194,11 +205,5 @@ module Arrow
|
|
194
205
|
end
|
195
206
|
end
|
196
207
|
end
|
197
|
-
|
198
|
-
def append_nulls(n)
|
199
|
-
n.times do
|
200
|
-
append_null
|
201
|
-
end
|
202
|
-
end
|
203
208
|
end
|
204
209
|
end
|
data/lib/arrow/buffer.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class Buffer
|
20
|
+
alias_method :initialize_raw, :initialize
|
21
|
+
private :initialize_raw
|
22
|
+
|
23
|
+
def initialize(data)
|
24
|
+
@data = data
|
25
|
+
initialize_raw(data)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/lib/arrow/decimal128-array-builder.rb
CHANGED
@@ -26,36 +26,32 @@ module Arrow
|
|
26
26
|
|
27
27
|
alias_method :append_value_raw, :append_value
|
28
28
|
def append_value(value)
|
29
|
-
|
30
|
-
when nil
|
31
|
-
return append_null
|
32
|
-
when String
|
33
|
-
value = Decimal128.new(value)
|
34
|
-
when Float
|
35
|
-
value = Decimal128.new(value.to_s)
|
36
|
-
when BigDecimal
|
37
|
-
value = value.to_arrow
|
38
|
-
end
|
39
|
-
append_value_raw(value)
|
29
|
+
append_value_raw(normalize_value(value))
|
40
30
|
end
|
41
31
|
|
32
|
+
alias_method :append_values_raw, :append_values
|
42
33
|
def append_values(values, is_valids=nil)
|
43
|
-
if
|
44
|
-
|
45
|
-
|
46
|
-
append_value(values[i])
|
47
|
-
else
|
48
|
-
append_null
|
49
|
-
end
|
34
|
+
if values.is_a?(::Array)
|
35
|
+
values = values.collect do |value|
|
36
|
+
normalize_value(value)
|
50
37
|
end
|
38
|
+
append_values_raw(values, is_valids)
|
51
39
|
else
|
52
|
-
values
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
40
|
+
append_values_packed(values, is_valids)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
def normalize_value(value)
|
46
|
+
case value
|
47
|
+
when String
|
48
|
+
Decimal128.new(value)
|
49
|
+
when Float
|
50
|
+
Decimal128.new(value.to_s)
|
51
|
+
when BigDecimal
|
52
|
+
Decimal128.new(value.to_s)
|
53
|
+
else
|
54
|
+
value
|
59
55
|
end
|
60
56
|
end
|
61
57
|
end
|
data/lib/arrow/decimal128.rb
CHANGED
@@ -38,5 +38,23 @@ module Arrow
|
|
38
38
|
to_s_raw
|
39
39
|
end
|
40
40
|
end
|
41
|
+
|
42
|
+
alias_method :abs!, :abs
|
43
|
+
|
44
|
+
# @since 3.0.0
|
45
|
+
def abs
|
46
|
+
copied = dup
|
47
|
+
copied.abs!
|
48
|
+
copied
|
49
|
+
end
|
50
|
+
|
51
|
+
alias_method :negate!, :negate
|
52
|
+
|
53
|
+
# @since 3.0.0
|
54
|
+
def negate
|
55
|
+
copied = dup
|
56
|
+
copied.negate!
|
57
|
+
copied
|
58
|
+
end
|
41
59
|
end
|
42
60
|
end
|