red-arrow 10.0.1 → 12.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/ext/arrow/converters.hpp +45 -41
- data/ext/arrow/extconf.rb +14 -2
- data/ext/arrow/raw-records.cpp +1 -2
- data/ext/arrow/values.cpp +1 -2
- data/lib/arrow/array-computable.rb +13 -0
- data/lib/arrow/array.rb +5 -0
- data/lib/arrow/chunked-array.rb +23 -1
- data/lib/arrow/column-containable.rb +9 -0
- data/lib/arrow/column.rb +1 -0
- data/lib/arrow/data-type.rb +9 -0
- data/lib/arrow/dense-union-array-builder.rb +49 -0
- data/lib/arrow/dense-union-array.rb +26 -0
- data/lib/arrow/half-float-array-builder.rb +32 -0
- data/lib/arrow/half-float-array.rb +24 -0
- data/lib/arrow/half-float.rb +118 -0
- data/lib/arrow/input-referable.rb +29 -0
- data/lib/arrow/loader.rb +10 -0
- data/lib/arrow/raw-table-converter.rb +7 -5
- data/lib/arrow/record-batch-file-reader.rb +2 -0
- data/lib/arrow/record-batch-stream-reader.rb +2 -0
- data/lib/arrow/record-batch.rb +6 -2
- data/lib/arrow/scalar.rb +67 -0
- data/lib/arrow/slicer.rb +61 -0
- data/lib/arrow/sparse-union-array-builder.rb +56 -0
- data/lib/arrow/sparse-union-array.rb +26 -0
- data/lib/arrow/struct-array-builder.rb +0 -5
- data/lib/arrow/table-loader.rb +4 -4
- data/lib/arrow/table-saver.rb +1 -0
- data/lib/arrow/table.rb +178 -31
- data/lib/arrow/tensor.rb +4 -0
- data/lib/arrow/union-array-builder.rb +59 -0
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -1
- data/test/raw-records/test-basic-arrays.rb +10 -0
- data/test/raw-records/test-dense-union-array.rb +90 -45
- data/test/raw-records/test-list-array.rb +28 -10
- data/test/raw-records/test-map-array.rb +39 -10
- data/test/raw-records/test-sparse-union-array.rb +86 -41
- data/test/raw-records/test-struct-array.rb +22 -8
- data/test/test-array.rb +7 -0
- data/test/test-chunked-array.rb +9 -0
- data/test/test-data-type.rb +2 -1
- data/test/test-dense-union-array.rb +42 -0
- data/test/test-dense-union-data-type.rb +1 -1
- data/test/test-function.rb +7 -7
- data/test/test-group.rb +58 -58
- data/test/test-half-float-array.rb +43 -0
- data/test/test-half-float.rb +130 -0
- data/test/test-record-batch-file-reader.rb +21 -0
- data/test/test-record-batch-stream-reader.rb +129 -0
- data/test/test-scalar.rb +65 -0
- data/test/test-slicer.rb +194 -129
- data/test/test-sparse-union-array.rb +38 -0
- data/test/test-table.rb +324 -40
- data/test/values/test-basic-arrays.rb +10 -0
- data/test/values/test-dense-union-array.rb +88 -45
- data/test/values/test-list-array.rb +26 -10
- data/test/values/test-map-array.rb +33 -10
- data/test/values/test-sparse-union-array.rb +84 -41
- data/test/values/test-struct-array.rb +20 -8
- metadata +30 -9
data/lib/arrow/table.rb
CHANGED
@@ -22,6 +22,7 @@ module Arrow
|
|
22
22
|
include ColumnContainable
|
23
23
|
include GenericFilterable
|
24
24
|
include GenericTakeable
|
25
|
+
include InputReferable
|
25
26
|
include RecordContainable
|
26
27
|
|
27
28
|
class << self
|
@@ -188,6 +189,7 @@ module Arrow
|
|
188
189
|
|
189
190
|
reader = TableBatchReader.new(self)
|
190
191
|
while record_batch = reader.read_next
|
192
|
+
share_input(record_batch)
|
191
193
|
yield(record_batch)
|
192
194
|
end
|
193
195
|
end
|
@@ -314,8 +316,6 @@ module Arrow
|
|
314
316
|
end
|
315
317
|
end
|
316
318
|
|
317
|
-
filter_options = Arrow::FilterOptions.new
|
318
|
-
filter_options.null_selection_behavior = :emit_null
|
319
319
|
sliced_tables = []
|
320
320
|
slicers.each do |slicer|
|
321
321
|
slicer = slicer.evaluate if slicer.respond_to?(:evaluate)
|
@@ -337,7 +337,7 @@ module Arrow
|
|
337
337
|
to += n_rows if to < 0
|
338
338
|
sliced_tables << slice_by_range(from, to)
|
339
339
|
when ::Array, BooleanArray, ChunkedArray
|
340
|
-
sliced_tables << filter(slicer, filter_options)
|
340
|
+
sliced_tables << filter(slicer)
|
341
341
|
else
|
342
342
|
message = "slicer must be Integer, Range, (from, to), " +
|
343
343
|
"Arrow::ChunkedArray of Arrow::BooleanArray, " +
|
@@ -346,10 +346,12 @@ module Arrow
|
|
346
346
|
end
|
347
347
|
end
|
348
348
|
if sliced_tables.size > 1
|
349
|
-
sliced_tables[0].concatenate(sliced_tables[1..-1])
|
349
|
+
sliced_table = sliced_tables[0].concatenate(sliced_tables[1..-1])
|
350
350
|
else
|
351
|
-
sliced_tables[0]
|
351
|
+
sliced_table = sliced_tables[0]
|
352
352
|
end
|
353
|
+
share_input(sliced_table)
|
354
|
+
sliced_table
|
353
355
|
end
|
354
356
|
|
355
357
|
# TODO
|
@@ -401,7 +403,9 @@ module Arrow
|
|
401
403
|
new_fields << new_column[:field]
|
402
404
|
new_arrays << new_column[:data]
|
403
405
|
end
|
404
|
-
self.class.new(new_fields, new_arrays)
|
406
|
+
table = self.class.new(new_fields, new_arrays)
|
407
|
+
share_input(table)
|
408
|
+
table
|
405
409
|
end
|
406
410
|
|
407
411
|
alias_method :remove_column_raw, :remove_column
|
@@ -423,7 +427,9 @@ module Arrow
|
|
423
427
|
raise IndexError.new(message)
|
424
428
|
end
|
425
429
|
end
|
426
|
-
remove_column_raw(index)
|
430
|
+
table = remove_column_raw(index)
|
431
|
+
share_input(table)
|
432
|
+
table
|
427
433
|
end
|
428
434
|
|
429
435
|
# Experimental
|
@@ -445,43 +451,69 @@ module Arrow
|
|
445
451
|
packed_arrays = columns.collect do |column|
|
446
452
|
column.data.pack
|
447
453
|
end
|
448
|
-
self.class.new(schema, packed_arrays)
|
454
|
+
table = self.class.new(schema, packed_arrays)
|
455
|
+
share_input(table)
|
456
|
+
table
|
449
457
|
end
|
450
458
|
|
451
|
-
#
|
452
|
-
#
|
453
|
-
#
|
459
|
+
# Join another Table by matching with keys.
|
460
|
+
#
|
461
|
+
# @!macro join_common_before
|
462
|
+
# @param right [Arrow::Table] The right table.
|
463
|
+
#
|
464
|
+
# Join columns with `right` on join key columns.
|
454
465
|
#
|
455
|
-
#
|
466
|
+
# @!macro join_common_after
|
467
|
+
# @param type [Arrow::JoinType] How to join.
|
468
|
+
# @param left_outputs [::Array<String, Symbol>] Output columns in
|
469
|
+
# `self`.
|
456
470
|
#
|
457
|
-
#
|
458
|
-
#
|
459
|
-
#
|
460
|
-
#
|
471
|
+
# If both of `left_outputs` and `right_outputs` aren't
|
472
|
+
# specified, all columns in `self` and `right` are
|
473
|
+
# output.
|
474
|
+
# @param right_outputs [::Array<String, Symbol>] Output columns in
|
475
|
+
# `right`.
|
476
|
+
#
|
477
|
+
# If both of `left_outputs` and `right_outputs` aren't
|
478
|
+
# specified, all columns in `self` and `right` are
|
479
|
+
# output.
|
480
|
+
# @return [Arrow::Table]
|
481
|
+
# The joined `Arrow::Table`.
|
461
482
|
#
|
462
|
-
#
|
463
|
-
#
|
464
|
-
#
|
465
|
-
# @param right_outputs [::Array<String, Symbol>] Output columns in
|
466
|
-
# `right`.
|
483
|
+
# @overload join(right, type: :inner, left_outputs: nil, right_outputs: nil)
|
484
|
+
# If key(s) are not supplied, common keys in self and right are used
|
485
|
+
# (natural join).
|
467
486
|
#
|
468
|
-
#
|
469
|
-
#
|
470
|
-
#
|
471
|
-
#
|
472
|
-
#
|
487
|
+
# Columns used as keys are merged and remain on the left side
|
488
|
+
# when both of `left_outputs` and `right_outputs` are `nil`.
|
489
|
+
#
|
490
|
+
# @macro join_common_before
|
491
|
+
# @macro join_common_after
|
492
|
+
#
|
493
|
+
# @since 11.0.0
|
494
|
+
#
|
495
|
+
# @overload join(right, key, type: :inner, left_outputs: nil, right_outputs: nil)
|
496
|
+
# Join right by a key.
|
497
|
+
#
|
498
|
+
# Columns used as keys are merged and remain on the left side
|
499
|
+
# when both of `left_outputs` and `right_outputs` are `nil`.
|
473
500
|
#
|
474
501
|
# @macro join_common_before
|
475
502
|
# @param key [String, Symbol] A join key.
|
476
503
|
# @macro join_common_after
|
477
504
|
#
|
478
|
-
# @overload join(right, keys, type: :inner,
|
505
|
+
# @overload join(right, keys, type: :inner, left_suffix: "", right_suffix: "",
|
506
|
+
# left_outputs: nil, right_outputs: nil)
|
507
|
+
# Join right by keys.
|
508
|
+
#
|
509
|
+
# Column name can be renamed by appending `left_suffix` or `right_suffix`.
|
479
510
|
#
|
480
511
|
# @macro join_common_before
|
481
512
|
# @param keys [::Array<String, Symbol>] Join keys.
|
482
513
|
# @macro join_common_after
|
483
514
|
#
|
484
515
|
# @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
|
516
|
+
# Join right by a key or keys mapped by a hash.
|
485
517
|
#
|
486
518
|
# @macro join_common_before
|
487
519
|
# @param keys [Hash] Specify join keys in `self` and `right` separately.
|
@@ -492,7 +524,16 @@ module Arrow
|
|
492
524
|
# @macro join_common_after
|
493
525
|
#
|
494
526
|
# @since 7.0.0
|
495
|
-
def join(right,
|
527
|
+
def join(right,
|
528
|
+
keys=nil,
|
529
|
+
type: :inner,
|
530
|
+
left_suffix: "",
|
531
|
+
right_suffix: "",
|
532
|
+
left_outputs: nil,
|
533
|
+
right_outputs: nil)
|
534
|
+
is_natural_join = keys.nil?
|
535
|
+
keys ||= (column_names & right.column_names)
|
536
|
+
type = JoinType.try_convert(type) || type
|
496
537
|
plan = ExecutePlan.new
|
497
538
|
left_node = plan.build_source_node(self)
|
498
539
|
right_node = plan.build_source_node(right)
|
@@ -508,22 +549,46 @@ module Arrow
|
|
508
549
|
hash_join_node_options = HashJoinNodeOptions.new(type,
|
509
550
|
left_keys,
|
510
551
|
right_keys)
|
552
|
+
use_manual_outputs = false
|
511
553
|
unless left_outputs.nil?
|
512
554
|
hash_join_node_options.left_outputs = left_outputs
|
555
|
+
use_manual_outputs = true
|
513
556
|
end
|
514
557
|
unless right_outputs.nil?
|
515
558
|
hash_join_node_options.right_outputs = right_outputs
|
559
|
+
use_manual_outputs = true
|
516
560
|
end
|
517
561
|
hash_join_node = plan.build_hash_join_node(left_node,
|
518
562
|
right_node,
|
519
563
|
hash_join_node_options)
|
564
|
+
type_nick = type.nick
|
565
|
+
is_filter_join = (type_nick.end_with?("-semi") or
|
566
|
+
type_nick.end_with?("-anti"))
|
567
|
+
if use_manual_outputs or is_filter_join
|
568
|
+
process_node = hash_join_node
|
569
|
+
elsif is_natural_join
|
570
|
+
process_node = join_merge_keys(plan, hash_join_node, right, keys)
|
571
|
+
elsif keys.is_a?(String) or keys.is_a?(Symbol)
|
572
|
+
process_node = join_merge_keys(plan, hash_join_node, right, [keys.to_s])
|
573
|
+
elsif !keys.is_a?(Hash) and (left_suffix != "" or right_suffix != "")
|
574
|
+
process_node = join_rename_keys(plan,
|
575
|
+
hash_join_node,
|
576
|
+
right,
|
577
|
+
keys,
|
578
|
+
left_suffix,
|
579
|
+
right_suffix)
|
580
|
+
else
|
581
|
+
process_node = hash_join_node
|
582
|
+
end
|
520
583
|
sink_node_options = SinkNodeOptions.new
|
521
|
-
plan.build_sink_node(hash_join_node, sink_node_options)
|
584
|
+
plan.build_sink_node(process_node, sink_node_options)
|
522
585
|
plan.validate
|
523
586
|
plan.start
|
524
587
|
plan.wait
|
525
|
-
reader = sink_node_options.get_reader(hash_join_node.output_schema)
|
526
|
-
reader.read_all
|
588
|
+
reader = sink_node_options.get_reader(process_node.output_schema)
|
589
|
+
table = reader.read_all
|
590
|
+
share_input(table)
|
591
|
+
table
|
527
592
|
end
|
528
593
|
|
529
594
|
alias_method :to_s_raw, :to_s
|
@@ -593,5 +658,87 @@ module Arrow
|
|
593
658
|
raise ArgumentError, message
|
594
659
|
end
|
595
660
|
end
|
661
|
+
|
662
|
+
# Builds a project node on top of `input_node` that merges each pair of
# join key columns into a single output column.
#
# Columns are addressed by position ("[0]", "[1]", ...): the columns of
# `self` come first, followed by the columns of `right`.
#
# @param plan [#build_project_node] The plan the project node is added to.
# @param input_node The node whose output is projected.
# @param right [#column_names] The right side of the join.
# @param keys [Enumerable<String, Symbol>] Names of the key columns.
# @return The value returned by `plan.build_project_node`.
def join_merge_keys(plan, input_node, right, keys)
  key_names = keys.each_with_object({}) do |key, table|
    table[key.to_s] = true
  end
  left_names = column_names
  right_offset = left_names.size

  left_columns = left_names.each_with_index.map do |name, i|
    {is_key: key_names.include?(name), name: name, index: i, direction: :left}
  end
  right_columns = right.column_names.each_with_index.map do |name, i|
    {
      is_key: key_names.include?(name),
      name: name,
      index: right_offset + i,
      direction: :right,
    }
  end

  # Pair up the left and right occurrence of every key column by name.
  key_pairs = {}
  left_columns.each do |column|
    key_pairs[column[:name]] = {left: column} if column[:is_key]
  end
  right_columns.each do |column|
    key_pairs[column[:name]][:right] = column if column[:is_key]
  end

  expressions = []
  names = []
  (left_columns + right_columns).each do |column|
    if column[:is_key]
      # The right key column is folded into the left one, so it produces
      # no output column of its own.
      next if column[:direction] == :right
      pair = key_pairs[column[:name]]
      left_field = FieldExpression.new("[#{pair[:left][:index]}]")
      right_field = FieldExpression.new("[#{pair[:right][:index]}]")
      left_is_null = CallExpression.new("is_null", [left_field])
      # Take the right value only where the left value is null.
      expressions << CallExpression.new("if_else",
                                        [
                                          left_is_null,
                                          right_field,
                                          left_field,
                                        ])
    else
      expressions << FieldExpression.new("[#{column[:index]}]")
    end
    names << column[:name]
  end
  plan.build_project_node(input_node,
                          ProjectNodeOptions.new(expressions, names))
end
|
709
|
+
|
710
|
+
# Builds a project node on top of `input_node` that keeps every column of
# both sides and renames only the key columns by appending a suffix.
#
# Columns are addressed by position ("[0]", "[1]", ...): the columns of
# `self` come first, followed by the columns of `right`.
#
# @param plan [#build_project_node] The plan the project node is added to.
# @param input_node The node whose output is projected.
# @param right [#column_names] The right side of the join.
# @param keys [Enumerable<String, Symbol>] Names of the key columns.
# @param left_suffix [String] Appended to key column names from `self`.
# @param right_suffix [String] Appended to key column names from `right`.
# @return The value returned by `plan.build_project_node`.
def join_rename_keys(plan,
                     input_node,
                     right,
                     keys,
                     left_suffix,
                     right_suffix)
  key_names = keys.each_with_object({}) do |key, table|
    table[key.to_s] = true
  end
  # Non-key columns keep their name untouched.
  rename = lambda do |name, suffix|
    key_names.include?(name) ? "#{name}#{suffix}" : name
  end

  left_names = column_names
  right_names = right.column_names
  n_columns = left_names.size + right_names.size
  expressions = Array.new(n_columns) do |index|
    FieldExpression.new("[#{index}]")
  end
  names = left_names.map { |name| rename.call(name, left_suffix) } +
          right_names.map { |name| rename.call(name, right_suffix) }
  plan.build_project_node(input_node,
                          ProjectNodeOptions.new(expressions, names))
end
|
596
743
|
end
|
597
744
|
end
|
data/lib/arrow/tensor.rb
CHANGED
data/lib/arrow/union-array-builder.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class UnionArrayBuilder
|
20
|
+
# Appends each element of `values` to this builder.
#
# @param values [Array] The values to append.
# @param is_valids [Array<Boolean>, nil] Optional validity flags, read in
#   parallel with `values`. A falsy flag appends a null instead of the
#   value at the same position. When omitted, every value is appended.
def append_values(values, is_valids=nil)
  return values.each { |value| append_value(value) } unless is_valids

  is_valids.each_with_index do |is_valid, i|
    is_valid ? append_value(values[i]) : append_null
  end
end
|
35
|
+
|
36
|
+
alias_method :append_child_raw, :append_child
|
37
|
+
# Appends a child builder and invalidates the cached child information so
# it is rebuilt on the next `child_infos` call.
#
# The parameter was previously misspelled `filed_name` while the body
# passed `field_name`, so every call raised NameError.
#
# @param builder The child builder to append.
# @param field_name [String, nil] The field name forwarded to
#   `append_child_raw`.
# @return The value returned by `append_child_raw`.
def append_child(builder, field_name=nil)
  @child_infos = nil
  append_child_raw(builder, field_name)
end
|
41
|
+
|
42
|
+
private
|
43
|
+
# Returns the cached child information, building it with
# `create_child_infos` on first use (or after the cache was cleared).
def child_infos
  return @child_infos if @child_infos

  @child_infos = create_child_infos
end
|
46
|
+
|
47
|
+
# Builds a Hash from child field name to `{builder:, id:}`.
#
# Fields of `value_data_type`, `children` and the data type's type codes
# are matched up by position.
def create_child_infos
  data_type = value_data_type
  triples = data_type.fields.zip(children, data_type.type_codes)
  triples.each_with_object({}) do |(field, builder, type_code), infos|
    infos[field.name] = {
      builder: builder,
      id: type_code,
    }
  end
end
|
58
|
+
end
|
59
|
+
end
|
data/lib/arrow/version.rb
CHANGED
data/red-arrow.gemspec
CHANGED
@@ -47,7 +47,7 @@ Gem::Specification.new do |spec|
|
|
47
47
|
spec.extensions = ["ext/arrow/extconf.rb"]
|
48
48
|
|
49
49
|
spec.add_runtime_dependency("bigdecimal", ">= 3.1.0")
|
50
|
-
spec.add_runtime_dependency("extpp", ">= 0.
|
50
|
+
spec.add_runtime_dependency("extpp", ">= 0.1.1")
|
51
51
|
spec.add_runtime_dependency("gio2", ">= 3.5.0")
|
52
52
|
spec.add_runtime_dependency("native-package-installer")
|
53
53
|
spec.add_runtime_dependency("pkg-config")
|
data/test/raw-records/test-basic-arrays.rb
CHANGED
@@ -117,6 +117,16 @@ module RawRecordsBasicArraysTests
|
|
117
117
|
assert_equal(records, target.raw_records)
|
118
118
|
end
|
119
119
|
|
120
|
+
# A half-float column holding negative, null and positive values must come
# back from `raw_records` unchanged.
def test_half_float
  records = [[-1.5], [nil], [1.5]]
  target = build({column: :half_float}, records)
  assert_equal(records, target.raw_records)
end
|
129
|
+
|
120
130
|
def test_float
|
121
131
|
records = [
|
122
132
|
[-1.0],
|