red-arrow 10.0.1 → 12.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/ext/arrow/converters.hpp +45 -41
- data/ext/arrow/extconf.rb +14 -2
- data/ext/arrow/raw-records.cpp +1 -2
- data/ext/arrow/values.cpp +1 -2
- data/lib/arrow/array-computable.rb +13 -0
- data/lib/arrow/array.rb +5 -0
- data/lib/arrow/chunked-array.rb +23 -1
- data/lib/arrow/column-containable.rb +9 -0
- data/lib/arrow/column.rb +1 -0
- data/lib/arrow/data-type.rb +9 -0
- data/lib/arrow/dense-union-array-builder.rb +49 -0
- data/lib/arrow/dense-union-array.rb +26 -0
- data/lib/arrow/half-float-array-builder.rb +32 -0
- data/lib/arrow/half-float-array.rb +24 -0
- data/lib/arrow/half-float.rb +118 -0
- data/lib/arrow/input-referable.rb +29 -0
- data/lib/arrow/loader.rb +10 -0
- data/lib/arrow/raw-table-converter.rb +7 -5
- data/lib/arrow/record-batch-file-reader.rb +2 -0
- data/lib/arrow/record-batch-stream-reader.rb +2 -0
- data/lib/arrow/record-batch.rb +6 -2
- data/lib/arrow/scalar.rb +67 -0
- data/lib/arrow/slicer.rb +61 -0
- data/lib/arrow/sparse-union-array-builder.rb +56 -0
- data/lib/arrow/sparse-union-array.rb +26 -0
- data/lib/arrow/struct-array-builder.rb +0 -5
- data/lib/arrow/table-loader.rb +4 -4
- data/lib/arrow/table-saver.rb +1 -0
- data/lib/arrow/table.rb +178 -31
- data/lib/arrow/tensor.rb +4 -0
- data/lib/arrow/union-array-builder.rb +59 -0
- data/lib/arrow/version.rb +1 -1
- data/red-arrow.gemspec +1 -1
- data/test/raw-records/test-basic-arrays.rb +10 -0
- data/test/raw-records/test-dense-union-array.rb +90 -45
- data/test/raw-records/test-list-array.rb +28 -10
- data/test/raw-records/test-map-array.rb +39 -10
- data/test/raw-records/test-sparse-union-array.rb +86 -41
- data/test/raw-records/test-struct-array.rb +22 -8
- data/test/test-array.rb +7 -0
- data/test/test-chunked-array.rb +9 -0
- data/test/test-data-type.rb +2 -1
- data/test/test-dense-union-array.rb +42 -0
- data/test/test-dense-union-data-type.rb +1 -1
- data/test/test-function.rb +7 -7
- data/test/test-group.rb +58 -58
- data/test/test-half-float-array.rb +43 -0
- data/test/test-half-float.rb +130 -0
- data/test/test-record-batch-file-reader.rb +21 -0
- data/test/test-record-batch-stream-reader.rb +129 -0
- data/test/test-scalar.rb +65 -0
- data/test/test-slicer.rb +194 -129
- data/test/test-sparse-union-array.rb +38 -0
- data/test/test-table.rb +324 -40
- data/test/values/test-basic-arrays.rb +10 -0
- data/test/values/test-dense-union-array.rb +88 -45
- data/test/values/test-list-array.rb +26 -10
- data/test/values/test-map-array.rb +33 -10
- data/test/values/test-sparse-union-array.rb +84 -41
- data/test/values/test-struct-array.rb +20 -8
- metadata +30 -9
data/lib/arrow/table.rb
CHANGED
@@ -22,6 +22,7 @@ module Arrow
|
|
22
22
|
include ColumnContainable
|
23
23
|
include GenericFilterable
|
24
24
|
include GenericTakeable
|
25
|
+
include InputReferable
|
25
26
|
include RecordContainable
|
26
27
|
|
27
28
|
class << self
|
@@ -188,6 +189,7 @@ module Arrow
|
|
188
189
|
|
189
190
|
reader = TableBatchReader.new(self)
|
190
191
|
while record_batch = reader.read_next
|
192
|
+
share_input(record_batch)
|
191
193
|
yield(record_batch)
|
192
194
|
end
|
193
195
|
end
|
@@ -314,8 +316,6 @@ module Arrow
|
|
314
316
|
end
|
315
317
|
end
|
316
318
|
|
317
|
-
filter_options = Arrow::FilterOptions.new
|
318
|
-
filter_options.null_selection_behavior = :emit_null
|
319
319
|
sliced_tables = []
|
320
320
|
slicers.each do |slicer|
|
321
321
|
slicer = slicer.evaluate if slicer.respond_to?(:evaluate)
|
@@ -337,7 +337,7 @@ module Arrow
|
|
337
337
|
to += n_rows if to < 0
|
338
338
|
sliced_tables << slice_by_range(from, to)
|
339
339
|
when ::Array, BooleanArray, ChunkedArray
|
340
|
-
sliced_tables << filter(slicer, filter_options)
|
340
|
+
sliced_tables << filter(slicer)
|
341
341
|
else
|
342
342
|
message = "slicer must be Integer, Range, (from, to), " +
|
343
343
|
"Arrow::ChunkedArray of Arrow::BooleanArray, " +
|
@@ -346,10 +346,12 @@ module Arrow
|
|
346
346
|
end
|
347
347
|
end
|
348
348
|
if sliced_tables.size > 1
|
349
|
-
sliced_tables[0].concatenate(sliced_tables[1..-1])
|
349
|
+
sliced_table = sliced_tables[0].concatenate(sliced_tables[1..-1])
|
350
350
|
else
|
351
|
-
sliced_tables[0]
|
351
|
+
sliced_table = sliced_tables[0]
|
352
352
|
end
|
353
|
+
share_input(sliced_table)
|
354
|
+
sliced_table
|
353
355
|
end
|
354
356
|
|
355
357
|
# TODO
|
@@ -401,7 +403,9 @@ module Arrow
|
|
401
403
|
new_fields << new_column[:field]
|
402
404
|
new_arrays << new_column[:data]
|
403
405
|
end
|
404
|
-
self.class.new(new_fields, new_arrays)
|
406
|
+
table = self.class.new(new_fields, new_arrays)
|
407
|
+
share_input(table)
|
408
|
+
table
|
405
409
|
end
|
406
410
|
|
407
411
|
alias_method :remove_column_raw, :remove_column
|
@@ -423,7 +427,9 @@ module Arrow
|
|
423
427
|
raise IndexError.new(message)
|
424
428
|
end
|
425
429
|
end
|
426
|
-
remove_column_raw(index)
|
430
|
+
table = remove_column_raw(index)
|
431
|
+
share_input(table)
|
432
|
+
table
|
427
433
|
end
|
428
434
|
|
429
435
|
# Experimental
|
@@ -445,43 +451,69 @@ module Arrow
|
|
445
451
|
packed_arrays = columns.collect do |column|
|
446
452
|
column.data.pack
|
447
453
|
end
|
448
|
-
self.class.new(schema, packed_arrays)
|
454
|
+
table = self.class.new(schema, packed_arrays)
|
455
|
+
share_input(table)
|
456
|
+
table
|
449
457
|
end
|
450
458
|
|
451
|
-
#
|
452
|
-
#
|
453
|
-
#
|
459
|
+
# Join another Table by matching with keys.
|
460
|
+
#
|
461
|
+
# @!macro join_common_before
|
462
|
+
# @param right [Arrow::Table] The right table.
|
463
|
+
#
|
464
|
+
# Join columns with `right` on join key columns.
|
454
465
|
#
|
455
|
-
#
|
466
|
+
# @!macro join_common_after
|
467
|
+
# @param type [Arrow::JoinType] How to join.
|
468
|
+
# @param left_outputs [::Array<String, Symbol>] Output columns in
|
469
|
+
# `self`.
|
456
470
|
#
|
457
|
-
#
|
458
|
-
#
|
459
|
-
#
|
460
|
-
#
|
471
|
+
# If both of `left_outputs` and `right_outputs` aren't
|
472
|
+
# specified, all columns in `self` and `right` are
|
473
|
+
# output.
|
474
|
+
# @param right_outputs [::Array<String, Symbol>] Output columns in
|
475
|
+
# `right`.
|
476
|
+
#
|
477
|
+
# If both of `left_outputs` and `right_outputs` aren't
|
478
|
+
# specified, all columns in `self` and `right` are
|
479
|
+
# output.
|
480
|
+
# @return [Arrow::Table]
|
481
|
+
# The joined `Arrow::Table`.
|
461
482
|
#
|
462
|
-
#
|
463
|
-
#
|
464
|
-
#
|
465
|
-
# @param right_outputs [::Array<String, Symbol>] Output columns in
|
466
|
-
# `right`.
|
483
|
+
# @overload join(right, type: :inner, left_outputs: nil, right_outputs: nil)
|
484
|
+
# If key(s) are not supplied, common keys in self and right are used
|
485
|
+
# (natural join).
|
467
486
|
#
|
468
|
-
#
|
469
|
-
#
|
470
|
-
#
|
471
|
-
#
|
472
|
-
#
|
487
|
+
# Columns used as keys are merged and remain on the left side
|
488
|
+
# when both of `left_outputs` and `right_outputs` are `nil`.
|
489
|
+
#
|
490
|
+
# @macro join_common_before
|
491
|
+
# @macro join_common_after
|
492
|
+
#
|
493
|
+
# @since 11.0.0
|
494
|
+
#
|
495
|
+
# @overload join(right, key, type: :inner, left_outputs: nil, right_outputs: nil)
|
496
|
+
# Join right by a key.
|
497
|
+
#
|
498
|
+
# Columns used as keys are merged and remain on the left side
|
499
|
+
# when both of `left_outputs` and `right_outputs` are `nil`.
|
473
500
|
#
|
474
501
|
# @macro join_common_before
|
475
502
|
# @param key [String, Symbol] A join key.
|
476
503
|
# @macro join_common_after
|
477
504
|
#
|
478
|
-
# @overload join(right, keys, type: :inner,
|
505
|
+
# @overload join(right, keys, type: :inner, left_suffix: "", right_suffix: "",
|
506
|
+
# left_outputs: nil, right_outputs: nil)
|
507
|
+
# Join right by keys.
|
508
|
+
#
|
509
|
+
# Column name can be renamed by appending `left_suffix` or `right_suffix`.
|
479
510
|
#
|
480
511
|
# @macro join_common_before
|
481
512
|
# @param keys [::Array<String, Symbol>] Join keys.
|
482
513
|
# @macro join_common_after
|
483
514
|
#
|
484
515
|
# @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
|
516
|
+
# Join right by a key or keys mapped by a hash.
|
485
517
|
#
|
486
518
|
# @macro join_common_before
|
487
519
|
# @param keys [Hash] Specify join keys in `self` and `right` separately.
|
@@ -492,7 +524,16 @@ module Arrow
|
|
492
524
|
# @macro join_common_after
|
493
525
|
#
|
494
526
|
# @since 7.0.0
|
495
|
-
def join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil)
|
527
|
+
def join(right,
|
528
|
+
keys=nil,
|
529
|
+
type: :inner,
|
530
|
+
left_suffix: "",
|
531
|
+
right_suffix: "",
|
532
|
+
left_outputs: nil,
|
533
|
+
right_outputs: nil)
|
534
|
+
is_natural_join = keys.nil?
|
535
|
+
keys ||= (column_names & right.column_names)
|
536
|
+
type = JoinType.try_convert(type) || type
|
496
537
|
plan = ExecutePlan.new
|
497
538
|
left_node = plan.build_source_node(self)
|
498
539
|
right_node = plan.build_source_node(right)
|
@@ -508,22 +549,46 @@ module Arrow
|
|
508
549
|
hash_join_node_options = HashJoinNodeOptions.new(type,
|
509
550
|
left_keys,
|
510
551
|
right_keys)
|
552
|
+
use_manual_outputs = false
|
511
553
|
unless left_outputs.nil?
|
512
554
|
hash_join_node_options.left_outputs = left_outputs
|
555
|
+
use_manual_outputs = true
|
513
556
|
end
|
514
557
|
unless right_outputs.nil?
|
515
558
|
hash_join_node_options.right_outputs = right_outputs
|
559
|
+
use_manual_outputs = true
|
516
560
|
end
|
517
561
|
hash_join_node = plan.build_hash_join_node(left_node,
|
518
562
|
right_node,
|
519
563
|
hash_join_node_options)
|
564
|
+
type_nick = type.nick
|
565
|
+
is_filter_join = (type_nick.end_with?("-semi") or
|
566
|
+
type_nick.end_with?("-anti"))
|
567
|
+
if use_manual_outputs or is_filter_join
|
568
|
+
process_node = hash_join_node
|
569
|
+
elsif is_natural_join
|
570
|
+
process_node = join_merge_keys(plan, hash_join_node, right, keys)
|
571
|
+
elsif keys.is_a?(String) or keys.is_a?(Symbol)
|
572
|
+
process_node = join_merge_keys(plan, hash_join_node, right, [keys.to_s])
|
573
|
+
elsif !keys.is_a?(Hash) and (left_suffix != "" or right_suffix != "")
|
574
|
+
process_node = join_rename_keys(plan,
|
575
|
+
hash_join_node,
|
576
|
+
right,
|
577
|
+
keys,
|
578
|
+
left_suffix,
|
579
|
+
right_suffix)
|
580
|
+
else
|
581
|
+
process_node = hash_join_node
|
582
|
+
end
|
520
583
|
sink_node_options = SinkNodeOptions.new
|
521
|
-
plan.build_sink_node(hash_join_node, sink_node_options)
|
584
|
+
plan.build_sink_node(process_node, sink_node_options)
|
522
585
|
plan.validate
|
523
586
|
plan.start
|
524
587
|
plan.wait
|
525
|
-
reader = sink_node_options.get_reader(hash_join_node.output_schema)
|
526
|
-
reader.read_all
|
588
|
+
reader = sink_node_options.get_reader(process_node.output_schema)
|
589
|
+
table = reader.read_all
|
590
|
+
share_input(table)
|
591
|
+
table
|
527
592
|
end
|
528
593
|
|
529
594
|
alias_method :to_s_raw, :to_s
|
@@ -593,5 +658,87 @@ module Arrow
|
|
593
658
|
raise ArgumentError, message
|
594
659
|
end
|
595
660
|
end
|
661
|
+
|
662
|
+
def join_merge_keys(plan, input_node, right, keys)
|
663
|
+
expressions = []
|
664
|
+
names = []
|
665
|
+
normalized_keys = {}
|
666
|
+
keys.each do |key|
|
667
|
+
normalized_keys[key.to_s] = true
|
668
|
+
end
|
669
|
+
key_to_outputs = {}
|
670
|
+
outputs = []
|
671
|
+
left_n_column_names = column_names.size
|
672
|
+
column_names.each_with_index do |name, i|
|
673
|
+
is_key = normalized_keys.include?(name)
|
674
|
+
output = {is_key: is_key, name: name, index: i, direction: :left}
|
675
|
+
outputs << output
|
676
|
+
key_to_outputs[name] = {left: output} if is_key
|
677
|
+
end
|
678
|
+
right.column_names.each_with_index do |name, i|
|
679
|
+
index = left_n_column_names + i
|
680
|
+
is_key = normalized_keys.include?(name)
|
681
|
+
output = {is_key: is_key, name: name, index: index, direction: :right}
|
682
|
+
outputs << output
|
683
|
+
key_to_outputs[name][:right] = output if is_key
|
684
|
+
end
|
685
|
+
|
686
|
+
outputs.each do |output|
|
687
|
+
if output[:is_key]
|
688
|
+
next if output[:direction] == :right
|
689
|
+
left_output = key_to_outputs[output[:name]][:left]
|
690
|
+
right_output = key_to_outputs[output[:name]][:right]
|
691
|
+
left_field = FieldExpression.new("[#{left_output[:index]}]")
|
692
|
+
right_field = FieldExpression.new("[#{right_output[:index]}]")
|
693
|
+
is_left_null = CallExpression.new("is_null", [left_field])
|
694
|
+
merge_column = CallExpression.new("if_else",
|
695
|
+
[
|
696
|
+
is_left_null,
|
697
|
+
right_field,
|
698
|
+
left_field,
|
699
|
+
])
|
700
|
+
expressions << merge_column
|
701
|
+
else
|
702
|
+
expressions << FieldExpression.new("[#{output[:index]}]")
|
703
|
+
end
|
704
|
+
names << output[:name]
|
705
|
+
end
|
706
|
+
project_node_options = ProjectNodeOptions.new(expressions, names)
|
707
|
+
plan.build_project_node(input_node, project_node_options)
|
708
|
+
end
|
709
|
+
|
710
|
+
def join_rename_keys(plan,
|
711
|
+
input_node,
|
712
|
+
right,
|
713
|
+
keys,
|
714
|
+
left_suffix,
|
715
|
+
right_suffix)
|
716
|
+
expressions = []
|
717
|
+
names = []
|
718
|
+
normalized_keys = {}
|
719
|
+
keys.each do |key|
|
720
|
+
normalized_keys[key.to_s] = true
|
721
|
+
end
|
722
|
+
left_n_column_names = column_names.size
|
723
|
+
column_names.each_with_index do |name, i|
|
724
|
+
expressions << FieldExpression.new("[#{i}]")
|
725
|
+
if normalized_keys.include?(name)
|
726
|
+
names << "#{name}#{left_suffix}"
|
727
|
+
else
|
728
|
+
names << name
|
729
|
+
end
|
730
|
+
end
|
731
|
+
right.column_names.each_with_index do |name, i|
|
732
|
+
index = left_n_column_names + i
|
733
|
+
expressions << FieldExpression.new("[#{index}]")
|
734
|
+
if normalized_keys.include?(name)
|
735
|
+
names << "#{name}#{right_suffix}"
|
736
|
+
else
|
737
|
+
names << name
|
738
|
+
end
|
739
|
+
end
|
740
|
+
project_node_options = ProjectNodeOptions.new(expressions, names)
|
741
|
+
plan.build_project_node(input_node, project_node_options)
|
742
|
+
end
|
596
743
|
end
|
597
744
|
end
|
data/lib/arrow/tensor.rb
CHANGED
data/lib/arrow/union-array-builder.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
module Arrow
|
19
|
+
class UnionArrayBuilder
|
20
|
+
def append_values(values, is_valids=nil)
|
21
|
+
if is_valids
|
22
|
+
is_valids.each_with_index do |is_valid, i|
|
23
|
+
if is_valid
|
24
|
+
append_value(values[i])
|
25
|
+
else
|
26
|
+
append_null
|
27
|
+
end
|
28
|
+
end
|
29
|
+
else
|
30
|
+
values.each do |value|
|
31
|
+
append_value(value)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
alias_method :append_child_raw, :append_child
|
37
|
+
def append_child(builder, field_name=nil)
|
38
|
+
@child_infos = nil
|
39
|
+
append_child_raw(builder, field_name)
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
def child_infos
|
44
|
+
@child_infos ||= create_child_infos
|
45
|
+
end
|
46
|
+
|
47
|
+
def create_child_infos
|
48
|
+
infos = {}
|
49
|
+
type = value_data_type
|
50
|
+
type.fields.zip(children, type.type_codes).each do |field, child, id|
|
51
|
+
infos[field.name] = {
|
52
|
+
builder: child,
|
53
|
+
id: id,
|
54
|
+
}
|
55
|
+
end
|
56
|
+
infos
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
data/lib/arrow/version.rb
CHANGED
data/red-arrow.gemspec
CHANGED
@@ -47,7 +47,7 @@ Gem::Specification.new do |spec|
|
|
47
47
|
spec.extensions = ["ext/arrow/extconf.rb"]
|
48
48
|
|
49
49
|
spec.add_runtime_dependency("bigdecimal", ">= 3.1.0")
|
50
|
-
spec.add_runtime_dependency("extpp", ">= 0.
|
50
|
+
spec.add_runtime_dependency("extpp", ">= 0.1.1")
|
51
51
|
spec.add_runtime_dependency("gio2", ">= 3.5.0")
|
52
52
|
spec.add_runtime_dependency("native-package-installer")
|
53
53
|
spec.add_runtime_dependency("pkg-config")
|
data/test/raw-records/test-basic-arrays.rb
CHANGED
@@ -117,6 +117,16 @@ module RawRecordsBasicArraysTests
|
|
117
117
|
assert_equal(records, target.raw_records)
|
118
118
|
end
|
119
119
|
|
120
|
+
def test_half_float
|
121
|
+
records = [
|
122
|
+
[-1.5],
|
123
|
+
[nil],
|
124
|
+
[1.5],
|
125
|
+
]
|
126
|
+
target = build({column: :half_float}, records)
|
127
|
+
assert_equal(records, target.raw_records)
|
128
|
+
end
|
129
|
+
|
120
130
|
def test_float
|
121
131
|
records = [
|
122
132
|
[-1.0],
|