red_amber 0.4.2 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/docker/example CHANGED
@@ -1,46 +1,46 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- $stderr.print "starting.\r"
4
+ print "starting.\r"
5
5
 
6
- require 'bundler/setup'
6
+ Dir.chdir(__dir__) { require 'bundler/setup' }
7
7
 
8
- $stderr.print "starting..\r"
8
+ print "starting..\r"
9
9
  require 'red_amber'
10
10
  include RedAmber
11
11
 
12
- $stderr.print "starting...\r"
12
+ print "starting...\r"
13
13
  require 'datasets-arrow'
14
14
 
15
- $stderr.print "reading penguins...\r"
15
+ print "reading penguins...\r"
16
16
  penguins = DataFrame.new(Datasets::Penguins.new)
17
17
 
18
- $stderr.print "reading diamonds...\r"
18
+ print "reading diamonds...\r"
19
19
  diamonds = DataFrame.new(Datasets::Diamonds.new)
20
20
 
21
- $stderr.print "reading starwars...\r"
21
+ print "reading starwars...\r"
22
22
  starwars = DataFrame.new(Datasets::Rdataset.new('dplyr', 'starwars'))
23
23
 
24
- $stderr.print "reading openintro/simpsons_paradox_covid...\r"
24
+ print "reading openintro/simpsons_paradox_covid...\r"
25
25
  ds = Datasets::Rdataset.new('openintro', 'simpsons_paradox_covid')
26
26
  simpsons_paradox_covid = DataFrame.new(ds.to_arrow)
27
27
 
28
- $stderr.print "reading mtcars... \r"
28
+ print "reading mtcars... \r"
29
29
  mtcars = DataFrame.new(Datasets::Rdatasets.new('datasets', 'mtcars'))
30
30
 
31
- $stderr.print "reading iris... \r"
31
+ print "reading iris... \r"
32
32
  iris = DataFrame.new(Datasets::Iris.new)
33
33
 
34
- $stderr.print "reading band_members...\r"
34
+ print "reading band_members...\r"
35
35
  band_members = DataFrame.new(Datasets::Rdatasets.new('dplyr', 'band_members'))
36
36
 
37
- $stderr.print "reading band_instruments...\r"
37
+ print "reading band_instruments...\r"
38
38
  band_instruments = DataFrame.new(Datasets::Rdatasets.new('dplyr', 'band_instruments'))
39
39
 
40
- $stderr.print "reading band_instruments2...\r"
40
+ print "reading band_instruments2...\r"
41
41
  band_instruments2 = DataFrame.new(Datasets::Rdatasets.new('dplyr', 'band_instruments2'))
42
42
 
43
- $stderr.print "reading import_cars... \r"
43
+ print "reading import_cars... \r"
44
44
  import_cars = DataFrame.load(Arrow::Buffer.new(<<~TSV), format: :tsv)
45
45
  Year Audi BMW BMW_MINI Mercedes-Benz VW
46
46
  2017 28336 52527 25427 68221 49040
@@ -50,7 +50,7 @@ import_cars = DataFrame.load(Arrow::Buffer.new(<<~TSV), format: :tsv)
50
50
  2021 22535 35905 18211 51722 35215
51
51
  TSV
52
52
 
53
- $stderr.print "reading comecome... \r"
53
+ print "reading comecome... \r"
54
54
  comecome = DataFrame.load(Arrow::Buffer.new(<<~CSV), format: :csv)
55
55
  name,age
56
56
  Yasuko,68
@@ -58,7 +58,19 @@ comecome = DataFrame.load(Arrow::Buffer.new(<<~CSV), format: :csv)
58
58
  Hinata,28
59
59
  CSV
60
60
 
61
- $stderr.print "reading general dataframe and subframes...\r"
61
+ print "reading rubykaigi... \r"
62
+ rubykaigi = DataFrame.load(Arrow::Buffer.new(<<~CSV), format: :csv)
63
+ year,venue,prefecture,city,venue_en
64
+ 2015,ベルサール汐留,東京都,中央区,"Bellesalle Shiodome"
65
+ 2016,京都国際会議場,京都府,京都市左京区,"Kyoto International Conference Center"
66
+ 2017,広島国際会議場,広島県,広島市中区,"International Conference Center Hiroshima"
67
+ 2018,仙台国際センター,宮城県,仙台市青葉区,"Sendai International Center"
68
+ 2019,福岡国際会議場,福岡県,福岡市博多区,"Fukuoka International Congress Center"
69
+ 2022,三重県総合文化センター,三重県,津市,"Mie Center for the Arts"
70
+ 2023,松本市民芸術館,長野県,松本市,"Matsumoto Performing Arts Centre"
71
+ CSV
72
+
73
+ print "reading general dataframe and subframes...\r"
62
74
  dataframe = DataFrame.new(
63
75
  x: [*1..6],
64
76
  y: %w[A A B B B C],
@@ -70,5 +82,5 @@ subframes = SubFrames.new(dataframe, [[0, 1], [2, 3, 4], [5]])
70
82
  # This environment will offer these pre-loaded datasets:
71
83
  # penguins, diamonds, iris, starwars, simpsons_paradox_covid,
72
84
  # mtcars, band_members, band_instruments, band_instruments2
73
- # (original) import_cars, comecome, dataframe, subframes
85
+ # import_cars, comecome, rubykaigi, dataframe, subframes
74
86
  binding.irb
@@ -422,12 +422,12 @@ module RedAmber
422
422
  # Create SubFrames by value grouping.
423
423
  #
424
424
  # [Experimental feature] this method may be removed or be changed in the future.
425
- # @param keys [Symbol, String, Array<Symbol, String>]
425
+ # @param keys [List<Symbol, String>, Array<Symbol, String>]
426
426
  # grouping keys.
427
427
  # @return [SubFrames]
428
428
  # a created SubFrames grouped by column values on `keys`.
429
429
  # @example
430
- # df.sub_by_value(keys: :y)
430
+ # df.sub_by_value(:y)
431
431
  #
432
432
  # # =>
433
433
  # #<RedAmber::SubFrames : 0x000000000000fc08>
@@ -454,10 +454,11 @@ module RedAmber
454
454
  #
455
455
  # @since 0.4.0
456
456
  #
457
- def sub_by_value(keys: nil)
458
- SubFrames.new(self, group(keys).filters)
457
+ def sub_by_value(*keys)
458
+ SubFrames.new(self, group(keys.flatten).filters)
459
459
  end
460
460
  alias_method :subframes_by_value, :sub_by_value
461
+ alias_method :sub_group, :sub_by_value
461
462
 
462
463
  # Create SubFrames by Windowing with `from`, `size` and `step`.
463
464
  #
@@ -697,6 +698,79 @@ module RedAmber
697
698
  end
698
699
  end
699
700
 
701
+ # Returns a Vector such that all elements have value `scalar`
702
+ # and have same size as self.
703
+ #
704
+ # @overload propagate(scalar)
705
+ # Specifies scalar as an argument.
706
+ #
707
+ # @param scalar [scalar]
708
+ # a value to propagate in Vector.
709
+ # @return [Vector]
710
+ # created Vector.
711
+ # @example propagate a value
712
+ # df
713
+ # # =>
714
+ # #<RedAmber::DataFrame : 6 x 3 Vectors, 0x00000000000849a4>
715
+ # x y z
716
+ # <uint8> <string> <boolean>
717
+ # 0 1 A false
718
+ # 1 2 A true
719
+ # 2 3 B false
720
+ # 3 4 B (nil)
721
+ # 4 5 B true
722
+ # 5 6 C false
723
+ #
724
+ # df.assign(:sum_x) { propagate(x.sum) }
725
+ # # =>
726
+ # #<RedAmber::DataFrame : 6 x 4 Vectors, 0x000000000007bd04>
727
+ # x y z sum_x
728
+ # <uint8> <string> <boolean> <uint8>
729
+ # 0 1 A false 21
730
+ # 1 2 A true 21
731
+ # 2 3 B false 21
732
+ # 3 4 B (nil) 21
733
+ # 4 5 B true 21
734
+ # 5 6 C false 21
735
+ #
736
+ # # Using `Vector#propagate` like below has same result as above.
737
+ # df.assign(:sum_x) { x.propagate(:sum) }
738
+ #
739
+ # # Also it is same as creating column from an Array.
740
+ # df.assign(:sum_x) { [x.sum] * size }
741
+ #
742
+ # @overload propagate
743
+ #
744
+ # @yieldparam self [DataFrame]
745
+ # gives self to the block.
746
+ # @yieldreturn [scalar]
747
+ # a value to propagate in Vector
748
+ # @return [Vector]
749
+ # created Vector.
750
+ # @example propagate the value from the block
751
+ # df.assign(:range) { propagate { x.max - x.min } }
752
+ # # =>
753
+ # #<RedAmber::DataFrame : 6 x 4 Vectors, 0x00000000000e603c>
754
+ # x y z range
755
+ # <uint8> <string> <boolean> <uint8>
756
+ # 0 1 A false 5
757
+ # 1 2 A true 5
758
+ # 2 3 B false 5
759
+ # 3 4 B (nil) 5
760
+ # 4 5 B true 5
761
+ # 5 6 C false 5
762
+ #
763
+ # @since 0.5.0
764
+ #
765
+ def propagate(scalar = nil, &block)
766
+ if block
767
+ raise VectorArgumentError, "can't specify both function and block" if scalar
768
+
769
+ scalar = instance_eval(&block)
770
+ end
771
+ Vector.new([scalar] * size)
772
+ end
773
+
700
774
  # Catch variable (column) key as method name.
701
775
  def method_missing(name, *args, &block)
702
776
  return variables[name] if args.empty? && key?(name)
@@ -221,6 +221,11 @@ module RedAmber
221
221
  # - Same as `#join` with `type: :inner`
222
222
  # - A kind of mutating join.
223
223
  #
224
+ # @note the order of joined results will be preserved by default.
225
+ # This is enabled by appending index column to sort after joining but
226
+ # it will cause some performance degradation. If you don't care about
227
+ # the order of the result, set `force_order` option to `false`.
228
+ #
224
229
  # @overload inner_join(other, suffix: '.1', force_order: true)
225
230
  # If `join_key` is not specified, common keys in self and other are used
226
231
  # (natural keys). Returns joined dataframe.
@@ -280,6 +285,11 @@ module RedAmber
280
285
  # - Same as `#join` with `type: :full_outer`
281
286
  # - A kind of mutating join.
282
287
  #
288
+ # @note the order of joined results will be preserved by default.
289
+ # This is enabled by appending index column to sort after joining but
290
+ # it will cause some performance degradation. If you don't care about
291
+ # the order of the result, set `force_order` option to `false`.
292
+ #
283
293
  # @overload full_join(other, suffix: '.1', force_order: true)
284
294
  # If `join_key` is not specified, common keys in self and other are used
285
295
  # (natural keys). Returns joined dataframe.
@@ -348,6 +358,11 @@ module RedAmber
348
358
  # - Same as `#join` with `type: :left_outer`
349
359
  # - A kind of mutating join.
350
360
  #
361
+ # @note the order of joined results will be preserved by default.
362
+ # This is enabled by appending index column to sort after joining but
363
+ # it will cause some performance degradation. If you don't care about
364
+ # the order of the result, set `force_order` option to `false`.
365
+ #
351
366
  # @overload left_join(other, suffix: '.1', force_order: true)
352
367
  # If `join_key` is not specified, common keys in self and other are used
353
368
  # (natural keys). Returns joined dataframe.
@@ -410,6 +425,11 @@ module RedAmber
410
425
  # - Same as `#join` with `type: :right_outer`
411
426
  # - A kind of mutating join.
412
427
  #
428
+ # @note the order of joined results will be preserved by default.
429
+ # This is enabled by appending index column to sort after joining but
430
+ # it will cause some performance degradation. If you don't care about
431
+ # the order of the result, set `force_order` option to `false`.
432
+ #
413
433
  # @overload right_join(other, suffix: '.1', force_order: true)
414
434
  # If `join_key` is not specified, common keys in self and other are used
415
435
  # (natural keys). Returns joined dataframe.
@@ -422,11 +442,11 @@ module RedAmber
422
442
  # df.right_join(other)
423
443
  #
424
444
  # # =>
425
- # KEY X1 X2
426
- # <string> <uint8> <boolean>
427
- # 0 A 1 true
428
- # 1 B 2 false
429
- # 2 D (nil) (nil)
445
+ # X1 KEY X2
446
+ # <uint8> <string> <boolean>
447
+ # 0 1 A true
448
+ # 1 2 B false
449
+ # 2 (nil) D (nil)
430
450
  #
431
451
  # @overload right_join(other, join_keys, suffix: '.1', force_order: true)
432
452
  #
@@ -439,11 +459,11 @@ module RedAmber
439
459
  # df.right_join(other, :KEY)
440
460
  #
441
461
  # # =>
442
- # KEY X1 X2
443
- # <string> <uint8> <boolean>
444
- # 0 A 1 true
445
- # 1 B 2 false
446
- # 2 D (nil) (nil)
462
+ # X1 KEY X2
463
+ # <uint8> <string> <boolean>
464
+ # 0 1 A true
465
+ # 1 2 B false
466
+ # 2 (nil) D (nil)
447
467
  #
448
468
  # @overload right_join(other, join_key_pairs, suffix: '.1', force_order: true)
449
469
  #
@@ -456,11 +476,11 @@ module RedAmber
456
476
  # df2.right_join(other2, { left: :KEY1, right: :KEY2 })
457
477
  #
458
478
  # # =>
459
- # KEY1 X1 X2
460
- # <string> <uint8> <boolean>
461
- # 0 A 1 true
462
- # 1 B 2 false
463
- # 2 D (nil) (nil)
479
+ # X1 KEY2 X2
480
+ # <uint8> <string> <boolean>
481
+ # 0 1 A true
482
+ # 1 2 B false
483
+ # 2 (nil) D (nil)
464
484
  #
465
485
  # @since 0.2.3
466
486
  #
@@ -480,6 +500,11 @@ module RedAmber
480
500
  # - Same as `#join` with `type: :left_semi`
481
501
  # - A kind of filtering join.
482
502
  #
503
+ # @note the order of joined results will be preserved by default.
504
+ # This is enabled by appending index column to sort after joining but
505
+ # it will cause some performance degradation. If you don't care about
506
+ # the order of the result, set `force_order` option to `false`.
507
+ #
483
508
  # @overload semi_join(other, suffix: '.1', force_order: true)
484
509
  # If `join_key` is not specified, common keys in self and other are used
485
510
  # (natural keys). Returns joined dataframe.
@@ -539,6 +564,11 @@ module RedAmber
539
564
  # - Same as `#join` with `type: :left_anti`
540
565
  # - A kind of filtering join.
541
566
  #
567
+ # @note the order of joined results will be preserved by default.
568
+ # This is enabled by appending index column to sort after joining but
569
+ # it will cause some performance degradation. If you don't care about
570
+ # the order of the result, set `force_order` option to `false`.
571
+ #
542
572
  # @overload anti_join(other, suffix: '.1', force_order: true)
543
573
  # If `join_key` is not specified, common keys in self and other are used
544
574
  # (natural keys). Returns joined dataframe.
@@ -661,7 +691,7 @@ module RedAmber
661
691
  raise DataFrameArgumentError, 'keys are not same with self and other'
662
692
  end
663
693
 
664
- join(other, keys, type: :full_outer)
694
+ join(other, keys, type: :full_outer, force_order: true)
665
695
  end
666
696
 
667
697
  # Select records appearing in self but not in other.
@@ -733,12 +763,12 @@ module RedAmber
733
763
  # 1 B E
734
764
  # 2 C F
735
765
 
736
- # @note the order of joined results will be preserved by default.
737
- # This is enabled by appending index column to sort after joining but
738
- # it will cause some performance degradation. If you don't matter
739
- # the order of the result, set `force_order` option to `false`.
766
+ # @note the order of joined results may not be preserved by default.
767
+ # If you prefer to preserve the order of the result, set `force_order` option
768
+ # to `true`. This is enabled by appending index column to sort after joining
769
+ # so it will cause some performance degradation.
740
770
  #
741
- # @overload join(other, type: :inner, suffix: '.1', force_order: true)
771
+ # @overload join(other, type: :inner, suffix: '.1', force_order: false)
742
772
  #
743
773
  # If `join_key` is not specified, common keys in self and other are used
744
774
  # (natural keys). Returns joined dataframe.
@@ -767,7 +797,7 @@ module RedAmber
767
797
  # 2 C 3 (nil)
768
798
  # 3 D (nil) (nil)
769
799
  #
770
- # @overload join(other, join_keys, type: :inner, suffix: '.1', force_order: true)
800
+ # @overload join(other, join_keys, type: :inner, suffix: '.1', force_order: false)
771
801
  #
772
802
  # @macro join_before
773
803
  # @macro join_key_in_array
@@ -792,7 +822,8 @@ module RedAmber
792
822
  # 0 A 1 1
793
823
  # 1 B 2 4
794
824
  #
795
- # @overload join(other, join_key_pairs, type: :inner, suffix: '.1', force_order: true)
825
+ # @overload join(
826
+ # other, join_key_pairs, type: :inner, suffix: '.1', force_order: false)
796
827
  #
797
828
  # @macro join_before
798
829
  # @macro join_key_in_hash
@@ -828,7 +859,8 @@ module RedAmber
828
859
  #
829
860
  # @since 0.2.3
830
861
  #
831
- def join(other, join_keys = nil, type: :inner, suffix: '.1', force_order: true)
862
+ def join(other, join_keys = nil, type: :inner, suffix: '.1', force_order: false)
863
+ left_table = table
832
864
  right_table =
833
865
  case other
834
866
  when DataFrame
@@ -839,24 +871,26 @@ module RedAmber
839
871
  raise DataFrameArgumentError, 'other must be a DataFrame or an Arrow::Table'
840
872
  end
841
873
 
842
- type = type.to_sym
843
- left_index = :__LEFT_INDEX__
844
- right_index = :__RIGHT_INDEX__
845
874
  if force_order
875
+ left_index = :__LEFT_INDEX__
876
+ right_index = :__RIGHT_INDEX__
846
877
  left_table = assign(left_index) { indices }.table
847
878
  other = DataFrame.create(other) if other.is_a?(Arrow::Table)
848
879
  right_table = other.assign(right_index) { indices }.table
849
- else
850
- left_table = table
851
880
  end
852
881
 
853
- table_keys = left_table.keys
854
- other_keys = right_table.keys
855
-
882
+ left_table_keys = ensure_keys(left_table.keys)
883
+ right_table_keys = ensure_keys(right_table.keys)
856
884
  # natural keys (implicit common keys)
857
- join_keys ||= table_keys.intersection(other_keys)
885
+ join_keys ||= left_table_keys.intersection(right_table_keys)
886
+
887
+ type = Arrow::JoinType.try_convert(type) || type
888
+ type_nick = type.nick
889
+
890
+ plan = Arrow::ExecutePlan.new
891
+ left_node = plan.build_source_node(left_table)
892
+ right_node = plan.build_source_node(right_table)
858
893
 
859
- # This is not necessary if additional procedure is contributed to Red Arrow.
860
894
  if join_keys.is_a?(Hash)
861
895
  left_keys = ensure_keys(join_keys[:left])
862
896
  right_keys = ensure_keys(join_keys[:right])
@@ -865,116 +899,110 @@ module RedAmber
865
899
  right_keys = left_keys
866
900
  end
867
901
 
868
- case type
869
- when :full_outer, :left_semi, :left_anti, :right_semi, :right_anti
870
- left_outputs = nil
871
- right_outputs = nil
872
- when :inner, :left_outer
873
- left_outputs = table_keys
874
- right_outputs = other_keys - right_keys
875
- when :right_outer
876
- left_outputs = table_keys - left_keys
877
- right_outputs = other_keys
902
+ context =
903
+ [type_nick, left_table_keys, right_table_keys, left_keys, right_keys, suffix]
904
+
905
+ hash_join_node_options = Arrow::HashJoinNodeOptions.new(type, left_keys, right_keys)
906
+ case type_nick
907
+ when 'inner', 'left-outer'
908
+ hash_join_node_options.left_outputs = left_table_keys
909
+ hash_join_node_options.right_outputs = right_table_keys - right_keys
910
+ when 'right-outer'
911
+ hash_join_node_options.left_outputs = left_table_keys - left_keys
912
+ hash_join_node_options.right_outputs = right_table_keys
878
913
  end
879
914
 
880
- # Should we rescue errors in Arrow::Table#join for usability ?
881
- joined_table =
882
- left_table.join(
883
- right_table,
884
- join_keys,
885
- type: type,
886
- left_outputs: left_outputs,
887
- right_outputs: right_outputs
888
- )
889
-
890
- case type
891
- when :inner, :left_outer, :left_semi, :left_anti, :right_semi, :right_anti
892
- dataframe =
893
- if joined_table.keys.uniq!
894
- DataFrame.create(rename_table(joined_table, n_keys, suffix))
895
- else
896
- DataFrame.create(joined_table)
897
- end
915
+ hash_join_node =
916
+ plan.build_hash_join_node(left_node, right_node, hash_join_node_options)
917
+ merge_node = merge_keys(plan, hash_join_node, context)
918
+ rename_node = rename_keys(plan, merge_node, context)
919
+ joined_table = sink_and_start_plan(plan, rename_node)
920
+
921
+ df = DataFrame.create(joined_table)
922
+ if force_order
898
923
  sorter =
899
- case type
900
- when :inner, :left_outer
901
- [left_index, right_index]
902
- when :left_semi, :left_anti
903
- [left_index]
904
- when :right_semi, :right_anti
924
+ case type_nick
925
+ when 'right-semi', 'right-anti'
905
926
  [right_index]
906
- end
907
- when :full_outer
908
- key_index_lr =
909
- left_keys.map { left_table.keys.index(_1) }
910
- .zip(right_keys.map { left_table.keys.size + right_table.keys.index(_1) })
911
- renamed_table = rename_table(joined_table, n_keys, suffix)
912
- dropper = []
913
- dataframe =
914
- DataFrame.create(renamed_table).assign do |df|
915
- key_index_lr.map do |l, r|
916
- dropper << df.keys[r]
917
- [df.keys[l], merge_array(df.vectors[l].data, df.vectors[r].data)]
918
- end
919
- end
920
- dataframe = dataframe.drop(dropper)
921
- sorter = [left_index, right_index]
922
- when :right_outer
923
- dataframe =
924
- if joined_table.keys.uniq!
925
- DataFrame.create(rename_table(joined_table, left_outputs.size, suffix))
927
+ when 'left-semi', 'left-anti'
928
+ [left_index]
926
929
  else
927
- DataFrame.create(joined_table)
930
+ [left_index, right_index]
928
931
  end
929
- dataframe = dataframe.pick(right_keys, dataframe.keys - right_keys)
930
- sorter = [left_index, right_index]
931
- end
932
-
933
- if force_order
934
- dataframe
935
- .sort(sorter)
932
+ df.sort(sorter)
936
933
  .drop(sorter)
937
934
  else
938
- dataframe
935
+ df
939
936
  end
940
937
  end
941
938
 
942
939
  private
943
940
 
944
- # To ensure Array of Symbols
941
+ # To ensure Array of Strings
945
942
  def ensure_keys(keys)
946
- Array(keys).map(&:to_sym)
943
+ Array(keys).map(&:to_s)
944
+ end
945
+
946
+ # Merge key columns and preserve as left and remove right.
947
+ def merge_keys(plan, input_node, context)
948
+ type_nick, left_table_keys, right_table_keys, left_keys, right_keys, * = context
949
+ return input_node unless type_nick == 'full-outer'
950
+
951
+ left_indices = left_keys.map { left_table_keys.index(_1) }
952
+ right_offset = left_table_keys.size
953
+ right_indices = right_keys.map { right_table_keys.index(_1) + right_offset }
954
+ expressions = []
955
+ names = []
956
+ left_table_keys.each_with_index do |key, index|
957
+ names << key
958
+ expressions <<
959
+ if (i = left_indices.index(index))
960
+ left_field = Arrow::FieldExpression.new("[#{left_indices[i]}]")
961
+ right_field = Arrow::FieldExpression.new("[#{right_indices[i]}]")
962
+ is_left_null = Arrow::CallExpression.new('is_null', [left_field])
963
+ Arrow::CallExpression.new('if_else', [is_left_null, right_field, left_field])
964
+ else
965
+ Arrow::FieldExpression.new("[#{index}]")
966
+ end
967
+ end
968
+ right_table_keys.each.with_index(right_offset) do |key, index|
969
+ unless right_indices.include?(index)
970
+ names << key
971
+ expressions << Arrow::FieldExpression.new("[#{index}]")
972
+ end
973
+ end
974
+ project_node_options = Arrow::ProjectNodeOptions.new(expressions, names)
975
+ plan.build_project_node(input_node, project_node_options)
947
976
  end
948
977
 
949
- # Rename duplicate keys by suffix
950
- def rename_table(joined_table, n_keys, suffix)
951
- joined_keys = joined_table.keys
952
- other_keys = joined_keys[n_keys..]
978
+ def rename_keys(plan, input_node, context)
979
+ type_nick, left_table_keys, right_table_keys, *, suffix = context
980
+ names = input_node.output_schema.fields.map(&:name)
981
+ return input_node unless names.dup.uniq!
953
982
 
954
- dup_keys = joined_keys.tally.select { |_, v| v > 1 }.keys
983
+ pos_rights =
984
+ if type_nick.start_with?('right')
985
+ names.size - right_table_keys.size
986
+ else
987
+ left_table_keys.size
988
+ end
989
+ rights = names[pos_rights..]
990
+ dup_keys = names.tally.select { |_, v| v > 1 }.keys
955
991
  renamed_right_keys =
956
- other_keys.map do |key|
992
+ rights.map do |key|
957
993
  if dup_keys.include?(key)
958
- suffixed = "#{key}#{suffix}".to_sym
994
+ suffixed = "#{key}#{suffix}".to_s
959
995
  # Find a key from suffixed.succ
960
- (suffixed..).find { !joined_keys.include?(_1) }
996
+ (suffixed..).find { !names.include?(_1) }
961
997
  else
962
998
  key
963
999
  end
964
1000
  end
965
- joined_keys[n_keys..] = renamed_right_keys
966
-
967
- fields =
968
- joined_keys.map.with_index do |k, i|
969
- Arrow::Field.new(k, joined_table[i].data_type)
970
- end
971
- Arrow::Table.new(Arrow::Schema.new(fields), joined_table.columns)
972
- end
1001
+ names[pos_rights..] = renamed_right_keys
973
1002
 
974
- # Merge two Arrow::Arrays
975
- def merge_array(array1, array2)
976
- t = Arrow::Function.find(:is_null).execute([array1])
977
- Arrow::Function.find(:if_else).execute([t, array2, array1]).value
1003
+ expressions = names.map.with_index { |_, i| Arrow::FieldExpression.new("[#{i}]") }
1004
+ project_node_options = Arrow::ProjectNodeOptions.new(expressions, names)
1005
+ plan.build_project_node(input_node, project_node_options)
978
1006
  end
979
1007
  end
980
1008
  end
@@ -269,12 +269,13 @@ module RedAmber
269
269
  end
270
270
  alias_method :glimpse, :tdr
271
271
 
272
- # Shortcut for `tdr(:all)``.
272
+ # Shortcut for `tdr(:all)`.
273
273
  #
274
+ # @param (see #tdr)
274
275
  # @return (see #tdr)
275
276
  #
276
- def tdra
277
- puts tdr_str(:all)
277
+ def tdra(tally: 5, elements: 5)
278
+ puts tdr_str(:all, tally: tally, elements: elements)
278
279
  end
279
280
 
280
281
  # rubocop:enable Layout/LineLength
@@ -504,9 +505,9 @@ module RedAmber
504
505
  row.zip(formats).map do |elem, format|
505
506
  non_ascii_diff = elem.ascii_only? ? 0 : elem.width - elem.size
506
507
  if format.negative?
507
- elem.ljust(-format + non_ascii_diff)
508
+ elem.ljust(-format - non_ascii_diff)
508
509
  else
509
- elem.rjust(format + non_ascii_diff)
510
+ elem.rjust(format - non_ascii_diff)
510
511
  end
511
512
  end
512
513
  str.puts a.join(' ').rstrip