red_amber 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/docker/example CHANGED
@@ -1,46 +1,46 @@
1
1
  #!/usr/bin/env ruby
2
2
  # frozen_string_literal: true
3
3
 
4
- $stderr.print "starting.\r"
4
+ print "starting.\r"
5
5
 
6
- require 'bundler/setup'
6
+ Dir.chdir(__dir__) { require 'bundler/setup' }
7
7
 
8
- $stderr.print "starting..\r"
8
+ print "starting..\r"
9
9
  require 'red_amber'
10
10
  include RedAmber
11
11
 
12
- $stderr.print "starting...\r"
12
+ print "starting...\r"
13
13
  require 'datasets-arrow'
14
14
 
15
- $stderr.print "reading penguins...\r"
15
+ print "reading penguins...\r"
16
16
  penguins = DataFrame.new(Datasets::Penguins.new)
17
17
 
18
- $stderr.print "reading diamonds...\r"
18
+ print "reading diamonds...\r"
19
19
  diamonds = DataFrame.new(Datasets::Diamonds.new)
20
20
 
21
- $stderr.print "reading starwars...\r"
21
+ print "reading starwars...\r"
22
22
  starwars = DataFrame.new(Datasets::Rdataset.new('dplyr', 'starwars'))
23
23
 
24
- $stderr.print "reading openintro/simpsons_paradox_covid...\r"
24
+ print "reading openintro/simpsons_paradox_covid...\r"
25
25
  ds = Datasets::Rdataset.new('openintro', 'simpsons_paradox_covid')
26
26
  simpsons_paradox_covid = DataFrame.new(ds.to_arrow)
27
27
 
28
- $stderr.print "reading mtcars... \r"
28
+ print "reading mtcars... \r"
29
29
  mtcars = DataFrame.new(Datasets::Rdatasets.new('datasets', 'mtcars'))
30
30
 
31
- $stderr.print "reading iris... \r"
31
+ print "reading iris... \r"
32
32
  iris = DataFrame.new(Datasets::Iris.new)
33
33
 
34
- $stderr.print "reading band_members...\r"
34
+ print "reading band_members...\r"
35
35
  band_members = DataFrame.new(Datasets::Rdatasets.new('dplyr', 'band_members'))
36
36
 
37
- $stderr.print "reading band_instruments...\r"
37
+ print "reading band_instruments...\r"
38
38
  band_instruments = DataFrame.new(Datasets::Rdatasets.new('dplyr', 'band_instruments'))
39
39
 
40
- $stderr.print "reading band_instruments2...\r"
40
+ print "reading band_instruments2...\r"
41
41
  band_instruments2 = DataFrame.new(Datasets::Rdatasets.new('dplyr', 'band_instruments2'))
42
42
 
43
- $stderr.print "reading import_cars... \r"
43
+ print "reading import_cars... \r"
44
44
  import_cars = DataFrame.load(Arrow::Buffer.new(<<~TSV), format: :tsv)
45
45
  Year Audi BMW BMW_MINI Mercedes-Benz VW
46
46
  2017 28336 52527 25427 68221 49040
@@ -50,7 +50,7 @@ import_cars = DataFrame.load(Arrow::Buffer.new(<<~TSV), format: :tsv)
50
50
  2021 22535 35905 18211 51722 35215
51
51
  TSV
52
52
 
53
- $stderr.print "reading comecome... \r"
53
+ print "reading comecome... \r"
54
54
  comecome = DataFrame.load(Arrow::Buffer.new(<<~CSV), format: :csv)
55
55
  name,age
56
56
  Yasuko,68
@@ -58,7 +58,19 @@ comecome = DataFrame.load(Arrow::Buffer.new(<<~CSV), format: :csv)
58
58
  Hinata,28
59
59
  CSV
60
60
 
61
- $stderr.print "reading general dataframe and subframes...\r"
61
+ print "reading rubykaigi... \r"
62
+ rubykaigi = DataFrame.load(Arrow::Buffer.new(<<~CSV), format: :csv)
63
+ year,venue,prefecture,city,venue_en
64
+ 2015,ベルサール汐留,東京都,中央区,"Bellesalle Shiodome"
65
+ 2016,京都国際会議場,京都府,京都市左京区,"Kyoto International Conference Center"
66
+ 2017,広島国際会議場,広島県,広島市中区,"International Conference Center Hiroshima"
67
+ 2018,仙台国際センター,宮城県,仙台市青葉区,"Sendai International Center"
68
+ 2019,福岡国際会議場,福岡県,福岡市博多区,"Fukuoka International Congress Center"
69
+ 2022,三重県総合文化センター,三重県,津市,"Mie Center for the Arts"
70
+ 2023,松本市民芸術館,長野県,松本市,"Matsumoto Performing Arts Centre"
71
+ CSV
72
+
73
+ print "reading general dataframe and subframes...\r"
62
74
  dataframe = DataFrame.new(
63
75
  x: [*1..6],
64
76
  y: %w[A A B B B C],
@@ -70,5 +82,5 @@ subframes = SubFrames.new(dataframe, [[0, 1], [2, 3, 4], [5]])
70
82
  # This environment will offer these pre-loaded datasets:
71
83
  # penguins, diamonds, iris, starwars, simpsons_paradox_covid,
72
84
  # mtcars, band_members, band_instruments, band_instruments2
73
- # (original) import_cars, comecome, dataframe, subframes
85
+ # import_cars, comecome, rubykaigi, dataframe, subframes
74
86
  binding.irb
@@ -422,12 +422,12 @@ module RedAmber
422
422
  # Create SubFrames by value grouping.
423
423
  #
424
424
  # [Experimental feature] this method may be removed or be changed in the future.
425
- # @param keys [Symbol, String, Array<Symbol, String>]
425
+ # @param keys [List<Symbol, String>, Array<Symbol, String>]
426
426
  # grouping keys.
427
427
  # @return [SubFrames]
428
428
  # a created SubFrames grouped by column values on `keys`.
429
429
  # @example
430
- # df.sub_by_value(keys: :y)
430
+ # df.sub_by_value(:y)
431
431
  #
432
432
  # # =>
433
433
  # #<RedAmber::SubFrames : 0x000000000000fc08>
@@ -454,10 +454,11 @@ module RedAmber
454
454
  #
455
455
  # @since 0.4.0
456
456
  #
457
- def sub_by_value(keys: nil)
458
- SubFrames.new(self, group(keys).filters)
457
+ def sub_by_value(*keys)
458
+ SubFrames.new(self, group(keys.flatten).filters)
459
459
  end
460
460
  alias_method :subframes_by_value, :sub_by_value
461
+ alias_method :sub_group, :sub_by_value
461
462
 
462
463
  # Create SubFrames by Windowing with `from`, `size` and `step`.
463
464
  #
@@ -697,6 +698,79 @@ module RedAmber
697
698
  end
698
699
  end
699
700
 
701
+ # Returns a Vector such that all elements have value `scalar`
702
+ # and have same size as self.
703
+ #
704
+ # @overload propagate(scalar)
705
+ # Specifies scalar as an argument.
706
+ #
707
+ # @param scalar [scalar]
708
+ # a value to propagate in Vector.
709
+ # @return [Vector]
710
+ # created Vector.
711
+ # @example propagate a value
712
+ # df
713
+ # # =>
714
+ # #<RedAmber::DataFrame : 6 x 3 Vectors, 0x00000000000849a4>
715
+ # x y z
716
+ # <uint8> <string> <boolean>
717
+ # 0 1 A false
718
+ # 1 2 A true
719
+ # 2 3 B false
720
+ # 3 4 B (nil)
721
+ # 4 5 B true
722
+ # 5 6 C false
723
+ #
724
+ # df.assign(:sum_x) { propagate(x.sum) }
725
+ # # =>
726
+ # #<RedAmber::DataFrame : 6 x 4 Vectors, 0x000000000007bd04>
727
+ # x y z sum_x
728
+ # <uint8> <string> <boolean> <uint8>
729
+ # 0 1 A false 21
730
+ # 1 2 A true 21
731
+ # 2 3 B false 21
732
+ # 3 4 B (nil) 21
733
+ # 4 5 B true 21
734
+ # 5 6 C false 21
735
+ #
736
+ # # Using `Vector#propagate` like below has the same result as above.
737
+ # df.assign(:sum_x) { x.propagate(:sum) }
738
+ #
739
+ # # It is also the same as creating a column from an Array.
740
+ # df.assign(:sum_x) { [x.sum] * size }
741
+ #
742
+ # @overload propagate
743
+ #
744
+ # @yieldparam self [DataFrame]
745
+ # gives self to the block.
746
+ # @yieldreturn [scalar]
747
+ # a value to propagate in Vector
748
+ # @return [Vector]
749
+ # created Vector.
750
+ # @example propagate the value from the block
751
+ # df.assign(:range) { propagate { x.max - x.min } }
752
+ # # =>
753
+ # #<RedAmber::DataFrame : 6 x 4 Vectors, 0x00000000000e603c>
754
+ # x y z range
755
+ # <uint8> <string> <boolean> <uint8>
756
+ # 0 1 A false 5
757
+ # 1 2 A true 5
758
+ # 2 3 B false 5
759
+ # 3 4 B (nil) 5
760
+ # 4 5 B true 5
761
+ # 5 6 C false 5
762
+ #
763
+ # @since 0.5.0
764
+ #
765
+ def propagate(scalar = nil, &block)
766
+ if block
767
+ raise VectorArgumentError, "can't specify both function and block" if scalar
768
+
769
+ scalar = instance_eval(&block)
770
+ end
771
+ Vector.new([scalar] * size)
772
+ end
773
+
700
774
  # Catch variable (column) key as method name.
701
775
  def method_missing(name, *args, &block)
702
776
  return variables[name] if args.empty? && key?(name)
@@ -221,6 +221,11 @@ module RedAmber
221
221
  # - Same as `#join` with `type: :inner`
222
222
  # - A kind of mutating join.
223
223
  #
224
+ # @note the order of joined results will be preserved by default.
225
+ # This is enabled by appending index column to sort after joining but
226
+ # it will cause some performance degradation. If you don't care about
227
+ # the order of the result, set `force_order` option to `false`.
228
+ #
224
229
  # @overload inner_join(other, suffix: '.1', force_order: true)
225
230
  # If `join_key` is not specified, common keys in self and other are used
226
231
  # (natural keys). Returns joined dataframe.
@@ -280,6 +285,11 @@ module RedAmber
280
285
  # - Same as `#join` with `type: :full_outer`
281
286
  # - A kind of mutating join.
282
287
  #
288
+ # @note the order of joined results will be preserved by default.
289
+ # This is enabled by appending index column to sort after joining but
290
+ # it will cause some performance degradation. If you don't care about
291
+ # the order of the result, set `force_order` option to `false`.
292
+ #
283
293
  # @overload full_join(other, suffix: '.1', force_order: true)
284
294
  # If `join_key` is not specified, common keys in self and other are used
285
295
  # (natural keys). Returns joined dataframe.
@@ -348,6 +358,11 @@ module RedAmber
348
358
  # - Same as `#join` with `type: :left_outer`
349
359
  # - A kind of mutating join.
350
360
  #
361
+ # @note the order of joined results will be preserved by default.
362
+ # This is enabled by appending index column to sort after joining but
363
+ # it will cause some performance degradation. If you don't care about
364
+ # the order of the result, set `force_order` option to `false`.
365
+ #
351
366
  # @overload left_join(other, suffix: '.1', force_order: true)
352
367
  # If `join_key` is not specified, common keys in self and other are used
353
368
  # (natural keys). Returns joined dataframe.
@@ -410,6 +425,11 @@ module RedAmber
410
425
  # - Same as `#join` with `type: :right_outer`
411
426
  # - A kind of mutating join.
412
427
  #
428
+ # @note the order of joined results will be preserved by default.
429
+ # This is enabled by appending index column to sort after joining but
430
+ # it will cause some performance degradation. If you don't care about
431
+ # the order of the result, set `force_order` option to `false`.
432
+ #
413
433
  # @overload right_join(other, suffix: '.1', force_order: true)
414
434
  # If `join_key` is not specified, common keys in self and other are used
415
435
  # (natural keys). Returns joined dataframe.
@@ -422,11 +442,11 @@ module RedAmber
422
442
  # df.right_join(other)
423
443
  #
424
444
  # # =>
425
- # KEY X1 X2
426
- # <string> <uint8> <boolean>
427
- # 0 A 1 true
428
- # 1 B 2 false
429
- # 2 D (nil) (nil)
445
+ # X1 KEY X2
446
+ # <uint8> <string> <boolean>
447
+ # 0 1 A true
448
+ # 1 2 B false
449
+ # 2 (nil) D (nil)
430
450
  #
431
451
  # @overload right_join(other, join_keys, suffix: '.1', force_order: true)
432
452
  #
@@ -439,11 +459,11 @@ module RedAmber
439
459
  # df.right_join(other, :KEY)
440
460
  #
441
461
  # # =>
442
- # KEY X1 X2
443
- # <string> <uint8> <boolean>
444
- # 0 A 1 true
445
- # 1 B 2 false
446
- # 2 D (nil) (nil)
462
+ # X1 KEY X2
463
+ # <uint8> <string> <boolean>
464
+ # 0 1 A true
465
+ # 1 2 B false
466
+ # 2 (nil) D (nil)
447
467
  #
448
468
  # @overload right_join(other, join_key_pairs, suffix: '.1', force_order: true)
449
469
  #
@@ -456,11 +476,11 @@ module RedAmber
456
476
  # df2.right_join(other2, { left: :KEY1, right: :KEY2 })
457
477
  #
458
478
  # # =>
459
- # KEY1 X1 X2
460
- # <string> <uint8> <boolean>
461
- # 0 A 1 true
462
- # 1 B 2 false
463
- # 2 D (nil) (nil)
479
+ # X1 KEY2 X2
480
+ # <uint8> <string> <boolean>
481
+ # 0 1 A true
482
+ # 1 2 B false
483
+ # 2 (nil) D (nil)
464
484
  #
465
485
  # @since 0.2.3
466
486
  #
@@ -480,6 +500,11 @@ module RedAmber
480
500
  # - Same as `#join` with `type: :left_semi`
481
501
  # - A kind of filtering join.
482
502
  #
503
+ # @note the order of joined results will be preserved by default.
504
+ # This is enabled by appending index column to sort after joining but
505
+ # it will cause some performance degradation. If you don't care about
506
+ # the order of the result, set `force_order` option to `false`.
507
+ #
483
508
  # @overload semi_join(other, suffix: '.1', force_order: true)
484
509
  # If `join_key` is not specified, common keys in self and other are used
485
510
  # (natural keys). Returns joined dataframe.
@@ -539,6 +564,11 @@ module RedAmber
539
564
  # - Same as `#join` with `type: :left_anti`
540
565
  # - A kind of filtering join.
541
566
  #
567
+ # @note the order of joined results will be preserved by default.
568
+ # This is enabled by appending index column to sort after joining but
569
+ # it will cause some performance degradation. If you don't care about
570
+ # the order of the result, set `force_order` option to `false`.
571
+ #
542
572
  # @overload anti_join(other, suffix: '.1', force_order: true)
543
573
  # If `join_key` is not specified, common keys in self and other are used
544
574
  # (natural keys). Returns joined dataframe.
@@ -661,7 +691,7 @@ module RedAmber
661
691
  raise DataFrameArgumentError, 'keys are not same with self and other'
662
692
  end
663
693
 
664
- join(other, keys, type: :full_outer)
694
+ join(other, keys, type: :full_outer, force_order: true)
665
695
  end
666
696
 
667
697
  # Select records appearing in self but not in other.
@@ -733,12 +763,12 @@ module RedAmber
733
763
  # 1 B E
734
764
  # 2 C F
735
765
 
736
- # @note the order of joined results will be preserved by default.
737
- # This is enabled by appending index column to sort after joining but
738
- # it will cause some performance degradation. If you don't matter
739
- # the order of the result, set `force_order` option to `false`.
766
+ # @note the order of joined results may not be preserved by default.
767
+ # If you prefer to preserve the order of the result, set `force_order` option
768
+ # to `true`. This is enabled by appending index column to sort after joining
769
+ # so it will cause some performance degradation.
740
770
  #
741
- # @overload join(other, type: :inner, suffix: '.1', force_order: true)
771
+ # @overload join(other, type: :inner, suffix: '.1', force_order: false)
742
772
  #
743
773
  # If `join_key` is not specified, common keys in self and other are used
744
774
  # (natural keys). Returns joined dataframe.
@@ -767,7 +797,7 @@ module RedAmber
767
797
  # 2 C 3 (nil)
768
798
  # 3 D (nil) (nil)
769
799
  #
770
- # @overload join(other, join_keys, type: :inner, suffix: '.1', force_order: true)
800
+ # @overload join(other, join_keys, type: :inner, suffix: '.1', force_order: false)
771
801
  #
772
802
  # @macro join_before
773
803
  # @macro join_key_in_array
@@ -792,7 +822,8 @@ module RedAmber
792
822
  # 0 A 1 1
793
823
  # 1 B 2 4
794
824
  #
795
- # @overload join(other, join_key_pairs, type: :inner, suffix: '.1', force_order: true)
825
+ # @overload join(
826
+ # other, join_key_pairs, type: :inner, suffix: '.1', force_order: false)
796
827
  #
797
828
  # @macro join_before
798
829
  # @macro join_key_in_hash
@@ -828,7 +859,8 @@ module RedAmber
828
859
  #
829
860
  # @since 0.2.3
830
861
  #
831
- def join(other, join_keys = nil, type: :inner, suffix: '.1', force_order: true)
862
+ def join(other, join_keys = nil, type: :inner, suffix: '.1', force_order: false)
863
+ left_table = table
832
864
  right_table =
833
865
  case other
834
866
  when DataFrame
@@ -839,24 +871,26 @@ module RedAmber
839
871
  raise DataFrameArgumentError, 'other must be a DataFrame or an Arrow::Table'
840
872
  end
841
873
 
842
- type = type.to_sym
843
- left_index = :__LEFT_INDEX__
844
- right_index = :__RIGHT_INDEX__
845
874
  if force_order
875
+ left_index = :__LEFT_INDEX__
876
+ right_index = :__RIGHT_INDEX__
846
877
  left_table = assign(left_index) { indices }.table
847
878
  other = DataFrame.create(other) if other.is_a?(Arrow::Table)
848
879
  right_table = other.assign(right_index) { indices }.table
849
- else
850
- left_table = table
851
880
  end
852
881
 
853
- table_keys = left_table.keys
854
- other_keys = right_table.keys
855
-
882
+ left_table_keys = ensure_keys(left_table.keys)
883
+ right_table_keys = ensure_keys(right_table.keys)
856
884
  # natural keys (implicit common keys)
857
- join_keys ||= table_keys.intersection(other_keys)
885
+ join_keys ||= left_table_keys.intersection(right_table_keys)
886
+
887
+ type = Arrow::JoinType.try_convert(type) || type
888
+ type_nick = type.nick
889
+
890
+ plan = Arrow::ExecutePlan.new
891
+ left_node = plan.build_source_node(left_table)
892
+ right_node = plan.build_source_node(right_table)
858
893
 
859
- # This is not necessary if additional procedure is contributed to Red Arrow.
860
894
  if join_keys.is_a?(Hash)
861
895
  left_keys = ensure_keys(join_keys[:left])
862
896
  right_keys = ensure_keys(join_keys[:right])
@@ -865,116 +899,110 @@ module RedAmber
865
899
  right_keys = left_keys
866
900
  end
867
901
 
868
- case type
869
- when :full_outer, :left_semi, :left_anti, :right_semi, :right_anti
870
- left_outputs = nil
871
- right_outputs = nil
872
- when :inner, :left_outer
873
- left_outputs = table_keys
874
- right_outputs = other_keys - right_keys
875
- when :right_outer
876
- left_outputs = table_keys - left_keys
877
- right_outputs = other_keys
902
+ context =
903
+ [type_nick, left_table_keys, right_table_keys, left_keys, right_keys, suffix]
904
+
905
+ hash_join_node_options = Arrow::HashJoinNodeOptions.new(type, left_keys, right_keys)
906
+ case type_nick
907
+ when 'inner', 'left-outer'
908
+ hash_join_node_options.left_outputs = left_table_keys
909
+ hash_join_node_options.right_outputs = right_table_keys - right_keys
910
+ when 'right-outer'
911
+ hash_join_node_options.left_outputs = left_table_keys - left_keys
912
+ hash_join_node_options.right_outputs = right_table_keys
878
913
  end
879
914
 
880
- # Should we rescue errors in Arrow::Table#join for usability ?
881
- joined_table =
882
- left_table.join(
883
- right_table,
884
- join_keys,
885
- type: type,
886
- left_outputs: left_outputs,
887
- right_outputs: right_outputs
888
- )
889
-
890
- case type
891
- when :inner, :left_outer, :left_semi, :left_anti, :right_semi, :right_anti
892
- dataframe =
893
- if joined_table.keys.uniq!
894
- DataFrame.create(rename_table(joined_table, n_keys, suffix))
895
- else
896
- DataFrame.create(joined_table)
897
- end
915
+ hash_join_node =
916
+ plan.build_hash_join_node(left_node, right_node, hash_join_node_options)
917
+ merge_node = merge_keys(plan, hash_join_node, context)
918
+ rename_node = rename_keys(plan, merge_node, context)
919
+ joined_table = sink_and_start_plan(plan, rename_node)
920
+
921
+ df = DataFrame.create(joined_table)
922
+ if force_order
898
923
  sorter =
899
- case type
900
- when :inner, :left_outer
901
- [left_index, right_index]
902
- when :left_semi, :left_anti
903
- [left_index]
904
- when :right_semi, :right_anti
924
+ case type_nick
925
+ when 'right-semi', 'right-anti'
905
926
  [right_index]
906
- end
907
- when :full_outer
908
- key_index_lr =
909
- left_keys.map { left_table.keys.index(_1) }
910
- .zip(right_keys.map { left_table.keys.size + right_table.keys.index(_1) })
911
- renamed_table = rename_table(joined_table, n_keys, suffix)
912
- dropper = []
913
- dataframe =
914
- DataFrame.create(renamed_table).assign do |df|
915
- key_index_lr.map do |l, r|
916
- dropper << df.keys[r]
917
- [df.keys[l], merge_array(df.vectors[l].data, df.vectors[r].data)]
918
- end
919
- end
920
- dataframe = dataframe.drop(dropper)
921
- sorter = [left_index, right_index]
922
- when :right_outer
923
- dataframe =
924
- if joined_table.keys.uniq!
925
- DataFrame.create(rename_table(joined_table, left_outputs.size, suffix))
927
+ when 'left-semi', 'left-anti'
928
+ [left_index]
926
929
  else
927
- DataFrame.create(joined_table)
930
+ [left_index, right_index]
928
931
  end
929
- dataframe = dataframe.pick(right_keys, dataframe.keys - right_keys)
930
- sorter = [left_index, right_index]
931
- end
932
-
933
- if force_order
934
- dataframe
935
- .sort(sorter)
932
+ df.sort(sorter)
936
933
  .drop(sorter)
937
934
  else
938
- dataframe
935
+ df
939
936
  end
940
937
  end
941
938
 
942
939
  private
943
940
 
944
- # To ensure Array of Symbols
941
+ # To ensure Array of Strings
945
942
  def ensure_keys(keys)
946
- Array(keys).map(&:to_sym)
943
+ Array(keys).map(&:to_s)
944
+ end
945
+
946
+ # For full outer joins, merge each left/right key pair into the left key column and drop the right key column.
947
+ def merge_keys(plan, input_node, context)
948
+ type_nick, left_table_keys, right_table_keys, left_keys, right_keys, * = context
949
+ return input_node unless type_nick == 'full-outer'
950
+
951
+ left_indices = left_keys.map { left_table_keys.index(_1) }
952
+ right_offset = left_table_keys.size
953
+ right_indices = right_keys.map { right_table_keys.index(_1) + right_offset }
954
+ expressions = []
955
+ names = []
956
+ left_table_keys.each_with_index do |key, index|
957
+ names << key
958
+ expressions <<
959
+ if (i = left_indices.index(index))
960
+ left_field = Arrow::FieldExpression.new("[#{left_indices[i]}]")
961
+ right_field = Arrow::FieldExpression.new("[#{right_indices[i]}]")
962
+ is_left_null = Arrow::CallExpression.new('is_null', [left_field])
963
+ Arrow::CallExpression.new('if_else', [is_left_null, right_field, left_field])
964
+ else
965
+ Arrow::FieldExpression.new("[#{index}]")
966
+ end
967
+ end
968
+ right_table_keys.each.with_index(right_offset) do |key, index|
969
+ unless right_indices.include?(index)
970
+ names << key
971
+ expressions << Arrow::FieldExpression.new("[#{index}]")
972
+ end
973
+ end
974
+ project_node_options = Arrow::ProjectNodeOptions.new(expressions, names)
975
+ plan.build_project_node(input_node, project_node_options)
947
976
  end
948
977
 
949
- # Rename duplicate keys by suffix
950
- def rename_table(joined_table, n_keys, suffix)
951
- joined_keys = joined_table.keys
952
- other_keys = joined_keys[n_keys..]
978
+ def rename_keys(plan, input_node, context)
979
+ type_nick, left_table_keys, right_table_keys, *, suffix = context
980
+ names = input_node.output_schema.fields.map(&:name)
981
+ return input_node unless names.dup.uniq!
953
982
 
954
- dup_keys = joined_keys.tally.select { |_, v| v > 1 }.keys
983
+ pos_rights =
984
+ if type_nick.start_with?('right')
985
+ names.size - right_table_keys.size
986
+ else
987
+ left_table_keys.size
988
+ end
989
+ rights = names[pos_rights..]
990
+ dup_keys = names.tally.select { |_, v| v > 1 }.keys
955
991
  renamed_right_keys =
956
- other_keys.map do |key|
992
+ rights.map do |key|
957
993
  if dup_keys.include?(key)
958
- suffixed = "#{key}#{suffix}".to_sym
994
+ suffixed = "#{key}#{suffix}".to_s
959
995
  # Find a key from suffixed.succ
960
- (suffixed..).find { !joined_keys.include?(_1) }
996
+ (suffixed..).find { !names.include?(_1) }
961
997
  else
962
998
  key
963
999
  end
964
1000
  end
965
- joined_keys[n_keys..] = renamed_right_keys
966
-
967
- fields =
968
- joined_keys.map.with_index do |k, i|
969
- Arrow::Field.new(k, joined_table[i].data_type)
970
- end
971
- Arrow::Table.new(Arrow::Schema.new(fields), joined_table.columns)
972
- end
1001
+ names[pos_rights..] = renamed_right_keys
973
1002
 
974
- # Merge two Arrow::Arrays
975
- def merge_array(array1, array2)
976
- t = Arrow::Function.find(:is_null).execute([array1])
977
- Arrow::Function.find(:if_else).execute([t, array2, array1]).value
1003
+ expressions = names.map.with_index { |_, i| Arrow::FieldExpression.new("[#{i}]") }
1004
+ project_node_options = Arrow::ProjectNodeOptions.new(expressions, names)
1005
+ plan.build_project_node(input_node, project_node_options)
978
1006
  end
979
1007
  end
980
1008
  end
@@ -269,12 +269,13 @@ module RedAmber
269
269
  end
270
270
  alias_method :glimpse, :tdr
271
271
 
272
- # Shortcut for `tdr(:all)``.
272
+ # Shortcut for `tdr(:all)`.
273
273
  #
274
+ # @param (see #tdr)
274
275
  # @return (see #tdr)
275
276
  #
276
- def tdra
277
- puts tdr_str(:all)
277
+ def tdra(tally: 5, elements: 5)
278
+ puts tdr_str(:all, tally: tally, elements: elements)
278
279
  end
279
280
 
280
281
  # rubocop:enable Layout/LineLength
@@ -504,9 +505,9 @@ module RedAmber
504
505
  row.zip(formats).map do |elem, format|
505
506
  non_ascii_diff = elem.ascii_only? ? 0 : elem.width - elem.size
506
507
  if format.negative?
507
- elem.ljust(-format + non_ascii_diff)
508
+ elem.ljust(-format - non_ascii_diff)
508
509
  else
509
- elem.rjust(format + non_ascii_diff)
510
+ elem.rjust(format - non_ascii_diff)
510
511
  end
511
512
  end
512
513
  str.puts a.join(' ').rstrip