red_amber 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,8 @@ module RedAmber
31
31
  # 2 3 C
32
32
  # 3 4 D
33
33
  #
34
+ # @since 0.2.3
35
+ #
34
36
  def concatenate(*other)
35
37
  case other
36
38
  in [] | [nil] | [[]]
@@ -79,6 +81,8 @@ module RedAmber
79
81
  # 0 1 3 A C
80
82
  # 1 2 4 B D
81
83
  #
84
+ # @since 0.2.3
85
+ #
82
86
  def merge(*other)
83
87
  case other
84
88
  in [] | [nil] | [[]]
@@ -121,7 +125,7 @@ module RedAmber
121
125
  # @param other [DataFrame, Arrow::Table]
122
126
  # A DataFrame or a Table to be joined with self.
123
127
  #
124
- # @!macro join_dorce_order
128
+ # @!macro join_force_order
125
129
  # @param force_order [Boolean]
126
130
  # whether to force the order of the output to always be the same.
127
131
  # - This option is used in `:full_outer` and `:right_outer`.
@@ -217,11 +221,12 @@ module RedAmber
217
221
  # - Same as `#join` with `type: :inner`
218
222
  # - A kind of mutating join.
219
223
  #
220
- # @overload inner_join(other, suffix: '.1')
224
+ # @overload inner_join(other, suffix: '.1', force_order: true)
221
225
  # If `join_key` is not specified, common keys in self and other are used
222
226
  # (natural keys). Returns joined dataframe.
223
227
  #
224
228
  # @macro join_before
229
+ # @macro join_force_order
225
230
  # @macro join_after
226
231
  # @macro join_common_example_1
227
232
  # @example without key (use implicit common key)
@@ -233,10 +238,11 @@ module RedAmber
233
238
  # 0 A 1 true
234
239
  # 1 B 2 false
235
240
  #
236
- # @overload inner_join(other, join_keys, suffix: '.1')
241
+ # @overload inner_join(other, join_keys, suffix: '.1', force_order: true)
237
242
  #
238
243
  # @macro join_before
239
244
  # @macro join_key_in_array
245
+ # @macro join_force_order
240
246
  # @macro join_after
241
247
  # @macro join_common_example_1
242
248
  # @example with a key
@@ -248,10 +254,11 @@ module RedAmber
248
254
  # 0 A 1 true
249
255
  # 1 B 2 false
250
256
  #
251
- # @overload inner_join(other, join_key_pairs, suffix: '.1')
257
+ # @overload inner_join(other, join_key_pairs, suffix: '.1', force_order: true)
252
258
  #
253
259
  # @macro join_before
254
260
  # @macro join_key_in_hash
261
+ # @macro join_force_order
255
262
  # @macro join_after
256
263
  # @macro join_common_example_2
257
264
  # @example with key pairs
@@ -263,8 +270,10 @@ module RedAmber
263
270
  # 0 A 1 true
264
271
  # 1 B 2 false
265
272
  #
266
- def inner_join(other, join_keys = nil, suffix: '.1')
267
- join(other, join_keys, type: :inner, suffix: suffix)
273
+ # @since 0.2.3
274
+ #
275
+ def inner_join(other, join_keys = nil, suffix: '.1', force_order: true)
276
+ join(other, join_keys, type: :inner, suffix: suffix, force_order: force_order)
268
277
  end
269
278
 
270
279
  # Join another DataFrame or Table, leaving all records.
@@ -276,7 +285,7 @@ module RedAmber
276
285
  # (natural keys). Returns joined dataframe.
277
286
  #
278
287
  # @macro join_before
279
- # @macro join_dorce_order
288
+ # @macro join_force_order
280
289
  # @macro join_after
281
290
  # @macro join_common_example_1
282
291
  # @example without key (use implicit common key)
@@ -294,7 +303,7 @@ module RedAmber
294
303
  #
295
304
  # @macro join_before
296
305
  # @macro join_key_in_array
297
- # @macro join_dorce_order
306
+ # @macro join_force_order
298
307
  # @macro join_after
299
308
  # @macro join_common_example_1
300
309
  # @example with a key
@@ -312,7 +321,7 @@ module RedAmber
312
321
  #
313
322
  # @macro join_before
314
323
  # @macro join_key_in_hash
315
- # @macro join_dorce_order
324
+ # @macro join_force_order
316
325
  # @macro join_after
317
326
  # @macro join_common_example_2
318
327
  # @example with key pairs
@@ -326,6 +335,8 @@ module RedAmber
326
335
  # 2 C 3 (nil)
327
336
  # 3 D (nil) (nil)
328
337
  #
338
+ # @since 0.2.3
339
+ #
329
340
  def full_join(other, join_keys = nil, suffix: '.1', force_order: true)
330
341
  join(other, join_keys,
331
342
  type: :full_outer, suffix: suffix, force_order: force_order)
@@ -337,11 +348,12 @@ module RedAmber
337
348
  # - Same as `#join` with `type: :left_outer`
338
349
  # - A kind of mutating join.
339
350
  #
340
- # @overload left_join(other, suffix: '.1')
351
+ # @overload left_join(other, suffix: '.1', force_order: true)
341
352
  # If `join_key` is not specified, common keys in self and other are used
342
353
  # (natural keys). Returns joined dataframe.
343
354
  #
344
355
  # @macro join_before
356
+ # @macro join_force_order
345
357
  # @macro join_after
346
358
  # @macro join_common_example_1
347
359
  # @example without key (use implicit common key)
@@ -354,10 +366,11 @@ module RedAmber
354
366
  # 1 B 2 false
355
367
  # 2 C 3 (nil)
356
368
  #
357
- # @overload left_join(other, join_keys, suffix: '.1')
369
+ # @overload left_join(other, join_keys, suffix: '.1', force_order: true)
358
370
  #
359
371
  # @macro join_before
360
372
  # @macro join_key_in_array
373
+ # @macro join_force_order
361
374
  # @macro join_after
362
375
  # @macro join_common_example_1
363
376
  # @example with a key
@@ -370,10 +383,11 @@ module RedAmber
370
383
  # 1 B 2 false
371
384
  # 2 C 3 (nil)
372
385
  #
373
- # @overload left_join(other, join_key_pairs, suffix: '.1')
386
+ # @overload left_join(other, join_key_pairs, suffix: '.1', force_order: true)
374
387
  #
375
388
  # @macro join_before
376
389
  # @macro join_key_in_hash
390
+ # @macro join_force_order
377
391
  # @macro join_after
378
392
  # @macro join_common_example_2
379
393
  # @example with key pairs
@@ -386,8 +400,10 @@ module RedAmber
386
400
  # 1 B 2 false
387
401
  # 2 C 3 (nil)
388
402
  #
389
- def left_join(other, join_keys = nil, suffix: '.1')
390
- join(other, join_keys, type: :left_outer, suffix: suffix)
403
+ # @since 0.2.3
404
+ #
405
+ def left_join(other, join_keys = nil, suffix: '.1', force_order: true)
406
+ join(other, join_keys, type: :left_outer, suffix: suffix, force_order: force_order)
391
407
  end
392
408
 
393
409
  # Join matching values from self to other.
@@ -399,7 +415,7 @@ module RedAmber
399
415
  # (natural keys). Returns joined dataframe.
400
416
  #
401
417
  # @macro join_before
402
- # @macro join_dorce_order
418
+ # @macro join_force_order
403
419
  # @macro join_after
404
420
  # @macro join_common_example_1
405
421
  # @example without key (use implicit common key)
@@ -416,7 +432,7 @@ module RedAmber
416
432
  #
417
433
  # @macro join_before
418
434
  # @macro join_key_in_array
419
- # @macro join_dorce_order
435
+ # @macro join_force_order
420
436
  # @macro join_after
421
437
  # @macro join_common_example_1
422
438
  # @example with a key
@@ -433,7 +449,7 @@ module RedAmber
433
449
  #
434
450
  # @macro join_before
435
451
  # @macro join_key_in_hash
436
- # @macro join_dorce_order
452
+ # @macro join_force_order
437
453
  # @macro join_after
438
454
  # @macro join_common_example_2
439
455
  # @example with key pairs
@@ -446,6 +462,8 @@ module RedAmber
446
462
  # 1 B 2 false
447
463
  # 2 D (nil) (nil)
448
464
  #
465
+ # @since 0.2.3
466
+ #
449
467
  def right_join(other, join_keys = nil, suffix: '.1', force_order: true)
450
468
  join(
451
469
  other,
@@ -462,11 +480,12 @@ module RedAmber
462
480
  # - Same as `#join` with `type: :left_semi`
463
481
  # - A kind of filtering join.
464
482
  #
465
- # @overload semi_join(other, suffix: '.1')
483
+ # @overload semi_join(other, suffix: '.1', force_order: true)
466
484
  # If `join_key` is not specified, common keys in self and other are used
467
485
  # (natural keys). Returns joined dataframe.
468
486
  #
469
487
  # @macro join_before
488
+ # @macro join_force_order
470
489
  # @macro join_after
471
490
  # @macro join_common_example_1
472
491
  # @example without key (use implicit common key)
@@ -478,10 +497,11 @@ module RedAmber
478
497
  # 0 A 1
479
498
  # 1 B 2
480
499
  #
481
- # @overload semi_join(other, join_keys, suffix: '.1')
500
+ # @overload semi_join(other, join_keys, suffix: '.1', force_order: true)
482
501
  #
483
502
  # @macro join_before
484
503
  # @macro join_key_in_array
504
+ # @macro join_force_order
485
505
  # @macro join_after
486
506
  # @macro join_common_example_1
487
507
  # @example with a key
@@ -493,10 +513,11 @@ module RedAmber
493
513
  # 0 A 1
494
514
  # 1 B 2
495
515
  #
496
- # @overload semi_join(other, join_key_pairs, suffix: '.1')
516
+ # @overload semi_join(other, join_key_pairs, suffix: '.1', force_order: true)
497
517
  #
498
518
  # @macro join_before
499
519
  # @macro join_key_in_hash
520
+ # @macro join_force_order
500
521
  # @macro join_after
501
522
  # @macro join_common_example_2
502
523
  # @example with key pairs
@@ -508,19 +529,22 @@ module RedAmber
508
529
  # 0 A 1
509
530
  # 1 B 2
510
531
  #
511
- def semi_join(other, join_keys = nil, suffix: '.1')
512
- join(other, join_keys, type: :left_semi, suffix: suffix)
532
+ # @since 0.2.3
533
+ #
534
+ def semi_join(other, join_keys = nil, suffix: '.1', force_order: true)
535
+ join(other, join_keys, type: :left_semi, suffix: suffix, force_order: force_order)
513
536
  end
514
537
 
515
538
  # Return records of self that do not have a match in other.
516
539
  # - Same as `#join` with `type: :left_anti`
517
540
  # - A kind of filtering join.
518
541
  #
519
- # @overload anti_join(other, suffix: '.1')
542
+ # @overload anti_join(other, suffix: '.1', force_order: true)
520
543
  # If `join_key` is not specified, common keys in self and other are used
521
544
  # (natural keys). Returns joined dataframe.
522
545
  #
523
546
  # @macro join_before
547
+ # @macro join_force_order
524
548
  # @macro join_after
525
549
  # @macro join_common_example_1
526
550
  # @example without key (use implicit common key)
@@ -531,10 +555,11 @@ module RedAmber
531
555
  # <string> <uint8>
532
556
  # 0 C 3
533
557
  #
534
- # @overload anti_join(other, join_keys, suffix: '.1')
558
+ # @overload anti_join(other, join_keys, suffix: '.1', force_order: true)
535
559
  #
536
560
  # @macro join_before
537
561
  # @macro join_key_in_array
562
+ # @macro join_force_order
538
563
  # @macro join_after
539
564
  # @macro join_common_example_1
540
565
  # @example with a key
@@ -545,10 +570,11 @@ module RedAmber
545
570
  # <string> <uint8>
546
571
  # 0 C 3
547
572
  #
548
- # @overload anti_join(other, join_key_pairs, suffix: '.1')
573
+ # @overload anti_join(other, join_key_pairs, suffix: '.1', force_order: true)
549
574
  #
550
575
  # @macro join_before
551
576
  # @macro join_key_in_hash
577
+ # @macro join_force_order
552
578
  # @macro join_after
553
579
  # @macro join_common_example_2
554
580
  # @example with key pairs
@@ -559,8 +585,10 @@ module RedAmber
559
585
  # <string> <uint8>
560
586
  # 0 C 3
561
587
  #
562
- def anti_join(other, join_keys = nil, suffix: '.1')
563
- join(other, join_keys, type: :left_anti, suffix: suffix)
588
+ # @since 0.2.3
589
+ #
590
+ def anti_join(other, join_keys = nil, suffix: '.1', force_order: true)
591
+ join(other, join_keys, type: :left_anti, suffix: suffix, force_order: force_order)
564
592
  end
565
593
 
566
594
  # Set operations (#intersect, #union, #difference, #set_operable?)
@@ -574,6 +602,8 @@ module RedAmber
574
602
  # @example
575
603
  # df3.set_operable?(other3) # => true
576
604
  #
605
+ # @since 0.2.3
606
+ #
577
607
  def set_operable?(other) # rubocop:disable Naming/AccessorMethodName
578
608
  keys == other.keys.map(&:to_sym)
579
609
  end
@@ -594,6 +624,8 @@ module RedAmber
594
624
  # <string> <uint8>
595
625
  # 0 A 1
596
626
  #
627
+ # @since 0.2.3
628
+ #
597
629
  def intersect(other)
598
630
  unless keys == other.keys.map(&:to_sym)
599
631
  raise DataFrameArgumentError, 'keys are not same with self and other'
@@ -622,6 +654,8 @@ module RedAmber
622
654
  # 3 B 4
623
655
  # 4 D 5
624
656
  #
657
+ # @since 0.2.3
658
+ #
625
659
  def union(other)
626
660
  unless keys == other.keys.map(&:to_sym)
627
661
  raise DataFrameArgumentError, 'keys are not same with self and other'
@@ -655,6 +689,8 @@ module RedAmber
655
689
  # 0 B 4
656
690
  # 1 D 5
657
691
  #
692
+ # @since 0.2.3
693
+ #
658
694
  def difference(other)
659
695
  unless keys == other.keys.map(&:to_sym)
660
696
  raise DataFrameArgumentError, 'keys are not same with self and other'
@@ -709,7 +745,7 @@ module RedAmber
709
745
  #
710
746
  # @macro join_before
711
747
  # @macro join_common_type
712
- # @macro join_dorce_order
748
+ # @macro join_force_order
713
749
  # @macro join_after
714
750
  # @macro join_common_example_1
715
751
  # @example
@@ -736,7 +772,7 @@ module RedAmber
736
772
  # @macro join_before
737
773
  # @macro join_key_in_array
738
774
  # @macro join_common_type
739
- # @macro join_dorce_order
775
+ # @macro join_force_order
740
776
  # @macro join_after
741
777
  # @macro join_common_example_3
742
778
  # @example join keys in an Array
@@ -761,7 +797,7 @@ module RedAmber
761
797
  # @macro join_before
762
798
  # @macro join_key_in_hash
763
799
  # @macro join_common_type
764
- # @macro join_dorce_order
800
+ # @macro join_force_order
765
801
  # @macro join_after
766
802
  # @macro join_common_example_4
767
803
  # @example without options
@@ -790,6 +826,8 @@ module RedAmber
790
826
  # 0 A D e
791
827
  # 1 B E E
792
828
  #
829
+ # @since 0.2.3
830
+ #
793
831
  def join(other, join_keys = nil, type: :inner, suffix: '.1', force_order: true)
794
832
  right_table =
795
833
  case other
@@ -804,7 +842,7 @@ module RedAmber
804
842
  type = type.to_sym
805
843
  left_index = :__LEFT_INDEX__
806
844
  right_index = :__RIGHT_INDEX__
807
- if force_order && %i[full_outer right_outer].include?(type)
845
+ if force_order
808
846
  left_table = assign(left_index) { indices }.table
809
847
  other = DataFrame.create(other) if other.is_a?(Arrow::Table)
810
848
  right_table = other.assign(right_index) { indices }.table
@@ -820,14 +858,12 @@ module RedAmber
820
858
 
821
859
  # This is not necessary if additional procedure is contributed to Red Arrow.
822
860
  if join_keys.is_a?(Hash)
823
- left_keys = join_keys[:left]
824
- right_keys = join_keys[:right]
861
+ left_keys = ensure_keys(join_keys[:left])
862
+ right_keys = ensure_keys(join_keys[:right])
825
863
  else
826
- left_keys = join_keys
827
- right_keys = join_keys
864
+ left_keys = ensure_keys(join_keys)
865
+ right_keys = left_keys
828
866
  end
829
- left_keys = Array(left_keys).map(&:to_s)
830
- right_keys = Array(right_keys).map(&:to_s)
831
867
 
832
868
  case type
833
869
  when :full_outer, :left_semi, :left_anti, :right_semi, :right_anti
@@ -853,26 +889,36 @@ module RedAmber
853
889
 
854
890
  case type
855
891
  when :inner, :left_outer, :left_semi, :left_anti, :right_semi, :right_anti
856
- if joined_table.keys.uniq!
857
- DataFrame.create(rename_table(joined_table, n_keys, suffix))
858
- else
859
- DataFrame.create(joined_table)
860
- end
892
+ dataframe =
893
+ if joined_table.keys.uniq!
894
+ DataFrame.create(rename_table(joined_table, n_keys, suffix))
895
+ else
896
+ DataFrame.create(joined_table)
897
+ end
898
+ sorter =
899
+ case type
900
+ when :inner, :left_outer
901
+ [left_index, right_index]
902
+ when :left_semi, :left_anti
903
+ [left_index]
904
+ when :right_semi, :right_anti
905
+ [right_index]
906
+ end
861
907
  when :full_outer
908
+ key_index_lr =
909
+ left_keys.map { left_table.keys.index(_1) }
910
+ .zip(right_keys.map { left_table.keys.size + right_table.keys.index(_1) })
862
911
  renamed_table = rename_table(joined_table, n_keys, suffix)
863
- renamed_keys = renamed_table.keys
864
912
  dropper = []
865
- dataframe = DataFrame.create(renamed_table).assign do |df|
866
- left_keys.map do |left_key|
867
- i_left_key = renamed_keys.index(left_key)
868
- right_key = renamed_keys[i_left_key + table_keys.size]
869
- dropper << right_key
870
- [left_key.to_sym, merge_array(df[left_key].data, df[right_key].data)]
913
+ dataframe =
914
+ DataFrame.create(renamed_table).assign do |df|
915
+ key_index_lr.map do |l, r|
916
+ dropper << df.keys[r]
917
+ [df.keys[l], merge_array(df.vectors[l].data, df.vectors[r].data)]
918
+ end
871
919
  end
872
- end
873
- dataframe = dataframe.sort(left_index, right_index) if force_order
874
-
875
- dataframe.drop(dropper, left_index, right_index)
920
+ dataframe = dataframe.drop(dropper)
921
+ sorter = [left_index, right_index]
876
922
  when :right_outer
877
923
  dataframe =
878
924
  if joined_table.keys.uniq!
@@ -880,20 +926,26 @@ module RedAmber
880
926
  else
881
927
  DataFrame.create(joined_table)
882
928
  end
883
- if force_order
884
- dataframe =
885
- dataframe
886
- .sort(left_index, right_index)
887
- .drop(left_index, right_index)
888
- end
889
- dataframe.pick do
890
- [right_keys, keys.map(&:to_s) - right_keys]
891
- end
929
+ dataframe = dataframe.pick(right_keys, dataframe.keys - right_keys)
930
+ sorter = [left_index, right_index]
931
+ end
932
+
933
+ if force_order
934
+ dataframe
935
+ .sort(sorter)
936
+ .drop(sorter)
937
+ else
938
+ dataframe
892
939
  end
893
940
  end
894
941
 
895
942
  private
896
943
 
944
+ # Coerce keys into an Array of Symbols
945
+ def ensure_keys(keys)
946
+ Array(keys).map(&:to_sym)
947
+ end
948
+
897
949
  # Rename duplicate keys by suffix
898
950
  def rename_table(joined_table, n_keys, suffix)
899
951
  joined_keys = joined_table.keys
@@ -903,17 +955,9 @@ module RedAmber
903
955
  renamed_right_keys =
904
956
  other_keys.map do |key|
905
957
  if dup_keys.include?(key)
906
- new_key = nil
907
- loop do
908
- new_key = "#{key}#{suffix}"
909
- break unless joined_keys.include?(new_key)
910
-
911
- s = suffix.succ
912
- raise DataFrameArgumentError, "suffix #{suffix} is invalid" if s == suffix
913
-
914
- suffix = s
915
- end
916
- new_key
958
+ suffixed = "#{key}#{suffix}".to_sym
959
+ # Find the first unused key, starting at suffixed itself and advancing with #succ
960
+ (suffixed..).find { !joined_keys.include?(_1) }
917
961
  else
918
962
  key
919
963
  end