red_amber 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -31,6 +31,8 @@ module RedAmber
31
31
  # 2 3 C
32
32
  # 3 4 D
33
33
  #
34
+ # @since 0.2.3
35
+ #
34
36
  def concatenate(*other)
35
37
  case other
36
38
  in [] | [nil] | [[]]
@@ -79,6 +81,8 @@ module RedAmber
79
81
  # 0 1 3 A C
80
82
  # 1 2 4 B D
81
83
  #
84
+ # @since 0.2.3
85
+ #
82
86
  def merge(*other)
83
87
  case other
84
88
  in [] | [nil] | [[]]
@@ -121,7 +125,7 @@ module RedAmber
121
125
  # @param other [DataFrame, Arrow::Table]
122
126
  # A DataFrame or a Table to be joined with self.
123
127
  #
124
- # @!macro join_dorce_order
128
+ # @!macro join_force_order
125
129
  # @param force_order [Boolean]
126
130
  # whether to force the order of the output to always be the same.
127
131
  # - This option is used in `:full_outer` and `:right_outer`.
@@ -217,11 +221,12 @@ module RedAmber
217
221
  # - Same as `#join` with `type: :inner`
218
222
  # - A kind of mutating join.
219
223
  #
220
- # @overload inner_join(other, suffix: '.1')
224
+ # @overload inner_join(other, suffix: '.1', force_order: true)
221
225
  # If `join_key` is not specified, common keys in self and other are used
222
226
  # (natural keys). Returns joined dataframe.
223
227
  #
224
228
  # @macro join_before
229
+ # @macro join_force_order
225
230
  # @macro join_after
226
231
  # @macro join_common_example_1
227
232
  # @example without key (use implicit common key)
@@ -233,10 +238,11 @@ module RedAmber
233
238
  # 0 A 1 true
234
239
  # 1 B 2 false
235
240
  #
236
- # @overload inner_join(other, join_keys, suffix: '.1')
241
+ # @overload inner_join(other, join_keys, suffix: '.1', force_order: true)
237
242
  #
238
243
  # @macro join_before
239
244
  # @macro join_key_in_array
245
+ # @macro join_force_order
240
246
  # @macro join_after
241
247
  # @macro join_common_example_1
242
248
  # @example with a key
@@ -248,10 +254,11 @@ module RedAmber
248
254
  # 0 A 1 true
249
255
  # 1 B 2 false
250
256
  #
251
- # @overload inner_join(other, join_key_pairs, suffix: '.1')
257
+ # @overload inner_join(other, join_key_pairs, suffix: '.1', force_order: true)
252
258
  #
253
259
  # @macro join_before
254
260
  # @macro join_key_in_hash
261
+ # @macro join_force_order
255
262
  # @macro join_after
256
263
  # @macro join_common_example_2
257
264
  # @example with key pairs
@@ -263,8 +270,10 @@ module RedAmber
263
270
  # 0 A 1 true
264
271
  # 1 B 2 false
265
272
  #
266
- def inner_join(other, join_keys = nil, suffix: '.1')
267
- join(other, join_keys, type: :inner, suffix: suffix)
273
+ # @since 0.2.3
274
+ #
275
+ def inner_join(other, join_keys = nil, suffix: '.1', force_order: true)
276
+ join(other, join_keys, type: :inner, suffix: suffix, force_order: force_order)
268
277
  end
269
278
 
270
279
  # Join another DataFrame or Table, leaving all records.
@@ -276,7 +285,7 @@ module RedAmber
276
285
  # (natural keys). Returns joined dataframe.
277
286
  #
278
287
  # @macro join_before
279
- # @macro join_dorce_order
288
+ # @macro join_force_order
280
289
  # @macro join_after
281
290
  # @macro join_common_example_1
282
291
  # @example without key (use implicit common key)
@@ -294,7 +303,7 @@ module RedAmber
294
303
  #
295
304
  # @macro join_before
296
305
  # @macro join_key_in_array
297
- # @macro join_dorce_order
306
+ # @macro join_force_order
298
307
  # @macro join_after
299
308
  # @macro join_common_example_1
300
309
  # @example with a key
@@ -312,7 +321,7 @@ module RedAmber
312
321
  #
313
322
  # @macro join_before
314
323
  # @macro join_key_in_hash
315
- # @macro join_dorce_order
324
+ # @macro join_force_order
316
325
  # @macro join_after
317
326
  # @macro join_common_example_2
318
327
  # @example with key pairs
@@ -326,6 +335,8 @@ module RedAmber
326
335
  # 2 C 3 (nil)
327
336
  # 3 D (nil) (nil)
328
337
  #
338
+ # @since 0.2.3
339
+ #
329
340
  def full_join(other, join_keys = nil, suffix: '.1', force_order: true)
330
341
  join(other, join_keys,
331
342
  type: :full_outer, suffix: suffix, force_order: force_order)
@@ -337,11 +348,12 @@ module RedAmber
337
348
  # - Same as `#join` with `type: :left_outer`
338
349
  # - A kind of mutating join.
339
350
  #
340
- # @overload left_join(other, suffix: '.1')
351
+ # @overload left_join(other, suffix: '.1', force_order: true)
341
352
  # If `join_key` is not specified, common keys in self and other are used
342
353
  # (natural keys). Returns joined dataframe.
343
354
  #
344
355
  # @macro join_before
356
+ # @macro join_force_order
345
357
  # @macro join_after
346
358
  # @macro join_common_example_1
347
359
  # @example without key (use implicit common key)
@@ -354,10 +366,11 @@ module RedAmber
354
366
  # 1 B 2 false
355
367
  # 2 C 3 (nil)
356
368
  #
357
- # @overload left_join(other, join_keys, suffix: '.1')
369
+ # @overload left_join(other, join_keys, suffix: '.1', force_order: true)
358
370
  #
359
371
  # @macro join_before
360
372
  # @macro join_key_in_array
373
+ # @macro join_force_order
361
374
  # @macro join_after
362
375
  # @macro join_common_example_1
363
376
  # @example with a key
@@ -370,10 +383,11 @@ module RedAmber
370
383
  # 1 B 2 false
371
384
  # 2 C 3 (nil)
372
385
  #
373
- # @overload left_join(other, join_key_pairs, suffix: '.1')
386
+ # @overload left_join(other, join_key_pairs, suffix: '.1', force_order: true)
374
387
  #
375
388
  # @macro join_before
376
389
  # @macro join_key_in_hash
390
+ # @macro join_force_order
377
391
  # @macro join_after
378
392
  # @macro join_common_example_2
379
393
  # @example with key pairs
@@ -386,8 +400,10 @@ module RedAmber
386
400
  # 1 B 2 false
387
401
  # 2 C 3 (nil)
388
402
  #
389
- def left_join(other, join_keys = nil, suffix: '.1')
390
- join(other, join_keys, type: :left_outer, suffix: suffix)
403
+ # @since 0.2.3
404
+ #
405
+ def left_join(other, join_keys = nil, suffix: '.1', force_order: true)
406
+ join(other, join_keys, type: :left_outer, suffix: suffix, force_order: force_order)
391
407
  end
392
408
 
393
409
  # Join matching values from self to other.
@@ -399,7 +415,7 @@ module RedAmber
399
415
  # (natural keys). Returns joined dataframe.
400
416
  #
401
417
  # @macro join_before
402
- # @macro join_dorce_order
418
+ # @macro join_force_order
403
419
  # @macro join_after
404
420
  # @macro join_common_example_1
405
421
  # @example without key (use implicit common key)
@@ -416,7 +432,7 @@ module RedAmber
416
432
  #
417
433
  # @macro join_before
418
434
  # @macro join_key_in_array
419
- # @macro join_dorce_order
435
+ # @macro join_force_order
420
436
  # @macro join_after
421
437
  # @macro join_common_example_1
422
438
  # @example with a key
@@ -433,7 +449,7 @@ module RedAmber
433
449
  #
434
450
  # @macro join_before
435
451
  # @macro join_key_in_hash
436
- # @macro join_dorce_order
452
+ # @macro join_force_order
437
453
  # @macro join_after
438
454
  # @macro join_common_example_2
439
455
  # @example with key pairs
@@ -446,6 +462,8 @@ module RedAmber
446
462
  # 1 B 2 false
447
463
  # 2 D (nil) (nil)
448
464
  #
465
+ # @since 0.2.3
466
+ #
449
467
  def right_join(other, join_keys = nil, suffix: '.1', force_order: true)
450
468
  join(
451
469
  other,
@@ -462,11 +480,12 @@ module RedAmber
462
480
  # - Same as `#join` with `type: :left_semi`
463
481
  # - A kind of filtering join.
464
482
  #
465
- # @overload semi_join(other, suffix: '.1')
483
+ # @overload semi_join(other, suffix: '.1', force_order: true)
466
484
  # If `join_key` is not specified, common keys in self and other are used
467
485
  # (natural keys). Returns joined dataframe.
468
486
  #
469
487
  # @macro join_before
488
+ # @macro join_force_order
470
489
  # @macro join_after
471
490
  # @macro join_common_example_1
472
491
  # @example without key (use implicit common key)
@@ -478,10 +497,11 @@ module RedAmber
478
497
  # 0 A 1
479
498
  # 1 B 2
480
499
  #
481
- # @overload semi_join(other, join_keys, suffix: '.1')
500
+ # @overload semi_join(other, join_keys, suffix: '.1', force_order: true)
482
501
  #
483
502
  # @macro join_before
484
503
  # @macro join_key_in_array
504
+ # @macro join_force_order
485
505
  # @macro join_after
486
506
  # @macro join_common_example_1
487
507
  # @example with a key
@@ -493,10 +513,11 @@ module RedAmber
493
513
  # 0 A 1
494
514
  # 1 B 2
495
515
  #
496
- # @overload semi_join(other, join_key_pairs, suffix: '.1')
516
+ # @overload semi_join(other, join_key_pairs, suffix: '.1', force_order: true)
497
517
  #
498
518
  # @macro join_before
499
519
  # @macro join_key_in_hash
520
+ # @macro join_force_order
500
521
  # @macro join_after
501
522
  # @macro join_common_example_2
502
523
  # @example with key pairs
@@ -508,19 +529,22 @@ module RedAmber
508
529
  # 0 A 1
509
530
  # 1 B 2
510
531
  #
511
- def semi_join(other, join_keys = nil, suffix: '.1')
512
- join(other, join_keys, type: :left_semi, suffix: suffix)
532
+ # @since 0.2.3
533
+ #
534
+ def semi_join(other, join_keys = nil, suffix: '.1', force_order: true)
535
+ join(other, join_keys, type: :left_semi, suffix: suffix, force_order: force_order)
513
536
  end
514
537
 
515
538
  # Return records of self that do not have a match in other.
516
539
  # - Same as `#join` with `type: :left_anti`
517
540
  # - A kind of filtering join.
518
541
  #
519
- # @overload anti_join(other, suffix: '.1')
542
+ # @overload anti_join(other, suffix: '.1', force_order: true)
520
543
  # If `join_key` is not specified, common keys in self and other are used
521
544
  # (natural keys). Returns joined dataframe.
522
545
  #
523
546
  # @macro join_before
547
+ # @macro join_force_order
524
548
  # @macro join_after
525
549
  # @macro join_common_example_1
526
550
  # @example without key (use implicit common key)
@@ -531,10 +555,11 @@ module RedAmber
531
555
  # <string> <uint8>
532
556
  # 0 C 3
533
557
  #
534
- # @overload anti_join(other, join_keys, suffix: '.1')
558
+ # @overload anti_join(other, join_keys, suffix: '.1', force_order: true)
535
559
  #
536
560
  # @macro join_before
537
561
  # @macro join_key_in_array
562
+ # @macro join_force_order
538
563
  # @macro join_after
539
564
  # @macro join_common_example_1
540
565
  # @example with a key
@@ -545,10 +570,11 @@ module RedAmber
545
570
  # <string> <uint8>
546
571
  # 0 C 3
547
572
  #
548
- # @overload anti_join(other, join_key_pairs, suffix: '.1')
573
+ # @overload anti_join(other, join_key_pairs, suffix: '.1', force_order: true)
549
574
  #
550
575
  # @macro join_before
551
576
  # @macro join_key_in_hash
577
+ # @macro join_force_order
552
578
  # @macro join_after
553
579
  # @macro join_common_example_2
554
580
  # @example with key pairs
@@ -559,8 +585,10 @@ module RedAmber
559
585
  # <string> <uint8>
560
586
  # 0 C 3
561
587
  #
562
- def anti_join(other, join_keys = nil, suffix: '.1')
563
- join(other, join_keys, type: :left_anti, suffix: suffix)
588
+ # @since 0.2.3
589
+ #
590
+ def anti_join(other, join_keys = nil, suffix: '.1', force_order: true)
591
+ join(other, join_keys, type: :left_anti, suffix: suffix, force_order: force_order)
564
592
  end
565
593
 
566
594
  # Set operations (#intersect, #union, #difference, #set_operable?)
@@ -574,6 +602,8 @@ module RedAmber
574
602
  # @example
575
603
  # df3.set_operable?(other3) # => true
576
604
  #
605
+ # @since 0.2.3
606
+ #
577
607
  def set_operable?(other) # rubocop:disable Naming/AccessorMethodName
578
608
  keys == other.keys.map(&:to_sym)
579
609
  end
@@ -594,6 +624,8 @@ module RedAmber
594
624
  # <string> <uint8>
595
625
  # 0 A 1
596
626
  #
627
+ # @since 0.2.3
628
+ #
597
629
  def intersect(other)
598
630
  unless keys == other.keys.map(&:to_sym)
599
631
  raise DataFrameArgumentError, 'keys are not same with self and other'
@@ -622,6 +654,8 @@ module RedAmber
622
654
  # 3 B 4
623
655
  # 4 D 5
624
656
  #
657
+ # @since 0.2.3
658
+ #
625
659
  def union(other)
626
660
  unless keys == other.keys.map(&:to_sym)
627
661
  raise DataFrameArgumentError, 'keys are not same with self and other'
@@ -655,6 +689,8 @@ module RedAmber
655
689
  # 0 B 4
656
690
  # 1 D 5
657
691
  #
692
+ # @since 0.2.3
693
+ #
658
694
  def difference(other)
659
695
  unless keys == other.keys.map(&:to_sym)
660
696
  raise DataFrameArgumentError, 'keys are not same with self and other'
@@ -709,7 +745,7 @@ module RedAmber
709
745
  #
710
746
  # @macro join_before
711
747
  # @macro join_common_type
712
- # @macro join_dorce_order
748
+ # @macro join_force_order
713
749
  # @macro join_after
714
750
  # @macro join_common_example_1
715
751
  # @example
@@ -736,7 +772,7 @@ module RedAmber
736
772
  # @macro join_before
737
773
  # @macro join_key_in_array
738
774
  # @macro join_common_type
739
- # @macro join_dorce_order
775
+ # @macro join_force_order
740
776
  # @macro join_after
741
777
  # @macro join_common_example_3
742
778
  # @example join keys in an Array
@@ -761,7 +797,7 @@ module RedAmber
761
797
  # @macro join_before
762
798
  # @macro join_key_in_hash
763
799
  # @macro join_common_type
764
- # @macro join_dorce_order
800
+ # @macro join_force_order
765
801
  # @macro join_after
766
802
  # @macro join_common_example_4
767
803
  # @example without options
@@ -790,6 +826,8 @@ module RedAmber
790
826
  # 0 A D e
791
827
  # 1 B E E
792
828
  #
829
+ # @since 0.2.3
830
+ #
793
831
  def join(other, join_keys = nil, type: :inner, suffix: '.1', force_order: true)
794
832
  right_table =
795
833
  case other
@@ -804,7 +842,7 @@ module RedAmber
804
842
  type = type.to_sym
805
843
  left_index = :__LEFT_INDEX__
806
844
  right_index = :__RIGHT_INDEX__
807
- if force_order && %i[full_outer right_outer].include?(type)
845
+ if force_order
808
846
  left_table = assign(left_index) { indices }.table
809
847
  other = DataFrame.create(other) if other.is_a?(Arrow::Table)
810
848
  right_table = other.assign(right_index) { indices }.table
@@ -820,14 +858,12 @@ module RedAmber
820
858
 
821
859
  # This is not necessary if additional procedure is contributed to Red Arrow.
822
860
  if join_keys.is_a?(Hash)
823
- left_keys = join_keys[:left]
824
- right_keys = join_keys[:right]
861
+ left_keys = ensure_keys(join_keys[:left])
862
+ right_keys = ensure_keys(join_keys[:right])
825
863
  else
826
- left_keys = join_keys
827
- right_keys = join_keys
864
+ left_keys = ensure_keys(join_keys)
865
+ right_keys = left_keys
828
866
  end
829
- left_keys = Array(left_keys).map(&:to_s)
830
- right_keys = Array(right_keys).map(&:to_s)
831
867
 
832
868
  case type
833
869
  when :full_outer, :left_semi, :left_anti, :right_semi, :right_anti
@@ -853,26 +889,36 @@ module RedAmber
853
889
 
854
890
  case type
855
891
  when :inner, :left_outer, :left_semi, :left_anti, :right_semi, :right_anti
856
- if joined_table.keys.uniq!
857
- DataFrame.create(rename_table(joined_table, n_keys, suffix))
858
- else
859
- DataFrame.create(joined_table)
860
- end
892
+ dataframe =
893
+ if joined_table.keys.uniq!
894
+ DataFrame.create(rename_table(joined_table, n_keys, suffix))
895
+ else
896
+ DataFrame.create(joined_table)
897
+ end
898
+ sorter =
899
+ case type
900
+ when :inner, :left_outer
901
+ [left_index, right_index]
902
+ when :left_semi, :left_anti
903
+ [left_index]
904
+ when :right_semi, :right_anti
905
+ [right_index]
906
+ end
861
907
  when :full_outer
908
+ key_index_lr =
909
+ left_keys.map { left_table.keys.index(_1) }
910
+ .zip(right_keys.map { left_table.keys.size + right_table.keys.index(_1) })
862
911
  renamed_table = rename_table(joined_table, n_keys, suffix)
863
- renamed_keys = renamed_table.keys
864
912
  dropper = []
865
- dataframe = DataFrame.create(renamed_table).assign do |df|
866
- left_keys.map do |left_key|
867
- i_left_key = renamed_keys.index(left_key)
868
- right_key = renamed_keys[i_left_key + table_keys.size]
869
- dropper << right_key
870
- [left_key.to_sym, merge_array(df[left_key].data, df[right_key].data)]
913
+ dataframe =
914
+ DataFrame.create(renamed_table).assign do |df|
915
+ key_index_lr.map do |l, r|
916
+ dropper << df.keys[r]
917
+ [df.keys[l], merge_array(df.vectors[l].data, df.vectors[r].data)]
918
+ end
871
919
  end
872
- end
873
- dataframe = dataframe.sort(left_index, right_index) if force_order
874
-
875
- dataframe.drop(dropper, left_index, right_index)
920
+ dataframe = dataframe.drop(dropper)
921
+ sorter = [left_index, right_index]
876
922
  when :right_outer
877
923
  dataframe =
878
924
  if joined_table.keys.uniq!
@@ -880,20 +926,26 @@ module RedAmber
880
926
  else
881
927
  DataFrame.create(joined_table)
882
928
  end
883
- if force_order
884
- dataframe =
885
- dataframe
886
- .sort(left_index, right_index)
887
- .drop(left_index, right_index)
888
- end
889
- dataframe.pick do
890
- [right_keys, keys.map(&:to_s) - right_keys]
891
- end
929
+ dataframe = dataframe.pick(right_keys, dataframe.keys - right_keys)
930
+ sorter = [left_index, right_index]
931
+ end
932
+
933
+ if force_order
934
+ dataframe
935
+ .sort(sorter)
936
+ .drop(sorter)
937
+ else
938
+ dataframe
892
939
  end
893
940
  end
894
941
 
895
942
  private
896
943
 
944
+ # Coerce keys into an Array of Symbols
945
+ def ensure_keys(keys)
946
+ Array(keys).map(&:to_sym)
947
+ end
948
+
897
949
  # Rename duplicate keys by suffix
898
950
  def rename_table(joined_table, n_keys, suffix)
899
951
  joined_keys = joined_table.keys
@@ -903,17 +955,9 @@ module RedAmber
903
955
  renamed_right_keys =
904
956
  other_keys.map do |key|
905
957
  if dup_keys.include?(key)
906
- new_key = nil
907
- loop do
908
- new_key = "#{key}#{suffix}"
909
- break unless joined_keys.include?(new_key)
910
-
911
- s = suffix.succ
912
- raise DataFrameArgumentError, "suffix #{suffix} is invalid" if s == suffix
913
-
914
- suffix = s
915
- end
916
- new_key
958
+ suffixed = "#{key}#{suffix}".to_sym
959
+ # Find the first unused key, starting at suffixed and stepping with #succ
960
+ (suffixed..).find { !joined_keys.include?(_1) }
917
961
  else
918
962
  key
919
963
  end