distclassipy 0.1.6a0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- distclassipy/__init__.py +13 -3
- distclassipy/classifier.py +387 -239
- distclassipy/distances.py +981 -905
- {distclassipy-0.1.6a0.dist-info → distclassipy-0.2.0.dist-info}/METADATA +14 -6
- distclassipy-0.2.0.dist-info/RECORD +8 -0
- {distclassipy-0.1.6a0.dist-info → distclassipy-0.2.0.dist-info}/WHEEL +1 -1
- distclassipy-0.1.6a0.dist-info/RECORD +0 -8
- {distclassipy-0.1.6a0.dist-info → distclassipy-0.2.0.dist-info}/LICENSE +0 -0
- {distclassipy-0.1.6a0.dist-info → distclassipy-0.2.0.dist-info}/top_level.txt +0 -0
distclassipy/distances.py
CHANGED
|
@@ -48,6 +48,52 @@ import numpy as np
|
|
|
48
48
|
|
|
49
49
|
import scipy
|
|
50
50
|
|
|
51
|
+
# Metric names collected in one module-level list, in their original order.
# NOTE(review): each entry presumably matches a Distance method name - confirm.
_ALL_METRICS = [
    "euclidean", "braycurtis", "canberra", "cityblock", "chebyshev",
    "clark", "correlation", "cosine", "hellinger", "jaccard",
    "lorentzian", "marylandbridge", "meehl", "motyka", "soergel",
    "wave_hedges", "kulczynski", "add_chisq", "acc", "chebyshev_min",
    "czekanowski", "dice", "divergence", "google", "gower",
    "jeffreys", "jensenshannon_divergence", "jensen_difference",
    "kumarjohnson", "matusita", "minkowski", "penroseshape",
    "prob_chisq", "ruzicka", "sorensen", "squared_chisq",
    "squaredchord", "squared_euclidean", "taneja", "tanimoto",
    "topsoe", "vicis_symmetric_chisq", "vicis_wave_hedges",
]
|
|
96
|
+
|
|
51
97
|
|
|
52
98
|
class Distance:
|
|
53
99
|
"""A class to calculate various distance metrics between vectors.
|
|
@@ -352,7 +398,11 @@ class Distance:
|
|
|
352
398
|
1(4), 300-307.
|
|
353
399
|
"""
|
|
354
400
|
u, v = np.asarray(u), np.asarray(v)
|
|
355
|
-
|
|
401
|
+
# Clip negative values to zero for valid sqrt
|
|
402
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
403
|
+
u = np.clip(u, a_min=0, a_max=None)
|
|
404
|
+
v = np.clip(v, a_min=0, a_max=None)
|
|
405
|
+
return np.sqrt(2 * np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
|
|
356
406
|
|
|
357
407
|
def jaccard(self, u, v):
|
|
358
408
|
"""Calculate the Jaccard distance between two vectors.
|
|
@@ -402,7 +452,8 @@ class Distance:
|
|
|
402
452
|
eschew the log of zero.
|
|
403
453
|
"""
|
|
404
454
|
u, v = np.asarray(u), np.asarray(v)
|
|
405
|
-
|
|
455
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
456
|
+
return np.sum(np.log(np.abs(u - v) + 1))
|
|
406
457
|
|
|
407
458
|
def marylandbridge(self, u, v):
|
|
408
459
|
"""Calculate the Maryland Bridge distance between two vectors.
|
|
@@ -578,907 +629,932 @@ class Distance:
|
|
|
578
629
|
with np.errstate(divide="ignore", invalid="ignore"):
|
|
579
630
|
return np.sum(np.where(uvmult != 0, ((u - v) ** 2 * (u + v)) / uvmult, 0))
|
|
580
631
|
|
|
632
|
+
# NOTE: THE FOLLOWING DISTANCES HAVE NOT BEEN TESTED EXTENSIVELY AND ARE
|
|
633
|
+
# CURRENTLY IN ALPHA; SOME OF THEM REMAIN COMMENTED OUT.
|
|
634
|
+
|
|
635
|
+
def acc(self, u, v):
    """Return the mean of the Cityblock and Chebyshev distances.

    The ACC ("Average") distance between two vectors is obtained by
    evaluating the Cityblock (Manhattan) distance and the Chebyshev
    distance for the same pair and halving their sum.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The ACC distance between the two vectors.

    References
    ----------
    1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean
       Geometry. Dover Publications.
    2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       vol. 1(4), pp. 300-307.
    """
    # Both component distances are delegated to the sibling methods.
    manhattan = self.cityblock(u, v)
    supremum = self.chebyshev(u, v)
    return (manhattan + supremum) / 2
|
|
660
|
+
|
|
661
|
+
# def bhattacharyya(self, u, v):
|
|
662
|
+
# """
|
|
663
|
+
# Calculate the Bhattacharyya distance between two vectors.
|
|
664
|
+
|
|
665
|
+
# Returns a distance value between 0 and 1.
|
|
666
|
+
|
|
667
|
+
# Parameters
|
|
668
|
+
# ----------
|
|
669
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
670
|
+
|
|
671
|
+
# Returns
|
|
672
|
+
# -------
|
|
673
|
+
# - The Bhattacharyya distance between the two vectors.
|
|
674
|
+
|
|
675
|
+
# References
|
|
676
|
+
# ----------
|
|
677
|
+
# 1. Bhattacharyya A (1947) On a measure of divergence between two
|
|
678
|
+
# statistical populations defined by probability distributions,
|
|
679
|
+
# Bull. Calcutta Math. Soc., 35, 99–109.
|
|
680
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
681
|
+
# Measures between Probability Density Functions. International
|
|
682
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
683
|
+
# 1(4), 300-307.
|
|
684
|
+
# 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
|
|
685
|
+
# """
|
|
686
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
687
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
688
|
+
# return -np.log(np.sum(np.sqrt(u * v)))
|
|
689
|
+
|
|
690
|
+
def chebyshev_min(self, u, v):
    """Return the smallest absolute element-wise difference of two vectors.

    A custom measure attributed to Zielezinski: instead of the largest
    absolute difference (Chebyshev), the smallest one is reported.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The minimum value distance between the two vectors.
    """
    u, v = np.asarray(u), np.asarray(v)
    absolute_gap = np.abs(u - v)
    return np.amin(absolute_gap)
|
|
706
|
+
|
|
707
|
+
def czekanowski(self, u, v):
    """Return the Czekanowski distance between two vectors.

    The summed absolute differences are divided by the summed
    element-wise totals of the two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Czekanowski distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u, v = np.asarray(u), np.asarray(v)
    total_gap = np.sum(np.abs(u - v))
    total_mass = np.sum(u + v)
    return total_gap / total_mass
|
|
727
|
+
|
|
728
|
+
def dice(self, u, v):
    """Return the Dice dissimilarity between two vectors.

    Synonyms:
        Sorensen distance

    The squared length of the difference vector is divided by the sum of
    the squared lengths of the two inputs.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Dice dissimilarity between the two vectors.

    References
    ----------
    1. Dice LR (1945) Measures of the amount of ecologic association
       between species. Ecology. 26, 297-302.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u, v = np.asarray(u), np.asarray(v)
    gap = u - v
    gap_energy = np.dot(gap, gap)
    combined_energy = np.dot(u, u) + np.dot(v, v)
    return gap_energy / combined_energy
|
|
754
|
+
|
|
755
|
+
def divergence(self, u, v):
|
|
756
|
+
"""Calculate the divergence between two vectors.
|
|
581
757
|
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
#
|
|
684
|
-
#
|
|
685
|
-
#
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
#
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
#
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
#
|
|
1158
|
-
#
|
|
1159
|
-
#
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
#
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
#
|
|
1166
|
-
#
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
#
|
|
1170
|
-
#
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
#
|
|
1174
|
-
#
|
|
1175
|
-
#
|
|
1176
|
-
#
|
|
1177
|
-
#
|
|
1178
|
-
|
|
1179
|
-
#
|
|
1180
|
-
#
|
|
1181
|
-
#
|
|
1182
|
-
#
|
|
1183
|
-
|
|
1184
|
-
#
|
|
1185
|
-
#
|
|
1186
|
-
#
|
|
1187
|
-
#
|
|
1188
|
-
|
|
1189
|
-
#
|
|
1190
|
-
#
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
#
|
|
1194
|
-
|
|
1195
|
-
#
|
|
1196
|
-
|
|
1197
|
-
#
|
|
1198
|
-
|
|
1199
|
-
#
|
|
1200
|
-
|
|
1201
|
-
#
|
|
1202
|
-
|
|
1203
|
-
#
|
|
1204
|
-
|
|
1205
|
-
#
|
|
1206
|
-
#
|
|
1207
|
-
#
|
|
1208
|
-
#
|
|
1209
|
-
#
|
|
1210
|
-
#
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
#
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
#
|
|
1217
|
-
#
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
#
|
|
1221
|
-
#
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
#
|
|
1225
|
-
#
|
|
1226
|
-
#
|
|
1227
|
-
#
|
|
1228
|
-
#
|
|
1229
|
-
#
|
|
1230
|
-
#
|
|
1231
|
-
#
|
|
1232
|
-
|
|
1233
|
-
#
|
|
1234
|
-
#
|
|
1235
|
-
|
|
1236
|
-
#
|
|
1237
|
-
#
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
#
|
|
1241
|
-
|
|
1242
|
-
#
|
|
1243
|
-
|
|
1244
|
-
#
|
|
1245
|
-
|
|
1246
|
-
#
|
|
1247
|
-
#
|
|
1248
|
-
#
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
#
|
|
1252
|
-
|
|
1253
|
-
#
|
|
1254
|
-
#
|
|
1255
|
-
|
|
1256
|
-
#
|
|
1257
|
-
#
|
|
1258
|
-
#
|
|
1259
|
-
|
|
1260
|
-
#
|
|
1261
|
-
#
|
|
1262
|
-
#
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
#
|
|
1266
|
-
#
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
#
|
|
1270
|
-
#
|
|
1271
|
-
|
|
1272
|
-
#
|
|
1273
|
-
|
|
1274
|
-
#
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
#
|
|
1278
|
-
#
|
|
1279
|
-
#
|
|
1280
|
-
|
|
1281
|
-
#
|
|
1282
|
-
#
|
|
1283
|
-
#
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
#
|
|
1287
|
-
#
|
|
1288
|
-
|
|
1289
|
-
#
|
|
1290
|
-
#
|
|
1291
|
-
#
|
|
1292
|
-
|
|
1293
|
-
#
|
|
1294
|
-
#
|
|
1295
|
-
#
|
|
1296
|
-
|
|
1297
|
-
#
|
|
1298
|
-
#
|
|
1299
|
-
|
|
1300
|
-
#
|
|
1301
|
-
#
|
|
1302
|
-
#
|
|
1303
|
-
|
|
1304
|
-
#
|
|
1305
|
-
#
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
#
|
|
1309
|
-
#
|
|
1310
|
-
|
|
1311
|
-
#
|
|
1312
|
-
#
|
|
1313
|
-
#
|
|
1314
|
-
|
|
1315
|
-
#
|
|
1316
|
-
#
|
|
1317
|
-
|
|
1318
|
-
#
|
|
1319
|
-
#
|
|
1320
|
-
#
|
|
1321
|
-
#
|
|
1322
|
-
#
|
|
1323
|
-
#
|
|
1324
|
-
|
|
1325
|
-
#
|
|
1326
|
-
#
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
#
|
|
1330
|
-
#
|
|
1331
|
-
|
|
1332
|
-
#
|
|
1333
|
-
#
|
|
1334
|
-
#
|
|
1335
|
-
|
|
1336
|
-
#
|
|
1337
|
-
#
|
|
1338
|
-
#
|
|
1339
|
-
|
|
1340
|
-
#
|
|
1341
|
-
#
|
|
1342
|
-
#
|
|
1343
|
-
#
|
|
1344
|
-
#
|
|
1345
|
-
|
|
1346
|
-
#
|
|
1347
|
-
#
|
|
1348
|
-
#
|
|
1349
|
-
#
|
|
1350
|
-
|
|
1351
|
-
#
|
|
1352
|
-
|
|
1353
|
-
#
|
|
1354
|
-
#
|
|
1355
|
-
|
|
1356
|
-
#
|
|
1357
|
-
#
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
#
|
|
1361
|
-
#
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
#
|
|
1365
|
-
#
|
|
1366
|
-
|
|
1367
|
-
#
|
|
1368
|
-
#
|
|
1369
|
-
#
|
|
1370
|
-
|
|
1371
|
-
#
|
|
1372
|
-
#
|
|
1373
|
-
#
|
|
1374
|
-
#
|
|
1375
|
-
#
|
|
1376
|
-
#
|
|
1377
|
-
|
|
1378
|
-
#
|
|
1379
|
-
|
|
1380
|
-
#
|
|
1381
|
-
#
|
|
1382
|
-
|
|
1383
|
-
#
|
|
1384
|
-
#
|
|
1385
|
-
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1386
|
-
|
|
1387
|
-
# Returns
|
|
1388
|
-
# -------
|
|
1389
|
-
# - The Tanimoto distance between the two vectors.
|
|
1390
|
-
|
|
1391
|
-
# References
|
|
1392
|
-
# ----------
|
|
1393
|
-
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1394
|
-
# Measures between Probability Density Functions. International
|
|
1395
|
-
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1396
|
-
# 1(4), 300-307.
|
|
1397
|
-
|
|
1398
|
-
# Notes
|
|
1399
|
-
# -----
|
|
1400
|
-
# Equals Soergel distance.
|
|
1401
|
-
# """
|
|
1402
|
-
# u, v = np.asarray(u), np.asarray(v)
|
|
1403
|
-
# # return np.sum(abs(u-v)) / np.sum(np.maximum(u, v))
|
|
1404
|
-
# usum = np.sum(u)
|
|
1405
|
-
# vsum = np.sum(v)
|
|
1406
|
-
# minsum = np.sum(np.minimum(u, v))
|
|
1407
|
-
# return (usum + vsum - 2 * minsum) / (usum + vsum - minsum)
|
|
1408
|
-
|
|
1409
|
-
# def topsoe(self, u, v):
|
|
1410
|
-
# """Calculate the Topsøe distance between two vectors.
|
|
1411
|
-
|
|
1412
|
-
# Parameters
|
|
1413
|
-
# ----------
|
|
1414
|
-
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1415
|
-
|
|
1416
|
-
# Returns
|
|
1417
|
-
# -------
|
|
1418
|
-
# - The Topsøe distance between the two vectors.
|
|
1419
|
-
|
|
1420
|
-
# References
|
|
1421
|
-
# ----------
|
|
1422
|
-
# 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
|
|
1423
|
-
# Measures between Probability Density Functions. International
|
|
1424
|
-
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1425
|
-
# 1(4), 300-307.
|
|
1426
|
-
|
|
1427
|
-
# Notes
|
|
1428
|
-
# -----
|
|
1429
|
-
# Equals two times Jensen-Shannon divergence.
|
|
1430
|
-
# """
|
|
1431
|
-
# u, v = np.asarray(u), np.asarray(v)
|
|
1432
|
-
# u = np.where(u == 0, self.epsilon, u)
|
|
1433
|
-
# v = np.where(v == 0, self.epsilon, v)
|
|
1434
|
-
# dl = u * np.log(2 * u / (u + v))
|
|
1435
|
-
# dr = v * np.log(2 * v / (u + v))
|
|
1436
|
-
# return np.sum(dl + dr)
|
|
1437
|
-
|
|
1438
|
-
# def vicis_symmetric_chisq(self, u, v):
|
|
1439
|
-
# """Calculate the Vicis Symmetric chi-square distance.
|
|
1440
|
-
|
|
1441
|
-
# Parameters
|
|
1442
|
-
# ----------
|
|
1443
|
-
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1444
|
-
|
|
1445
|
-
# Returns
|
|
1446
|
-
# -------
|
|
1447
|
-
# - The Vicis Symmetric chi-square distance between the two vectors.
|
|
1448
|
-
|
|
1449
|
-
# References
|
|
1450
|
-
# ----------
|
|
1451
|
-
# 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
|
|
1452
|
-
# Measures between Probability Density Functions. International
|
|
1453
|
-
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1454
|
-
# 1(4), 300-307
|
|
1455
|
-
# """
|
|
1456
|
-
# u, v = np.asarray(u), np.asarray(v)
|
|
1457
|
-
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1458
|
-
# u_v = (u - v) ** 2
|
|
1459
|
-
# uvmin = np.minimum(u, v) ** 2
|
|
1460
|
-
# return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
|
|
1461
|
-
|
|
1462
|
-
# def vicis_wave_hedges(self, u, v):
|
|
1463
|
-
# """Calculate the Vicis-Wave Hedges distance between two vectors.
|
|
1464
|
-
|
|
1465
|
-
# Parameters
|
|
1466
|
-
# ----------
|
|
1467
|
-
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1468
|
-
|
|
1469
|
-
# Returns
|
|
1470
|
-
# -------
|
|
1471
|
-
# - The Vicis-Wave Hedges distance between the two vectors.
|
|
1472
|
-
|
|
1473
|
-
# References
|
|
1474
|
-
# ----------
|
|
1475
|
-
# 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
|
|
1476
|
-
# Measures between Probability Density Functions. International
|
|
1477
|
-
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1478
|
-
# 1(4), 300-307.
|
|
1479
|
-
# """
|
|
1480
|
-
# u, v = np.asarray(u), np.asarray(v)
|
|
1481
|
-
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1482
|
-
# u_v = abs(u - v)
|
|
1483
|
-
# uvmin = np.minimum(u, v)
|
|
1484
|
-
# return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
|
|
758
|
+
Divergence equals squared Clark distance multiplied by 2.
|
|
759
|
+
|
|
760
|
+
Parameters
|
|
761
|
+
----------
|
|
762
|
+
- u, v: Input vectors between which the distance is to be calculated.
|
|
763
|
+
|
|
764
|
+
Returns
|
|
765
|
+
-------
|
|
766
|
+
- The divergence between the two vectors.
|
|
767
|
+
|
|
768
|
+
References
|
|
769
|
+
----------
|
|
770
|
+
1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
771
|
+
Measures between Probability Density Functions. International
|
|
772
|
+
Journal of Mathematical Models and Methods in Applied Sciences.
|
|
773
|
+
1(4), 300-307.
|
|
774
|
+
"""
|
|
775
|
+
u, v = np.asarray(u), np.asarray(v)
|
|
776
|
+
with np.errstate(invalid="ignore"):
|
|
777
|
+
return 2 * np.nansum(np.power(u - v, 2) / np.power(u + v, 2))
|
|
778
|
+
|
|
779
|
+
def google(self, u, v):
    """Calculate the Normalized Google Distance (NGD) between two vectors.

    NGD is a measure of similarity derived from the number of hits returned by the
    Google search engine for a given set of keywords.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Normalized Google Distance between the two vectors. When the
      denominator is zero (for example two all-zero vectors), 0.0 is
      returned if the numerator is zero as well, and a signed infinity
      otherwise, rather than raising ``ZeroDivisionError``.

    Notes
    -----
    When used for comparing two probability density functions (pdfs),
    Google distance equals half of Cityblock distance.

    References
    ----------
    1. Lee & Rashid (2008) Information Technology, ITSim 2008.
       doi:10.1109/ITSIM.2008.4631601.
    """
    u, v = np.asarray(u), np.asarray(v)
    x = float(np.sum(u))
    y = float(np.sum(v))
    summin = float(np.sum(np.minimum(u, v)))
    numer = max(x, y) - summin
    denom = (x + y) - min(x, y)
    if denom == 0:
        # Plain Python floats raise on division by zero; map the degenerate
        # case to a value so a pair of empty/zero vectors does not crash.
        return 0.0 if numer == 0 else np.copysign(np.inf, numer)
    return numer / denom
|
|
808
|
+
|
|
809
|
+
def gower(self, u, v):
    """Return the Gower distance between two vectors.

    The summed absolute differences (the Cityblock distance) are divided
    by the number of elements in ``u``.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Gower distance between the two vectors.

    References
    ----------
    1. Gower JC. (1971) General Coefficient of Similarity
       and Some of Its Properties, Biometrics 27, 857-874.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u, v = np.asarray(u), np.asarray(v)
    absolute_gap = np.abs(u - v)
    return np.sum(absolute_gap) / u.size
|
|
833
|
+
|
|
834
|
+
def jeffreys(self, u, v):
    """Calculate the Jeffreys divergence between two vectors.

    The Jeffreys divergence is a symmetric version of the Kullback-Leibler
    divergence.

    Parameters
    ----------
    - u, v: Input vectors between which the divergence is to be calculated.
      The inputs are not modified.

    Returns
    -------
    - The Jeffreys divergence between the two vectors.

    References
    ----------
    1. Jeffreys H (1946) An Invariant Form for the Prior Probability
       in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u, v = np.asarray(u), np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        # Replace exact zeros by epsilon to avoid division by 0 and log of 0.
        # np.where builds new arrays, so the caller's data is left untouched
        # and epsilon is not truncated to 0 when the inputs are integer arrays.
        u = np.where(u == 0, self.epsilon, u)
        v = np.where(v == 0, self.epsilon, v)
        # Floor the ratio at epsilon so the log stays defined for
        # non-positive ratios.
        udivv = np.clip(u / v, a_min=self.epsilon, a_max=None)
        return np.sum((u - v) * np.log(udivv))
|
|
869
|
+
|
|
870
|
+
def jensenshannon_divergence(self, u, v):
    """Calculate the Jensen-Shannon divergence between two vectors.

    The Jensen-Shannon divergence is a symmetric and finite measure of similarity
    between two probability distributions.

    Parameters
    ----------
    - u, v: Input vectors between which the divergence is to be calculated.
      The inputs are not modified.

    Returns
    -------
    - The Jensen-Shannon divergence between the two vectors.

    References
    ----------
    1. Lin J. (1991) Divergence measures based on the Shannon entropy.
       IEEE Transactions on Information Theory, 37(1):145–151.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals Jensen difference in Sung-Hyuk (2007):
        u = np.where(u==0, self.epsilon, u)
        v = np.where(v==0, self.epsilon, v)
        el1 = (u * np.log(u) + v * np.log(v)) / 2
        el2 = (u + v)/2
        el3 = np.log(el2)
        return np.sum(el1 - el2 * el3)
    """
    u, v = np.asarray(u), np.asarray(v)
    with np.errstate(divide="ignore", invalid="ignore"):
        # Replace exact zeros by epsilon. np.where builds new arrays, so the
        # caller's data is left untouched and epsilon is not truncated to 0
        # when the inputs are integer arrays.
        u = np.where(u == 0, self.epsilon, u)
        v = np.where(v == 0, self.epsilon, v)

        # Floor both ratios at epsilon so the logs below stay defined.
        term1 = np.clip(2 * u / (u + v), a_min=self.epsilon, a_max=None)
        term2 = np.clip(2 * v / (u + v), a_min=self.epsilon, a_max=None)

        dl = u * np.log(term1)
        dr = v * np.log(term2)
        return (np.sum(dl) + np.sum(dr)) / 2
|
|
913
|
+
|
|
914
|
+
def jensen_difference(self, u, v):
    """Return the Jensen difference between two vectors.

    The Jensen difference is considered similar to the Jensen-Shannon
    divergence.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Jensen difference between the two vectors.

    Notes
    -----
    1. Equals half of Topsøe distance
    2. Equals squared jensenshannon_distance.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u, v = np.asarray(u), np.asarray(v)
    floor = self.epsilon

    with np.errstate(divide="ignore", invalid="ignore"):
        # Raise every entry below epsilon up to epsilon so the logs are valid.
        u = np.clip(u, a_min=floor, a_max=None)
        v = np.clip(v, a_min=floor, a_max=None)
        entropy_mean = (u * np.log(u) + v * np.log(v)) / 2
        midpoint = np.clip((u + v) / 2, a_min=floor, a_max=None)
        return np.sum(entropy_mean - midpoint * np.log(midpoint))
|
|
949
|
+
|
|
950
|
+
def kumarjohnson(self, u, v):
    """Return the Kumar-Johnson distance between two vectors.

    Each element contributes ``(u**2 - v**2)**2 / (2 * (u*v)**1.5)``;
    positions where ``u * v`` is zero contribute nothing.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Kumar-Johnson distance between the two vectors.

    References
    ----------
    1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
       and information inequalities, Journal of Inequalities in pure
       and applied Mathematics. 6(3).
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.
    """
    u, v = np.asarray(u), np.asarray(v)
    product = u * v
    with np.errstate(divide="ignore", invalid="ignore"):
        squared_gap = np.power(u**2 - v**2, 2)
        scale = 2 * np.power(product, 3 / 2)
        # Zero products would divide by zero, so those terms are set to 0.
        terms = np.where(product != 0, squared_gap / scale, 0)
        return np.sum(terms)
|
|
977
|
+
|
|
978
|
+
def matusita(self, u, v):
    """Compute the Matusita distance between two vectors.

    This is the Euclidean norm of the difference between the
    element-wise square roots of ``u`` and ``v``.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare.

    Returns
    -------
    - The Matusita distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.

    Notes
    -----
    Equal to the square root of the Squared-chord distance.
    """
    a, b = np.asarray(u), np.asarray(v)
    # Invalid-value warnings (e.g. sqrt of a negative entry) are suppressed.
    with np.errstate(divide="ignore", invalid="ignore"):
        root_gap = np.sqrt(a) - np.sqrt(b)
        return np.sqrt(np.sum(root_gap**2))
|
|
1003
|
+
|
|
1004
|
+
def minkowski(self, u, v, p=2):
    """Compute the Minkowski distance between two vectors.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare.
    - p: Order of the norm; forwarded to ``np.linalg.norm`` as ``ord``.

    Returns
    -------
    - The order-``p`` norm of ``u - v``.

    Notes
    -----
    As p tends to infinity the Chebyshev distance is obtained.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.
    """
    difference = np.asarray(u) - np.asarray(v)
    return np.linalg.norm(difference, ord=p)
|
|
1029
|
+
|
|
1030
|
+
def penroseshape(self, u, v):
    """Compute the Penrose shape distance between two vectors.

    Both vectors are centred on their own mean, and the Euclidean
    norm of the difference of the centred vectors is returned.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare.

    Returns
    -------
    - The Penrose shape distance between the two vectors.

    References
    ----------
    1. Deza M, Deza E (2009) Encyclopedia of Distances.
       Springer-Verlag Berlin Heidelberg. 1-590.
    """
    a, b = np.asarray(u), np.asarray(v)
    mean_a = np.mean(a)
    mean_b = np.mean(b)
    with np.errstate(divide="ignore", invalid="ignore"):
        centred_gap = (a - mean_a) - (b - mean_b)
        return np.sqrt(np.sum(centred_gap**2))
|
|
1051
|
+
|
|
1052
|
+
def prob_chisq(self, u, v):
    """Compute the Probabilistic chi-square distance between two vectors.

    Evaluates ``2 * sum((u - v)**2 / (u + v))``; coordinates where
    ``u + v`` is exactly zero contribute 0.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare.

    Returns
    -------
    - The Probabilistic chi-square distance between the two vectors.

    Notes
    -----
    Added by SC.
    """
    a, b = np.asarray(u), np.asarray(v)
    total = a + b
    # Zero totals are masked by np.where, so divide warnings are silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        terms = np.where(total != 0, (a - b) ** 2 / total, 0)
        return 2 * np.sum(terms)
|
|
1071
|
+
|
|
1072
|
+
def ruzicka(self, u, v):
    """Compute the Ruzicka distance between two vectors.

    Evaluates ``1 - sum(min(u, v)) / sum(max(u, v))`` using
    element-wise minima and maxima.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare.

    Returns
    -------
    - The Ruzicka distance between the two vectors.

    Notes
    -----
    The denominator is not guarded: if the element-wise maxima sum to
    zero the division is performed as-is.

    Added by SC.
    """
    a, b = np.asarray(u), np.asarray(v)
    shared = np.sum(np.minimum(a, b))
    envelope = np.sum(np.maximum(a, b))
    return 1 - shared / envelope
|
|
1091
|
+
|
|
1092
|
+
def sorensen(self, u, v):
    """Compute the Sorensen distance between two vectors.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare.

    Returns
    -------
    - The Sorensen distance between the two vectors.

    Notes
    -----
    This is the Manhattan distance ``sum(|u - v|)`` divided by the
    total ``sum(u + v)``. The denominator is not guarded against zero.

    Added by SC.
    """
    a, b = np.asarray(u), np.asarray(v)
    manhattan = np.sum(np.abs(a - b))
    total_mass = np.sum(a + b)
    return manhattan / total_mass
|
|
1112
|
+
|
|
1113
|
+
def squared_chisq(self, u, v):
    """Compute the Squared chi-square distance between two vectors.

    Evaluates ``sum((u - v)**2 / (u + v))``; coordinates where
    ``u + v`` is exactly zero contribute 0.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare.

    Returns
    -------
    - The Squared chi-square distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    a, b = np.asarray(u), np.asarray(v)
    total = a + b
    # Zero totals are masked by np.where, so divide warnings are silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        terms = np.where(total != 0, (a - b) ** 2 / total, 0)
        return np.sum(terms)
|
|
1135
|
+
|
|
1136
|
+
def squaredchord(self, u, v):
    """Compute the Squared-chord distance between two vectors.

    Sums the squared differences between the element-wise square
    roots of ``u`` and ``v``.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare.

    Returns
    -------
    - The Squared-chord distance between the two vectors.

    References
    ----------
    1. Gavin DG et al. (2003) A statistical approach to evaluating
       distance metrics and analog assignments for pollen records.
       Quaternary Research 60:356–367.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equal to the square of the Matusita distance.
    """
    a, b = np.asarray(u), np.asarray(v)
    # Invalid-value warnings (e.g. sqrt of a negative entry) are suppressed.
    with np.errstate(divide="ignore", invalid="ignore"):
        root_gap = np.sqrt(a) - np.sqrt(b)
        return np.sum(root_gap**2)
|
|
1164
|
+
|
|
1165
|
+
def squared_euclidean(self, u, v):
    """Compute the Squared Euclidean distance between two vectors.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare.

    Returns
    -------
    - The Squared Euclidean distance between the two vectors.

    References
    ----------
    1. Gavin DG et al. (2003) A statistical approach to evaluating
       distance metrics and analog assignments for pollen records.
       Quaternary Research 60:356–367.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Computed as the dot product of ``u - v`` with itself, i.e. the
    square of the Euclidean distance.
    """
    delta = np.asarray(u) - np.asarray(v)
    return np.dot(delta, delta)
|
|
1192
|
+
|
|
1193
|
+
def taneja(self, u, v):
    """Compute the Taneja distance between two vectors.

    Evaluates ``sum(((u + v) / 2) * log((u + v) / (2 * sqrt(u * v))))``
    after replacing exact zeros in ``u`` and ``v`` by ``self.epsilon``.
    The logarithm's argument is additionally clipped from below at
    ``self.epsilon``.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare. They are
      never modified by this method.

    Returns
    -------
    - The Taneja distance between the two vectors.

    References
    ----------
    1. Taneja IJ. (1995), New Developments in Generalized Information
       Measures, Chapter in: Advances in Imaging and Electron Physics,
       Ed. P.W. Hawkes, 91, 37-135.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    u, v = np.asarray(u), np.asarray(v)
    # np.asarray does not copy an existing ndarray, so assigning into
    # u/v would overwrite the caller's data. np.where builds new arrays
    # instead, and promotes integer input to float so that epsilon is
    # not truncated to 0.
    u = np.where(u == 0, self.epsilon, u)
    v = np.where(v == 0, self.epsilon, v)
    with np.errstate(divide="ignore", invalid="ignore"):
        uvsum = u + v
        logarg = np.clip(
            uvsum / (2 * np.sqrt(u * v)), a_min=self.epsilon, a_max=None
        )
        return np.sum((uvsum / 2) * np.log(logarg))
|
|
1223
|
+
|
|
1224
|
+
def tanimoto(self, u, v):
    """Compute the Tanimoto distance between two vectors.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare.

    Returns
    -------
    - The Tanimoto distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals Soergel distance. The value is computed from the sums of
    ``u``, ``v`` and their element-wise minimum, which is algebraically
    the same as ``sum(|u - v|) / sum(max(u, v))``. The denominator is
    not guarded against zero.
    """
    a, b = np.asarray(u), np.asarray(v)
    overlap = np.sum(np.minimum(a, b))
    combined = np.sum(a) + np.sum(b)
    return (combined - 2 * overlap) / (combined - overlap)
|
|
1252
|
+
|
|
1253
|
+
def topsoe(self, u, v):
    """Compute the Topsøe distance between two vectors.

    Evaluates ``sum(u * log(2u / (u + v)) + v * log(2v / (u + v)))``
    after replacing exact zeros in ``u`` and ``v`` by ``self.epsilon``.
    Each logarithm's argument is additionally clipped from below at
    ``self.epsilon``.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare. They are
      never modified by this method.

    Returns
    -------
    - The Topsøe distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals two times Jensen-Shannon divergence.
    """
    u, v = np.asarray(u), np.asarray(v)
    # np.asarray does not copy an existing ndarray, so assigning into
    # u/v would overwrite the caller's data. np.where builds new arrays
    # instead, and promotes integer input to float so that epsilon is
    # not truncated to 0.
    u = np.where(u == 0, self.epsilon, u)
    v = np.where(v == 0, self.epsilon, v)
    with np.errstate(divide="ignore", invalid="ignore"):
        uvsum = u + v
        logarg1 = np.clip(2 * u / uvsum, a_min=self.epsilon, a_max=None)
        logarg2 = np.clip(2 * v / uvsum, a_min=self.epsilon, a_max=None)
        dl = u * np.log(logarg1)
        dr = v * np.log(logarg2)
        return np.sum(dl + dr)
|
|
1284
|
+
|
|
1285
|
+
def vicis_symmetric_chisq(self, u, v):
    """Compute the Vicis Symmetric chi-square distance.

    Evaluates ``sum((u - v)**2 / min(u, v)**2)``; coordinates where the
    squared element-wise minimum is exactly zero contribute 0.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare.

    Returns
    -------
    - The Vicis Symmetric chi-square distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307
    """
    a, b = np.asarray(u), np.asarray(v)
    # Zero denominators are masked by np.where, so warnings are silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        squared_gap = (a - b) ** 2
        squared_floor = np.minimum(a, b) ** 2
        terms = np.where(squared_floor != 0, squared_gap / squared_floor, 0)
        return np.sum(terms)
|
|
1308
|
+
|
|
1309
|
+
def vicis_wave_hedges(self, u, v):
    """Compute the Vicis-Wave Hedges distance between two vectors.

    Evaluates ``sum(|u - v| / min(u, v))``; coordinates where the
    element-wise minimum is exactly zero contribute 0.

    Parameters
    ----------
    - u, v: Array-likes holding the two vectors to compare.

    Returns
    -------
    - The Vicis-Wave Hedges distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    a, b = np.asarray(u), np.asarray(v)
    # Zero denominators are masked by np.where, so warnings are silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        gap = np.abs(a - b)
        floor = np.minimum(a, b)
        terms = np.where(floor != 0, gap / floor, 0)
        return np.sum(terms)
|
|
1332
|
+
|
|
1333
|
+
# def fidelity(self, u, v):
|
|
1334
|
+
# """
|
|
1335
|
+
# Calculate the fidelity distance between two vectors.
|
|
1336
|
+
|
|
1337
|
+
# The fidelity distance measures the similarity between two probability
|
|
1338
|
+
# distributions.
|
|
1339
|
+
|
|
1340
|
+
# Parameters
|
|
1341
|
+
# ----------
|
|
1342
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1343
|
+
|
|
1344
|
+
# Returns
|
|
1345
|
+
# -------
|
|
1346
|
+
# - The fidelity distance between the two vectors.
|
|
1347
|
+
|
|
1348
|
+
# Notes
|
|
1349
|
+
# -----
|
|
1350
|
+
# Added by SC.
|
|
1351
|
+
# """
|
|
1352
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1353
|
+
# return 1 - (np.sum(np.sqrt(u * v)))
|
|
1354
|
+
|
|
1355
|
+
# # NEEDS CHECKING
|
|
1356
|
+
# # def harmonicmean(self, u, v):
|
|
1357
|
+
# # """
|
|
1358
|
+
# # Harmonic mean distance.
|
|
1359
|
+
# # Notes:
|
|
1360
|
+
# # Added by SC.
|
|
1361
|
+
# # """
|
|
1362
|
+
# # u, v = np.asarray(u), np.asarray(v)
|
|
1363
|
+
# # return 1 - 2.0 * np.sum(u * v / (u + v))
|
|
1364
|
+
|
|
1365
|
+
# # def inner(self, u, v):
|
|
1366
|
+
# # """
|
|
1367
|
+
# # Calculate the inner product distance between two vectors.
|
|
1368
|
+
|
|
1369
|
+
# # The inner product distance is a measure of
|
|
1370
|
+
# # similarity between two vectors,
|
|
1371
|
+
# # based on their inner product.
|
|
1372
|
+
|
|
1373
|
+
# # Parameters
|
|
1374
|
+
# # ----------
|
|
1375
|
+
# # - u, v: Input vectors between which the distance is to be calculated.
|
|
1376
|
+
|
|
1377
|
+
# # Returns
|
|
1378
|
+
# # -------
|
|
1379
|
+
# # - The inner product distance between the two vectors.
|
|
1380
|
+
|
|
1381
|
+
# # Notes
|
|
1382
|
+
# # -----
|
|
1383
|
+
# # Added by SC.
|
|
1384
|
+
# # """
|
|
1385
|
+
# # u, v = np.asarray(u), np.asarray(v)
|
|
1386
|
+
# # return 1 - np.dot(u, v)
|
|
1387
|
+
|
|
1388
|
+
# def k_divergence(self, u, v):
|
|
1389
|
+
# """Calculate the K divergence between two vectors.
|
|
1390
|
+
|
|
1391
|
+
# Parameters
|
|
1392
|
+
# ----------
|
|
1393
|
+
# - u, v: Input vectors between which the divergence is to be calculated.
|
|
1394
|
+
|
|
1395
|
+
# Returns
|
|
1396
|
+
# -------
|
|
1397
|
+
# - The K divergence between the two vectors.
|
|
1398
|
+
|
|
1399
|
+
# References
|
|
1400
|
+
# ----------
|
|
1401
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1402
|
+
# Measures between Probability Density Functions. International
|
|
1403
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1404
|
+
# 1(4), 300-307.
|
|
1405
|
+
# """
|
|
1406
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1407
|
+
# u[u == 0] = self.epsilon
|
|
1408
|
+
# v[v == 0] = self.epsilon
|
|
1409
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1410
|
+
# return np.sum(u * np.log(2 * u / (u + v)))
|
|
1411
|
+
|
|
1412
|
+
# def kl_divergence(self, u, v):
|
|
1413
|
+
# """Calculate the Kullback-Leibler divergence between two vectors.
|
|
1414
|
+
|
|
1415
|
+
# The Kullback-Leibler divergence measures the difference between two
|
|
1416
|
+
# probability distributions.
|
|
1417
|
+
|
|
1418
|
+
# Parameters
|
|
1419
|
+
# ----------
|
|
1420
|
+
# - u, v: Input vectors between which the divergence is to be calculated.
|
|
1421
|
+
|
|
1422
|
+
# Returns
|
|
1423
|
+
# -------
|
|
1424
|
+
# - The Kullback-Leibler divergence between the two vectors.
|
|
1425
|
+
|
|
1426
|
+
# References
|
|
1427
|
+
# ----------
|
|
1428
|
+
# 1. Kullback S, Leibler RA (1951) On information and sufficiency.
|
|
1429
|
+
# Ann. Math. Statist. 22:79–86
|
|
1430
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1431
|
+
# Measures between Probability Density Functions. International
|
|
1432
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1433
|
+
# 1(4):300-307.
|
|
1434
|
+
# """
|
|
1435
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1436
|
+
# u[u == 0] = self.epsilon
|
|
1437
|
+
# v[v == 0] = self.epsilon
|
|
1438
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1439
|
+
# return np.sum(u * np.log(u / v))
|
|
1440
|
+
|
|
1441
|
+
# def max_symmetric_chisq(self, u, v):
|
|
1442
|
+
# """Calculate the maximum symmetric chi-square distance.
|
|
1443
|
+
|
|
1444
|
+
# Parameters
|
|
1445
|
+
# ----------
|
|
1446
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1447
|
+
|
|
1448
|
+
# Returns
|
|
1449
|
+
# -------
|
|
1450
|
+
# - The maximum symmetric chi-square distance between the two vectors.
|
|
1451
|
+
|
|
1452
|
+
# References
|
|
1453
|
+
# ----------
|
|
1454
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1455
|
+
# Measures between Probability Density Functions. International
|
|
1456
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1457
|
+
# 1(4):300-307.
|
|
1458
|
+
# """
|
|
1459
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1460
|
+
# return max(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
|
|
1461
|
+
|
|
1462
|
+
# def min_symmetric_chisq(self, u, v):
|
|
1463
|
+
# """Calculate the minimum symmetric chi-square distance.
|
|
1464
|
+
|
|
1465
|
+
# Parameters
|
|
1466
|
+
# ----------
|
|
1467
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1468
|
+
|
|
1469
|
+
# Returns
|
|
1470
|
+
# -------
|
|
1471
|
+
# - The minimum symmetric chi-square distance between the two vectors.
|
|
1472
|
+
|
|
1473
|
+
# Notes
|
|
1474
|
+
# -----
|
|
1475
|
+
# Added by SC.
|
|
1476
|
+
# """
|
|
1477
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1478
|
+
# return min(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
|
|
1479
|
+
|
|
1480
|
+
# def neyman_chisq(self, u, v):
|
|
1481
|
+
# """Calculate the Neyman chi-square distance between two vectors.
|
|
1482
|
+
|
|
1483
|
+
# Parameters
|
|
1484
|
+
# ----------
|
|
1485
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1486
|
+
|
|
1487
|
+
# Returns
|
|
1488
|
+
# -------
|
|
1489
|
+
# - The Neyman chi-square distance between the two vectors.
|
|
1490
|
+
|
|
1491
|
+
# References
|
|
1492
|
+
# ----------
|
|
1493
|
+
# 1. Neyman J (1949) Contributions to the theory of the chi^2 test.
|
|
1494
|
+
# In Proceedings of the First Berkeley Symposium on Mathematical
|
|
1495
|
+
# Statistics and Probability.
|
|
1496
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1497
|
+
# Measures between Probability Density Functions. International
|
|
1498
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1499
|
+
# 1(4), 300-307.
|
|
1500
|
+
# """
|
|
1501
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1502
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1503
|
+
# return np.sum(np.where(u != 0, (u - v) ** 2 / u, 0))
|
|
1504
|
+
|
|
1505
|
+
# def pearson_chisq(self, u, v):
|
|
1506
|
+
# """Calculate the Pearson chi-square divergence between two vectors.
|
|
1507
|
+
|
|
1508
|
+
# Parameters
|
|
1509
|
+
# ----------
|
|
1510
|
+
# - u, v: Input vectors between which the divergence is to be calculated.
|
|
1511
|
+
|
|
1512
|
+
# Returns
|
|
1513
|
+
# -------
|
|
1514
|
+
# - The Pearson chi-square divergence between the two vectors.
|
|
1515
|
+
|
|
1516
|
+
# References
|
|
1517
|
+
# ----------
|
|
1518
|
+
# 1. Pearson K. (1900) On the Criterion that a given system of
|
|
1519
|
+
# deviations from the probable in the case of correlated system
|
|
1520
|
+
# of variables is such that it can be reasonably supposed to have
|
|
1521
|
+
# arisen from random sampling, Phil. Mag. 50, 157-172.
|
|
1522
|
+
# 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1523
|
+
# Measures between Probability Density Functions. International
|
|
1524
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1525
|
+
# 1(4), 300-307.
|
|
1526
|
+
|
|
1527
|
+
# Notes
|
|
1528
|
+
# -----
|
|
1529
|
+
# Pearson chi-square divergence is asymmetric.
|
|
1530
|
+
# """
|
|
1531
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1532
|
+
# with np.errstate(divide="ignore", invalid="ignore"):
|
|
1533
|
+
# return np.sum(np.where(v != 0, (u - v) ** 2 / v, 0))
|
|
1534
|
+
|
|
1535
|
+
# def nonintersection(self, u, v):
|
|
1536
|
+
# """
|
|
1537
|
+
# Calculate the Nonintersection distance between two vectors.
|
|
1538
|
+
|
|
1539
|
+
# Parameters
|
|
1540
|
+
# ----------
|
|
1541
|
+
# - u, v: Input vectors between which the distance is to be calculated.
|
|
1542
|
+
|
|
1543
|
+
# Returns
|
|
1544
|
+
# -------
|
|
1545
|
+
# - The Nonintersection distance between the two vectors.
|
|
1546
|
+
|
|
1547
|
+
# References
|
|
1548
|
+
# ----------
|
|
1549
|
+
# 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
|
|
1550
|
+
# Measures between Probability Density Functions. International
|
|
1551
|
+
# Journal of Mathematical Models and Methods in Applied Sciences.
|
|
1552
|
+
# 1(4), 300-307.
|
|
1553
|
+
|
|
1554
|
+
# Notes
|
|
1555
|
+
# -----
|
|
1556
|
+
# When used for comparing two probability density functions (pdfs),
|
|
1557
|
+
# Nonintersection distance equals half of Cityblock distance.
|
|
1558
|
+
# """
|
|
1559
|
+
# u, v = np.asarray(u), np.asarray(v)
|
|
1560
|
+
# return 1 - np.sum(np.minimum(u, v))
|