distclassipy 0.1.5__py3-none-any.whl → 0.2.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
distclassipy/distances.py CHANGED
@@ -578,907 +578,907 @@ class Distance:
578
578
  with np.errstate(divide="ignore", invalid="ignore"):
579
579
  return np.sum(np.where(uvmult != 0, ((u - v) ** 2 * (u + v)) / uvmult, 0))
580
580
 
581
+ # NOTE: THE FOLLOWING DISTANCES HAVE NOT BEEN TESTED EXTENSIVELY AND ARE
582
+ # CURRENTLY IN ALPHA AND SO HAVE BEEN COMMENTED
581
583
 
582
- # NOTE: THE FOLLOWING DISTANCES HAVE NOT BEEN TESTED EXTENSIVELY AND ARE
583
- # CURRENTLY IN ALPHA AND SO HAVE BEEN COMMENTED
584
-
585
- # def acc(self, u, v):
586
- # """Calculate the average of Cityblock and Chebyshev distance.
587
-
588
- # This function computes the ACC distance, also known as the
589
- # Average distance, between two vectors u and v. It is the average of the
590
- # Cityblock (or Manhattan) and Chebyshev distances.
591
-
592
- # Parameters
593
- # ----------
594
- # - u, v: Input vectors between which the distance is to be calculated.
595
-
596
- # Returns
597
- # -------
598
- # - The ACC distance between the two vectors.
599
-
600
- # References
601
- # ----------
602
- # 1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean
603
- # Geometry. Dover Publications.
604
- # 2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
605
- # Measures between Probability Density Functions. International
606
- # Journal of Mathematical Models and Methods in Applied Sciences.
607
- # vol. 1(4), pp. 300-307.
608
- # """
609
- # return (self.cityblock(u, v) + self.chebyshev(u, v)) / 2
610
-
611
- # # def bhattacharyya(self, u, v):
612
- # # """
613
- # # Calculate the Bhattacharyya distance between two vectors.
614
-
615
- # # Returns a distance value between 0 and 1.
616
-
617
- # # Parameters
618
- # # ----------
619
- # # - u, v: Input vectors between which the distance is to be calculated.
620
-
621
- # # Returns
622
- # # -------
623
- # # - The Bhattacharyya distance between the two vectors.
624
-
625
- # # References
626
- # # ----------
627
- # # 1. Bhattacharyya A (1947) On a measure of divergence between two
628
- # # statistical populations defined by probability distributions,
629
- # # Bull. Calcutta Math. Soc., 35, 99–109.
630
- # # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
631
- # # Measures between Probability Density Functions. International
632
- # # Journal of Mathematical Models and Methods in Applied Sciences.
633
- # # 1(4), 300-307.
634
- # # 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
635
- # # """
636
- # # u, v = np.asarray(u), np.asarray(v)
637
- # # return -np.log(np.sum(np.sqrt(u * v)))
638
-
639
- # def chebyshev_min(self, u, v):
640
- # """Calculate the minimum value distance between two vectors.
641
-
642
- # This measure represents a custom approach by Zielezinski to distance
643
- # measurement, focusing on the minimum absolute difference.
644
-
645
- # Parameters
646
- # ----------
647
- # - u, v: Input vectors between which the distance is to be calculated.
648
-
649
- # Returns
650
- # -------
651
- # - The minimum value distance between the two vectors.
652
- # """
653
- # u, v = np.asarray(u), np.asarray(v)
654
- # return np.amin(np.abs(u - v))
655
-
656
- # def czekanowski(self, u, v):
657
- # """Calculate the Czekanowski distance between two vectors.
658
-
659
- # Parameters
660
- # ----------
661
- # - u, v: Input vectors between which the distance is to be calculated.
662
-
663
- # Returns
664
- # -------
665
- # - The Czekanowski distance between the two vectors.
666
-
667
- # References
668
- # ----------
669
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
670
- # Measures between Probability Density Functions. International
671
- # Journal of Mathematical Models and Methods in Applied Sciences.
672
- # 1(4), 300-307.
673
- # """
674
- # u, v = np.asarray(u), np.asarray(v)
675
- # return np.sum(np.abs(u - v)) / np.sum(u + v)
676
-
677
- # def dice(self, u, v):
678
- # """Calculate the Dice dissimilarity between two vectors.
679
-
680
- # Synonyms:
681
- # Sorensen distance
682
-
683
- # Parameters
684
- # ----------
685
- # - u, v: Input vectors between which the distance is to be calculated.
686
-
687
- # Returns
688
- # -------
689
- # - The Dice dissimilarity between the two vectors.
690
-
691
- # References
692
- # ----------
693
- # 1. Dice LR (1945) Measures of the amount of ecologic association
694
- # between species. Ecology. 26, 297-302.
695
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
696
- # Measures between Probability Density Functions. International
697
- # Journal of Mathematical Models and Methods in Applied Sciences.
698
- # 1(4), 300-307.
699
- # """
700
- # u, v = np.asarray(u), np.asarray(v)
701
- # u_v = u - v
702
- # return np.dot(u_v, u_v) / (np.dot(u, u) + np.dot(v, v))
703
-
704
- # def divergence(self, u, v):
705
- # """Calculate the divergence between two vectors.
706
-
707
- # Divergence equals squared Clark distance multiplied by 2.
708
-
709
- # Parameters
710
- # ----------
711
- # - u, v: Input vectors between which the distance is to be calculated.
712
-
713
- # Returns
714
- # -------
715
- # - The divergence between the two vectors.
716
-
717
- # References
718
- # ----------
719
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
720
- # Measures between Probability Density Functions. International
721
- # Journal of Mathematical Models and Methods in Applied Sciences.
722
- # 1(4), 300-307.
723
- # """
724
- # u, v = np.asarray(u), np.asarray(v)
725
- # with np.errstate(invalid="ignore"):
726
- # return 2 * np.nansum(np.power(u - v, 2) / np.power(u + v, 2))
727
-
728
- # # def fidelity(self, u, v):
729
- # # """
730
- # # Calculate the fidelity distance between two vectors.
731
-
732
- # # The fidelity distance measures the similarity between two probability
733
- # # distributions.
734
-
735
- # # Parameters
736
- # # ----------
737
- # # - u, v: Input vectors between which the distance is to be calculated.
738
-
739
- # # Returns
740
- # # -------
741
- # # - The fidelity distance between the two vectors.
742
-
743
- # # Notes
744
- # # -----
745
- # # Added by SC.
746
- # # """
747
- # # u, v = np.asarray(u), np.asarray(v)
748
- # # return 1 - (np.sum(np.sqrt(u * v)))
749
-
750
- # def google(self, u, v):
751
- # """Calculate the Normalized Google Distance (NGD) between two vectors.
752
-
753
- # NGD is a measure of similarity derived from the number of hits returned by the
754
- # Google search engine for a given set of keywords.
755
-
756
- # Parameters
757
- # ----------
758
- # - u, v: Input vectors between which the distance is to be calculated.
759
-
760
- # Returns
761
- # -------
762
- # - The Normalized Google Distance between the two vectors.
763
-
764
- # Notes
765
- # -----
766
- # When used for comparing two probability density functions (pdfs),
767
- # Google distance equals half of Cityblock distance.
768
-
769
- # References
770
- # ----------
771
- # 1. Lee & Rashid (2008) Information Technology, ITSim 2008.
772
- # doi:10.1109/ITSIM.2008.4631601.
773
- # """
774
- # u, v = np.asarray(u), np.asarray(v)
775
- # x = float(np.sum(u))
776
- # y = float(np.sum(v))
777
- # summin = float(np.sum(np.minimum(u, v)))
778
- # return (max([x, y]) - summin) / ((x + y) - min([x, y]))
779
-
780
- # def gower(self, u, v):
781
- # """Calculate the Gower distance between two vectors.
782
-
783
- # The Gower distance equals the Cityblock distance divided by the vector length.
784
-
785
- # Parameters
786
- # ----------
787
- # - u, v: Input vectors between which the distance is to be calculated.
788
-
789
- # Returns
790
- # -------
791
- # - The Gower distance between the two vectors.
792
-
793
- # References
794
- # ----------
795
- # 1. Gower JC. (1971) General Coefficient of Similarity
796
- # and Some of Its Properties, Biometrics 27, 857-874.
797
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
798
- # Measures between Probability Density Functions. International
799
- # Journal of Mathematical Models and Methods in Applied Sciences.
800
- # 1(4), 300-307.
801
- # """
802
- # u, v = np.asarray(u), np.asarray(v)
803
- # return np.sum(np.abs(u - v)) / u.size
804
-
805
- # # NEEDS CHECKING
806
- # # def harmonicmean(self, u, v):
807
- # # """
808
- # # Harmonic mean distance.
809
- # # Notes:
810
- # # Added by SC.
811
- # # """
812
- # # u,v = np.asarray(u), np.asarray(v)
813
- # # return 1 - 2.*np.sum(u*v/(u+v))
814
-
815
- # # def inner(self, u, v):
816
- # # """
817
- # # Calculate the inner product distance between two vectors.
818
-
819
- # # The inner product distance is a measure of similarity between two vectors,
820
- # # based on their inner product.
821
-
822
- # # Parameters
823
- # # ----------
824
- # # - u, v: Input vectors between which the distance is to be calculated.
825
-
826
- # # Returns
827
- # # -------
828
- # # - The inner product distance between the two vectors.
829
-
830
- # # Notes
831
- # # -----
832
- # # Added by SC.
833
- # # """
834
- # # u, v = np.asarray(u), np.asarray(v)
835
- # # return 1 - np.dot(u, v)
836
-
837
- # def jeffreys(self, u, v):
838
- # """Calculate the Jeffreys divergence between two vectors.
839
-
840
- # The Jeffreys divergence is a symmetric version of the Kullback-Leibler
841
- # divergence.
842
-
843
- # Parameters
844
- # ----------
845
- # - u, v: Input vectors between which the divergence is to be calculated.
846
-
847
- # Returns
848
- # -------
849
- # - The Jeffreys divergence between the two vectors.
850
-
851
- # References
852
- # ----------
853
- # 1. Jeffreys H (1946) An Invariant Form for the Prior Probability
854
- # in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
855
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
856
- # Measures between Probability Density Functions. International
857
- # Journal of Mathematical Models and Methods in Applied Sciences.
858
- # 1(4), 300-307.
859
- # """
860
- # u, v = np.asarray(u), np.asarray(v)
861
- # # Add epsilon to zeros in vectors to avoid division
862
- # # by 0 and/or log of 0. Alternatively, zeros in the
863
- # # vectors could be ignored or masked (see below).
864
- # # u = ma.masked_where(u == 0, u)
865
- # # v = ma.masked_where(v == 0, u)
866
- # u = np.where(u == 0, self.epsilon, u)
867
- # v = np.where(v == 0, self.epsilon, v)
868
- # return np.sum((u - v) * np.log(u / v))
869
-
870
- # def jensenshannon_divergence(self, u, v):
871
- # """Calculate the Jensen-Shannon divergence between two vectors.
872
-
873
- # The Jensen-Shannon divergence is a symmetric and finite measure of similarity
874
- # between two probability distributions.
875
-
876
- # Parameters
877
- # ----------
878
- # - u, v: Input vectors between which the divergence is to be calculated.
879
-
880
- # Returns
881
- # -------
882
- # - The Jensen-Shannon divergence between the two vectors.
883
-
884
- # References
885
- # ----------
886
- # 1. Lin J. (1991) Divergence measures based on the Shannon entropy.
887
- # IEEE Transactions on Information Theory, 37(1):145–151.
888
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
889
- # Measures between Probability Density Functions. International
890
- # Journal of Mathematical Models and Methods in Applied Sciences.
891
- # 1(4), 300-307.
892
- # Comments:
893
- # Equals Jensen difference in Sung-Hyuk (2007):
894
- # u = np.where(u==0, self.epsilon, u)
895
- # v = np.where(v==0, self.epsilon, v)
896
- # el1 = (u * np.log(u) + v * np.log(v)) / 2
897
- # el2 = (u + v)/2
898
- # el3 = np.log(el2)
899
- # return np.sum(el1 - el2 * el3)
900
- # """
901
- # u, v = np.asarray(u), np.asarray(v)
902
- # u = np.where(u == 0, self.epsilon, u)
903
- # v = np.where(v == 0, self.epsilon, v)
904
- # dl = u * np.log(2 * u / (u + v))
905
- # dr = v * np.log(2 * v / (u + v))
906
- # return (np.sum(dl) + np.sum(dr)) / 2
907
-
908
- # def jensen_difference(self, u, v):
909
- # """Calculate the Jensen difference between two vectors.
910
-
911
- # The Jensen difference is considered similar to the Jensen-Shannon divergence.
912
-
913
- # Parameters
914
- # ----------
915
- # - u, v: Input vectors between which the distance is to be calculated.
916
-
917
- # Returns
918
- # -------
919
- # - The Jensen difference between the two vectors.
920
-
921
- # Notes
922
- # -----
923
- # 1. Equals half of Topsøe distance
924
- # 2. Equals squared jensenshannon_distance.
925
-
926
-
927
- # References
928
- # ----------
929
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
930
- # Measures between Probability Density Functions. International
931
- # Journal of Mathematical Models and Methods in Applied Sciences.
932
- # 1(4), 300-307.
933
- # """
934
- # u, v = np.asarray(u), np.asarray(v)
935
- # u = np.where(u == 0, self.epsilon, u)
936
- # v = np.where(v == 0, self.epsilon, v)
937
- # el1 = (u * np.log(u) + v * np.log(v)) / 2
938
- # el2 = (u + v) / 2
939
- # return np.sum(el1 - el2 * np.log(el2))
940
-
941
- # def k_divergence(self, u, v):
942
- # """Calculate the K divergence between two vectors.
943
-
944
- # Parameters
945
- # ----------
946
- # - u, v: Input vectors between which the divergence is to be calculated.
947
-
948
- # Returns
949
- # -------
950
- # - The K divergence between the two vectors.
951
-
952
- # References
953
- # ----------
954
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
955
- # Measures between Probability Density Functions. International
956
- # Journal of Mathematical Models and Methods in Applied Sciences.
957
- # 1(4), 300-307.
958
- # """
959
- # u, v = np.asarray(u), np.asarray(v)
960
- # u = np.where(u == 0, self.epsilon, u)
961
- # v = np.where(v == 0, self.epsilon, v)
962
- # return np.sum(u * np.log(2 * u / (u + v)))
963
-
964
- # def kl_divergence(self, u, v):
965
- # """Calculate the Kullback-Leibler divergence between two vectors.
966
-
967
- # The Kullback-Leibler divergence measures the difference between two
968
- # probability distributions.
969
-
970
- # Parameters
971
- # ----------
972
- # - u, v: Input vectors between which the divergence is to be calculated.
973
-
974
- # Returns
975
- # -------
976
- # - The Kullback-Leibler divergence between the two vectors.
977
-
978
- # References
979
- # ----------
980
- # 1. Kullback S, Leibler RA (1951) On information and sufficiency.
981
- # Ann. Math. Statist. 22:79–86
982
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
983
- # Measures between Probability Density Functions. International
984
- # Journal of Mathematical Models and Methods in Applied Sciences.
985
- # 1(4):300-307.
986
- # """
987
- # u, v = np.asarray(u), np.asarray(v)
988
- # u = np.where(u == 0, self.epsilon, u)
989
- # v = np.where(v == 0, self.epsilon, v)
990
- # return np.sum(u * np.log(u / v))
991
-
992
- # def kumarjohnson(self, u, v):
993
- # """Calculate the Kumar-Johnson distance between two vectors.
994
-
995
- # Parameters
996
- # ----------
997
- # - u, v: Input vectors between which the distance is to be calculated.
998
-
999
- # Returns
1000
- # -------
1001
- # - The Kumar-Johnson distance between the two vectors.
1002
-
1003
- # References
1004
- # ----------
1005
- # 1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
1006
- # and information inequalities, Journal of Inequalities in pure
1007
- # and applied Mathematics. 6(3).
1008
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1009
- # Measures between Probability Density Functions. International
1010
- # Journal of Mathematical Models and Methods in Applied Sciences.
1011
- # 1(4):300-307.
1012
- # """
1013
- # u, v = np.asarray(u), np.asarray(v)
1014
- # uvmult = u * v
1015
- # with np.errstate(divide="ignore", invalid="ignore"):
1016
- # numer = np.power(u**2 - v**2, 2)
1017
- # denom = 2 * np.power(uvmult, 3 / 2)
1018
- # return np.sum(np.where(uvmult != 0, numer / denom, 0))
1019
-
1020
- # def matusita(self, u, v):
1021
- # """Calculate the Matusita distance between two vectors.
1022
-
1023
- # Parameters
1024
- # ----------
1025
- # - u, v: Input vectors between which the distance is to be calculated.
1026
-
1027
- # Returns
1028
- # -------
1029
- # - The Matusita distance between the two vectors.
1030
-
1031
- # References
1032
- # ----------
1033
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1034
- # Measures between Probability Density Functions. International
1035
- # Journal of Mathematical Models and Methods in Applied Sciences.
1036
- # 1(4):300-307.
1037
-
1038
- # Notes
1039
- # -----
1040
- # Equals square root of Squared-chord distance.
1041
- # """
1042
- # u, v = np.asarray(u), np.asarray(v)
1043
- # return np.sqrt(np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
1044
-
1045
- # def max_symmetric_chisq(self, u, v):
1046
- # """Calculate the maximum symmetric chi-square distance.
1047
-
1048
- # Parameters
1049
- # ----------
1050
- # - u, v: Input vectors between which the distance is to be calculated.
1051
-
1052
- # Returns
1053
- # -------
1054
- # - The maximum symmetric chi-square distance between the two vectors.
1055
-
1056
- # References
1057
- # ----------
1058
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1059
- # Measures between Probability Density Functions. International
1060
- # Journal of Mathematical Models and Methods in Applied Sciences.
1061
- # 1(4):300-307.
1062
- # """
1063
- # u, v = np.asarray(u), np.asarray(v)
1064
- # return max(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
1065
-
1066
- # def min_symmetric_chisq(self, u, v):
1067
- # """Calculate the minimum symmetric chi-square distance.
1068
-
1069
- # Parameters
1070
- # ----------
1071
- # - u, v: Input vectors between which the distance is to be calculated.
1072
-
1073
- # Returns
1074
- # -------
1075
- # - The minimum symmetric chi-square distance between the two vectors.
1076
-
1077
- # Notes
1078
- # -----
1079
- # Added by SC.
1080
- # """
1081
- # u, v = np.asarray(u), np.asarray(v)
1082
- # return min(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
1083
-
1084
- # def minkowski(self, u, v, p=2):
1085
- # """Calculate the Minkowski distance between two vectors.
1086
-
1087
- # Parameters
1088
- # ----------
1089
- # - u, v: Input vectors between which the distance is to be calculated.
1090
- # - p: The order of the norm of the difference.
1091
-
1092
- # Returns
1093
- # -------
1094
- # - The Minkowski distance between the two vectors.
1095
-
1096
- # Notes
1097
- # -----
1098
- # When p goes to infinite, the Chebyshev distance is derived.
1099
-
1100
- # References
1101
- # ----------
1102
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1103
- # Measures between Probability Density Functions. International
1104
- # Journal of Mathematical Models and Methods in Applied Sciences.
1105
- # 1(4):300-307.
1106
- # """
1107
- # u, v = np.asarray(u), np.asarray(v)
1108
- # return np.linalg.norm(u - v, ord=p)
1109
-
1110
- # def neyman_chisq(self, u, v):
1111
- # """Calculate the Neyman chi-square distance between two vectors.
1112
-
1113
- # Parameters
1114
- # ----------
1115
- # - u, v: Input vectors between which the distance is to be calculated.
1116
-
1117
- # Returns
1118
- # -------
1119
- # - The Neyman chi-square distance between the two vectors.
1120
-
1121
- # References
1122
- # ----------
1123
- # 1. Neyman J (1949) Contributions to the theory of the chi^2 test.
1124
- # In Proceedings of the First Berkley Symposium on Mathematical
1125
- # Statistics and Probability.
1126
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1127
- # Measures between Probability Density Functions. International
1128
- # Journal of Mathematical Models and Methods in Applied Sciences.
1129
- # 1(4), 300-307.
1130
- # """
1131
- # u, v = np.asarray(u), np.asarray(v)
1132
- # with np.errstate(divide="ignore", invalid="ignore"):
1133
- # return np.sum(np.where(u != 0, (u - v) ** 2 / u, 0))
1134
-
1135
- # # def nonintersection(self, u, v):
1136
- # # """
1137
- # # Calculate the Nonintersection distance between two vectors.
1138
-
1139
- # # Parameters
1140
- # # ----------
1141
- # # - u, v: Input vectors between which the distance is to be calculated.
1142
-
1143
- # # Returns
1144
- # # -------
1145
- # # - The Nonintersection distance between the two vectors.
1146
-
1147
- # # References
1148
- # # ----------
1149
- # # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1150
- # # Measures between Probability Density Functions. International
1151
- # # Journal of Mathematical Models and Methods in Applied Sciences.
1152
- # # 1(4), 300-307.
1153
-
1154
- # # Notes
1155
- # # -----
1156
- # # When used for comparing two probability density functions (pdfs),
1157
- # # Nonintersection distance equals half of Cityblock distance.
1158
- # # """
1159
- # # u, v = np.asarray(u), np.asarray(v)
1160
- # # return 1 - np.sum(np.minimum(u, v))
1161
-
1162
- # def pearson_chisq(self, u, v):
1163
- # """Calculate the Pearson chi-square divergence between two vectors.
1164
-
1165
- # Parameters
1166
- # ----------
1167
- # - u, v: Input vectors between which the divergence is to be calculated.
1168
-
1169
- # Returns
1170
- # -------
1171
- # - The Pearson chi-square divergence between the two vectors.
1172
-
1173
- # References
1174
- # ----------
1175
- # 1. Pearson K. (1900) On the Criterion that a given system of
1176
- # deviations from the probable in the case of correlated system
1177
- # of variables is such that it can be reasonable supposed to have
1178
- # arisen from random sampling, Phil. Mag. 50, 157-172.
1179
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1180
- # Measures between Probability Density Functions. International
1181
- # Journal of Mathematical Models and Methods in Applied Sciences.
1182
- # 1(4), 300-307.
1183
-
1184
- # Notes
1185
- # -----
1186
- # Pearson chi-square divergence is asymmetric.
1187
- # """
1188
- # u, v = np.asarray(u), np.asarray(v)
1189
- # with np.errstate(divide="ignore", invalid="ignore"):
1190
- # return np.sum(np.where(v != 0, (u - v) ** 2 / v, 0))
1191
-
1192
- # def penroseshape(self, u, v):
1193
- # """Calculate the Penrose shape distance between two vectors.
1194
-
1195
- # Parameters
1196
- # ----------
1197
- # - u, v: Input vectors between which the distance is to be calculated.
1198
-
1199
- # Returns
1200
- # -------
1201
- # - The Penrose shape distance between the two vectors.
1202
-
1203
- # References
1204
- # ----------
1205
- # 1. Deza M, Deza E (2009) Encyclopedia of Distances.
1206
- # Springer-Verlag Berlin Heidelberg. 1-590.
1207
- # """
1208
- # u, v = np.asarray(u), np.asarray(v)
1209
- # umu = np.mean(u)
1210
- # vmu = np.mean(v)
1211
- # return np.sqrt(np.sum(((u - umu) - (v - vmu)) ** 2))
1212
-
1213
- # def prob_chisq(self, u, v):
1214
- # """Calculate the Probabilistic chi-square distance between two vectors.
1215
-
1216
- # Parameters
1217
- # ----------
1218
- # - u, v: Input vectors between which the distance is to be calculated.
1219
-
1220
- # Returns
1221
- # -------
1222
- # - The Probabilistic chi-square distance between the two vectors.
1223
-
1224
- # Notes
1225
- # -----
1226
- # Added by SC.
1227
- # """
1228
- # u, v = np.asarray(u), np.asarray(v)
1229
- # uvsum = u + v
1230
- # with np.errstate(divide="ignore", invalid="ignore"):
1231
- # return 2 * np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
1232
-
1233
- # def ruzicka(self, u, v):
1234
- # """Calculate the Ruzicka distance between two vectors.
1235
-
1236
- # Parameters
1237
- # ----------
1238
- # - u, v: Input vectors between which the distance is to be calculated.
1239
-
1240
- # Returns
1241
- # -------
1242
- # - The Ruzicka distance between the two vectors.
1243
-
1244
- # Notes
1245
- # -----
1246
- # Added by SC.
1247
- # """
1248
- # u, v = np.asarray(u), np.asarray(v)
1249
- # den = np.sum(np.maximum(u, v))
1250
-
1251
- # return 1 - np.sum(np.minimum(u, v)) / den
1252
-
1253
- # def sorensen(self, u, v):
1254
- # """Calculate the Sorensen distance between two vectors.
1255
-
1256
- # Parameters
1257
- # ----------
1258
- # - u, v: Input vectors between which the distance is to be calculated.
1259
-
1260
- # Returns
1261
- # -------
1262
- # - The Sorensen distance between the two vectors.
1263
-
1264
- # Notes
1265
- # -----
1266
- # The Sorensen distance equals the Manhattan distance divided by the sum of
1267
- # the two vectors.
1268
-
1269
- # Added by SC.
1270
- # """
1271
- # u, v = np.asarray(u), np.asarray(v)
1272
- # return np.sum(np.abs(u - v)) / np.sum(u + v)
1273
-
1274
- # def squared_chisq(self, u, v):
1275
- # """Calculate the Squared chi-square distance between two vectors.
1276
-
1277
- # Parameters
1278
- # ----------
1279
- # - u, v: Input vectors between which the distance is to be calculated.
1280
-
1281
- # Returns
1282
- # -------
1283
- # - The Squared chi-square distance between the two vectors.
1284
-
1285
- # References
1286
- # ----------
1287
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1288
- # Measures between Probability Density Functions. International
1289
- # Journal of Mathematical Models and Methods in Applied Sciences.
1290
- # 1(4), 300-307.
1291
- # """
1292
- # u, v = np.asarray(u), np.asarray(v)
1293
- # uvsum = u + v
1294
- # with np.errstate(divide="ignore", invalid="ignore"):
1295
- # return np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
1296
-
1297
- # def squaredchord(self, u, v):
1298
- # """Calculate the Squared-chord distance between two vectors.
1299
-
1300
- # Parameters
1301
- # ----------
1302
- # - u, v: Input vectors between which the distance is to be calculated.
1303
-
1304
- # Returns
1305
- # -------
1306
- # - The Squared-chord distance between the two vectors.
1307
-
1308
- # References
1309
- # ----------
1310
- # 1. Gavin DG et al. (2003) A statistical approach to evaluating
1311
- # distance metrics and analog assignments for pollen records.
1312
- # Quaternary Research 60:356–367.
1313
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1314
- # Measures between Probability Density Functions. International
1315
- # Journal of Mathematical Models and Methods in Applied Sciences.
1316
- # 1(4), 300-307.
1317
-
1318
- # Notes
1319
- # -----
1320
- # Equals to squared Matusita distance.
1321
- # """
1322
- # u, v = np.asarray(u), np.asarray(v)
1323
- # return np.sum((np.sqrt(u) - np.sqrt(v)) ** 2)
1324
-
1325
- # def squared_euclidean(self, u, v):
1326
- # """Calculate the Squared Euclidean distance between two vectors.
1327
-
1328
- # Parameters
1329
- # ----------
1330
- # - u, v: Input vectors between which the distance is to be calculated.
1331
-
1332
- # Returns
1333
- # -------
1334
- # - The Squared Euclidean distance between the two vectors.
1335
-
1336
- # References
1337
- # ----------
1338
- # 1. Gavin DG et al. (2003) A statistical approach to evaluating
1339
- # distance metrics and analog assignments for pollen records.
1340
- # Quaternary Research 60:356–367.
1341
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1342
- # Measures between Probability Density Functions. International
1343
- # Journal of Mathematical Models and Methods in Applied Sciences.
1344
- # 1(4), 300-307.
1345
-
1346
- # Notes
1347
- # -----
1348
- # Equals to squared Euclidean distance.
1349
- # """
1350
- # u, v = np.asarray(u), np.asarray(v)
1351
- # return np.dot((u - v), (u - v))
1352
-
1353
- # def taneja(self, u, v):
1354
- # """Calculate the Taneja distance between two vectors.
1355
-
1356
- # Parameters
1357
- # ----------
1358
- # - u, v: Input vectors between which the distance is to be calculated.
1359
-
1360
- # Returns
1361
- # -------
1362
- # - The Taneja distance between the two vectors.
1363
-
1364
- # References
1365
- # ----------
1366
- # 1. Taneja IJ. (1995), New Developments in Generalized Information
1367
- # Measures, Chapter in: Advances in Imaging and Electron Physics,
1368
- # Ed. P.W. Hawkes, 91, 37-135.
1369
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1370
- # Measures between Probability Density Functions. International
1371
- # Journal of Mathematical Models and Methods in Applied Sciences.
1372
- # 1(4), 300-307.
1373
- # """
1374
- # u, v = np.asarray(u), np.asarray(v)
1375
- # u = np.where(u == 0, self.epsilon, u)
1376
- # v = np.where(v == 0, self.epsilon, v)
1377
- # uvsum = u + v
1378
- # return np.sum((uvsum / 2) * np.log(uvsum / (2 * np.sqrt(u * v))))
1379
-
1380
- # def tanimoto(self, u, v):
1381
- # """Calculate the Tanimoto distance between two vectors.
1382
-
1383
- # Parameters
1384
- # ----------
1385
- # - u, v: Input vectors between which the distance is to be calculated.
1386
-
1387
- # Returns
1388
- # -------
1389
- # - The Tanimoto distance between the two vectors.
1390
-
1391
- # References
1392
- # ----------
1393
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1394
- # Measures between Probability Density Functions. International
1395
- # Journal of Mathematical Models and Methods in Applied Sciences.
1396
- # 1(4), 300-307.
1397
-
1398
- # Notes
1399
- # -----
1400
- # Equals Soergel distance.
1401
- # """
1402
- # u, v = np.asarray(u), np.asarray(v)
1403
- # # return np.sum(abs(u-v)) / np.sum(np.maximum(u, v))
1404
- # usum = np.sum(u)
1405
- # vsum = np.sum(v)
1406
- # minsum = np.sum(np.minimum(u, v))
1407
- # return (usum + vsum - 2 * minsum) / (usum + vsum - minsum)
1408
-
1409
- # def topsoe(self, u, v):
1410
- # """Calculate the Topsøe distance between two vectors.
1411
-
1412
- # Parameters
1413
- # ----------
1414
- # - u, v: Input vectors between which the distance is to be calculated.
1415
-
1416
- # Returns
1417
- # -------
1418
- # - The Topsøe distance between the two vectors.
1419
-
1420
- # References
1421
- # ----------
1422
- # 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1423
- # Measures between Probability Density Functions. International
1424
- # Journal of Mathematical Models and Methods in Applied Sciences.
1425
- # 1(4), 300-307.
1426
-
1427
- # Notes
1428
- # -----
1429
- # Equals two times Jensen-Shannon divergence.
1430
- # """
1431
- # u, v = np.asarray(u), np.asarray(v)
1432
- # u = np.where(u == 0, self.epsilon, u)
1433
- # v = np.where(v == 0, self.epsilon, v)
1434
- # dl = u * np.log(2 * u / (u + v))
1435
- # dr = v * np.log(2 * v / (u + v))
1436
- # return np.sum(dl + dr)
1437
-
1438
- # def vicis_symmetric_chisq(self, u, v):
1439
- # """Calculate the Vicis Symmetric chi-square distance.
1440
-
1441
- # Parameters
1442
- # ----------
1443
- # - u, v: Input vectors between which the distance is to be calculated.
1444
-
1445
- # Returns
1446
- # -------
1447
- # - The Vicis Symmetric chi-square distance between the two vectors.
1448
-
1449
- # References
1450
- # ----------
1451
- # 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1452
- # Measures between Probability Density Functions. International
1453
- # Journal of Mathematical Models and Methods in Applied Sciences.
1454
- # 1(4), 300-307
1455
- # """
1456
- # u, v = np.asarray(u), np.asarray(v)
1457
- # with np.errstate(divide="ignore", invalid="ignore"):
1458
- # u_v = (u - v) ** 2
1459
- # uvmin = np.minimum(u, v) ** 2
1460
- # return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
1461
-
1462
- # def vicis_wave_hedges(self, u, v):
1463
- # """Calculate the Vicis-Wave Hedges distance between two vectors.
1464
-
1465
- # Parameters
1466
- # ----------
1467
- # - u, v: Input vectors between which the distance is to be calculated.
1468
-
1469
- # Returns
1470
- # -------
1471
- # - The Vicis-Wave Hedges distance between the two vectors.
1472
-
1473
- # References
1474
- # ----------
1475
- # 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1476
- # Measures between Probability Density Functions. International
1477
- # Journal of Mathematical Models and Methods in Applied Sciences.
1478
- # 1(4), 300-307.
1479
- # """
1480
- # u, v = np.asarray(u), np.asarray(v)
1481
- # with np.errstate(divide="ignore", invalid="ignore"):
1482
- # u_v = abs(u - v)
1483
- # uvmin = np.minimum(u, v)
1484
- # return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
584
def acc(self, u, v):
    """Return the ACC (average) distance between two vectors.

    The value is the arithmetic mean of the results of ``self.cityblock``
    and ``self.chebyshev`` evaluated on the same pair of inputs.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - Half the sum of the Cityblock and Chebyshev distances of u and v.

    References
    ----------
    1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean
       Geometry. Dover Publications.
    2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       vol. 1(4), pp. 300-307.
    """
    manhattan_part = self.cityblock(u, v)
    chebyshev_part = self.chebyshev(u, v)
    return (manhattan_part + chebyshev_part) / 2
609
+
610
+ # def bhattacharyya(self, u, v):
611
+ # """
612
+ # Calculate the Bhattacharyya distance between two vectors.
613
+
614
+ # Returns a distance value between 0 and 1.
615
+
616
+ # Parameters
617
+ # ----------
618
+ # - u, v: Input vectors between which the distance is to be calculated.
619
+
620
+ # Returns
621
+ # -------
622
+ # - The Bhattacharyya distance between the two vectors.
623
+
624
+ # References
625
+ # ----------
626
+ # 1. Bhattacharyya A (1947) On a measure of divergence between two
627
+ # statistical populations defined by probability distributions,
628
+ # Bull. Calcutta Math. Soc., 35, 99–109.
629
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
630
+ # Measures between Probability Density Functions. International
631
+ # Journal of Mathematical Models and Methods in Applied Sciences.
632
+ # 1(4), 300-307.
633
+ # 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
634
+ # """
635
+ # u, v = np.asarray(u), np.asarray(v)
636
+ # return -np.log(np.sum(np.sqrt(u * v)))
637
+
638
def chebyshev_min(self, u, v):
    """Return the smallest element-wise absolute difference of two vectors.

    The original documentation attributes this measure to Zielezinski as a
    custom distance that focuses on the minimum absolute difference.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The minimum of |u - v| taken over all elements.
    """
    gaps = np.abs(np.asarray(u) - np.asarray(v))
    return np.amin(gaps)
654
+
655
def czekanowski(self, u, v):
    """Return the Czekanowski distance between two vectors.

    Computed as the sum of absolute element-wise differences divided by
    the sum of all elements of both vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - sum(|u - v|) / sum(u + v).

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    a, b = np.asarray(u), np.asarray(v)
    abs_diff_total = np.sum(np.abs(a - b))
    mass_total = np.sum(a + b)
    return abs_diff_total / mass_total
675
+
676
def dice(self, u, v):
    """Return the Dice dissimilarity between two vectors.

    Also listed in the original documentation under the synonym
    "Sorensen distance". The value is the squared length of ``u - v``
    divided by the sum of the squared lengths of ``u`` and ``v``.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - dot(u - v, u - v) / (dot(u, u) + dot(v, v)).

    References
    ----------
    1. Dice LR (1945) Measures of the amount of ecologic association
       between species. Ecology. 26, 297-302.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    a, b = np.asarray(u), np.asarray(v)
    delta = a - b
    numerator = np.dot(delta, delta)
    denominator = np.dot(a, a) + np.dot(b, b)
    return numerator / denominator
702
+
703
def divergence(self, u, v):
    """Return the divergence between two vectors.

    Each element contributes (u - v)^2 / (u + v)^2; the contributions are
    summed with NaN entries skipped and the total is doubled. The original
    documentation notes this equals the squared Clark distance times 2.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - 2 * nansum((u - v)^2 / (u + v)^2).

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    a, b = np.asarray(u), np.asarray(v)
    # Only "invalid" (e.g. 0/0) warnings are silenced; the resulting NaN
    # terms are then dropped by nansum.
    with np.errstate(invalid="ignore"):
        squared_gap = np.power(a - b, 2)
        squared_total = np.power(a + b, 2)
        ratios = squared_gap / squared_total
        return 2 * np.nansum(ratios)
726
+
727
def google(self, u, v):
    """Return the Normalized Google Distance (NGD) between two vectors.

    NGD is a measure of similarity derived from the number of hits returned
    by the Google search engine for a given set of keywords. Here it is
    computed from the element sums ``x = sum(u)``, ``y = sum(v)`` and the
    sum of element-wise minima ``m`` as
    ``(max(x, y) - m) / ((x + y) - min(x, y))``.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Normalized Google Distance between the two vectors, as a Python
      float. When both numerator and denominator are zero (for example two
      all-zero vectors) 0.0 is returned.

    Raises
    ------
    - ZeroDivisionError: if the denominator is zero while the numerator is
      not (unchanged from the previous behaviour).

    Notes
    -----
    When used for comparing two probability density functions (pdfs),
    Google distance equals half of Cityblock distance.

    References
    ----------
    1. Lee & Rashid (2008) Information Technology, ITSim 2008.
       doi:10.1109/ITSIM.2008.4631601.
    """
    u, v = np.asarray(u), np.asarray(v)
    x = float(np.sum(u))
    y = float(np.sum(v))
    summin = float(np.sum(np.minimum(u, v)))
    numer = max(x, y) - summin
    denom = (x + y) - min(x, y)
    # The sums are plain Python floats, so 0.0 / 0.0 would raise instead of
    # yielding a value; treat that degenerate case as zero distance.
    if numer == 0 and denom == 0:
        return 0.0
    return numer / denom
756
+
757
def gower(self, u, v):
    """Return the Gower distance between two vectors.

    This is the sum of absolute element-wise differences (the Cityblock
    distance) divided by the number of elements in ``u``.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - sum(|u - v|) / u.size.

    References
    ----------
    1. Gower JC. (1971) General Coefficient of Similarity
       and Some of Its Properties, Biometrics 27, 857-874.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    a, b = np.asarray(u), np.asarray(v)
    abs_diff_total = np.sum(np.abs(a - b))
    return abs_diff_total / a.size
781
+
782
def jeffreys(self, u, v):
    """Return the Jeffreys divergence between two vectors.

    The Jeffreys divergence is a symmetric version of the Kullback-Leibler
    divergence, computed here as sum((u - v) * log(u / v)).

    Parameters
    ----------
    - u, v: Input vectors between which the divergence is to be calculated.

    Returns
    -------
    - The Jeffreys divergence between the two vectors.

    References
    ----------
    1. Jeffreys H (1946) An Invariant Form for the Prior Probability
       in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    a, b = np.asarray(u), np.asarray(v)
    # Exact zeros are swapped for self.epsilon so that neither the ratio
    # nor the logarithm is evaluated at zero. (Masking the zero entries
    # would be an alternative way to achieve this.)
    eps = self.epsilon
    a = np.where(a == 0, eps, a)
    b = np.where(b == 0, eps, b)
    log_ratio = np.log(a / b)
    return np.sum((a - b) * log_ratio)
814
+
815
def jensenshannon_divergence(self, u, v):
    """Return the Jensen-Shannon divergence between two vectors.

    The Jensen-Shannon divergence is a symmetric and finite measure of
    similarity between two probability distributions. It is computed as
    half of sum(u * log(2u / (u + v))) + sum(v * log(2v / (u + v))).

    Parameters
    ----------
    - u, v: Input vectors between which the divergence is to be calculated.

    Returns
    -------
    - The Jensen-Shannon divergence between the two vectors.

    Notes
    -----
    The original documentation states that this equals the Jensen
    difference in Sung-Hyuk (2007), i.e. with zeros replaced by epsilon:
    sum((u*log(u) + v*log(v)) / 2 - ((u + v) / 2) * log((u + v) / 2)).

    References
    ----------
    1. Lin J. (1991) Divergence measures based on the Shannon entropy.
       IEEE Transactions on Information Theory, 37(1):145–151.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    a, b = np.asarray(u), np.asarray(v)
    # Replace exact zeros so the logarithms and ratios stay finite.
    eps = self.epsilon
    a = np.where(a == 0, eps, a)
    b = np.where(b == 0, eps, b)
    pair_sum = a + b
    left_terms = a * np.log(2 * a / pair_sum)
    right_terms = b * np.log(2 * b / pair_sum)
    return (np.sum(left_terms) + np.sum(right_terms)) / 2
852
+
853
def jensen_difference(self, u, v):
    """Return the Jensen difference between two vectors.

    Computed as sum((u*log(u) + v*log(v)) / 2 - m * log(m)) with
    m = (u + v) / 2, after zeros have been replaced by ``self.epsilon``.
    The original documentation describes it as similar to the
    Jensen-Shannon divergence.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Jensen difference between the two vectors.

    Notes
    -----
    1. Equals half of Topsøe distance
    2. Equals squared jensenshannon_distance.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    a, b = np.asarray(u), np.asarray(v)
    # Replace exact zeros so log() is never evaluated at zero.
    eps = self.epsilon
    a = np.where(a == 0, eps, a)
    b = np.where(b == 0, eps, b)
    entropy_part = (a * np.log(a) + b * np.log(b)) / 2
    midpoint = (a + b) / 2
    return np.sum(entropy_part - midpoint * np.log(midpoint))
885
+
886
def kumarjohnson(self, u, v):
    """Return the Kumar-Johnson distance between two vectors.

    Each element contributes (u^2 - v^2)^2 / (2 * (u*v)^(3/2)); elements
    whose product u*v is zero contribute 0.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Kumar-Johnson distance between the two vectors.

    References
    ----------
    1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
       and information inequalities, Journal of Inequalities in pure
       and applied Mathematics. 6(3).
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.
    """
    a, b = np.asarray(u), np.asarray(v)
    product = a * b
    # Division warnings are silenced because zero-product positions are
    # overwritten with 0 by the np.where below.
    with np.errstate(divide="ignore", invalid="ignore"):
        top = np.power(a**2 - b**2, 2)
        bottom = 2 * np.power(product, 3 / 2)
        terms = np.where(product != 0, top / bottom, 0)
        return np.sum(terms)
913
+
914
def matusita(self, u, v):
    """Return the Matusita distance between two vectors.

    Computed as sqrt(sum((sqrt(u) - sqrt(v))^2)).

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Matusita distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.

    Notes
    -----
    Equals square root of Squared-chord distance.
    """
    root_gap = np.sqrt(np.asarray(u)) - np.sqrt(np.asarray(v))
    squared_chord = np.sum(root_gap ** 2)
    return np.sqrt(squared_chord)
938
+
939
def minkowski(self, u, v, p=2):
    """Return the Minkowski distance between two vectors.

    The distance is the order-``p`` norm of ``u - v`` as computed by
    ``np.linalg.norm``.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.
    - p: The order of the norm of the difference.

    Returns
    -------
    - The Minkowski distance between the two vectors.

    Notes
    -----
    As p goes to infinity, the Chebyshev distance is obtained.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4):300-307.
    """
    delta = np.asarray(u) - np.asarray(v)
    return np.linalg.norm(delta, ord=p)
964
+
965
def penroseshape(self, u, v):
    """Return the Penrose shape distance between two vectors.

    Each vector is centred by subtracting its own mean; the result is the
    Euclidean length of the difference of the two centred vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - sqrt(sum(((u - mean(u)) - (v - mean(v)))^2)).

    References
    ----------
    1. Deza M, Deza E (2009) Encyclopedia of Distances.
       Springer-Verlag Berlin Heidelberg. 1-590.
    """
    a, b = np.asarray(u), np.asarray(v)
    centred_a = a - np.mean(a)
    centred_b = b - np.mean(b)
    return np.sqrt(np.sum((centred_a - centred_b) ** 2))
985
+
986
def prob_chisq(self, u, v):
    """Return the Probabilistic chi-square distance between two vectors.

    Computed as 2 * sum((u - v)^2 / (u + v)); elements where u + v is zero
    contribute 0.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Probabilistic chi-square distance between the two vectors.

    Notes
    -----
    Added by SC.
    """
    a, b = np.asarray(u), np.asarray(v)
    pair_sum = a + b
    # Warnings from zero denominators are silenced; those positions are
    # replaced with 0 by np.where.
    with np.errstate(divide="ignore", invalid="ignore"):
        per_element = np.where(pair_sum != 0, (a - b) ** 2 / pair_sum, 0)
        return 2 * np.sum(per_element)
1005
+
1006
def ruzicka(self, u, v):
    """Return the Ruzicka distance between two vectors.

    Computed as one minus the ratio of the summed element-wise minima to
    the summed element-wise maxima.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - 1 - sum(min(u, v)) / sum(max(u, v)).

    Notes
    -----
    Added by SC.
    """
    a, b = np.asarray(u), np.asarray(v)
    envelope = np.sum(np.maximum(a, b))
    overlap = np.sum(np.minimum(a, b))
    return 1 - overlap / envelope
1025
+
1026
def sorensen(self, u, v):
    """Return the Sorensen distance between two vectors.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - sum(|u - v|) / sum(u + v).

    Notes
    -----
    The Sorensen distance equals the Manhattan distance divided by the sum
    of the two vectors.

    Added by SC.
    """
    a, b = np.asarray(u), np.asarray(v)
    manhattan = np.sum(np.abs(a - b))
    combined_mass = np.sum(a + b)
    return manhattan / combined_mass
1046
+
1047
def squared_chisq(self, u, v):
    """Return the Squared chi-square distance between two vectors.

    Computed as sum((u - v)^2 / (u + v)); elements where u + v is zero
    contribute 0.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Squared chi-square distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    a, b = np.asarray(u), np.asarray(v)
    pair_sum = a + b
    # Warnings from zero denominators are silenced; those positions are
    # replaced with 0 by np.where.
    with np.errstate(divide="ignore", invalid="ignore"):
        per_element = np.where(pair_sum != 0, (a - b) ** 2 / pair_sum, 0)
        return np.sum(per_element)
1069
+
1070
def squaredchord(self, u, v):
    """Return the Squared-chord distance between two vectors.

    Computed as sum((sqrt(u) - sqrt(v))^2).

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Squared-chord distance between the two vectors.

    References
    ----------
    1. Gavin DG et al. (2003) A statistical approach to evaluating
       distance metrics and analog assignments for pollen records.
       Quaternary Research 60:356–367.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equal to the squared Matusita distance.
    """
    root_gap = np.sqrt(np.asarray(u)) - np.sqrt(np.asarray(v))
    return np.sum(root_gap ** 2)
1097
+
1098
def squared_euclidean(self, u, v):
    """Return the Squared Euclidean distance between two vectors.

    Computed as the dot product of the difference vector with itself.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - dot(u - v, u - v).

    References
    ----------
    1. Gavin DG et al. (2003) A statistical approach to evaluating
       distance metrics and analog assignments for pollen records.
       Quaternary Research 60:356–367.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equal to the square of the Euclidean distance.
    """
    delta = np.asarray(u) - np.asarray(v)
    return np.dot(delta, delta)
1125
+
1126
def taneja(self, u, v):
    """Return the Taneja distance between two vectors.

    Computed as sum(((u + v) / 2) * log((u + v) / (2 * sqrt(u * v)))),
    after zeros have been replaced by ``self.epsilon``.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Taneja distance between the two vectors.

    References
    ----------
    1. Taneja IJ. (1995), New Developments in Generalized Information
       Measures, Chapter in: Advances in Imaging and Electron Physics,
       Ed. P.W. Hawkes, 91, 37-135.
    2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    a, b = np.asarray(u), np.asarray(v)
    # Replace exact zeros so the denominator and the log stay finite.
    eps = self.epsilon
    a = np.where(a == 0, eps, a)
    b = np.where(b == 0, eps, b)
    pair_sum = a + b
    twice_geometric_mean = 2 * np.sqrt(a * b)
    return np.sum((pair_sum / 2) * np.log(pair_sum / twice_geometric_mean))
1152
+
1153
def tanimoto(self, u, v):
    """Return the Tanimoto distance between two vectors.

    With U = sum(u), V = sum(v) and M = sum(min(u, v)), the result is
    (U + V - 2M) / (U + V - M).

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Tanimoto distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals Soergel distance.
    """
    a, b = np.asarray(u), np.asarray(v)
    combined = np.sum(a) + np.sum(b)
    overlap = np.sum(np.minimum(a, b))
    return (combined - 2 * overlap) / (combined - overlap)
1181
+
1182
def topsoe(self, u, v):
    """Return the Topsøe distance between two vectors.

    Computed as sum(u * log(2u / (u + v)) + v * log(2v / (u + v))), after
    zeros have been replaced by ``self.epsilon``.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Topsøe distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.

    Notes
    -----
    Equals two times Jensen-Shannon divergence.
    """
    a, b = np.asarray(u), np.asarray(v)
    # Replace exact zeros so the logarithms and ratios stay finite.
    eps = self.epsilon
    a = np.where(a == 0, eps, a)
    b = np.where(b == 0, eps, b)
    pair_sum = a + b
    left_terms = a * np.log(2 * a / pair_sum)
    right_terms = b * np.log(2 * b / pair_sum)
    return np.sum(left_terms + right_terms)
1210
+
1211
def vicis_symmetric_chisq(self, u, v):
    """Return the Vicis Symmetric chi-square distance between two vectors.

    Computed as sum((u - v)^2 / min(u, v)^2); elements where the squared
    minimum is zero contribute 0.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Vicis Symmetric chi-square distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307
    """
    a, b = np.asarray(u), np.asarray(v)
    # Warnings from zero denominators are silenced; those positions are
    # replaced with 0 by np.where.
    with np.errstate(divide="ignore", invalid="ignore"):
        squared_gap = (a - b) ** 2
        squared_floor = np.minimum(a, b) ** 2
        terms = np.where(squared_floor != 0, squared_gap / squared_floor, 0)
        return np.sum(terms)
1234
+
1235
def vicis_wave_hedges(self, u, v):
    """Return the Vicis-Wave Hedges distance between two vectors.

    Computed as sum(|u - v| / min(u, v)); elements where the minimum is
    zero contribute 0.

    Parameters
    ----------
    - u, v: Input vectors between which the distance is to be calculated.

    Returns
    -------
    - The Vicis-Wave Hedges distance between the two vectors.

    References
    ----------
    1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
       Measures between Probability Density Functions. International
       Journal of Mathematical Models and Methods in Applied Sciences.
       1(4), 300-307.
    """
    a, b = np.asarray(u), np.asarray(v)
    # Warnings from zero denominators are silenced; those positions are
    # replaced with 0 by np.where.
    with np.errstate(divide="ignore", invalid="ignore"):
        abs_gap = np.abs(a - b)
        floor = np.minimum(a, b)
        terms = np.where(floor != 0, abs_gap / floor, 0)
        return np.sum(terms)
1258
+
1259
+ # def fidelity(self, u, v):
1260
+ # """
1261
+ # Calculate the fidelity distance between two vectors.
1262
+
1263
+ # The fidelity distance measures the similarity between two probability
1264
+ # distributions.
1265
+
1266
+ # Parameters
1267
+ # ----------
1268
+ # - u, v: Input vectors between which the distance is to be calculated.
1269
+
1270
+ # Returns
1271
+ # -------
1272
+ # - The fidelity distance between the two vectors.
1273
+
1274
+ # Notes
1275
+ # -----
1276
+ # Added by SC.
1277
+ # """
1278
+ # u, v = np.asarray(u), np.asarray(v)
1279
+ # return 1 - (np.sum(np.sqrt(u * v)))
1280
+
1281
+ # # NEEDS CHECKING
1282
+ # # def harmonicmean(self, u, v):
1283
+ # # """
1284
+ # # Harmonic mean distance.
1285
+ # # Notes:
1286
+ # # Added by SC.
1287
+ # # """
1288
+ # # u, v = np.asarray(u), np.asarray(v)
1289
+ # # return 1 - 2.0 * np.sum(u * v / (u + v))
1290
+
1291
+ # # def inner(self, u, v):
1292
+ # # """
1293
+ # # Calculate the inner product distance between two vectors.
1294
+
1295
+ # # The inner product distance is a measure of
1296
+ # # similarity between two vectors,
1297
+ # # based on their inner product.
1298
+
1299
+ # # Parameters
1300
+ # # ----------
1301
+ # # - u, v: Input vectors between which the distance is to be calculated.
1302
+
1303
+ # # Returns
1304
+ # # -------
1305
+ # # - The inner product distance between the two vectors.
1306
+
1307
+ # # Notes
1308
+ # # -----
1309
+ # # Added by SC.
1310
+ # # """
1311
+ # # u, v = np.asarray(u), np.asarray(v)
1312
+ # # return 1 - np.dot(u, v)
1313
+
1314
+ # def k_divergence(self, u, v):
1315
+ # """Calculate the K divergence between two vectors.
1316
+
1317
+ # Parameters
1318
+ # ----------
1319
+ # - u, v: Input vectors between which the divergence is to be calculated.
1320
+
1321
+ # Returns
1322
+ # -------
1323
+ # - The K divergence between the two vectors.
1324
+
1325
+ # References
1326
+ # ----------
1327
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1328
+ # Measures between Probability Density Functions. International
1329
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1330
+ # 1(4), 300-307.
1331
+ # """
1332
+ # u, v = np.asarray(u), np.asarray(v)
1333
+ # u = np.where(u == 0, self.epsilon, u)
1334
+ # v = np.where(v == 0, self.epsilon, v)
1335
+ # return np.sum(u * np.log(2 * u / (u + v)))
1336
+
1337
+ # def kl_divergence(self, u, v):
1338
+ # """Calculate the Kullback-Leibler divergence between two vectors.
1339
+
1340
+ # The Kullback-Leibler divergence measures the difference between two
1341
+ # probability distributions.
1342
+
1343
+ # Parameters
1344
+ # ----------
1345
+ # - u, v: Input vectors between which the divergence is to be calculated.
1346
+
1347
+ # Returns
1348
+ # -------
1349
+ # - The Kullback-Leibler divergence between the two vectors.
1350
+
1351
+ # References
1352
+ # ----------
1353
+ # 1. Kullback S, Leibler RA (1951) On information and sufficiency.
1354
+ # Ann. Math. Statist. 22:79–86
1355
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1356
+ # Measures between Probability Density Functions. International
1357
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1358
+ # 1(4):300-307.
1359
+ # """
1360
+ # u, v = np.asarray(u), np.asarray(v)
1361
+ # u = np.where(u == 0, self.epsilon, u)
1362
+ # v = np.where(v == 0, self.epsilon, v)
1363
+ # return np.sum(u * np.log(u / v))
1364
+
1365
+ # def max_symmetric_chisq(self, u, v):
1366
+ # """Calculate the maximum symmetric chi-square distance.
1367
+
1368
+ # Parameters
1369
+ # ----------
1370
+ # - u, v: Input vectors between which the distance is to be calculated.
1371
+
1372
+ # Returns
1373
+ # -------
1374
+ # - The maximum symmetric chi-square distance between the two vectors.
1375
+
1376
+ # References
1377
+ # ----------
1378
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1379
+ # Measures between Probability Density Functions. International
1380
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1381
+ # 1(4):300-307.
1382
+ # """
1383
+ # u, v = np.asarray(u), np.asarray(v)
1384
+ # return max(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
1385
+
1386
+ # def min_symmetric_chisq(self, u, v):
1387
+ # """Calculate the minimum symmetric chi-square distance.
1388
+
1389
+ # Parameters
1390
+ # ----------
1391
+ # - u, v: Input vectors between which the distance is to be calculated.
1392
+
1393
+ # Returns
1394
+ # -------
1395
+ # - The minimum symmetric chi-square distance between the two vectors.
1396
+
1397
+ # Notes
1398
+ # -----
1399
+ # Added by SC.
1400
+ # """
1401
+ # u, v = np.asarray(u), np.asarray(v)
1402
+ # return min(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
1403
+
1404
+ # def neyman_chisq(self, u, v):
1405
+ # """Calculate the Neyman chi-square distance between two vectors.
1406
+
1407
+ # Parameters
1408
+ # ----------
1409
+ # - u, v: Input vectors between which the distance is to be calculated.
1410
+
1411
+ # Returns
1412
+ # -------
1413
+ # - The Neyman chi-square distance between the two vectors.
1414
+
1415
+ # References
1416
+ # ----------
1417
+ # 1. Neyman J (1949) Contributions to the theory of the chi^2 test.
1418
+ # In Proceedings of the First Berkeley Symposium on Mathematical
1419
+ # Statistics and Probability.
1420
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1421
+ # Measures between Probability Density Functions. International
1422
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1423
+ # 1(4), 300-307.
1424
+ # """
1425
+ # u, v = np.asarray(u), np.asarray(v)
1426
+ # with np.errstate(divide="ignore", invalid="ignore"):
1427
+ # return np.sum(np.where(u != 0, (u - v) ** 2 / u, 0))
1428
+
1429
+ # def pearson_chisq(self, u, v):
1430
+ # """Calculate the Pearson chi-square divergence between two vectors.
1431
+
1432
+ # Parameters
1433
+ # ----------
1434
+ # - u, v: Input vectors between which the divergence is to be calculated.
1435
+
1436
+ # Returns
1437
+ # -------
1438
+ # - The Pearson chi-square divergence between the two vectors.
1439
+
1440
+ # References
1441
+ # ----------
1442
+ # 1. Pearson K. (1900) On the Criterion that a given system of
1443
+ # deviations from the probable in the case of correlated system
1444
+ # of variables is such that it can be reasonably supposed to have
1445
+ # arisen from random sampling, Phil. Mag. 50, 157-172.
1446
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1447
+ # Measures between Probability Density Functions. International
1448
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1449
+ # 1(4), 300-307.
1450
+
1451
+ # Notes
1452
+ # -----
1453
+ # Pearson chi-square divergence is asymmetric.
1454
+ # """
1455
+ # u, v = np.asarray(u), np.asarray(v)
1456
+ # with np.errstate(divide="ignore", invalid="ignore"):
1457
+ # return np.sum(np.where(v != 0, (u - v) ** 2 / v, 0))
1458
+
1459
+ # def nonintersection(self, u, v):
1460
+ # """
1461
+ # Calculate the Nonintersection distance between two vectors.
1462
+
1463
+ # Parameters
1464
+ # ----------
1465
+ # - u, v: Input vectors between which the distance is to be calculated.
1466
+
1467
+ # Returns
1468
+ # -------
1469
+ # - The Nonintersection distance between the two vectors.
1470
+
1471
+ # References
1472
+ # ----------
1473
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1474
+ # Measures between Probability Density Functions. International
1475
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1476
+ # 1(4), 300-307.
1477
+
1478
+ # Notes
1479
+ # -----
1480
+ # When used for comparing two probability density functions (pdfs),
1481
+ # Nonintersection distance equals half of Cityblock distance.
1482
+ # """
1483
+ # u, v = np.asarray(u), np.asarray(v)
1484
+ # return 1 - np.sum(np.minimum(u, v))