distclassipy 0.1.6a0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
distclassipy/distances.py CHANGED
@@ -48,6 +48,52 @@ import numpy as np
48
48
 
49
49
  import scipy
50
50
 
51
+ _ALL_METRICS = [
52
+ "euclidean",
53
+ "braycurtis",
54
+ "canberra",
55
+ "cityblock",
56
+ "chebyshev",
57
+ "clark",
58
+ "correlation",
59
+ "cosine",
60
+ "hellinger",
61
+ "jaccard",
62
+ "lorentzian",
63
+ "marylandbridge",
64
+ "meehl",
65
+ "motyka",
66
+ "soergel",
67
+ "wave_hedges",
68
+ "kulczynski",
69
+ "add_chisq",
70
+ "acc",
71
+ "chebyshev_min",
72
+ "czekanowski",
73
+ "dice",
74
+ "divergence",
75
+ "google",
76
+ "gower",
77
+ "jeffreys",
78
+ "jensenshannon_divergence",
79
+ "jensen_difference",
80
+ "kumarjohnson",
81
+ "matusita",
82
+ "minkowski",
83
+ "penroseshape",
84
+ "prob_chisq",
85
+ "ruzicka",
86
+ "sorensen",
87
+ "squared_chisq",
88
+ "squaredchord",
89
+ "squared_euclidean",
90
+ "taneja",
91
+ "tanimoto",
92
+ "topsoe",
93
+ "vicis_symmetric_chisq",
94
+ "vicis_wave_hedges",
95
+ ]
96
+
51
97
 
52
98
  class Distance:
53
99
  """A class to calculate various distance metrics between vectors.
@@ -352,7 +398,11 @@ class Distance:
352
398
  1(4), 300-307.
353
399
  """
354
400
  u, v = np.asarray(u), np.asarray(v)
355
- return np.sqrt(2 * np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
401
+ # Clip negative values to zero for valid sqrt
402
+ with np.errstate(divide="ignore", invalid="ignore"):
403
+ u = np.clip(u, a_min=0, a_max=None)
404
+ v = np.clip(v, a_min=0, a_max=None)
405
+ return np.sqrt(2 * np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
356
406
 
357
407
  def jaccard(self, u, v):
358
408
  """Calculate the Jaccard distance between two vectors.
@@ -402,7 +452,8 @@ class Distance:
402
452
  eschew the log of zero.
403
453
  """
404
454
  u, v = np.asarray(u), np.asarray(v)
405
- return np.sum(np.log(np.abs(u - v) + 1))
455
+ with np.errstate(divide="ignore", invalid="ignore"):
456
+ return np.sum(np.log(np.abs(u - v) + 1))
406
457
 
407
458
  def marylandbridge(self, u, v):
408
459
  """Calculate the Maryland Bridge distance between two vectors.
@@ -578,907 +629,932 @@ class Distance:
578
629
  with np.errstate(divide="ignore", invalid="ignore"):
579
630
  return np.sum(np.where(uvmult != 0, ((u - v) ** 2 * (u + v)) / uvmult, 0))
580
631
 
632
+ # NOTE: THE FOLLOWING DISTANCES HAVE NOT BEEN TESTED EXTENSIVELY AND ARE
633
+ # CURRENTLY IN ALPHA AND SO HAVE BEEN COMMENTED
634
+
635
+ def acc(self, u, v):
636
+ """Calculate the average of Cityblock and Chebyshev distance.
637
+
638
+ This function computes the ACC distance, also known as the
639
+ Average distance, between two vectors u and v. It is the average of the
640
+ Cityblock (or Manhattan) and Chebyshev distances.
641
+
642
+ Parameters
643
+ ----------
644
+ - u, v: Input vectors between which the distance is to be calculated.
645
+
646
+ Returns
647
+ -------
648
+ - The ACC distance between the two vectors.
649
+
650
+ References
651
+ ----------
652
+ 1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean
653
+ Geometry. Dover Publications.
654
+ 2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
655
+ Measures between Probability Density Functions. International
656
+ Journal of Mathematical Models and Methods in Applied Sciences.
657
+ vol. 1(4), pp. 300-307.
658
+ """
659
+ return (self.cityblock(u, v) + self.chebyshev(u, v)) / 2
660
+
661
+ # def bhattacharyya(self, u, v):
662
+ # """
663
+ # Calculate the Bhattacharyya distance between two vectors.
664
+
665
+ # Returns a distance value between 0 and 1.
666
+
667
+ # Parameters
668
+ # ----------
669
+ # - u, v: Input vectors between which the distance is to be calculated.
670
+
671
+ # Returns
672
+ # -------
673
+ # - The Bhattacharyya distance between the two vectors.
674
+
675
+ # References
676
+ # ----------
677
+ # 1. Bhattacharyya A (1947) On a measure of divergence between two
678
+ # statistical populations defined by probability distributions,
679
+ # Bull. Calcutta Math. Soc., 35, 99–109.
680
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
681
+ # Measures between Probability Density Functions. International
682
+ # Journal of Mathematical Models and Methods in Applied Sciences.
683
+ # 1(4), 300-307.
684
+ # 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
685
+ # """
686
+ # u, v = np.asarray(u), np.asarray(v)
687
+ # with np.errstate(divide="ignore", invalid="ignore"):
688
+ # return -np.log(np.sum(np.sqrt(u * v)))
689
+
690
+ def chebyshev_min(self, u, v):
691
+ """Calculate the minimum value distance between two vectors.
692
+
693
+ This measure represents a custom approach by Zielezinski to distance
694
+ measurement, focusing on the minimum absolute difference.
695
+
696
+ Parameters
697
+ ----------
698
+ - u, v: Input vectors between which the distance is to be calculated.
699
+
700
+ Returns
701
+ -------
702
+ - The minimum value distance between the two vectors.
703
+ """
704
+ u, v = np.asarray(u), np.asarray(v)
705
+ return np.amin(np.abs(u - v))
706
+
707
+ def czekanowski(self, u, v):
708
+ """Calculate the Czekanowski distance between two vectors.
709
+
710
+ Parameters
711
+ ----------
712
+ - u, v: Input vectors between which the distance is to be calculated.
713
+
714
+ Returns
715
+ -------
716
+ - The Czekanowski distance between the two vectors.
717
+
718
+ References
719
+ ----------
720
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
721
+ Measures between Probability Density Functions. International
722
+ Journal of Mathematical Models and Methods in Applied Sciences.
723
+ 1(4), 300-307.
724
+ """
725
+ u, v = np.asarray(u), np.asarray(v)
726
+ return np.sum(np.abs(u - v)) / np.sum(u + v)
727
+
728
+ def dice(self, u, v):
729
+ """Calculate the Dice dissimilarity between two vectors.
730
+
731
+ Synonyms:
732
+ Sorensen distance
733
+
734
+ Parameters
735
+ ----------
736
+ - u, v: Input vectors between which the distance is to be calculated.
737
+
738
+ Returns
739
+ -------
740
+ - The Dice dissimilarity between the two vectors.
741
+
742
+ References
743
+ ----------
744
+ 1. Dice LR (1945) Measures of the amount of ecologic association
745
+ between species. Ecology. 26, 297-302.
746
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
747
+ Measures between Probability Density Functions. International
748
+ Journal of Mathematical Models and Methods in Applied Sciences.
749
+ 1(4), 300-307.
750
+ """
751
+ u, v = np.asarray(u), np.asarray(v)
752
+ u_v = u - v
753
+ return np.dot(u_v, u_v) / (np.dot(u, u) + np.dot(v, v))
754
+
755
+ def divergence(self, u, v):
756
+ """Calculate the divergence between two vectors.
581
757
 
582
- # NOTE: THE FOLLOWING DISTANCES HAVE NOT BEEN TESTED EXTENSIVELY AND ARE
583
- # CURRENTLY IN ALPHA AND SO HAVE BEEN COMMENTED
584
-
585
- # def acc(self, u, v):
586
- # """Calculate the average of Cityblock and Chebyshev distance.
587
-
588
- # This function computes the ACC distance, also known as the
589
- # Average distance, between two vectors u and v. It is the average of the
590
- # Cityblock (or Manhattan) and Chebyshev distances.
591
-
592
- # Parameters
593
- # ----------
594
- # - u, v: Input vectors between which the distance is to be calculated.
595
-
596
- # Returns
597
- # -------
598
- # - The ACC distance between the two vectors.
599
-
600
- # References
601
- # ----------
602
- # 1. Krause EF (2012) Taxicab Geometry An Adventure in Non-Euclidean
603
- # Geometry. Dover Publications.
604
- # 2. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
605
- # Measures between Probability Density Functions. International
606
- # Journal of Mathematical Models and Methods in Applied Sciences.
607
- # vol. 1(4), pp. 300-307.
608
- # """
609
- # return (self.cityblock(u, v) + self.chebyshev(u, v)) / 2
610
-
611
- # # def bhattacharyya(self, u, v):
612
- # # """
613
- # # Calculate the Bhattacharyya distance between two vectors.
614
-
615
- # # Returns a distance value between 0 and 1.
616
-
617
- # # Parameters
618
- # # ----------
619
- # # - u, v: Input vectors between which the distance is to be calculated.
620
-
621
- # # Returns
622
- # # -------
623
- # # - The Bhattacharyya distance between the two vectors.
624
-
625
- # # References
626
- # # ----------
627
- # # 1. Bhattacharyya A (1947) On a measure of divergence between two
628
- # # statistical populations defined by probability distributions,
629
- # # Bull. Calcutta Math. Soc., 35, 99–109.
630
- # # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
631
- # # Measures between Probability Density Functions. International
632
- # # Journal of Mathematical Models and Methods in Applied Sciences.
633
- # # 1(4), 300-307.
634
- # # 3. https://en.wikipedia.org/wiki/Bhattacharyya_distance
635
- # # """
636
- # # u, v = np.asarray(u), np.asarray(v)
637
- # # return -np.log(np.sum(np.sqrt(u * v)))
638
-
639
- # def chebyshev_min(self, u, v):
640
- # """Calculate the minimum value distance between two vectors.
641
-
642
- # This measure represents a custom approach by Zielezinski to distance
643
- # measurement, focusing on the minimum absolute difference.
644
-
645
- # Parameters
646
- # ----------
647
- # - u, v: Input vectors between which the distance is to be calculated.
648
-
649
- # Returns
650
- # -------
651
- # - The minimum value distance between the two vectors.
652
- # """
653
- # u, v = np.asarray(u), np.asarray(v)
654
- # return np.amin(np.abs(u - v))
655
-
656
- # def czekanowski(self, u, v):
657
- # """Calculate the Czekanowski distance between two vectors.
658
-
659
- # Parameters
660
- # ----------
661
- # - u, v: Input vectors between which the distance is to be calculated.
662
-
663
- # Returns
664
- # -------
665
- # - The Czekanowski distance between the two vectors.
666
-
667
- # References
668
- # ----------
669
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
670
- # Measures between Probability Density Functions. International
671
- # Journal of Mathematical Models and Methods in Applied Sciences.
672
- # 1(4), 300-307.
673
- # """
674
- # u, v = np.asarray(u), np.asarray(v)
675
- # return np.sum(np.abs(u - v)) / np.sum(u + v)
676
-
677
- # def dice(self, u, v):
678
- # """Calculate the Dice dissimilarity between two vectors.
679
-
680
- # Synonyms:
681
- # Sorensen distance
682
-
683
- # Parameters
684
- # ----------
685
- # - u, v: Input vectors between which the distance is to be calculated.
686
-
687
- # Returns
688
- # -------
689
- # - The Dice dissimilarity between the two vectors.
690
-
691
- # References
692
- # ----------
693
- # 1. Dice LR (1945) Measures of the amount of ecologic association
694
- # between species. Ecology. 26, 297-302.
695
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
696
- # Measures between Probability Density Functions. International
697
- # Journal of Mathematical Models and Methods in Applied Sciences.
698
- # 1(4), 300-307.
699
- # """
700
- # u, v = np.asarray(u), np.asarray(v)
701
- # u_v = u - v
702
- # return np.dot(u_v, u_v) / (np.dot(u, u) + np.dot(v, v))
703
-
704
- # def divergence(self, u, v):
705
- # """Calculate the divergence between two vectors.
706
-
707
- # Divergence equals squared Clark distance multiplied by 2.
708
-
709
- # Parameters
710
- # ----------
711
- # - u, v: Input vectors between which the distance is to be calculated.
712
-
713
- # Returns
714
- # -------
715
- # - The divergence between the two vectors.
716
-
717
- # References
718
- # ----------
719
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
720
- # Measures between Probability Density Functions. International
721
- # Journal of Mathematical Models and Methods in Applied Sciences.
722
- # 1(4), 300-307.
723
- # """
724
- # u, v = np.asarray(u), np.asarray(v)
725
- # with np.errstate(invalid="ignore"):
726
- # return 2 * np.nansum(np.power(u - v, 2) / np.power(u + v, 2))
727
-
728
- # # def fidelity(self, u, v):
729
- # # """
730
- # # Calculate the fidelity distance between two vectors.
731
-
732
- # # The fidelity distance measures the similarity between two probability
733
- # # distributions.
734
-
735
- # # Parameters
736
- # # ----------
737
- # # - u, v: Input vectors between which the distance is to be calculated.
738
-
739
- # # Returns
740
- # # -------
741
- # # - The fidelity distance between the two vectors.
742
-
743
- # # Notes
744
- # # -----
745
- # # Added by SC.
746
- # # """
747
- # # u, v = np.asarray(u), np.asarray(v)
748
- # # return 1 - (np.sum(np.sqrt(u * v)))
749
-
750
- # def google(self, u, v):
751
- # """Calculate the Normalized Google Distance (NGD) between two vectors.
752
-
753
- # NGD is a measure of similarity derived from the number of hits returned by the
754
- # Google search engine for a given set of keywords.
755
-
756
- # Parameters
757
- # ----------
758
- # - u, v: Input vectors between which the distance is to be calculated.
759
-
760
- # Returns
761
- # -------
762
- # - The Normalized Google Distance between the two vectors.
763
-
764
- # Notes
765
- # -----
766
- # When used for comparing two probability density functions (pdfs),
767
- # Google distance equals half of Cityblock distance.
768
-
769
- # References
770
- # ----------
771
- # 1. Lee & Rashid (2008) Information Technology, ITSim 2008.
772
- # doi:10.1109/ITSIM.2008.4631601.
773
- # """
774
- # u, v = np.asarray(u), np.asarray(v)
775
- # x = float(np.sum(u))
776
- # y = float(np.sum(v))
777
- # summin = float(np.sum(np.minimum(u, v)))
778
- # return (max([x, y]) - summin) / ((x + y) - min([x, y]))
779
-
780
- # def gower(self, u, v):
781
- # """Calculate the Gower distance between two vectors.
782
-
783
- # The Gower distance equals the Cityblock distance divided by the vector length.
784
-
785
- # Parameters
786
- # ----------
787
- # - u, v: Input vectors between which the distance is to be calculated.
788
-
789
- # Returns
790
- # -------
791
- # - The Gower distance between the two vectors.
792
-
793
- # References
794
- # ----------
795
- # 1. Gower JC. (1971) General Coefficient of Similarity
796
- # and Some of Its Properties, Biometrics 27, 857-874.
797
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
798
- # Measures between Probability Density Functions. International
799
- # Journal of Mathematical Models and Methods in Applied Sciences.
800
- # 1(4), 300-307.
801
- # """
802
- # u, v = np.asarray(u), np.asarray(v)
803
- # return np.sum(np.abs(u - v)) / u.size
804
-
805
- # # NEEDS CHECKING
806
- # # def harmonicmean(self, u, v):
807
- # # """
808
- # # Harmonic mean distance.
809
- # # Notes:
810
- # # Added by SC.
811
- # # """
812
- # # u,v = np.asarray(u), np.asarray(v)
813
- # # return 1 - 2.*np.sum(u*v/(u+v))
814
-
815
- # # def inner(self, u, v):
816
- # # """
817
- # # Calculate the inner product distance between two vectors.
818
-
819
- # # The inner product distance is a measure of similarity between two vectors,
820
- # # based on their inner product.
821
-
822
- # # Parameters
823
- # # ----------
824
- # # - u, v: Input vectors between which the distance is to be calculated.
825
-
826
- # # Returns
827
- # # -------
828
- # # - The inner product distance between the two vectors.
829
-
830
- # # Notes
831
- # # -----
832
- # # Added by SC.
833
- # # """
834
- # # u, v = np.asarray(u), np.asarray(v)
835
- # # return 1 - np.dot(u, v)
836
-
837
- # def jeffreys(self, u, v):
838
- # """Calculate the Jeffreys divergence between two vectors.
839
-
840
- # The Jeffreys divergence is a symmetric version of the Kullback-Leibler
841
- # divergence.
842
-
843
- # Parameters
844
- # ----------
845
- # - u, v: Input vectors between which the divergence is to be calculated.
846
-
847
- # Returns
848
- # -------
849
- # - The Jeffreys divergence between the two vectors.
850
-
851
- # References
852
- # ----------
853
- # 1. Jeffreys H (1946) An Invariant Form for the Prior Probability
854
- # in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
855
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
856
- # Measures between Probability Density Functions. International
857
- # Journal of Mathematical Models and Methods in Applied Sciences.
858
- # 1(4), 300-307.
859
- # """
860
- # u, v = np.asarray(u), np.asarray(v)
861
- # # Add epsilon to zeros in vectors to avoid division
862
- # # by 0 and/or log of 0. Alternatively, zeros in the
863
- # # vectors could be ignored or masked (see below).
864
- # # u = ma.masked_where(u == 0, u)
865
- # # v = ma.masked_where(v == 0, u)
866
- # u = np.where(u == 0, self.epsilon, u)
867
- # v = np.where(v == 0, self.epsilon, v)
868
- # return np.sum((u - v) * np.log(u / v))
869
-
870
- # def jensenshannon_divergence(self, u, v):
871
- # """Calculate the Jensen-Shannon divergence between two vectors.
872
-
873
- # The Jensen-Shannon divergence is a symmetric and finite measure of similarity
874
- # between two probability distributions.
875
-
876
- # Parameters
877
- # ----------
878
- # - u, v: Input vectors between which the divergence is to be calculated.
879
-
880
- # Returns
881
- # -------
882
- # - The Jensen-Shannon divergence between the two vectors.
883
-
884
- # References
885
- # ----------
886
- # 1. Lin J. (1991) Divergence measures based on the Shannon entropy.
887
- # IEEE Transactions on Information Theory, 37(1):145–151.
888
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
889
- # Measures between Probability Density Functions. International
890
- # Journal of Mathematical Models and Methods in Applied Sciences.
891
- # 1(4), 300-307.
892
- # Comments:
893
- # Equals Jensen difference in Sung-Hyuk (2007):
894
- # u = np.where(u==0, self.epsilon, u)
895
- # v = np.where(v==0, self.epsilon, v)
896
- # el1 = (u * np.log(u) + v * np.log(v)) / 2
897
- # el2 = (u + v)/2
898
- # el3 = np.log(el2)
899
- # return np.sum(el1 - el2 * el3)
900
- # """
901
- # u, v = np.asarray(u), np.asarray(v)
902
- # u = np.where(u == 0, self.epsilon, u)
903
- # v = np.where(v == 0, self.epsilon, v)
904
- # dl = u * np.log(2 * u / (u + v))
905
- # dr = v * np.log(2 * v / (u + v))
906
- # return (np.sum(dl) + np.sum(dr)) / 2
907
-
908
- # def jensen_difference(self, u, v):
909
- # """Calculate the Jensen difference between two vectors.
910
-
911
- # The Jensen difference is considered similar to the Jensen-Shannon divergence.
912
-
913
- # Parameters
914
- # ----------
915
- # - u, v: Input vectors between which the distance is to be calculated.
916
-
917
- # Returns
918
- # -------
919
- # - The Jensen difference between the two vectors.
920
-
921
- # Notes
922
- # -----
923
- # 1. Equals half of Topsøe distance
924
- # 2. Equals squared jensenshannon_distance.
925
-
926
-
927
- # References
928
- # ----------
929
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
930
- # Measures between Probability Density Functions. International
931
- # Journal of Mathematical Models and Methods in Applied Sciences.
932
- # 1(4), 300-307.
933
- # """
934
- # u, v = np.asarray(u), np.asarray(v)
935
- # u = np.where(u == 0, self.epsilon, u)
936
- # v = np.where(v == 0, self.epsilon, v)
937
- # el1 = (u * np.log(u) + v * np.log(v)) / 2
938
- # el2 = (u + v) / 2
939
- # return np.sum(el1 - el2 * np.log(el2))
940
-
941
- # def k_divergence(self, u, v):
942
- # """Calculate the K divergence between two vectors.
943
-
944
- # Parameters
945
- # ----------
946
- # - u, v: Input vectors between which the divergence is to be calculated.
947
-
948
- # Returns
949
- # -------
950
- # - The K divergence between the two vectors.
951
-
952
- # References
953
- # ----------
954
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
955
- # Measures between Probability Density Functions. International
956
- # Journal of Mathematical Models and Methods in Applied Sciences.
957
- # 1(4), 300-307.
958
- # """
959
- # u, v = np.asarray(u), np.asarray(v)
960
- # u = np.where(u == 0, self.epsilon, u)
961
- # v = np.where(v == 0, self.epsilon, v)
962
- # return np.sum(u * np.log(2 * u / (u + v)))
963
-
964
- # def kl_divergence(self, u, v):
965
- # """Calculate the Kullback-Leibler divergence between two vectors.
966
-
967
- # The Kullback-Leibler divergence measures the difference between two
968
- # probability distributions.
969
-
970
- # Parameters
971
- # ----------
972
- # - u, v: Input vectors between which the divergence is to be calculated.
973
-
974
- # Returns
975
- # -------
976
- # - The Kullback-Leibler divergence between the two vectors.
977
-
978
- # References
979
- # ----------
980
- # 1. Kullback S, Leibler RA (1951) On information and sufficiency.
981
- # Ann. Math. Statist. 22:79–86
982
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
983
- # Measures between Probability Density Functions. International
984
- # Journal of Mathematical Models and Methods in Applied Sciences.
985
- # 1(4):300-307.
986
- # """
987
- # u, v = np.asarray(u), np.asarray(v)
988
- # u = np.where(u == 0, self.epsilon, u)
989
- # v = np.where(v == 0, self.epsilon, v)
990
- # return np.sum(u * np.log(u / v))
991
-
992
- # def kumarjohnson(self, u, v):
993
- # """Calculate the Kumar-Johnson distance between two vectors.
994
-
995
- # Parameters
996
- # ----------
997
- # - u, v: Input vectors between which the distance is to be calculated.
998
-
999
- # Returns
1000
- # -------
1001
- # - The Kumar-Johnson distance between the two vectors.
1002
-
1003
- # References
1004
- # ----------
1005
- # 1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
1006
- # and information inequalities, Journal of Inequalities in pure
1007
- # and applied Mathematics. 6(3).
1008
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1009
- # Measures between Probability Density Functions. International
1010
- # Journal of Mathematical Models and Methods in Applied Sciences.
1011
- # 1(4):300-307.
1012
- # """
1013
- # u, v = np.asarray(u), np.asarray(v)
1014
- # uvmult = u * v
1015
- # with np.errstate(divide="ignore", invalid="ignore"):
1016
- # numer = np.power(u**2 - v**2, 2)
1017
- # denom = 2 * np.power(uvmult, 3 / 2)
1018
- # return np.sum(np.where(uvmult != 0, numer / denom, 0))
1019
-
1020
- # def matusita(self, u, v):
1021
- # """Calculate the Matusita distance between two vectors.
1022
-
1023
- # Parameters
1024
- # ----------
1025
- # - u, v: Input vectors between which the distance is to be calculated.
1026
-
1027
- # Returns
1028
- # -------
1029
- # - The Matusita distance between the two vectors.
1030
-
1031
- # References
1032
- # ----------
1033
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1034
- # Measures between Probability Density Functions. International
1035
- # Journal of Mathematical Models and Methods in Applied Sciences.
1036
- # 1(4):300-307.
1037
-
1038
- # Notes
1039
- # -----
1040
- # Equals square root of Squared-chord distance.
1041
- # """
1042
- # u, v = np.asarray(u), np.asarray(v)
1043
- # return np.sqrt(np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
1044
-
1045
- # def max_symmetric_chisq(self, u, v):
1046
- # """Calculate the maximum symmetric chi-square distance.
1047
-
1048
- # Parameters
1049
- # ----------
1050
- # - u, v: Input vectors between which the distance is to be calculated.
1051
-
1052
- # Returns
1053
- # -------
1054
- # - The maximum symmetric chi-square distance between the two vectors.
1055
-
1056
- # References
1057
- # ----------
1058
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1059
- # Measures between Probability Density Functions. International
1060
- # Journal of Mathematical Models and Methods in Applied Sciences.
1061
- # 1(4):300-307.
1062
- # """
1063
- # u, v = np.asarray(u), np.asarray(v)
1064
- # return max(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
1065
-
1066
- # def min_symmetric_chisq(self, u, v):
1067
- # """Calculate the minimum symmetric chi-square distance.
1068
-
1069
- # Parameters
1070
- # ----------
1071
- # - u, v: Input vectors between which the distance is to be calculated.
1072
-
1073
- # Returns
1074
- # -------
1075
- # - The minimum symmetric chi-square distance between the two vectors.
1076
-
1077
- # Notes
1078
- # -----
1079
- # Added by SC.
1080
- # """
1081
- # u, v = np.asarray(u), np.asarray(v)
1082
- # return min(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
1083
-
1084
- # def minkowski(self, u, v, p=2):
1085
- # """Calculate the Minkowski distance between two vectors.
1086
-
1087
- # Parameters
1088
- # ----------
1089
- # - u, v: Input vectors between which the distance is to be calculated.
1090
- # - p: The order of the norm of the difference.
1091
-
1092
- # Returns
1093
- # -------
1094
- # - The Minkowski distance between the two vectors.
1095
-
1096
- # Notes
1097
- # -----
1098
- # When p goes to infinite, the Chebyshev distance is derived.
1099
-
1100
- # References
1101
- # ----------
1102
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1103
- # Measures between Probability Density Functions. International
1104
- # Journal of Mathematical Models and Methods in Applied Sciences.
1105
- # 1(4):300-307.
1106
- # """
1107
- # u, v = np.asarray(u), np.asarray(v)
1108
- # return np.linalg.norm(u - v, ord=p)
1109
-
1110
- # def neyman_chisq(self, u, v):
1111
- # """Calculate the Neyman chi-square distance between two vectors.
1112
-
1113
- # Parameters
1114
- # ----------
1115
- # - u, v: Input vectors between which the distance is to be calculated.
1116
-
1117
- # Returns
1118
- # -------
1119
- # - The Neyman chi-square distance between the two vectors.
1120
-
1121
- # References
1122
- # ----------
1123
- # 1. Neyman J (1949) Contributions to the theory of the chi^2 test.
1124
- # In Proceedings of the First Berkley Symposium on Mathematical
1125
- # Statistics and Probability.
1126
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1127
- # Measures between Probability Density Functions. International
1128
- # Journal of Mathematical Models and Methods in Applied Sciences.
1129
- # 1(4), 300-307.
1130
- # """
1131
- # u, v = np.asarray(u), np.asarray(v)
1132
- # with np.errstate(divide="ignore", invalid="ignore"):
1133
- # return np.sum(np.where(u != 0, (u - v) ** 2 / u, 0))
1134
-
1135
- # # def nonintersection(self, u, v):
1136
- # # """
1137
- # # Calculate the Nonintersection distance between two vectors.
1138
-
1139
- # # Parameters
1140
- # # ----------
1141
- # # - u, v: Input vectors between which the distance is to be calculated.
1142
-
1143
- # # Returns
1144
- # # -------
1145
- # # - The Nonintersection distance between the two vectors.
1146
-
1147
- # # References
1148
- # # ----------
1149
- # # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1150
- # # Measures between Probability Density Functions. International
1151
- # # Journal of Mathematical Models and Methods in Applied Sciences.
1152
- # # 1(4), 300-307.
1153
-
1154
- # # Notes
1155
- # # -----
1156
- # # When used for comparing two probability density functions (pdfs),
1157
- # # Nonintersection distance equals half of Cityblock distance.
1158
- # # """
1159
- # # u, v = np.asarray(u), np.asarray(v)
1160
- # # return 1 - np.sum(np.minimum(u, v))
1161
-
1162
- # def pearson_chisq(self, u, v):
1163
- # """Calculate the Pearson chi-square divergence between two vectors.
1164
-
1165
- # Parameters
1166
- # ----------
1167
- # - u, v: Input vectors between which the divergence is to be calculated.
1168
-
1169
- # Returns
1170
- # -------
1171
- # - The Pearson chi-square divergence between the two vectors.
1172
-
1173
- # References
1174
- # ----------
1175
- # 1. Pearson K. (1900) On the Criterion that a given system of
1176
- # deviations from the probable in the case of correlated system
1177
- # of variables is such that it can be reasonable supposed to have
1178
- # arisen from random sampling, Phil. Mag. 50, 157-172.
1179
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1180
- # Measures between Probability Density Functions. International
1181
- # Journal of Mathematical Models and Methods in Applied Sciences.
1182
- # 1(4), 300-307.
1183
-
1184
- # Notes
1185
- # -----
1186
- # Pearson chi-square divergence is asymmetric.
1187
- # """
1188
- # u, v = np.asarray(u), np.asarray(v)
1189
- # with np.errstate(divide="ignore", invalid="ignore"):
1190
- # return np.sum(np.where(v != 0, (u - v) ** 2 / v, 0))
1191
-
1192
- # def penroseshape(self, u, v):
1193
- # """Calculate the Penrose shape distance between two vectors.
1194
-
1195
- # Parameters
1196
- # ----------
1197
- # - u, v: Input vectors between which the distance is to be calculated.
1198
-
1199
- # Returns
1200
- # -------
1201
- # - The Penrose shape distance between the two vectors.
1202
-
1203
- # References
1204
- # ----------
1205
- # 1. Deza M, Deza E (2009) Encyclopedia of Distances.
1206
- # Springer-Verlag Berlin Heidelberg. 1-590.
1207
- # """
1208
- # u, v = np.asarray(u), np.asarray(v)
1209
- # umu = np.mean(u)
1210
- # vmu = np.mean(v)
1211
- # return np.sqrt(np.sum(((u - umu) - (v - vmu)) ** 2))
1212
-
1213
- # def prob_chisq(self, u, v):
1214
- # """Calculate the Probabilistic chi-square distance between two vectors.
1215
-
1216
- # Parameters
1217
- # ----------
1218
- # - u, v: Input vectors between which the distance is to be calculated.
1219
-
1220
- # Returns
1221
- # -------
1222
- # - The Probabilistic chi-square distance between the two vectors.
1223
-
1224
- # Notes
1225
- # -----
1226
- # Added by SC.
1227
- # """
1228
- # u, v = np.asarray(u), np.asarray(v)
1229
- # uvsum = u + v
1230
- # with np.errstate(divide="ignore", invalid="ignore"):
1231
- # return 2 * np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
1232
-
1233
- # def ruzicka(self, u, v):
1234
- # """Calculate the Ruzicka distance between two vectors.
1235
-
1236
- # Parameters
1237
- # ----------
1238
- # - u, v: Input vectors between which the distance is to be calculated.
1239
-
1240
- # Returns
1241
- # -------
1242
- # - The Ruzicka distance between the two vectors.
1243
-
1244
- # Notes
1245
- # -----
1246
- # Added by SC.
1247
- # """
1248
- # u, v = np.asarray(u), np.asarray(v)
1249
- # den = np.sum(np.maximum(u, v))
1250
-
1251
- # return 1 - np.sum(np.minimum(u, v)) / den
1252
-
1253
- # def sorensen(self, u, v):
1254
- # """Calculate the Sorensen distance between two vectors.
1255
-
1256
- # Parameters
1257
- # ----------
1258
- # - u, v: Input vectors between which the distance is to be calculated.
1259
-
1260
- # Returns
1261
- # -------
1262
- # - The Sorensen distance between the two vectors.
1263
-
1264
- # Notes
1265
- # -----
1266
- # The Sorensen distance equals the Manhattan distance divided by the sum of
1267
- # the two vectors.
1268
-
1269
- # Added by SC.
1270
- # """
1271
- # u, v = np.asarray(u), np.asarray(v)
1272
- # return np.sum(np.abs(u - v)) / np.sum(u + v)
1273
-
1274
- # def squared_chisq(self, u, v):
1275
- # """Calculate the Squared chi-square distance between two vectors.
1276
-
1277
- # Parameters
1278
- # ----------
1279
- # - u, v: Input vectors between which the distance is to be calculated.
1280
-
1281
- # Returns
1282
- # -------
1283
- # - The Squared chi-square distance between the two vectors.
1284
-
1285
- # References
1286
- # ----------
1287
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1288
- # Measures between Probability Density Functions. International
1289
- # Journal of Mathematical Models and Methods in Applied Sciences.
1290
- # 1(4), 300-307.
1291
- # """
1292
- # u, v = np.asarray(u), np.asarray(v)
1293
- # uvsum = u + v
1294
- # with np.errstate(divide="ignore", invalid="ignore"):
1295
- # return np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
1296
-
1297
- # def squaredchord(self, u, v):
1298
- # """Calculate the Squared-chord distance between two vectors.
1299
-
1300
- # Parameters
1301
- # ----------
1302
- # - u, v: Input vectors between which the distance is to be calculated.
1303
-
1304
- # Returns
1305
- # -------
1306
- # - The Squared-chord distance between the two vectors.
1307
-
1308
- # References
1309
- # ----------
1310
- # 1. Gavin DG et al. (2003) A statistical approach to evaluating
1311
- # distance metrics and analog assignments for pollen records.
1312
- # Quaternary Research 60:356–367.
1313
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1314
- # Measures between Probability Density Functions. International
1315
- # Journal of Mathematical Models and Methods in Applied Sciences.
1316
- # 1(4), 300-307.
1317
-
1318
- # Notes
1319
- # -----
1320
- # Equals to squared Matusita distance.
1321
- # """
1322
- # u, v = np.asarray(u), np.asarray(v)
1323
- # return np.sum((np.sqrt(u) - np.sqrt(v)) ** 2)
1324
-
1325
- # def squared_euclidean(self, u, v):
1326
- # """Calculate the Squared Euclidean distance between two vectors.
1327
-
1328
- # Parameters
1329
- # ----------
1330
- # - u, v: Input vectors between which the distance is to be calculated.
1331
-
1332
- # Returns
1333
- # -------
1334
- # - The Squared Euclidean distance between the two vectors.
1335
-
1336
- # References
1337
- # ----------
1338
- # 1. Gavin DG et al. (2003) A statistical approach to evaluating
1339
- # distance metrics and analog assignments for pollen records.
1340
- # Quaternary Research 60:356–367.
1341
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1342
- # Measures between Probability Density Functions. International
1343
- # Journal of Mathematical Models and Methods in Applied Sciences.
1344
- # 1(4), 300-307.
1345
-
1346
- # Notes
1347
- # -----
1348
- # Equals to squared Euclidean distance.
1349
- # """
1350
- # u, v = np.asarray(u), np.asarray(v)
1351
- # return np.dot((u - v), (u - v))
1352
-
1353
- # def taneja(self, u, v):
1354
- # """Calculate the Taneja distance between two vectors.
1355
-
1356
- # Parameters
1357
- # ----------
1358
- # - u, v: Input vectors between which the distance is to be calculated.
1359
-
1360
- # Returns
1361
- # -------
1362
- # - The Taneja distance between the two vectors.
1363
-
1364
- # References
1365
- # ----------
1366
- # 1. Taneja IJ. (1995), New Developments in Generalized Information
1367
- # Measures, Chapter in: Advances in Imaging and Electron Physics,
1368
- # Ed. P.W. Hawkes, 91, 37-135.
1369
- # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1370
- # Measures between Probability Density Functions. International
1371
- # Journal of Mathematical Models and Methods in Applied Sciences.
1372
- # 1(4), 300-307.
1373
- # """
1374
- # u, v = np.asarray(u), np.asarray(v)
1375
- # u = np.where(u == 0, self.epsilon, u)
1376
- # v = np.where(v == 0, self.epsilon, v)
1377
- # uvsum = u + v
1378
- # return np.sum((uvsum / 2) * np.log(uvsum / (2 * np.sqrt(u * v))))
1379
-
1380
- # def tanimoto(self, u, v):
1381
- # """Calculate the Tanimoto distance between two vectors.
1382
-
1383
- # Parameters
1384
- # ----------
1385
- # - u, v: Input vectors between which the distance is to be calculated.
1386
-
1387
- # Returns
1388
- # -------
1389
- # - The Tanimoto distance between the two vectors.
1390
-
1391
- # References
1392
- # ----------
1393
- # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1394
- # Measures between Probability Density Functions. International
1395
- # Journal of Mathematical Models and Methods in Applied Sciences.
1396
- # 1(4), 300-307.
1397
-
1398
- # Notes
1399
- # -----
1400
- # Equals Soergel distance.
1401
- # """
1402
- # u, v = np.asarray(u), np.asarray(v)
1403
- # # return np.sum(abs(u-v)) / np.sum(np.maximum(u, v))
1404
- # usum = np.sum(u)
1405
- # vsum = np.sum(v)
1406
- # minsum = np.sum(np.minimum(u, v))
1407
- # return (usum + vsum - 2 * minsum) / (usum + vsum - minsum)
1408
-
1409
- # def topsoe(self, u, v):
1410
- # """Calculate the Topsøe distance between two vectors.
1411
-
1412
- # Parameters
1413
- # ----------
1414
- # - u, v: Input vectors between which the distance is to be calculated.
1415
-
1416
- # Returns
1417
- # -------
1418
- # - The Topsøe distance between the two vectors.
1419
-
1420
- # References
1421
- # ----------
1422
- # 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1423
- # Measures between Probability Density Functions. International
1424
- # Journal of Mathematical Models and Methods in Applied Sciences.
1425
- # 1(4), 300-307.
1426
-
1427
- # Notes
1428
- # -----
1429
- # Equals two times Jensen-Shannon divergence.
1430
- # """
1431
- # u, v = np.asarray(u), np.asarray(v)
1432
- # u = np.where(u == 0, self.epsilon, u)
1433
- # v = np.where(v == 0, self.epsilon, v)
1434
- # dl = u * np.log(2 * u / (u + v))
1435
- # dr = v * np.log(2 * v / (u + v))
1436
- # return np.sum(dl + dr)
1437
-
1438
- # def vicis_symmetric_chisq(self, u, v):
1439
- # """Calculate the Vicis Symmetric chi-square distance.
1440
-
1441
- # Parameters
1442
- # ----------
1443
- # - u, v: Input vectors between which the distance is to be calculated.
1444
-
1445
- # Returns
1446
- # -------
1447
- # - The Vicis Symmetric chi-square distance between the two vectors.
1448
-
1449
- # References
1450
- # ----------
1451
- # 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1452
- # Measures between Probability Density Functions. International
1453
- # Journal of Mathematical Models and Methods in Applied Sciences.
1454
- # 1(4), 300-307
1455
- # """
1456
- # u, v = np.asarray(u), np.asarray(v)
1457
- # with np.errstate(divide="ignore", invalid="ignore"):
1458
- # u_v = (u - v) ** 2
1459
- # uvmin = np.minimum(u, v) ** 2
1460
- # return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
1461
-
1462
- # def vicis_wave_hedges(self, u, v):
1463
- # """Calculate the Vicis-Wave Hedges distance between two vectors.
1464
-
1465
- # Parameters
1466
- # ----------
1467
- # - u, v: Input vectors between which the distance is to be calculated.
1468
-
1469
- # Returns
1470
- # -------
1471
- # - The Vicis-Wave Hedges distance between the two vectors.
1472
-
1473
- # References
1474
- # ----------
1475
- # 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1476
- # Measures between Probability Density Functions. International
1477
- # Journal of Mathematical Models and Methods in Applied Sciences.
1478
- # 1(4), 300-307.
1479
- # """
1480
- # u, v = np.asarray(u), np.asarray(v)
1481
- # with np.errstate(divide="ignore", invalid="ignore"):
1482
- # u_v = abs(u - v)
1483
- # uvmin = np.minimum(u, v)
1484
- # return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
758
+ Divergence equals squared Clark distance multiplied by 2.
759
+
760
+ Parameters
761
+ ----------
762
+ - u, v: Input vectors between which the distance is to be calculated.
763
+
764
+ Returns
765
+ -------
766
+ - The divergence between the two vectors.
767
+
768
+ References
769
+ ----------
770
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
771
+ Measures between Probability Density Functions. International
772
+ Journal of Mathematical Models and Methods in Applied Sciences.
773
+ 1(4), 300-307.
774
+ """
775
+ u, v = np.asarray(u), np.asarray(v)
776
+ with np.errstate(invalid="ignore"):
777
+ return 2 * np.nansum(np.power(u - v, 2) / np.power(u + v, 2))
778
+
779
+ def google(self, u, v):
780
+ """Calculate the Normalized Google Distance (NGD) between two vectors.
781
+
782
+ NGD is a measure of similarity derived from the number of hits returned by the
783
+ Google search engine for a given set of keywords.
784
+
785
+ Parameters
786
+ ----------
787
+ - u, v: Input vectors between which the distance is to be calculated.
788
+
789
+ Returns
790
+ -------
791
+ - The Normalized Google Distance between the two vectors.
792
+
793
+ Notes
794
+ -----
795
+ When used for comparing two probability density functions (pdfs),
796
+ Google distance equals half of Cityblock distance.
797
+
798
+ References
799
+ ----------
800
+ 1. Lee & Rashid (2008) Information Technology, ITSim 2008.
801
+ doi:10.1109/ITSIM.2008.4631601.
802
+ """
803
+ u, v = np.asarray(u), np.asarray(v)
804
+ x = float(np.sum(u))
805
+ y = float(np.sum(v))
806
+ summin = float(np.sum(np.minimum(u, v)))
807
+ return (max([x, y]) - summin) / ((x + y) - min([x, y]))
808
+
809
+ def gower(self, u, v):
810
+ """Calculate the Gower distance between two vectors.
811
+
812
+ The Gower distance equals the Cityblock distance divided by the vector length.
813
+
814
+ Parameters
815
+ ----------
816
+ - u, v: Input vectors between which the distance is to be calculated.
817
+
818
+ Returns
819
+ -------
820
+ - The Gower distance between the two vectors.
821
+
822
+ References
823
+ ----------
824
+ 1. Gower JC. (1971) General Coefficient of Similarity
825
+ and Some of Its Properties, Biometrics 27, 857-874.
826
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
827
+ Measures between Probability Density Functions. International
828
+ Journal of Mathematical Models and Methods in Applied Sciences.
829
+ 1(4), 300-307.
830
+ """
831
+ u, v = np.asarray(u), np.asarray(v)
832
+ return np.sum(np.abs(u - v)) / u.size
833
+
834
+ def jeffreys(self, u, v):
835
+ """Calculate the Jeffreys divergence between two vectors.
836
+
837
+ The Jeffreys divergence is a symmetric version of the Kullback-Leibler
838
+ divergence.
839
+
840
+ Parameters
841
+ ----------
842
+ - u, v: Input vectors between which the divergence is to be calculated.
843
+
844
+ Returns
845
+ -------
846
+ - The Jeffreys divergence between the two vectors.
847
+
848
+ References
849
+ ----------
850
+ 1. Jeffreys H (1946) An Invariant Form for the Prior Probability
851
+ in Estimation Problems. Proc.Roy.Soc.Lon., Ser. A 186, 453-461.
852
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
853
+ Measures between Probability Density Functions. International
854
+ Journal of Mathematical Models and Methods in Applied Sciences.
855
+ 1(4), 300-307.
856
+ """
857
+ u, v = np.asarray(u), np.asarray(v)
858
+ # Add epsilon to zeros in vectors to avoid division
859
+ # by 0 and/or log of 0. Alternatively, zeros in the
860
+ # vectors could be ignored or masked (see below).
861
+ # u = ma.masked_where(u == 0, u)
862
+ # v = ma.masked_where(v == 0, u)
863
+ with np.errstate(divide="ignore", invalid="ignore"):
864
+ u[u == 0] = self.epsilon
865
+ v[v == 0] = self.epsilon
866
+ # Clip negative values to zero for valid log
867
+ udivv = np.clip(u / v, a_min=self.epsilon, a_max=None)
868
+ return np.sum((u - v) * np.log(udivv))
869
+
870
+ def jensenshannon_divergence(self, u, v):
871
+ """Calculate the Jensen-Shannon divergence between two vectors.
872
+
873
+ The Jensen-Shannon divergence is a symmetric and finite measure of similarity
874
+ between two probability distributions.
875
+
876
+ Parameters
877
+ ----------
878
+ - u, v: Input vectors between which the divergence is to be calculated.
879
+
880
+ Returns
881
+ -------
882
+ - The Jensen-Shannon divergence between the two vectors.
883
+
884
+ References
885
+ ----------
886
+ 1. Lin J. (1991) Divergence measures based on the Shannon entropy.
887
+ IEEE Transactions on Information Theory, 37(1):145–151.
888
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
889
+ Measures between Probability Density Functions. International
890
+ Journal of Mathematical Models and Methods in Applied Sciences.
891
+ 1(4), 300-307.
892
+ Comments:
893
+ Equals Jensen difference in Sung-Hyuk (2007):
894
+ u = np.where(u==0, self.epsilon, u)
895
+ v = np.where(v==0, self.epsilon, v)
896
+ el1 = (u * np.log(u) + v * np.log(v)) / 2
897
+ el2 = (u + v)/2
898
+ el3 = np.log(el2)
899
+ return np.sum(el1 - el2 * el3)
900
+ """
901
+ u, v = np.asarray(u), np.asarray(v)
902
+ with np.errstate(divide="ignore", invalid="ignore"):
903
+ # Clip negative values to zero for valid log
904
+ u[u == 0] = self.epsilon
905
+ v[v == 0] = self.epsilon
906
+
907
+ term1 = np.clip(2 * u / (u + v), a_min=self.epsilon, a_max=None)
908
+ term2 = np.clip(2 * v / (u + v), a_min=self.epsilon, a_max=None)
909
+
910
+ dl = u * np.log(term1)
911
+ dr = v * np.log(term2)
912
+ return (np.sum(dl) + np.sum(dr)) / 2
913
+
914
+ def jensen_difference(self, u, v):
915
+ """Calculate the Jensen difference between two vectors.
916
+
917
+ The Jensen difference is considered similar to the Jensen-Shannon divergence.
918
+
919
+ Parameters
920
+ ----------
921
+ - u, v: Input vectors between which the distance is to be calculated.
922
+
923
+ Returns
924
+ -------
925
+ - The Jensen difference between the two vectors.
926
+
927
+ Notes
928
+ -----
929
+ 1. Equals half of Topsøe distance
930
+ 2. Equals squared jensenshannon_distance.
931
+
932
+
933
+ References
934
+ ----------
935
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
936
+ Measures between Probability Density Functions. International
937
+ Journal of Mathematical Models and Methods in Applied Sciences.
938
+ 1(4), 300-307.
939
+ """
940
+ u, v = np.asarray(u), np.asarray(v)
941
+
942
+ with np.errstate(divide="ignore", invalid="ignore"):
943
+ # Clip negative values to eps for valid log
944
+ u = np.clip(u, self.epsilon, None)
945
+ v = np.clip(v, self.epsilon, None)
946
+ el1 = (u * np.log(u) + v * np.log(v)) / 2
947
+ el2 = np.clip((u + v) / 2, a_min=self.epsilon, a_max=None)
948
+ return np.sum(el1 - el2 * np.log(el2))
949
+
950
+ def kumarjohnson(self, u, v):
951
+ """Calculate the Kumar-Johnson distance between two vectors.
952
+
953
+ Parameters
954
+ ----------
955
+ - u, v: Input vectors between which the distance is to be calculated.
956
+
957
+ Returns
958
+ -------
959
+ - The Kumar-Johnson distance between the two vectors.
960
+
961
+ References
962
+ ----------
963
+ 1. Kumar P, Johnson A. (2005) On a symmetric divergence measure
964
+ and information inequalities, Journal of Inequalities in pure
965
+ and applied Mathematics. 6(3).
966
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
967
+ Measures between Probability Density Functions. International
968
+ Journal of Mathematical Models and Methods in Applied Sciences.
969
+ 1(4):300-307.
970
+ """
971
+ u, v = np.asarray(u), np.asarray(v)
972
+ uvmult = u * v
973
+ with np.errstate(divide="ignore", invalid="ignore"):
974
+ numer = np.power(u**2 - v**2, 2)
975
+ denom = 2 * np.power(uvmult, 3 / 2)
976
+ return np.sum(np.where(uvmult != 0, numer / denom, 0))
977
+
978
+ def matusita(self, u, v):
979
+ """Calculate the Matusita distance between two vectors.
980
+
981
+ Parameters
982
+ ----------
983
+ - u, v: Input vectors between which the distance is to be calculated.
984
+
985
+ Returns
986
+ -------
987
+ - The Matusita distance between the two vectors.
988
+
989
+ References
990
+ ----------
991
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
992
+ Measures between Probability Density Functions. International
993
+ Journal of Mathematical Models and Methods in Applied Sciences.
994
+ 1(4):300-307.
995
+
996
+ Notes
997
+ -----
998
+ Equals square root of Squared-chord distance.
999
+ """
1000
+ u, v = np.asarray(u), np.asarray(v)
1001
+ with np.errstate(divide="ignore", invalid="ignore"):
1002
+ return np.sqrt(np.sum((np.sqrt(u) - np.sqrt(v)) ** 2))
1003
+
1004
+ def minkowski(self, u, v, p=2):
1005
+ """Calculate the Minkowski distance between two vectors.
1006
+
1007
+ Parameters
1008
+ ----------
1009
+ - u, v: Input vectors between which the distance is to be calculated.
1010
+ - p: The order of the norm of the difference.
1011
+
1012
+ Returns
1013
+ -------
1014
+ - The Minkowski distance between the two vectors.
1015
+
1016
+ Notes
1017
+ -----
1018
+ When p goes to infinite, the Chebyshev distance is derived.
1019
+
1020
+ References
1021
+ ----------
1022
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1023
+ Measures between Probability Density Functions. International
1024
+ Journal of Mathematical Models and Methods in Applied Sciences.
1025
+ 1(4):300-307.
1026
+ """
1027
+ u, v = np.asarray(u), np.asarray(v)
1028
+ return np.linalg.norm(u - v, ord=p)
1029
+
1030
+ def penroseshape(self, u, v):
1031
+ """Calculate the Penrose shape distance between two vectors.
1032
+
1033
+ Parameters
1034
+ ----------
1035
+ - u, v: Input vectors between which the distance is to be calculated.
1036
+
1037
+ Returns
1038
+ -------
1039
+ - The Penrose shape distance between the two vectors.
1040
+
1041
+ References
1042
+ ----------
1043
+ 1. Deza M, Deza E (2009) Encyclopedia of Distances.
1044
+ Springer-Verlag Berlin Heidelberg. 1-590.
1045
+ """
1046
+ u, v = np.asarray(u), np.asarray(v)
1047
+ umu = np.mean(u)
1048
+ vmu = np.mean(v)
1049
+ with np.errstate(divide="ignore", invalid="ignore"):
1050
+ return np.sqrt(np.sum(((u - umu) - (v - vmu)) ** 2))
1051
+
1052
+ def prob_chisq(self, u, v):
1053
+ """Calculate the Probabilistic chi-square distance between two vectors.
1054
+
1055
+ Parameters
1056
+ ----------
1057
+ - u, v: Input vectors between which the distance is to be calculated.
1058
+
1059
+ Returns
1060
+ -------
1061
+ - The Probabilistic chi-square distance between the two vectors.
1062
+
1063
+ Notes
1064
+ -----
1065
+ Added by SC.
1066
+ """
1067
+ u, v = np.asarray(u), np.asarray(v)
1068
+ uvsum = u + v
1069
+ with np.errstate(divide="ignore", invalid="ignore"):
1070
+ return 2 * np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
1071
+
1072
+ def ruzicka(self, u, v):
1073
+ """Calculate the Ruzicka distance between two vectors.
1074
+
1075
+ Parameters
1076
+ ----------
1077
+ - u, v: Input vectors between which the distance is to be calculated.
1078
+
1079
+ Returns
1080
+ -------
1081
+ - The Ruzicka distance between the two vectors.
1082
+
1083
+ Notes
1084
+ -----
1085
+ Added by SC.
1086
+ """
1087
+ u, v = np.asarray(u), np.asarray(v)
1088
+ den = np.sum(np.maximum(u, v))
1089
+
1090
+ return 1 - np.sum(np.minimum(u, v)) / den
1091
+
1092
+ def sorensen(self, u, v):
1093
+ """Calculate the Sorensen distance between two vectors.
1094
+
1095
+ Parameters
1096
+ ----------
1097
+ - u, v: Input vectors between which the distance is to be calculated.
1098
+
1099
+ Returns
1100
+ -------
1101
+ - The Sorensen distance between the two vectors.
1102
+
1103
+ Notes
1104
+ -----
1105
+ The Sorensen distance equals the Manhattan distance divided by the sum of
1106
+ the two vectors.
1107
+
1108
+ Added by SC.
1109
+ """
1110
+ u, v = np.asarray(u), np.asarray(v)
1111
+ return np.sum(np.abs(u - v)) / np.sum(u + v)
1112
+
1113
+ def squared_chisq(self, u, v):
1114
+ """Calculate the Squared chi-square distance between two vectors.
1115
+
1116
+ Parameters
1117
+ ----------
1118
+ - u, v: Input vectors between which the distance is to be calculated.
1119
+
1120
+ Returns
1121
+ -------
1122
+ - The Squared chi-square distance between the two vectors.
1123
+
1124
+ References
1125
+ ----------
1126
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1127
+ Measures between Probability Density Functions. International
1128
+ Journal of Mathematical Models and Methods in Applied Sciences.
1129
+ 1(4), 300-307.
1130
+ """
1131
+ u, v = np.asarray(u), np.asarray(v)
1132
+ uvsum = u + v
1133
+ with np.errstate(divide="ignore", invalid="ignore"):
1134
+ return np.sum(np.where(uvsum != 0, (u - v) ** 2 / uvsum, 0))
1135
+
1136
+ def squaredchord(self, u, v):
1137
+ """Calculate the Squared-chord distance between two vectors.
1138
+
1139
+ Parameters
1140
+ ----------
1141
+ - u, v: Input vectors between which the distance is to be calculated.
1142
+
1143
+ Returns
1144
+ -------
1145
+ - The Squared-chord distance between the two vectors.
1146
+
1147
+ References
1148
+ ----------
1149
+ 1. Gavin DG et al. (2003) A statistical approach to evaluating
1150
+ distance metrics and analog assignments for pollen records.
1151
+ Quaternary Research 60:356–367.
1152
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1153
+ Measures between Probability Density Functions. International
1154
+ Journal of Mathematical Models and Methods in Applied Sciences.
1155
+ 1(4), 300-307.
1156
+
1157
+ Notes
1158
+ -----
1159
+ Equals to squared Matusita distance.
1160
+ """
1161
+ u, v = np.asarray(u), np.asarray(v)
1162
+ with np.errstate(divide="ignore", invalid="ignore"):
1163
+ return np.sum((np.sqrt(u) - np.sqrt(v)) ** 2)
1164
+
1165
+ def squared_euclidean(self, u, v):
1166
+ """Calculate the Squared Euclidean distance between two vectors.
1167
+
1168
+ Parameters
1169
+ ----------
1170
+ - u, v: Input vectors between which the distance is to be calculated.
1171
+
1172
+ Returns
1173
+ -------
1174
+ - The Squared Euclidean distance between the two vectors.
1175
+
1176
+ References
1177
+ ----------
1178
+ 1. Gavin DG et al. (2003) A statistical approach to evaluating
1179
+ distance metrics and analog assignments for pollen records.
1180
+ Quaternary Research 60:356–367.
1181
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1182
+ Measures between Probability Density Functions. International
1183
+ Journal of Mathematical Models and Methods in Applied Sciences.
1184
+ 1(4), 300-307.
1185
+
1186
+ Notes
1187
+ -----
1188
+ Equals to squared Euclidean distance.
1189
+ """
1190
+ u, v = np.asarray(u), np.asarray(v)
1191
+ return np.dot((u - v), (u - v))
1192
+
1193
+ def taneja(self, u, v):
1194
+ """Calculate the Taneja distance between two vectors.
1195
+
1196
+ Parameters
1197
+ ----------
1198
+ - u, v: Input vectors between which the distance is to be calculated.
1199
+
1200
+ Returns
1201
+ -------
1202
+ - The Taneja distance between the two vectors.
1203
+
1204
+ References
1205
+ ----------
1206
+ 1. Taneja IJ. (1995), New Developments in Generalized Information
1207
+ Measures, Chapter in: Advances in Imaging and Electron Physics,
1208
+ Ed. P.W. Hawkes, 91, 37-135.
1209
+ 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1210
+ Measures between Probability Density Functions. International
1211
+ Journal of Mathematical Models and Methods in Applied Sciences.
1212
+ 1(4), 300-307.
1213
+ """
1214
+ u, v = np.asarray(u), np.asarray(v)
1215
+ with np.errstate(divide="ignore", invalid="ignore"):
1216
+ u[u == 0] = self.epsilon
1217
+ v[v == 0] = self.epsilon
1218
+ uvsum = u + v
1219
+ logarg = np.clip(
1220
+ uvsum / (2 * np.sqrt(u * v)), a_min=self.epsilon, a_max=None
1221
+ )
1222
+ return np.sum((uvsum / 2) * np.log(logarg))
1223
+
1224
+ def tanimoto(self, u, v):
1225
+ """Calculate the Tanimoto distance between two vectors.
1226
+
1227
+ Parameters
1228
+ ----------
1229
+ - u, v: Input vectors between which the distance is to be calculated.
1230
+
1231
+ Returns
1232
+ -------
1233
+ - The Tanimoto distance between the two vectors.
1234
+
1235
+ References
1236
+ ----------
1237
+ 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1238
+ Measures between Probability Density Functions. International
1239
+ Journal of Mathematical Models and Methods in Applied Sciences.
1240
+ 1(4), 300-307.
1241
+
1242
+ Notes
1243
+ -----
1244
+ Equals Soergel distance.
1245
+ """
1246
+ u, v = np.asarray(u), np.asarray(v)
1247
+ # return np.sum(abs(u-v)) / np.sum(np.maximum(u, v))
1248
+ usum = np.sum(u)
1249
+ vsum = np.sum(v)
1250
+ minsum = np.sum(np.minimum(u, v))
1251
+ return (usum + vsum - 2 * minsum) / (usum + vsum - minsum)
1252
+
1253
+ def topsoe(self, u, v):
1254
+ """Calculate the Topsøe distance between two vectors.
1255
+
1256
+ Parameters
1257
+ ----------
1258
+ - u, v: Input vectors between which the distance is to be calculated.
1259
+
1260
+ Returns
1261
+ -------
1262
+ - The Topsøe distance between the two vectors.
1263
+
1264
+ References
1265
+ ----------
1266
+ 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1267
+ Measures between Probability Density Functions. International
1268
+ Journal of Mathematical Models and Methods in Applied Sciences.
1269
+ 1(4), 300-307.
1270
+
1271
+ Notes
1272
+ -----
1273
+ Equals two times Jensen-Shannon divergence.
1274
+ """
1275
+ u, v = np.asarray(u), np.asarray(v)
1276
+ with np.errstate(divide="ignore", invalid="ignore"):
1277
+ u[u == 0] = self.epsilon
1278
+ v[v == 0] = self.epsilon
1279
+ logarg1 = np.clip(2 * u / (u + v), a_min=self.epsilon, a_max=None)
1280
+ logarg2 = np.clip(2 * v / (u + v), a_min=self.epsilon, a_max=None)
1281
+ dl = u * np.log(logarg1)
1282
+ dr = v * np.log(logarg2)
1283
+ return np.sum(dl + dr)
1284
+
1285
+ def vicis_symmetric_chisq(self, u, v):
1286
+ """Calculate the Vicis Symmetric chi-square distance.
1287
+
1288
+ Parameters
1289
+ ----------
1290
+ - u, v: Input vectors between which the distance is to be calculated.
1291
+
1292
+ Returns
1293
+ -------
1294
+ - The Vicis Symmetric chi-square distance between the two vectors.
1295
+
1296
+ References
1297
+ ----------
1298
+ 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1299
+ Measures between Probability Density Functions. International
1300
+ Journal of Mathematical Models and Methods in Applied Sciences.
1301
+ 1(4), 300-307
1302
+ """
1303
+ u, v = np.asarray(u), np.asarray(v)
1304
+ with np.errstate(divide="ignore", invalid="ignore"):
1305
+ u_v = (u - v) ** 2
1306
+ uvmin = np.minimum(u, v) ** 2
1307
+ return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
1308
+
1309
+ def vicis_wave_hedges(self, u, v):
1310
+ """Calculate the Vicis-Wave Hedges distance between two vectors.
1311
+
1312
+ Parameters
1313
+ ----------
1314
+ - u, v: Input vectors between which the distance is to be calculated.
1315
+
1316
+ Returns
1317
+ -------
1318
+ - The Vicis-Wave Hedges distance between the two vectors.
1319
+
1320
+ References
1321
+ ----------
1322
+ 1. Sung-Hyuk C (2007) Comprehensive Survey on Distance/Similarity
1323
+ Measures between Probability Density Functions. International
1324
+ Journal of Mathematical Models and Methods in Applied Sciences.
1325
+ 1(4), 300-307.
1326
+ """
1327
+ u, v = np.asarray(u), np.asarray(v)
1328
+ with np.errstate(divide="ignore", invalid="ignore"):
1329
+ u_v = abs(u - v)
1330
+ uvmin = np.minimum(u, v)
1331
+ return np.sum(np.where(uvmin != 0, u_v / uvmin, 0))
1332
+
1333
+ # def fidelity(self, u, v):
1334
+ # """
1335
+ # Calculate the fidelity distance between two vectors.
1336
+
1337
+ # The fidelity distance measures the similarity between two probability
1338
+ # distributions.
1339
+
1340
+ # Parameters
1341
+ # ----------
1342
+ # - u, v: Input vectors between which the distance is to be calculated.
1343
+
1344
+ # Returns
1345
+ # -------
1346
+ # - The fidelity distance between the two vectors.
1347
+
1348
+ # Notes
1349
+ # -----
1350
+ # Added by SC.
1351
+ # """
1352
+ # u, v = np.asarray(u), np.asarray(v)
1353
+ # return 1 - (np.sum(np.sqrt(u * v)))
1354
+
1355
+ # # NEEDS CHECKING
1356
+ # # def harmonicmean(self, u, v):
1357
+ # # """
1358
+ # # Harmonic mean distance.
1359
+ # # Notes:
1360
+ # # Added by SC.
1361
+ # # """
1362
+ # # u, v = np.asarray(u), np.asarray(v)
1363
+ # # return 1 - 2.0 * np.sum(u * v / (u + v))
1364
+
1365
+ # # def inner(self, u, v):
1366
+ # # """
1367
+ # # Calculate the inner product distance between two vectors.
1368
+
1369
+ # # The inner product distance is a measure of
1370
+ # # similarity between two vectors,
1371
+ # # based on their inner product.
1372
+
1373
+ # # Parameters
1374
+ # # ----------
1375
+ # # - u, v: Input vectors between which the distance is to be calculated.
1376
+
1377
+ # # Returns
1378
+ # # -------
1379
+ # # - The inner product distance between the two vectors.
1380
+
1381
+ # # Notes
1382
+ # # -----
1383
+ # # Added by SC.
1384
+ # # """
1385
+ # # u, v = np.asarray(u), np.asarray(v)
1386
+ # # return 1 - np.dot(u, v)
1387
+
1388
+ # def k_divergence(self, u, v):
1389
+ # """Calculate the K divergence between two vectors.
1390
+
1391
+ # Parameters
1392
+ # ----------
1393
+ # - u, v: Input vectors between which the divergence is to be calculated.
1394
+
1395
+ # Returns
1396
+ # -------
1397
+ # - The K divergence between the two vectors.
1398
+
1399
+ # References
1400
+ # ----------
1401
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1402
+ # Measures between Probability Density Functions. International
1403
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1404
+ # 1(4), 300-307.
1405
+ # """
1406
+ # u, v = np.asarray(u), np.asarray(v)
1407
+ # u[u == 0] = self.epsilon
1408
+ # v[v == 0] = self.epsilon
1409
+ # with np.errstate(divide="ignore", invalid="ignore"):
1410
+ # return np.sum(u * np.log(2 * u / (u + v)))
1411
+
1412
+ # def kl_divergence(self, u, v):
1413
+ # """Calculate the Kullback-Leibler divergence between two vectors.
1414
+
1415
+ # The Kullback-Leibler divergence measures the difference between two
1416
+ # probability distributions.
1417
+
1418
+ # Parameters
1419
+ # ----------
1420
+ # - u, v: Input vectors between which the divergence is to be calculated.
1421
+
1422
+ # Returns
1423
+ # -------
1424
+ # - The Kullback-Leibler divergence between the two vectors.
1425
+
1426
+ # References
1427
+ # ----------
1428
+ # 1. Kullback S, Leibler RA (1951) On information and sufficiency.
1429
+ # Ann. Math. Statist. 22:79–86
1430
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1431
+ # Measures between Probability Density Functions. International
1432
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1433
+ # 1(4):300-307.
1434
+ # """
1435
+ # u, v = np.asarray(u), np.asarray(v)
1436
+ # u[u == 0] = self.epsilon
1437
+ # v[v == 0] = self.epsilon
1438
+ # with np.errstate(divide="ignore", invalid="ignore"):
1439
+ # return np.sum(u * np.log(u / v))
1440
+
1441
+ # def max_symmetric_chisq(self, u, v):
1442
+ # """Calculate the maximum symmetric chi-square distance.
1443
+
1444
+ # Parameters
1445
+ # ----------
1446
+ # - u, v: Input vectors between which the distance is to be calculated.
1447
+
1448
+ # Returns
1449
+ # -------
1450
+ # - The maximum symmetric chi-square distance between the two vectors.
1451
+
1452
+ # References
1453
+ # ----------
1454
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1455
+ # Measures between Probability Density Functions. International
1456
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1457
+ # 1(4):300-307.
1458
+ # """
1459
+ # u, v = np.asarray(u), np.asarray(v)
1460
+ # return max(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
1461
+
1462
+ # def min_symmetric_chisq(self, u, v):
1463
+ # """Calculate the minimum symmetric chi-square distance.
1464
+
1465
+ # Parameters
1466
+ # ----------
1467
+ # - u, v: Input vectors between which the distance is to be calculated.
1468
+
1469
+ # Returns
1470
+ # -------
1471
+ # - The minimum symmetric chi-square distance between the two vectors.
1472
+
1473
+ # Notes
1474
+ # -----
1475
+ # Added by SC.
1476
+ # """
1477
+ # u, v = np.asarray(u), np.asarray(v)
1478
+ # return min(self.neyman_chisq(u, v), self.pearson_chisq(u, v))
1479
+
1480
+ # def neyman_chisq(self, u, v):
1481
+ # """Calculate the Neyman chi-square distance between two vectors.
1482
+
1483
+ # Parameters
1484
+ # ----------
1485
+ # - u, v: Input vectors between which the distance is to be calculated.
1486
+
1487
+ # Returns
1488
+ # -------
1489
+ # - The Neyman chi-square distance between the two vectors.
1490
+
1491
+ # References
1492
+ # ----------
1493
+ # 1. Neyman J (1949) Contributions to the theory of the chi^2 test.
1494
+ #     In Proceedings of the First Berkeley Symposium on Mathematical
1495
+ # Statistics and Probability.
1496
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1497
+ # Measures between Probability Density Functions. International
1498
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1499
+ # 1(4), 300-307.
1500
+ # """
1501
+ # u, v = np.asarray(u), np.asarray(v)
1502
+ # with np.errstate(divide="ignore", invalid="ignore"):
1503
+ # return np.sum(np.where(u != 0, (u - v) ** 2 / u, 0))
1504
+
1505
+ # def pearson_chisq(self, u, v):
1506
+ # """Calculate the Pearson chi-square divergence between two vectors.
1507
+
1508
+ # Parameters
1509
+ # ----------
1510
+ # - u, v: Input vectors between which the divergence is to be calculated.
1511
+
1512
+ # Returns
1513
+ # -------
1514
+ # - The Pearson chi-square divergence between the two vectors.
1515
+
1516
+ # References
1517
+ # ----------
1518
+ # 1. Pearson K. (1900) On the Criterion that a given system of
1519
+ #     deviations from the probable in the case of a correlated system
1520
+ #     of variables is such that it can be reasonably supposed to have
1521
+ # arisen from random sampling, Phil. Mag. 50, 157-172.
1522
+ # 2. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1523
+ # Measures between Probability Density Functions. International
1524
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1525
+ # 1(4), 300-307.
1526
+
1527
+ # Notes
1528
+ # -----
1529
+ # Pearson chi-square divergence is asymmetric.
1530
+ # """
1531
+ # u, v = np.asarray(u), np.asarray(v)
1532
+ # with np.errstate(divide="ignore", invalid="ignore"):
1533
+ # return np.sum(np.where(v != 0, (u - v) ** 2 / v, 0))
1534
+
1535
+ # def nonintersection(self, u, v):
1536
+ # """
1537
+ # Calculate the Nonintersection distance between two vectors.
1538
+
1539
+ # Parameters
1540
+ # ----------
1541
+ # - u, v: Input vectors between which the distance is to be calculated.
1542
+
1543
+ # Returns
1544
+ # -------
1545
+ # - The Nonintersection distance between the two vectors.
1546
+
1547
+ # References
1548
+ # ----------
1549
+ # 1. Sung-Hyuk C. (2007) Comprehensive Survey on Distance/Similarity
1550
+ # Measures between Probability Density Functions. International
1551
+ # Journal of Mathematical Models and Methods in Applied Sciences.
1552
+ # 1(4), 300-307.
1553
+
1554
+ # Notes
1555
+ # -----
1556
+ # When used for comparing two probability density functions (pdfs),
1557
+ # Nonintersection distance equals half of Cityblock distance.
1558
+ # """
1559
+ # u, v = np.asarray(u), np.asarray(v)
1560
+ # return 1 - np.sum(np.minimum(u, v))