mlquantify 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -907,10 +907,140 @@ class PCC(AggregativeQuantifier):
907
907
 
908
908
 
909
909
 
910
+ class PACC(AggregativeQuantifier):
911
+ """
912
+ Probabilistic Adjusted Classify and Count (PACC).
913
+ This method extends the Adjusted Classify and Count (ACC) approach
914
+ by leveraging the average class-conditional confidences obtained
915
+ from a probabilistic classifier instead of relying solely on true
916
+ positive and false positive rates.
917
+
918
+ Parameters
919
+ ----------
920
+ learner : BaseEstimator
921
+ A scikit-learn compatible classifier to be used for quantification.
922
+ threshold : float, optional
923
+ The decision threshold for classification. Default is 0.5.
924
+
925
+ Attributes
926
+ ----------
927
+ learner : BaseEstimator
928
+ A scikit-learn compatible classifier.
929
+ threshold : float
930
+ Decision threshold for classification. Default is 0.5.
931
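+ mean_pos : float
932
+ Mean positive-class probability over the instances labelled `classes[1]`, computed during the fitting process.
933
+ mean_neg : float
934
+ Mean positive-class probability over the instances not labelled `classes[1]`, computed during the fitting process.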
935
+
936
+ See Also
937
+ --------
938
+ ThresholdOptimization : Base class for threshold-based quantification methods.
939
+ ACC : Adjusted Classify and Count quantification method.
940
+ CC : Classify and Count quantification method.
941
+
942
+ References
943
+ ----------
944
+ A. Bella, C. Ferri, J. Hernández-Orallo and M. J. Ramírez-Quintana, "Quantification via Probability Estimators," 2010 IEEE International Conference on Data Mining, Sydney, NSW, Australia, 2010, pp. 737-742, doi: 10.1109/ICDM.2010.75. Available at: https://ieeexplore.ieee.org/abstract/document/5694031
910
945
 
946
+ Examples
947
+ --------
948
+ >>> from mlquantify.methods.aggregative import PACC
949
+ >>> from mlquantify.utils.general import get_real_prev
950
+ >>> from sklearn.datasets import load_breast_cancer
951
+ >>> from sklearn.svm import SVC
952
+ >>> from sklearn.model_selection import train_test_split
953
+ >>>
954
+ >>> features, target = load_breast_cancer(return_X_y=True)
955
+ >>>
956
+ >>> X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
957
+ >>>
958
+ >>> pacc = PACC(learner=SVC(probability=True))
959
+ >>> pacc.fit(X_train, y_train)
960
+ >>> y_pred = pacc.predict(X_test)
961
+ >>> y_pred
962
+ {0: 0.4664886119311328, 1: 0.5335113880688672}
963
+ >>> get_real_prev(y_test)
964
+ {0: 0.3991228070175439, 1: 0.6008771929824561}
965
+ """
911
966
 
967
+ def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
968
+ self.learner = learner
969
+ self.threshold = threshold
970
+ self.mean_pos = None
971
+ self.mean_neg = None
972
+
973
+ @property
974
+ def is_probabilistic(self) -> bool:
975
+ return True
976
+
977
+ @property
978
+ def is_multiclass(self) -> bool:
979
+ return False
912
980
 
981
+ def _fit_method(self, X, y):
982
+ # Get predicted labels and probabilities
983
+ if mq.arguments["y_labels"] is not None and mq.arguments["posteriors_train"] is not None:
984
+ y_labels = mq.arguments["y_labels"]
985
+ probabilities = mq.arguments["posteriors_train"]
986
+ else:
987
+ y_labels, probabilities = get_scores(X, y, self.learner, self.cv_folds, self.learner_fitted)
988
+
989
+ # Compute the mean positive-class probability separately for positive and non-positive instances
990
+
991
+ self.mean_pos = np.mean(probabilities[y_labels == self.classes[1], 1])
992
+ self.mean_neg = np.mean(probabilities[y_labels != self.classes[1], 1])
993
+
994
+ return self
995
+
996
+
997
+ def _predict_method(self, X):
998
+ """
999
+ Predicts the class prevalence using the mean class-conditional
1000
+ probabilities from a probabilistic classifier.
913
1001
 
1002
+ Parameters
1003
+ ----------
1004
+ X : array-like or sparse matrix of shape (n_samples, n_features)
1005
+ The input data for prediction.
1006
+
1007
+ Returns
1008
+ -------
1009
+ dict
1010
+ A dictionary with class labels as keys and their respective
1011
+ prevalence estimates as values.
1012
+
1013
+ Notes
1014
+ -----
1015
+ The prevalence is adjusted using the formula:
1016
+ prevalence = clip(|mean_score - mean_neg| / (mean_pos - mean_neg), 0, 1),
1017
+ where mean_score is the average probability for the positive class.
1018
+
1019
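+ If `mean_pos - mean_neg` equals zero the adjustment is
1020
+ undefined. In that case no ZeroDivisionError is raised:
1021
+ the unadjusted mean score is returned as the prevalence
1022
+ of the positive class, and one minus that value as the
1023
+ prevalence of the other class.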
1024
+ """
1025
+ prevalences = {}
1026
+
1027
+ # Calculate probabilities for the positive class
1028
+ probabilities = self.predict_learner(X)[:, 1]
1029
+
1030
+ # Compute the mean score for the positive class
1031
+ mean_scores = np.mean(probabilities)
1032
+
1033
+ # Adjust prevalence using the class-conditional means (mean_pos, mean_neg)
1034
+ if self.mean_pos - self.mean_neg == 0:
1035
+ prevalence = mean_scores
1036
+ else:
1037
+ prevalence = np.clip(abs(mean_scores - self.mean_neg) / (self.mean_pos - self.mean_neg), 0, 1)
1038
+
1039
+ # Map the computed prevalence to the class labels
1040
+ prevalences[self.classes[0]] = 1 - prevalence
1041
+ prevalences[self.classes[1]] = prevalence
1042
+
1043
+ return prevalences
914
1044
 
915
1045
 
916
1046
  class PWK(AggregativeQuantifier):
@@ -1012,7 +1142,6 @@ class PWK(AggregativeQuantifier):
1012
1142
  from . import threshold_optimization
1013
1143
 
1014
1144
  ACC = threshold_optimization.ACC
1015
- PACC = threshold_optimization.PACC
1016
1145
  T50 = threshold_optimization.T50
1017
1146
  MAX = threshold_optimization.MAX
1018
1147
  X_method = threshold_optimization.X_method
@@ -659,157 +659,6 @@ class MS2(ThresholdOptimization):
659
659
 
660
660
  return np.asarray(prevalences)
661
661
 
662
- class PACC(ThresholdOptimization):
663
- """
664
- Probabilistic Adjusted Classify and Count (PACC).
665
- This method extends the Adjusted Classify and Count (AC) approach
666
- by leveraging the average class-conditional confidences obtained
667
- from a probabilistic classifier instead of relying solely on true
668
- positive and false positive rates.
669
-
670
- Parameters
671
- ----------
672
- learner : BaseEstimator
673
- A scikit-learn compatible classifier to be used for quantification.
674
- threshold : float, optional
675
- The decision threshold for classification. Default is 0.5.
676
-
677
- Attributes
678
- ----------
679
- learner : BaseEstimator
680
- A scikit-learn compatible classifier.
681
- threshold : float
682
- Decision threshold for classification. Default is 0.5.
683
- tpr : float
684
- True positive rate computed during the fitting process.
685
- fpr : float
686
- False positive rate computed during the fitting process.
687
-
688
- See Also
689
- --------
690
- ThresholdOptimization : Base class for threshold-based quantification methods.
691
- ACC : Adjusted Classify and Count quantification method.
692
- CC : Classify and Count quantification method.
693
-
694
- References
695
- ----------
696
- A. Bella, C. Ferri, J. Hernández-Orallo and M. J. Ramírez-Quintana, "Quantification via Probability Estimators," 2010 IEEE International Conference on Data Mining, Sydney, NSW, Australia, 2010, pp. 737-742, doi: 10.1109/ICDM.2010.75. Available at: https://ieeexplore.ieee.org/abstract/document/5694031
697
-
698
- Examples
699
- --------
700
- >>> from mlquantify.methods.aggregative import PACC
701
- >>> from mlquantify.utils.general import get_real_prev
702
- >>> from sklearn.datasets import load_breast_cancer
703
- >>> from sklearn.svm import SVC
704
- >>> from sklearn.model_selection import train_test_split
705
- >>>
706
- >>> features, target = load_breast_cancer(return_X_y=True)
707
- >>>
708
- >>> X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
709
- >>>
710
- >>> pacc = PACC(learner=SVC(probability=True))
711
- >>> pacc.fit(X_train, y_train)
712
- >>> y_pred = pacc.predict(X_test)
713
- >>> y_pred
714
- {0: 0.4664886119311328, 1: 0.5335113880688672}
715
- >>> get_real_prev(y_test)
716
- {0: 0.3991228070175439, 1: 0.6008771929824561}
717
- """
718
-
719
- def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
720
- super().__init__(learner)
721
- self.threshold = threshold
722
-
723
- def _predict_method(self, X):
724
- """
725
- Predicts the class prevalence using the mean class-conditional
726
- probabilities from a probabilistic classifier.
727
-
728
- Parameters
729
- ----------
730
- X : array-like or sparse matrix of shape (n_samples, n_features)
731
- The input data for prediction.
732
-
733
- Returns
734
- -------
735
- dict
736
- A dictionary with class labels as keys and their respective
737
- prevalence estimates as values.
738
-
739
- Notes
740
- -----
741
- The prevalence is adjusted using the formula:
742
- prevalence = |mean_score - FPR| / (TPR - FPR),
743
- where mean_score is the average probability for the positive class.
744
-
745
- Raises
746
- ------
747
- ZeroDivisionError
748
- If `TPR - FPR` equals zero, indicating that the classifier's
749
- performance does not vary across the threshold range.
750
- """
751
- prevalences = {}
752
-
753
- # Calculate probabilities for the positive class
754
- probabilities = self.predict_learner(X)[:, 1]
755
-
756
- # Compute the mean score for the positive class
757
- mean_scores = np.mean(probabilities)
758
-
759
- # Adjust prevalence based on TPR and FPR
760
- if self.tpr - self.fpr == 0:
761
- prevalence = mean_scores
762
- else:
763
- prevalence = np.clip(abs(mean_scores - self.fpr) / (self.tpr - self.fpr), 0, 1)
764
-
765
- # Map the computed prevalence to the class labels
766
- prevalences[self.classes[0]] = 1 - prevalence
767
- prevalences[self.classes[1]] = prevalence
768
-
769
- return prevalences
770
-
771
- def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
772
- """
773
- Finds the true positive rate (TPR) and false positive rate (FPR)
774
- corresponding to the specified decision threshold.
775
-
776
- Parameters
777
- ----------
778
- thresholds : np.ndarray
779
- An array of threshold values.
780
- tprs : np.ndarray
781
- An array of true positive rates corresponding to the thresholds.
782
- fprs : np.ndarray
783
- An array of false positive rates corresponding to the thresholds.
784
-
785
- Returns
786
- -------
787
- tuple
788
- A tuple containing the specified threshold, TPR, and FPR.
789
-
790
- Raises
791
- ------
792
- IndexError
793
- If the specified threshold is not found in the `thresholds` array.
794
- """
795
- # Locate TPR and FPR for the specified threshold
796
- tpr = tprs[thresholds == self.threshold][0]
797
- fpr = fprs[thresholds == self.threshold][0]
798
- return (self.threshold, tpr, fpr)
799
-
800
-
801
-
802
-
803
- def best_tprfpr(self, thresholds:np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
804
- tpr = tprs[thresholds == self.threshold][0]
805
- fpr = fprs[thresholds == self.threshold][0]
806
- return (self.threshold, tpr, fpr)
807
-
808
-
809
-
810
-
811
-
812
-
813
662
 
814
663
 
815
664
  class T50(ThresholdOptimization):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mlquantify
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: Quantification Library
5
5
  Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
6
6
  Maintainer: Luiz Fernando Luth Junior
@@ -8,15 +8,15 @@ mlquantify/evaluation/__init__.py,sha256=x1grng0n_QeZpVBU8-pwagYdBMkbMRILtrp1qk_
8
8
  mlquantify/evaluation/measures.py,sha256=fIKyxxlD8em3oaj4u_BeXmNyUQG_A0vXWY8APPgNoJ0,6579
9
9
  mlquantify/evaluation/protocol.py,sha256=OsOXm_vf7sYlw9pQv08WxAvvgzo10bAqiDM-1cpz7nQ,24020
10
10
  mlquantify/methods/__init__.py,sha256=ya3Mn7bcz2r3oaIT7yVR4iJkAfgEAwF4xDK54C0rZ7U,536
11
- mlquantify/methods/aggregative.py,sha256=rL_xlX2nYECrxFSjBJNlxj6h3b-iIs7l_XgxIRSYHpw,34164
11
+ mlquantify/methods/aggregative.py,sha256=F5Z-tGA9OcZgMBLKOeaos6wIgvvnDeriZ4y0TyMpDrc,39051
12
12
  mlquantify/methods/meta.py,sha256=sZWQHUGkm6iiqujmIpHDL_8tDdKQ161bzD5mcpXLWEY,19066
13
13
  mlquantify/methods/mixture_models.py,sha256=si2Pzaka5Kbva4QKBzLolvb_8V0ZEjp68UBAiOwl49s,35166
14
14
  mlquantify/methods/non_aggregative.py,sha256=xaBu21TUtiYkOEUKO16NaNMwdNa6-SNjfBsc5PpIMyI,4815
15
- mlquantify/methods/threshold_optimization.py,sha256=-iOcP5YcXZd0XZHGvbmcoE72hXR6D9YCoTnr1l80-9k,35796
15
+ mlquantify/methods/threshold_optimization.py,sha256=NYGKbYvtfmiBeU8wpTiFCdURkijcPRZtybPOt6vtXbY,30489
16
16
  mlquantify/utils/__init__.py,sha256=logWrL6B6mukP8tvYm_UPEdO9eNA-J-ySILr7-syDoc,44
17
17
  mlquantify/utils/general.py,sha256=Li5ix_dy19dUhYNgiUsNHdqqnSVYvznUBUuyr-zYSPI,7554
18
18
  mlquantify/utils/method.py,sha256=RL4vBJGl5_6DZ59Bs62hdNXI_hnoDIWilMMyMPiOjBg,12631
19
- mlquantify-0.1.2.dist-info/METADATA,sha256=2j3pqrm5djMAPm7bKTIjBjtg71OzAbFpwC-_ofOoSlc,4940
20
- mlquantify-0.1.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
21
- mlquantify-0.1.2.dist-info/top_level.txt,sha256=tGEkYkbbFElwULvqENjam3u1uXtyC1J9dRmibsq8_n0,11
22
- mlquantify-0.1.2.dist-info/RECORD,,
19
+ mlquantify-0.1.3.dist-info/METADATA,sha256=FkF8Qt_lHsa0Lf0sXAQ36Ri5bs5aMkAoNVzubTPty1A,4940
20
+ mlquantify-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
+ mlquantify-0.1.3.dist-info/top_level.txt,sha256=tGEkYkbbFElwULvqENjam3u1uXtyC1J9dRmibsq8_n0,11
22
+ mlquantify-0.1.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5