mlquantify 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. mlquantify-0.1.3/MANIFEST.in +1 -0
  2. {mlquantify-0.1.1 → mlquantify-0.1.3}/PKG-INFO +1 -1
  3. mlquantify-0.1.3/VERSION.txt +1 -0
  4. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/methods/aggregative.py +130 -1
  5. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/methods/threshold_optimization.py +66 -154
  6. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify.egg-info/PKG-INFO +1 -1
  7. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify.egg-info/SOURCES.txt +2 -0
  8. {mlquantify-0.1.1 → mlquantify-0.1.3}/setup.py +6 -6
  9. {mlquantify-0.1.1 → mlquantify-0.1.3}/README.md +0 -0
  10. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/__init__.py +0 -0
  11. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/base.py +0 -0
  12. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/classification/__init__.py +0 -0
  13. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/classification/methods.py +0 -0
  14. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/evaluation/__init__.py +0 -0
  15. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/evaluation/measures.py +0 -0
  16. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/evaluation/protocol.py +0 -0
  17. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/methods/__init__.py +0 -0
  18. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/methods/meta.py +0 -0
  19. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/methods/mixture_models.py +0 -0
  20. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/methods/non_aggregative.py +0 -0
  21. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/model_selection.py +0 -0
  22. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/plots.py +0 -0
  23. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/utils/__init__.py +0 -0
  24. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/utils/general.py +0 -0
  25. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify/utils/method.py +0 -0
  26. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify.egg-info/dependency_links.txt +0 -0
  27. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify.egg-info/requires.txt +0 -0
  28. {mlquantify-0.1.1 → mlquantify-0.1.3}/mlquantify.egg-info/top_level.txt +0 -0
  29. {mlquantify-0.1.1 → mlquantify-0.1.3}/setup.cfg +0 -0
@@ -0,0 +1 @@
1
+ include VERSION.txt
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mlquantify
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Quantification Library
5
5
  Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
6
6
  Maintainer: Luiz Fernando Luth Junior
@@ -0,0 +1 @@
1
+ 0.1.3
@@ -907,10 +907,140 @@ class PCC(AggregativeQuantifier):
907
907
 
908
908
 
909
909
 
910
+ class PACC(AggregativeQuantifier):
911
+ """
912
+ Probabilistic Adjusted Classify and Count (PACC).
913
+ This method extends the Adjusted Classify and Count (AC) approach
914
+ by leveraging the average class-conditional confidences obtained
915
+ from a probabilistic classifier instead of relying solely on true
916
+ positive and false positive rates.
917
+
918
+ Parameters
919
+ ----------
920
+ learner : BaseEstimator
921
+ A scikit-learn compatible classifier to be used for quantification.
922
+ threshold : float, optional
923
+ The decision threshold for classification. Default is 0.5.
924
+
925
+ Attributes
926
+ ----------
927
+ learner : BaseEstimator
928
+ A scikit-learn compatible classifier.
929
+ threshold : float
930
+ Decision threshold for classification. Default is 0.5.
931
+ tpr : float
932
+ True positive rate computed during the fitting process.
933
+ fpr : float
934
+ False positive rate computed during the fitting process.
935
+
936
+ See Also
937
+ --------
938
+ ThresholdOptimization : Base class for threshold-based quantification methods.
939
+ ACC : Adjusted Classify and Count quantification method.
940
+ CC : Classify and Count quantification method.
941
+
942
+ References
943
+ ----------
944
+ A. Bella, C. Ferri, J. Hernández-Orallo and M. J. Ramírez-Quintana, "Quantification via Probability Estimators," 2010 IEEE International Conference on Data Mining, Sydney, NSW, Australia, 2010, pp. 737-742, doi: 10.1109/ICDM.2010.75. Available at: https://ieeexplore.ieee.org/abstract/document/5694031
910
945
 
946
+ Examples
947
+ --------
948
+ >>> from mlquantify.methods.aggregative import PACC
949
+ >>> from mlquantify.utils.general import get_real_prev
950
+ >>> from sklearn.datasets import load_breast_cancer
951
+ >>> from sklearn.svm import SVC
952
+ >>> from sklearn.model_selection import train_test_split
953
+ >>>
954
+ >>> features, target = load_breast_cancer(return_X_y=True)
955
+ >>>
956
+ >>> X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
957
+ >>>
958
+ >>> pacc = PACC(learner=SVC(probability=True))
959
+ >>> pacc.fit(X_train, y_train)
960
+ >>> y_pred = pacc.predict(X_test)
961
+ >>> y_pred
962
+ {0: 0.4664886119311328, 1: 0.5335113880688672}
963
+ >>> get_real_prev(y_test)
964
+ {0: 0.3991228070175439, 1: 0.6008771929824561}
965
+ """
911
966
 
967
+ def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
968
+ self.learner = learner
969
+ self.threshold = threshold
970
+ self.mean_pos = None
971
+ self.mean_neg = None
972
+
973
+ @property
974
+ def is_probabilistic(self) -> bool:
975
+ return True
976
+
977
+ @property
978
+ def is_multiclass(self) -> bool:
979
+ return False
912
980
 
981
+ def _fit_method(self, X, y):
982
+ # Get predicted labels and probabilities
983
+ if mq.arguments["y_labels"] is not None and mq.arguments["posteriors_train"] is not None:
984
+ y_labels = mq.arguments["y_labels"]
985
+ probabilities = mq.arguments["posteriors_train"]
986
+ else:
987
+ y_labels, probabilities = get_scores(X, y, self.learner, self.cv_folds, self.learner_fitted)
988
+
989
+ # Adjust thresholds and compute true and false positive rates
990
+
991
+ self.mean_pos = np.mean(probabilities[y_labels == self.classes[1], 1])
992
+ self.mean_neg = np.mean(probabilities[y_labels != self.classes[1], 1])
993
+
994
+ return self
995
+
996
+
997
+ def _predict_method(self, X):
998
+ """
999
+ Predicts the class prevalence using the mean class-conditional
1000
+ probabilities from a probabilistic classifier.
913
1001
 
1002
+ Parameters
1003
+ ----------
1004
+ X : array-like or sparse matrix of shape (n_samples, n_features)
1005
+ The input data for prediction.
1006
+
1007
+ Returns
1008
+ -------
1009
+ dict
1010
+ A dictionary with class labels as keys and their respective
1011
+ prevalence estimates as values.
1012
+
1013
+ Notes
1014
+ -----
1015
+ The prevalence is adjusted using the formula:
1016
+ prevalence = |mean_score - FPR| / (TPR - FPR),
1017
+ where mean_score is the average probability for the positive class.
1018
+
1019
+ Raises
1020
+ ------
1021
+ ZeroDivisionError
1022
+ If `TPR - FPR` equals zero, indicating that the classifier's
1023
+ performance does not vary across the threshold range.
1024
+ """
1025
+ prevalences = {}
1026
+
1027
+ # Calculate probabilities for the positive class
1028
+ probabilities = self.predict_learner(X)[:, 1]
1029
+
1030
+ # Compute the mean score for the positive class
1031
+ mean_scores = np.mean(probabilities)
1032
+
1033
+ # Adjust prevalence based on TPR and FPR
1034
+ if self.mean_pos - self.mean_neg == 0:
1035
+ prevalence = mean_scores
1036
+ else:
1037
+ prevalence = np.clip(abs(mean_scores - self.mean_neg) / (self.mean_pos - self.mean_neg), 0, 1)
1038
+
1039
+ # Map the computed prevalence to the class labels
1040
+ prevalences[self.classes[0]] = 1 - prevalence
1041
+ prevalences[self.classes[1]] = prevalence
1042
+
1043
+ return prevalences
914
1044
 
915
1045
 
916
1046
  class PWK(AggregativeQuantifier):
@@ -1012,7 +1142,6 @@ class PWK(AggregativeQuantifier):
1012
1142
  from . import threshold_optimization
1013
1143
 
1014
1144
  ACC = threshold_optimization.ACC
1015
- PACC = threshold_optimization.PACC
1016
1145
  T50 = threshold_optimization.T50
1017
1146
  MAX = threshold_optimization.MAX
1018
1147
  X_method = threshold_optimization.X_method
@@ -447,9 +447,8 @@ class MS(ThresholdOptimization):
447
447
  {0: 0.3991228070175439, 1: 0.6008771929824561}
448
448
  """
449
449
 
450
- def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
450
+ def __init__(self, learner: BaseEstimator=None):
451
451
  super().__init__(learner)
452
- self.threshold = threshold
453
452
 
454
453
  def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
455
454
  """
@@ -482,11 +481,42 @@ class MS(ThresholdOptimization):
482
481
  ValueError
483
482
  If `thresholds`, `tprs`, or `fprs` are empty or have mismatched lengths.
484
483
  """
485
- # Compute median TPR and FPR
486
- tpr = np.median(tprs)
487
- fpr = np.median(fprs)
488
484
 
489
- return (self.threshold, tpr, fpr)
485
+ return (thresholds, tprs, fprs)
486
+
487
+ def _predict_method(self, X) -> dict:
488
+ """
489
+ Predicts class prevalences using the adjusted threshold.
490
+
491
+ Parameters
492
+ ----------
493
+ X : pd.DataFrame or np.ndarray
494
+ The input features for prediction.
495
+
496
+ Returns
497
+ -------
498
+ np.ndarray
499
+ An array of predicted prevalences for the classes.
500
+ """
501
+ # Get predicted probabilities for the positive class
502
+ probabilities = self.predict_learner(X)[:, 1]
503
+
504
+ prevs = []
505
+
506
+ for thr, tpr, fpr in zip(self.threshold, self.tpr, self.fpr):
507
+ cc_output = len(probabilities[probabilities >= thr]) / len(probabilities)
508
+
509
+ if tpr - fpr == 0:
510
+ prevalence = cc_output
511
+ else:
512
+ prev = np.clip((cc_output - fpr) / (tpr - fpr), 0, 1)
513
+ prevs.append(prev)
514
+
515
+ prevalence = np.median(prevs)
516
+
517
+ prevalences = [1 - prevalence, prevalence]
518
+
519
+ return np.asarray(prevalences)
490
520
 
491
521
 
492
522
 
@@ -586,166 +616,48 @@ class MS2(ThresholdOptimization):
586
616
  # Identify indices where the condition is satisfied
587
617
  indices = np.where(np.abs(tprs - fprs) > 0.25)[0]
588
618
  if len(indices) == 0:
589
- raise ValueError("No cases meet the condition |TPR - FPR| > 0.25.")
590
-
591
- # Compute medians for the selected cases
592
- threshold = np.median(thresholds[indices])
593
- tpr = np.median(tprs[indices])
594
- fpr = np.median(fprs[indices])
595
-
596
- return (threshold, tpr, fpr)
597
-
598
-
599
- class PACC(ThresholdOptimization):
600
- """
601
- Probabilistic Adjusted Classify and Count (PACC).
602
- This method extends the Adjusted Classify and Count (AC) approach
603
- by leveraging the average class-conditional confidences obtained
604
- from a probabilistic classifier instead of relying solely on true
605
- positive and false positive rates.
606
-
607
- Parameters
608
- ----------
609
- learner : BaseEstimator
610
- A scikit-learn compatible classifier to be used for quantification.
611
- threshold : float, optional
612
- The decision threshold for classification. Default is 0.5.
613
-
614
- Attributes
615
- ----------
616
- learner : BaseEstimator
617
- A scikit-learn compatible classifier.
618
- threshold : float
619
- Decision threshold for classification. Default is 0.5.
620
- tpr : float
621
- True positive rate computed during the fitting process.
622
- fpr : float
623
- False positive rate computed during the fitting process.
624
-
625
- See Also
626
- --------
627
- ThresholdOptimization : Base class for threshold-based quantification methods.
628
- ACC : Adjusted Classify and Count quantification method.
629
- CC : Classify and Count quantification method.
630
-
631
- References
632
- ----------
633
- A. Bella, C. Ferri, J. Hernández-Orallo and M. J. Ramírez-Quintana, "Quantification via Probability Estimators," 2010 IEEE International Conference on Data Mining, Sydney, NSW, Australia, 2010, pp. 737-742, doi: 10.1109/ICDM.2010.75. Available at: https://ieeexplore.ieee.org/abstract/document/5694031
634
-
635
- Examples
636
- --------
637
- >>> from mlquantify.methods.aggregative import PACC
638
- >>> from mlquantify.utils.general import get_real_prev
639
- >>> from sklearn.datasets import load_breast_cancer
640
- >>> from sklearn.svm import SVC
641
- >>> from sklearn.model_selection import train_test_split
642
- >>>
643
- >>> features, target = load_breast_cancer(return_X_y=True)
644
- >>>
645
- >>> X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
646
- >>>
647
- >>> pacc = PACC(learner=SVC(probability=True))
648
- >>> pacc.fit(X_train, y_train)
649
- >>> y_pred = pacc.predict(X_test)
650
- >>> y_pred
651
- {0: 0.4664886119311328, 1: 0.5335113880688672}
652
- >>> get_real_prev(y_test)
653
- {0: 0.3991228070175439, 1: 0.6008771929824561}
654
- """
619
+ warnings.warn("No cases satisfy the condition |TPR - FPR| > 0.25.")
620
+ indices = np.where(np.abs(tprs - fprs) >= 0)[0]
621
+
622
+ thresholds_ = thresholds[indices]
623
+ tprs_ = tprs[indices]
624
+ fprs_ = fprs[indices]
655
625
 
656
- def __init__(self, learner: BaseEstimator=None, threshold: float = 0.5):
657
- super().__init__(learner)
658
- self.threshold = threshold
626
+ return (thresholds_, tprs_, fprs_)
659
627
 
660
- def _predict_method(self, X):
628
+ def _predict_method(self, X) -> dict:
661
629
  """
662
- Predicts the class prevalence using the mean class-conditional
663
- probabilities from a probabilistic classifier.
630
+ Predicts class prevalences using the adjusted threshold.
664
631
 
665
632
  Parameters
666
633
  ----------
667
- X : array-like or sparse matrix of shape (n_samples, n_features)
668
- The input data for prediction.
634
+ X : pd.DataFrame or np.ndarray
635
+ The input features for prediction.
669
636
 
670
637
  Returns
671
638
  -------
672
- dict
673
- A dictionary with class labels as keys and their respective
674
- prevalence estimates as values.
675
-
676
- Notes
677
- -----
678
- The prevalence is adjusted using the formula:
679
- prevalence = |mean_score - FPR| / (TPR - FPR),
680
- where mean_score is the average probability for the positive class.
681
-
682
- Raises
683
- ------
684
- ZeroDivisionError
685
- If `TPR - FPR` equals zero, indicating that the classifier's
686
- performance does not vary across the threshold range.
639
+ np.ndarray
640
+ An array of predicted prevalences for the classes.
687
641
  """
688
- prevalences = {}
689
-
690
- # Calculate probabilities for the positive class
642
+ # Get predicted probabilities for the positive class
691
643
  probabilities = self.predict_learner(X)[:, 1]
644
+
645
+ prevs = []
646
+
647
+ for thr, tpr, fpr in zip(self.threshold, self.tpr, self.fpr):
648
+ cc_output = len(probabilities[probabilities >= thr]) / len(probabilities)
649
+
650
+ if tpr - fpr == 0:
651
+ prevalence = cc_output
652
+ else:
653
+ prev = np.clip((cc_output - fpr) / (tpr - fpr), 0, 1)
654
+ prevs.append(prev)
655
+
656
+ prevalence = np.median(prevs)
657
+
658
+ prevalences = [1 - prevalence, prevalence]
692
659
 
693
- # Compute the mean score for the positive class
694
- mean_scores = np.mean(probabilities)
695
-
696
- # Adjust prevalence based on TPR and FPR
697
- if self.tpr - self.fpr == 0:
698
- prevalence = mean_scores
699
- else:
700
- prevalence = np.clip(abs(mean_scores - self.fpr) / (self.tpr - self.fpr), 0, 1)
701
-
702
- # Map the computed prevalence to the class labels
703
- prevalences[self.classes[0]] = 1 - prevalence
704
- prevalences[self.classes[1]] = prevalence
705
-
706
- return prevalences
707
-
708
- def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
709
- """
710
- Finds the true positive rate (TPR) and false positive rate (FPR)
711
- corresponding to the specified decision threshold.
712
-
713
- Parameters
714
- ----------
715
- thresholds : np.ndarray
716
- An array of threshold values.
717
- tprs : np.ndarray
718
- An array of true positive rates corresponding to the thresholds.
719
- fprs : np.ndarray
720
- An array of false positive rates corresponding to the thresholds.
721
-
722
- Returns
723
- -------
724
- tuple
725
- A tuple containing the specified threshold, TPR, and FPR.
726
-
727
- Raises
728
- ------
729
- IndexError
730
- If the specified threshold is not found in the `thresholds` array.
731
- """
732
- # Locate TPR and FPR for the specified threshold
733
- tpr = tprs[thresholds == self.threshold][0]
734
- fpr = fprs[thresholds == self.threshold][0]
735
- return (self.threshold, tpr, fpr)
736
-
737
-
738
-
739
-
740
- def best_tprfpr(self, thresholds:np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
741
- tpr = tprs[thresholds == self.threshold][0]
742
- fpr = fprs[thresholds == self.threshold][0]
743
- return (self.threshold, tpr, fpr)
744
-
745
-
746
-
747
-
748
-
660
+ return np.asarray(prevalences)
749
661
 
750
662
 
751
663
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mlquantify
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Quantification Library
5
5
  Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
6
6
  Maintainer: Luiz Fernando Luth Junior
@@ -1,4 +1,6 @@
1
+ MANIFEST.in
1
2
  README.md
3
+ VERSION.txt
2
4
  setup.py
3
5
  mlquantify/__init__.py
4
6
  mlquantify/base.py
@@ -1,20 +1,20 @@
1
1
  from setuptools import setup, find_packages
2
-
3
2
  import pathlib
4
3
 
5
4
  here = pathlib.Path(__file__).parent.resolve()
6
-
7
5
  long_description = (here / 'README.md').read_text(encoding='utf-8')
8
6
 
9
- VERSION = '0.1.1'
7
+ # a versão do arquivo VERSION.txt
8
+ version_file = here / 'VERSION.txt'
9
+ VERSION = version_file.read_text(encoding='utf-8').strip()
10
+
10
11
  DESCRIPTION = 'Quantification Library'
11
12
 
12
- # Setting up
13
13
  setup(
14
14
  name="mlquantify",
15
15
  version=VERSION,
16
16
  url="https://github.com/luizfernandolj/QuantifyML/tree/master",
17
- maintainer="Luiz Fernando Luth Junior",
17
+ maintainer="Luiz Fernando Luth Junior",
18
18
  description=DESCRIPTION,
19
19
  long_description=long_description,
20
20
  long_description_content_type="text/markdown",
@@ -30,4 +30,4 @@ setup(
30
30
  "Operating System :: MacOS :: MacOS X",
31
31
  "Operating System :: Microsoft :: Windows",
32
32
  ]
33
- )
33
+ )
File without changes
File without changes