mlquantify 0.0.11.2__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. mlquantify/__init__.py +32 -6
  2. mlquantify/base.py +559 -257
  3. mlquantify/classification/__init__.py +1 -1
  4. mlquantify/classification/methods.py +160 -0
  5. mlquantify/evaluation/__init__.py +14 -2
  6. mlquantify/evaluation/measures.py +215 -0
  7. mlquantify/evaluation/protocol.py +647 -0
  8. mlquantify/methods/__init__.py +37 -40
  9. mlquantify/methods/aggregative.py +1030 -0
  10. mlquantify/methods/meta.py +472 -0
  11. mlquantify/methods/mixture_models.py +1003 -0
  12. mlquantify/methods/non_aggregative.py +136 -0
  13. mlquantify/methods/threshold_optimization.py +959 -0
  14. mlquantify/model_selection.py +377 -232
  15. mlquantify/plots.py +367 -0
  16. mlquantify/utils/__init__.py +2 -2
  17. mlquantify/utils/general.py +334 -0
  18. mlquantify/utils/method.py +449 -0
  19. {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.0.dist-info}/METADATA +137 -122
  20. mlquantify-0.1.0.dist-info/RECORD +22 -0
  21. {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.0.dist-info}/WHEEL +1 -1
  22. mlquantify/classification/pwkclf.py +0 -73
  23. mlquantify/evaluation/measures/__init__.py +0 -26
  24. mlquantify/evaluation/measures/ae.py +0 -11
  25. mlquantify/evaluation/measures/bias.py +0 -16
  26. mlquantify/evaluation/measures/kld.py +0 -8
  27. mlquantify/evaluation/measures/mse.py +0 -12
  28. mlquantify/evaluation/measures/nae.py +0 -16
  29. mlquantify/evaluation/measures/nkld.py +0 -13
  30. mlquantify/evaluation/measures/nrae.py +0 -16
  31. mlquantify/evaluation/measures/rae.py +0 -12
  32. mlquantify/evaluation/measures/se.py +0 -12
  33. mlquantify/evaluation/protocol/_Protocol.py +0 -202
  34. mlquantify/evaluation/protocol/__init__.py +0 -2
  35. mlquantify/evaluation/protocol/app.py +0 -146
  36. mlquantify/evaluation/protocol/npp.py +0 -34
  37. mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +0 -62
  38. mlquantify/methods/aggregative/ThreholdOptm/__init__.py +0 -7
  39. mlquantify/methods/aggregative/ThreholdOptm/acc.py +0 -27
  40. mlquantify/methods/aggregative/ThreholdOptm/max.py +0 -23
  41. mlquantify/methods/aggregative/ThreholdOptm/ms.py +0 -21
  42. mlquantify/methods/aggregative/ThreholdOptm/ms2.py +0 -25
  43. mlquantify/methods/aggregative/ThreholdOptm/pacc.py +0 -41
  44. mlquantify/methods/aggregative/ThreholdOptm/t50.py +0 -21
  45. mlquantify/methods/aggregative/ThreholdOptm/x.py +0 -23
  46. mlquantify/methods/aggregative/__init__.py +0 -9
  47. mlquantify/methods/aggregative/cc.py +0 -32
  48. mlquantify/methods/aggregative/emq.py +0 -86
  49. mlquantify/methods/aggregative/fm.py +0 -72
  50. mlquantify/methods/aggregative/gac.py +0 -96
  51. mlquantify/methods/aggregative/gpac.py +0 -87
  52. mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +0 -81
  53. mlquantify/methods/aggregative/mixtureModels/__init__.py +0 -5
  54. mlquantify/methods/aggregative/mixtureModels/dys.py +0 -55
  55. mlquantify/methods/aggregative/mixtureModels/dys_syn.py +0 -89
  56. mlquantify/methods/aggregative/mixtureModels/hdy.py +0 -46
  57. mlquantify/methods/aggregative/mixtureModels/smm.py +0 -27
  58. mlquantify/methods/aggregative/mixtureModels/sord.py +0 -77
  59. mlquantify/methods/aggregative/pcc.py +0 -33
  60. mlquantify/methods/aggregative/pwk.py +0 -38
  61. mlquantify/methods/meta/__init__.py +0 -1
  62. mlquantify/methods/meta/ensemble.py +0 -236
  63. mlquantify/methods/non_aggregative/__init__.py +0 -1
  64. mlquantify/methods/non_aggregative/hdx.py +0 -71
  65. mlquantify/plots/__init__.py +0 -2
  66. mlquantify/plots/distribution_plot.py +0 -109
  67. mlquantify/plots/protocol_plot.py +0 -193
  68. mlquantify/utils/general_purposes/__init__.py +0 -8
  69. mlquantify/utils/general_purposes/convert_col_to_array.py +0 -13
  70. mlquantify/utils/general_purposes/generate_artificial_indexes.py +0 -29
  71. mlquantify/utils/general_purposes/get_real_prev.py +0 -9
  72. mlquantify/utils/general_purposes/load_quantifier.py +0 -4
  73. mlquantify/utils/general_purposes/make_prevs.py +0 -23
  74. mlquantify/utils/general_purposes/normalize.py +0 -20
  75. mlquantify/utils/general_purposes/parallel.py +0 -10
  76. mlquantify/utils/general_purposes/round_protocol_df.py +0 -14
  77. mlquantify/utils/method_purposes/__init__.py +0 -6
  78. mlquantify/utils/method_purposes/distances.py +0 -21
  79. mlquantify/utils/method_purposes/getHist.py +0 -13
  80. mlquantify/utils/method_purposes/get_scores.py +0 -33
  81. mlquantify/utils/method_purposes/moss.py +0 -16
  82. mlquantify/utils/method_purposes/ternary_search.py +0 -14
  83. mlquantify/utils/method_purposes/tprfpr.py +0 -42
  84. mlquantify-0.0.11.2.dist-info/RECORD +0 -73
  85. {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.0.dist-info}/top_level.txt +0 -0
mlquantify/evaluation/protocol/_Protocol.py
@@ -1,202 +0,0 @@
- from abc import ABC, abstractmethod
- import numpy as np
- import pandas as pd
- from typing import Union, List
- from sklearn.base import BaseEstimator
- from time import time
- from tqdm import tqdm
-
- from ...methods import get_method, METHODS, AGGREGATIVE, NON_AGGREGATIVE
- from ...utils import *
- from ..measures import get_measure, MEASURES
- from ...base import Quantifier, AggregativeQuantifier
-
- class Protocol(ABC):
-     """Base class for implementing different quantification protocols.
-
-     This abstract class provides a structure for creating protocols that involve
-     fitting quantification models to training data and generating predictions on test data.
-     It supports parallel processing, multiple iterations, and different output formats.
-
-     Args:
-         models (Union[List[Union[str, Quantifier]], str, Quantifier]):
-             List of quantification models, a single model name, or 'all' for all models.
-         batch_size (Union[List[int], int]):
-             Size of the batches to be processed, or a list of sizes.
-         learner (BaseEstimator, optional):
-             Machine learning model to be used with the quantifiers. Required for model methods.
-         n_iterations (int, optional):
-             Number of iterations for the protocol. Default is 1.
-         n_jobs (int, optional):
-             Number of jobs to run in parallel. Default is 1.
-         random_state (int, optional):
-             Seed for random number generation. Default is 32.
-         verbose (bool, optional):
-             Whether to print progress messages. Default is False.
-         return_type (str, optional):
-             Type of return value ('predictions' or 'table'). Default is 'predictions'.
-         measures (List[str], optional):
-             List of error measures to calculate. Must be in MEASURES or None. Default is None.
-     """
-
-
-     def __init__(self,
-                  models: Union[List[Union[str, Quantifier]], str, Quantifier],
-                  batch_size: Union[List[int], int],
-                  learner: BaseEstimator = None,
-                  n_iterations: int = 1,
-                  n_jobs: int = 1,
-                  random_state: int = 32,
-                  verbose: bool = False,
-                  return_type: str = "predictions",
-                  measures: List[str] = None):
-
-         assert not measures or all(m in MEASURES for m in measures), \
-             f"Invalid measure(s) provided. Valid options: {list(MEASURES.keys())} or None"
-         assert return_type in ["predictions", "table"], \
-             "Invalid return_type. Valid options: ['predictions', 'table']"
-
-         self.models = self._initialize_models(models, learner)
-         self.learner = learner
-         self.batch_size = batch_size
-         self.n_iterations = n_iterations
-         self.n_jobs = n_jobs
-         self.random_state = random_state
-         self.verbose = verbose
-         self.return_type = return_type
-         self.measures = measures
-
-     def _initialize_models(self, models, learner):
-         if isinstance(models, list):
-             if isinstance(models[0], Quantifier):
-                 return models
-             assert learner is not None, "Learner is required for model methods."
-             return [get_method(model)(learner) for model in models]
-         if isinstance(models, Quantifier):
-             return [models]
-
-         assert learner is not None, "Learner is required for model methods."
-
-         if models == "all":
-             print(hasattr(list(AGGREGATIVE.values())[0], "learner"))
-             models = [model(learner) if hasattr(model, "learner") else model() for model in METHODS.values()]
-             return models
-         if models == "aggregative":
-             return [model(learner) for model in AGGREGATIVE.values()]
-         if models == "non_aggregative":
-             return [model() for model in NON_AGGREGATIVE.values()]
-
-         return [get_method(models)(learner)]
-
-
-     def sout(self, msg):
-         if self.verbose:
-             print('[APP] ' + msg)
-
-
-     def fit(self, X_train, y_train):
-         """Fit all methods to the training data.
-
-         Args:
-             X_train (array-like): Training features.
-             y_train (array-like): Training labels.
-         """
-         self.sout("Fitting models")
-
-         args = ((model, X_train, y_train, self.verbose) for model in self.models)
-         self.models = parallel(
-             self._delayed_fit,
-             tqdm(args, desc="Fitting models", total=len(self.models)) if self.verbose else args,
-             self.n_jobs)
-
-         self.sout("Fit [Done]")
-         return self
-
-
-     def predict(self, X_test, y_test) -> Union[pd.DataFrame, tuple]:
-         """Generates several samples with artificial prevalences and sizes.
-         For each method, predicts on each sample, aggregating everything
-         into a pandas DataFrame if requested, or else returning just the predictions.
-
-         Args:
-             X_test (array-like): Test features.
-             y_test (array-like): Test labels.
-
-         Returns:
-             tuple: tuple containing the model names, real prevalences, and predicted prevalences, or
-             DataFrame: table of results, along with error measures if requested.
-         """
-
-
-         predictions = self.predict_protocol(X_test, y_test)
-
-
-         predictions_df = pd.DataFrame(predictions)
-
-         if self.return_type == "table":
-             predictions_df.columns = ["QUANTIFIER", "REAL_PREVS", "PRED_PREVS", "BATCH_SIZE"]
-
-             if self.measures:
-
-                 def smooth(values: np.ndarray) -> np.ndarray:
-                     smoothed_factor = 1 / (2 * len(X_test))
-
-                     values = (values + smoothed_factor) / (smoothed_factor * len(values) + 1)
-
-                     return values
-
-
-                 for metric in self.measures:
-                     predictions_df[metric] = predictions_df.apply(
-                         lambda row: get_measure(metric)(smooth(row["REAL_PREVS"]), smooth(row["PRED_PREVS"])),
-                         axis=1
-                     )
-
-             return predictions_df
-
-         predictions_array = predictions_df.to_numpy()
-         return (
-             predictions_array[:, 0],            # Model names
-             np.stack(predictions_array[:, 1]),  # Real prevalences
-             np.stack(predictions_array[:, 2])   # Predicted prevalences
-         )
-
-
-     @abstractmethod
-     def predict_protocol(self) -> np.ndarray:
-         """Abstract method that every protocol has to implement."""
-         ...
-
-     @abstractmethod
-     def _new_sample(self) -> tuple:
-         """Abstract method of sample extraction for each protocol.
-
-         Returns:
-             tuple: tuple containing the X_sample and the y_sample
-         """
-         ...
-
-
-     @abstractmethod
-     def _delayed_predict(self, args) -> tuple:
-         """Abstract method for predicting on the extracted
-         samples; delayed so it can run in parallel for
-         efficiency purposes.
-         """
-         ...
-
-
-
-     def _delayed_fit(self, args):
-         model, X_train, y_train, verbose = args
-
-         if verbose:
-             print(f"\tFitting {model.__class__.__name__}")
-             start = time()
-
-         model = model.fit(X=X_train, y=y_train)
-
-         if verbose:
-             end = time()
-             print(f"\t\\--Fit ended for {model.__class__.__name__} in {round(end - start, 3)} seconds")
-         return model
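
Note on the smooth helper above: it is standard additive smoothing, p_s = (p + eps) / (eps * |C| + 1) with eps = 1 / (2T) for a test set of size T, which keeps every prevalence strictly positive (measures such as KLD would otherwise blow up on zeros). A minimal standalone sketch of the same computation, using hypothetical values rather than the package's API:

import numpy as np

def smooth(prevs: np.ndarray, test_size: int) -> np.ndarray:
    # Additive smoothing with eps = 1 / (2 * test_size); the result
    # stays strictly positive and still sums to 1.
    eps = 1 / (2 * test_size)
    return (prevs + eps) / (eps * len(prevs) + 1)

print(smooth(np.array([0.0, 1.0]), test_size=500))  # ~[0.000998 0.999002]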
mlquantify/evaluation/protocol/__init__.py
@@ -1,2 +0,0 @@
- from .app import APP
- from .npp import NPP
mlquantify/evaluation/protocol/app.py
@@ -1,146 +0,0 @@
- import numpy as np
- import pandas as pd
- from typing import Union, List
- from sklearn.base import BaseEstimator
- import itertools
- from tqdm import tqdm
-
- from ...utils import generate_artificial_indexes, parallel
- from ...base import Quantifier
- from ._Protocol import Protocol
-
- class APP(Protocol):
-     """Artificial Prevalence Protocol. It splits a test set into several
-     samples of varying prevalence and size, over n iterations.
-     For a list of Quantifiers, it computes training and testing
-     for each one and returns either a table of results with error measures
-     or just the predictions.
-     """
-
-     def __init__(self,
-                  models: Union[List[Union[str, Quantifier]], str, Quantifier],
-                  batch_size: Union[List[int], int],
-                  learner: BaseEstimator = None,
-                  n_prevs: int = 100,
-                  n_iterations: int = 1,
-                  n_jobs: int = 1,
-                  random_state: int = 32,
-                  verbose: bool = False,
-                  return_type: str = "predictions",
-                  measures: List[str] = None):
-
-         super().__init__(models, batch_size, learner, n_iterations, n_jobs, random_state, verbose, return_type, measures)
-         self.n_prevs = n_prevs
-
-     def predict_protocol(self, X_test, y_test) -> tuple:
-         """Generates several samples with artificial prevalences and sizes.
-         For each model, predicts on each sample, aggregating all together
-         into a pandas DataFrame if requested, or else just the predictions.
-
-         Args:
-             X_test (array-like): Features of the test set.
-             y_test (array-like): Labels of the test set.
-
-         Returns:
-             tuple: predictions containing the model name, real prev, pred prev, and batch size
-         """
-
-         n_dim = len(np.unique(y_test))
-         prevs = self._generate_artificial_prevalences(n_dim, self.n_prevs, self.n_iterations)
-
-         args = self._generate_args(X_test, y_test, prevs)
-         batch_size = 1
-
-         if isinstance(self.batch_size, list):
-             batch_size = len(self.batch_size)
-
-         size = len(prevs) * len(self.models) * batch_size * self.n_iterations
-
-         predictions = parallel(
-             self._delayed_predict,
-             tqdm(args, desc="Running APP", total=size) if self.verbose else args,
-             n_jobs=self.n_jobs
-         )
-
-         return predictions
-
-
-     def _new_sample(self, X, y, prev: List[float], batch_size: int) -> tuple:
-         """Generates a new sample with a specified prevalence and size.
-
-         Args:
-             X (array-like): Features from which to take the new sample.
-             y (array-like): Labels from which to take the new sample.
-             prev (List[float]): The specified prevalences.
-             batch_size (int): Sample size.
-
-         Returns:
-             tuple: New sample's features and labels.
-         """
-         sample_index = generate_artificial_indexes(y, prev, batch_size, np.unique(y))
-         return np.take(X, sample_index, axis=0), np.take(y, sample_index, axis=0)
-
-
-
-     def _delayed_predict(self, args) -> tuple:
-         """Predicts on the new sample; delayed so it can run
-         in parallel for efficiency purposes.
-
-         Args:
-             args (Any): arguments to use
-
-         Returns:
-             tuple: the (method name, real_prev, pred_prev, sample_size)
-         """
-
-         X, y, model, prev, batch_size, verbose = args
-
-         if verbose:
-             print(f'\t {model.__class__.__name__} with {str(batch_size)} instances and prev {str(prev)}')
-
-         X_sample, _ = self._new_sample(X, y, prev, batch_size)
-         prev_pred = np.asarray(list(model.predict(X=X_sample).values()))
-
-         if verbose:
-             print(f'\t \\--Ending {model.__class__.__name__} with {str(batch_size)} instances and prev {str(prev)} \n')
-
-         return [model.__class__.__name__, prev, prev_pred, batch_size]
-
-
-
-
-     def _generate_artificial_prevalences(self, n_dim: int, n_prev: int, n_iter: int) -> np.ndarray:
-         """Generates artificial prevalences with n_dim dimensions.
-
-         Args:
-             n_dim (int): Number of dimensions for the artificial prevalence.
-             n_prev (int): Number of prevalence points to generate.
-             n_iter (int): Number of iterations.
-
-         Returns:
-             np.ndarray: Generated artificial prevalences.
-         """
-         s = np.linspace(0., 1., n_prev, endpoint=True)
-         prevs = np.array([p + (1 - sum(p),) for p in itertools.product(*(s,) * (n_dim - 1)) if sum(p) <= 1])
-
-         return np.repeat(prevs, n_iter, axis=0) if n_iter > 1 else prevs
-
-
-
-     def _generate_args(self, X_test, y_test, prevs):
-         """Generates arguments for parallel processing based on the model, prevalence, and batch size.
-
-         Args:
-             X_test (array-like): Features of the test set.
-             y_test (array-like): Labels of the test set.
-             prevs (np.ndarray): Artificial prevalences generated.
-
-         Returns:
-             List[tuple]: List of arguments for parallel processing.
-         """
-         if isinstance(self.batch_size, list):
-             return [(X_test, y_test, model, prev, bs, self.verbose)
-                     for prev in prevs for bs in self.batch_size for model in self.models]
-         else:
-             return [(X_test, y_test, model, prev, self.batch_size, self.verbose)
-                     for prev in prevs for model in self.models]
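
The grid built by _generate_artificial_prevalences enumerates points on the unit simplex: a uniform grid over the first n_dim - 1 classes, with the last class taking the remainder. A minimal standalone sketch of the same idea (the function name here is illustrative, not the package API):

import itertools
import numpy as np

def artificial_prevalences(n_classes: int, n_points: int) -> np.ndarray:
    # Uniform grid for the first n_classes - 1 classes; the last class
    # receives the remainder so that each row sums to 1.
    s = np.linspace(0.0, 1.0, n_points, endpoint=True)
    rows = [p + (1 - sum(p),)
            for p in itertools.product(s, repeat=n_classes - 1)
            if sum(p) <= 1]
    return np.array(rows)

print(artificial_prevalences(2, 5))
# [[0. 1.] [0.25 0.75] [0.5 0.5] [0.75 0.25] [1. 0.]]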
mlquantify/evaluation/protocol/npp.py
@@ -1,34 +0,0 @@
- from typing import Union, List
- from sklearn.base import BaseEstimator
-
- from ...base import Quantifier
- from ._Protocol import Protocol
-
-
- class NPP(Protocol):
-
-
-     def __init__(self,
-                  models: Union[List[Union[str, Quantifier]], str, Quantifier],
-                  batch_size: Union[List[int], int],
-                  learner: BaseEstimator = None,
-                  n_iterations: int = 1,
-                  n_jobs: int = 1,
-                  random_state: int = 32,
-                  verbose: bool = False,
-                  return_type: str = "predictions",
-                  measures: List[str] = None):
-
-         super().__init__(models, batch_size, learner, n_iterations, n_jobs, random_state, verbose, return_type, measures)
-
-
-     def predict_protocol(self, X_test, y_test) -> tuple:
-         raise NotImplementedError
-
-
-     def _new_sample(self, X, y, prev: List[float], batch_size: int) -> tuple:
-         raise NotImplementedError
-
-
-     def _delayed_predict(self, args) -> tuple:
-         raise NotImplementedError
mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py
@@ -1,62 +0,0 @@
- from abc import abstractmethod
- import numpy as np
- from sklearn.base import BaseEstimator
-
- from ....base import AggregativeQuantifier
- from ....utils import adjust_threshold, get_scores
-
- class ThresholdOptimization(AggregativeQuantifier):
-     """Generic class for methods based on adjustments
-     of the decision boundary of the underlying classifier in order
-     to make the ACC (the base method for threshold methods) estimation
-     more numerically stable. Most of its strategies involve changing
-     the behavior of the denominator of the ACC equation.
-     """
-     # Class for optimizing classification thresholds
-
-     def __init__(self, learner: BaseEstimator):
-         self.learner = learner
-         self.threshold = None
-         self.cc_output = None
-         self.tpr = None
-         self.fpr = None
-
-     @property
-     def multiclass_method(self) -> bool:
-         """All threshold methods are binary (non-multiclass)."""
-         return False
-
-     def _fit_method(self, X, y):
-
-         y_labels, probabilities = get_scores(X, y, self.learner, self.cv_folds, self.learner_fitted)
-
-         # Adjust thresholds and compute true and false positive rates
-         thresholds, tprs, fprs = adjust_threshold(y_labels, probabilities[:, 1], self.classes)
-
-         # Find the best threshold based on TPR and FPR
-         self.threshold, self.tpr, self.fpr = self.best_tprfpr(thresholds, tprs, fprs)
-
-         return self
-
-     def _predict_method(self, X) -> dict:
-
-         probabilities = self.learner.predict_proba(X)[:, 1]
-
-         # Compute the classify-and-count output
-         self.cc_output = len(probabilities[probabilities >= self.threshold]) / len(probabilities)
-
-         # Calculate prevalence, ensuring it's within [0, 1]
-         if self.tpr - self.fpr == 0:
-             prevalence = self.cc_output
-         else:
-             # Equation shared by all threshold methods to compute prevalence
-             prevalence = np.clip((self.cc_output - self.fpr) / (self.tpr - self.fpr), 0, 1)
-
-         prevalences = [1 - prevalence, prevalence]
-
-         return np.asarray(prevalences)
-
-     @abstractmethod
-     def best_tprfpr(self, thresholds: np.ndarray, tpr: np.ndarray, fpr: np.ndarray) -> float:
-         """Abstract method for determining the best TPR and FPR to use in the equation."""
-         ...
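
For reference, the adjustment in _predict_method above is the classic ACC correction, prevalence = (cc_output - fpr) / (tpr - fpr), clipped to [0, 1]; the subclasses below differ only in how they choose the (threshold, tpr, fpr) triple. A quick numeric check with made-up rates:

import numpy as np

tpr, fpr = 0.8, 0.2   # rates estimated on training data (hypothetical)
cc_output = 0.5       # fraction of test scores at or above the threshold
prevalence = np.clip((cc_output - fpr) / (tpr - fpr), 0.0, 1.0)
print(prevalence)     # (0.5 - 0.2) / (0.8 - 0.2) = 0.5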
mlquantify/methods/aggregative/ThreholdOptm/__init__.py
@@ -1,7 +0,0 @@
- from .acc import ACC
- from .max import MAX
- from .x import X_method
- from .t50 import T50
- from .ms import MS
- from .ms2 import MS2
- from .pacc import PACC
mlquantify/methods/aggregative/ThreholdOptm/acc.py
@@ -1,27 +0,0 @@
-
- import numpy as np
- from sklearn.base import BaseEstimator
-
- from ._ThreholdOptimization import ThresholdOptimization
-
- class ACC(ThresholdOptimization):
-     """Adjusted Classify and Count, or Adjusted Count. This is the
-     base method for the threshold methods.
-     As described in the Threshold base class, this method
-     estimates the true positive and false positive rates from
-     the training data and uses them to adjust the output
-     of the CC method.
-     """
-
-     def __init__(self, learner: BaseEstimator, threshold: float = 0.5):
-         assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
-         super().__init__(learner)
-         self.threshold = threshold
-
-
-     def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
-         # Get the tpr and fpr where the threshold equals the base threshold (default 0.5)
-
-         tpr = tprs[thresholds == self.threshold][0]
-         fpr = fprs[thresholds == self.threshold][0]
-         return (self.threshold, tpr, fpr)
mlquantify/methods/aggregative/ThreholdOptm/max.py
@@ -1,23 +0,0 @@
- import numpy as np
- from sklearn.base import BaseEstimator
-
- from ._ThreholdOptimization import ThresholdOptimization
-
- class MAX(ThresholdOptimization):
-     """Threshold MAX. This method uses the
-     threshold that maximizes the difference between
-     tpr and fpr in the denominator of the equation.
-     """
-
-     def __init__(self, learner: BaseEstimator):
-         assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
-         super().__init__(learner)
-
-
-     def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
-         max_index = np.argmax(np.abs(tprs - fprs))
-
-         threshold = thresholds[max_index]
-         tpr = tprs[max_index]
-         fpr = fprs[max_index]
-         return (threshold, tpr, fpr)
mlquantify/methods/aggregative/ThreholdOptm/ms.py
@@ -1,21 +0,0 @@
- import numpy as np
- from sklearn.base import BaseEstimator
-
- from ._ThreholdOptimization import ThresholdOptimization
-
- class MS(ThresholdOptimization):
-     """Median Sweep. This method evaluates an
-     ensemble of threshold-based estimates and
-     takes the median of the tpr and fpr values.
-     """
-
-     def __init__(self, learner: BaseEstimator, threshold: float = 0.5):
-         assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
-         super().__init__(learner)
-         self.threshold = threshold
-
-
-     def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
-         tpr = np.median(tprs)
-         fpr = np.median(fprs)
-         return (self.threshold, tpr, fpr)
mlquantify/methods/aggregative/ThreholdOptm/ms2.py
@@ -1,25 +0,0 @@
- import numpy as np
- from sklearn.base import BaseEstimator
-
- from ._ThreholdOptimization import ThresholdOptimization
-
- class MS2(ThresholdOptimization):
-     """Median Sweep 2. It relies on the same
-     strategy as Median Sweep, but computes
-     the medians only over thresholds where
-     tpr - fpr > 0.25.
-     """
-
-     def __init__(self, learner: BaseEstimator):
-         assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
-         super().__init__(learner)
-
-
-     def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
-         indices = np.where(np.abs(tprs - fprs) > 0.25)[0]
-
-         threshold = np.median(thresholds[indices])
-         tpr = np.median(tprs[indices])
-         fpr = np.median(fprs[indices])
-
-         return (threshold, tpr, fpr)
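
MS and MS2 differ only in which thresholds the medians are taken over. A small sketch on made-up tpr/fpr curves (hypothetical numbers, not library output):

import numpy as np

thresholds = np.array([0.3, 0.4, 0.5, 0.6, 0.7])
tprs = np.array([0.95, 0.90, 0.80, 0.65, 0.50])
fprs = np.array([0.80, 0.40, 0.20, 0.10, 0.05])

# MS: medians over every candidate threshold
print(np.median(tprs), np.median(fprs))              # 0.8 0.2

# MS2: medians restricted to thresholds with |tpr - fpr| > 0.25
keep = np.abs(tprs - fprs) > 0.25                    # drops the 0.3 threshold
print(np.median(tprs[keep]), np.median(fprs[keep]))  # 0.725 0.15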
mlquantify/methods/aggregative/ThreholdOptm/pacc.py
@@ -1,41 +0,0 @@
-
- import numpy as np
- from sklearn.base import BaseEstimator
-
- from ._ThreholdOptimization import ThresholdOptimization
-
- class PACC(ThresholdOptimization):
-     """Probabilistic Adjusted Classify and Count.
-     This method adapts the ACC approach by using average
-     class-conditional confidences from a probabilistic
-     classifier instead of true positive and false positive rates.
-     """
-
-     def __init__(self, learner: BaseEstimator, threshold: float = 0.5):
-         assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
-         super().__init__(learner)
-         self.threshold = threshold
-
-     def _predict_method(self, X):
-         prevalences = {}
-
-         probabilities = self.learner.predict_proba(X)[:, 1]
-
-         mean_scores = np.mean(probabilities)
-
-         if self.tpr - self.fpr == 0:
-             prevalence = mean_scores
-         else:
-             prevalence = np.clip(abs(mean_scores - self.fpr) / (self.tpr - self.fpr), 0, 1)
-
-         prevalences[self.classes[1]] = prevalence
-         prevalences[self.classes[0]] = 1 - prevalence
-
-         return prevalences
-
-
-
-     def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
-         tpr = tprs[thresholds == self.threshold][0]
-         fpr = fprs[thresholds == self.threshold][0]
-         return (self.threshold, tpr, fpr)
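
PACC swaps hard counts for the mean positive posterior, so tpr and fpr here act as average class-conditional scores rather than rates. A numeric sketch with hypothetical values:

import numpy as np

tpr, fpr = 0.75, 0.25   # average positive score per true class (hypothetical)
mean_scores = 0.40      # mean positive posterior on the test sample
prevalence = np.clip(abs(mean_scores - fpr) / (tpr - fpr), 0.0, 1.0)
print(prevalence)       # (0.40 - 0.25) / (0.75 - 0.25) ~ 0.3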
mlquantify/methods/aggregative/ThreholdOptm/t50.py
@@ -1,21 +0,0 @@
- import numpy as np
- from sklearn.base import BaseEstimator
-
- from ._ThreholdOptimization import ThresholdOptimization
-
- class T50(ThresholdOptimization):
-     """Threshold 50. This method uses the
-     threshold where tpr is closest to 0.5.
-     """
-
-     def __init__(self, learner: BaseEstimator):
-         assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
-         super().__init__(learner)
-
-
-     def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
-         min_index = np.argmin(np.abs(tprs - 0.5))
-         threshold = thresholds[min_index]
-         tpr = tprs[min_index]
-         fpr = fprs[min_index]
-         return (threshold, tpr, fpr)
mlquantify/methods/aggregative/ThreholdOptm/x.py
@@ -1,23 +0,0 @@
- import numpy as np
- from sklearn.base import BaseEstimator
-
- from ._ThreholdOptimization import ThresholdOptimization
-
- class X_method(ThresholdOptimization):
-     """Threshold X. This method uses the
-     threshold where fpr is closest to 1 - tpr.
-     """
-
-     def __init__(self, learner: BaseEstimator):
-         assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
-         super().__init__(learner)
-
-
-     def best_tprfpr(self, thresholds: np.ndarray, tprs: np.ndarray, fprs: np.ndarray) -> tuple:
-         min_index = np.argmin(abs(1 - (tprs + fprs)))
-
-         threshold = thresholds[min_index]
-         tpr = tprs[min_index]
-         fpr = fprs[min_index]
-
-         return (threshold, tpr, fpr)
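
MAX, T50, and X differ only in the rule that picks the operating point from the tpr/fpr curves. A side-by-side sketch on made-up curves (hypothetical numbers):

import numpy as np

thresholds = np.array([0.3, 0.4, 0.5, 0.6, 0.7])
tprs = np.array([0.95, 0.90, 0.80, 0.55, 0.40])
fprs = np.array([0.70, 0.40, 0.20, 0.10, 0.05])

i_max = np.argmax(np.abs(tprs - fprs))      # MAX: maximize tpr - fpr
i_t50 = np.argmin(np.abs(tprs - 0.5))       # T50: tpr closest to 0.5
i_x = np.argmin(np.abs(1 - (tprs + fprs)))  # X: fpr closest to 1 - tpr

print(thresholds[i_max], thresholds[i_t50], thresholds[i_x])  # 0.5 0.6 0.5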
mlquantify/methods/aggregative/__init__.py
@@ -1,9 +0,0 @@
- from .ThreholdOptm import *
- from .mixtureModels import *
- from .cc import CC
- from .pcc import PCC
- from .gac import GAC
- from .gpac import GPAC
- from .fm import FM
- from .emq import EMQ
- from .pwk import PWK