mlquantify 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlquantify/__init__.py +0 -29
- mlquantify/adjust_counting/__init__.py +14 -0
- mlquantify/adjust_counting/_adjustment.py +365 -0
- mlquantify/adjust_counting/_base.py +247 -0
- mlquantify/adjust_counting/_counting.py +145 -0
- mlquantify/adjust_counting/_utils.py +114 -0
- mlquantify/base.py +117 -519
- mlquantify/base_aggregative.py +209 -0
- mlquantify/calibration.py +1 -0
- mlquantify/confidence.py +335 -0
- mlquantify/likelihood/__init__.py +5 -0
- mlquantify/likelihood/_base.py +161 -0
- mlquantify/likelihood/_classes.py +414 -0
- mlquantify/meta/__init__.py +1 -0
- mlquantify/meta/_classes.py +761 -0
- mlquantify/metrics/__init__.py +21 -0
- mlquantify/metrics/_oq.py +109 -0
- mlquantify/metrics/_rq.py +98 -0
- mlquantify/{evaluation/measures.py → metrics/_slq.py} +43 -28
- mlquantify/mixture/__init__.py +7 -0
- mlquantify/mixture/_base.py +153 -0
- mlquantify/mixture/_classes.py +400 -0
- mlquantify/mixture/_utils.py +112 -0
- mlquantify/model_selection/__init__.py +9 -0
- mlquantify/model_selection/_protocol.py +358 -0
- mlquantify/model_selection/_search.py +315 -0
- mlquantify/model_selection/_split.py +1 -0
- mlquantify/multiclass.py +350 -0
- mlquantify/neighbors/__init__.py +9 -0
- mlquantify/neighbors/_base.py +198 -0
- mlquantify/neighbors/_classes.py +159 -0
- mlquantify/{classification/methods.py → neighbors/_classification.py} +48 -66
- mlquantify/neighbors/_kde.py +270 -0
- mlquantify/neighbors/_utils.py +135 -0
- mlquantify/neural/__init__.py +1 -0
- mlquantify/utils/__init__.py +47 -2
- mlquantify/utils/_artificial.py +27 -0
- mlquantify/utils/_constraints.py +219 -0
- mlquantify/utils/_context.py +21 -0
- mlquantify/utils/_decorators.py +36 -0
- mlquantify/utils/_exceptions.py +12 -0
- mlquantify/utils/_get_scores.py +159 -0
- mlquantify/utils/_load.py +18 -0
- mlquantify/utils/_parallel.py +6 -0
- mlquantify/utils/_random.py +36 -0
- mlquantify/utils/_sampling.py +273 -0
- mlquantify/utils/_tags.py +44 -0
- mlquantify/utils/_validation.py +447 -0
- mlquantify/utils/prevalence.py +61 -0
- {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/METADATA +2 -1
- mlquantify-0.1.9.dist-info/RECORD +53 -0
- mlquantify/classification/__init__.py +0 -1
- mlquantify/evaluation/__init__.py +0 -14
- mlquantify/evaluation/protocol.py +0 -291
- mlquantify/methods/__init__.py +0 -37
- mlquantify/methods/aggregative.py +0 -1159
- mlquantify/methods/meta.py +0 -472
- mlquantify/methods/mixture_models.py +0 -1003
- mlquantify/methods/non_aggregative.py +0 -136
- mlquantify/methods/threshold_optimization.py +0 -869
- mlquantify/model_selection.py +0 -377
- mlquantify/plots.py +0 -367
- mlquantify/utils/general.py +0 -371
- mlquantify/utils/method.py +0 -449
- mlquantify-0.1.7.dist-info/RECORD +0 -22
- {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/WHEEL +0 -0
- {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/top_level.txt +0 -0
|
@@ -1,291 +0,0 @@
|
|
|
1
|
-
from abc import ABC, abstractmethod
|
|
2
|
-
from logging import warning
|
|
3
|
-
import numpy as np
|
|
4
|
-
from typing import Generator, Tuple
|
|
5
|
-
from tqdm import tqdm
|
|
6
|
-
|
|
7
|
-
from ..utils.general import *
|
|
8
|
-
|
|
9
|
-
class Protocol(ABC):
    """Base class for evaluation protocols.

    A protocol turns a dataset into a sequence of index arrays ("batches").
    Subclasses decide how the indices are drawn by implementing
    ``_iter_indices``; callers consume them through ``split``.

    Parameters
    ----------
    batch_size : int or list of int
        The size of the batches to be used in the evaluation. A single
        integer (Python or NumPy) is wrapped in a one-element list.
    random_state : int, optional
        The random seed for reproducibility. It is only stored here; using
        it is up to the subclass.
    **kwargs : int, float or list
        Extra protocol parameters. Each one is stored as an attribute of the
        same name and scales ``n_combinations`` (by its length for a list,
        by its value for a number).

    Attributes
    ----------
    batch_size : list of int
        The batch sizes, always stored as a sequence.
    random_state : int or None
        The seed given at construction time.
    n_combinations : int
        Number of batch sizes multiplied by the contribution of every extra
        keyword argument.

    Raises
    ------
    ValueError
        If an extra keyword argument is neither a number nor a list.

    Notes
    -----
    This class serves as a base class for different evaluation protocols,
    each with its own strategy for splitting the data into batches.

    Examples
    --------
    >>> class MyCustomProtocol(Protocol):
    ...     def _iter_indices(self, X, y):
    ...         for batch_size in self.batch_size:
    ...             yield np.random.choice(X.shape[0], batch_size, replace=True)
    ...
    >>> protocol = MyCustomProtocol(batch_size=100, random_state=42)
    >>> for idx in protocol.split(X, y):
    ...     X_batch, y_batch = X[idx], y[idx]
    """

    def __init__(self, batch_size, random_state=None, **kwargs):
        # NumPy integer scalars are accepted alongside plain ints so that a
        # size computed with NumPy does not fall into the ``len()`` branch.
        single_size = isinstance(batch_size, (int, np.integer))

        self.batch_size = [batch_size] if single_size else batch_size
        self.n_combinations = 1 if single_size else len(batch_size)
        self.random_state = random_state

        for name, value in kwargs.items():
            # The attribute is set before validation, as callers may rely on
            # it being present even when the ValueError below is handled.
            setattr(self, name, value)
            if isinstance(value, list):
                self.n_combinations *= len(value)
            elif isinstance(value, (int, float)):
                self.n_combinations *= value
            else:
                raise ValueError(f"Invalid argument {name}={value}: must be int/float or list of int/float.")

    def split(self, X: np.ndarray, y: np.ndarray) -> Generator[np.ndarray, None, None]:
        """Split the data into samples for evaluation.

        Parameters
        ----------
        X : np.ndarray
            The input features.
        y : np.ndarray
            The target labels.

        Yields
        ------
        np.ndarray
            The indices of one batch, exactly as produced by
            ``_iter_indices``.
        """
        for idx in self._iter_indices(X, y):
            if len(idx) > len(X):
                # Informational only: the batch is still yielded unchanged.
                warning(f"Batch size {len(idx)} exceeds dataset size {len(X)}. Replacement sampling will be used.")
            yield idx

    @abstractmethod
    def _iter_indices(self, X, y):
        """Abstract method to be implemented by subclasses to yield indices for each batch."""
        pass

    def get_n_combinations(self) -> int:
        """Get the number of combinations for the current protocol."""
        return self.n_combinations
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
class APP(Protocol):
    """Artificial Prevalence Protocol (APP) for evaluation.

    This protocol generates artificial prevalence distributions for the
    evaluation in an exhaustive manner, testing all possible combinations of
    prevalences.

    Parameters
    ----------
    batch_size : int or list of int
        The size of the batches to be used in the evaluation.
    n_prevalences : int
        The number of artificial prevalences to generate.
    repeats : int, optional
        The number of times to repeat the evaluation (forwarded to the
        prevalence generator as ``n_iter``).
    random_state : int, optional
        The random seed for reproducibility. It is stored on the instance
        but not passed to any helper called in this class.

    Attributes
    ----------
    n_prevalences : int
        The number of artificial prevalences to generate.
    repeats : int
        The number of times to repeat the evaluation.
    random_state : int
        The random seed given at construction time.

    Notes
    -----
    It is important to note that in case of multiclass problems, the time
    complexity of this protocol can be significantly higher due to the
    increased number of combinations to evaluate.

    Examples
    --------
    >>> protocol = APP(batch_size=[100, 200], n_prevalences=5, repeats=3, random_state=42)
    >>> for idx in protocol.split(X, y):
    ...     X_batch, y_batch = X[idx], y[idx]
    """

    def __init__(self, batch_size, n_prevalences, repeats=1, random_state=None):
        # The base class stores both extras as attributes and folds them
        # into ``n_combinations``.
        super().__init__(batch_size=batch_size,
                         random_state=random_state,
                         n_prevalences=n_prevalences,
                         repeats=repeats)

    def _iter_indices(self, X: np.ndarray, y: np.ndarray) -> Generator[np.ndarray, None, None]:
        """Yield one index array per generated prevalence and batch size."""
        n_dim = len(np.unique(y))  # one prevalence dimension per class seen in y

        for batch_size in self.batch_size:
            # NOTE(review): the helpers below presumably come from the
            # ``..utils.general`` star import; the grid is rebuilt for every
            # batch size in case the generator is not deterministic.
            prevalences = generate_artificial_prevalences(n_dim=n_dim,
                                                          n_prev=self.n_prevalences,
                                                          n_iter=self.repeats)
            for prev in prevalences:
                indexes = get_indexes_with_prevalence(y, prev, batch_size)
                yield indexes
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
class NPP(Protocol):
    """No Prevalence Protocol (NPP) for evaluation.

    This protocol just samples the data without any consideration for
    prevalence, with all instances having equal probability of being
    selected.

    Parameters
    ----------
    batch_size : int or list of int
        The size of the batches to be used in the evaluation.
    random_state : int, optional
        The random seed for reproducibility. When given, every call to
        ``split`` draws from a generator freshly seeded with it; when
        ``None``, NumPy's global random state is used.

    Attributes
    ----------
    batch_size : list of int
        The batch sizes, one sample is drawn per entry.
    random_state : int or None
        The random seed given at construction time.

    Examples
    --------
    >>> protocol = NPP(batch_size=100, random_state=42)
    >>> for idx in protocol.split(X, y):
    ...     X_batch, y_batch = X[idx], y[idx]
    """

    def _iter_indices(self, X: np.ndarray, y: np.ndarray) -> Generator[np.ndarray, None, None]:
        """Yield, for each batch size, row indices drawn uniformly with replacement."""
        # Falling back to the global ``np.random`` module keeps the previous
        # behaviour (including ``np.random.seed`` set by the caller) when no
        # seed was requested.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        for batch_size in self.batch_size:
            yield rng.choice(X.shape[0], batch_size, replace=True)
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
class UPP(Protocol):
    """Uniform Prevalence Protocol (UPP) for evaluation.

    An extension of the APP that generates artificial prevalence
    distributions uniformly across all classes utilizing the kraemer
    sampling method.

    Parameters
    ----------
    batch_size : int or list of int
        The size of the batches to be used in the evaluation.
    n_prevalences : int
        The number of artificial prevalences to generate.
    repeats : int
        The number of times to repeat the evaluation (forwarded to the
        sampler as ``n_iter``).
    random_state : int, optional
        The random seed for reproducibility. It is stored on the instance
        but not passed to any helper called in this class.

    Attributes
    ----------
    n_prevalences : int
        The number of artificial prevalences to generate.
    repeats : int
        The number of times to repeat the evaluation.
    random_state : int
        The random seed given at construction time.

    Examples
    --------
    >>> protocol = UPP(batch_size=100, n_prevalences=5, repeats=3, random_state=42)
    >>> for idx in protocol.split(X, y):
    ...     X_batch, y_batch = X[idx], y[idx]
    """

    def __init__(self, batch_size, n_prevalences, repeats=1, random_state=None):
        # The base class stores both extras as attributes and folds them
        # into ``n_combinations``.
        super().__init__(batch_size=batch_size,
                         random_state=random_state,
                         n_prevalences=n_prevalences,
                         repeats=repeats)

    def _iter_indices(self, X: np.ndarray, y: np.ndarray) -> Generator[np.ndarray, None, None]:
        """Yield one index array per sampled prevalence and batch size."""
        n_dim = len(np.unique(y))  # one prevalence dimension per class seen in y

        for batch_size in self.batch_size:
            # A new set of prevalences is sampled for every batch size.
            # NOTE(review): ``kraemer_sampling`` and
            # ``get_indexes_with_prevalence`` presumably come from the
            # ``..utils.general`` star import.
            prevalences = kraemer_sampling(n_dim=n_dim,
                                           n_prev=self.n_prevalences,
                                           n_iter=self.repeats)

            for prev in prevalences:
                indexes = get_indexes_with_prevalence(y, prev, batch_size)
                yield indexes
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
class PPP(Protocol):
    """Personalized Prevalence Protocol (PPP) for evaluation.

    This protocol generates artificial prevalence distributions personalized
    for each class.

    Parameters
    ----------
    batch_size : int or list of int
        The size of the batches to be used in the evaluation.
    prevalences : list of float or list of sequences
        The prevalences to evaluate. A scalar entry ``p`` is treated as a
        binary problem and expanded to ``[1 - p, p]``; any other entry is
        passed through unchanged as a full prevalence vector.
    repeats : int
        The number of times each prevalence is sampled for every batch size.
    random_state : int, optional
        The random seed for reproducibility. It is stored on the instance
        but not passed to any helper called in this class.

    Attributes
    ----------
    prevalences : list
        The prevalences given at construction time.
    repeats : int
        The number of times each prevalence is sampled.
    random_state : int
        The random seed given at construction time.

    Examples
    --------
    >>> protocol = PPP(batch_size=100, prevalences=[0.1, 0.9], repeats=3, random_state=42)
    >>> for idx in protocol.split(X, y):
    ...     X_batch, y_batch = X[idx], y[idx]
    """

    def __init__(self, batch_size, prevalences, repeats=1, random_state=None):
        # The base class stores both extras as attributes and folds them
        # into ``n_combinations``.
        super().__init__(batch_size=batch_size,
                         random_state=random_state,
                         prevalences=prevalences,
                         repeats=repeats)

    def _iter_indices(self, X: np.ndarray, y: np.ndarray) -> Generator[np.ndarray, None, None]:
        """Yield ``repeats`` index arrays per (batch size, prevalence) pair."""
        # ``repeats`` already scales ``n_combinations``; honouring it here
        # keeps the number of yielded batches equal to that count.
        n_repeats = int(self.repeats)

        for batch_size in self.batch_size:
            for prev in self.prevalences:
                # Any real scalar (including 0, 1 and NumPy floats) is the
                # positive-class prevalence of a binary problem.
                if isinstance(prev, (int, float, np.number)):
                    prev = [1 - prev, prev]

                for _ in range(n_repeats):
                    # NOTE(review): ``get_indexes_with_prevalence`` presumably
                    # comes from the ``..utils.general`` star import.
                    indexes = get_indexes_with_prevalence(y, prev, batch_size)
                    yield indexes
|
|
291
|
-
|
mlquantify/methods/__init__.py
DELETED
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
from .aggregative import *
|
|
2
|
-
from .meta import *
|
|
3
|
-
from .non_aggregative import *
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
# Name -> class registries. The classes themselves are provided by the star
# imports at the top of this module.
AGGREGATIVE = {
    "CC": CC, "PCC": PCC, "EMQ": EMQ, "FM": FM,
    "GAC": GAC, "GPAC": GPAC, "PWK": PWK,
    "ACC": ACC, "MAX": MAX, "MS": MS, "MS2": MS2,
    "PACC": PACC, "T50": T50,
    "X": X_method,  # registered under the short key "X"
    "DyS": DyS, "DySsyn": DySsyn, "HDy": HDy,
    "SMM": SMM, "SORD": SORD,
}

NON_AGGREGATIVE = {"HDx": HDx}

META = {"ENSEMBLE": Ensemble}

# Single lookup table over every registry; on a key clash the later
# registry wins, as with the ``|`` merge operator.
METHODS = {**AGGREGATIVE, **NON_AGGREGATIVE, **META}
|