mlquantify 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. mlquantify/__init__.py +0 -29
  2. mlquantify/adjust_counting/__init__.py +14 -0
  3. mlquantify/adjust_counting/_adjustment.py +365 -0
  4. mlquantify/adjust_counting/_base.py +247 -0
  5. mlquantify/adjust_counting/_counting.py +145 -0
  6. mlquantify/adjust_counting/_utils.py +114 -0
  7. mlquantify/base.py +117 -519
  8. mlquantify/base_aggregative.py +209 -0
  9. mlquantify/calibration.py +1 -0
  10. mlquantify/confidence.py +335 -0
  11. mlquantify/likelihood/__init__.py +5 -0
  12. mlquantify/likelihood/_base.py +161 -0
  13. mlquantify/likelihood/_classes.py +414 -0
  14. mlquantify/meta/__init__.py +1 -0
  15. mlquantify/meta/_classes.py +761 -0
  16. mlquantify/metrics/__init__.py +21 -0
  17. mlquantify/metrics/_oq.py +109 -0
  18. mlquantify/metrics/_rq.py +98 -0
  19. mlquantify/{evaluation/measures.py → metrics/_slq.py} +43 -28
  20. mlquantify/mixture/__init__.py +7 -0
  21. mlquantify/mixture/_base.py +153 -0
  22. mlquantify/mixture/_classes.py +400 -0
  23. mlquantify/mixture/_utils.py +112 -0
  24. mlquantify/model_selection/__init__.py +9 -0
  25. mlquantify/model_selection/_protocol.py +358 -0
  26. mlquantify/model_selection/_search.py +315 -0
  27. mlquantify/model_selection/_split.py +1 -0
  28. mlquantify/multiclass.py +350 -0
  29. mlquantify/neighbors/__init__.py +9 -0
  30. mlquantify/neighbors/_base.py +198 -0
  31. mlquantify/neighbors/_classes.py +159 -0
  32. mlquantify/{classification/methods.py → neighbors/_classification.py} +48 -66
  33. mlquantify/neighbors/_kde.py +270 -0
  34. mlquantify/neighbors/_utils.py +135 -0
  35. mlquantify/neural/__init__.py +1 -0
  36. mlquantify/utils/__init__.py +47 -2
  37. mlquantify/utils/_artificial.py +27 -0
  38. mlquantify/utils/_constraints.py +219 -0
  39. mlquantify/utils/_context.py +21 -0
  40. mlquantify/utils/_decorators.py +36 -0
  41. mlquantify/utils/_exceptions.py +12 -0
  42. mlquantify/utils/_get_scores.py +159 -0
  43. mlquantify/utils/_load.py +18 -0
  44. mlquantify/utils/_parallel.py +6 -0
  45. mlquantify/utils/_random.py +36 -0
  46. mlquantify/utils/_sampling.py +273 -0
  47. mlquantify/utils/_tags.py +44 -0
  48. mlquantify/utils/_validation.py +447 -0
  49. mlquantify/utils/prevalence.py +61 -0
  50. {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/METADATA +2 -1
  51. mlquantify-0.1.9.dist-info/RECORD +53 -0
  52. mlquantify/classification/__init__.py +0 -1
  53. mlquantify/evaluation/__init__.py +0 -14
  54. mlquantify/evaluation/protocol.py +0 -291
  55. mlquantify/methods/__init__.py +0 -37
  56. mlquantify/methods/aggregative.py +0 -1159
  57. mlquantify/methods/meta.py +0 -472
  58. mlquantify/methods/mixture_models.py +0 -1003
  59. mlquantify/methods/non_aggregative.py +0 -136
  60. mlquantify/methods/threshold_optimization.py +0 -869
  61. mlquantify/model_selection.py +0 -377
  62. mlquantify/plots.py +0 -367
  63. mlquantify/utils/general.py +0 -371
  64. mlquantify/utils/method.py +0 -449
  65. mlquantify-0.1.7.dist-info/RECORD +0 -22
  66. {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/WHEEL +0 -0
  67. {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/top_level.txt +0 -0
mlquantify/evaluation/protocol.py
@@ -1,291 +0,0 @@
- from abc import ABC, abstractmethod
- from logging import warning
- import numpy as np
- from typing import Generator
-
- from ..utils.general import *
-
- class Protocol(ABC):
-     """Base class for evaluation protocols.
-
-     Parameters
-     ----------
-     batch_size : int or list of int
-         The size of the batches to be used in the evaluation.
-     random_state : int, optional
-         The random seed for reproducibility.
-
-     Attributes
-     ----------
-     n_combinations : int
-         The number of sample combinations implied by the protocol's parameters.
-
-     Raises
-     ------
-     ValueError
-         If an extra keyword argument is not an int, a float, or a list.
-
-     Notes
-     -----
-     This class serves as a base for the evaluation protocols below, each of which implements its own strategy for splitting the data into batches.
-
-     Examples
-     --------
-     >>> class MyCustomProtocol(Protocol):
-     ...     def _iter_indices(self, X: np.ndarray, y: np.ndarray) -> Generator[np.ndarray, None, None]:
-     ...         for batch_size in self.batch_size:
-     ...             yield np.random.choice(X.shape[0], batch_size, replace=True)
-     ...
-     >>> protocol = MyCustomProtocol(batch_size=100, random_state=42)
-     >>> for idx in protocol.split(X, y):
-     ...     # Evaluate the model on the sample X[idx], y[idx]
-     ...     pass
-
-     """
-
-     def __init__(self, batch_size, random_state=None, **kwargs):
-         if isinstance(batch_size, int):
-             self.n_combinations = 1
-         else:
-             self.n_combinations = len(batch_size)
-
-         self.batch_size = [batch_size] if isinstance(batch_size, int) else batch_size
-         self.random_state = random_state
-
-         for name, value in kwargs.items():
-             setattr(self, name, value)
-             if isinstance(value, list):
-                 self.n_combinations *= len(value)
-             elif isinstance(value, (int, float)):
-                 self.n_combinations *= value
-             else:
-                 raise ValueError(f"Invalid argument {name}={value}: must be int/float or list of int/float.")
-
-     def split(self, X: np.ndarray, y: np.ndarray) -> Generator[np.ndarray, None, None]:
-         """
-         Split the data into samples for evaluation.
-
-         Parameters
-         ----------
-         X : np.ndarray
-             The input features.
-         y : np.ndarray
-             The target labels.
-
-         Yields
-         ------
-         np.ndarray
-             The indices of the instances selected for each sample.
-         """
-         for idx in self._iter_indices(X, y):
-             if len(idx) > len(X):
-                 warning(f"Batch size {len(idx)} exceeds dataset size {len(X)}. Replacement sampling will be used.")
-             yield idx
-
-     @abstractmethod
-     def _iter_indices(self, X, y):
-         """Yield the indices for each batch; implemented by subclasses."""
-
-     def get_n_combinations(self) -> int:
-         """Get the number of combinations for the current protocol."""
-         return self.n_combinations
-
-
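Aside: the constructor above starts n_combinations at the number of configured batch sizes and multiplies it by each extra keyword argument (a list's length for lists, the value itself for ints). A minimal sketch of that bookkeeping; the UniformDraws subclass is hypothetical, not part of mlquantify:

import numpy as np

# Hypothetical subclass used only to illustrate the accounting: it
# honours `repeats` by drawing each configured batch size that many times.
class UniformDraws(Protocol):
    def _iter_indices(self, X, y):
        rng = np.random.default_rng(self.random_state)
        for _ in range(self.repeats):
            for batch_size in self.batch_size:
                yield rng.choice(X.shape[0], batch_size, replace=True)

# len(batch_size) = 2, repeats = 3  ->  n_combinations = 2 * 3 = 6
proto = UniformDraws(batch_size=[50, 100], repeats=3, random_state=0)
assert proto.get_n_combinations() == 6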
99
- class APP(Protocol):
100
- """Artificial Prevalence Protocol (APP) for evaluation.
101
- This protocol generates artificial prevalence distributions for the evaluation in an exhaustive manner, testing all possible combinations of prevalences.
102
-
103
- Parameters
104
- ----------
105
- batch_size : int or list of int
106
- The size of the batches to be used in the evaluation.
107
- n_prevalences : int
108
- The number of artificial prevalences to generate.
109
- repeats : int, optional
110
- The number of times to repeat the evaluation with different random seeds.
111
- random_state : int, optional
112
- The random seed for reproducibility.
113
-
114
- Attributes
115
- ----------
116
- n_prevalences : int
117
- The number of artificial prevalences to generate.
118
- repeats : int
119
- The number of times to repeat the evaluation with different random seeds.
120
- random_state : int
121
- The random seed for reproducibility.
122
-
123
- Notes
124
- -----
125
- It is important to note that in case of multiclass problems, the time complexity of this protocol can be significantly higher due to the increased number of combinations to evaluate.
126
-
127
- Examples
128
- --------
129
- >>> protocol = APP(batch_size=[100, 200], n_prevalences=5, repeats=3, random_state=42)
130
- >>> for train_idx, test_idx in protocol.split(X, y):
131
- ... # Train and evaluate model
132
- ... pass
133
-
134
- """
135
-
136
- def __init__(self, batch_size, n_prevalences, repeats=1, random_state=None):
137
- super().__init__(batch_size=batch_size,
138
- random_state=random_state,
139
- n_prevalences=n_prevalences,
140
- repeats=repeats)
141
-
142
- def _iter_indices(self, X: np.ndarray, y: np.ndarray) -> Generator[np.ndarray]:
143
-
144
- n_dim = len(np.unique(y))
145
-
146
- for batch_size in self.batch_size:
147
- prevalences = generate_artificial_prevalences(n_dim=n_dim,
148
- n_prev=self.n_prevalences,
149
- n_iter=self.repeats)
150
- for prev in prevalences:
151
- indexes = get_indexes_with_prevalence(y, prev, batch_size)
152
- yield indexes
153
-
154
-
155
-
156
-
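generate_artificial_prevalences is imported star-style from utils.general and its body is not part of this diff. As a rough illustration of the exhaustive grid APP walks over, a binary-case equivalent might look like the sketch below; this is an assumption about its behaviour, not the package's code:

import numpy as np

def binary_prevalence_grid(n_prev, n_iter):
    # Evenly spaced positive-class prevalences in [0, 1]; n_prev=5 gives
    # 0.0, 0.25, 0.5, 0.75, 1.0. Each grid point is repeated n_iter times
    # so that sampling noise can be averaged across repetitions.
    points = np.linspace(0.0, 1.0, n_prev)
    return np.array([[1.0 - p, p] for p in points for _ in range(n_iter)])

binary_prevalence_grid(n_prev=3, n_iter=1)
# array([[1. , 0. ],
#        [0.5, 0.5],
#        [0. , 1. ]])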
- class NPP(Protocol):
-     """No Prevalence Protocol (NPP) for evaluation.
-
-     This protocol samples the data uniformly at random, without controlling prevalence: every instance has an equal probability of being selected, so each sample follows the natural class distribution of the data.
-
-     Parameters
-     ----------
-     batch_size : int or list of int
-         The size of the batches to be used in the evaluation.
-     random_state : int, optional
-         The random seed for reproducibility.
-
-     Examples
-     --------
-     >>> protocol = NPP(batch_size=100, random_state=42)
-     >>> for idx in protocol.split(X, y):
-     ...     # Evaluate the model on the sample X[idx], y[idx]
-     ...     pass
-     """
-
-     def _iter_indices(self, X: np.ndarray, y: np.ndarray) -> Generator[np.ndarray, None, None]:
-         for batch_size in self.batch_size:
-             yield np.random.choice(X.shape[0], batch_size, replace=True)
-
-
- class UPP(Protocol):
-     """Uniform Prevalence Protocol (UPP) for evaluation.
-
-     An extension of APP that draws artificial prevalence distributions uniformly across all classes using the Kraemer sampling method.
-
-     Parameters
-     ----------
-     batch_size : int or list of int
-         The size of the batches to be used in the evaluation.
-     n_prevalences : int
-         The number of artificial prevalences to generate.
-     repeats : int, optional
-         The number of times to repeat the evaluation with different random seeds.
-     random_state : int, optional
-         The random seed for reproducibility.
-
-     Attributes
-     ----------
-     n_prevalences : int
-         The number of artificial prevalences to generate.
-     repeats : int
-         The number of times to repeat the evaluation with different random seeds.
-     random_state : int
-         The random seed for reproducibility.
-
-     Examples
-     --------
-     >>> protocol = UPP(batch_size=100, n_prevalences=5, repeats=3, random_state=42)
-     >>> for idx in protocol.split(X, y):
-     ...     # Evaluate the model on the sample X[idx], y[idx]
-     ...     pass
-     """
-
-     def __init__(self, batch_size, n_prevalences, repeats=1, random_state=None):
-         super().__init__(batch_size=batch_size,
-                          random_state=random_state,
-                          n_prevalences=n_prevalences,
-                          repeats=repeats)
-
-     def _iter_indices(self, X: np.ndarray, y: np.ndarray) -> Generator[np.ndarray, None, None]:
-         n_dim = len(np.unique(y))
-
-         for batch_size in self.batch_size:
-             prevalences = kraemer_sampling(n_dim=n_dim,
-                                            n_prev=self.n_prevalences,
-                                            n_iter=self.repeats)
-
-             for prev in prevalences:
-                 indexes = get_indexes_with_prevalence(y, prev, batch_size)
-                 yield indexes
-
-
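kraemer_sampling likewise comes from utils.general. The standard Kraemer construction draws prevalence vectors uniformly from the probability simplex by taking spacings between sorted uniform variates; a self-contained sketch of that idea, not the package's implementation:

import numpy as np

def kraemer_simplex_sample(n_dim, n_samples, seed=None):
    # Sort n_dim - 1 uniforms per row, pad with 0 and 1, and take the
    # consecutive differences: the resulting spacings are non-negative,
    # sum to 1, and are uniformly distributed over the simplex.
    rng = np.random.default_rng(seed)
    u = np.sort(rng.uniform(size=(n_samples, n_dim - 1)), axis=1)
    padded = np.hstack([np.zeros((n_samples, 1)), u, np.ones((n_samples, 1))])
    return np.diff(padded, axis=1)

prevs = kraemer_simplex_sample(n_dim=3, n_samples=4, seed=42)
assert np.allclose(prevs.sum(axis=1), 1.0)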
- class PPP(Protocol):
-     """Personalized Prevalence Protocol (PPP) for evaluation.
-
-     This protocol draws samples at user-specified prevalences.
-
-     Parameters
-     ----------
-     batch_size : int or list of int
-         The size of the batches to be used in the evaluation.
-     prevalences : list of float or list of array-like
-         The target prevalences for the generated samples; a float p is expanded to the binary pair [1 - p, p].
-     repeats : int, optional
-         The number of times to repeat the evaluation with different random seeds.
-     random_state : int, optional
-         The random seed for reproducibility.
-
-     Attributes
-     ----------
-     prevalences : list of float or list of array-like
-         The target prevalences for the generated samples.
-     repeats : int
-         The number of times to repeat the evaluation with different random seeds.
-     random_state : int
-         The random seed for reproducibility.
-
-     Examples
-     --------
-     >>> protocol = PPP(batch_size=100, prevalences=[0.1, 0.9], repeats=3, random_state=42)
-     >>> for idx in protocol.split(X, y):
-     ...     # Evaluate the model on the sample X[idx], y[idx]
-     ...     pass
-     """
-
-     def __init__(self, batch_size, prevalences, repeats=1, random_state=None):
-         super().__init__(batch_size=batch_size,
-                          random_state=random_state,
-                          prevalences=prevalences,
-                          repeats=repeats)
-
-     def _iter_indices(self, X: np.ndarray, y: np.ndarray) -> Generator[np.ndarray, None, None]:
-         for batch_size in self.batch_size:
-             # Honour `repeats`, which the constructor already counts in n_combinations.
-             for _ in range(self.repeats):
-                 for prev in self.prevalences:
-                     if isinstance(prev, float):
-                         prev = [1 - prev, prev]
-
-                     indexes = get_indexes_with_prevalence(y, prev, batch_size)
-                     yield indexes
-
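A short usage note on PPP: scalar entries in prevalences are expanded to binary pairs before sampling, while vector entries pass through to get_indexes_with_prevalence unchanged, so multiclass targets can be given directly. Illustrative only:

# A scalar prevalence 0.1 is expanded to [0.9, 0.1] inside _iter_indices.
binary = PPP(batch_size=100, prevalences=[0.1, 0.9], random_state=42)

# Multiclass targets are full prevalence vectors that sum to 1.
multiclass = PPP(batch_size=100, prevalences=[[0.2, 0.3, 0.5]], random_state=42)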
mlquantify/methods/__init__.py
@@ -1,37 +0,0 @@
- from .aggregative import *
- from .meta import *
- from .non_aggregative import *
-
-
- AGGREGATIVE = {
-     "CC": CC,
-     "PCC": PCC,
-     "EMQ": EMQ,
-     "FM": FM,
-     "GAC": GAC,
-     "GPAC": GPAC,
-     "PWK": PWK,
-     "ACC": ACC,
-     "MAX": MAX,
-     "MS": MS,
-     "MS2": MS2,
-     "PACC": PACC,
-     "T50": T50,
-     "X": X_method,
-     "DyS": DyS,
-     "DySsyn": DySsyn,
-     "HDy": HDy,
-     "SMM": SMM,
-     "SORD": SORD,
- }
-
- NON_AGGREGATIVE = {
-     "HDx": HDx
- }
-
- META = {
-     "ENSEMBLE": Ensemble
- }
-
-
- METHODS = AGGREGATIVE | NON_AGGREGATIVE | META
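Note that the dict-union operator building METHODS requires Python 3.9+. For context, a hedged sketch of name-based lookup through this registry; the constructor argument (a scikit-learn classifier) is an assumption about the quantifiers' API rather than something this diff shows:

from sklearn.linear_model import LogisticRegression

# Look up a quantifier class by its registry key and instantiate it
# (assumed signature: aggregative quantifiers wrap a base classifier).
quantifier_cls = METHODS["ACC"]
quantifier = quantifier_cls(LogisticRegression())

# Registry membership works like any dict lookup.
assert "HDx" in NON_AGGREGATIVE and "ENSEMBLE" in META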