mlquantify 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. mlquantify/__init__.py +10 -29
  2. mlquantify/adjust_counting/__init__.py +24 -0
  3. mlquantify/adjust_counting/_adjustment.py +648 -0
  4. mlquantify/adjust_counting/_base.py +245 -0
  5. mlquantify/adjust_counting/_counting.py +153 -0
  6. mlquantify/adjust_counting/_utils.py +109 -0
  7. mlquantify/base.py +117 -519
  8. mlquantify/base_aggregative.py +209 -0
  9. mlquantify/calibration.py +1 -0
  10. mlquantify/confidence.py +329 -0
  11. mlquantify/likelihood/__init__.py +5 -0
  12. mlquantify/likelihood/_base.py +147 -0
  13. mlquantify/likelihood/_classes.py +430 -0
  14. mlquantify/meta/__init__.py +1 -0
  15. mlquantify/meta/_classes.py +785 -0
  16. mlquantify/metrics/__init__.py +21 -0
  17. mlquantify/metrics/_oq.py +109 -0
  18. mlquantify/metrics/_rq.py +98 -0
  19. mlquantify/{evaluation/measures.py → metrics/_slq.py} +51 -36
  20. mlquantify/mixture/__init__.py +7 -0
  21. mlquantify/mixture/_base.py +147 -0
  22. mlquantify/mixture/_classes.py +458 -0
  23. mlquantify/mixture/_utils.py +163 -0
  24. mlquantify/model_selection/__init__.py +9 -0
  25. mlquantify/model_selection/_protocol.py +358 -0
  26. mlquantify/model_selection/_search.py +315 -0
  27. mlquantify/model_selection/_split.py +1 -0
  28. mlquantify/multiclass.py +350 -0
  29. mlquantify/neighbors/__init__.py +9 -0
  30. mlquantify/neighbors/_base.py +168 -0
  31. mlquantify/neighbors/_classes.py +150 -0
  32. mlquantify/{classification/methods.py → neighbors/_classification.py} +37 -62
  33. mlquantify/neighbors/_kde.py +268 -0
  34. mlquantify/neighbors/_utils.py +131 -0
  35. mlquantify/neural/__init__.py +1 -0
  36. mlquantify/utils/__init__.py +47 -2
  37. mlquantify/utils/_artificial.py +27 -0
  38. mlquantify/utils/_constraints.py +219 -0
  39. mlquantify/utils/_context.py +21 -0
  40. mlquantify/utils/_decorators.py +36 -0
  41. mlquantify/utils/_exceptions.py +12 -0
  42. mlquantify/utils/_get_scores.py +159 -0
  43. mlquantify/utils/_load.py +18 -0
  44. mlquantify/utils/_parallel.py +6 -0
  45. mlquantify/utils/_random.py +36 -0
  46. mlquantify/utils/_sampling.py +273 -0
  47. mlquantify/utils/_tags.py +44 -0
  48. mlquantify/utils/_validation.py +447 -0
  49. mlquantify/utils/prevalence.py +64 -0
  50. {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/METADATA +2 -1
  51. mlquantify-0.1.10.dist-info/RECORD +53 -0
  52. mlquantify/classification/__init__.py +0 -1
  53. mlquantify/evaluation/__init__.py +0 -14
  54. mlquantify/evaluation/protocol.py +0 -289
  55. mlquantify/methods/__init__.py +0 -37
  56. mlquantify/methods/aggregative.py +0 -1159
  57. mlquantify/methods/meta.py +0 -472
  58. mlquantify/methods/mixture_models.py +0 -1003
  59. mlquantify/methods/non_aggregative.py +0 -136
  60. mlquantify/methods/threshold_optimization.py +0 -869
  61. mlquantify/model_selection.py +0 -377
  62. mlquantify/plots.py +0 -367
  63. mlquantify/utils/general.py +0 -371
  64. mlquantify/utils/method.py +0 -449
  65. mlquantify-0.1.8.dist-info/RECORD +0 -22
  66. {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/WHEEL +0 -0
  67. {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/top_level.txt +0 -0
@@ -1,289 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from logging import warning
3
- import numpy as np
4
-
5
- from ..utils.general import *
6
-
7
- class Protocol(ABC):
8
- """Base class for evaluation protocols.
9
-
10
- Parameters
11
- ----------
12
- batch_size : int or list of int
13
- The size of the batches to be used in the evaluation.
14
- random_state : int, optional
15
- The random seed for reproducibility.
16
-
17
- Attributes
18
- ----------
19
- n_combinations : int
20
-
21
- Raises
22
- ------
23
- ValueError
24
- If the batch size is not a positive integer or list of positive integers.
25
-
26
- Notes
27
- -----
28
- This class serves as a base class for different evaluation protocols, each with its own strategy for splitting the data into batches.
29
-
30
- Examples
31
- --------
32
- >>> class MyCustomProtocol(Protocol):
33
- ... def _iter_indices(self, X: np.ndarray, y: np.ndarray) -> Generator[np.ndarray]:
34
- ... for batch_size in self.batch_size:
35
- ... yield np.random.choice(X.shape[0], batch_size, replace=True)
36
- ...
37
- >>> protocol = MyCustomProtocol(batch_size=100, random_state=42)
38
- >>> for train_idx, test_idx in protocol.split(X, y):
39
- ... # Train and evaluate model
40
- ... pass
41
-
42
- """
43
-
44
- def __init__(self, batch_size, random_state=None, **kwargs):
45
- if isinstance(batch_size, int):
46
- self.n_combinations = 1
47
- else:
48
- self.n_combinations = len(batch_size)
49
-
50
- self.batch_size = [batch_size] if isinstance(batch_size, int) else batch_size
51
- self.random_state = random_state
52
-
53
- for name, value in kwargs.items():
54
- setattr(self, name, value)
55
- if isinstance(value, list):
56
- self.n_combinations *= len(value)
57
- elif isinstance(value, (int, float)):
58
- self.n_combinations *= value
59
- else:
60
- raise ValueError(f"Invalid argument {name}={value}: must be int/float or list of int/float.")
61
-
62
-
63
- def split(self, X: np.ndarray, y: np.ndarray):
64
- """
65
- Split the data into samples for evaluation.
66
-
67
- Parameters
68
- ----------
69
- X : np.ndarray
70
- The input features.
71
- y : np.ndarray
72
- The target labels.
73
-
74
- Yields
75
- ------
76
- Generator[np.ndarray, np.ndarray]
77
- A generator that yields the indices for each split.
78
- """
79
- for idx in self._iter_indices(X, y):
80
- if len(idx) > len(X):
81
- warning(f"Batch size {len(idx)} exceeds dataset size {len(X)}. Replacement sampling will be used.")
82
- yield idx
83
-
84
-
85
- @abstractmethod
86
- def _iter_indices(self, X, y):
87
- """Abstract method to be implemented by subclasses to yield indices for each batch."""
88
- pass
89
-
90
- def get_n_combinations(self) -> int:
91
- """
92
- Get the number of combinations for the current protocol.
93
- """
94
- return self.n_combinations
95
-
96
-
97
- class APP(Protocol):
98
- """Artificial Prevalence Protocol (APP) for evaluation.
99
- This protocol generates artificial prevalence distributions for the evaluation in an exhaustive manner, testing all possible combinations of prevalences.
100
-
101
- Parameters
102
- ----------
103
- batch_size : int or list of int
104
- The size of the batches to be used in the evaluation.
105
- n_prevalences : int
106
- The number of artificial prevalences to generate.
107
- repeats : int, optional
108
- The number of times to repeat the evaluation with different random seeds.
109
- random_state : int, optional
110
- The random seed for reproducibility.
111
-
112
- Attributes
113
- ----------
114
- n_prevalences : int
115
- The number of artificial prevalences to generate.
116
- repeats : int
117
- The number of times to repeat the evaluation with different random seeds.
118
- random_state : int
119
- The random seed for reproducibility.
120
-
121
- Notes
122
- -----
123
- It is important to note that in case of multiclass problems, the time complexity of this protocol can be significantly higher due to the increased number of combinations to evaluate.
124
-
125
- Examples
126
- --------
127
- >>> protocol = APP(batch_size=[100, 200], n_prevalences=5, repeats=3, random_state=42)
128
- >>> for train_idx, test_idx in protocol.split(X, y):
129
- ... # Train and evaluate model
130
- ... pass
131
-
132
- """
133
-
134
- def __init__(self, batch_size, n_prevalences, repeats=1, random_state=None):
135
- super().__init__(batch_size=batch_size,
136
- random_state=random_state,
137
- n_prevalences=n_prevalences,
138
- repeats=repeats)
139
-
140
- def _iter_indices(self, X: np.ndarray, y: np.ndarray):
141
-
142
- n_dim = len(np.unique(y))
143
-
144
- for batch_size in self.batch_size:
145
- prevalences = generate_artificial_prevalences(n_dim=n_dim,
146
- n_prev=self.n_prevalences,
147
- n_iter=self.repeats)
148
- for prev in prevalences:
149
- indexes = get_indexes_with_prevalence(y, prev, batch_size)
150
- yield indexes
151
-
152
-
153
-
154
-
155
- class NPP(Protocol):
156
- """No Prevalence Protocol (NPP) for evaluation.
157
- This protocol just samples the data without any consideration for prevalence, with all instances having equal probability of being selected.
158
-
159
- Parameters
160
- ----------
161
- batch_size : int or list of int
162
- The size of the batches to be used in the evaluation.
163
- random_state : int, optional
164
- The random seed for reproducibility.
165
-
166
- Attributes
167
- ----------
168
- n_prevalences : int
169
- The number of artificial prevalences to generate.
170
- repeats : int
171
- The number of times to repeat the evaluation with different random seeds.
172
- random_state : int
173
- The random seed for reproducibility.
174
-
175
- Examples
176
- --------
177
- >>> protocol = NPP(batch_size=100, random_state=42)
178
- >>> for train_idx, test_idx in protocol.split(X, y):
179
- ... # Train and evaluate model
180
- ... pass
181
- """
182
-
183
- def _iter_indices(self, X: np.ndarray, y: np.ndarray):
184
-
185
- for batch_size in self.batch_size:
186
- yield np.random.choice(X.shape[0], batch_size, replace=True)
187
-
188
-
189
- class UPP(Protocol):
190
- """Uniform Prevalence Protocol (UPP) for evaluation.
191
- An extension of the APP that generates artificial prevalence distributions uniformly across all classes utilizing the kraemer sampling method.
192
-
193
- Parameters
194
- ----------
195
- batch_size : int or list of int
196
- The size of the batches to be used in the evaluation.
197
- n_prevalences : int
198
- The number of artificial prevalences to generate.
199
- repeats : int
200
- The number of times to repeat the evaluation with different random seeds.
201
- random_state : int, optional
202
- The random seed for reproducibility.
203
-
204
- Attributes
205
- ----------
206
- n_prevalences : int
207
- The number of artificial prevalences to generate.
208
- repeats : int
209
- The number of times to repeat the evaluation with different random seeds.
210
- random_state : int
211
- The random seed for reproducibility.
212
-
213
- Examples
214
- --------
215
- >>> protocol = UPP(batch_size=100, n_prevalences=5, repeats=3, random_state=42)
216
- >>> for train_idx, test_idx in protocol.split(X, y):
217
- ... # Train and evaluate model
218
- ... pass
219
- """
220
-
221
- def __init__(self, batch_size, n_prevalences, repeats=1, random_state=None):
222
- super().__init__(batch_size=batch_size,
223
- random_state=random_state,
224
- n_prevalences=n_prevalences,
225
- repeats=repeats)
226
-
227
- def _iter_indices(self, X: np.ndarray, y: np.ndarray):
228
-
229
- n_dim = len(np.unique(y))
230
-
231
- for batch_size in self.batch_size:
232
-
233
- prevalences = kraemer_sampling(n_dim=n_dim,
234
- n_prev=self.n_prevalences,
235
- n_iter=self.repeats)
236
-
237
- for prev in prevalences:
238
- indexes = get_indexes_with_prevalence(y, prev, batch_size)
239
- yield indexes
240
-
241
-
242
- class PPP(Protocol):
243
- """ Personalized Prevalence Protocol (PPP) for evaluation.
244
- This protocol generates artificial prevalence distributions personalized for each class.
245
-
246
- Parameters
247
- ----------
248
- batch_size : int or list of int
249
- The size of the batches to be used in the evaluation.
250
- prevalences : list of float
251
- The list of artificial prevalences to generate for each class.
252
- repeats : int
253
- The number of times to repeat the evaluation with different random seeds.
254
- random_state : int, optional
255
- The random seed for reproducibility.
256
-
257
- Attributes
258
- ----------
259
- prevalences : list of float
260
- The list of artificial prevalences to generate for each class.
261
- repeats : int
262
- The number of times to repeat the evaluation with different random seeds.
263
- random_state : int
264
- The random seed for reproducibility.
265
-
266
- Examples
267
- --------
268
- >>> protocol = PPP(batch_size=100, prevalences=[0.1, 0.9], repeats=3, random_state=42)
269
- >>> for train_idx, test_idx in protocol.split(X, y):
270
- ... # Train and evaluate model
271
- ... pass
272
- """
273
-
274
- def __init__(self, batch_size, prevalences, repeats=1, random_state=None):
275
- super().__init__(batch_size=batch_size,
276
- random_state=random_state,
277
- prevalences=prevalences,
278
- repeats=repeats)
279
-
280
- def _iter_indices(self, X: np.ndarray, y: np.ndarray):
281
-
282
- for batch_size in self.batch_size:
283
- for prev in self.prevalences:
284
- if isinstance(prev, float):
285
- prev = [1-prev, prev]
286
-
287
- indexes = get_indexes_with_prevalence(y, prev, batch_size)
288
- yield indexes
289
-
@@ -1,37 +0,0 @@
1
- from .aggregative import *
2
- from .meta import *
3
- from .non_aggregative import *
4
-
5
-
6
- AGGREGATIVE = {
7
- "CC": CC,
8
- "PCC": PCC,
9
- "EMQ": EMQ,
10
- "FM": FM,
11
- "GAC": GAC,
12
- "GPAC": GPAC,
13
- "PWK": PWK,
14
- "ACC": ACC,
15
- "MAX": MAX,
16
- "MS": MS,
17
- "MS2": MS2,
18
- "PACC": PACC,
19
- "T50": T50,
20
- "X": X_method,
21
- "DyS": DyS,
22
- "DySsyn": DySsyn,
23
- "HDy": HDy,
24
- "SMM": SMM,
25
- "SORD": SORD,
26
- }
27
-
28
- NON_AGGREGATIVE = {
29
- "HDx": HDx
30
- }
31
-
32
- META = {
33
- "ENSEMBLE": Ensemble
34
- }
35
-
36
-
37
- METHODS = AGGREGATIVE | NON_AGGREGATIVE | META