mlquantify 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. mlquantify/__init__.py +0 -29
  2. mlquantify/adjust_counting/__init__.py +14 -0
  3. mlquantify/adjust_counting/_adjustment.py +365 -0
  4. mlquantify/adjust_counting/_base.py +247 -0
  5. mlquantify/adjust_counting/_counting.py +145 -0
  6. mlquantify/adjust_counting/_utils.py +114 -0
  7. mlquantify/base.py +117 -519
  8. mlquantify/base_aggregative.py +209 -0
  9. mlquantify/calibration.py +1 -0
  10. mlquantify/confidence.py +335 -0
  11. mlquantify/likelihood/__init__.py +5 -0
  12. mlquantify/likelihood/_base.py +161 -0
  13. mlquantify/likelihood/_classes.py +414 -0
  14. mlquantify/meta/__init__.py +1 -0
  15. mlquantify/meta/_classes.py +761 -0
  16. mlquantify/metrics/__init__.py +21 -0
  17. mlquantify/metrics/_oq.py +109 -0
  18. mlquantify/metrics/_rq.py +98 -0
  19. mlquantify/{evaluation/measures.py → metrics/_slq.py} +43 -28
  20. mlquantify/mixture/__init__.py +7 -0
  21. mlquantify/mixture/_base.py +153 -0
  22. mlquantify/mixture/_classes.py +400 -0
  23. mlquantify/mixture/_utils.py +112 -0
  24. mlquantify/model_selection/__init__.py +9 -0
  25. mlquantify/model_selection/_protocol.py +358 -0
  26. mlquantify/model_selection/_search.py +315 -0
  27. mlquantify/model_selection/_split.py +1 -0
  28. mlquantify/multiclass.py +350 -0
  29. mlquantify/neighbors/__init__.py +9 -0
  30. mlquantify/neighbors/_base.py +198 -0
  31. mlquantify/neighbors/_classes.py +159 -0
  32. mlquantify/{classification/methods.py → neighbors/_classification.py} +48 -66
  33. mlquantify/neighbors/_kde.py +270 -0
  34. mlquantify/neighbors/_utils.py +135 -0
  35. mlquantify/neural/__init__.py +1 -0
  36. mlquantify/utils/__init__.py +47 -2
  37. mlquantify/utils/_artificial.py +27 -0
  38. mlquantify/utils/_constraints.py +219 -0
  39. mlquantify/utils/_context.py +21 -0
  40. mlquantify/utils/_decorators.py +36 -0
  41. mlquantify/utils/_exceptions.py +12 -0
  42. mlquantify/utils/_get_scores.py +159 -0
  43. mlquantify/utils/_load.py +18 -0
  44. mlquantify/utils/_parallel.py +6 -0
  45. mlquantify/utils/_random.py +36 -0
  46. mlquantify/utils/_sampling.py +273 -0
  47. mlquantify/utils/_tags.py +44 -0
  48. mlquantify/utils/_validation.py +447 -0
  49. mlquantify/utils/prevalence.py +61 -0
  50. {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/METADATA +2 -1
  51. mlquantify-0.1.9.dist-info/RECORD +53 -0
  52. mlquantify/classification/__init__.py +0 -1
  53. mlquantify/evaluation/__init__.py +0 -14
  54. mlquantify/evaluation/protocol.py +0 -291
  55. mlquantify/methods/__init__.py +0 -37
  56. mlquantify/methods/aggregative.py +0 -1159
  57. mlquantify/methods/meta.py +0 -472
  58. mlquantify/methods/mixture_models.py +0 -1003
  59. mlquantify/methods/non_aggregative.py +0 -136
  60. mlquantify/methods/threshold_optimization.py +0 -869
  61. mlquantify/model_selection.py +0 -377
  62. mlquantify/plots.py +0 -367
  63. mlquantify/utils/general.py +0 -371
  64. mlquantify/utils/method.py +0 -449
  65. mlquantify-0.1.7.dist-info/RECORD +0 -22
  66. {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/WHEEL +0 -0
  67. {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,273 @@
1
+ import numpy as np
2
+ import itertools
3
+
4
+
5
def get_indexes_with_prevalence(y, prevalence: list, sample_size: int):
    """
    Get indexes for a stratified sample based on the prevalence of each class.

    Classes are taken from the unique values of ``y``. Sampling is done with
    replacement, and the last class absorbs any rounding remainder so that
    exactly ``sample_size`` indexes are always returned.

    Parameters
    ----------
    y : np.ndarray
        Array of class labels.
    prevalence : list
        List of prevalences for each class (must sum to 1, one entry per
        unique class in ``y``).
    sample_size : int
        Number of samples to generate.

    Returns
    -------
    list
        Shuffled list of indexes for the stratified sample.

    Raises
    ------
    ValueError
        If the prevalences do not sum to 1, or their count does not match
        the number of classes in ``y``.
    """
    classes = np.unique(y)

    # Explicit validation (asserts would be stripped under ``python -O``).
    if not np.isclose(sum(prevalence), 1):
        raise ValueError("The sum of prevalences must be 1")
    if len(prevalence) != len(classes):
        raise ValueError("The number of prevalences must match the number of classes")

    sampled_indexes = []
    total_sampled = 0

    for i, class_ in enumerate(classes):
        # The last class absorbs the int() rounding remainder so the total
        # number of sampled indexes is exactly sample_size.
        if i == len(classes) - 1:
            num_samples = sample_size - total_sampled
        else:
            num_samples = int(sample_size * prevalence[i])

        # Indexes of all instances belonging to the current class.
        class_indexes = np.where(y == class_)[0]

        # Sample with replacement so small classes can still meet their quota.
        sampled_class_indexes = np.random.choice(class_indexes, size=num_samples, replace=True)

        sampled_indexes.extend(sampled_class_indexes)
        total_sampled += num_samples

    np.random.shuffle(sampled_indexes)  # shuffle so classes are interleaved

    return sampled_indexes
54
+
55
+
56
+
57
def simplex_uniform_kraemer(n_dim: int,
                            n_prev: int,
                            n_iter: int,
                            min_val: float = 0.0,
                            max_val: float = 1.0,
                            max_tries: int = 1000) -> np.ndarray:
    """
    Generates n_prev prevalence vectors of n_dim classes uniformly
    distributed on the simplex, with optional lower and upper bounds.

    Based on the algorithm of Kraemer et al. for uniform sampling on a simplex.

    Parameters
    ----------
    n_dim : int
        Number of dimensions (classes).
    n_prev : int
        Number of distinct prevalence vectors to generate.
    n_iter : int
        Number of times the generated vectors are repeated in the output.
    min_val : float, optional
        Minimum allowed prevalence for each class (default=0.0).
    max_val : float, optional
        Maximum allowed prevalence for each class (default=1.0).
    max_tries : int, optional
        Maximum number of sampling batches to try to reach n_prev vectors.

    Returns
    -------
    np.ndarray
        Array of shape (n_prev * n_iter, n_dim) with valid prevalence vectors.

    Raises
    ------
    ValueError
        If the dimensions or bounds are inconsistent.
    RuntimeError
        If no valid vector could be generated within max_tries batches.
    """
    if n_dim < 2:
        raise ValueError("n_dim must be >= 2.")
    if not (0 <= min_val < 1) or not (0 < max_val <= 1):
        raise ValueError("min_val and max_val must be between 0 and 1.")
    if min_val * n_dim > 1 or max_val * n_dim < 1:
        raise ValueError("Invalid bounds: they make it impossible to sum to 1.")

    effective_simplex_size = 1 - n_dim * min_val
    batches = []
    n_valid = 0  # total valid ROWS collected so far (not number of batches)

    # Sample in large batches for efficiency until n_prev valid vectors exist.
    tries = 0
    batch_size = max(n_prev, 1000)

    while n_valid < n_prev and tries < max_tries:
        tries += 1

        # Kraemer sampling: spacings of sorted uniforms are uniform on the simplex.
        u = np.random.uniform(0, 1, (batch_size, n_dim - 1))
        u.sort(axis=1)
        simplex = np.diff(
            np.concatenate([np.zeros((batch_size, 1)), u, np.ones((batch_size, 1))], axis=1),
            axis=1,
        )

        # Shift into [min_val, ...] and renormalize so each row sums to 1.
        scaled = min_val + simplex * effective_simplex_size
        scaled /= scaled.sum(axis=1, keepdims=True)

        # Keep only vectors that respect both bounds after renormalization.
        mask = np.all((scaled >= min_val) & (scaled <= max_val), axis=1)
        valid = scaled[mask]

        if valid.size > 0:
            batches.append(valid)
            n_valid += len(valid)

    if not batches:
        raise RuntimeError("No valid prevalences found with given constraints. Try adjusting min_val/max_val.")

    # Truncate to exactly n_prev distinct vectors, then repeat for n_iter.
    result = np.vstack(batches)[:n_prev]
    if n_iter > 1:
        result = np.tile(result, (n_iter, 1))
    return result
130
+
131
+
132
+
133
def simplex_grid_sampling(
    n_dim: int,
    n_prev: int,
    n_iter: int,
    min_val: float,
    max_val: float
) -> np.ndarray:
    """
    Build every grid point on the simplex whose components sum to 1 and
    satisfy min_val <= p_i <= max_val for all i.

    The first n_dim - 1 components take n_prev evenly spaced values in
    [min_val, max_val]; the last component is forced by the sum-to-one
    constraint and out-of-bounds combinations are discarded.

    Parameters
    ----------
    n_dim : int
        Number of dimensions (classes).
    n_prev : int
        Number of prevalence points per dimension (grid density).
    n_iter : int
        Number of repetitions of the resulting grid.
    min_val : float
        Minimum allowed value for each prevalence component.
    max_val : float
        Maximum allowed value for each prevalence component.

    Returns
    -------
    np.ndarray
        Array of shape (n_samples, n_dim) with all valid prevalence vectors.
    """
    if n_dim < 2:
        raise ValueError("n_dim must be at least 2.")
    if not (0 <= min_val < max_val <= 1):
        raise ValueError("min_val and max_val must satisfy 0 <= min_val < max_val <= 1.")
    if min_val * n_dim > 1 or max_val * n_dim < 1:
        raise ValueError("Impossible combination of min_val, max_val, and n_dim — cannot sum to 1.")

    # Candidate values shared by every free (non-final) dimension.
    axis_values = np.linspace(min_val, max_val, n_prev)
    mesh = np.meshgrid(*([axis_values] * (n_dim - 1)), indexing="ij")
    free_components = np.stack(mesh, axis=-1).reshape(-1, n_dim - 1)

    # The final component is whatever remains to reach a total of 1.
    remainder = 1.0 - np.sum(free_components, axis=1)
    candidates = np.hstack([free_components, remainder[:, None]])

    # Drop rows where any component escapes the [min_val, max_val] band.
    in_bounds = ((candidates >= min_val) & (candidates <= max_val)).all(axis=1)
    valid = candidates[in_bounds]

    # Repeat the whole grid when multiple iterations were requested.
    return np.tile(valid, (n_iter, 1)) if n_iter > 1 else valid
187
+
188
+
189
+
190
+
191
def simplex_uniform_sampling(
    n_dim: int,
    n_prev: int,
    n_iter: int,
    min_val: float,
    max_val: float,
    max_tries: int = 1000
) -> np.ndarray:
    """
    Generates uniformly distributed prevalence vectors within the simplex,
    constrained by min_val <= p_i <= max_val.

    Parameters
    ----------
    n_dim : int
        Number of dimensions.
    n_prev : int
        Number of prevalence samples to generate.
    n_iter : int
        Number of repetitions.
    min_val : float
        Minimum allowed value for each prevalence component.
    max_val : float
        Maximum allowed value for each prevalence component.
    max_tries : int, optional
        Maximum number of rejection-sampling batches before giving up
        (guards against an infinite loop under very tight bounds).

    Returns
    -------
    np.ndarray
        Array of shape (n_prev * n_iter, n_dim) with uniformly distributed
        prevalences.

    Raises
    ------
    ValueError
        If min_val/max_val make a sum of 1 impossible.
    RuntimeError
        If the target count is not reached within max_tries batches.
    """
    if min_val * n_dim > 1 or max_val * n_dim < 1:
        raise ValueError("Invalid min_val/max_val for simplex constraints.")

    total_samples = n_prev * n_iter
    batches = []
    n_valid = 0  # total valid ROWS collected so far (not number of batches)

    for _ in range(max_tries):
        # Dirichlet(1, ..., 1) is the uniform distribution on the simplex;
        # oversample so one batch usually suffices after rejection.
        x = np.random.dirichlet(np.ones(n_dim), size=total_samples * 2)

        # Rejection step: keep only candidates inside the requested bounds.
        mask = np.all((x >= min_val) & (x <= max_val), axis=1)
        valid = x[mask]
        if len(valid) > 0:
            batches.append(valid)
            n_valid += len(valid)

        if n_valid >= total_samples:
            return np.concatenate(batches, axis=0)[:total_samples]

    raise RuntimeError("No valid prevalences found with given constraints. Try adjusting min_val/max_val.")
241
+
242
+
243
def bootstrap_sample_indices(
    n_samples: int,
    batch_size: int,
    n_bootstraps: int,
    random_state: int = None
):
    """
    Lazily generate bootstrap sample indices for a dataset.

    Each yielded array contains ``batch_size`` indices drawn with
    replacement from ``range(n_samples)``.

    Parameters
    ----------
    n_samples : int
        Total number of samples in the dataset.
    batch_size : int
        Number of samples in each bootstrap sample.
    n_bootstraps : int
        Number of bootstrap samples to generate.
    random_state : int, optional
        Random seed for reproducibility; seeds the global NumPy RNG once,
        before the first draw.

    Yields
    ------
    np.ndarray
        Array containing indices for a bootstrap sample.
    """
    if random_state is not None:
        np.random.seed(random_state)

    remaining = n_bootstraps
    while remaining > 0:
        remaining -= 1
        yield np.random.choice(n_samples, size=batch_size, replace=True)
@@ -0,0 +1,44 @@
1
+ from dataclasses import dataclass, field
2
+
3
+
4
@dataclass
class TargetInputTags:
    """Tag set describing the target (label) input a quantifier works with.

    Defaults describe a 1-D, categorical, possibly multi-class target.
    Field semantics are consumed by code outside this module (retrieved via
    ``get_tags``) — names suggest acceptance flags; confirm against callers.
    """

    one_d: bool = True         # 1-D target arrays
    two_d: bool = False        # 2-D target arrays
    continuous: bool = False   # continuous-valued targets
    categorical: bool = True   # categorical targets
    multi_class: bool = True   # more than two classes
    required: bool = False     # presumably: targets must be supplied — verify against callers
13
+
14
@dataclass
class PredictionRequirements:
    """Flags for which classifier outputs a quantifier needs.

    All default to True; consumed by code outside this module (retrieved via
    ``get_tags``) — exact semantics should be confirmed against callers.
    """

    requires_train_proba: bool = True        # needs predicted probabilities on training data
    requires_train_labels: bool = True       # needs ground-truth labels of training data
    requires_test_predictions: bool = True   # needs predictions on test data
20
+
21
+
22
@dataclass
class Tags:
    """Aggregated capability/requirement tags for a quantifier.

    Instances are produced by a quantifier's ``__mlquantify_tags__`` method
    and retrieved with ``get_tags``. The first six fields have no defaults
    and must be supplied explicitly.
    """

    estimation_type: str | None              # e.g. kind of estimation performed — confirm valid values with producers
    estimator_function: str | None           # identifier of the estimator function used, if any
    estimator_type: str | None               # identifier of the underlying estimator type, if any
    aggregation_type: str | None             # identifier of the aggregation strategy, if any
    target_input_tags: TargetInputTags       # accepted target formats (see TargetInputTags)
    prediction_requirements: PredictionRequirements  # classifier outputs required (see PredictionRequirements)
    has_estimator: bool = False              # whether the quantifier wraps an estimator
    requires_fit: bool = True                # whether fit() must be called before use
33
+
34
+
35
+
36
def get_tags(quantifier):
    """
    Retrieve the mlquantify tag set from a quantifier.

    Parameters
    ----------
    quantifier : object
        Object expected to implement ``__mlquantify_tags__``.

    Returns
    -------
    object
        Whatever ``quantifier.__mlquantify_tags__()`` returns.

    Raises
    ------
    AttributeError
        With a descriptive message when the quantifier does not define
        ``__mlquantify_tags__``. An AttributeError raised *inside* a
        present ``__mlquantify_tags__`` implementation is re-raised as-is.
    """
    try:
        return quantifier.__mlquantify_tags__()
    except AttributeError as err:
        # Only translate the "method missing" case into the friendly message;
        # other AttributeErrors come from inside the implementation itself.
        if "has no attribute '__mlquantify_tags__'" not in str(err):
            raise
        raise AttributeError("Quantifier is missing __mlquantify_tags__ method, ensure your class is inheriting from BaseQuantifier.")