mlquantify 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. mlquantify/__init__.py +0 -29
  2. mlquantify/adjust_counting/__init__.py +14 -0
  3. mlquantify/adjust_counting/_adjustment.py +365 -0
  4. mlquantify/adjust_counting/_base.py +247 -0
  5. mlquantify/adjust_counting/_counting.py +145 -0
  6. mlquantify/adjust_counting/_utils.py +114 -0
  7. mlquantify/base.py +117 -519
  8. mlquantify/base_aggregative.py +209 -0
  9. mlquantify/calibration.py +1 -0
  10. mlquantify/confidence.py +335 -0
  11. mlquantify/likelihood/__init__.py +5 -0
  12. mlquantify/likelihood/_base.py +161 -0
  13. mlquantify/likelihood/_classes.py +414 -0
  14. mlquantify/meta/__init__.py +1 -0
  15. mlquantify/meta/_classes.py +761 -0
  16. mlquantify/metrics/__init__.py +21 -0
  17. mlquantify/metrics/_oq.py +109 -0
  18. mlquantify/metrics/_rq.py +98 -0
  19. mlquantify/{evaluation/measures.py → metrics/_slq.py} +43 -28
  20. mlquantify/mixture/__init__.py +7 -0
  21. mlquantify/mixture/_base.py +153 -0
  22. mlquantify/mixture/_classes.py +400 -0
  23. mlquantify/mixture/_utils.py +112 -0
  24. mlquantify/model_selection/__init__.py +9 -0
  25. mlquantify/model_selection/_protocol.py +358 -0
  26. mlquantify/model_selection/_search.py +315 -0
  27. mlquantify/model_selection/_split.py +1 -0
  28. mlquantify/multiclass.py +350 -0
  29. mlquantify/neighbors/__init__.py +9 -0
  30. mlquantify/neighbors/_base.py +198 -0
  31. mlquantify/neighbors/_classes.py +159 -0
  32. mlquantify/{classification/methods.py → neighbors/_classification.py} +48 -66
  33. mlquantify/neighbors/_kde.py +270 -0
  34. mlquantify/neighbors/_utils.py +135 -0
  35. mlquantify/neural/__init__.py +1 -0
  36. mlquantify/utils/__init__.py +47 -2
  37. mlquantify/utils/_artificial.py +27 -0
  38. mlquantify/utils/_constraints.py +219 -0
  39. mlquantify/utils/_context.py +21 -0
  40. mlquantify/utils/_decorators.py +36 -0
  41. mlquantify/utils/_exceptions.py +12 -0
  42. mlquantify/utils/_get_scores.py +159 -0
  43. mlquantify/utils/_load.py +18 -0
  44. mlquantify/utils/_parallel.py +6 -0
  45. mlquantify/utils/_random.py +36 -0
  46. mlquantify/utils/_sampling.py +273 -0
  47. mlquantify/utils/_tags.py +44 -0
  48. mlquantify/utils/_validation.py +447 -0
  49. mlquantify/utils/prevalence.py +61 -0
  50. {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/METADATA +2 -1
  51. mlquantify-0.1.9.dist-info/RECORD +53 -0
  52. mlquantify/classification/__init__.py +0 -1
  53. mlquantify/evaluation/__init__.py +0 -14
  54. mlquantify/evaluation/protocol.py +0 -291
  55. mlquantify/methods/__init__.py +0 -37
  56. mlquantify/methods/aggregative.py +0 -1159
  57. mlquantify/methods/meta.py +0 -472
  58. mlquantify/methods/mixture_models.py +0 -1003
  59. mlquantify/methods/non_aggregative.py +0 -136
  60. mlquantify/methods/threshold_optimization.py +0 -869
  61. mlquantify/model_selection.py +0 -377
  62. mlquantify/plots.py +0 -367
  63. mlquantify/utils/general.py +0 -371
  64. mlquantify/utils/method.py +0 -449
  65. mlquantify-0.1.7.dist-info/RECORD +0 -22
  66. {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/WHEEL +0 -0
  67. {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/top_level.txt +0 -0
@@ -1,371 +0,0 @@
- import numpy as np
- import pandas as pd
- from joblib import Parallel, delayed, load
- from collections import defaultdict
- import itertools
-
-
- def convert_columns_to_arrays(df, columns: list = ['PRED_PREVS', 'REAL_PREVS']):
-     """
-     Converts specified columns in a DataFrame from strings of arrays to NumPy arrays.
-
-     Parameters
-     ----------
-     df : pd.DataFrame
-         DataFrame to convert.
-     columns : list
-         List of columns to convert.
-
-     Returns
-     -------
-     pd.DataFrame
-         DataFrame with the specified columns converted to NumPy arrays.
-     """
-     for col in columns:
-         df[col] = df[col].apply(lambda x: np.fromstring(x.strip('[]'), sep=' ') if isinstance(x, str) else x)
-     return df
-
-
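A minimal usage sketch (editorial, not part of the diff), assuming the 0.1.7 import path mlquantify.utils.general and hypothetical data:

    import pandas as pd
    from mlquantify.utils.general import convert_columns_to_arrays  # 0.1.7 path

    # Prevalence columns often come back as strings after a CSV round-trip
    df = pd.DataFrame({'PRED_PREVS': ['[0.3 0.7]'], 'REAL_PREVS': ['[0.5 0.5]']})
    df = convert_columns_to_arrays(df)
    print(df['PRED_PREVS'][0])  # array([0.3, 0.7])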
- def get_indexes_with_prevalence(y, prevalence: list, sample_size: int):
-     """
-     Get indexes for a stratified sample based on the prevalence of each class.
-
-     Parameters
-     ----------
-     y : np.ndarray
-         Array of class labels.
-     prevalence : list
-         List of prevalences for each class, in the order of np.unique(y).
-     sample_size : int
-         Number of samples to generate.
-
-     Returns
-     -------
-     list
-         List of indexes for the stratified sample.
-     """
-     classes = np.unique(y)
-
-     # Ensure the sum of prevalences is 1
-     assert np.isclose(sum(prevalence), 1), "The sum of prevalences must be 1"
-     # Ensure the number of prevalences matches the number of classes
-     assert len(prevalence) == len(classes), "The number of prevalences must match the number of classes"
-
-     sampled_indexes = []
-     total_sampled = 0
-
-     for i, class_ in enumerate(classes):
-
-         # Give the last class the remainder so the total is exactly sample_size
-         if i == len(classes) - 1:
-             num_samples = sample_size - total_sampled
-         else:
-             num_samples = int(sample_size * prevalence[i])
-
-         # Get the indexes of the current class
-         class_indexes = np.where(y == class_)[0]
-
-         # Sample the indexes for the current class (with replacement)
-         sampled_class_indexes = np.random.choice(class_indexes, size=num_samples, replace=True)
-
-         sampled_indexes.extend(sampled_class_indexes)
-         total_sampled += num_samples
-
-     np.random.shuffle(sampled_indexes)  # Shuffle after collecting all indexes
-
-     return sampled_indexes
-
-
-
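A short sketch of the function above (editorial, assuming the 0.1.7 import path):

    import numpy as np
    from mlquantify.utils.general import get_indexes_with_prevalence  # 0.1.7 path

    y = np.array([0] * 80 + [1] * 20)
    # Draw a 10-element sample that is 30% class 0 / 70% class 1
    idx = get_indexes_with_prevalence(y, prevalence=[0.3, 0.7], sample_size=10)
    print(np.bincount(y[idx]) / len(idx))  # [0.3, 0.7]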
- def kraemer_sampling(n_dim: int, n_prev: int, n_iter: int = 1) -> np.ndarray:
-     """
-     Uniform sampling from the unit simplex using Kraemer's algorithm.
-
-     Parameters
-     ----------
-     n_dim : int
-         Number of dimensions.
-     n_prev : int
-         Number of prevalence vectors to sample.
-     n_iter : int
-         Number of times each sampled prevalence is repeated.
-
-     Returns
-     -------
-     np.ndarray
-         Array of sampled prevalences.
-     """
-
-     def _sampling(n_dim: int, n_prev: int) -> np.ndarray:
-         if n_dim == 2:
-             u = np.random.rand(n_prev)
-             return np.vstack([1 - u, u]).T
-         else:
-             u = np.random.rand(n_prev, n_dim - 1)
-             u.sort(axis=-1)  # sort each row
-             _0s = np.zeros((n_prev, 1))
-             _1s = np.ones((n_prev, 1))
-             a = np.hstack([_0s, u])
-             b = np.hstack([u, _1s])
-             return b - a
-
-     prevs = _sampling(n_dim, n_prev)
-
-     # Repeat each prevalence vector n_iter times
-     return np.repeat(prevs, n_iter, axis=0) if n_iter > 1 else prevs
-
-
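The gaps between sorted uniform cut points on [0, 1] always sum to 1, so every row is a valid prevalence vector. A quick check (editorial, assuming the 0.1.7 import path):

    import numpy as np
    from mlquantify.utils.general import kraemer_sampling  # 0.1.7 path

    prevs = kraemer_sampling(n_dim=3, n_prev=5)
    print(prevs.shape)        # (5, 3)
    print(prevs.sum(axis=1))  # every row sums to 1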
- def generate_artificial_prevalences(n_dim: int, n_prev: int, n_iter: int) -> np.ndarray:
-     """Generates artificial prevalences on an evenly spaced grid over the unit simplex.
-
-     Parameters
-     ----------
-     n_dim : int
-         Number of dimensions.
-     n_prev : int
-         Number of grid points per dimension.
-     n_iter : int
-         Number of times each prevalence is repeated.
-
-     Returns
-     -------
-     np.ndarray
-         Array of artificial prevalences.
-
-     """
-     s = np.linspace(0., 1., n_prev, endpoint=True)
-     prevs = np.array([p + (1 - sum(p),) for p in itertools.product(*(s,) * (n_dim - 1)) if sum(p) <= 1])
-
-     return np.repeat(prevs, n_iter, axis=0) if n_iter > 1 else prevs
-
-
-
-
-
-
-
-
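Unlike kraemer_sampling, this enumerates a deterministic grid: the last coordinate is whatever remains after the free coordinates are chosen. A small illustration (editorial, assuming the 0.1.7 import path):

    from mlquantify.utils.general import generate_artificial_prevalences  # 0.1.7 path

    # 3 grid points per free dimension -> prevalences drawn from {0, 0.5, 1}
    prevs = generate_artificial_prevalences(n_dim=2, n_prev=3, n_iter=1)
    print(prevs)  # [[0.  1. ] [0.5 0.5] [1.  0. ]]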
- def get_real_prev(y) -> dict:
-     """
-     Get the real prevalence of each class in the target array.
-
-     Parameters
-     ----------
-     y : np.ndarray or pd.Series
-         Array of class labels.
-
-     Returns
-     -------
-     dict
-         Dictionary of class labels and their corresponding prevalence.
-     """
-     if isinstance(y, np.ndarray):
-         y = pd.Series(y)
-     real_prevs = y.value_counts(normalize=True).to_dict()
-     real_prevs = dict(sorted(real_prevs.items()))
-     return real_prevs
-
-
-
-
-
-
-
-
-
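A one-line sanity check (editorial, assuming the 0.1.7 import path):

    import numpy as np
    from mlquantify.utils.general import get_real_prev  # 0.1.7 path

    y = np.array([0, 0, 0, 1])
    print(get_real_prev(y))  # {0: 0.75, 1: 0.25}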
- def load_quantifier(path: str):
-     """
-     Load a quantifier from a file.
-
-     Parameters
-     ----------
-     path : str
-         Path to the file containing the quantifier.
-
-     Returns
-     -------
-     Quantifier
-         Loaded quantifier.
-     """
-     return load(path)
-
-
-
-
-
-
-
-
-
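This is a thin wrapper over joblib.load, so joblib.dump is the natural counterpart when persisting. A sketch (editorial; my_quantifier and the file name are placeholders):

    from joblib import dump
    from mlquantify.utils.general import load_quantifier  # 0.1.7 path

    dump(my_quantifier, 'quantifier.joblib')  # my_quantifier: any fitted quantifier
    quantifier = load_quantifier('quantifier.joblib')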
- def make_prevs(ndim: int) -> np.ndarray:
-     """
-     Generate ndim values uniformly distributed between 0 and 1 that sum exactly to 1.
-
-     Parameters
-     ----------
-     ndim : int
-         Number of dimensions.
-
-     Returns
-     -------
-     np.ndarray
-         Array of ndim values uniformly distributed between 0 and 1 that sum exactly to 1.
-     """
-     # Generate ndim - 1 cut points uniformly distributed between 0 and 1
-     u_dist = np.random.uniform(0, 1, ndim - 1)
-     # Add the endpoints 0 and 1
-     u_dist = np.append(u_dist, [0, 1])
-     # Sort the cut points
-     u_dist.sort()
-     # The gaps between consecutive cut points form the prevalences
-     prevs = np.diff(u_dist)
-
-     return prevs
-
-
-
-
-
-
-
-
-
-
-
-
-
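This is the same stick-breaking construction as kraemer_sampling, for a single vector: sort ndim - 1 uniform cut points and take consecutive differences. A quick check (editorial, assuming the 0.1.7 import path):

    from mlquantify.utils.general import make_prevs  # 0.1.7 path

    p = make_prevs(4)
    print(p, p.sum())  # four non-negative values, sum == 1.0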
- def normalize_prevalence(prevalences: np.ndarray, classes: list):
-     """
-     Normalize the prevalence of each class to sum to 1.
-
-     Parameters
-     ----------
-     prevalences : np.ndarray or dict
-         Array (or dict) of prevalences.
-     classes : list
-         List of unique classes.
-
-     Returns
-     -------
-     dict
-         Dictionary of class labels and their corresponding prevalence.
-     """
-     if isinstance(prevalences, dict):
-         summ = sum(prevalences.values())
-         prevalences = {int(_class): float(value / summ) for _class, value in prevalences.items()}
-         return prevalences
-
-     summ = np.sum(prevalences, axis=-1, keepdims=True)
-     prevalences = np.true_divide(prevalences, summ, where=summ > 0)
-     prevalences = {int(_class): float(prev) for _class, prev in zip(classes, prevalences)}
-     prevalences = defaultdict(lambda: 0, prevalences)
-
-     # Accessing a defaultdict inserts the default, so this materializes
-     # every class with prevalence 0 if it was missing
-     for cls in classes:
-         prevalences[cls] = prevalences[cls]
-
-     return dict(prevalences)
-
-
-
-
-
-
-
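A minimal sketch of the array path (editorial, assuming the 0.1.7 import path):

    import numpy as np
    from mlquantify.utils.general import normalize_prevalence  # 0.1.7 path

    counts = np.array([2.0, 6.0, 2.0])
    print(normalize_prevalence(counts, classes=[0, 1, 2]))
    # {0: 0.2, 1: 0.6, 2: 0.2}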
- def parallel(func, elements, n_jobs: int = 1, *args):
-     """
-     Run a function in parallel on a list of elements.
-
-     Parameters
-     ----------
-     func : function
-         Function to run in parallel.
-     elements : list
-         List of elements to run the function on.
-     n_jobs : int
-         Number of jobs to run in parallel. Note that n_jobs must be passed
-         positionally when extra *args are supplied.
-     args : tuple
-         Additional arguments passed to the function after each element.
-
-     Returns
-     -------
-     list
-         List of results from running the function on each element.
-     """
-     return Parallel(n_jobs=n_jobs, backend="threading")(
-         delayed(func)(e, *args) for e in elements
-     )
-
-
-
-
-
-
-
-
-
-
-
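A usage sketch (editorial, assuming the 0.1.7 import path); note the threading backend is best suited to functions that release the GIL:

    from mlquantify.utils.general import parallel  # 0.1.7 path

    def square(x):
        return x * x

    # n_jobs is the third positional argument
    print(parallel(square, [1, 2, 3], 2))  # [1, 4, 9]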
- def round_protocol_df(dataframe: pd.DataFrame, frac: int = 3):
-     """
-     Round the columns of a protocol dataframe to a specified number of decimal places.
-
-     Parameters
-     ----------
-     dataframe : pd.DataFrame
-         Protocol dataframe to round.
-     frac : int
-         Number of decimal places to round to.
-
-     Returns
-     -------
-     pd.DataFrame
-         Protocol DataFrame with the columns rounded to the specified number of decimal places.
-     """
-     def round_column(col):
-         if col.name in ['PRED_PREVS', 'REAL_PREVS']:
-             return col.apply(lambda x: np.round(x, frac) if isinstance(x, (np.ndarray, float, int)) else x)
-         elif np.issubdtype(col.dtype, np.number):
-             return col.round(frac)
-         else:
-             return col
-
-     return dataframe.apply(round_column)
-
-
-
-
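A small illustration of the two rounding paths, array-valued and scalar columns (editorial, assuming the 0.1.7 import path; the MAE column is a hypothetical example):

    import numpy as np
    import pandas as pd
    from mlquantify.utils.general import round_protocol_df  # 0.1.7 path

    df = pd.DataFrame({'PRED_PREVS': [np.array([0.3333333, 0.6666667])], 'MAE': [0.0123456]})
    print(round_protocol_df(df, frac=2))
    # PRED_PREVS -> [0.33, 0.67]; MAE -> 0.01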
- def get_measure(measure: str):
-     """
-     Get the measure from the evaluation module.
-
-     Parameters
-     ----------
-     measure : str
-         Name of the measure to look up in MEASURES.
-
-     Returns
-     -------
-     Measure
-         Measure function from the evaluation module.
-     """
-     from ..evaluation import MEASURES
-     return MEASURES.get(measure)
-
-
- def get_method(method: str):
-     """
-     Get the method from the methods module.
-
-     Parameters
-     ----------
-     method : str
-         Name of the method to look up in METHODS.
-
-     Returns
-     -------
-     Method
-         Method class from the methods module.
-     """
-     from ..methods import METHODS
-     return METHODS.get(method)
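Both helpers are plain dict lookups into the 0.1.7 registries (MEASURES and METHODS, themselves removed in 0.1.9). A sketch of the pattern (editorial; the key names "mae" and "CC" are assumptions, not confirmed by this diff):

    from mlquantify.utils.general import get_measure, get_method  # 0.1.7 paths

    mae = get_measure("mae")  # assuming "mae" was a key in MEASURES
    CC = get_method("CC")     # assuming "CC" (Classify & Count) was a key in METHODS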