mlquantify 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlquantify/__init__.py +0 -29
- mlquantify/adjust_counting/__init__.py +14 -0
- mlquantify/adjust_counting/_adjustment.py +365 -0
- mlquantify/adjust_counting/_base.py +247 -0
- mlquantify/adjust_counting/_counting.py +145 -0
- mlquantify/adjust_counting/_utils.py +114 -0
- mlquantify/base.py +117 -519
- mlquantify/base_aggregative.py +209 -0
- mlquantify/calibration.py +1 -0
- mlquantify/confidence.py +335 -0
- mlquantify/likelihood/__init__.py +5 -0
- mlquantify/likelihood/_base.py +161 -0
- mlquantify/likelihood/_classes.py +414 -0
- mlquantify/meta/__init__.py +1 -0
- mlquantify/meta/_classes.py +761 -0
- mlquantify/metrics/__init__.py +21 -0
- mlquantify/metrics/_oq.py +109 -0
- mlquantify/metrics/_rq.py +98 -0
- mlquantify/{evaluation/measures.py → metrics/_slq.py} +43 -28
- mlquantify/mixture/__init__.py +7 -0
- mlquantify/mixture/_base.py +153 -0
- mlquantify/mixture/_classes.py +400 -0
- mlquantify/mixture/_utils.py +112 -0
- mlquantify/model_selection/__init__.py +9 -0
- mlquantify/model_selection/_protocol.py +358 -0
- mlquantify/model_selection/_search.py +315 -0
- mlquantify/model_selection/_split.py +1 -0
- mlquantify/multiclass.py +350 -0
- mlquantify/neighbors/__init__.py +9 -0
- mlquantify/neighbors/_base.py +198 -0
- mlquantify/neighbors/_classes.py +159 -0
- mlquantify/{classification/methods.py → neighbors/_classification.py} +48 -66
- mlquantify/neighbors/_kde.py +270 -0
- mlquantify/neighbors/_utils.py +135 -0
- mlquantify/neural/__init__.py +1 -0
- mlquantify/utils/__init__.py +47 -2
- mlquantify/utils/_artificial.py +27 -0
- mlquantify/utils/_constraints.py +219 -0
- mlquantify/utils/_context.py +21 -0
- mlquantify/utils/_decorators.py +36 -0
- mlquantify/utils/_exceptions.py +12 -0
- mlquantify/utils/_get_scores.py +159 -0
- mlquantify/utils/_load.py +18 -0
- mlquantify/utils/_parallel.py +6 -0
- mlquantify/utils/_random.py +36 -0
- mlquantify/utils/_sampling.py +273 -0
- mlquantify/utils/_tags.py +44 -0
- mlquantify/utils/_validation.py +447 -0
- mlquantify/utils/prevalence.py +61 -0
- {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/METADATA +2 -1
- mlquantify-0.1.9.dist-info/RECORD +53 -0
- mlquantify/classification/__init__.py +0 -1
- mlquantify/evaluation/__init__.py +0 -14
- mlquantify/evaluation/protocol.py +0 -291
- mlquantify/methods/__init__.py +0 -37
- mlquantify/methods/aggregative.py +0 -1159
- mlquantify/methods/meta.py +0 -472
- mlquantify/methods/mixture_models.py +0 -1003
- mlquantify/methods/non_aggregative.py +0 -136
- mlquantify/methods/threshold_optimization.py +0 -869
- mlquantify/model_selection.py +0 -377
- mlquantify/plots.py +0 -367
- mlquantify/utils/general.py +0 -371
- mlquantify/utils/method.py +0 -449
- mlquantify-0.1.7.dist-info/RECORD +0 -22
- {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/WHEEL +0 -0
- {mlquantify-0.1.7.dist-info → mlquantify-0.1.9.dist-info}/top_level.txt +0 -0
mlquantify/utils/general.py
DELETED
|
@@ -1,371 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import pandas as pd
|
|
3
|
-
from joblib import Parallel, delayed, load
|
|
4
|
-
from collections import defaultdict
|
|
5
|
-
import itertools
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def convert_columns_to_arrays(df, columns: list = ('PRED_PREVS', 'REAL_PREVS')):
    """
    Convert string-encoded array columns of a DataFrame into NumPy arrays.

    Cells that are not strings (e.g. already arrays) are left untouched.
    The DataFrame is modified in place and also returned.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to convert.
    columns : list or tuple, default ('PRED_PREVS', 'REAL_PREVS')
        Names of the columns to convert. (A tuple default avoids the
        mutable-default-argument pitfall; passing a list still works.)

    Returns
    -------
    pd.DataFrame
        The same DataFrame with the specified columns converted to NumPy arrays.
    """
    for col in columns:
        # String cells look like "[0.1 0.9]": strip the brackets and parse
        # the space-separated numbers.
        df[col] = df[col].apply(
            lambda x: np.fromstring(x.strip('[]'), sep=' ') if isinstance(x, str) else x
        )
    return df
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def get_indexes_with_prevalence(y, prevalence: list, sample_size: int):
    """
    Get indexes for a stratified sample based on the prevalence of each class.

    Sampling is done with replacement; the last class absorbs any rounding
    remainder so the result always contains exactly ``sample_size`` indexes.

    Parameters
    ----------
    y : np.ndarray
        Array of class labels.
    prevalence : list
        List of prevalences for each class; must sum to 1 and have one
        entry per unique class in ``y``.
    sample_size : int
        Number of samples to generate.

    Returns
    -------
    list
        Shuffled list of indexes for the stratified sample.

    Raises
    ------
    ValueError
        If the prevalences do not sum to 1, or their count does not match
        the number of classes in ``y``.
    """
    classes = np.unique(y)

    # Validate with explicit exceptions: `assert` is stripped under -O,
    # so it must not be used for input validation.
    if not np.isclose(sum(prevalence), 1):
        raise ValueError("The sum of prevalences must be 1")
    if len(prevalence) != len(classes):
        raise ValueError("The number of prevalences must match the number of classes")

    sampled_indexes = []
    total_sampled = 0

    for i, class_ in enumerate(classes):
        # Last class takes the remainder so the total is exactly sample_size.
        if i == len(classes) - 1:
            num_samples = sample_size - total_sampled
        else:
            num_samples = int(sample_size * prevalence[i])

        # Indexes of the current class in y.
        class_indexes = np.where(y == class_)[0]

        # Sample with replacement from the current class.
        sampled_class_indexes = np.random.choice(class_indexes, size=num_samples, replace=True)

        sampled_indexes.extend(sampled_class_indexes)
        total_sampled += num_samples

    np.random.shuffle(sampled_indexes)  # mix the classes after collecting all indexes

    return sampled_indexes
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def kraemer_sampling(n_dim: int, n_prev: int, n_iter: int = 1) -> np.ndarray:
    """
    Draw prevalence vectors uniformly from the unit simplex (Kraemer's method).

    Parameters
    ----------
    n_dim : int
        Number of dimensions (classes).
    n_prev : int
        Number of prevalence vectors to draw.
    n_iter : int
        Number of times each drawn vector is repeated in the output.

    Returns
    -------
    np.ndarray
        Array of shape (n_prev * n_iter, n_dim) of sampled prevalences.
    """

    def _draw(dim: int, count: int) -> np.ndarray:
        if dim == 2:
            # Binary case: one uniform draw fixes both coordinates.
            first = np.random.rand(count)
            return np.vstack([1 - first, first]).T
        # General case: sort dim-1 uniform cut points per row, pad the
        # interval with 0 and 1, and take consecutive gap widths.
        cuts = np.random.rand(count, dim - 1)
        cuts.sort(axis=-1)
        lower = np.hstack([np.zeros((count, 1)), cuts])
        upper = np.hstack([cuts, np.ones((count, 1))])
        return upper - lower

    samples = _draw(n_dim, n_prev)

    # Repeat each prevalence vector n_iter times when requested.
    return np.repeat(samples, n_iter, axis=0) if n_iter > 1 else samples
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
def generate_artificial_prevalences(n_dim: int, n_prev: int, n_iter: int) -> np.ndarray:
    """Generate a regular grid of artificial prevalences with ``n_dim`` dimensions.

    The first n_dim - 1 coordinates range over an evenly spaced grid in
    [0, 1]; the last coordinate is the remainder, and combinations whose
    partial sum exceeds 1 are discarded.

    Parameters
    ----------
    n_dim : int
        Number of dimensions (classes).
    n_prev : int
        Number of grid points per dimension.
    n_iter : int
        Number of times each prevalence vector is repeated.

    Returns
    -------
    np.ndarray
        Array of artificial prevalences, each row summing to 1.
    """
    grid = np.linspace(0., 1., n_prev, endpoint=True)

    combos = []
    for point in itertools.product(grid, repeat=n_dim - 1):
        total = sum(point)
        if total <= 1:
            # Last coordinate absorbs the remainder so the row sums to 1.
            combos.append(point + (1 - total,))
    prevs = np.array(combos)

    return np.repeat(prevs, n_iter, axis=0) if n_iter > 1 else prevs
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
def get_real_prev(y) -> dict:
    """
    Compute the true prevalence of each class in a label array.

    Parameters
    ----------
    y : np.ndarray or pd.Series
        Array of class labels.

    Returns
    -------
    dict
        Mapping from class label to its relative frequency, ordered by
        class label.
    """
    labels = pd.Series(y) if isinstance(y, np.ndarray) else y
    frequencies = labels.value_counts(normalize=True).to_dict()
    # Sort by class label so the output order is deterministic.
    return dict(sorted(frequencies.items()))
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
def load_quantifier(path: str):
    """
    Load a previously saved quantifier from disk.

    Parameters
    ----------
    path : str
        Path to the file containing the serialized quantifier.

    Returns
    -------
    Quantifier
        The deserialized quantifier object.
    """
    quantifier = load(path)
    return quantifier
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
def make_prevs(ndim: int) -> list:
    """
    Draw a random prevalence vector of length ``ndim`` that sums exactly to 1.

    Uses sorted uniform cut points: ndim - 1 uniform draws split the unit
    interval, and the widths of the resulting segments are the prevalences.

    Parameters
    ----------
    ndim : int
        Number of dimensions (classes).

    Returns
    -------
    np.ndarray
        Array of ``ndim`` non-negative values uniformly distributed on the
        simplex, summing exactly to 1.
    """
    # Endpoints 0 and 1 plus ndim-1 uniform cut points in between.
    cut_points = np.concatenate(([0.0, 1.0], np.random.uniform(0, 1, ndim - 1)))
    cut_points.sort()
    # Widths of consecutive segments are the prevalences.
    return np.diff(cut_points)
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
def normalize_prevalence(prevalences: np.ndarray, classes: list):
    """
    Normalize prevalences so they sum to 1, keyed by class label.

    Parameters
    ----------
    prevalences : np.ndarray or dict
        Array of prevalences aligned with ``classes``, or a mapping from
        class label to raw prevalence.
    classes : list
        List of unique classes (ignored when ``prevalences`` is a dict).

    Returns
    -------
    dict
        Dictionary mapping each class label to its normalized prevalence.
        An all-zero input array yields zeros for every class.
    """
    if isinstance(prevalences, dict):
        summ = sum(prevalences.values())
        return {int(_class): float(value / summ) for _class, value in prevalences.items()}

    summ = np.sum(prevalences, axis=-1, keepdims=True)
    # Divide by the computed total (the original divided by the builtin
    # sum of the array instead of `summ`).  Supply `out=` so entries where
    # the total is 0 become 0 rather than uninitialized garbage — `where=`
    # without `out=` leaves those positions undefined.
    normalized = np.divide(
        prevalences, summ,
        out=np.zeros_like(prevalences, dtype=float),
        where=summ > 0,
    )

    result = defaultdict(float, {int(_class): float(prev) for _class, prev in zip(classes, normalized)})

    # Ensure every class appears in the result, even with zero prevalence.
    for cls in classes:
        result[cls] = result[cls]

    return dict(result)
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
def parallel(func, elements, n_jobs: int = 1, *args):
    """
    Apply ``func`` to every element of ``elements``, optionally in parallel.

    Parameters
    ----------
    func : function
        Function applied to each element.
    elements : list
        Items to process.
    n_jobs : int
        Number of threads used by the joblib backend.
    args : tuple
        Extra positional arguments forwarded to ``func`` after the element.

    Returns
    -------
    list
        One result per element, in input order.
    """
    tasks = (delayed(func)(element, *args) for element in elements)
    executor = Parallel(n_jobs=n_jobs, backend="threading")
    return executor(tasks)
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
def round_protocol_df(dataframe: pd.DataFrame, frac: int = 3):
    """
    Round the numeric content of a protocol dataframe.

    Prevalence columns ('PRED_PREVS', 'REAL_PREVS') hold arrays or scalars
    per cell and are rounded cell-by-cell; other numeric columns are
    rounded vectorized; non-numeric columns pass through unchanged.

    Parameters
    ----------
    dataframe : pd.DataFrame
        Protocol dataframe to round.
    frac : int
        Number of decimal places to round to.

    Returns
    -------
    pd.DataFrame
        Protocol dataframe with values rounded to ``frac`` decimal places.
    """
    prevalence_columns = {'PRED_PREVS', 'REAL_PREVS'}

    def _round_column(col):
        if col.name in prevalence_columns:
            # Cells may be arrays or plain numbers; anything else is kept as-is.
            return col.apply(
                lambda cell: np.round(cell, frac)
                if isinstance(cell, (np.ndarray, float, int))
                else cell
            )
        if np.issubdtype(col.dtype, np.number):
            return col.round(frac)
        return col

    return dataframe.apply(_round_column)
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
def get_measure(measure:str):
    """
    Look up an error measure function by name.

    Parameters
    ----------
    measure : str
        Name of the measure; used as a key into the evaluation module's
        MEASURES dictionary.

    Returns
    -------
    Measure or None
        The measure function registered under that name, or None when the
        name is not a key of MEASURES (dict.get semantics).
    """
    # Imported lazily inside the function to avoid a circular import with
    # the evaluation module.
    from ..evaluation import MEASURES
    return MEASURES.get(measure)
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
def get_method(method: str):
    """
    Look up a quantification method class by name.

    Parameters
    ----------
    method : str
        Name of the method; used as a key into the methods module's
        METHODS dictionary.

    Returns
    -------
    Method or None
        The method class registered under that name, or None when the
        name is not a key of METHODS (dict.get semantics).
    """
    # Imported lazily inside the function to avoid a circular import with
    # the methods module.
    from ..methods import METHODS
    return METHODS.get(method)
|