mlquantify 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlquantify/__init__.py +10 -29
- mlquantify/adjust_counting/__init__.py +24 -0
- mlquantify/adjust_counting/_adjustment.py +648 -0
- mlquantify/adjust_counting/_base.py +245 -0
- mlquantify/adjust_counting/_counting.py +153 -0
- mlquantify/adjust_counting/_utils.py +109 -0
- mlquantify/base.py +117 -519
- mlquantify/base_aggregative.py +209 -0
- mlquantify/calibration.py +1 -0
- mlquantify/confidence.py +329 -0
- mlquantify/likelihood/__init__.py +5 -0
- mlquantify/likelihood/_base.py +147 -0
- mlquantify/likelihood/_classes.py +430 -0
- mlquantify/meta/__init__.py +1 -0
- mlquantify/meta/_classes.py +785 -0
- mlquantify/metrics/__init__.py +21 -0
- mlquantify/metrics/_oq.py +109 -0
- mlquantify/metrics/_rq.py +98 -0
- mlquantify/{evaluation/measures.py → metrics/_slq.py} +51 -36
- mlquantify/mixture/__init__.py +7 -0
- mlquantify/mixture/_base.py +147 -0
- mlquantify/mixture/_classes.py +458 -0
- mlquantify/mixture/_utils.py +163 -0
- mlquantify/model_selection/__init__.py +9 -0
- mlquantify/model_selection/_protocol.py +358 -0
- mlquantify/model_selection/_search.py +315 -0
- mlquantify/model_selection/_split.py +1 -0
- mlquantify/multiclass.py +350 -0
- mlquantify/neighbors/__init__.py +9 -0
- mlquantify/neighbors/_base.py +168 -0
- mlquantify/neighbors/_classes.py +150 -0
- mlquantify/{classification/methods.py → neighbors/_classification.py} +37 -62
- mlquantify/neighbors/_kde.py +268 -0
- mlquantify/neighbors/_utils.py +131 -0
- mlquantify/neural/__init__.py +1 -0
- mlquantify/utils/__init__.py +47 -2
- mlquantify/utils/_artificial.py +27 -0
- mlquantify/utils/_constraints.py +219 -0
- mlquantify/utils/_context.py +21 -0
- mlquantify/utils/_decorators.py +36 -0
- mlquantify/utils/_exceptions.py +12 -0
- mlquantify/utils/_get_scores.py +159 -0
- mlquantify/utils/_load.py +18 -0
- mlquantify/utils/_parallel.py +6 -0
- mlquantify/utils/_random.py +36 -0
- mlquantify/utils/_sampling.py +273 -0
- mlquantify/utils/_tags.py +44 -0
- mlquantify/utils/_validation.py +447 -0
- mlquantify/utils/prevalence.py +64 -0
- {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/METADATA +2 -1
- mlquantify-0.1.10.dist-info/RECORD +53 -0
- mlquantify/classification/__init__.py +0 -1
- mlquantify/evaluation/__init__.py +0 -14
- mlquantify/evaluation/protocol.py +0 -289
- mlquantify/methods/__init__.py +0 -37
- mlquantify/methods/aggregative.py +0 -1159
- mlquantify/methods/meta.py +0 -472
- mlquantify/methods/mixture_models.py +0 -1003
- mlquantify/methods/non_aggregative.py +0 -136
- mlquantify/methods/threshold_optimization.py +0 -869
- mlquantify/model_selection.py +0 -377
- mlquantify/plots.py +0 -367
- mlquantify/utils/general.py +0 -371
- mlquantify/utils/method.py +0 -449
- mlquantify-0.1.8.dist-info/RECORD +0 -22
- {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/WHEEL +0 -0
- {mlquantify-0.1.8.dist-info → mlquantify-0.1.10.dist-info}/top_level.txt +0 -0
mlquantify/plots.py
DELETED
|
@@ -1,367 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import matplotlib.pyplot as plt
|
|
3
|
-
import matplotlib.colors as mcolors
|
|
4
|
-
import matplotlib.patches as mpatches
|
|
5
|
-
import pandas as pd
|
|
6
|
-
from typing import List, Optional, Dict, Any, Union
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
# Global Matplotlib style. NOTE: this runs at import time, so importing this
# module changes the rcParams of the whole process.
plt.rcParams.update({
    'lines.markersize': 6,
    'axes.facecolor': "#F8F8F8",
    'figure.facecolor': "#F8F8F8",
    'font.family': 'sans-serif',
    # Arial stays the preferred face; DejaVu Sans ships with Matplotlib and
    # is listed as a fallback so that machines without Arial do not emit
    # "findfont: Font family ... not found" warnings on every draw.
    'font.sans-serif': ['Arial', 'DejaVu Sans'],
    'font.size': 12,
    'font.weight': 'light',
    'axes.labelsize': 14,
    'axes.labelweight': 'light',
    'axes.titlesize': 16,
    'axes.titleweight': 'normal',
    'boxplot.boxprops.linewidth': 0.3,
    'boxplot.whiskerprops.linewidth': 0.3,
    'boxplot.capprops.linewidth': 0.3,
    'boxplot.medianprops.linewidth': 0.6,
    'boxplot.flierprops.linewidth': 0.3,
    'boxplot.flierprops.markersize': 0.9,
    'boxplot.medianprops.color': 'black',
    'figure.subplot.bottom': 0.2,
    'axes.grid': True,
    'grid.color': 'black',
    'grid.alpha': 0.1,
    'grid.linewidth': 0.5,
    'grid.linestyle': '--',
})

# Colors and markers
# NOTE(review): the palette contains repeated entries ('#FF6F61', '#E2A8F7',
# '#F7C6C7' and '#FFB3B3' each appear twice), so two series can end up with
# the same color. The values are kept as-is to preserve existing figures.
COLORS = [
    '#FFAB91', '#FFE082', '#A5D6A7', '#4DD0E1', '#FF6F61', '#FF8C94', '#D4A5A5',
    '#FF677D', '#B9FBC0', '#C2C2F0', '#E3F9A6', '#E2A8F7', '#F7B7A3', '#F7C6C7',
    '#8D9BFC', '#B4E6FF', '#FF8A65', '#FFC3A0', '#FFCCBC', '#F8BBD0', '#FF9AA2',
    '#FFB3B3', '#FFDDC1', '#FFE0B2', '#E2A8F7', '#F7C6C7', '#E57373', '#BA68C8',
    '#4FC3F7', '#FFB3B3', '#FF6F61',
]

MARKERS = ["o", "s", "^", "D", "p", "*", "+", "x", "H", "1", "2", "3", "4", "|", "_"]
|
|
48
|
-
|
|
49
|
-
def adjust_color_saturation(color: str, saturation_factor: float = 5) -> str:
    """
    Return ``color`` with its HSV saturation scaled by ``saturation_factor``.

    Parameters
    ----------
    color : str
        Color specification handed to ``matplotlib.colors.to_rgb``
        (e.g. a hexadecimal string such as ``'#FFAB91'``).
    saturation_factor : float, optional
        Multiplier applied to the saturation channel. Default is 5.

    Returns
    -------
    str
        Hexadecimal color string produced by ``matplotlib.colors.to_hex``.
    """
    # RGB -> HSV so that the saturation channel can be edited on its own.
    rgb = mcolors.to_rgb(color)
    hue, saturation, value = mcolors.rgb_to_hsv(rgb)

    # Scale the saturation, capping the result at 1.
    boosted = min(1, saturation * saturation_factor)

    # HSV -> RGB -> hex string.
    adjusted_rgb = mcolors.hsv_to_rgb((hue, boosted, value))
    return mcolors.to_hex(adjusted_rgb)
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def protocol_boxplot(
        table_protocol: pd.DataFrame,
        x: str,
        y: str,
        methods: Optional[List[str]] = None,
        title: Optional[str] = None,
        legend: bool = True,
        save_path: Optional[str] = None,
        order: Optional[str] = None,
        plot_params: Optional[Dict[str, Any]] = None):
    """
    Plots one box per quantifier from a protocol results table.

    Parameters
    ----------
    table_protocol : pd.DataFrame
        DataFrame containing the protocol results. It must have a
        'QUANTIFIER' column and the column named by `y`. The 'PRED_PREVS'
        and 'REAL_PREVS' columns are dropped when present.
    x : str
        Text used (capitalized) as the x-axis label.
    y : str
        Column whose values are summarized by the boxes; also used
        (capitalized) as the y-axis label.
    methods : List[str], optional
        Quantifiers to plot. If None or empty, every quantifier found in the
        'QUANTIFIER' column is plotted. A single name given as a string is
        treated as a one-element list.
    title : str, optional
        Title of the plot. Default is None.
    legend : bool, optional
        Whether to display a legend. Default is True.
    save_path : str, optional
        File path to save the plot image. If not provided, the plot will not be saved.
    order : str, optional
        If 'rank', quantifiers are ordered by the median of `y` (ascending).
        Any other value keeps the order of `methods`.
    plot_params : Dict[str, Any], optional
        Extra keyword arguments forwarded to ``Axes.boxplot``. The special key
        'figsize' (default ``(10, 6)``) sets the figure size instead.
        The dictionary passed by the caller is not modified.
    """
    # Copy before popping so the caller's dictionary is left untouched.
    plot_params = dict(plot_params) if plot_params else {}
    figsize = plot_params.pop('figsize', (10, 6))  # Default figsize if not provided

    # Prepare data; the prevalence columns are optional for this plot.
    table = table_protocol.drop(columns=["PRED_PREVS", "REAL_PREVS"], errors="ignore").copy()

    # An explicit None/empty test avoids the "truth value of an array is
    # ambiguous" error that `methods or ...` raises for array-like inputs.
    if methods is None or len(methods) == 0:
        methods = table['QUANTIFIER'].unique()
    elif isinstance(methods, str):
        methods = [methods]
    methods = list(methods)
    table = table[table['QUANTIFIER'].isin(methods)]

    # Order methods by ranking if specified
    if order == 'rank':
        methods = table.groupby('QUANTIFIER')[y].median().sort_values().index.tolist()

    # Cycle through the palette so that any number of methods gets a color
    # (plain COLORS[i] raises IndexError past the end of the palette).
    palette = [COLORS[i % len(COLORS)] for i in range(len(methods))]

    # Create plot with custom figsize
    fig, ax = plt.subplots(figsize=figsize)
    ax.grid(False)

    # User parameters may override the default width; patch_artist must stay
    # True because the boxes are filled with set_facecolor below.
    # Tick labels are set with set_xticklabels further down, so the
    # `labels=` keyword (deprecated since Matplotlib 3.9) is not passed.
    box_kwargs = {'widths': 0.8, **plot_params, 'patch_artist': True}
    samples = [table.loc[table['QUANTIFIER'] == method, y] for method in methods]
    box = ax.boxplot(samples, **box_kwargs)

    # Apply colors
    for patch, color in zip(box['boxes'], palette):
        patch.set_facecolor(color)

    # Add legend
    if legend:
        handles = [mpatches.Patch(color=color, label=method)
                   for method, color in zip(methods, palette)]
        ax.legend(handles=handles, title="Quantifiers", loc='upper left',
                  bbox_to_anchor=(1, 1), fontsize=10, title_fontsize='11')

    # Customize plot
    ax.set_xticklabels(methods, rotation=45, fontstyle='italic')
    ax.set_xlabel(x.capitalize())
    ax.set_ylabel(f"{y.capitalize()}")
    if title:
        ax.set_title(title)

    # Adjust layout (leaving room on the right for the legend) and save plot
    fig.tight_layout(rect=[0, 0, 0.9, 1])
    if save_path:
        fig.savefig(save_path, bbox_inches='tight')
    plt.show()
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
def protocol_lineplot(
        table_protocol: pd.DataFrame,
        methods: Union[List[str], str, None],
        x: str,
        y: str,
        title: Optional[str] = None,
        legend: bool = True,
        save_path: Optional[str] = None,
        group_by: str = "mean",
        pos_alpha: int = 1,
        plot_params: Optional[Dict[str, Any]] = None):
    """
    Plots one line per quantifier from a protocol results table.

    Parameters
    ----------
    table_protocol : pd.DataFrame
        DataFrame containing the protocol results. It must have a
        'QUANTIFIER' column; 'REAL_PREVS' is required when `x` or `y` is
        'ALPHA'.
    methods : Union[List[str], str, None]
        Quantifiers to plot. A single name may be given as a string. If None
        or empty, every quantifier found in the 'QUANTIFIER' column is plotted.
    x : str
        Column name to use as the x-axis.
        - If 'ALPHA', an 'ALPHA' column is built from element `pos_alpha` of
          each 'REAL_PREVS' entry and used.
        - Any other existing column name can be used.
    y : str
        Column name to use as the y-axis. 'ALPHA' is resolved the same way as
        for `x`.
    title : str, optional
        Title of the plot. Default is None.
    legend : bool, optional
        Whether to display a legend. Default is True.
    save_path : str, optional
        File path to save the plot image. If not provided, the plot will not be saved.
    group_by : str, optional
        Aggregation passed to ``GroupBy.agg`` after grouping by
        ('QUANTIFIER', `x`). A falsy value disables aggregation.
        Default is 'mean'.
    pos_alpha : int, optional
        Index read from each 'REAL_PREVS' entry to build 'ALPHA'. Default is 1.
    plot_params : Dict[str, Any], optional
        Extra keyword arguments forwarded to ``Axes.plot``; they override the
        per-line defaults (color, marker, label, alpha). The special key
        'figsize' (default ``(10, 6)``) sets the figure size instead.
        The dictionary passed by the caller is not modified.
    """
    # Copy before popping so the caller's dictionary is left untouched.
    plot_params = dict(plot_params) if plot_params else {}
    figsize = plot_params.pop('figsize', (10, 6))  # Default figsize if not provided

    # Normalize `methods`: an explicit None/empty test avoids the ambiguous
    # truth value of arrays, and a bare string is wrapped because
    # Series.isin only accepts list-likes.
    if methods is None or len(methods) == 0:
        methods = table_protocol['QUANTIFIER'].unique()
    elif isinstance(methods, str):
        methods = [methods]
    methods = list(methods)

    # Filter data
    table_protocol = table_protocol[table_protocol['QUANTIFIER'].isin(methods)]
    table = table_protocol.drop(columns=["PRED_PREVS", "REAL_PREVS"], errors="ignore").copy()

    # Build the ALPHA column whenever either axis asks for it, so that it is
    # filtered and aggregated together with the rest of the table.
    if "ALPHA" in (x, y):
        table["ALPHA"] = table_protocol["REAL_PREVS"].apply(lambda prevs: prevs[pos_alpha])

    # Aggregate data
    if group_by:
        table = table.groupby(['QUANTIFIER', x])[y].agg(group_by).reset_index()

    # Create plot with custom figsize
    fig, ax = plt.subplots(figsize=figsize)
    for i, method in enumerate(methods):
        method_data = table[table['QUANTIFIER'] == method]
        line_kwargs = {
            # Increase the saturation of the palette colors
            'color': adjust_color_saturation(COLORS[i % len(COLORS)]),
            # Cycle markers so methods beyond len(MARKERS) are still drawn
            'marker': MARKERS[i % len(MARKERS)],
            'label': method,
            'alpha': 1.0,
            **plot_params,
        }
        ax.plot(method_data[x], method_data[y], **line_kwargs)

    # Add legend
    if legend:
        ax.legend(title="Quantifiers", loc='upper left', bbox_to_anchor=(1, 1),
                  fontsize=10, title_fontsize='11')

    # Customize plot
    ax.set_xlabel(x.capitalize())
    ax.set_ylabel(y.capitalize())
    if title:
        ax.set_title(title)

    # Adjust layout (leaving room on the right for the legend) and save plot
    fig.tight_layout(rect=[0, 0, 0.9, 1])
    if save_path:
        fig.savefig(save_path, bbox_inches='tight')
    plt.show()
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
def class_distribution_plot(values: Union[List, np.ndarray],
                            labels: Union[List, np.ndarray],
                            bins: int = 30,
                            title: Optional[str] = None,
                            legend: bool = True,
                            save_path: Optional[str] = None,
                            plot_params: Optional[Dict[str, Any]] = None):
    """Plot overlaid histograms of class distributions.

    With one-dimensional (or single-column) `values`, a single plot is drawn
    with one histogram per unique label. With several columns, one subplot is
    drawn per unique label, each overlaying one histogram per column.

    Parameters
    ----------
    values : Union[List, np.ndarray]
        Values to plot, one row per sample; either 1-D or 2-D.
        NOTE(review): in the 2-D case the j-th column is paired with the j-th
        sorted unique label, so the number of columns is presumably expected
        to equal the number of classes - confirm against callers.
    labels : Union[List, np.ndarray]
        Label of each row of `values`.
    bins : int, optional
        Number of bins to use for the histogram. Default is 30.
    title : str, optional
        Title of the plot. Default is None.
    legend : bool, optional
        Whether to display a legend. Default is True.
    save_path : str, optional
        File path to save the plot image. If not provided, the plot will not be saved.
    plot_params : Dict[str, Any], optional
        Matplotlib rcParams applied only while this figure is built and
        shown. Default is None.

    Raises
    ------
    AssertionError
        If the number of value sets does not match the number of labels.
    """
    # Accept any array-like (lists, tuples, ...), not only lists.
    values = np.asarray(values)
    labels = np.asarray(labels)

    # Ensure the number of labels matches the number of value sets. Raised
    # explicitly so the check is not stripped when Python runs with -O.
    if len(values) != len(labels):
        raise AssertionError("The number of value sets must match the number of labels.")

    classes = np.unique(labels)
    # 1-D input has no second axis; treat it like a single column.
    multi_column = values.ndim > 1 and values.shape[1] > 1

    # rc_context restores the previous rcParams on exit, so custom parameters
    # do not leak into later, unrelated figures.
    with plt.rc_context(rc=plot_params or None):
        if multi_column:
            # One subplot per class, arranged on a near-square grid.
            n_panels = max(len(classes), 1)
            cols = int(np.ceil(np.sqrt(n_panels)))
            rows = int(np.ceil(n_panels / cols))

            # squeeze=False always yields a 2-D array of axes, even for 1x1.
            fig, grid = plt.subplots(rows, cols, figsize=(cols * 5, rows * 4), squeeze=False)
            axs = grid.flatten()
            panels = list(axs[:len(classes)])

            # Create the overlaid histograms
            for ax, label in zip(panels, classes):
                mask = labels == label
                for j, lab in enumerate(classes):
                    ax.hist(values[:, j][mask], bins=bins, color=COLORS[j % len(COLORS)],
                            edgecolor='black', alpha=0.5, label=lab)
                ax.set_xlim([0, 1])  # Fix x-axis range between 0 and 1

            # Remove the grid cells that were not used.
            for unused in axs[len(classes):]:
                fig.delaxes(unused)
        else:
            # Draw on the current axes, creating them if none exist yet.
            ax = plt.gca()
            for i, label in enumerate(classes):
                ax.hist(values[labels == label], bins=bins, color=COLORS[i % len(COLORS)],
                        edgecolor='black', alpha=0.5, label=label)
            ax.set_xlim([0, 1])  # Fix x-axis range between 0 and 1
            panels = [ax]

        # Title, legend and axis labels for every subplot that holds data
        for idx, ax in enumerate(panels):
            if title:
                ax.set_title(f'{title} for class {idx+1}' if multi_column else title)
            if legend:
                ax.legend(loc='upper right')
            ax.set_xlabel('Values')
            ax.set_ylabel('Frequency')

        # Adjust layout to prevent overlapping
        plt.subplots_adjust(hspace=0.9, wspace=0.4)
        plt.tight_layout()

        # Save the figure if a path is specified
        if save_path:
            plt.savefig(save_path, bbox_inches='tight')

        # Show the plot
        plt.show()
|
|
366
|
-
|
|
367
|
-
|