mlquantify 0.0.11.2__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlquantify/__init__.py +32 -6
- mlquantify/base.py +559 -257
- mlquantify/classification/__init__.py +1 -1
- mlquantify/classification/methods.py +160 -0
- mlquantify/evaluation/__init__.py +14 -2
- mlquantify/evaluation/measures.py +215 -0
- mlquantify/evaluation/protocol.py +647 -0
- mlquantify/methods/__init__.py +37 -40
- mlquantify/methods/aggregative.py +1030 -0
- mlquantify/methods/meta.py +472 -0
- mlquantify/methods/mixture_models.py +1003 -0
- mlquantify/methods/non_aggregative.py +136 -0
- mlquantify/methods/threshold_optimization.py +957 -0
- mlquantify/model_selection.py +377 -232
- mlquantify/plots.py +367 -0
- mlquantify/utils/__init__.py +2 -2
- mlquantify/utils/general.py +334 -0
- mlquantify/utils/method.py +449 -0
- {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.1.dist-info}/METADATA +137 -122
- mlquantify-0.1.1.dist-info/RECORD +22 -0
- {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.1.dist-info}/WHEEL +1 -1
- mlquantify/classification/pwkclf.py +0 -73
- mlquantify/evaluation/measures/__init__.py +0 -26
- mlquantify/evaluation/measures/ae.py +0 -11
- mlquantify/evaluation/measures/bias.py +0 -16
- mlquantify/evaluation/measures/kld.py +0 -8
- mlquantify/evaluation/measures/mse.py +0 -12
- mlquantify/evaluation/measures/nae.py +0 -16
- mlquantify/evaluation/measures/nkld.py +0 -13
- mlquantify/evaluation/measures/nrae.py +0 -16
- mlquantify/evaluation/measures/rae.py +0 -12
- mlquantify/evaluation/measures/se.py +0 -12
- mlquantify/evaluation/protocol/_Protocol.py +0 -202
- mlquantify/evaluation/protocol/__init__.py +0 -2
- mlquantify/evaluation/protocol/app.py +0 -146
- mlquantify/evaluation/protocol/npp.py +0 -34
- mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +0 -62
- mlquantify/methods/aggregative/ThreholdOptm/__init__.py +0 -7
- mlquantify/methods/aggregative/ThreholdOptm/acc.py +0 -27
- mlquantify/methods/aggregative/ThreholdOptm/max.py +0 -23
- mlquantify/methods/aggregative/ThreholdOptm/ms.py +0 -21
- mlquantify/methods/aggregative/ThreholdOptm/ms2.py +0 -25
- mlquantify/methods/aggregative/ThreholdOptm/pacc.py +0 -41
- mlquantify/methods/aggregative/ThreholdOptm/t50.py +0 -21
- mlquantify/methods/aggregative/ThreholdOptm/x.py +0 -23
- mlquantify/methods/aggregative/__init__.py +0 -9
- mlquantify/methods/aggregative/cc.py +0 -32
- mlquantify/methods/aggregative/emq.py +0 -86
- mlquantify/methods/aggregative/fm.py +0 -72
- mlquantify/methods/aggregative/gac.py +0 -96
- mlquantify/methods/aggregative/gpac.py +0 -87
- mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +0 -81
- mlquantify/methods/aggregative/mixtureModels/__init__.py +0 -5
- mlquantify/methods/aggregative/mixtureModels/dys.py +0 -55
- mlquantify/methods/aggregative/mixtureModels/dys_syn.py +0 -89
- mlquantify/methods/aggregative/mixtureModels/hdy.py +0 -46
- mlquantify/methods/aggregative/mixtureModels/smm.py +0 -27
- mlquantify/methods/aggregative/mixtureModels/sord.py +0 -77
- mlquantify/methods/aggregative/pcc.py +0 -33
- mlquantify/methods/aggregative/pwk.py +0 -38
- mlquantify/methods/meta/__init__.py +0 -1
- mlquantify/methods/meta/ensemble.py +0 -236
- mlquantify/methods/non_aggregative/__init__.py +0 -1
- mlquantify/methods/non_aggregative/hdx.py +0 -71
- mlquantify/plots/__init__.py +0 -2
- mlquantify/plots/distribution_plot.py +0 -109
- mlquantify/plots/protocol_plot.py +0 -193
- mlquantify/utils/general_purposes/__init__.py +0 -8
- mlquantify/utils/general_purposes/convert_col_to_array.py +0 -13
- mlquantify/utils/general_purposes/generate_artificial_indexes.py +0 -29
- mlquantify/utils/general_purposes/get_real_prev.py +0 -9
- mlquantify/utils/general_purposes/load_quantifier.py +0 -4
- mlquantify/utils/general_purposes/make_prevs.py +0 -23
- mlquantify/utils/general_purposes/normalize.py +0 -20
- mlquantify/utils/general_purposes/parallel.py +0 -10
- mlquantify/utils/general_purposes/round_protocol_df.py +0 -14
- mlquantify/utils/method_purposes/__init__.py +0 -6
- mlquantify/utils/method_purposes/distances.py +0 -21
- mlquantify/utils/method_purposes/getHist.py +0 -13
- mlquantify/utils/method_purposes/get_scores.py +0 -33
- mlquantify/utils/method_purposes/moss.py +0 -16
- mlquantify/utils/method_purposes/ternary_search.py +0 -14
- mlquantify/utils/method_purposes/tprfpr.py +0 -42
- mlquantify-0.0.11.2.dist-info/RECORD +0 -73
- {mlquantify-0.0.11.2.dist-info → mlquantify-0.1.1.dist-info}/top_level.txt +0 -0
|
@@ -1,193 +0,0 @@
|
|
|
1
|
-
import matplotlib.pyplot as plt
|
|
2
|
-
import matplotlib.colors as mcolors
|
|
3
|
-
import matplotlib.patches as mpatches
|
|
4
|
-
import pandas as pd
|
|
5
|
-
from typing import List, Optional, Dict, Any, Union
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
|
|
8
|
-
# Global matplotlib styling. This runs at import time and changes the
# process-wide rcParams for every figure created afterwards.
plt.rcParams.update({
    # Markers and backgrounds
    'lines.markersize': 6,
    'axes.facecolor': "#F8F8F8",
    'figure.facecolor': "#F8F8F8",

    # Fonts
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
    'font.size': 12,
    'font.weight': 'light',

    # Axis labels and titles
    'axes.labelsize': 14,
    'axes.labelweight': 'light',
    'axes.titlesize': 16,
    'axes.titleweight': 'normal',

    # Boxplot line widths, flier size and median colour
    'boxplot.boxprops.linewidth': 0.3,
    'boxplot.whiskerprops.linewidth': 0.3,
    'boxplot.capprops.linewidth': 0.3,
    'boxplot.medianprops.linewidth': 0.6,
    'boxplot.flierprops.linewidth': 0.3,
    'boxplot.flierprops.markersize': 0.9,
    'boxplot.medianprops.color': 'black',

    # Layout
    'figure.subplot.bottom': 0.2,

    # Grid
    'axes.grid': True,
    'grid.color': 'black',
    'grid.alpha': 0.1,
    'grid.linewidth': 0.5,
    'grid.linestyle': '--',
})
|
|
34
|
-
|
|
35
|
-
# Palette used to colour quantifiers, indexed by the quantifier's position.
# Some entries repeat, so distinct positions may share a colour.
COLORS = [
    '#FFAB91', '#FFE082', '#A5D6A7', '#4DD0E1', '#FF6F61',
    '#FF8C94', '#D4A5A5', '#FF677D', '#B9FBC0', '#C2C2F0',
    '#E3F9A6', '#E2A8F7', '#F7B7A3', '#F7C6C7', '#8D9BFC',
    '#B4E6FF', '#FF8A65', '#FFC3A0', '#FFCCBC', '#F8BBD0',
    '#FF9AA2', '#FFB3B3', '#FFDDC1', '#FFE0B2', '#E2A8F7',
    '#F7C6C7', '#E57373', '#BA68C8', '#4FC3F7', '#FFB3B3',
    '#FF6F61',
]

# Matplotlib marker codes, one single-character code per list entry.
MARKERS = list("os^Dp*+xH1234|_")
|
|
45
|
-
|
|
46
|
-
def adjust_color_saturation(color: str, saturation_factor: float = 5) -> str:
    """
    Adjusts the saturation of a given color.

    Parameters:
    - color (str): The original color, in any format accepted by
      ``matplotlib.colors.to_rgb`` (e.g. a hexadecimal string).
    - saturation_factor (float): The factor the saturation is multiplied by.
      Values > 1 increase saturation, values < 1 decrease it.
      Default is 5.

    Returns:
    - str: The color with adjusted saturation in hexadecimal format.
    """
    # Convert color to HSV (Hue, Saturation, Value)
    h, s, v = mcolors.rgb_to_hsv(mcolors.to_rgb(color))

    # Scale the saturation and clamp it to the valid [0, 1] range; the lower
    # clamp keeps a negative factor from producing a negative saturation.
    s = max(0.0, min(1.0, s * saturation_factor))

    # Convert back to RGB and then to hex
    return mcolors.to_hex(mcolors.hsv_to_rgb((h, s, v)))
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
def protocol_boxplot(
        table_protocol: pd.DataFrame,
        x: str,
        y: str,
        methods: Optional[List[str]] = None,
        title: Optional[str] = None,
        legend: bool = True,
        save_path: Optional[str] = None,
        order: Optional[str] = None,
        plot_params: Optional[Dict[str, Any]] = None):
    """
    Plots one box per quantifier for column ``y`` of a protocol DataFrame.

    Parameters:
    - table_protocol (pd.DataFrame): Protocol results. Must contain the
      columns 'QUANTIFIER', 'PRED_PREVS', 'REAL_PREVS' and ``y``.
    - x (str): Text used (capitalized) as the x-axis label only.
    - y (str): Name of the column whose values are plotted.
    - methods (list of str, optional): Quantifiers to include. ``None`` or an
      empty sequence selects every quantifier found in the table; a single
      string is treated as a one-element list.
    - title (str, optional): Plot title.
    - legend (bool): Whether to draw the legend.
    - save_path (str, optional): If given, the figure is saved there.
    - order (str, optional): ``'rank'`` orders the boxes by ascending median
      of ``y``; any other value keeps the order of ``methods``.
    - plot_params (dict, optional): Extra keyword arguments forwarded to
      ``Axes.boxplot``. The key 'figsize' is consumed here (default (10, 6)).
      The dictionary passed by the caller is not modified.

    The figure is always displayed with ``plt.show()``.
    """
    # Copy before popping so the caller's dictionary stays untouched.
    plot_params = dict(plot_params) if plot_params else {}
    figsize = plot_params.pop('figsize', (10, 6))

    # Prepare data
    table = table_protocol.drop(["PRED_PREVS", "REAL_PREVS"], axis=1).copy()
    if isinstance(methods, str):
        methods = [methods]
    elif methods is None or len(methods) == 0:
        # Explicit check: truth-testing a numpy array would raise.
        methods = table['QUANTIFIER'].unique()
    methods = list(methods)
    table = table[table['QUANTIFIER'].isin(methods)]

    # Order methods by ranking if specified
    if order == 'rank':
        methods = table.groupby('QUANTIFIER')[y].median().sort_values().index.tolist()

    # Create plot with custom figsize
    _, ax = plt.subplots(figsize=figsize)
    ax.grid(False)

    box = ax.boxplot([table[table['QUANTIFIER'] == method][y] for method in methods],
                     patch_artist=True, widths=0.8, labels=methods, **plot_params)

    # Cycle through the palette so any number of methods gets a colour
    # (same modulo scheme protocol_lineplot uses).
    colors = [COLORS[i % len(COLORS)] for i in range(len(methods))]
    for patch, color in zip(box['boxes'], colors):
        patch.set_facecolor(color)

    # Add legend
    if legend:
        handles = [mpatches.Patch(color=color, label=method)
                   for color, method in zip(colors, methods)]
        ax.legend(handles=handles, title="Quantifiers", loc='upper left',
                  bbox_to_anchor=(1, 1), fontsize=10, title_fontsize='11')

    # Customize plot
    ax.set_xticklabels(methods, rotation=45, fontstyle='italic')
    ax.set_xlabel(x.capitalize())
    ax.set_ylabel(y.capitalize())
    if title:
        ax.set_title(title)

    # Leave room on the right for the legend, then save and show
    plt.tight_layout(rect=[0, 0, 0.9, 1])
    if save_path:
        plt.savefig(save_path, bbox_inches='tight')
    plt.show()
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
def protocol_lineplot(
        table_protocol: pd.DataFrame,
        methods: Union[List[str], str, None],
        x: str,
        y: str,
        title: Optional[str] = None,
        legend: bool = True,
        save_path: Optional[str] = None,
        group_by: str = "mean",
        pos_alpha: int = 1,
        plot_params: Optional[Dict[str, Any]] = None):
    """
    Plots one line per quantifier from a protocol DataFrame.

    Parameters:
    - table_protocol (pd.DataFrame): Protocol results. Must contain the
      columns 'QUANTIFIER', 'PRED_PREVS', 'REAL_PREVS', plus ``x`` and ``y``
      (unless they are "ALPHA", see below).
    - methods (list of str, str or None): Quantifiers to include. A single
      string selects one quantifier; ``None`` or an empty sequence selects
      every quantifier found in the table.
    - x (str): Column for the x axis. The special value "ALPHA" uses element
      ``pos_alpha`` of each row's 'REAL_PREVS'.
    - y (str): Column for the y axis. "ALPHA" is resolved the same way as
      for ``x``.
    - title (str, optional): Plot title.
    - legend (bool): Whether to draw the legend.
    - save_path (str, optional): If given, the figure is saved there.
    - group_by (str): Aggregation applied to ``y`` per (quantifier, ``x``)
      pair; a falsy value disables aggregation.
    - pos_alpha (int): Index into 'REAL_PREVS' used to build "ALPHA".
    - plot_params (dict, optional): Extra keyword arguments forwarded to
      ``Axes.plot``. The key 'figsize' is consumed here (default (10, 6)).
      The dictionary passed by the caller is not modified.

    The figure is always displayed with ``plt.show()``.
    """
    # Copy before popping so the caller's dictionary stays untouched.
    plot_params = dict(plot_params) if plot_params else {}
    figsize = plot_params.pop('figsize', (10, 6))

    # Normalise `methods` to a list; a bare string must not be iterated
    # character by character, and numpy arrays cannot be truth-tested.
    if isinstance(methods, str):
        methods = [methods]
    elif methods is None or len(methods) == 0:
        methods = table_protocol['QUANTIFIER'].unique()
    methods = list(methods)
    table_protocol = table_protocol[table_protocol['QUANTIFIER'].isin(methods)]

    table = table_protocol.drop(["PRED_PREVS", "REAL_PREVS"], axis=1).copy()
    if "ALPHA" in (x, y):
        # Stored as a column so it is filtered and aggregated together with
        # the rest of the row, whichever axis it is used on.
        table["ALPHA"] = table_protocol["REAL_PREVS"].apply(lambda prevs: prevs[pos_alpha])

    # Aggregate data (skipped when x and y are the same column, which cannot
    # be both a grouping key and the aggregated value).
    if group_by and x != y:
        table = table.groupby(['QUANTIFIER', x])[y].agg(group_by).reset_index()

    # Create plot with custom figsize
    _, ax = plt.subplots(figsize=figsize)
    for i, method in enumerate(methods):
        method_data = table[table['QUANTIFIER'] == method]
        # Increase the colour saturation so lines stand out on the background.
        color = adjust_color_saturation(COLORS[i % len(COLORS)])
        ax.plot(method_data[x],
                method_data[y],
                color=color,
                # Cycle markers so methods past the end of MARKERS are still drawn.
                marker=MARKERS[i % len(MARKERS)],
                label=method,
                alpha=1.0,
                **plot_params)

    # Add legend
    if legend:
        ax.legend(title="Quantifiers", loc='upper left', bbox_to_anchor=(1, 1),
                  fontsize=10, title_fontsize='11')

    # Customize plot
    ax.set_xlabel(x.capitalize())
    ax.set_ylabel(y.capitalize())
    if title:
        ax.set_title(title)

    # Leave room on the right for the legend, then save and show
    plt.tight_layout(rect=[0, 0, 0.9, 1])
    if save_path:
        plt.savefig(save_path, bbox_inches='tight')
    plt.show()
|
|
192
|
-
|
|
193
|
-
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
from .normalize import normalize_prevalence
|
|
2
|
-
from .parallel import parallel
|
|
3
|
-
from .get_real_prev import get_real_prev
|
|
4
|
-
from .make_prevs import make_prevs
|
|
5
|
-
from .generate_artificial_indexes import generate_artificial_indexes
|
|
6
|
-
from .round_protocol_df import round_protocol_df
|
|
7
|
-
from .convert_col_to_array import convert_columns_to_arrays
|
|
8
|
-
from .load_quantifier import load_quantifier
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
def convert_columns_to_arrays(df, columns:list = None):
    """Converts the specified columns from strings of arrays to numpy arrays.

    Each string cell such as ``"[0.2 0.8]"`` or ``"[0.2, 0.8]"`` is parsed
    into a 1-D float array. Cells that are not strings are left unchanged.

    Args:
        df (pd.DataFrame): the dataframe whose columns are converted. It is
            modified in place.
        columns (list, optional): the columns holding strings of arrays.
            Defaults to ``['PRED_PREVS', 'REAL_PREVS']``, the prevalence
            columns of the protocol dataframes.

    Returns:
        pd.DataFrame: the same ``df`` object, after conversion.
    """
    if columns is None:
        columns = ['PRED_PREVS', 'REAL_PREVS']

    def _parse(cell):
        if not isinstance(cell, str):
            return cell
        # Accept both numpy-style ("0.2 0.8") and list-style ("0.2, 0.8")
        # separators, including line breaks from wrapped array reprs.
        tokens = cell.strip().strip('[]').replace(',', ' ').split()
        return np.array(tokens, dtype=float)

    for col in columns:
        df[col] = df[col].apply(_parse)
    return df
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
def generate_artificial_indexes(y, prevalence: list, sample_size:int, classes:list):
    """Sample indexes of ``y`` so the sample follows the given prevalences.

    For every class except the last, ``floor(sample_size * prevalence[i])``
    indexes are drawn with replacement from the positions where ``y`` equals
    that class. The last class receives whatever is left so the total is
    exactly ``sample_size``; its entry in ``prevalence`` is only used by the
    sum check.

    Args:
        y (np.ndarray): labels to sample from.
        prevalence (list): desired class proportions, aligned with
            ``classes``; they must sum to 1.
        sample_size (int): number of indexes to return.
        classes (list): class labels, in the same order as ``prevalence``.

    Returns:
        list: ``sample_size`` shuffled indexes into ``y``.

    Raises:
        AssertionError: if the prevalences do not sum to 1.
    """
    if not np.isclose(sum(prevalence), 1):
        # Raised explicitly (not via `assert`) so the check survives
        # `python -O`; the exception type is kept for existing callers.
        raise AssertionError("The sum of prevalences must be 1")

    sampled_indexes = []
    total_sampled = 0
    last_position = len(classes) - 1

    for i, class_ in enumerate(classes):
        if i == last_position:
            num_samples = sample_size - total_sampled
        else:
            # The small epsilon absorbs float error before truncation:
            # 100 * 0.29 evaluates to 28.999999999999996, not 29.
            num_samples = int(sample_size * prevalence[i] + 1e-9)

        # Get the indexes of the current class
        class_indexes = np.where(y == class_)[0]

        # Sample the indexes for the current class
        sampled_class_indexes = np.random.choice(class_indexes, size=num_samples, replace=True)

        sampled_indexes.extend(sampled_class_indexes)
        total_sampled += num_samples

    np.random.shuffle(sampled_indexes)  # Shuffle after collecting all indexes

    return sampled_indexes
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
def make_prevs(ndim:int) -> np.ndarray:
    """
    Generate ndim random non-negative values that sum to 1.

    ``ndim - 1`` cut points are drawn uniformly from [0, 1); together with
    the end points 0 and 1 they split the unit interval into ``ndim``
    segments, whose lengths are returned.

    Args:
        ndim (int): Number of values to generate; must be at least 1.

    Returns:
        np.ndarray: Array of ndim values that sum to 1.

    Raises:
        ValueError: If ndim is smaller than 1.
    """
    if ndim < 1:
        raise ValueError(f"ndim must be a positive integer, got {ndim}")

    # ndim - 1 interior cut points plus the two interval end points
    cuts = np.append(np.random.uniform(0, 1, ndim - 1), [0, 1])
    cuts.sort()

    # Consecutive differences are the segment lengths
    return np.diff(cuts)
|
|
23
|
-
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
from collections import defaultdict
|
|
3
|
-
|
|
4
|
-
def normalize_prevalence(prevalences: np.ndarray, classes:list):
    """Normalize prevalences so they sum to 1 and return them as a dict.

    Args:
        prevalences (np.ndarray or dict): raw prevalence values. An array is
            matched to ``classes`` by position; a dict maps class to value
            and is normalized as is (``classes`` is not consulted).
        classes (list): class labels. For array input, every class in this
            list is present in the result; classes without a matching value
            get 0.

    Returns:
        dict: normalized prevalence per class. Keys built from the input are
        cast with ``int``. If the values sum to 0 all prevalences are 0.
    """
    if isinstance(prevalences, dict):
        total = sum(prevalences.values())
        if total == 0:
            # Nothing to scale by; avoid a division by zero.
            return {int(_class): 0.0 for _class in prevalences}
        return {int(_class): float(value / total) for _class, value in prevalences.items()}

    values = np.asarray(prevalences, dtype=float)
    total = np.sum(values, axis=-1, keepdims=True)
    # `out` provides defined zeros for the positions `where` skips; without
    # it those positions would hold uninitialized memory.
    normalized = np.divide(values, total, out=np.zeros_like(values), where=total > 0)

    result = {int(_class): float(prev) for _class, prev in zip(classes, normalized)}

    # Ensure all classes are present in the result
    for cls in classes:
        result.setdefault(cls, 0)

    return result
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import pandas as pd
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
def round_protocol_df(dataframe: pd.DataFrame, frac: int = 3):
    """Return a copy-like view of a protocol dataframe with rounded numbers.

    Args:
        dataframe (pd.DataFrame): the protocol dataframe.
        frac (int): number of decimals to keep.

    Returns:
        pd.DataFrame: result of applying the rounding column by column.
        In 'PRED_PREVS' and 'REAL_PREVS' each ndarray/float/int cell is
        rounded individually; other numeric columns are rounded as a whole;
        every remaining column is passed through unchanged.
    """
    prevalence_columns = ('PRED_PREVS', 'REAL_PREVS')
    roundable_types = (np.ndarray, float, int)

    def _round_cell(cell):
        # Cells of any other type (e.g. strings) are returned as they are.
        return np.round(cell, frac) if isinstance(cell, roundable_types) else cell

    def _round_series(series):
        if series.name in prevalence_columns:
            return series.apply(_round_cell)
        if np.issubdtype(series.dtype, np.number):
            return series.round(frac)
        return series

    return dataframe.apply(_round_series)
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
from .getHist import getHist
|
|
2
|
-
from .distances import sqEuclidean, probsymm, hellinger, topsoe
|
|
3
|
-
from .ternary_search import ternary_search
|
|
4
|
-
from .tprfpr import compute_table, compute_tpr, compute_fpr, adjust_threshold
|
|
5
|
-
from .get_scores import get_scores
|
|
6
|
-
from .moss import MoSS
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
def sqEuclidean(dist1, dist2):
    """Return the squared Euclidean distance between two distributions.

    Both arguments must support element-wise arithmetic (e.g. numpy arrays
    of equal length); the squared differences are added up with the
    built-in ``sum``.
    """
    delta = dist1 - dist2
    return sum(delta * delta)
|
|
7
|
-
|
|
8
|
-
def probsymm(dist1, dist2):
    """Return the probabilistic symmetric chi-square distance.

    Computes ``2 * sum((P - Q)**2 / (P + Q))``. Bins where ``P + Q`` is 0
    contribute 0 instead of the 0/0 that would turn the result into NaN.

    Args:
        dist1, dist2: distributions as array-likes of equal length.

    Returns:
        The distance as a float scalar.
    """
    P = np.asarray(dist1, dtype=float)
    Q = np.asarray(dist2, dtype=float)
    total = P + Q
    squared_diff = (P - Q) ** 2
    # `out` supplies the 0 used for the bins that `where` skips.
    terms = np.divide(squared_diff, total, out=np.zeros_like(squared_diff), where=total != 0)
    return 2 * sum(terms)
|
|
12
|
-
|
|
13
|
-
def topsoe(dist1, dist2):
    """Return the Topsoe distance between two distributions.

    Computes ``sum(P*log(2P/(P+Q)) + Q*log(2Q/(P+Q)))`` using the convention
    ``0 * log(0) = 0``, so empty bins contribute 0 instead of making the
    result NaN.

    Args:
        dist1, dist2: distributions as array-likes of equal length.

    Returns:
        The distance as a float scalar.
    """
    P = np.asarray(dist1, dtype=float)
    Q = np.asarray(dist2, dtype=float)
    total = P + Q
    # np.where evaluates both branches, so silence the warnings raised for
    # the entries that are then replaced by 0.
    with np.errstate(divide='ignore', invalid='ignore'):
        p_terms = np.where(P > 0, P * np.log(2 * P / total), 0.0)
        q_terms = np.where(Q > 0, Q * np.log(2 * Q / total), 0.0)
    return sum(p_terms + q_terms)
|
|
17
|
-
|
|
18
|
-
def hellinger(dist1, dist2):
    """Return ``2 * sqrt(|1 - sum(sqrt(P * Q))|)`` for two distributions.

    Both arguments must support element-wise multiplication (e.g. numpy
    arrays of equal length). The absolute value keeps the square root
    defined when the summed term slightly exceeds 1.
    """
    overlap = sum(np.sqrt(dist1 * dist2))
    return 2 * np.sqrt(np.abs(1 - overlap))
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
def getHist(scores, nbins):
    """Return a smoothed, normalized histogram of ``scores``.

    The unit interval is split into ``nbins`` equal-width, half-open bins;
    the upper edge of the last bin is moved to 1.1 so a score of exactly 1
    falls inside it. Each bin value is
    ``(1 / nbins + count) / (len(scores) + 1)``.

    Args:
        scores (np.ndarray): scores to bin (compared element-wise).
        nbins: number of bins; converted with ``int``.

    Returns:
        np.ndarray: one value per bin.
    """
    edges = np.linspace(0, 1, int(nbins) + 1)
    edges[-1] = 1.1  # widen the last bin so that 1.0 is counted

    n_bins = len(edges) - 1
    counts = np.array([
        np.count_nonzero((scores >= lower) & (scores < upper))
        for lower, upper in zip(edges[:-1], edges[1:])
    ])

    return (1 / n_bins + counts) / (len(scores) + 1)
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
import numpy as np
|
|
3
|
-
from sklearn.model_selection import StratifiedKFold
|
|
4
|
-
|
|
5
|
-
def get_scores(X, y, learner, folds:int=10, learner_fitted:bool=False) -> tuple:
    """Return labels and the learner's predicted probabilities for them.

    Args:
        X: features; a numpy array is wrapped in a DataFrame.
        y: labels; a numpy array is wrapped in a Series.
        learner: object exposing ``fit`` and ``predict_proba``.
        folds (int): number of stratified folds used when the learner is
            not fitted yet.
        learner_fitted (bool): if True, ``learner`` is used as is to score
            all of ``X``. Otherwise it is re-fitted on the training part of
            each fold and scores the held-out part, so it is left fitted on
            the last fold's training data.

    Returns:
        tuple: ``(labels, probabilities)`` as numpy arrays. With cross
        validation the rows follow fold order, not the order of ``X``.
    """
    if isinstance(X, np.ndarray):
        X = pd.DataFrame(X)
    if isinstance(y, np.ndarray):
        y = pd.Series(y)

    # Already fitted: score everything directly.
    if learner_fitted:
        return np.asarray(y), np.asarray(learner.predict_proba(X))

    fold_labels = []
    fold_probabilities = []
    splitter = StratifiedKFold(n_splits=folds)

    for train_idx, valid_idx in splitter.split(X, y):
        train_X = pd.DataFrame(X.iloc[train_idx])
        train_y = y.iloc[train_idx]
        held_out_X = pd.DataFrame(X.iloc[valid_idx])
        held_out_y = y.iloc[valid_idx]

        learner.fit(train_X, train_y)

        # Out-of-fold scores paired with their true labels
        fold_probabilities.extend(learner.predict_proba(held_out_X))
        fold_labels.extend(held_out_y)

    return np.asarray(fold_labels), np.asarray(fold_probabilities)
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
def MoSS(n:int, alpha:float, m:float):
    """Generate deterministic synthetic scores for both classes.

    With ``n_pos = int(n * alpha)`` and ``n_neg = int((1 - alpha) * n)``:

    - positive scores are ``(i / (n_pos + 1)) ** m`` for i = 1 .. n_pos - 1
    - negative scores are ``1 - (i / (n_neg + 1)) ** m`` for i = 1 .. n_neg - 1

    NOTE(review): the ranges stop at n_pos - 1 / n_neg - 1, so each array
    holds one element fewer than its class count - confirm this is intended.

    Args:
        n (int): total number of scores the class counts are derived from.
        alpha (float): proportion of the positive class.
        m (float): exponent applied to the evenly spaced values.

    Returns:
        tuple: ``(syn_plus, syn_neg)`` numpy arrays.
    """
    def _powered_grid(count):
        # Integers 1 .. count - 1, scaled by (count + 1) and raised to m
        return np.power(np.arange(1, count) / (count + 1), m)

    n_pos = int(n * alpha)
    n_neg = int((1 - alpha) * n)

    syn_plus = _powered_grid(n_pos)
    syn_neg = 1 - _powered_grid(n_neg)

    return syn_plus, syn_neg
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
def ternary_search(left, right, f, eps=1e-4):
    """Locate the minimum of a unimodal function with ternary search.

    The interval is repeatedly cut at its two third-points, discarding the
    outer third next to the larger function value, until it is narrower
    than ``eps``.

    Args:
        left: lower end of the search interval.
        right: upper end of the search interval.
        f: function of one argument to minimize.
        eps: interval width at which the search stops.

    Returns:
        The midpoint of the final interval.

    The search also stops once the third-points can no longer be told apart
    from the interval ends in floating point, so an ``eps`` of 0 (or one
    below float resolution) terminates. NaN bounds return NaN immediately.
    """
    # `>=` (rather than `not <`) makes NaN bounds leave the loop at once.
    while abs(left - right) >= eps:
        third = (right - left) / 3
        left_third = left + third
        right_third = right - third

        # No representable point strictly inside the interval any more:
        # further iterations could not make progress.
        if left_third == left or right_third == right:
            break

        if f(left_third) > f(right_third):
            left = left_third
        else:
            right = right_third

    return (left + right) / 2
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def compute_table(y, y_pred, classes):
    """Count the confusion-matrix cells of a binary prediction.

    ``classes[1]`` is treated as the positive class and ``classes[0]`` as
    the negative class.

    Args:
        y: true labels.
        y_pred: predicted labels, compared element-wise with ``y``.
        classes: the two class labels, negative first.

    Returns:
        tuple: ``(TP, FP, FN, TN)`` counts.
    """
    hit = y == y_pred
    miss = y != y_pred
    is_positive = y == classes[1]
    is_negative = y == classes[0]

    TP = np.logical_and(hit, is_positive).sum()
    FP = np.logical_and(miss, is_negative).sum()
    FN = np.logical_and(miss, is_positive).sum()
    TN = np.logical_and(hit, is_negative).sum()
    return TP, FP, FN, TN
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def compute_tpr(TP, FN):
    """Return the true positive rate ``TP / (TP + FN)``, or 0 when there are no positives."""
    positives = TP + FN
    return TP / positives if positives != 0 else 0
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def compute_fpr(FP, TN):
    """Return the false positive rate ``FP / (FP + TN)``, or 0 when there are no negatives."""
    negatives = FP + TN
    return FP / negatives if negatives != 0 else 0
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
def adjust_threshold(y, probabilities:np.ndarray, classes:np.ndarray) -> tuple:
    """Compute TPR and FPR over a fixed grid of decision thresholds.

    For each of the 101 evenly spaced thresholds in [0, 1], entries of
    ``probabilities`` greater than or equal to the threshold are labelled
    ``classes[1]`` and the rest ``classes[0]``; the resulting predictions
    are compared with ``y``.

    Args:
        y: true labels.
        probabilities (np.ndarray): scores compared against each threshold.
        classes (np.ndarray): the two class labels, negative first.

    Returns:
        tuple: ``(thresholds, tprs, fprs)``, three arrays of length 101.
    """
    thresholds = np.linspace(0, 1, 101)
    negative, positive = classes[0], classes[1]

    rates = []
    for cutoff in thresholds:
        predicted = np.where(probabilities >= cutoff, positive, negative)
        TP, FP, FN, TN = compute_table(y, predicted, classes)
        rates.append((compute_tpr(TP, FN), compute_fpr(FP, TN)))

    tprs = np.asarray([tpr for tpr, _ in rates])
    fprs = np.asarray([fpr for _, fpr in rates])
    return (thresholds, tprs, fprs)
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
mlquantify/__init__.py,sha256=BGxGCeZhlNsTYZkLoJJ-zcpVDRU4jBFVsz9ZmEZCtvs,166
|
|
2
|
-
mlquantify/base.py,sha256=przWKpQtoTT6ekbyTrAU3Kp_JsQ1982rAXWX_oAoSQM,8393
|
|
3
|
-
mlquantify/model_selection.py,sha256=zN-qkrCe35GoJ8u1_Ab3EKXvrnAppk68Fj5kQVi61nU,8617
|
|
4
|
-
mlquantify/classification/__init__.py,sha256=zxE6_ouh3kJVL56aJAhpFh36EBWKq9U4IcID4noROPQ,26
|
|
5
|
-
mlquantify/classification/pwkclf.py,sha256=K1XBgI9FIh1Yu571bzdw8PM-TJXFGOQAzP4ewDV9b3Y,2323
|
|
6
|
-
mlquantify/evaluation/__init__.py,sha256=_7orbRyYuJydD_W9o5l9Ko_-EopGGhewekoEhR82S3Q,48
|
|
7
|
-
mlquantify/evaluation/measures/__init__.py,sha256=JvxIm3k31EFBzm-4Guj9Huh--B7u-o3gvln3VSFK2nk,761
|
|
8
|
-
mlquantify/evaluation/measures/ae.py,sha256=eY8oKfj7TIpbNCsEfbkbeVVduSAaiKE5Vxy7xACwrL8,363
|
|
9
|
-
mlquantify/evaluation/measures/bias.py,sha256=AmTZ9dP5XW6ZR-gdBfOF008IzVFeBNeL2SIuSLkABQc,497
|
|
10
|
-
mlquantify/evaluation/measures/kld.py,sha256=f443cf31Sm904zEwk0q1093XwvEEQbroHUzNLKJj-BI,341
|
|
11
|
-
mlquantify/evaluation/measures/mse.py,sha256=VS38G83ygVz_qxtsjqa-tTGbPVERQOraY_aga5JmVuo,408
|
|
12
|
-
mlquantify/evaluation/measures/nae.py,sha256=D5W2qiD1EC53uy7AILRHvdZNvAcAQkLTuFNFNmHjkEs,499
|
|
13
|
-
mlquantify/evaluation/measures/nkld.py,sha256=UsTDZP-CjiEgQjfyoKynZntpFV8msYPrEma3GtHK1-s,491
|
|
14
|
-
mlquantify/evaluation/measures/nrae.py,sha256=iTxEG7k6DQRieSbwKrHfcynmbRR-qCviudDecoRkmdk,574
|
|
15
|
-
mlquantify/evaluation/measures/rae.py,sha256=KYa9HY7k5w3acMp5yUSMVuwZIdzeUB9CzkSA-jpXn7Y,416
|
|
16
|
-
mlquantify/evaluation/measures/se.py,sha256=aScxX56dmbSj1HxubD8Dlq4gTmbNrA-H3AdE145wgDI,396
|
|
17
|
-
mlquantify/evaluation/protocol/_Protocol.py,sha256=XsLsu-sDQxHVyRqc-vGAs4407H5IJGSYAuH4sWAYQL8,7752
|
|
18
|
-
mlquantify/evaluation/protocol/__init__.py,sha256=RO3fHIvhAK95t8EUaD67JfJV2WeLb17HMqbXo3LcUSI,42
|
|
19
|
-
mlquantify/evaluation/protocol/app.py,sha256=WJ2OaYe49R8JqCoZDy6gMN50eIQ7jThPDT5W5lSRlK0,5713
|
|
20
|
-
mlquantify/evaluation/protocol/npp.py,sha256=c287_9LmxYvv72hh2iwoCwSkwqTeFvknKOWWG4Jq0QY,1117
|
|
21
|
-
mlquantify/methods/__init__.py,sha256=rvpN5T6vXdfX1bOHKYGC0aKZMc5bHSPb-pCVQ3mlb08,627
|
|
22
|
-
mlquantify/methods/aggregative/__init__.py,sha256=rBtSfgTsTIjbH5rJhQ9V05n68o-9hm9QNZzxRCh2VME,209
|
|
23
|
-
mlquantify/methods/aggregative/cc.py,sha256=HXFgcLqyM35CTszBn5UmUmT60Z1J1Ow0i4lZf5BctH8,1123
|
|
24
|
-
mlquantify/methods/aggregative/emq.py,sha256=3N4PQhTlBu6vBwOaEpcNZf2QYNBF2XC8cCH3qsyKt9Y,3208
|
|
25
|
-
mlquantify/methods/aggregative/fm.py,sha256=RgXVwsdoZsWV_DanvzcsRmll_e2Etvl27PNI7iYM9Nw,2912
|
|
26
|
-
mlquantify/methods/aggregative/gac.py,sha256=gTIRqMN0S7teheSOgrk2NB80UxfC-KfbNuk0A1tflh8,3640
|
|
27
|
-
mlquantify/methods/aggregative/gpac.py,sha256=VcDCcEYXiknEOTwVDGOXWeZCJS9T6vd4YFqEbnUB3YU,3531
|
|
28
|
-
mlquantify/methods/aggregative/pcc.py,sha256=8WuiWFrYvskx0Xd2wTDgcn-GyswwB5hv-VH6HaRkilY,1251
|
|
29
|
-
mlquantify/methods/aggregative/pwk.py,sha256=JCUzS8hrsiFMacLh00FR4Xhk7zhe4Vy4za-0QeFr-l8,1466
|
|
30
|
-
mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py,sha256=amgklOfPe-htQIhuDb3l4usqj-cVkGFa7z0KXYmibUM,2433
|
|
31
|
-
mlquantify/methods/aggregative/ThreholdOptm/__init__.py,sha256=ALB-ZEl9M_DyHr3U9PEw6E9XSMFfwci_aSwbpyzvp0E,155
|
|
32
|
-
mlquantify/methods/aggregative/ThreholdOptm/acc.py,sha256=tgcMhYW9cD66j0T6mWDulaxP9-FA1mNozvlaKMQV2UQ,1082
|
|
33
|
-
mlquantify/methods/aggregative/ThreholdOptm/max.py,sha256=A-_0RQ-HAiTE2FfeNvcInqauXpzn8xtSf0LDiuC5wDk,824
|
|
34
|
-
mlquantify/methods/aggregative/ThreholdOptm/ms.py,sha256=BpVrDaiCZa8xe1_YX9qhGCsOAcCXZ_3EQF5ZCHWRCn8,739
|
|
35
|
-
mlquantify/methods/aggregative/ThreholdOptm/ms2.py,sha256=Pxxc4p_41KLcNUaN0fhjih3mSVMJhoZrm-KUy45paT4,852
|
|
36
|
-
mlquantify/methods/aggregative/ThreholdOptm/pacc.py,sha256=zfRQhklB8NuvQAn8-a1hJ9sm6iG6yRGIWh-xlLTnU3M,1432
|
|
37
|
-
mlquantify/methods/aggregative/ThreholdOptm/t50.py,sha256=-zWNDXdGpW7CR-9O6MUQYOBqMuH15lLV-uK0u8nIAYA,728
|
|
38
|
-
mlquantify/methods/aggregative/ThreholdOptm/x.py,sha256=Lwn5MtaL9tb4eRFMrFxerUjmumh2US90KvDfdNQxvF8,759
|
|
39
|
-
mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py,sha256=pcnCN0IArb_AJ71AyUnIt0uVPhN9IWnXZF9ad1d0g8c,3336
|
|
40
|
-
mlquantify/methods/aggregative/mixtureModels/__init__.py,sha256=F7QvGhVkG7NEHjnau_fNm7T9MLCUUDwfrrNvncqPpPI,117
|
|
41
|
-
mlquantify/methods/aggregative/mixtureModels/dys.py,sha256=JC3h_gdIYG-5pphl7C6Ez8Kbp0UPbccqX8ZVcsTAI3g,2356
|
|
42
|
-
mlquantify/methods/aggregative/mixtureModels/dys_syn.py,sha256=GGt9TOXGUBBD9PdiozT6QpC-IGa6QvLf1QAGEI7El8s,3906
|
|
43
|
-
mlquantify/methods/aggregative/mixtureModels/hdy.py,sha256=JykK4Fg5yPKmEuL1aAqsG9wnrmTbXNuxbK4ugI1XNBo,1986
|
|
44
|
-
mlquantify/methods/aggregative/mixtureModels/smm.py,sha256=2r82umOXqOrffVO-SdnyUzsJKg6hz5_z82HzNsn7GRY,1050
|
|
45
|
-
mlquantify/methods/aggregative/mixtureModels/sord.py,sha256=Kz5YAXd3RezeTI66wfrnXxY_AXDos44WS8N2fYE4oxA,3166
|
|
46
|
-
mlquantify/methods/meta/__init__.py,sha256=uj--leWdOrpBs0u_djhCYlLCrQo3GIxg0AoJyAkHC-c,30
|
|
47
|
-
mlquantify/methods/meta/ensemble.py,sha256=rX9fyGi6xMYVUo0jm_rttnSDTDt_XokvzZJUWDPxQTM,10396
|
|
48
|
-
mlquantify/methods/non_aggregative/__init__.py,sha256=HD34lwNmAkw26qlEA5PDHay9V6ShgFiQvlPipOsrDAs,20
|
|
49
|
-
mlquantify/methods/non_aggregative/hdx.py,sha256=vwodV_Z89_be8HBOSriX3lsFhzmPZVLdOgKewa6la-E,2420
|
|
50
|
-
mlquantify/plots/__init__.py,sha256=IDnv_KVALIsVuohiudclnIMSM5F1dNRxUFpjygLblBI,118
|
|
51
|
-
mlquantify/plots/distribution_plot.py,sha256=8l22Lq6LhlOd3yv6gHX6wg2p1MTozYASyNzPcLlPHzk,4038
|
|
52
|
-
mlquantify/plots/protocol_plot.py,sha256=DfkCSHELr9lyyxMqeGNJgx0iywODQmH8EL5tlMjeAFQ,6618
|
|
53
|
-
mlquantify/utils/__init__.py,sha256=VKplOsrL4ONf0-9anhcWFOxXjvGSQTH0Kh922s_hGb0,63
|
|
54
|
-
mlquantify/utils/general_purposes/__init__.py,sha256=-Pwx1t2M_rGCEHxIc2TyomSe92w6U5Bdp6wtpsdcMZI,380
|
|
55
|
-
mlquantify/utils/general_purposes/convert_col_to_array.py,sha256=Pq6_U6BdsIAl4Vr_836_vM-g2iRQAJ1b4kGOsYoFukI,560
|
|
56
|
-
mlquantify/utils/general_purposes/generate_artificial_indexes.py,sha256=L_Sb1hBYGllCxDbYKRNY_qqjCfSbbkXo6E1XuaWxQMs,1049
|
|
57
|
-
mlquantify/utils/general_purposes/get_real_prev.py,sha256=nHSXaxAdp-rznyhauN7ta_1QCDrlfCZSJWxxQQ4_pvE,266
|
|
58
|
-
mlquantify/utils/general_purposes/load_quantifier.py,sha256=6BfObUBVJ7_v-pzP-cQlMpblmg6ygpvDRTimOhMWepw,77
|
|
59
|
-
mlquantify/utils/general_purposes/make_prevs.py,sha256=lamVz-MTRDP4bqDjS4xm2SMa3YpnMcL__1OF6ZpEX2I,661
|
|
60
|
-
mlquantify/utils/general_purposes/normalize.py,sha256=kIRG22249IkBuDBGP-qQbLsH-edPFC8gPnJS_huxLhY,785
|
|
61
|
-
mlquantify/utils/general_purposes/parallel.py,sha256=vkert2m-A5Rs_VKhLUk84nZ0scIVPFadvVsTF2bItaU,218
|
|
62
|
-
mlquantify/utils/general_purposes/round_protocol_df.py,sha256=74zS7ws7mgzjObv6xoQi7rvH2xskschHF93czoFaVFY,474
|
|
63
|
-
mlquantify/utils/method_purposes/__init__.py,sha256=MFB3LtYTsWuETmyFokn8EZoK2wwPZspCknIMdutLLCo,276
|
|
64
|
-
mlquantify/utils/method_purposes/distances.py,sha256=l5neBoqLN_Npyf2L9VB19V_RHpB2w9AJuddTQ2pFzxU,428
|
|
65
|
-
mlquantify/utils/method_purposes/getHist.py,sha256=dgO57Wo-Sbix6h1ixbT2XIKuRU0p-lZNLGI0ZGwadkM,402
|
|
66
|
-
mlquantify/utils/method_purposes/get_scores.py,sha256=qdIVYUS8xd8Vt86k19yETDNfibTXaTur5pCrMA1b254,1151
|
|
67
|
-
mlquantify/utils/method_purposes/moss.py,sha256=CVDDMHxPBnl_U2hz7Aqvne7jhB2mBUsVzTTsaiLQhOc,352
|
|
68
|
-
mlquantify/utils/method_purposes/ternary_search.py,sha256=JpNrfJsA5kWuanVW_hyMucy7rQ9UzTSgazFpTRi9jMI,416
|
|
69
|
-
mlquantify/utils/method_purposes/tprfpr.py,sha256=VKniG5aK8IwAA2fXEhkdHtwnx1zHH12qhwS4kKW5Dlo,1181
|
|
70
|
-
mlquantify-0.0.11.2.dist-info/METADATA,sha256=CQ6GvMVgM4JfD6KW4vXoDAixa35QM1zOFs5Erdm3EEM,4717
|
|
71
|
-
mlquantify-0.0.11.2.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
72
|
-
mlquantify-0.0.11.2.dist-info/top_level.txt,sha256=tGEkYkbbFElwULvqENjam3u1uXtyC1J9dRmibsq8_n0,11
|
|
73
|
-
mlquantify-0.0.11.2.dist-info/RECORD,,
|
|
File without changes
|