mlquantify 0.0.11.4__tar.gz → 0.0.11.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/PKG-INFO +2 -2
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/README.md +1 -1
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/gac.py +16 -23
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/gpac.py +17 -26
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +0 -1
- mlquantify-0.0.11.6/mlquantify/methods/aggregative/mixtureModels/dys.py +107 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/mixtureModels/dys_syn.py +63 -16
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/mixtureModels/hdy.py +44 -7
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify.egg-info/PKG-INFO +2 -2
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/setup.py +1 -1
- mlquantify-0.0.11.4/mlquantify/methods/aggregative/mixtureModels/dys.py +0 -55
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/base.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/classification/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/classification/pwkclf.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/measures/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/measures/ae.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/measures/bias.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/measures/kld.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/measures/mse.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/measures/nae.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/measures/nkld.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/measures/nrae.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/measures/rae.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/measures/se.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/protocol/_Protocol.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/protocol/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/protocol/app.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/evaluation/protocol/npp.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/acc.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/max.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/ms.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/ms2.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/pacc.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/t50.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/x.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/cc.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/emq.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/fm.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/mixtureModels/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/mixtureModels/smm.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/mixtureModels/sord.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/pcc.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/pwk.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/meta/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/meta/ensemble.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/non_aggregative/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/non_aggregative/hdx.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/model_selection.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/plots/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/plots/distribution_plot.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/plots/protocol_plot.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/general_purposes/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/general_purposes/convert_col_to_array.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/general_purposes/generate_artificial_indexes.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/general_purposes/get_real_prev.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/general_purposes/load_quantifier.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/general_purposes/make_prevs.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/general_purposes/normalize.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/general_purposes/parallel.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/general_purposes/round_protocol_df.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/method_purposes/__init__.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/method_purposes/distances.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/method_purposes/getHist.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/method_purposes/get_scores.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/method_purposes/moss.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/method_purposes/ternary_search.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/method_purposes/tprfpr.py +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify.egg-info/SOURCES.txt +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify.egg-info/dependency_links.txt +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify.egg-info/requires.txt +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify.egg-info/top_level.txt +0 -0
- {mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: mlquantify
|
|
3
|
-
Version: 0.0.11.
|
|
3
|
+
Version: 0.0.11.6
|
|
4
4
|
Summary: Quantification Library
|
|
5
5
|
Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
|
|
6
6
|
Maintainer: Luiz Fernando Luth Junior
|
|
@@ -32,7 +32,7 @@ ___
|
|
|
32
32
|
|
|
33
33
|
## Latest Release
|
|
34
34
|
|
|
35
|
-
- **Version 0.0.
|
|
35
|
+
- **Version 0.0.11.6**: Inicial beta version. For a detailed list of changes, check the [changelog](#).
|
|
36
36
|
- In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
|
|
37
37
|
- Explore the [API documentation](#) for detailed developer information.
|
|
38
38
|
- See also the library in the pypi site in [pypi mlquantify](https://pypi.org/project/mlquantify/)
|
|
@@ -9,7 +9,7 @@ ___
|
|
|
9
9
|
|
|
10
10
|
## Latest Release
|
|
11
11
|
|
|
12
|
-
- **Version 0.0.
|
|
12
|
+
- **Version 0.0.11.6**: Inicial beta version. For a detailed list of changes, check the [changelog](#).
|
|
13
13
|
- In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
|
|
14
14
|
- Explore the [API documentation](#) for detailed developer information.
|
|
15
15
|
- See also the library in the pypi site in [pypi mlquantify](https://pypi.org/project/mlquantify/)
|
|
@@ -2,7 +2,7 @@ import numpy as np
|
|
|
2
2
|
import pandas as pd
|
|
3
3
|
from sklearn.base import BaseEstimator
|
|
4
4
|
from sklearn.metrics import confusion_matrix
|
|
5
|
-
from sklearn.model_selection import
|
|
5
|
+
from sklearn.model_selection import train_test_split
|
|
6
6
|
|
|
7
7
|
from ...base import AggregativeQuantifier
|
|
8
8
|
|
|
@@ -13,10 +13,12 @@ class GAC(AggregativeQuantifier):
|
|
|
13
13
|
and solve it via constrained least-squares regression.
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
|
-
def __init__(self, learner: BaseEstimator):
|
|
16
|
+
def __init__(self, learner: BaseEstimator, train_size:float=0.6, random_state:int=None):
|
|
17
17
|
assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
|
|
18
18
|
self.learner = learner
|
|
19
19
|
self.cond_prob_matrix = None
|
|
20
|
+
self.train_size = train_size
|
|
21
|
+
self.random_state = random_state
|
|
20
22
|
|
|
21
23
|
def _fit_method(self, X, y):
|
|
22
24
|
# Ensure X and y are DataFrames
|
|
@@ -29,26 +31,17 @@ class GAC(AggregativeQuantifier):
|
|
|
29
31
|
y_pred = self.learner.predict(X)
|
|
30
32
|
y_label = y
|
|
31
33
|
else:
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
y_label = []
|
|
34
|
+
X_train, X_val, y_train, y_val = train_test_split(
|
|
35
|
+
X, y, train_size=self.train_size, stratify=y, random_state=self.random_state
|
|
36
|
+
)
|
|
36
37
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
valid_data = pd.DataFrame(X.iloc[valid_index])
|
|
43
|
-
valid_label = y.iloc[valid_index]
|
|
44
|
-
|
|
45
|
-
self.learner.fit(train_data, train_label)
|
|
46
|
-
|
|
47
|
-
y_pred.extend(self.learner.predict(valid_data))
|
|
48
|
-
y_label.extend(valid_label)
|
|
38
|
+
self.learner.fit(X_train, y_train)
|
|
39
|
+
|
|
40
|
+
y_label = y_val
|
|
41
|
+
y_pred = self.learner.predict(X_val)
|
|
49
42
|
|
|
50
43
|
# Compute conditional probability matrix
|
|
51
|
-
self.cond_prob_matrix =
|
|
44
|
+
self.cond_prob_matrix = GAC.get_cond_prob_matrix(self.classes, y_label, y_pred)
|
|
52
45
|
|
|
53
46
|
return self
|
|
54
47
|
|
|
@@ -66,11 +59,11 @@ class GAC(AggregativeQuantifier):
|
|
|
66
59
|
return adjusted_prevalences
|
|
67
60
|
|
|
68
61
|
@classmethod
|
|
69
|
-
def get_cond_prob_matrix(cls, classes:list,
|
|
62
|
+
def get_cond_prob_matrix(cls, classes:list, y_labels:np.ndarray, predictions:np.ndarray) -> np.ndarray:
|
|
70
63
|
""" Estimate the conditional probability matrix P(yi|yj)"""
|
|
71
64
|
|
|
72
|
-
CM = confusion_matrix(
|
|
73
|
-
CM = CM.astype(
|
|
65
|
+
CM = confusion_matrix(y_labels, predictions, labels=classes).T
|
|
66
|
+
CM = CM.astype(float)
|
|
74
67
|
class_counts = CM.sum(axis=0)
|
|
75
68
|
for i, _ in enumerate(classes):
|
|
76
69
|
if class_counts[i] == 0:
|
|
@@ -91,6 +84,6 @@ class GAC(AggregativeQuantifier):
|
|
|
91
84
|
adjusted_prevalences = np.linalg.solve(A, B)
|
|
92
85
|
adjusted_prevalences = np.clip(adjusted_prevalences, 0, 1)
|
|
93
86
|
adjusted_prevalences /= adjusted_prevalences.sum()
|
|
94
|
-
except (np.linalg.LinAlgError
|
|
87
|
+
except (np.linalg.LinAlgError):
|
|
95
88
|
adjusted_prevalences = predicted_prevalences # No way to adjust them
|
|
96
89
|
return adjusted_prevalences
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
import pandas as pd
|
|
3
3
|
from sklearn.base import BaseEstimator
|
|
4
|
-
from sklearn.model_selection import
|
|
4
|
+
from sklearn.model_selection import train_test_split
|
|
5
5
|
|
|
6
6
|
from .gac import GAC
|
|
7
7
|
from ...base import AggregativeQuantifier
|
|
@@ -14,10 +14,12 @@ class GPAC(AggregativeQuantifier):
|
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
16
|
|
|
17
|
-
def __init__(self, learner: BaseEstimator):
|
|
17
|
+
def __init__(self, learner: BaseEstimator, train_size:float=0.6, random_state:int=None):
|
|
18
18
|
assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
|
|
19
19
|
self.learner = learner
|
|
20
20
|
self.cond_prob_matrix = None
|
|
21
|
+
self.train_size = train_size
|
|
22
|
+
self.random_state = random_state
|
|
21
23
|
|
|
22
24
|
def _fit_method(self, X, y):
|
|
23
25
|
# Convert X and y to DataFrames if they are numpy arrays
|
|
@@ -28,31 +30,20 @@ class GPAC(AggregativeQuantifier):
|
|
|
28
30
|
|
|
29
31
|
if self.learner_fitted:
|
|
30
32
|
# Use existing model to predict
|
|
31
|
-
|
|
32
|
-
|
|
33
|
+
y_pred = self.learner.predict(X)
|
|
34
|
+
y_labels = y
|
|
33
35
|
else:
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
true_labels = []
|
|
36
|
+
X_train, X_val, y_train, y_val = train_test_split(
|
|
37
|
+
X, y, train_size=self.train_size, stratify=y, random_state=self.random_state
|
|
38
|
+
)
|
|
38
39
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
valid_data = pd.DataFrame(X.iloc[valid_index])
|
|
45
|
-
valid_labels = y.iloc[valid_index]
|
|
46
|
-
|
|
47
|
-
# Train the learner
|
|
48
|
-
self.learner.fit(train_data, train_labels)
|
|
49
|
-
|
|
50
|
-
# Predict and collect results
|
|
51
|
-
predictions.extend(self.learner.predict(valid_data))
|
|
52
|
-
true_labels.extend(valid_labels)
|
|
40
|
+
self.learner.fit(X_train, y_train)
|
|
41
|
+
|
|
42
|
+
y_labels = y_val
|
|
43
|
+
y_pred = self.learner.predict(X_val)
|
|
53
44
|
|
|
54
45
|
# Compute conditional probability matrix using GAC
|
|
55
|
-
self.cond_prob_matrix = GAC.get_cond_prob_matrix(self.classes,
|
|
46
|
+
self.cond_prob_matrix = GAC.get_cond_prob_matrix(self.classes, y_labels, y_pred)
|
|
56
47
|
|
|
57
48
|
return self
|
|
58
49
|
|
|
@@ -73,15 +64,15 @@ class GPAC(AggregativeQuantifier):
|
|
|
73
64
|
return adjusted_prevalences
|
|
74
65
|
|
|
75
66
|
@classmethod
|
|
76
|
-
def get_cond_prob_matrix(cls, classes:list,
|
|
67
|
+
def get_cond_prob_matrix(cls, classes:list, y_labels:np.ndarray, y_pred:np.ndarray) -> np.ndarray:
|
|
77
68
|
"""Estimate the matrix where entry (i,j) is the estimate of P(yi|yj)"""
|
|
78
69
|
|
|
79
70
|
n_classes = len(classes)
|
|
80
71
|
cond_prob_matrix = np.eye(n_classes)
|
|
81
72
|
|
|
82
73
|
for i, class_ in enumerate(classes):
|
|
83
|
-
class_indices =
|
|
74
|
+
class_indices = y_labels == class_
|
|
84
75
|
if class_indices.any():
|
|
85
|
-
cond_prob_matrix[i] =
|
|
76
|
+
cond_prob_matrix[i] = y_pred[class_indices].mean(axis=0)
|
|
86
77
|
|
|
87
78
|
return cond_prob_matrix.T
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from sklearn.base import BaseEstimator
|
|
3
|
+
|
|
4
|
+
from ._MixtureModel import MixtureModel
|
|
5
|
+
from ....utils import getHist, ternary_search
|
|
6
|
+
|
|
7
|
+
class DyS(MixtureModel):
|
|
8
|
+
"""Distribution y-Similarity framework. Is a
|
|
9
|
+
method that generalises the HDy approach by
|
|
10
|
+
considering the dissimilarity function DS as
|
|
11
|
+
a parameter of the model
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
def __init__(self, learner:BaseEstimator, measure:str="topsoe", bins_size:np.ndarray=None):
|
|
15
|
+
assert measure in ["hellinger", "topsoe", "probsymm"], "measure not valid"
|
|
16
|
+
assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
|
|
17
|
+
super().__init__(learner)
|
|
18
|
+
|
|
19
|
+
# Set up bins_size
|
|
20
|
+
if not bins_size:
|
|
21
|
+
bins_size = np.append(np.linspace(2,20,10), 30)
|
|
22
|
+
if isinstance(bins_size, list):
|
|
23
|
+
bins_size = np.asarray(bins_size)
|
|
24
|
+
|
|
25
|
+
self.bins_size = bins_size
|
|
26
|
+
self.measure = measure
|
|
27
|
+
self.prevs = None # Array of prevalences that minimizes the distances
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _compute_prevalence(self, test_scores:np.ndarray) -> float:
|
|
31
|
+
|
|
32
|
+
prevs = self.GetMinDistancesDyS(test_scores)
|
|
33
|
+
# Use the median of the prevalences as the final prevalence estimate
|
|
34
|
+
prevalence = np.median(prevs)
|
|
35
|
+
|
|
36
|
+
return prevalence
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def best_distance(self, X_test) -> float:
|
|
41
|
+
|
|
42
|
+
test_scores = self.learner.predict_proba(X_test)
|
|
43
|
+
|
|
44
|
+
prevs = self.GetMinDistancesDyS(test_scores)
|
|
45
|
+
|
|
46
|
+
size = len(prevs)
|
|
47
|
+
best_prev = np.median(prevs)
|
|
48
|
+
|
|
49
|
+
if size % 2 != 0: # ODD
|
|
50
|
+
index = np.argmax(prevs == best_prev)
|
|
51
|
+
bin_size = self.bins_size[index]
|
|
52
|
+
else: # EVEN
|
|
53
|
+
# Sort the values in self.prevs
|
|
54
|
+
ordered_prevs = np.sort(prevs)
|
|
55
|
+
|
|
56
|
+
# Find the two middle indices
|
|
57
|
+
middle1 = np.floor(size / 2).astype(int)
|
|
58
|
+
middle2 = np.ceil(size / 2).astype(int)
|
|
59
|
+
|
|
60
|
+
# Get the values corresponding to the median positions
|
|
61
|
+
median1 = ordered_prevs[middle1]
|
|
62
|
+
median2 = ordered_prevs[middle2]
|
|
63
|
+
|
|
64
|
+
# Find the indices of median1 and median2 in prevs
|
|
65
|
+
index1 = np.argmax(prevs == median1)
|
|
66
|
+
index2 = np.argmax(prevs == median2)
|
|
67
|
+
|
|
68
|
+
# Calculate the average of the corresponding bin sizes
|
|
69
|
+
bin_size = np.mean([self.bins_size[index1], self.bins_size[index2]])
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
pos_bin_density = getHist(self.pos_scores, bin_size)
|
|
73
|
+
neg_bin_density = getHist(self.neg_scores, bin_size)
|
|
74
|
+
test_bin_density = getHist(test_scores, bin_size)
|
|
75
|
+
|
|
76
|
+
train_combined_density = (pos_bin_density * best_prev) + (neg_bin_density * (1 - best_prev))
|
|
77
|
+
|
|
78
|
+
distance = self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
|
|
79
|
+
|
|
80
|
+
return distance
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def GetMinDistancesDyS(self, test_scores) -> list:
|
|
84
|
+
# Compute prevalence by evaluating the distance metric across various bin sizes
|
|
85
|
+
|
|
86
|
+
prevs = []
|
|
87
|
+
|
|
88
|
+
# Iterate over each bin size
|
|
89
|
+
for bins in self.bins_size:
|
|
90
|
+
# Compute histogram densities for positive, negative, and test scores
|
|
91
|
+
pos_bin_density = getHist(self.pos_scores, bins)
|
|
92
|
+
neg_bin_density = getHist(self.neg_scores, bins)
|
|
93
|
+
test_bin_density = getHist(test_scores, bins)
|
|
94
|
+
|
|
95
|
+
# Define the function to minimize
|
|
96
|
+
def f(x):
|
|
97
|
+
# Combine densities using a mixture of positive and negative densities
|
|
98
|
+
train_combined_density = (pos_bin_density * x) + (neg_bin_density * (1 - x))
|
|
99
|
+
# Calculate the distance between combined density and test density
|
|
100
|
+
return self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
|
|
101
|
+
|
|
102
|
+
# Use ternary search to find the best x that minimizes the distance
|
|
103
|
+
prevs.append(ternary_search(0, 1, f))
|
|
104
|
+
|
|
105
|
+
return prevs
|
|
106
|
+
|
|
107
|
+
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/mixtureModels/dys_syn.py
RENAMED
|
@@ -34,6 +34,7 @@ class DySsyn(MixtureModel):
|
|
|
34
34
|
self.m = None
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
|
|
37
38
|
def _fit_method(self, X, y):
|
|
38
39
|
if not self.learner_fitted:
|
|
39
40
|
self.learner.fit(X, y)
|
|
@@ -45,16 +46,41 @@ class DySsyn(MixtureModel):
|
|
|
45
46
|
|
|
46
47
|
|
|
47
48
|
def _compute_prevalence(self, test_scores:np.ndarray) -> float: #creating bins from 10 to 110 with step size 10
|
|
49
|
+
|
|
50
|
+
distances = self.GetMinDistancesDySsyn(test_scores)
|
|
51
|
+
|
|
52
|
+
# Use the median of the prevss as the final prevalence estimate
|
|
53
|
+
index = min(distances, key=lambda d: distances[d][0])
|
|
54
|
+
prevalence = distances[index][1]
|
|
55
|
+
|
|
56
|
+
return prevalence
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def best_distance(self, X_test):
|
|
60
|
+
|
|
61
|
+
test_scores = self.learner.predict_proba(X_test)
|
|
62
|
+
|
|
63
|
+
distances = self.GetMinDistancesDySsyn(test_scores)
|
|
64
|
+
|
|
65
|
+
index = min(distances, key=lambda d: distances[d][0])
|
|
66
|
+
|
|
67
|
+
distance = distances[index][0]
|
|
68
|
+
|
|
69
|
+
return distance
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def GetMinDistancesDySsyn(self, test_scores) -> list:
|
|
48
74
|
# Compute prevalence by evaluating the distance metric across various bin sizes
|
|
49
75
|
if self.n is None:
|
|
50
76
|
self.n = len(test_scores)
|
|
51
77
|
|
|
52
|
-
|
|
78
|
+
values = {}
|
|
53
79
|
|
|
54
80
|
# Iterate over each bin size
|
|
55
81
|
for m in self.merge_factor:
|
|
56
82
|
pos_scores, neg_scores = MoSS(self.n, self.alpha_train, m)
|
|
57
|
-
|
|
83
|
+
prevs = []
|
|
58
84
|
for bins in self.bins_size:
|
|
59
85
|
# Compute histogram densities for positive, negative, and test scores
|
|
60
86
|
pos_bin_density = getHist(pos_scores, bins)
|
|
@@ -69,21 +95,42 @@ class DySsyn(MixtureModel):
|
|
|
69
95
|
return self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
|
|
70
96
|
|
|
71
97
|
# Use ternary search to find the best x that minimizes the distance
|
|
72
|
-
|
|
73
|
-
|
|
98
|
+
prevs.append(ternary_search(0, 1, f))
|
|
99
|
+
|
|
100
|
+
size = len(prevs)
|
|
101
|
+
best_prev = np.median(prevs)
|
|
102
|
+
|
|
103
|
+
if size % 2 != 0: # ODD
|
|
104
|
+
index = np.argmax(prevs == best_prev)
|
|
105
|
+
bin_size = self.bins_size[index]
|
|
106
|
+
else: # EVEN
|
|
107
|
+
# Sort the values in self.prevs
|
|
108
|
+
ordered_prevs = np.sort(prevs)
|
|
109
|
+
|
|
110
|
+
# Find the two middle indices
|
|
111
|
+
middle1 = np.floor(size / 2).astype(int)
|
|
112
|
+
middle2 = np.ceil(size / 2).astype(int)
|
|
113
|
+
|
|
114
|
+
# Get the values corresponding to the median positions
|
|
115
|
+
median1 = ordered_prevs[middle1]
|
|
116
|
+
median2 = ordered_prevs[middle2]
|
|
117
|
+
|
|
118
|
+
# Find the indices of median1 and median2 in prevs
|
|
119
|
+
index1 = np.argmax(prevs == median1)
|
|
120
|
+
index2 = np.argmax(prevs == median2)
|
|
121
|
+
|
|
122
|
+
# Calculate the average of the corresponding bin sizes
|
|
123
|
+
bin_size = np.mean([self.bins_size[index1], self.bins_size[index2]])
|
|
124
|
+
|
|
74
125
|
|
|
75
|
-
|
|
126
|
+
pos_bin_density = getHist(pos_scores, bin_size)
|
|
127
|
+
neg_bin_density = getHist(neg_scores, bin_size)
|
|
128
|
+
test_bin_density = getHist(test_scores, bin_size)
|
|
76
129
|
|
|
77
|
-
|
|
78
|
-
neg_bin_density = getHist(neg_scores, bins_size)
|
|
79
|
-
test_bin_density = getHist(test_scores, bins_size)
|
|
130
|
+
train_combined_density = (pos_bin_density * best_prev) + (neg_bin_density * (1 - best_prev))
|
|
80
131
|
|
|
81
|
-
|
|
82
|
-
d = self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
|
|
83
|
-
distances[m] = (d, prevalence)
|
|
84
|
-
# Use the median of the results as the final prevalence estimate
|
|
85
|
-
index = min(distances, key=lambda d: distances[d][0])
|
|
86
|
-
prevalence = distances[index][1]
|
|
132
|
+
distance = self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
|
|
87
133
|
|
|
88
|
-
|
|
89
|
-
|
|
134
|
+
values[m] = (distance, best_prev)
|
|
135
|
+
|
|
136
|
+
return values
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/mixtureModels/hdy.py
RENAMED
|
@@ -14,15 +14,54 @@ class HDy(MixtureModel):
|
|
|
14
14
|
def __init__(self, learner: BaseEstimator):
|
|
15
15
|
assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
|
|
16
16
|
super().__init__(learner)
|
|
17
|
+
|
|
17
18
|
|
|
18
19
|
def _compute_prevalence(self, test_scores: np.ndarray) -> float:
|
|
20
|
+
|
|
21
|
+
best_alphas, _ = self.GetMinDistancesHDy(test_scores)
|
|
22
|
+
# Compute the median of the best alpha values as the final prevalence estimate
|
|
23
|
+
prevalence = np.median(best_alphas)
|
|
24
|
+
|
|
25
|
+
return prevalence
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def best_distance(self, X_test) -> float:
|
|
30
|
+
|
|
31
|
+
test_scores = self.learner.predict_proba(X_test)
|
|
32
|
+
|
|
33
|
+
_, distances = self.GetMinDistancesHDy(test_scores)
|
|
34
|
+
|
|
35
|
+
size = len(distances)
|
|
36
|
+
|
|
37
|
+
if size % 2 != 0: # ODD
|
|
38
|
+
index = size // 2
|
|
39
|
+
distance = distances[index]
|
|
40
|
+
else: # EVEN
|
|
41
|
+
# Find the two middle indices
|
|
42
|
+
middle1 = np.floor(size / 2).astype(int)
|
|
43
|
+
middle2 = np.ceil(size / 2).astype(int)
|
|
44
|
+
|
|
45
|
+
# Get the values corresponding to the median positions
|
|
46
|
+
dist1 = distances[middle1]
|
|
47
|
+
dist2 = distances[middle2]
|
|
48
|
+
|
|
49
|
+
# Calculate the average of the corresponding distances
|
|
50
|
+
distance = np.mean([dist1, dist2])
|
|
51
|
+
|
|
52
|
+
return distance
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def GetMinDistancesHDy(self, test_scores: np.ndarray) -> tuple:
|
|
56
|
+
|
|
19
57
|
# Define bin sizes and alpha values
|
|
20
|
-
|
|
58
|
+
bins_size = np.arange(10, 110, 11) # Bins from 10 to 110 with a step size of 10
|
|
21
59
|
alpha_values = np.round(np.linspace(0, 1, 101), 2) # Alpha values from 0 to 1, rounded to 2 decimal places
|
|
22
60
|
|
|
23
61
|
best_alphas = []
|
|
24
|
-
|
|
25
|
-
|
|
62
|
+
distances = []
|
|
63
|
+
|
|
64
|
+
for bins in bins_size:
|
|
26
65
|
|
|
27
66
|
pos_bin_density = getHist(self.pos_scores, bins)
|
|
28
67
|
neg_bin_density = getHist(self.neg_scores, bins)
|
|
@@ -39,8 +78,6 @@ class HDy(MixtureModel):
|
|
|
39
78
|
|
|
40
79
|
# Find the alpha value that minimizes the distance
|
|
41
80
|
best_alphas.append(alpha_values[np.argmin(distances)])
|
|
42
|
-
|
|
43
|
-
# Compute the median of the best alpha values as the final prevalence estimate
|
|
44
|
-
prevalence = np.median(best_alphas)
|
|
81
|
+
distances.append(min(distances))
|
|
45
82
|
|
|
46
|
-
return
|
|
83
|
+
return best_alphas, distances
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: mlquantify
|
|
3
|
-
Version: 0.0.11.
|
|
3
|
+
Version: 0.0.11.6
|
|
4
4
|
Summary: Quantification Library
|
|
5
5
|
Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
|
|
6
6
|
Maintainer: Luiz Fernando Luth Junior
|
|
@@ -32,7 +32,7 @@ ___
|
|
|
32
32
|
|
|
33
33
|
## Latest Release
|
|
34
34
|
|
|
35
|
-
- **Version 0.0.
|
|
35
|
+
- **Version 0.0.11.6**: Inicial beta version. For a detailed list of changes, check the [changelog](#).
|
|
36
36
|
- In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
|
|
37
37
|
- Explore the [API documentation](#) for detailed developer information.
|
|
38
38
|
- See also the library in the pypi site in [pypi mlquantify](https://pypi.org/project/mlquantify/)
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
from sklearn.base import BaseEstimator
|
|
3
|
-
|
|
4
|
-
from ._MixtureModel import MixtureModel
|
|
5
|
-
from ....utils import getHist, ternary_search
|
|
6
|
-
|
|
7
|
-
class DyS(MixtureModel):
|
|
8
|
-
"""Distribution y-Similarity framework. Is a
|
|
9
|
-
method that generalises the HDy approach by
|
|
10
|
-
considering the dissimilarity function DS as
|
|
11
|
-
a parameter of the model
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
def __init__(self, learner:BaseEstimator, measure:str="topsoe", bins_size:np.ndarray=None):
|
|
15
|
-
assert measure in ["hellinger", "topsoe", "probsymm"], "measure not valid"
|
|
16
|
-
assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
|
|
17
|
-
super().__init__(learner)
|
|
18
|
-
|
|
19
|
-
# Set up bins_size
|
|
20
|
-
if not bins_size:
|
|
21
|
-
bins_size = np.append(np.linspace(2,20,10), 30)
|
|
22
|
-
if isinstance(bins_size, list):
|
|
23
|
-
bins_size = np.asarray(bins_size)
|
|
24
|
-
|
|
25
|
-
self.bins_size = bins_size
|
|
26
|
-
self.measure = measure
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def _compute_prevalence(self, test_scores:np.ndarray) -> float: #creating bins from 10 to 110 with step size 10
|
|
30
|
-
# Compute prevalence by evaluating the distance metric across various bin sizes
|
|
31
|
-
|
|
32
|
-
result = []
|
|
33
|
-
|
|
34
|
-
# Iterate over each bin size
|
|
35
|
-
for bins in self.bins_size:
|
|
36
|
-
# Compute histogram densities for positive, negative, and test scores
|
|
37
|
-
pos_bin_density = getHist(self.pos_scores, bins)
|
|
38
|
-
neg_bin_density = getHist(self.neg_scores, bins)
|
|
39
|
-
test_bin_density = getHist(test_scores, bins)
|
|
40
|
-
|
|
41
|
-
# Define the function to minimize
|
|
42
|
-
def f(x):
|
|
43
|
-
# Combine densities using a mixture of positive and negative densities
|
|
44
|
-
train_combined_density = (pos_bin_density * x) + (neg_bin_density * (1 - x))
|
|
45
|
-
# Calculate the distance between combined density and test density
|
|
46
|
-
return self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
|
|
47
|
-
|
|
48
|
-
# Use ternary search to find the best x that minimizes the distance
|
|
49
|
-
result.append(ternary_search(0, 1, f))
|
|
50
|
-
|
|
51
|
-
# Use the median of the results as the final prevalence estimate
|
|
52
|
-
prevalence = np.median(result)
|
|
53
|
-
|
|
54
|
-
return prevalence
|
|
55
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/__init__.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/acc.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/max.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/ms.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/ms2.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/pacc.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/t50.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/ThreholdOptm/x.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/mixtureModels/__init__.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/mixtureModels/smm.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/methods/aggregative/mixtureModels/sord.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/general_purposes/get_real_prev.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/general_purposes/load_quantifier.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/general_purposes/round_protocol_df.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mlquantify-0.0.11.4 → mlquantify-0.0.11.6}/mlquantify/utils/method_purposes/ternary_search.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|