mlquantify 0.0.11.5__tar.gz → 0.0.11.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/PKG-INFO +8 -2
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/README.md +7 -1
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +0 -1
- mlquantify-0.0.11.7/mlquantify/methods/aggregative/mixtureModels/dys.py +107 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/dys_syn.py +63 -16
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/hdy.py +44 -7
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify.egg-info/PKG-INFO +8 -2
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/setup.py +1 -1
- mlquantify-0.0.11.5/mlquantify/methods/aggregative/mixtureModels/dys.py +0 -55
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/base.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/classification/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/classification/pwkclf.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/ae.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/bias.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/kld.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/mse.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/nae.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/nkld.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/nrae.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/rae.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/se.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/protocol/_Protocol.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/protocol/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/protocol/app.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/protocol/npp.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/acc.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/max.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/ms.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/ms2.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/pacc.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/t50.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/x.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/cc.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/emq.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/fm.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/gac.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/gpac.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/smm.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/sord.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/pcc.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/pwk.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/meta/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/meta/ensemble.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/non_aggregative/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/non_aggregative/hdx.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/model_selection.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/plots/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/plots/distribution_plot.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/plots/protocol_plot.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/convert_col_to_array.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/generate_artificial_indexes.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/get_real_prev.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/load_quantifier.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/make_prevs.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/normalize.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/parallel.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/round_protocol_df.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/__init__.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/distances.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/getHist.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/get_scores.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/moss.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/ternary_search.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/tprfpr.py +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify.egg-info/SOURCES.txt +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify.egg-info/dependency_links.txt +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify.egg-info/requires.txt +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify.egg-info/top_level.txt +0 -0
- {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: mlquantify
|
|
3
|
-
Version: 0.0.11.5
|
|
3
|
+
Version: 0.0.11.7
|
|
4
4
|
Summary: Quantification Library
|
|
5
5
|
Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
|
|
6
6
|
Maintainer: Luiz Fernando Luth Junior
|
|
@@ -32,7 +32,7 @@ ___
|
|
|
32
32
|
|
|
33
33
|
## Latest Release
|
|
34
34
|
|
|
35
|
-
- **Version 0.0.
|
|
35
|
+
- **Version 0.0.11.6**: Initial beta version. For a detailed list of changes, check the [changelog](#).
|
|
36
36
|
- In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
|
|
37
37
|
- Explore the [API documentation](#) for detailed developer information.
|
|
38
38
|
- See also the library's page on PyPI: [pypi mlquantify](https://pypi.org/project/mlquantify/)
|
|
@@ -47,6 +47,12 @@ To install mlquantify, run the following command:
|
|
|
47
47
|
pip install mlquantify
|
|
48
48
|
```
|
|
49
49
|
|
|
50
|
+
If you only want to update, run the code below:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install --upgrade mlquantify
|
|
54
|
+
```
|
|
55
|
+
|
|
50
56
|
___
|
|
51
57
|
|
|
52
58
|
## Contents
|
|
@@ -9,7 +9,7 @@ ___
|
|
|
9
9
|
|
|
10
10
|
## Latest Release
|
|
11
11
|
|
|
12
|
-
- **Version 0.0.
|
|
12
|
+
- **Version 0.0.11.6**: Initial beta version. For a detailed list of changes, check the [changelog](#).
|
|
13
13
|
- In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
|
|
14
14
|
- Explore the [API documentation](#) for detailed developer information.
|
|
15
15
|
- See also the library's page on PyPI: [pypi mlquantify](https://pypi.org/project/mlquantify/)
|
|
@@ -24,6 +24,12 @@ To install mlquantify, run the following command:
|
|
|
24
24
|
pip install mlquantify
|
|
25
25
|
```
|
|
26
26
|
|
|
27
|
+
If you only want to update, run the code below:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install --upgrade mlquantify
|
|
31
|
+
```
|
|
32
|
+
|
|
27
33
|
___
|
|
28
34
|
|
|
29
35
|
## Contents
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from sklearn.base import BaseEstimator
|
|
3
|
+
|
|
4
|
+
from ._MixtureModel import MixtureModel
|
|
5
|
+
from ....utils import getHist, ternary_search
|
|
6
|
+
|
|
7
|
+
class DyS(MixtureModel):
|
|
8
|
+
"""Distribution y-Similarity framework. Is a
|
|
9
|
+
method that generalises the HDy approach by
|
|
10
|
+
considering the dissimilarity function DS as
|
|
11
|
+
a parameter of the model
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
def __init__(self, learner:BaseEstimator, measure:str="topsoe", bins_size:np.ndarray=None):
|
|
15
|
+
assert measure in ["hellinger", "topsoe", "probsymm"], "measure not valid"
|
|
16
|
+
assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
|
|
17
|
+
super().__init__(learner)
|
|
18
|
+
|
|
19
|
+
# Set up bins_size
|
|
20
|
+
if not bins_size:
|
|
21
|
+
bins_size = np.append(np.linspace(2,20,10), 30)
|
|
22
|
+
if isinstance(bins_size, list):
|
|
23
|
+
bins_size = np.asarray(bins_size)
|
|
24
|
+
|
|
25
|
+
self.bins_size = bins_size
|
|
26
|
+
self.measure = measure
|
|
27
|
+
self.prevs = None # Array of prevalences that minimizes the distances
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _compute_prevalence(self, test_scores:np.ndarray) -> float:
|
|
31
|
+
|
|
32
|
+
prevs = self.GetMinDistancesDyS(test_scores)
|
|
33
|
+
# Use the median of the prevalences as the final prevalence estimate
|
|
34
|
+
prevalence = np.median(prevs)
|
|
35
|
+
|
|
36
|
+
return prevalence
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def best_distance(self, X_test) -> float:
|
|
41
|
+
|
|
42
|
+
test_scores = self.learner.predict_proba(X_test)
|
|
43
|
+
|
|
44
|
+
prevs = self.GetMinDistancesDyS(test_scores)
|
|
45
|
+
|
|
46
|
+
size = len(prevs)
|
|
47
|
+
best_prev = np.median(prevs)
|
|
48
|
+
|
|
49
|
+
if size % 2 != 0: # ODD
|
|
50
|
+
index = np.argmax(prevs == best_prev)
|
|
51
|
+
bin_size = self.bins_size[index]
|
|
52
|
+
else: # EVEN
|
|
53
|
+
# Sort the values in self.prevs
|
|
54
|
+
ordered_prevs = np.sort(prevs)
|
|
55
|
+
|
|
56
|
+
# Find the two middle indices
|
|
57
|
+
middle1 = np.floor(size / 2).astype(int)
|
|
58
|
+
middle2 = np.ceil(size / 2).astype(int)
|
|
59
|
+
|
|
60
|
+
# Get the values corresponding to the median positions
|
|
61
|
+
median1 = ordered_prevs[middle1]
|
|
62
|
+
median2 = ordered_prevs[middle2]
|
|
63
|
+
|
|
64
|
+
# Find the indices of median1 and median2 in prevs
|
|
65
|
+
index1 = np.argmax(prevs == median1)
|
|
66
|
+
index2 = np.argmax(prevs == median2)
|
|
67
|
+
|
|
68
|
+
# Calculate the average of the corresponding bin sizes
|
|
69
|
+
bin_size = np.mean([self.bins_size[index1], self.bins_size[index2]])
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
pos_bin_density = getHist(self.pos_scores, bin_size)
|
|
73
|
+
neg_bin_density = getHist(self.neg_scores, bin_size)
|
|
74
|
+
test_bin_density = getHist(test_scores, bin_size)
|
|
75
|
+
|
|
76
|
+
train_combined_density = (pos_bin_density * best_prev) + (neg_bin_density * (1 - best_prev))
|
|
77
|
+
|
|
78
|
+
distance = self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
|
|
79
|
+
|
|
80
|
+
return distance
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def GetMinDistancesDyS(self, test_scores) -> list:
|
|
84
|
+
# Compute prevalence by evaluating the distance metric across various bin sizes
|
|
85
|
+
|
|
86
|
+
prevs = []
|
|
87
|
+
|
|
88
|
+
# Iterate over each bin size
|
|
89
|
+
for bins in self.bins_size:
|
|
90
|
+
# Compute histogram densities for positive, negative, and test scores
|
|
91
|
+
pos_bin_density = getHist(self.pos_scores, bins)
|
|
92
|
+
neg_bin_density = getHist(self.neg_scores, bins)
|
|
93
|
+
test_bin_density = getHist(test_scores, bins)
|
|
94
|
+
|
|
95
|
+
# Define the function to minimize
|
|
96
|
+
def f(x):
|
|
97
|
+
# Combine densities using a mixture of positive and negative densities
|
|
98
|
+
train_combined_density = (pos_bin_density * x) + (neg_bin_density * (1 - x))
|
|
99
|
+
# Calculate the distance between combined density and test density
|
|
100
|
+
return self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
|
|
101
|
+
|
|
102
|
+
# Use ternary search to find the best x that minimizes the distance
|
|
103
|
+
prevs.append(ternary_search(0, 1, f))
|
|
104
|
+
|
|
105
|
+
return prevs
|
|
106
|
+
|
|
107
|
+
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/dys_syn.py
RENAMED
|
@@ -34,6 +34,7 @@ class DySsyn(MixtureModel):
|
|
|
34
34
|
self.m = None
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
|
|
37
38
|
def _fit_method(self, X, y):
|
|
38
39
|
if not self.learner_fitted:
|
|
39
40
|
self.learner.fit(X, y)
|
|
@@ -45,16 +46,41 @@ class DySsyn(MixtureModel):
|
|
|
45
46
|
|
|
46
47
|
|
|
47
48
|
def _compute_prevalence(self, test_scores:np.ndarray) -> float: #creating bins from 10 to 110 with step size 10
|
|
49
|
+
|
|
50
|
+
distances = self.GetMinDistancesDySsyn(test_scores)
|
|
51
|
+
|
|
52
|
+
# Pick the merge factor with the smallest distance and use its prevalence as the final estimate
|
|
53
|
+
index = min(distances, key=lambda d: distances[d][0])
|
|
54
|
+
prevalence = distances[index][1]
|
|
55
|
+
|
|
56
|
+
return prevalence
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def best_distance(self, X_test):
|
|
60
|
+
|
|
61
|
+
test_scores = self.learner.predict_proba(X_test)
|
|
62
|
+
|
|
63
|
+
distances = self.GetMinDistancesDySsyn(test_scores)
|
|
64
|
+
|
|
65
|
+
index = min(distances, key=lambda d: distances[d][0])
|
|
66
|
+
|
|
67
|
+
distance = distances[index][0]
|
|
68
|
+
|
|
69
|
+
return distance
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def GetMinDistancesDySsyn(self, test_scores) -> list:
|
|
48
74
|
# Compute prevalence by evaluating the distance metric across various bin sizes
|
|
49
75
|
if self.n is None:
|
|
50
76
|
self.n = len(test_scores)
|
|
51
77
|
|
|
52
|
-
|
|
78
|
+
values = {}
|
|
53
79
|
|
|
54
80
|
# Iterate over each bin size
|
|
55
81
|
for m in self.merge_factor:
|
|
56
82
|
pos_scores, neg_scores = MoSS(self.n, self.alpha_train, m)
|
|
57
|
-
|
|
83
|
+
prevs = []
|
|
58
84
|
for bins in self.bins_size:
|
|
59
85
|
# Compute histogram densities for positive, negative, and test scores
|
|
60
86
|
pos_bin_density = getHist(pos_scores, bins)
|
|
@@ -69,21 +95,42 @@ class DySsyn(MixtureModel):
|
|
|
69
95
|
return self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
|
|
70
96
|
|
|
71
97
|
# Use ternary search to find the best x that minimizes the distance
|
|
72
|
-
|
|
73
|
-
|
|
98
|
+
prevs.append(ternary_search(0, 1, f))
|
|
99
|
+
|
|
100
|
+
size = len(prevs)
|
|
101
|
+
best_prev = np.median(prevs)
|
|
102
|
+
|
|
103
|
+
if size % 2 != 0: # ODD
|
|
104
|
+
index = np.argmax(prevs == best_prev)
|
|
105
|
+
bin_size = self.bins_size[index]
|
|
106
|
+
else: # EVEN
|
|
107
|
+
# Sort the values in self.prevs
|
|
108
|
+
ordered_prevs = np.sort(prevs)
|
|
109
|
+
|
|
110
|
+
# Find the two middle indices
|
|
111
|
+
middle1 = np.floor(size / 2).astype(int)
|
|
112
|
+
middle2 = np.ceil(size / 2).astype(int)
|
|
113
|
+
|
|
114
|
+
# Get the values corresponding to the median positions
|
|
115
|
+
median1 = ordered_prevs[middle1]
|
|
116
|
+
median2 = ordered_prevs[middle2]
|
|
117
|
+
|
|
118
|
+
# Find the indices of median1 and median2 in prevs
|
|
119
|
+
index1 = np.argmax(prevs == median1)
|
|
120
|
+
index2 = np.argmax(prevs == median2)
|
|
121
|
+
|
|
122
|
+
# Calculate the average of the corresponding bin sizes
|
|
123
|
+
bin_size = np.mean([self.bins_size[index1], self.bins_size[index2]])
|
|
124
|
+
|
|
74
125
|
|
|
75
|
-
|
|
126
|
+
pos_bin_density = getHist(pos_scores, bin_size)
|
|
127
|
+
neg_bin_density = getHist(neg_scores, bin_size)
|
|
128
|
+
test_bin_density = getHist(test_scores, bin_size)
|
|
76
129
|
|
|
77
|
-
|
|
78
|
-
neg_bin_density = getHist(neg_scores, bins_size)
|
|
79
|
-
test_bin_density = getHist(test_scores, bins_size)
|
|
130
|
+
train_combined_density = (pos_bin_density * best_prev) + (neg_bin_density * (1 - best_prev))
|
|
80
131
|
|
|
81
|
-
|
|
82
|
-
d = self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
|
|
83
|
-
distances[m] = (d, prevalence)
|
|
84
|
-
# Use the median of the results as the final prevalence estimate
|
|
85
|
-
index = min(distances, key=lambda d: distances[d][0])
|
|
86
|
-
prevalence = distances[index][1]
|
|
132
|
+
distance = self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
|
|
87
133
|
|
|
88
|
-
|
|
89
|
-
|
|
134
|
+
values[m] = (distance, best_prev)
|
|
135
|
+
|
|
136
|
+
return values
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/hdy.py
RENAMED
|
@@ -14,15 +14,54 @@ class HDy(MixtureModel):
|
|
|
14
14
|
def __init__(self, learner: BaseEstimator):
|
|
15
15
|
assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
|
|
16
16
|
super().__init__(learner)
|
|
17
|
+
|
|
17
18
|
|
|
18
19
|
def _compute_prevalence(self, test_scores: np.ndarray) -> float:
|
|
20
|
+
|
|
21
|
+
best_alphas, _ = self.GetMinDistancesHDy(test_scores)
|
|
22
|
+
# Compute the median of the best alpha values as the final prevalence estimate
|
|
23
|
+
prevalence = np.median(best_alphas)
|
|
24
|
+
|
|
25
|
+
return prevalence
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def best_distance(self, X_test) -> float:
|
|
30
|
+
|
|
31
|
+
test_scores = self.learner.predict_proba(X_test)
|
|
32
|
+
|
|
33
|
+
_, distances = self.GetMinDistancesHDy(test_scores)
|
|
34
|
+
|
|
35
|
+
size = len(distances)
|
|
36
|
+
|
|
37
|
+
if size % 2 != 0: # ODD
|
|
38
|
+
index = size // 2
|
|
39
|
+
distance = distances[index]
|
|
40
|
+
else: # EVEN
|
|
41
|
+
# Find the two middle indices
|
|
42
|
+
middle1 = np.floor(size / 2).astype(int)
|
|
43
|
+
middle2 = np.ceil(size / 2).astype(int)
|
|
44
|
+
|
|
45
|
+
# Get the values corresponding to the median positions
|
|
46
|
+
dist1 = distances[middle1]
|
|
47
|
+
dist2 = distances[middle2]
|
|
48
|
+
|
|
49
|
+
# Calculate the average of the corresponding distances
|
|
50
|
+
distance = np.mean([dist1, dist2])
|
|
51
|
+
|
|
52
|
+
return distance
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def GetMinDistancesHDy(self, test_scores: np.ndarray) -> tuple:
|
|
56
|
+
|
|
19
57
|
# Define bin sizes and alpha values
|
|
20
|
-
|
|
58
|
+
bins_size = np.arange(10, 110, 11)  # Bin counts 10, 21, ..., 109 (start 10, stop 110 exclusive, step 11)
|
|
21
59
|
alpha_values = np.round(np.linspace(0, 1, 101), 2) # Alpha values from 0 to 1, rounded to 2 decimal places
|
|
22
60
|
|
|
23
61
|
best_alphas = []
|
|
24
|
-
|
|
25
|
-
|
|
62
|
+
distances = []
|
|
63
|
+
|
|
64
|
+
for bins in bins_size:
|
|
26
65
|
|
|
27
66
|
pos_bin_density = getHist(self.pos_scores, bins)
|
|
28
67
|
neg_bin_density = getHist(self.neg_scores, bins)
|
|
@@ -39,8 +78,6 @@ class HDy(MixtureModel):
|
|
|
39
78
|
|
|
40
79
|
# Find the alpha value that minimizes the distance
|
|
41
80
|
best_alphas.append(alpha_values[np.argmin(distances)])
|
|
42
|
-
|
|
43
|
-
# Compute the median of the best alpha values as the final prevalence estimate
|
|
44
|
-
prevalence = np.median(best_alphas)
|
|
81
|
+
distances.append(min(distances))
|
|
45
82
|
|
|
46
|
-
return
|
|
83
|
+
return best_alphas, distances
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: mlquantify
|
|
3
|
-
Version: 0.0.11.5
|
|
3
|
+
Version: 0.0.11.7
|
|
4
4
|
Summary: Quantification Library
|
|
5
5
|
Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
|
|
6
6
|
Maintainer: Luiz Fernando Luth Junior
|
|
@@ -32,7 +32,7 @@ ___
|
|
|
32
32
|
|
|
33
33
|
## Latest Release
|
|
34
34
|
|
|
35
|
-
- **Version 0.0.
|
|
35
|
+
- **Version 0.0.11.6**: Initial beta version. For a detailed list of changes, check the [changelog](#).
|
|
36
36
|
- In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
|
|
37
37
|
- Explore the [API documentation](#) for detailed developer information.
|
|
38
38
|
- See also the library's page on PyPI: [pypi mlquantify](https://pypi.org/project/mlquantify/)
|
|
@@ -47,6 +47,12 @@ To install mlquantify, run the following command:
|
|
|
47
47
|
pip install mlquantify
|
|
48
48
|
```
|
|
49
49
|
|
|
50
|
+
If you only want to update, run the code below:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install --upgrade mlquantify
|
|
54
|
+
```
|
|
55
|
+
|
|
50
56
|
___
|
|
51
57
|
|
|
52
58
|
## Contents
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
from sklearn.base import BaseEstimator
|
|
3
|
-
|
|
4
|
-
from ._MixtureModel import MixtureModel
|
|
5
|
-
from ....utils import getHist, ternary_search
|
|
6
|
-
|
|
7
|
-
class DyS(MixtureModel):
|
|
8
|
-
"""Distribution y-Similarity framework. Is a
|
|
9
|
-
method that generalises the HDy approach by
|
|
10
|
-
considering the dissimilarity function DS as
|
|
11
|
-
a parameter of the model
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
def __init__(self, learner:BaseEstimator, measure:str="topsoe", bins_size:np.ndarray=None):
|
|
15
|
-
assert measure in ["hellinger", "topsoe", "probsymm"], "measure not valid"
|
|
16
|
-
assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
|
|
17
|
-
super().__init__(learner)
|
|
18
|
-
|
|
19
|
-
# Set up bins_size
|
|
20
|
-
if not bins_size:
|
|
21
|
-
bins_size = np.append(np.linspace(2,20,10), 30)
|
|
22
|
-
if isinstance(bins_size, list):
|
|
23
|
-
bins_size = np.asarray(bins_size)
|
|
24
|
-
|
|
25
|
-
self.bins_size = bins_size
|
|
26
|
-
self.measure = measure
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def _compute_prevalence(self, test_scores:np.ndarray) -> float: #creating bins from 10 to 110 with step size 10
|
|
30
|
-
# Compute prevalence by evaluating the distance metric across various bin sizes
|
|
31
|
-
|
|
32
|
-
result = []
|
|
33
|
-
|
|
34
|
-
# Iterate over each bin size
|
|
35
|
-
for bins in self.bins_size:
|
|
36
|
-
# Compute histogram densities for positive, negative, and test scores
|
|
37
|
-
pos_bin_density = getHist(self.pos_scores, bins)
|
|
38
|
-
neg_bin_density = getHist(self.neg_scores, bins)
|
|
39
|
-
test_bin_density = getHist(test_scores, bins)
|
|
40
|
-
|
|
41
|
-
# Define the function to minimize
|
|
42
|
-
def f(x):
|
|
43
|
-
# Combine densities using a mixture of positive and negative densities
|
|
44
|
-
train_combined_density = (pos_bin_density * x) + (neg_bin_density * (1 - x))
|
|
45
|
-
# Calculate the distance between combined density and test density
|
|
46
|
-
return self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
|
|
47
|
-
|
|
48
|
-
# Use ternary search to find the best x that minimizes the distance
|
|
49
|
-
result.append(ternary_search(0, 1, f))
|
|
50
|
-
|
|
51
|
-
# Use the median of the results as the final prevalence estimate
|
|
52
|
-
prevalence = np.median(result)
|
|
53
|
-
|
|
54
|
-
return prevalence
|
|
55
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/__init__.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/acc.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/max.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/ms.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/ms2.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/pacc.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/t50.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/x.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/__init__.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/smm.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/sord.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/get_real_prev.py
RENAMED
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/load_quantifier.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/round_protocol_df.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/ternary_search.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|