mlquantify 0.0.11.5__tar.gz → 0.0.11.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/PKG-INFO +8 -2
  2. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/README.md +7 -1
  3. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py +0 -1
  4. mlquantify-0.0.11.7/mlquantify/methods/aggregative/mixtureModels/dys.py +107 -0
  5. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/dys_syn.py +63 -16
  6. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/hdy.py +44 -7
  7. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify.egg-info/PKG-INFO +8 -2
  8. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/setup.py +1 -1
  9. mlquantify-0.0.11.5/mlquantify/methods/aggregative/mixtureModels/dys.py +0 -55
  10. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/__init__.py +0 -0
  11. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/base.py +0 -0
  12. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/classification/__init__.py +0 -0
  13. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/classification/pwkclf.py +0 -0
  14. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/__init__.py +0 -0
  15. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/__init__.py +0 -0
  16. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/ae.py +0 -0
  17. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/bias.py +0 -0
  18. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/kld.py +0 -0
  19. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/mse.py +0 -0
  20. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/nae.py +0 -0
  21. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/nkld.py +0 -0
  22. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/nrae.py +0 -0
  23. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/rae.py +0 -0
  24. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/measures/se.py +0 -0
  25. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/protocol/_Protocol.py +0 -0
  26. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/protocol/__init__.py +0 -0
  27. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/protocol/app.py +0 -0
  28. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/evaluation/protocol/npp.py +0 -0
  29. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/__init__.py +0 -0
  30. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/_ThreholdOptimization.py +0 -0
  31. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/__init__.py +0 -0
  32. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/acc.py +0 -0
  33. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/max.py +0 -0
  34. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/ms.py +0 -0
  35. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/ms2.py +0 -0
  36. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/pacc.py +0 -0
  37. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/t50.py +0 -0
  38. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/ThreholdOptm/x.py +0 -0
  39. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/__init__.py +0 -0
  40. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/cc.py +0 -0
  41. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/emq.py +0 -0
  42. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/fm.py +0 -0
  43. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/gac.py +0 -0
  44. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/gpac.py +0 -0
  45. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/__init__.py +0 -0
  46. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/smm.py +0 -0
  47. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/sord.py +0 -0
  48. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/pcc.py +0 -0
  49. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/pwk.py +0 -0
  50. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/meta/__init__.py +0 -0
  51. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/meta/ensemble.py +0 -0
  52. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/non_aggregative/__init__.py +0 -0
  53. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/non_aggregative/hdx.py +0 -0
  54. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/model_selection.py +0 -0
  55. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/plots/__init__.py +0 -0
  56. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/plots/distribution_plot.py +0 -0
  57. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/plots/protocol_plot.py +0 -0
  58. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/__init__.py +0 -0
  59. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/__init__.py +0 -0
  60. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/convert_col_to_array.py +0 -0
  61. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/generate_artificial_indexes.py +0 -0
  62. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/get_real_prev.py +0 -0
  63. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/load_quantifier.py +0 -0
  64. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/make_prevs.py +0 -0
  65. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/normalize.py +0 -0
  66. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/parallel.py +0 -0
  67. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/general_purposes/round_protocol_df.py +0 -0
  68. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/__init__.py +0 -0
  69. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/distances.py +0 -0
  70. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/getHist.py +0 -0
  71. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/get_scores.py +0 -0
  72. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/moss.py +0 -0
  73. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/ternary_search.py +0 -0
  74. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/utils/method_purposes/tprfpr.py +0 -0
  75. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify.egg-info/SOURCES.txt +0 -0
  76. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify.egg-info/dependency_links.txt +0 -0
  77. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify.egg-info/requires.txt +0 -0
  78. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify.egg-info/top_level.txt +0 -0
  79. {mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mlquantify
3
- Version: 0.0.11.5
3
+ Version: 0.0.11.7
4
4
  Summary: Quantification Library
5
5
  Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
6
6
  Maintainer: Luiz Fernando Luth Junior
@@ -32,7 +32,7 @@ ___
32
32
 
33
33
  ## Latest Release
34
34
 
35
- - **Version 0.0.1**: Inicial beta version. For a detailed list of changes, check the [changelog](#).
35
+ - **Version 0.0.11.6**: Initial beta version. For a detailed list of changes, check the [changelog](#).
36
36
  - In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
37
37
  - Explore the [API documentation](#) for detailed developer information.
38
38
  - See also the library in the pypi site in [pypi mlquantify](https://pypi.org/project/mlquantify/)
@@ -47,6 +47,12 @@ To install mlquantify, run the following command:
47
47
  pip install mlquantify
48
48
  ```
49
49
 
50
+ If you only want to update, run the code below:
51
+
52
+ ```bash
53
+ pip install --upgrade mlquantify
54
+ ```
55
+
50
56
  ___
51
57
 
52
58
  ## Contents
@@ -9,7 +9,7 @@ ___
9
9
 
10
10
  ## Latest Release
11
11
 
12
- - **Version 0.0.1**: Inicial beta version. For a detailed list of changes, check the [changelog](#).
12
+ - **Version 0.0.11.6**: Initial beta version. For a detailed list of changes, check the [changelog](#).
13
13
  - In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
14
14
  - Explore the [API documentation](#) for detailed developer information.
15
15
  - See also the library in the pypi site in [pypi mlquantify](https://pypi.org/project/mlquantify/)
@@ -24,6 +24,12 @@ To install mlquantify, run the following command:
24
24
  pip install mlquantify
25
25
  ```
26
26
 
27
+ If you only want to update, run the code below:
28
+
29
+ ```bash
30
+ pip install --upgrade mlquantify
31
+ ```
32
+
27
33
  ___
28
34
 
29
35
  ## Contents
@@ -16,7 +16,6 @@ class MixtureModel(AggregativeQuantifier):
16
16
  self.learner = learner
17
17
  self.pos_scores = None
18
18
  self.neg_scores = None
19
- self.distance = None
20
19
 
21
20
  @property
22
21
  def multiclass_method(self) -> bool:
@@ -0,0 +1,107 @@
1
+ import numpy as np
2
+ from sklearn.base import BaseEstimator
3
+
4
+ from ._MixtureModel import MixtureModel
5
+ from ....utils import getHist, ternary_search
6
+
7
+ class DyS(MixtureModel):
8
+ """Distribution y-Similarity framework. Is a
9
+ method that generalises the HDy approach by
10
+ considering the dissimilarity function DS as
11
+ a parameter of the model
12
+ """
13
+
14
+ def __init__(self, learner:BaseEstimator, measure:str="topsoe", bins_size:np.ndarray=None):
15
+ assert measure in ["hellinger", "topsoe", "probsymm"], "measure not valid"
16
+ assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
17
+ super().__init__(learner)
18
+
19
+ # Set up bins_size
20
+ if not bins_size:
21
+ bins_size = np.append(np.linspace(2,20,10), 30)
22
+ if isinstance(bins_size, list):
23
+ bins_size = np.asarray(bins_size)
24
+
25
+ self.bins_size = bins_size
26
+ self.measure = measure
27
+ self.prevs = None # Array of prevalences that minimizes the distances
28
+
29
+
30
+ def _compute_prevalence(self, test_scores:np.ndarray) -> float:
31
+
32
+ prevs = self.GetMinDistancesDyS(test_scores)
33
+ # Use the median of the prevalences as the final prevalence estimate
34
+ prevalence = np.median(prevs)
35
+
36
+ return prevalence
37
+
38
+
39
+
40
+ def best_distance(self, X_test) -> float:
41
+
42
+ test_scores = self.learner.predict_proba(X_test)
43
+
44
+ prevs = self.GetMinDistancesDyS(test_scores)
45
+
46
+ size = len(prevs)
47
+ best_prev = np.median(prevs)
48
+
49
+ if size % 2 != 0: # ODD
50
+ index = np.argmax(prevs == best_prev)
51
+ bin_size = self.bins_size[index]
52
+ else: # EVEN
53
+ # Sort the values in self.prevs
54
+ ordered_prevs = np.sort(prevs)
55
+
56
+ # Find the two middle indices
57
+ middle1 = np.floor(size / 2).astype(int)
58
+ middle2 = np.ceil(size / 2).astype(int)
59
+
60
+ # Get the values corresponding to the median positions
61
+ median1 = ordered_prevs[middle1]
62
+ median2 = ordered_prevs[middle2]
63
+
64
+ # Find the indices of median1 and median2 in prevs
65
+ index1 = np.argmax(prevs == median1)
66
+ index2 = np.argmax(prevs == median2)
67
+
68
+ # Calculate the average of the corresponding bin sizes
69
+ bin_size = np.mean([self.bins_size[index1], self.bins_size[index2]])
70
+
71
+
72
+ pos_bin_density = getHist(self.pos_scores, bin_size)
73
+ neg_bin_density = getHist(self.neg_scores, bin_size)
74
+ test_bin_density = getHist(test_scores, bin_size)
75
+
76
+ train_combined_density = (pos_bin_density * best_prev) + (neg_bin_density * (1 - best_prev))
77
+
78
+ distance = self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
79
+
80
+ return distance
81
+
82
+
83
+ def GetMinDistancesDyS(self, test_scores) -> list:
84
+ # Compute prevalence by evaluating the distance metric across various bin sizes
85
+
86
+ prevs = []
87
+
88
+ # Iterate over each bin size
89
+ for bins in self.bins_size:
90
+ # Compute histogram densities for positive, negative, and test scores
91
+ pos_bin_density = getHist(self.pos_scores, bins)
92
+ neg_bin_density = getHist(self.neg_scores, bins)
93
+ test_bin_density = getHist(test_scores, bins)
94
+
95
+ # Define the function to minimize
96
+ def f(x):
97
+ # Combine densities using a mixture of positive and negative densities
98
+ train_combined_density = (pos_bin_density * x) + (neg_bin_density * (1 - x))
99
+ # Calculate the distance between combined density and test density
100
+ return self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
101
+
102
+ # Use ternary search to find the best x that minimizes the distance
103
+ prevs.append(ternary_search(0, 1, f))
104
+
105
+ return prevs
106
+
107
+
@@ -34,6 +34,7 @@ class DySsyn(MixtureModel):
34
34
  self.m = None
35
35
 
36
36
 
37
+
37
38
  def _fit_method(self, X, y):
38
39
  if not self.learner_fitted:
39
40
  self.learner.fit(X, y)
@@ -45,16 +46,41 @@ class DySsyn(MixtureModel):
45
46
 
46
47
 
47
48
  def _compute_prevalence(self, test_scores:np.ndarray) -> float: #creating bins from 10 to 110 with step size 10
49
+
50
+ distances = self.GetMinDistancesDySsyn(test_scores)
51
+
52
+ # Use the median of the prevss as the final prevalence estimate
53
+ index = min(distances, key=lambda d: distances[d][0])
54
+ prevalence = distances[index][1]
55
+
56
+ return prevalence
57
+
58
+
59
+ def best_distance(self, X_test):
60
+
61
+ test_scores = self.learner.predict_proba(X_test)
62
+
63
+ distances = self.GetMinDistancesDySsyn(test_scores)
64
+
65
+ index = min(distances, key=lambda d: distances[d][0])
66
+
67
+ distance = distances[index][0]
68
+
69
+ return distance
70
+
71
+
72
+
73
+ def GetMinDistancesDySsyn(self, test_scores) -> list:
48
74
  # Compute prevalence by evaluating the distance metric across various bin sizes
49
75
  if self.n is None:
50
76
  self.n = len(test_scores)
51
77
 
52
- distances = {}
78
+ values = {}
53
79
 
54
80
  # Iterate over each bin size
55
81
  for m in self.merge_factor:
56
82
  pos_scores, neg_scores = MoSS(self.n, self.alpha_train, m)
57
- result = []
83
+ prevs = []
58
84
  for bins in self.bins_size:
59
85
  # Compute histogram densities for positive, negative, and test scores
60
86
  pos_bin_density = getHist(pos_scores, bins)
@@ -69,21 +95,42 @@ class DySsyn(MixtureModel):
69
95
  return self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
70
96
 
71
97
  # Use ternary search to find the best x that minimizes the distance
72
- result.append(ternary_search(0, 1, f))
73
- prevalence = np.median(result)
98
+ prevs.append(ternary_search(0, 1, f))
99
+
100
+ size = len(prevs)
101
+ best_prev = np.median(prevs)
102
+
103
+ if size % 2 != 0: # ODD
104
+ index = np.argmax(prevs == best_prev)
105
+ bin_size = self.bins_size[index]
106
+ else: # EVEN
107
+ # Sort the values in self.prevs
108
+ ordered_prevs = np.sort(prevs)
109
+
110
+ # Find the two middle indices
111
+ middle1 = np.floor(size / 2).astype(int)
112
+ middle2 = np.ceil(size / 2).astype(int)
113
+
114
+ # Get the values corresponding to the median positions
115
+ median1 = ordered_prevs[middle1]
116
+ median2 = ordered_prevs[middle2]
117
+
118
+ # Find the indices of median1 and median2 in prevs
119
+ index1 = np.argmax(prevs == median1)
120
+ index2 = np.argmax(prevs == median2)
121
+
122
+ # Calculate the average of the corresponding bin sizes
123
+ bin_size = np.mean([self.bins_size[index1], self.bins_size[index2]])
124
+
74
125
 
75
- bins_size = self.bins_size[result == prevalence][0]
126
+ pos_bin_density = getHist(pos_scores, bin_size)
127
+ neg_bin_density = getHist(neg_scores, bin_size)
128
+ test_bin_density = getHist(test_scores, bin_size)
76
129
 
77
- pos_bin_density = getHist(pos_scores, bins_size)
78
- neg_bin_density = getHist(neg_scores, bins_size)
79
- test_bin_density = getHist(test_scores, bins_size)
130
+ train_combined_density = (pos_bin_density * best_prev) + (neg_bin_density * (1 - best_prev))
80
131
 
81
- train_combined_density = (pos_bin_density * prevalence) + (neg_bin_density * (1 - prevalence))
82
- d = self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
83
- distances[m] = (d, prevalence)
84
- # Use the median of the results as the final prevalence estimate
85
- index = min(distances, key=lambda d: distances[d][0])
86
- prevalence = distances[index][1]
132
+ distance = self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
87
133
 
88
- return prevalence
89
-
134
+ values[m] = (distance, best_prev)
135
+
136
+ return values
@@ -14,15 +14,54 @@ class HDy(MixtureModel):
14
14
  def __init__(self, learner: BaseEstimator):
15
15
  assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
16
16
  super().__init__(learner)
17
+
17
18
 
18
19
  def _compute_prevalence(self, test_scores: np.ndarray) -> float:
20
+
21
+ best_alphas, _ = self.GetMinDistancesHDy(test_scores)
22
+ # Compute the median of the best alpha values as the final prevalence estimate
23
+ prevalence = np.median(best_alphas)
24
+
25
+ return prevalence
26
+
27
+
28
+
29
+ def best_distance(self, X_test) -> float:
30
+
31
+ test_scores = self.learner.predict_proba(X_test)
32
+
33
+ _, distances = self.GetMinDistancesHDy(test_scores)
34
+
35
+ size = len(distances)
36
+
37
+ if size % 2 != 0: # ODD
38
+ index = size // 2
39
+ distance = distances[index]
40
+ else: # EVEN
41
+ # Find the two middle indices
42
+ middle1 = np.floor(size / 2).astype(int)
43
+ middle2 = np.ceil(size / 2).astype(int)
44
+
45
+ # Get the values corresponding to the median positions
46
+ dist1 = distances[middle1]
47
+ dist2 = distances[middle2]
48
+
49
+ # Calculate the average of the corresponding distances
50
+ distance = np.mean([dist1, dist2])
51
+
52
+ return distance
53
+
54
+
55
+ def GetMinDistancesHDy(self, test_scores: np.ndarray) -> tuple:
56
+
19
57
  # Define bin sizes and alpha values
20
- bin_size = np.arange(10, 110, 11) # Bins from 10 to 110 with a step size of 10
58
+ bins_size = np.arange(10, 110, 11) # Bins from 10 to 110 with a step size of 10
21
59
  alpha_values = np.round(np.linspace(0, 1, 101), 2) # Alpha values from 0 to 1, rounded to 2 decimal places
22
60
 
23
61
  best_alphas = []
24
-
25
- for bins in bin_size:
62
+ distances = []
63
+
64
+ for bins in bins_size:
26
65
 
27
66
  pos_bin_density = getHist(self.pos_scores, bins)
28
67
  neg_bin_density = getHist(self.neg_scores, bins)
@@ -39,8 +78,6 @@ class HDy(MixtureModel):
39
78
 
40
79
  # Find the alpha value that minimizes the distance
41
80
  best_alphas.append(alpha_values[np.argmin(distances)])
42
-
43
- # Compute the median of the best alpha values as the final prevalence estimate
44
- prevalence = np.median(best_alphas)
81
+ distances.append(min(distances))
45
82
 
46
- return prevalence
83
+ return best_alphas, distances
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: mlquantify
3
- Version: 0.0.11.5
3
+ Version: 0.0.11.7
4
4
  Summary: Quantification Library
5
5
  Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
6
6
  Maintainer: Luiz Fernando Luth Junior
@@ -32,7 +32,7 @@ ___
32
32
 
33
33
  ## Latest Release
34
34
 
35
- - **Version 0.0.1**: Inicial beta version. For a detailed list of changes, check the [changelog](#).
35
+ - **Version 0.0.11.6**: Initial beta version. For a detailed list of changes, check the [changelog](#).
36
36
  - In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
37
37
  - Explore the [API documentation](#) for detailed developer information.
38
38
  - See also the library in the pypi site in [pypi mlquantify](https://pypi.org/project/mlquantify/)
@@ -47,6 +47,12 @@ To install mlquantify, run the following command:
47
47
  pip install mlquantify
48
48
  ```
49
49
 
50
+ If you only want to update, run the code below:
51
+
52
+ ```bash
53
+ pip install --upgrade mlquantify
54
+ ```
55
+
50
56
  ___
51
57
 
52
58
  ## Contents
@@ -6,7 +6,7 @@ here = pathlib.Path(__file__).parent.resolve()
6
6
 
7
7
  long_description = (here / 'README.md').read_text(encoding='utf-8')
8
8
 
9
- VERSION = '0.0.11.5'
9
+ VERSION = '0.0.11.7'
10
10
  DESCRIPTION = 'Quantification Library'
11
11
 
12
12
  # Setting up
@@ -1,55 +0,0 @@
1
- import numpy as np
2
- from sklearn.base import BaseEstimator
3
-
4
- from ._MixtureModel import MixtureModel
5
- from ....utils import getHist, ternary_search
6
-
7
- class DyS(MixtureModel):
8
- """Distribution y-Similarity framework. Is a
9
- method that generalises the HDy approach by
10
- considering the dissimilarity function DS as
11
- a parameter of the model
12
- """
13
-
14
- def __init__(self, learner:BaseEstimator, measure:str="topsoe", bins_size:np.ndarray=None):
15
- assert measure in ["hellinger", "topsoe", "probsymm"], "measure not valid"
16
- assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
17
- super().__init__(learner)
18
-
19
- # Set up bins_size
20
- if not bins_size:
21
- bins_size = np.append(np.linspace(2,20,10), 30)
22
- if isinstance(bins_size, list):
23
- bins_size = np.asarray(bins_size)
24
-
25
- self.bins_size = bins_size
26
- self.measure = measure
27
-
28
-
29
- def _compute_prevalence(self, test_scores:np.ndarray) -> float: #creating bins from 10 to 110 with step size 10
30
- # Compute prevalence by evaluating the distance metric across various bin sizes
31
-
32
- result = []
33
-
34
- # Iterate over each bin size
35
- for bins in self.bins_size:
36
- # Compute histogram densities for positive, negative, and test scores
37
- pos_bin_density = getHist(self.pos_scores, bins)
38
- neg_bin_density = getHist(self.neg_scores, bins)
39
- test_bin_density = getHist(test_scores, bins)
40
-
41
- # Define the function to minimize
42
- def f(x):
43
- # Combine densities using a mixture of positive and negative densities
44
- train_combined_density = (pos_bin_density * x) + (neg_bin_density * (1 - x))
45
- # Calculate the distance between combined density and test density
46
- return self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
47
-
48
- # Use ternary search to find the best x that minimizes the distance
49
- result.append(ternary_search(0, 1, f))
50
-
51
- # Use the median of the results as the final prevalence estimate
52
- prevalence = np.median(result)
53
-
54
- return prevalence
55
-
File without changes