PyPI - mlquantify - Versions diffs - 0.0.11.5__tar.gz → 0.0.11.7__tar.gz - Mend

mlquantify 0.0.11.5tar.gz → 0.0.11.7tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79) hide show

{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mlquantify
-Version: 0.0.11.5
+Version: 0.0.11.7
 Summary: Quantification Library
 Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
 Maintainer: Luiz Fernando Luth Junior
@@ -32,7 +32,7 @@ ___
 ## Latest Release
-- **Version 0.0.1**: Initial beta version. For a detailed list of changes, check the [changelog](#).
+- **Version 0.0.11.6**: Initial beta version. For a detailed list of changes, check the [changelog](#).
 - In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
 - Explore the [API documentation](#) for detailed developer information.
 - See also the library in the pypi site in [pypi mlquantify](https://pypi.org/project/mlquantify/)
@@ -47,6 +47,12 @@ To install mlquantify, run the following command:
 pip install mlquantify
 ```
+If you only want to update, run the code below:
+```bash
+pip install --upgrade mlquantify
+```
 ___
 ## Contents

{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/README.md RENAMED Viewed

@@ -9,7 +9,7 @@ ___
 ## Latest Release
-- **Version 0.0.1**: Initial beta version. For a detailed list of changes, check the [changelog](#).
+- **Version 0.0.11.6**: Initial beta version. For a detailed list of changes, check the [changelog](#).
 - In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
 - Explore the [API documentation](#) for detailed developer information.
 - See also the library in the pypi site in [pypi mlquantify](https://pypi.org/project/mlquantify/)
@@ -24,6 +24,12 @@ To install mlquantify, run the following command:
 pip install mlquantify
 ```
+If you only want to update, run the code below:
+```bash
+pip install --upgrade mlquantify
+```
 ___
 ## Contents

{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/_MixtureModel.py RENAMED Viewed

@@ -16,7 +16,6 @@ class MixtureModel(AggregativeQuantifier):
         self.learner = learner
         self.pos_scores = None
         self.neg_scores = None
-        self.distance = None
     @property
     def multiclass_method(self) -> bool:

mlquantify-0.0.11.7/mlquantify/methods/aggregative/mixtureModels/dys.py ADDED Viewed

@@ -0,0 +1,107 @@
+import numpy as np
+from sklearn.base import BaseEstimator
+from ._MixtureModel import MixtureModel
+from ....utils import getHist, ternary_search
+class DyS(MixtureModel):
+    """Distribution y-Similarity framework. Is a
+    method that generalises the HDy approach by
+    considering the dissimilarity function DS as
+    a parameter of the model
+    """
+    def __init__(self, learner:BaseEstimator, measure:str="topsoe", bins_size:np.ndarray=None):
+        assert measure in ["hellinger", "topsoe", "probsymm"], "measure not valid"
+        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
+        super().__init__(learner)
+        # Set up bins_size
+        if not bins_size:
+            bins_size = np.append(np.linspace(2,20,10), 30)
+        if isinstance(bins_size, list):
+            bins_size = np.asarray(bins_size)
+        self.bins_size = bins_size
+        self.measure = measure
+        self.prevs = None # Array of prevalences that minimizes the distances
+    def _compute_prevalence(self, test_scores:np.ndarray) -> float:
+        prevs = self.GetMinDistancesDyS(test_scores)
+        # Use the median of the prevalences as the final prevalence estimate
+        prevalence = np.median(prevs)
+        return prevalence
+    def best_distance(self, X_test) -> float:
+        test_scores = self.learner.predict_proba(X_test)
+        prevs = self.GetMinDistancesDyS(test_scores)
+        size = len(prevs)
+        best_prev = np.median(prevs)
+        if size % 2 != 0:  # ODD
+            index = np.argmax(prevs == best_prev)
+            bin_size = self.bins_size[index]
+        else:  # EVEN
+            # Sort the values in self.prevs
+            ordered_prevs = np.sort(prevs)
+            # Find the two middle indices
+            middle1 = np.floor(size / 2).astype(int)
+            middle2 = np.ceil(size / 2).astype(int)
+            # Get the values corresponding to the median positions
+            median1 = ordered_prevs[middle1]
+            median2 = ordered_prevs[middle2]
+            # Find the indices of median1 and median2 in prevs
+            index1 = np.argmax(prevs == median1)
+            index2 = np.argmax(prevs == median2)
+            # Calculate the average of the corresponding bin sizes
+            bin_size = np.mean([self.bins_size[index1], self.bins_size[index2]])
+        pos_bin_density = getHist(self.pos_scores, bin_size)
+        neg_bin_density = getHist(self.neg_scores, bin_size)
+        test_bin_density = getHist(test_scores, bin_size)
+        train_combined_density = (pos_bin_density * best_prev) + (neg_bin_density * (1 - best_prev))
+        distance = self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
+        return distance
+    def GetMinDistancesDyS(self, test_scores) -> list:
+        # Compute prevalence by evaluating the distance metric across various bin sizes
+        prevs = []
+        # Iterate over each bin size
+        for bins in self.bins_size:
+            # Compute histogram densities for positive, negative, and test scores
+            pos_bin_density = getHist(self.pos_scores, bins)
+            neg_bin_density = getHist(self.neg_scores, bins)
+            test_bin_density = getHist(test_scores, bins)
+            # Define the function to minimize
+            def f(x):
+                # Combine densities using a mixture of positive and negative densities
+                train_combined_density = (pos_bin_density * x) + (neg_bin_density * (1 - x))
+                # Calculate the distance between combined density and test density
+                return self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
+            # Use ternary search to find the best x that minimizes the distance
+            prevs.append(ternary_search(0, 1, f))
+        return prevs

{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/dys_syn.py RENAMED Viewed

@@ -34,6 +34,7 @@ class DySsyn(MixtureModel):
         self.m = None
     def _fit_method(self, X, y):
         if not self.learner_fitted:
             self.learner.fit(X, y)
@@ -45,16 +46,41 @@ class DySsyn(MixtureModel):
     def _compute_prevalence(self, test_scores:np.ndarray) -> float:    #creating bins from 10 to 110 with step size 10
+        distances = self.GetMinDistancesDySsyn(test_scores)
+        # Pick the entry with the smallest distance and use its prevalence as the final estimate
+        index = min(distances, key=lambda d: distances[d][0])
+        prevalence = distances[index][1]
+        return prevalence
+    def best_distance(self, X_test):
+        test_scores = self.learner.predict_proba(X_test)
+        distances = self.GetMinDistancesDySsyn(test_scores)
+        index = min(distances, key=lambda d: distances[d][0])
+        distance = distances[index][0]
+        return distance
+    def GetMinDistancesDySsyn(self, test_scores) -> list:
         # Compute prevalence by evaluating the distance metric across various bin sizes
         if self.n is None:
             self.n = len(test_scores)
-        distances = {}
+        values = {}
         # Iterate over each bin size
         for m in self.merge_factor:
             pos_scores, neg_scores = MoSS(self.n, self.alpha_train, m)
-            result  = []
+            prevs  = []
             for bins in self.bins_size:
                 # Compute histogram densities for positive, negative, and test scores
                 pos_bin_density = getHist(pos_scores, bins)
@@ -69,21 +95,42 @@ class DySsyn(MixtureModel):
                     return self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
                 # Use ternary search to find the best x that minimizes the distance
-                result.append(ternary_search(0, 1, f))
-            prevalence = np.median(result)
+                prevs.append(ternary_search(0, 1, f))
+            size = len(prevs)
+            best_prev = np.median(prevs)
+            if size % 2 != 0:  # ODD
+                index = np.argmax(prevs == best_prev)
+                bin_size = self.bins_size[index]
+            else:  # EVEN
+                # Sort the values in self.prevs
+                ordered_prevs = np.sort(prevs)
+                # Find the two middle indices
+                middle1 = np.floor(size / 2).astype(int)
+                middle2 = np.ceil(size / 2).astype(int)
+                # Get the values corresponding to the median positions
+                median1 = ordered_prevs[middle1]
+                median2 = ordered_prevs[middle2]
+                # Find the indices of median1 and median2 in prevs
+                index1 = np.argmax(prevs == median1)
+                index2 = np.argmax(prevs == median2)
+                # Calculate the average of the corresponding bin sizes
+                bin_size = np.mean([self.bins_size[index1], self.bins_size[index2]])
-            bins_size = self.bins_size[result == prevalence][0]
+            pos_bin_density = getHist(pos_scores, bin_size)
+            neg_bin_density = getHist(neg_scores, bin_size)
+            test_bin_density = getHist(test_scores, bin_size)
-            pos_bin_density = getHist(pos_scores, bins_size)
-            neg_bin_density = getHist(neg_scores, bins_size)
-            test_bin_density = getHist(test_scores, bins_size)
+            train_combined_density = (pos_bin_density * best_prev) + (neg_bin_density * (1 - best_prev))
-            train_combined_density = (pos_bin_density * prevalence) + (neg_bin_density * (1 - prevalence))
-            d = self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
-            distances[m] = (d, prevalence)
-        # Use the median of the results as the final prevalence estimate
-        index = min(distances, key=lambda d: distances[d][0])
-        prevalence = distances[index][1]
+            distance = self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
-        return prevalence
+            values[m] = (distance, best_prev)
+        return values

{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify/methods/aggregative/mixtureModels/hdy.py RENAMED Viewed

@@ -14,15 +14,54 @@ class HDy(MixtureModel):
     def __init__(self, learner: BaseEstimator):
         assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
         super().__init__(learner)
     def _compute_prevalence(self, test_scores: np.ndarray) -> float:
+        best_alphas, _ = self.GetMinDistancesHDy(test_scores)
+        # Compute the median of the best alpha values as the final prevalence estimate
+        prevalence = np.median(best_alphas)
+        return prevalence
+    def best_distance(self, X_test) -> float:
+        test_scores = self.learner.predict_proba(X_test)
+        _, distances = self.GetMinDistancesHDy(test_scores)
+        size = len(distances)
+        if size % 2 != 0:  # ODD
+            index = size // 2
+            distance = distances[index]
+        else:  # EVEN
+            # Find the two middle indices
+            middle1 = np.floor(size / 2).astype(int)
+            middle2 = np.ceil(size / 2).astype(int)
+            # Get the values corresponding to the median positions
+            dist1 = distances[middle1]
+            dist2 = distances[middle2]
+            # Calculate the average of the corresponding distances
+            distance = np.mean([dist1, dist2])
+        return distance
+    def GetMinDistancesHDy(self, test_scores: np.ndarray) -> tuple:
         # Define bin sizes and alpha values
-        bin_size = np.arange(10, 110, 11)  # Bins from 10 to 110 with a step size of 10
+        bins_size = np.arange(10, 110, 11)  # Bins from 10 to 110 with a step size of 10
         alpha_values = np.round(np.linspace(0, 1, 101), 2)  # Alpha values from 0 to 1, rounded to 2 decimal places
         best_alphas = []
-        for bins in bin_size:
+        distances = []
+        for bins in bins_size:
             pos_bin_density = getHist(self.pos_scores, bins)
             neg_bin_density = getHist(self.neg_scores, bins)
@@ -39,8 +78,6 @@ class HDy(MixtureModel):
             # Find the alpha value that minimizes the distance
             best_alphas.append(alpha_values[np.argmin(distances)])
-        # Compute the median of the best alpha values as the final prevalence estimate
-        prevalence = np.median(best_alphas)
+            distances.append(min(distances))
-        return prevalence
+        return best_alphas, distances

{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/mlquantify.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mlquantify
-Version: 0.0.11.5
+Version: 0.0.11.7
 Summary: Quantification Library
 Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
 Maintainer: Luiz Fernando Luth Junior
@@ -32,7 +32,7 @@ ___
 ## Latest Release
-- **Version 0.0.1**: Initial beta version. For a detailed list of changes, check the [changelog](#).
+- **Version 0.0.11.6**: Initial beta version. For a detailed list of changes, check the [changelog](#).
 - In case you need any help, refer to the [wiki](https://github.com/luizfernandolj/mlquantify/wiki).
 - Explore the [API documentation](#) for detailed developer information.
 - See also the library in the pypi site in [pypi mlquantify](https://pypi.org/project/mlquantify/)
@@ -47,6 +47,12 @@ To install mlquantify, run the following command:
 pip install mlquantify
 ```
+If you only want to update, run the code below:
+```bash
+pip install --upgrade mlquantify
+```
 ___
 ## Contents

{mlquantify-0.0.11.5 → mlquantify-0.0.11.7}/setup.py RENAMED Viewed

@@ -6,7 +6,7 @@ here = pathlib.Path(__file__).parent.resolve()
 long_description = (here / 'README.md').read_text(encoding='utf-8')
-VERSION = '0.0.11.5'
+VERSION = '0.0.11.7'
 DESCRIPTION = 'Quantification Library'
 # Setting up

mlquantify-0.0.11.5/mlquantify/methods/aggregative/mixtureModels/dys.py DELETED Viewed

@@ -1,55 +0,0 @@
-import numpy as np
-from sklearn.base import BaseEstimator
-from ._MixtureModel import MixtureModel
-from ....utils import getHist, ternary_search
-class DyS(MixtureModel):
-    """Distribution y-Similarity framework. Is a
-    method that generalises the HDy approach by
-    considering the dissimilarity function DS as
-    a parameter of the model
-    """
-    def __init__(self, learner:BaseEstimator, measure:str="topsoe", bins_size:np.ndarray=None):
-        assert measure in ["hellinger", "topsoe", "probsymm"], "measure not valid"
-        assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
-        super().__init__(learner)
-        # Set up bins_size
-        if not bins_size:
-            bins_size = np.append(np.linspace(2,20,10), 30)
-        if isinstance(bins_size, list):
-            bins_size = np.asarray(bins_size)
-        self.bins_size = bins_size
-        self.measure = measure
-    def _compute_prevalence(self, test_scores:np.ndarray) -> float:    #creating bins from 10 to 110 with step size 10
-        # Compute prevalence by evaluating the distance metric across various bin sizes
-        result = []
-        # Iterate over each bin size
-        for bins in self.bins_size:
-            # Compute histogram densities for positive, negative, and test scores
-            pos_bin_density = getHist(self.pos_scores, bins)
-            neg_bin_density = getHist(self.neg_scores, bins)
-            test_bin_density = getHist(test_scores, bins)
-            # Define the function to minimize
-            def f(x):
-                # Combine densities using a mixture of positive and negative densities
-                train_combined_density = (pos_bin_density * x) + (neg_bin_density * (1 - x))
-                # Calculate the distance between combined density and test density
-                return self.get_distance(train_combined_density, test_bin_density, measure=self.measure)
-            # Use ternary search to find the best x that minimizes the distance
-            result.append(ternary_search(0, 1, f))
-        # Use the median of the results as the final prevalence estimate
-        prevalence = np.median(result)
-        return prevalence