PyPI - mlquantify - Versions diffs - 0.0.11.3__tar.gz → 0.0.11.5__tar.gz - Mend

mlquantify 0.0.11.3.tar.gz → 0.0.11.5.tar.gz

This diff shows the content changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (78) hide show

{mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mlquantify
-Version: 0.0.11.3
+Version: 0.0.11.5
 Summary: Quantification Library
 Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
 Maintainer: Luiz Fernando Luth Junior
@@ -12,6 +12,14 @@ Classifier: Operating System :: Unix
 Classifier: Operating System :: MacOS :: MacOS X
 Classifier: Operating System :: Microsoft :: Windows
 Description-Content-Type: text/markdown
+Requires-Dist: scikit-learn
+Requires-Dist: numpy
+Requires-Dist: scipy
+Requires-Dist: joblib
+Requires-Dist: tqdm
+Requires-Dist: pandas
+Requires-Dist: xlrd
+Requires-Dist: matplotlib
 <h1 align="center">MLQuantify</h1>
 <h4 align="center">A Python Package for Quantification</h4>
@@ -45,7 +53,7 @@ ___
 | Section | Description |
 |---|---|
-| **Quantification Methods** | Methods for quantification, such as classify & Count Correct methods, Threshold Optimization, Mixture Models and more.|
+| **21 Quantification Methods** | Methods for quantification, such as classify & Count Correct methods, Threshold Optimization, Mixture Models and more.|
 | **Dynamic class management** | All methods are dynamic, and handles multiclass and binary problems, in case of binary it makes One-Vs-All (OVA) automatically. |
 | **Model Selection** | Criteria and processes used to select the best model, such as grid-search for the case of quantification|
 | **Evaluation Metrics** | Specific metrics used to evaluate quantification performance, (e.g., AE, BIAS, NAE, SE, KLD, etc.). |

{mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/README.md RENAMED Viewed

@@ -30,7 +30,7 @@ ___
 | Section | Description |
 |---|---|
-| **Quantification Methods** | Methods for quantification, such as classify & Count Correct methods, Threshold Optimization, Mixture Models and more.|
+| **21 Quantification Methods** | Methods for quantification, such as classify & Count Correct methods, Threshold Optimization, Mixture Models and more.|
 | **Dynamic class management** | All methods are dynamic, and handles multiclass and binary problems, in case of binary it makes One-Vs-All (OVA) automatically. |
 | **Model Selection** | Criteria and processes used to select the best model, such as grid-search for the case of quantification|
 | **Evaluation Metrics** | Specific metrics used to evaluate quantification performance, (e.g., AE, BIAS, NAE, SE, KLD, etc.). |

{mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/evaluation/measures/bias.py RENAMED Viewed

@@ -11,6 +11,6 @@ def bias(prev_real:np.any, prev_pred:np.any):
     abs_errors = abs(prev_pred - prev_real)
     if classes:
-        return {class_:abs_error for class_, abs_error in zip(classes, abs_errors)}
+        return {class_:float(abs_error) for class_, abs_error in zip(classes, abs_errors)}
     return abs_errors

{mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/gac.py RENAMED Viewed

@@ -2,7 +2,7 @@ import numpy as np
 import pandas as pd
 from sklearn.base import BaseEstimator
 from sklearn.metrics import confusion_matrix
-from sklearn.model_selection import StratifiedKFold
+from sklearn.model_selection import train_test_split
 from ...base import AggregativeQuantifier
@@ -13,10 +13,12 @@ class GAC(AggregativeQuantifier):
     and solve it via constrained least-squares regression.
     """
-    def __init__(self, learner: BaseEstimator):
+    def __init__(self, learner: BaseEstimator, train_size:float=0.6, random_state:int=None):
         assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
         self.learner = learner
         self.cond_prob_matrix = None
+        self.train_size = train_size
+        self.random_state = random_state
     def _fit_method(self, X, y):
         # Ensure X and y are DataFrames
@@ -29,26 +31,17 @@ class GAC(AggregativeQuantifier):
             y_pred = self.learner.predict(X)
             y_label = y
         else:
-            # Cross-validation for generating predictions
-            skf = StratifiedKFold(n_splits=self.cv_folds)
-            y_pred = []
-            y_label = []
+            X_train, X_val, y_train, y_val = train_test_split(
+                X, y, train_size=self.train_size, stratify=y, random_state=self.random_state
+            )
-            for train_index, valid_index in skf.split(X, y):
-                train_data = pd.DataFrame(X.iloc[train_index])
-                train_label = y.iloc[train_index]
-                valid_data = pd.DataFrame(X.iloc[valid_index])
-                valid_label = y.iloc[valid_index]
-                self.learner.fit(train_data, train_label)
-                y_pred.extend(self.learner.predict(valid_data))
-                y_label.extend(valid_label)
+            self.learner.fit(X_train, y_train)
+            y_label = y_val
+            y_pred = self.learner.predict(X_val)
         # Compute conditional probability matrix
-        self.cond_prob_matrix = self.get_cond_prob_matrix(self.classes, y, y_pred)
+        self.cond_prob_matrix = GAC.get_cond_prob_matrix(self.classes, y_label, y_pred)
         return self
@@ -66,11 +59,11 @@ class GAC(AggregativeQuantifier):
         return adjusted_prevalences
     @classmethod
-    def get_cond_prob_matrix(cls, classes:list, true_labels:np.ndarray, predictions:np.ndarray) -> np.ndarray:
+    def get_cond_prob_matrix(cls, classes:list, y_labels:np.ndarray, predictions:np.ndarray) -> np.ndarray:
         """ Estimate the conditional probability matrix P(yi|yj)"""
-        CM = confusion_matrix(true_labels, predictions, labels=classes).T
-        CM = CM.astype(np.float32)
+        CM = confusion_matrix(y_labels, predictions, labels=classes).T
+        CM = CM.astype(float)
         class_counts = CM.sum(axis=0)
         for i, _ in enumerate(classes):
             if class_counts[i] == 0:
@@ -91,6 +84,6 @@ class GAC(AggregativeQuantifier):
             adjusted_prevalences = np.linalg.solve(A, B)
             adjusted_prevalences = np.clip(adjusted_prevalences, 0, 1)
             adjusted_prevalences /= adjusted_prevalences.sum()
-        except (np.linalg.LinAlgError, ValueError):
+        except (np.linalg.LinAlgError):
             adjusted_prevalences = predicted_prevalences  # No way to adjust them
         return adjusted_prevalences

{mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify/methods/aggregative/gpac.py RENAMED Viewed

@@ -1,7 +1,7 @@
 import numpy as np
 import pandas as pd
 from sklearn.base import BaseEstimator
-from sklearn.model_selection import StratifiedKFold
+from sklearn.model_selection import train_test_split
 from .gac import GAC
 from ...base import AggregativeQuantifier
@@ -14,10 +14,12 @@ class GPAC(AggregativeQuantifier):
     """
-    def __init__(self, learner: BaseEstimator):
+    def __init__(self, learner: BaseEstimator, train_size:float=0.6, random_state:int=None):
         assert isinstance(learner, BaseEstimator), "learner object is not an estimator"
         self.learner = learner
         self.cond_prob_matrix = None
+        self.train_size = train_size
+        self.random_state = random_state
     def _fit_method(self, X, y):
         # Convert X and y to DataFrames if they are numpy arrays
@@ -28,31 +30,20 @@ class GPAC(AggregativeQuantifier):
         if self.learner_fitted:
             # Use existing model to predict
-            predictions = self.learner.predict(X)
-            true_labels = y
+            y_pred = self.learner.predict(X)
+            y_labels = y
         else:
-            # Perform cross-validation to generate predictions
-            skf = StratifiedKFold(n_splits=self.cv_folds)
-            predictions = []
-            true_labels = []
+            X_train, X_val, y_train, y_val = train_test_split(
+                X, y, train_size=self.train_size, stratify=y, random_state=self.random_state
+            )
-            for train_index, valid_index in skf.split(X, y):
-                # Split data into training and validation sets
-                train_data = pd.DataFrame(X.iloc[train_index])
-                train_labels = y.iloc[train_index]
-                valid_data = pd.DataFrame(X.iloc[valid_index])
-                valid_labels = y.iloc[valid_index]
-                # Train the learner
-                self.learner.fit(train_data, train_labels)
-                # Predict and collect results
-                predictions.extend(self.learner.predict(valid_data))
-                true_labels.extend(valid_labels)
+            self.learner.fit(X_train, y_train)
+            y_labels = y_val
+            y_pred = self.learner.predict(X_val)
         # Compute conditional probability matrix using GAC
-        self.cond_prob_matrix = GAC.get_cond_prob_matrix(self.classes, true_labels, predictions)
+        self.cond_prob_matrix = GAC.get_cond_prob_matrix(self.classes, y_labels, y_pred)
         return self
@@ -73,15 +64,15 @@ class GPAC(AggregativeQuantifier):
         return adjusted_prevalences
     @classmethod
-    def get_cond_prob_matrix(cls, classes:list, true_labels:np.ndarray, predictions:np.ndarray) -> np.ndarray:
+    def get_cond_prob_matrix(cls, classes:list, y_labels:np.ndarray, y_pred:np.ndarray) -> np.ndarray:
         """Estimate the matrix where entry (i,j) is the estimate of P(yi|yj)"""
         n_classes = len(classes)
         cond_prob_matrix = np.eye(n_classes)
         for i, class_ in enumerate(classes):
-            class_indices = true_labels == class_
+            class_indices = y_labels == class_
             if class_indices.any():
-                cond_prob_matrix[i] = predictions[class_indices].mean(axis=0)
+                cond_prob_matrix[i] = y_pred[class_indices].mean(axis=0)
         return cond_prob_matrix.T

{mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/mlquantify.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mlquantify
-Version: 0.0.11.3
+Version: 0.0.11.5
 Summary: Quantification Library
 Home-page: https://github.com/luizfernandolj/QuantifyML/tree/master
 Maintainer: Luiz Fernando Luth Junior
@@ -12,6 +12,14 @@ Classifier: Operating System :: Unix
 Classifier: Operating System :: MacOS :: MacOS X
 Classifier: Operating System :: Microsoft :: Windows
 Description-Content-Type: text/markdown
+Requires-Dist: scikit-learn
+Requires-Dist: numpy
+Requires-Dist: scipy
+Requires-Dist: joblib
+Requires-Dist: tqdm
+Requires-Dist: pandas
+Requires-Dist: xlrd
+Requires-Dist: matplotlib
 <h1 align="center">MLQuantify</h1>
 <h4 align="center">A Python Package for Quantification</h4>
@@ -45,7 +53,7 @@ ___
 | Section | Description |
 |---|---|
-| **Quantification Methods** | Methods for quantification, such as classify & Count Correct methods, Threshold Optimization, Mixture Models and more.|
+| **21 Quantification Methods** | Methods for quantification, such as classify & Count Correct methods, Threshold Optimization, Mixture Models and more.|
 | **Dynamic class management** | All methods are dynamic, and handles multiclass and binary problems, in case of binary it makes One-Vs-All (OVA) automatically. |
 | **Model Selection** | Criteria and processes used to select the best model, such as grid-search for the case of quantification|
 | **Evaluation Metrics** | Specific metrics used to evaluate quantification performance, (e.g., AE, BIAS, NAE, SE, KLD, etc.). |

{mlquantify-0.0.11.3 → mlquantify-0.0.11.5}/setup.py RENAMED Viewed

@@ -6,7 +6,7 @@ here = pathlib.Path(__file__).parent.resolve()
 long_description = (here / 'README.md').read_text(encoding='utf-8')
-VERSION = '0.0.11.3'
+VERSION = '0.0.11.5'
 DESCRIPTION = 'Quantification Library'
 # Setting up