PyPI - combatlearn - Versions diffs - 0.2.1__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

combatlearn 0.2.1 (py3-none-any.whl) → 1.0.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

combatlearn/__init__.py CHANGED Viewed

@@ -1,4 +1,5 @@
-from .combat import ComBatModel, ComBat
+from .combat import ComBat
-__all__ = ["ComBatModel", "ComBat"]
-__version__ = "0.2.1"
+__all__ = ["ComBat"]
+__version__ = "1.0.0"
+__author__ = "Ettore Rocchi"

combatlearn/combat.py CHANGED Viewed

@@ -14,28 +14,17 @@ import numpy as np
 import numpy.linalg as la
 import pandas as pd
 from sklearn.base import BaseEstimator, TransformerMixin
-from sklearn.utils.validation import check_is_fitted
 from sklearn.decomposition import PCA
 from sklearn.manifold import TSNE
+import matplotlib
 import matplotlib.pyplot as plt
-from typing import Literal, Optional, Union, Dict, Tuple, Any, cast
+import matplotlib.colors as mcolors
+from typing import Literal, Optional, Union, Dict, Tuple, Any
 import numpy.typing as npt
 import warnings
-try:
-    import umap
-    UMAP_AVAILABLE = True
-except ImportError:
-    UMAP_AVAILABLE = False
-try:
-    import plotly.graph_objects as go
-    from plotly.subplots import make_subplots
-    PLOTLY_AVAILABLE = True
-except ImportError:
-    PLOTLY_AVAILABLE = False
-__author__ = "Ettore Rocchi"
+import umap
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
 ArrayLike = Union[pd.DataFrame, pd.Series, npt.NDArray[Any]]
 FloatArray = npt.NDArray[np.float64]
@@ -57,8 +46,9 @@ class ComBatModel:
         ignoring the variance (`delta_star`).
     reference_batch : str, optional
         If specified, the batch level to use as reference.
-    covbat_cov_thresh : float, default=0.9
-        CovBat: cumulative explained variance threshold for PCA.
+    covbat_cov_thresh : float or int, default=0.9
+        CovBat: cumulative variance threshold (0, 1] to retain PCs, or
+        integer >= 1 specifying the number of components directly.
     eps : float, default=1e-8
         Numerical jitter to avoid division-by-zero.
     """
@@ -66,19 +56,19 @@ class ComBatModel:
     def __init__(
         self,
         *,
-        method: Literal["johnson", "fortin", "chen"] = "johnson",
+        method: Literal["johnson", "fortin", "chen"] = "johnson",
         parametric: bool = True,
         mean_only: bool = False,
         reference_batch: Optional[str] = None,
         eps: float = 1e-8,
-        covbat_cov_thresh: float = 0.9,
+        covbat_cov_thresh: Union[float, int] = 0.9,
     ) -> None:
         self.method: str = method
         self.parametric: bool = parametric
         self.mean_only: bool = bool(mean_only)
         self.reference_batch: Optional[str] = reference_batch
         self.eps: float = float(eps)
-        self.covbat_cov_thresh: float = float(covbat_cov_thresh)
+        self.covbat_cov_thresh: Union[float, int] = covbat_cov_thresh
         self._batch_levels: pd.Index
         self._grand_mean: pd.Series
@@ -95,9 +85,16 @@ class ComBatModel:
         self._batch_levels_pc: pd.Index
         self._pc_gamma_star: FloatArray
         self._pc_delta_star: FloatArray
-        if not (0.0 < self.covbat_cov_thresh <= 1.0):
-            raise ValueError("covbat_cov_thresh must be in (0, 1].")
+        # Validate covbat_cov_thresh
+        if isinstance(self.covbat_cov_thresh, float):
+            if not (0.0 < self.covbat_cov_thresh <= 1.0):
+                raise ValueError("covbat_cov_thresh must be in (0, 1] when float.")
+        elif isinstance(self.covbat_cov_thresh, int):
+            if self.covbat_cov_thresh < 1:
+                raise ValueError("covbat_cov_thresh must be >= 1 when int.")
+        else:
+            raise TypeError("covbat_cov_thresh must be float or int.")
     @staticmethod
     def _as_series(
@@ -335,8 +332,14 @@ class ComBatModel:
         X_meanvar_adj = self._transform_fortin(X, batch, disc, cont)
         X_centered = X_meanvar_adj - X_meanvar_adj.mean(axis=0)
         pca = PCA(svd_solver="full", whiten=False).fit(X_centered)
-        cumulative = np.cumsum(pca.explained_variance_ratio_)
-        n_pc = int(np.searchsorted(cumulative, self.covbat_cov_thresh) + 1)
+        # Determine number of components based on threshold type
+        if isinstance(self.covbat_cov_thresh, int):
+            n_pc = min(self.covbat_cov_thresh, len(pca.explained_variance_ratio_))
+        else:
+            cumulative = np.cumsum(pca.explained_variance_ratio_)
+            n_pc = int(np.searchsorted(cumulative, self.covbat_cov_thresh) + 1)
         self._covbat_pca = pca
         self._covbat_n_pc = n_pc
@@ -487,7 +490,8 @@ class ComBatModel:
         continuous_covariates: Optional[ArrayLike] = None,
     ) -> pd.DataFrame:
         """Transform the data using fitted ComBat parameters."""
-        check_is_fitted(self, ["_gamma_star"])
+        if not hasattr(self, "_gamma_star"):
+            raise ValueError("This ComBatModel instance is not fitted yet. Call 'fit' before 'transform'.")
         if not isinstance(X, pd.DataFrame):
             X = pd.DataFrame(X)
         idx = X.index
@@ -599,7 +603,7 @@ class ComBatModel:
         """Chen transform implementation."""
         X_meanvar_adj = self._transform_fortin(X, batch, disc, cont)
         X_centered = X_meanvar_adj - self._covbat_pca.mean_
-        scores = self._covbat_pca.transform(X_centered.values)
+        scores = self._covbat_pca.transform(X_centered)
         n_pc = self._covbat_n_pc
         scores_adj = scores.copy()
@@ -638,7 +642,7 @@ class ComBat(BaseEstimator, TransformerMixin):
         mean_only: bool = False,
         reference_batch: Optional[str] = None,
         eps: float = 1e-8,
-        covbat_cov_thresh: float = 0.9,
+        covbat_cov_thresh: Union[float, int] = 0.9,
     ) -> None:
         self.batch = batch
         self.discrete_covariates = discrete_covariates
@@ -758,7 +762,8 @@ class ComBat(BaseEstimator, TransformerMixin):
             - `'original'`: embedding of original data
             - `'transformed'`: embedding of ComBat-transformed data
         """
-        check_is_fitted(self._model, ["_gamma_star"])
+        if not hasattr(self._model, "_gamma_star"):
+            raise ValueError("This ComBat instance is not fitted yet. Call 'fit' before 'plot_transformation'.")
         if n_components not in [2, 3]:
             raise ValueError(f"n_components must be 2 or 3, got {n_components}")
@@ -767,11 +772,6 @@ class ComBat(BaseEstimator, TransformerMixin):
         if plot_type not in ['static', 'interactive']:
             raise ValueError(f"plot_type must be 'static' or 'interactive', got '{plot_type}'")
-        if reduction_method == 'umap' and not UMAP_AVAILABLE:
-            raise ImportError("UMAP is not installed. Install with: pip install umap-learn")
-        if plot_type == 'interactive' and not PLOTLY_AVAILABLE:
-            raise ImportError("Plotly is not installed. Install with: pip install plotly")
         if not isinstance(X, pd.DataFrame):
             X = pd.DataFrame(X)
@@ -796,8 +796,8 @@ class ComBat(BaseEstimator, TransformerMixin):
         else:
             umap_params = {'random_state': 42}
             umap_params.update(reduction_kwargs)
-            reducer_orig = umap.UMAP(n_components=n_components, **reduction_kwargs)
-            reducer_trans = umap.UMAP(n_components=n_components, **reduction_kwargs)
+            reducer_orig = umap.UMAP(n_components=n_components, **umap_params)
+            reducer_trans = umap.UMAP(n_components=n_components, **umap_params)
         X_embedded_orig = reducer_orig.fit_transform(X_np)
         X_embedded_trans = reducer_trans.fit_transform(X_trans_np)
@@ -811,7 +811,7 @@ class ComBat(BaseEstimator, TransformerMixin):
         else:
             fig = self._create_interactive_plot(
                 X_embedded_orig, X_embedded_trans, batch_vec,
-                reduction_method, n_components, title, show_legend
+                reduction_method, n_components, cmap, title, show_legend
             )
         if return_embeddings:
@@ -844,9 +844,9 @@ class ComBat(BaseEstimator, TransformerMixin):
         n_batches = len(unique_batches)
         if n_batches <= 10:
-            colors = plt.cm.get_cmap(cmap)(np.linspace(0, 1, n_batches))
+            colors = matplotlib.colormaps.get_cmap(cmap)(np.linspace(0, 1, n_batches))
         else:
-            colors = plt.cm.get_cmap('tab20')(np.linspace(0, 1, n_batches))
+            colors = matplotlib.colormaps.get_cmap('tab20')(np.linspace(0, 1, n_batches))
         if n_components == 2:
             ax1 = plt.subplot(1, 2, 1)
@@ -930,6 +930,7 @@ class ComBat(BaseEstimator, TransformerMixin):
             batch_labels: pd.Series,
             method: str,
             n_components: int,
+            cmap: str,
             title: Optional[str],
             show_legend: bool) -> Any:
         """Create interactive plots using plotly."""
@@ -953,43 +954,69 @@ class ComBat(BaseEstimator, TransformerMixin):
         unique_batches = batch_labels.drop_duplicates()
+        n_batches = len(unique_batches)
+        cmap_func = matplotlib.colormaps.get_cmap(cmap)
+        color_list = [mcolors.to_hex(cmap_func(i / max(n_batches - 1, 1))) for i in range(n_batches)]
+        batch_to_color = dict(zip(unique_batches, color_list))
         for batch in unique_batches:
             mask = batch_labels == batch
             if n_components == 2:
                 fig.add_trace(
-                    go.Scatter(x=X_orig[mask, 0], y=X_orig[mask, 1],
-                              mode='markers',
-                              name=f'Batch {batch}',
-                              marker=dict(size=8, line=dict(width=1, color='black')),
-                              showlegend=False),
+                    go.Scatter(
+                        x=X_orig[mask, 0], y=X_orig[mask, 1],
+                        mode='markers',
+                        name=f'Batch {batch}',
+                        marker=dict(
+                            size=8,
+                            color=batch_to_color[batch],
+                            line=dict(width=1, color='black')
+                        ),
+                        showlegend=False),
                     row=1, col=1
                 )
                 fig.add_trace(
-                    go.Scatter(x=X_trans[mask, 0], y=X_trans[mask, 1],
-                              mode='markers',
-                              name=f'Batch {batch}',
-                              marker=dict(size=8, line=dict(width=1, color='black')),
-                              showlegend=show_legend),
+                    go.Scatter(
+                        x=X_trans[mask, 0], y=X_trans[mask, 1],
+                        mode='markers',
+                        name=f'Batch {batch}',
+                        marker=dict(
+                            size=8,
+                            color=batch_to_color[batch],
+                            line=dict(width=1, color='black')
+                        ),
+                        showlegend=show_legend),
                     row=1, col=2
                 )
             else:
                 fig.add_trace(
-                    go.Scatter3d(x=X_orig[mask, 0], y=X_orig[mask, 1], z=X_orig[mask, 2],
-                                mode='markers',
-                                name=f'Batch {batch}',
-                                marker=dict(size=5, line=dict(width=0.5, color='black')),
-                                showlegend=False),
+                    go.Scatter3d(
+                        x=X_orig[mask, 0], y=X_orig[mask, 1], z=X_orig[mask, 2],
+                        mode='markers',
+                        name=f'Batch {batch}',
+                        marker=dict(
+                            size=5,
+                            color=batch_to_color[batch],
+                            line=dict(width=0.5, color='black')
+                        ),
+                        showlegend=False),
                     row=1, col=1
                 )
                 fig.add_trace(
-                    go.Scatter3d(x=X_trans[mask, 0], y=X_trans[mask, 1], z=X_trans[mask, 2],
-                                mode='markers',
-                                name=f'Batch {batch}',
-                                marker=dict(size=5, line=dict(width=0.5, color='black')),
-                                showlegend=show_legend),
+                    go.Scatter3d(
+                        x=X_trans[mask, 0], y=X_trans[mask, 1], z=X_trans[mask, 2],
+                        mode='markers',
+                        name=f'Batch {batch}',
+                        marker=dict(
+                            size=5,
+                            color=batch_to_color[batch],
+                            line=dict(width=0.5, color='black')
+                        ),
+                        showlegend=show_legend),
                     row=1, col=2
                 )

{combatlearn-0.2.1.dist-info → combatlearn-1.0.0.dist-info}/METADATA RENAMED Viewed

@@ -1,9 +1,9 @@
 Metadata-Version: 2.4
 Name: combatlearn
-Version: 0.2.1
+Version: 1.0.0
 Summary: Batch-effect harmonization for machine learning frameworks.
 Author-email: Ettore Rocchi <ettoreroc@gmail.com>
-License-Expression: MIT
+License: MIT
 Keywords: machine-learning,harmonization,combat,preprocessing
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Science/Research
@@ -19,13 +19,23 @@ Requires-Dist: matplotlib>=3.4
 Requires-Dist: plotly>=5.0
 Requires-Dist: nbformat>=4.2
 Requires-Dist: umap-learn>=0.5
-Requires-Dist: pytest>=7
+Provides-Extra: dev
+Requires-Dist: pytest>=7; extra == "dev"
+Requires-Dist: pytest-cov>=4.0; extra == "dev"
+Requires-Dist: ruff>=0.1; extra == "dev"
+Requires-Dist: mypy>=1.0; extra == "dev"
+Provides-Extra: docs
+Requires-Dist: mkdocs>=1.5.0; extra == "docs"
+Requires-Dist: mkdocs-material>=9.0.0; extra == "docs"
+Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "docs"
+Requires-Dist: pymdown-extensions>=10.0; extra == "docs"
 Dynamic: license-file
 # **combatlearn**
 [![Python versions](https://img.shields.io/badge/python-%3E%3D3.10-blue?logo=python)](https://www.python.org/)
 [![Test](https://github.com/EttoreRocchi/combatlearn/actions/workflows/test.yaml/badge.svg)](https://github.com/EttoreRocchi/combatlearn/actions/workflows/test.yaml)
+[![Documentation](https://readthedocs.org/projects/combatlearn/badge/?version=latest)](https://combatlearn.readthedocs.io)
 [![PyPI Downloads](https://static.pepy.tech/badge/combatlearn)](https://pepy.tech/projects/combatlearn)
 [![PyPI Version](https://img.shields.io/pypi/v/combatlearn?cacheSeconds=300)](https://pypi.org/project/combatlearn/)
 [![License](https://img.shields.io/github/license/EttoreRocchi/combatlearn)](https://github.com/EttoreRocchi/combatlearn/blob/main/LICENSE)
@@ -95,6 +105,17 @@ print(f"Best CV AUROC: {grid.best_score_:.3f}")
 For a full example of how to use **combatlearn** see the [notebook demo](https://github.com/EttoreRocchi/combatlearn/blob/main/docs/demo/combatlearn_demo.ipynb)
+## Documentation
+**Full documentation is available at [combatlearn.readthedocs.io](https://combatlearn.readthedocs.io)**
+The documentation includes:
+- [Installation Guide](https://combatlearn.readthedocs.io/en/latest/installation/)
+- [Quick Start Tutorial](https://combatlearn.readthedocs.io/en/latest/quickstart/)
+- [User Guide](https://combatlearn.readthedocs.io/en/latest/user-guide/overview/)
+- [API Reference](https://combatlearn.readthedocs.io/en/latest/api/)
+- [Examples](https://combatlearn.readthedocs.io/en/latest/examples/basic-usage/)
 ## `ComBat` parameters
 The following section provides a detailed explanation of all parameters available in the scikit-learn-compatible `ComBat` class.

combatlearn-1.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+combatlearn/__init__.py,sha256=ck_EGW8iqLGUebg2wc-h794lwG3uAkHn9GaWjHgUIX4,99
+combatlearn/combat.py,sha256=Hri1XwnfSXWLoC1KD2VkqtNLkZpixI5ax0UrT1HtjyU,38505
+combatlearn-1.0.0.dist-info/licenses/LICENSE,sha256=O34CBRTmdL59PxDYOa6nq1N0-2A9xyXGkBXKbsL1NeY,1070
+combatlearn-1.0.0.dist-info/METADATA,sha256=hJvZEiA_ekTq06wzfOf2p6M_4vwNXGOdoS-K5MvT4P0,8558
+combatlearn-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+combatlearn-1.0.0.dist-info/top_level.txt,sha256=3cFQv4oj2sh_NKra45cPy8Go0v8W9x9-zkkUibqZCMk,12
+combatlearn-1.0.0.dist-info/RECORD,,

combatlearn-0.2.1.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-combatlearn/__init__.py,sha256=UzqGt-P5ZVBfK6SXGTi-OOgG5Ae5ZJO7ugZhFp3EHCM,98
-combatlearn/combat.py,sha256=g6YnCVWq40j_fMU2OcXrJ1O0MCSyt2owCaZ4gfyF-Pw,37268
-combatlearn-0.2.1.dist-info/licenses/LICENSE,sha256=O34CBRTmdL59PxDYOa6nq1N0-2A9xyXGkBXKbsL1NeY,1070
-combatlearn-0.2.1.dist-info/METADATA,sha256=zYMV3IEi0vgrGuu6dwYwkLH-cCXxQTr9GekUjUGwTgc,7491
-combatlearn-0.2.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-combatlearn-0.2.1.dist-info/top_level.txt,sha256=3cFQv4oj2sh_NKra45cPy8Go0v8W9x9-zkkUibqZCMk,12
-combatlearn-0.2.1.dist-info/RECORD,,

{combatlearn-0.2.1.dist-info → combatlearn-1.0.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{combatlearn-0.2.1.dist-info → combatlearn-1.0.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{combatlearn-0.2.1.dist-info → combatlearn-1.0.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

combatlearn 0.2.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

combatlearn 0.2.1 (py3-none-any.whl) → 1.0.0 (py3-none-any.whl)