scib-metrics 0.5.5__tar.gz → 0.5.7__tar.gz

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (79)
  1. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/build.yaml +2 -2
  2. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/test_linux.yaml +1 -1
  3. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/test_linux_cuda.yaml +2 -2
  4. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/test_linux_pre.yaml +1 -1
  5. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/test_macos.yaml +1 -1
  6. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/test_macos_m1.yaml +1 -1
  7. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/test_windows.yaml +1 -1
  8. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.pre-commit-config.yaml +2 -2
  9. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.readthedocs.yaml +1 -1
  10. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/CHANGELOG.md +15 -0
  11. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/PKG-INFO +2 -2
  12. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/pyproject.toml +2 -2
  13. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/benchmark/_core.py +34 -18
  14. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_kbet.py +46 -47
  15. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/test_benchmarker.py +12 -0
  16. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.codecov.yaml +0 -0
  17. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.cruft.json +0 -0
  18. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.editorconfig +0 -0
  19. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  20. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  21. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  22. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/release.yaml +0 -0
  23. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.gitignore +0 -0
  24. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/LICENSE +0 -0
  25. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/README.md +0 -0
  26. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/Makefile +0 -0
  27. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/_static/.gitkeep +0 -0
  28. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/_static/css/custom.css +0 -0
  29. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/_templates/.gitkeep +0 -0
  30. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/_templates/autosummary/class.rst +0 -0
  31. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/_templates/class_no_inherited.rst +0 -0
  32. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/api.md +0 -0
  33. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/changelog.md +0 -0
  34. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/conf.py +0 -0
  35. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/contributing.md +0 -0
  36. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/extensions/.gitkeep +0 -0
  37. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/extensions/typed_returns.py +0 -0
  38. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/index.md +0 -0
  39. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/notebooks/large_scale.ipynb +0 -0
  40. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/notebooks/lung_example.ipynb +0 -0
  41. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/references.bib +0 -0
  42. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/references.md +0 -0
  43. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/template_usage.md +0 -0
  44. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/tutorials.md +0 -0
  45. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/setup.py +0 -0
  46. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/__init__.py +0 -0
  47. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/_settings.py +0 -0
  48. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/_types.py +0 -0
  49. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/benchmark/__init__.py +0 -0
  50. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/__init__.py +0 -0
  51. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_graph_connectivity.py +0 -0
  52. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_isolated_labels.py +0 -0
  53. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_lisi.py +0 -0
  54. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_nmi_ari.py +0 -0
  55. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_pcr_comparison.py +0 -0
  56. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_silhouette.py +0 -0
  57. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/nearest_neighbors/__init__.py +0 -0
  58. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/nearest_neighbors/_dataclass.py +0 -0
  59. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/nearest_neighbors/_jax.py +0 -0
  60. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/nearest_neighbors/_pynndescent.py +0 -0
  61. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/__init__.py +0 -0
  62. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_diffusion_nn.py +0 -0
  63. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_dist.py +0 -0
  64. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_kmeans.py +0 -0
  65. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_lisi.py +0 -0
  66. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_pca.py +0 -0
  67. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_pcr.py +0 -0
  68. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_silhouette.py +0 -0
  69. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_utils.py +0 -0
  70. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/__init__.py +0 -0
  71. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/test_BRAS_metric.py +0 -0
  72. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/test_metrics.py +0 -0
  73. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/test_neighbors.py +0 -0
  74. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/test_pcr_comparison.py +0 -0
  75. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/utils/__init__.py +0 -0
  76. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/utils/data.py +0 -0
  77. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/utils/sampling.py +0 -0
  78. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/utils/test_pca.py +0 -0
  79. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/utils/test_pcr.py +0 -0
.github/workflows/build.yaml
@@ -15,10 +15,10 @@ jobs:
         runs-on: ubuntu-latest
         steps:
             - uses: actions/checkout@v4
-            - name: Set up Python 3.11
+            - name: Set up Python 3.12
               uses: actions/setup-python@v5
               with:
-                  python-version: "3.11"
+                  python-version: "3.12"
                   cache: "pip"
                   cache-dependency-path: "**/pyproject.toml"
             - name: Install build dependencies
.github/workflows/test_linux.yaml
@@ -21,7 +21,7 @@ jobs:
             fail-fast: false
             matrix:
                 os: [ubuntu-latest]
-                python: ["3.10", "3.11", "3.12"]
+                python: ["3.11", "3.12", "3.13"]

         name: Integration

.github/workflows/test_linux_cuda.yaml
@@ -30,8 +30,8 @@ jobs:
         strategy:
             fail-fast: false
             matrix:
-                python: ["3.11"]
-                cuda: ["11"]
+                python: ["3.12"]
+                cuda: ["12"]

         container:
             image: scverse/scvi-tools:py${{ matrix.python }}-cu${{ matrix.cuda }}-base
.github/workflows/test_linux_pre.yaml
@@ -31,7 +31,7 @@ jobs:
             fail-fast: false
             matrix:
                 os: [ubuntu-latest]
-                python: ["3.10", "3.11", "3.12"]
+                python: ["3.11", "3.12", "3.13"]

         name: Integration (Prereleases)

.github/workflows/test_macos.yaml
@@ -20,7 +20,7 @@ jobs:
             fail-fast: false
             matrix:
                 os: [macos-latest]
-                python: ["3.10", "3.11", "3.12"]
+                python: ["3.11", "3.12", "3.13"]

         name: Integration

.github/workflows/test_macos_m1.yaml
@@ -20,7 +20,7 @@ jobs:
             fail-fast: false
             matrix:
                 os: [macos-14]
-                python: ["3.10", "3.11", "3.12"]
+                python: ["3.11", "3.12", "3.13"]

         name: Integration

.github/workflows/test_windows.yaml
@@ -20,7 +20,7 @@ jobs:
             fail-fast: false
             matrix:
                 os: [windows-latest]
-                python: ["3.10", "3.11", "3.12"]
+                python: ["3.11", "3.12", "3.13"]

         name: Integration

.pre-commit-config.yaml
@@ -11,7 +11,7 @@ repos:
       hooks:
           - id: prettier
     - repo: https://github.com/astral-sh/ruff-pre-commit
-      rev: v0.11.12
+      rev: v0.12.12
       hooks:
           - id: ruff
             types_or: [python, pyi, jupyter]
@@ -19,7 +19,7 @@ repos:
           - id: ruff-format
             types_or: [python, pyi, jupyter]
     - repo: https://github.com/pre-commit/pre-commit-hooks
-      rev: v5.0.0
+      rev: v6.0.0
       hooks:
           - id: detect-private-key
           - id: check-ast
.readthedocs.yaml
@@ -3,7 +3,7 @@ version: 2
 build:
     os: ubuntu-20.04
     tools:
-        python: "3.10"
+        python: "3.11"
 sphinx:
     configuration: docs/conf.py
     # disable this for more lenient docs builds
CHANGELOG.md
@@ -10,6 +10,21 @@ and this project adheres to [Semantic Versioning][].

 ## 0.6.0 (unreleased)

+## 0.5.6 (2025-07-08)
+
+### Added
+
+- Add BRAS to Benchmarker as default, instead of regular silhouette batch {pr}`217`
+- Added the option to manually set the KNN graphs before running a benchmarker.
+
+### Changed
+
+- Changed default of min_max_scale in {func}`scib_metrics.benchmark.get_results` to False {pr}`215`.
+
+### Fixed
+
+- Reverted Skip labels before loop {pr}`180`, which caused wrong selection of clusters {pr}`213`.
+
 ## 0.5.5 (2025-06-03)

 ### Added
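
In practice, the 0.5.6 entries above change what a default run reports: the batch-correction table gains a BRAS column in place of the batch silhouette, and get_results() no longer min-max-scales scores unless asked. A minimal sketch of the new defaults (the toy AnnData setup here is illustrative, not part of this diff):

import anndata
import numpy as np
from scib_metrics.benchmark import Benchmarker

# Hypothetical toy data: 300 cells, two batches, three labels, one embedding.
rng = np.random.default_rng(0)
adata = anndata.AnnData(X=rng.normal(size=(300, 50)).astype(np.float32))
adata.obs["batch"] = rng.choice(["b1", "b2"], size=300)
adata.obs["cell_type"] = rng.choice(["t1", "t2", "t3"], size=300)
adata.obsm["X_emb"] = rng.normal(size=(300, 10))

bm = Benchmarker(adata, batch_key="batch", label_key="cell_type", embedding_obsm_keys=["X_emb"])
bm.benchmark()
results = bm.get_results()                   # raw scores: min_max_scale now defaults to False
scaled = bm.get_results(min_max_scale=True)  # opt back into the pre-0.5.6 scaling
assert "BRAS" in results.columns             # replaces the "Silhouette batch" column by default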
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scib-metrics
-Version: 0.5.5
+Version: 0.5.7
 Summary: Accelerated and Python-only scIB metrics
 Project-URL: Documentation, https://scib-metrics.readthedocs.io/
 Project-URL: Source, https://github.com/yoseflab/scib-metrics
@@ -37,7 +37,7 @@ License: BSD 3-Clause License
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 License-File: LICENSE
-Requires-Python: >=3.10
+Requires-Python: >=3.11
 Requires-Dist: anndata
 Requires-Dist: chex
 Requires-Dist: igraph>0.9.0
pyproject.toml
@@ -5,10 +5,10 @@ requires = ["hatchling"]

 [project]
 name = "scib-metrics"
-version = "0.5.5"
+version = "0.5.7"
 description = "Accelerated and Python-only scIB metrics"
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.11"
 license = { file = "LICENSE" }
 authors = [{ name = "Adam Gayoso" }]
 maintainers = [{ name = "Adam Gayoso", email = "adamgayoso@berkeley.edu" }]
src/scib_metrics/benchmark/_core.py
@@ -1,3 +1,4 @@
+import gc
 import os
 import warnings
 from collections.abc import Callable
@@ -42,6 +43,7 @@ metric_name_cleaner = {
     "clisi_knn": "cLISI",
     "ilisi_knn": "iLISI",
     "kbet_per_label": "KBET",
+    "bras": "BRAS",
     "graph_connectivity": "Graph connectivity",
     "pcr_comparison": "PCR comparison",
 }
@@ -72,7 +74,7 @@ class BatchCorrection:
     parameters, such as `X` or `labels`.
     """

-    silhouette_batch: MetricType = True
+    bras: MetricType = True
     ilisi_knn: MetricType = True
     kbet_per_label: MetricType = True
     graph_connectivity: MetricType = True
@@ -88,7 +90,7 @@ class MetricAnnDataAPI(Enum):
     silhouette_label = lambda ad, fn: fn(ad.X, ad.obs[_LABELS])
     clisi_knn = lambda ad, fn: fn(ad.uns["90_neighbor_res"], ad.obs[_LABELS])
     graph_connectivity = lambda ad, fn: fn(ad.uns["15_neighbor_res"], ad.obs[_LABELS])
-    silhouette_batch = lambda ad, fn: fn(ad.X, ad.obs[_LABELS], ad.obs[_BATCH])
+    bras = lambda ad, fn: fn(ad.X, ad.obs[_LABELS], ad.obs[_BATCH])
     pcr_comparison = lambda ad, fn: fn(ad.obsm[_X_PRE], ad.X, ad.obs[_BATCH], categorical=True)
     ilisi_knn = lambda ad, fn: fn(ad.uns["90_neighbor_res"], ad.obs[_BATCH])
     kbet_per_label = lambda ad, fn: fn(ad.uns["50_neighbor_res"], ad.obs[_BATCH], ad.obs[_LABELS])
@@ -119,6 +121,8 @@ class Benchmarker:
     progress_bar
         Whether to show a progress bar for :meth:`~scib_metrics.benchmark.Benchmarker.prepare` and
         :meth:`~scib_metrics.benchmark.Benchmarker.benchmark`.
+    solver
+        SVD solver to use during PCA. can help stability issues. Choose from: "arpack", "randomized" or "auto"

     Notes
     -----
@@ -141,6 +145,7 @@ class Benchmarker:
         pre_integrated_embedding_obsm_key: str | None = None,
         n_jobs: int = 1,
         progress_bar: bool = True,
+        solver: str = "arpack",
     ):
         self._adata = adata
         self._embedding_obsm_keys = embedding_obsm_keys
@@ -156,6 +161,8 @@ class Benchmarker:
         self._label_key = label_key
         self._n_jobs = n_jobs
         self._progress_bar = progress_bar
+        self._compute_neighbors = True
+        self._solver = solver

         if self._bio_conservation_metrics is None and self._batch_correction_metrics is None:
             raise ValueError("Either batch or bio metrics must be defined.")
@@ -177,11 +184,13 @@ class Benchmarker:
         the data and the number of neighbors to compute and return a :class:`~scib_metrics.utils.nearest_neighbors.NeighborsResults`
         object.
         """
+        gc.collect()
+
         # Compute PCA
         if self._pre_integrated_embedding_obsm_key is None:
             # This is how scib does it
             # https://github.com/theislab/scib/blob/896f689e5fe8c57502cb012af06bed1a9b2b61d2/scib/metrics/pcr.py#L197
-            sc.tl.pca(self._adata, use_highly_variable=False)
+            sc.tl.pca(self._adata, svd_solver=self._solver, use_highly_variable=False)
             self._pre_integrated_embedding_obsm_key = "X_pca"

         for emb_key in self._embedding_obsm_keys:
@@ -191,19 +200,25 @@ class Benchmarker:
             self._emb_adatas[emb_key].obsm[_X_PRE] = self._adata.obsm[self._pre_integrated_embedding_obsm_key]

         # Compute neighbors
-        progress = self._emb_adatas.values()
-        if self._progress_bar:
-            progress = tqdm(progress, desc="Computing neighbors")
-
-        for ad in progress:
-            if neighbor_computer is not None:
-                neigh_result = neighbor_computer(ad.X, max(self._neighbor_values))
-            else:
-                neigh_result = pynndescent(
-                    ad.X, n_neighbors=max(self._neighbor_values), random_state=0, n_jobs=self._n_jobs
-                )
-            for n in self._neighbor_values:
-                ad.uns[f"{n}_neighbor_res"] = neigh_result.subset_neighbors(n=n)
+        if self._compute_neighbors:
+            progress = self._emb_adatas.values()
+            if self._progress_bar:
+                progress = tqdm(progress, desc="Computing neighbors")
+
+            for ad in progress:
+                if neighbor_computer is not None:
+                    neigh_result = neighbor_computer(ad.X, max(self._neighbor_values))
+                else:
+                    neigh_result = pynndescent(
+                        ad.X, n_neighbors=max(self._neighbor_values), random_state=0, n_jobs=self._n_jobs
+                    )
+                for n in self._neighbor_values:
+                    ad.uns[f"{n}_neighbor_res"] = neigh_result.subset_neighbors(n=n)
+        else:
+            warnings.warn(
+                "Computing Neighbors Skipped",
+                UserWarning,
+            )

         self._prepared = True

@@ -232,6 +247,7 @@ class Benchmarker:
             pbar = tqdm(total=num_metrics, desc="Metrics", position=1, leave=False, colour="blue")
         for metric_type, metric_collection in self._metric_collection_dict.items():
             for metric_name, use_metric_or_kwargs in asdict(metric_collection).items():
+                gc.collect()
                 if use_metric_or_kwargs:
                     pbar.set_postfix_str(f"{metric_type}: {metric_name}") if pbar is not None else None
                     metric_fn = getattr(scib_metrics, metric_name)
@@ -251,7 +267,7 @@ class Benchmarker:

         self._benchmarked = True

-    def get_results(self, min_max_scale: bool = True, clean_names: bool = True) -> pd.DataFrame:
+    def get_results(self, min_max_scale: bool = False, clean_names: bool = True) -> pd.DataFrame:
         """Return the benchmarking results.

         Parameters
@@ -291,7 +307,7 @@ class Benchmarker:
         df.loc[_METRIC_TYPE, per_class_score.columns] = _AGGREGATE_SCORE
         return df

-    def plot_results_table(self, min_max_scale: bool = True, show: bool = True, save_dir: str | None = None) -> Table:
+    def plot_results_table(self, min_max_scale: bool = False, show: bool = True, save_dir: str | None = None) -> Table:
         """Plot the benchmarking results.

         Parameters
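
The new solver argument above is forwarded to sc.tl.pca as svd_solver when no pre-integrated embedding is supplied, so the SVD solver becomes a constructor-level choice. A minimal sketch, reusing the hypothetical adata from the earlier sketch:

from scib_metrics.benchmark import Benchmarker

# "arpack" remains the default; per the new docstring, "randomized" or "auto"
# may help if ARPACK hits stability problems on a given matrix.
bm = Benchmarker(
    adata,
    batch_key="batch",
    label_key="cell_type",
    embedding_obsm_keys=["X_emb"],
    solver="randomized",  # passed through as sc.tl.pca(..., svd_solver="randomized")
)
bm.benchmark()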
src/scib_metrics/metrics/_kbet.py
@@ -138,14 +138,8 @@ def kbet_per_label(
     conn_graph = X.knn_graph_connectivities

     # prepare call of kBET per cluster
-    clusters = []
-    clusters, counts = np.unique(labels, return_counts=True)
-    skipped = clusters[counts > 10]
-    clusters = clusters[counts <= 10]
-    kbet_scores = {"cluster": list(skipped), "kBET": [np.nan] * len(skipped)}
-    logger.info(f"{len(skipped)} clusters consist of a single batch or are too small. Skip.")
-
-    for clus in clusters:
+    kbet_scores = {"cluster": [], "kBET": []}
+    for clus in np.unique(labels):
         # subset by label
         mask = labels == clus
         conn_graph_sub = conn_graph[mask, :][:, mask]
@@ -153,55 +147,60 @@ def kbet_per_label(
         n_obs = conn_graph_sub.shape[0]
         batches_sub = batches[mask]

-        quarter_mean = np.floor(np.mean(pd.Series(batches_sub).value_counts()) / 4).astype("int")
-        k0 = np.min([70, np.max([10, quarter_mean])])
-        # check k0 for reasonability
-        if k0 * n_obs >= size_max:
-            k0 = np.floor(size_max / n_obs).astype("int")
-
-        n_comp, labs = scipy.sparse.csgraph.connected_components(conn_graph_sub, connection="strong")
-
-        if n_comp == 1:  # a single component to compute kBET on
-            try:
-                diffusion_n_comps = np.min([diffusion_n_comps, n_obs - 1])
-                nn_graph_sub = diffusion_nn(conn_graph_sub, k=k0, n_comps=diffusion_n_comps)
-                # call kBET
-                score, _, _ = kbet(
-                    nn_graph_sub,
-                    batches=batches_sub,
-                    alpha=alpha,
-                )
-            except ValueError:
-                logger.info("Diffusion distance failed. Skip.")
-                score = 0  # i.e. 100% rejection
-
+        # check if neighborhood size too small or only one batch in subset
+        if np.logical_or(n_obs < 10, len(np.unique(batches_sub)) == 1):
+            logger.info(f"{clus} consists of a single batch or is too small. Skip.")
+            score = np.nan
         else:
-            # check the number of components where kBET can be computed upon
-            comp_size = pd.Series(labs).value_counts()
-            # check which components are small
-            comp_size_thresh = 3 * k0
-            idx_nonan = np.flatnonzero(np.in1d(labs, comp_size[comp_size >= comp_size_thresh].index))
-
-            # check if 75% of all cells can be used for kBET run
-            if len(idx_nonan) / len(labs) >= 0.75:
-                # create another subset of components, assume they are not visited in a diffusion process
-                conn_graph_sub_sub = conn_graph_sub[idx_nonan, :][:, idx_nonan]
-                conn_graph_sub_sub.sort_indices()
+            quarter_mean = np.floor(np.mean(pd.Series(batches_sub).value_counts()) / 4).astype("int")
+            k0 = np.min([70, np.max([10, quarter_mean])])
+            # check k0 for reasonability
+            if k0 * n_obs >= size_max:
+                k0 = np.floor(size_max / n_obs).astype("int")
+
+            n_comp, labs = scipy.sparse.csgraph.connected_components(conn_graph_sub, connection="strong")

+            if n_comp == 1:  # a single component to compute kBET on
                 try:
-                    diffusion_n_comps = np.min([diffusion_n_comps, conn_graph_sub_sub.shape[0] - 1])
-                    nn_results_sub_sub = diffusion_nn(conn_graph_sub_sub, k=k0, n_comps=diffusion_n_comps)
+                    diffusion_n_comps = np.min([diffusion_n_comps, n_obs - 1])
+                    nn_graph_sub = diffusion_nn(conn_graph_sub, k=k0, n_comps=diffusion_n_comps)
                     # call kBET
                     score, _, _ = kbet(
-                        nn_results_sub_sub,
-                        batches=batches_sub[idx_nonan],
+                        nn_graph_sub,
+                        batches=batches_sub,
                         alpha=alpha,
                     )
                 except ValueError:
                     logger.info("Diffusion distance failed. Skip.")
                     score = 0  # i.e. 100% rejection
-            else:  # if there are too many too small connected components, set kBET score to 0
-                score = 0  # i.e. 100% rejection
+
+            else:
+                # check the number of components where kBET can be computed upon
+                comp_size = pd.Series(labs).value_counts()
+                # check which components are small
+                comp_size_thresh = 3 * k0
+                idx_nonan = np.flatnonzero(np.in1d(labs, comp_size[comp_size >= comp_size_thresh].index))
+
+                # check if 75% of all cells can be used for kBET run
+                if len(idx_nonan) / len(labs) >= 0.75:
+                    # create another subset of components, assume they are not visited in a diffusion process
+                    conn_graph_sub_sub = conn_graph_sub[idx_nonan, :][:, idx_nonan]
+                    conn_graph_sub_sub.sort_indices()
+
+                    try:
+                        diffusion_n_comps = np.min([diffusion_n_comps, conn_graph_sub_sub.shape[0] - 1])
+                        nn_results_sub_sub = diffusion_nn(conn_graph_sub_sub, k=k0, n_comps=diffusion_n_comps)
+                        # call kBET
+                        score, _, _ = kbet(
+                            nn_results_sub_sub,
+                            batches=batches_sub[idx_nonan],
+                            alpha=alpha,
+                        )
+                    except ValueError:
+                        logger.info("Diffusion distance failed. Skip.")
+                        score = 0  # i.e. 100% rejection
+                else:  # if there are too many too small connected components, set kBET score to 0
+                    score = 0  # i.e. 100% rejection

         kbet_scores["cluster"].append(clus)
         kbet_scores["kBET"].append(score)
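
The revert above matters because the removed pre-loop filter had its comparisons inverted: skipped = clusters[counts > 10] marked the large clusters as skipped and kept only the small ones, exactly the wrong selection that {pr}`213` reverts. The restored per-cluster guard is equivalent to this standalone sketch (the helper name is hypothetical, not part of the diff):

import numpy as np

def should_skip_cluster(labels: np.ndarray, batches: np.ndarray, clus) -> bool:
    """True if a cluster is too small for kBET or contains only one batch."""
    mask = labels == clus
    return mask.sum() < 10 or len(np.unique(batches[mask])) == 1

Clusters failing the guard are still recorded with a kBET score of np.nan; only the selection logic changed.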
tests/test_benchmarker.py
@@ -1,4 +1,5 @@
 import pandas as pd
+import pytest

 from scib_metrics.benchmark import BatchCorrection, Benchmarker, BioConservation
 from scib_metrics.nearest_neighbors import jax_approx_min_k
@@ -52,6 +53,7 @@ def test_benchmarker_custom_metric_booleans():
     assert "kbet_per_label" not in results.columns
     assert "graph_connectivity" not in results.columns
     assert "ilisi_knn" in results.columns
+    assert "bras" in results.columns


 def test_benchmarker_custom_metric_callable():
@@ -80,3 +82,13 @@ def test_benchmarker_custom_near_neighs():
     results = bm.get_results()
     assert isinstance(results, pd.DataFrame)
     bm.plot_results_table()
+
+
+@pytest.mark.parametrize("solver", ["arpack", "randomized"])
+def test_benchmarker_different_solvers(solver):
+    ad, emb_keys, batch_key, labels_key = dummy_benchmarker_adata()
+    bm = Benchmarker(ad, batch_key, labels_key, emb_keys, solver=solver)
+    bm.benchmark()
+    results = bm.get_results()
+    assert isinstance(results, pd.DataFrame)
+    bm.plot_results_table()
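
The added test is parametrized over both SVD solvers; it can be run in isolation with:

pytest tests/test_benchmarker.py::test_benchmarker_different_solvers

which pytest expands into one run per entry in the parametrize list.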