PyPI - scib-metrics - Versions diffs - 0.5.2.tar.gz → 0.5.4.tar.gz - Mend

scib-metrics 0.5.2.tar.gz → 0.5.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

{scib_metrics-0.5.2 → scib_metrics-0.5.4}/.pre-commit-config.yaml RENAMED Viewed

@@ -11,7 +11,7 @@ repos:
     hooks:
       - id: prettier
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.6
+    rev: v0.11.6
     hooks:
       - id: ruff
         types_or: [python, pyi, jupyter]

{scib_metrics-0.5.2 → scib_metrics-0.5.4}/CHANGELOG.md RENAMED Viewed

@@ -10,6 +10,18 @@ and this project adheres to [Semantic Versioning][].
 ## 0.6.0 (unreleased)
+## 0.5.4 (2025-04-23)
+### Fixed
+- Apply default values for benchmarker metrics {pr}`203`.
+## 0.5.3 (2025-02-17)
+### Removed
+- Reverted a change that was needed for scib-autotune in scvi-tools {pr}`189`.
 ## 0.5.2 (2025-02-13)
 ### Added

{scib_metrics-0.5.2 → scib_metrics-0.5.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scib-metrics
-Version: 0.5.2
+Version: 0.5.4
 Summary: Accelerated and Python-only scIB metrics
 Project-URL: Documentation, https://scib-metrics.readthedocs.io/
 Project-URL: Source, https://github.com/yoseflab/scib-metrics

{scib_metrics-0.5.2 → scib_metrics-0.5.4}/pyproject.toml RENAMED Viewed

@@ -5,7 +5,7 @@ requires = ["hatchling"]
 [project]
 name = "scib-metrics"
-version = "0.5.2"
+version = "0.5.4"
 description = "Accelerated and Python-only scIB metrics"
 readme = "README.md"
 requires-python = ">=3.10"

{scib_metrics-0.5.2 → scib_metrics-0.5.4}/src/scib_metrics/benchmark/_core.py RENAMED Viewed

@@ -136,8 +136,8 @@ class Benchmarker:
         batch_key: str,
         label_key: str,
         embedding_obsm_keys: list[str],
-        bio_conservation_metrics: BioConservation | None,
-        batch_correction_metrics: BatchCorrection | None,
+        bio_conservation_metrics: BioConservation | None = BioConservation(),
+        batch_correction_metrics: BatchCorrection | None = BatchCorrection(),
         pre_integrated_embedding_obsm_key: str | None = None,
         n_jobs: int = 1,
         progress_bar: bool = True,
@@ -282,10 +282,6 @@ class Benchmarker:
         # Compute scores
         per_class_score = df.groupby(_METRIC_TYPE).mean().transpose()
-        if "Batch correction" not in per_class_score.columns:
-            per_class_score["Batch correction"] = 0.5
-        if "Bio conservation" not in per_class_score.columns:
-            per_class_score["Bio conservation"] = 0.5
         # This is the default scIB weighting from the manuscript
         if self._batch_correction_metrics is not None and self._bio_conservation_metrics is not None:
             per_class_score["Total"] = (

{scib_metrics-0.5.2 → scib_metrics-0.5.4}/src/scib_metrics/metrics/_kbet.py RENAMED Viewed

@@ -138,8 +138,14 @@ def kbet_per_label(
     conn_graph = X.knn_graph_connectivities
     # prepare call of kBET per cluster
-    kbet_scores = {"cluster": [], "kBET": []}
-    for clus in np.unique(labels):
+    clusters = []
+    clusters, counts = np.unique(labels, return_counts=True)
+    skipped = clusters[counts > 10]
+    clusters = clusters[counts <= 10]
+    kbet_scores = {"cluster": list(skipped), "kBET": [np.nan] * len(skipped)}
+    logger.info(f"{len(skipped)} clusters consist of a single batch or are too small. Skip.")
+    for clus in clusters:
         # subset by label
         mask = labels == clus
         conn_graph_sub = conn_graph[mask, :][:, mask]
@@ -147,60 +153,55 @@ def kbet_per_label(
         n_obs = conn_graph_sub.shape[0]
         batches_sub = batches[mask]
-        # check if neighborhood size too small or only one batch in subset
-        if np.logical_or(n_obs < 10, len(np.unique(batches_sub)) == 1):
-            logger.info(f"{clus} consists of a single batch or is too small. Skip.")
-            score = np.nan
-        else:
-            quarter_mean = np.floor(np.mean(pd.Series(batches_sub).value_counts()) / 4).astype("int")
-            k0 = np.min([70, np.max([10, quarter_mean])])
-            # check k0 for reasonability
-            if k0 * n_obs >= size_max:
-                k0 = np.floor(size_max / n_obs).astype("int")
+        quarter_mean = np.floor(np.mean(pd.Series(batches_sub).value_counts()) / 4).astype("int")
+        k0 = np.min([70, np.max([10, quarter_mean])])
+        # check k0 for reasonability
+        if k0 * n_obs >= size_max:
+            k0 = np.floor(size_max / n_obs).astype("int")
+        n_comp, labs = scipy.sparse.csgraph.connected_components(conn_graph_sub, connection="strong")
+        if n_comp == 1:  # a single component to compute kBET on
+            try:
+                diffusion_n_comps = np.min([diffusion_n_comps, n_obs - 1])
+                nn_graph_sub = diffusion_nn(conn_graph_sub, k=k0, n_comps=diffusion_n_comps)
+                # call kBET
+                score, _, _ = kbet(
+                    nn_graph_sub,
+                    batches=batches_sub,
+                    alpha=alpha,
+                )
+            except ValueError:
+                logger.info("Diffusion distance failed. Skip.")
+                score = 0  # i.e. 100% rejection
-            n_comp, labs = scipy.sparse.csgraph.connected_components(conn_graph_sub, connection="strong")
+        else:
+            # check the number of components where kBET can be computed upon
+            comp_size = pd.Series(labs).value_counts()
+            # check which components are small
+            comp_size_thresh = 3 * k0
+            idx_nonan = np.flatnonzero(np.in1d(labs, comp_size[comp_size >= comp_size_thresh].index))
+            # check if 75% of all cells can be used for kBET run
+            if len(idx_nonan) / len(labs) >= 0.75:
+                # create another subset of components, assume they are not visited in a diffusion process
+                conn_graph_sub_sub = conn_graph_sub[idx_nonan, :][:, idx_nonan]
+                conn_graph_sub_sub.sort_indices()
-            if n_comp == 1:  # a single component to compute kBET on
                 try:
-                    diffusion_n_comps = np.min([diffusion_n_comps, n_obs - 1])
-                    nn_graph_sub = diffusion_nn(conn_graph_sub, k=k0, n_comps=diffusion_n_comps)
+                    diffusion_n_comps = np.min([diffusion_n_comps, conn_graph_sub_sub.shape[0] - 1])
+                    nn_results_sub_sub = diffusion_nn(conn_graph_sub_sub, k=k0, n_comps=diffusion_n_comps)
                     # call kBET
                     score, _, _ = kbet(
-                        nn_graph_sub,
-                        batches=batches_sub,
+                        nn_results_sub_sub,
+                        batches=batches_sub[idx_nonan],
                         alpha=alpha,
                     )
                 except ValueError:
                     logger.info("Diffusion distance failed. Skip.")
                     score = 0  # i.e. 100% rejection
-            else:
-                # check the number of components where kBET can be computed upon
-                comp_size = pd.Series(labs).value_counts()
-                # check which components are small
-                comp_size_thresh = 3 * k0
-                idx_nonan = np.flatnonzero(np.in1d(labs, comp_size[comp_size >= comp_size_thresh].index))
-                # check if 75% of all cells can be used for kBET run
-                if len(idx_nonan) / len(labs) >= 0.75:
-                    # create another subset of components, assume they are not visited in a diffusion process
-                    conn_graph_sub_sub = conn_graph_sub[idx_nonan, :][:, idx_nonan]
-                    conn_graph_sub_sub.sort_indices()
-                    try:
-                        diffusion_n_comps = np.min([diffusion_n_comps, conn_graph_sub_sub.shape[0] - 1])
-                        nn_results_sub_sub = diffusion_nn(conn_graph_sub_sub, k=k0, n_comps=diffusion_n_comps)
-                        # call kBET
-                        score, _, _ = kbet(
-                            nn_results_sub_sub,
-                            batches=batches_sub[idx_nonan],
-                            alpha=alpha,
-                        )
-                    except ValueError:
-                        logger.info("Diffusion distance failed. Skip.")
-                        score = 0  # i.e. 100% rejection
-                else:  # if there are too many too small connected components, set kBET score to 0
-                    score = 0  # i.e. 100% rejection
+            else:  # if there are too many too small connected components, set kBET score to 0
+                score = 0  # i.e. 100% rejection
         kbet_scores["cluster"].append(clus)
         kbet_scores["kBET"].append(score)

{scib_metrics-0.5.2 → scib_metrics-0.5.4}/tests/test_benchmarker.py RENAMED Viewed

@@ -21,6 +21,20 @@ def test_benchmarker():
     bm.plot_results_table()
+def test_benchmarker_default():
+    ad, emb_keys, batch_key, labels_key = dummy_benchmarker_adata()
+    bm = Benchmarker(
+        ad,
+        batch_key,
+        labels_key,
+        emb_keys,
+    )
+    bm.benchmark()
+    results = bm.get_results()
+    assert isinstance(results, pd.DataFrame)
+    bm.plot_results_table()
 def test_benchmarker_custom_metric_booleans():
     bioc = BioConservation(
         isolated_labels=False, nmi_ari_cluster_labels_leiden=False, silhouette_label=False, clisi_knn=True