scib-metrics 0.5.5__tar.gz → 0.5.7__tar.gz

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (79)
  1. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/build.yaml +2 -2
  2. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/test_linux.yaml +1 -1
  3. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/test_linux_cuda.yaml +2 -2
  4. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/test_linux_pre.yaml +1 -1
  5. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/test_macos.yaml +1 -1
  6. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/test_macos_m1.yaml +1 -1
  7. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/test_windows.yaml +1 -1
  8. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.pre-commit-config.yaml +2 -2
  9. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.readthedocs.yaml +1 -1
  10. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/CHANGELOG.md +15 -0
  11. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/PKG-INFO +2 -2
  12. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/pyproject.toml +2 -2
  13. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/benchmark/_core.py +34 -18
  14. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_kbet.py +46 -47
  15. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/test_benchmarker.py +12 -0
  16. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.codecov.yaml +0 -0
  17. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.cruft.json +0 -0
  18. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.editorconfig +0 -0
  19. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  20. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  21. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  22. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.github/workflows/release.yaml +0 -0
  23. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/.gitignore +0 -0
  24. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/LICENSE +0 -0
  25. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/README.md +0 -0
  26. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/Makefile +0 -0
  27. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/_static/.gitkeep +0 -0
  28. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/_static/css/custom.css +0 -0
  29. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/_templates/.gitkeep +0 -0
  30. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/_templates/autosummary/class.rst +0 -0
  31. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/_templates/class_no_inherited.rst +0 -0
  32. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/api.md +0 -0
  33. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/changelog.md +0 -0
  34. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/conf.py +0 -0
  35. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/contributing.md +0 -0
  36. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/extensions/.gitkeep +0 -0
  37. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/extensions/typed_returns.py +0 -0
  38. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/index.md +0 -0
  39. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/notebooks/large_scale.ipynb +0 -0
  40. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/notebooks/lung_example.ipynb +0 -0
  41. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/references.bib +0 -0
  42. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/references.md +0 -0
  43. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/template_usage.md +0 -0
  44. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/docs/tutorials.md +0 -0
  45. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/setup.py +0 -0
  46. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/__init__.py +0 -0
  47. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/_settings.py +0 -0
  48. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/_types.py +0 -0
  49. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/benchmark/__init__.py +0 -0
  50. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/__init__.py +0 -0
  51. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_graph_connectivity.py +0 -0
  52. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_isolated_labels.py +0 -0
  53. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_lisi.py +0 -0
  54. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_nmi_ari.py +0 -0
  55. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_pcr_comparison.py +0 -0
  56. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/metrics/_silhouette.py +0 -0
  57. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/nearest_neighbors/__init__.py +0 -0
  58. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/nearest_neighbors/_dataclass.py +0 -0
  59. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/nearest_neighbors/_jax.py +0 -0
  60. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/nearest_neighbors/_pynndescent.py +0 -0
  61. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/__init__.py +0 -0
  62. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_diffusion_nn.py +0 -0
  63. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_dist.py +0 -0
  64. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_kmeans.py +0 -0
  65. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_lisi.py +0 -0
  66. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_pca.py +0 -0
  67. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_pcr.py +0 -0
  68. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_silhouette.py +0 -0
  69. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/src/scib_metrics/utils/_utils.py +0 -0
  70. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/__init__.py +0 -0
  71. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/test_BRAS_metric.py +0 -0
  72. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/test_metrics.py +0 -0
  73. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/test_neighbors.py +0 -0
  74. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/test_pcr_comparison.py +0 -0
  75. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/utils/__init__.py +0 -0
  76. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/utils/data.py +0 -0
  77. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/utils/sampling.py +0 -0
  78. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/utils/test_pca.py +0 -0
  79. {scib_metrics-0.5.5 → scib_metrics-0.5.7}/tests/utils/test_pcr.py +0 -0
.github/workflows/build.yaml
@@ -15,10 +15,10 @@ jobs:
         runs-on: ubuntu-latest
         steps:
             - uses: actions/checkout@v4
-            - name: Set up Python 3.11
+            - name: Set up Python 3.12
               uses: actions/setup-python@v5
               with:
-                  python-version: "3.11"
+                  python-version: "3.12"
                   cache: "pip"
                   cache-dependency-path: "**/pyproject.toml"
             - name: Install build dependencies
.github/workflows/test_linux.yaml
@@ -21,7 +21,7 @@ jobs:
             fail-fast: false
             matrix:
                 os: [ubuntu-latest]
-                python: ["3.10", "3.11", "3.12"]
+                python: ["3.11", "3.12", "3.13"]

         name: Integration

.github/workflows/test_linux_cuda.yaml
@@ -30,8 +30,8 @@ jobs:
         strategy:
             fail-fast: false
             matrix:
-                python: ["3.11"]
-                cuda: ["11"]
+                python: ["3.12"]
+                cuda: ["12"]

         container:
             image: scverse/scvi-tools:py${{ matrix.python }}-cu${{ matrix.cuda }}-base
.github/workflows/test_linux_pre.yaml
@@ -31,7 +31,7 @@ jobs:
             fail-fast: false
             matrix:
                 os: [ubuntu-latest]
-                python: ["3.10", "3.11", "3.12"]
+                python: ["3.11", "3.12", "3.13"]

         name: Integration (Prereleases)

.github/workflows/test_macos.yaml
@@ -20,7 +20,7 @@ jobs:
             fail-fast: false
             matrix:
                 os: [macos-latest]
-                python: ["3.10", "3.11", "3.12"]
+                python: ["3.11", "3.12", "3.13"]

         name: Integration

.github/workflows/test_macos_m1.yaml
@@ -20,7 +20,7 @@ jobs:
             fail-fast: false
             matrix:
                 os: [macos-14]
-                python: ["3.10", "3.11", "3.12"]
+                python: ["3.11", "3.12", "3.13"]

         name: Integration

.github/workflows/test_windows.yaml
@@ -20,7 +20,7 @@ jobs:
             fail-fast: false
             matrix:
                 os: [windows-latest]
-                python: ["3.10", "3.11", "3.12"]
+                python: ["3.11", "3.12", "3.13"]

         name: Integration

.pre-commit-config.yaml
@@ -11,7 +11,7 @@ repos:
       hooks:
           - id: prettier
     - repo: https://github.com/astral-sh/ruff-pre-commit
-      rev: v0.11.12
+      rev: v0.12.12
       hooks:
           - id: ruff
             types_or: [python, pyi, jupyter]
@@ -19,7 +19,7 @@ repos:
           - id: ruff-format
             types_or: [python, pyi, jupyter]
     - repo: https://github.com/pre-commit/pre-commit-hooks
-      rev: v5.0.0
+      rev: v6.0.0
       hooks:
           - id: detect-private-key
           - id: check-ast
.readthedocs.yaml
@@ -3,7 +3,7 @@ version: 2
 build:
     os: ubuntu-20.04
     tools:
-        python: "3.10"
+        python: "3.11"
 sphinx:
     configuration: docs/conf.py
     # disable this for more lenient docs builds
CHANGELOG.md
@@ -10,6 +10,21 @@ and this project adheres to [Semantic Versioning][].

 ## 0.6.0 (unreleased)

+## 0.5.6 (2025-07-08)
+
+### Added
+
+- Add BRAS to Benchmarker as default, instead of regular silhouette batch {pr}`217`
+- Added the option to manually set the KNN graphs before running a benchmarker.
+
+### Changed
+
+- Changed default of min_max_scale in {func}`scib_metrics.benchmark.get_results` to False {pr}`215`.
+
+### Fixed
+
+- Reverted Skip labels before loop {pr}`180`, which caused wrong selection of clusters {pr}`213`.
+
 ## 0.5.5 (2025-06-03)

 ### Added
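
In practice, the 0.5.6 entries above change what a default run reports: the batch-correction table gains a BRAS column in place of the batch silhouette, and get_results() no longer min-max-scales scores unless asked. A minimal sketch of the new defaults (the toy AnnData setup here is illustrative, not part of this diff):

import anndata
import numpy as np
from scib_metrics.benchmark import Benchmarker

# Hypothetical toy data: 300 cells, two batches, three labels, one embedding.
rng = np.random.default_rng(0)
adata = anndata.AnnData(X=rng.normal(size=(300, 50)).astype(np.float32))
adata.obs["batch"] = rng.choice(["b1", "b2"], size=300)
adata.obs["cell_type"] = rng.choice(["t1", "t2", "t3"], size=300)
adata.obsm["X_emb"] = rng.normal(size=(300, 10))

bm = Benchmarker(adata, batch_key="batch", label_key="cell_type", embedding_obsm_keys=["X_emb"])
bm.benchmark()
results = bm.get_results()                   # raw scores: min_max_scale now defaults to False
scaled = bm.get_results(min_max_scale=True)  # opt back into the pre-0.5.6 scaling
assert "BRAS" in results.columns             # replaces the "Silhouette batch" column by default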
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scib-metrics
-Version: 0.5.5
+Version: 0.5.7
 Summary: Accelerated and Python-only scIB metrics
 Project-URL: Documentation, https://scib-metrics.readthedocs.io/
 Project-URL: Source, https://github.com/yoseflab/scib-metrics
@@ -37,7 +37,7 @@ License: BSD 3-Clause License
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 License-File: LICENSE
-Requires-Python: >=3.10
+Requires-Python: >=3.11
 Requires-Dist: anndata
 Requires-Dist: chex
 Requires-Dist: igraph>0.9.0
pyproject.toml
@@ -5,10 +5,10 @@ requires = ["hatchling"]

 [project]
 name = "scib-metrics"
-version = "0.5.5"
+version = "0.5.7"
 description = "Accelerated and Python-only scIB metrics"
 readme = "README.md"
-requires-python = ">=3.10"
+requires-python = ">=3.11"
 license = { file = "LICENSE" }
 authors = [{ name = "Adam Gayoso" }]
 maintainers = [{ name = "Adam Gayoso", email = "adamgayoso@berkeley.edu" }]
src/scib_metrics/benchmark/_core.py
@@ -1,3 +1,4 @@
+import gc
 import os
 import warnings
 from collections.abc import Callable
@@ -42,6 +43,7 @@ metric_name_cleaner = {
     "clisi_knn": "cLISI",
     "ilisi_knn": "iLISI",
     "kbet_per_label": "KBET",
+    "bras": "BRAS",
     "graph_connectivity": "Graph connectivity",
     "pcr_comparison": "PCR comparison",
 }
@@ -72,7 +74,7 @@ class BatchCorrection:
     parameters, such as `X` or `labels`.
     """

-    silhouette_batch: MetricType = True
+    bras: MetricType = True
     ilisi_knn: MetricType = True
     kbet_per_label: MetricType = True
     graph_connectivity: MetricType = True
@@ -88,7 +90,7 @@ class MetricAnnDataAPI(Enum):
     silhouette_label = lambda ad, fn: fn(ad.X, ad.obs[_LABELS])
     clisi_knn = lambda ad, fn: fn(ad.uns["90_neighbor_res"], ad.obs[_LABELS])
     graph_connectivity = lambda ad, fn: fn(ad.uns["15_neighbor_res"], ad.obs[_LABELS])
-    silhouette_batch = lambda ad, fn: fn(ad.X, ad.obs[_LABELS], ad.obs[_BATCH])
+    bras = lambda ad, fn: fn(ad.X, ad.obs[_LABELS], ad.obs[_BATCH])
     pcr_comparison = lambda ad, fn: fn(ad.obsm[_X_PRE], ad.X, ad.obs[_BATCH], categorical=True)
     ilisi_knn = lambda ad, fn: fn(ad.uns["90_neighbor_res"], ad.obs[_BATCH])
     kbet_per_label = lambda ad, fn: fn(ad.uns["50_neighbor_res"], ad.obs[_BATCH], ad.obs[_LABELS])
@@ -119,6 +121,8 @@ class Benchmarker:
     progress_bar
         Whether to show a progress bar for :meth:`~scib_metrics.benchmark.Benchmarker.prepare` and
         :meth:`~scib_metrics.benchmark.Benchmarker.benchmark`.
+    solver
+        SVD solver to use during PCA. can help stability issues. Choose from: "arpack", "randomized" or "auto"

     Notes
     -----
@@ -141,6 +145,7 @@ class Benchmarker:
         pre_integrated_embedding_obsm_key: str | None = None,
         n_jobs: int = 1,
         progress_bar: bool = True,
+        solver: str = "arpack",
     ):
         self._adata = adata
         self._embedding_obsm_keys = embedding_obsm_keys
@@ -156,6 +161,8 @@ class Benchmarker:
         self._label_key = label_key
         self._n_jobs = n_jobs
         self._progress_bar = progress_bar
+        self._compute_neighbors = True
+        self._solver = solver

         if self._bio_conservation_metrics is None and self._batch_correction_metrics is None:
             raise ValueError("Either batch or bio metrics must be defined.")
@@ -177,11 +184,13 @@ class Benchmarker:
         the data and the number of neighbors to compute and return a :class:`~scib_metrics.utils.nearest_neighbors.NeighborsResults`
         object.
         """
+        gc.collect()
+
         # Compute PCA
         if self._pre_integrated_embedding_obsm_key is None:
             # This is how scib does it
             # https://github.com/theislab/scib/blob/896f689e5fe8c57502cb012af06bed1a9b2b61d2/scib/metrics/pcr.py#L197
-            sc.tl.pca(self._adata, use_highly_variable=False)
+            sc.tl.pca(self._adata, svd_solver=self._solver, use_highly_variable=False)
             self._pre_integrated_embedding_obsm_key = "X_pca"

         for emb_key in self._embedding_obsm_keys:
@@ -191,19 +200,25 @@ class Benchmarker:
             self._emb_adatas[emb_key].obsm[_X_PRE] = self._adata.obsm[self._pre_integrated_embedding_obsm_key]

         # Compute neighbors
-        progress = self._emb_adatas.values()
-        if self._progress_bar:
-            progress = tqdm(progress, desc="Computing neighbors")
-
-        for ad in progress:
-            if neighbor_computer is not None:
-                neigh_result = neighbor_computer(ad.X, max(self._neighbor_values))
-            else:
-                neigh_result = pynndescent(
-                    ad.X, n_neighbors=max(self._neighbor_values), random_state=0, n_jobs=self._n_jobs
-                )
-            for n in self._neighbor_values:
-                ad.uns[f"{n}_neighbor_res"] = neigh_result.subset_neighbors(n=n)
+        if self._compute_neighbors:
+            progress = self._emb_adatas.values()
+            if self._progress_bar:
+                progress = tqdm(progress, desc="Computing neighbors")
+
+            for ad in progress:
+                if neighbor_computer is not None:
+                    neigh_result = neighbor_computer(ad.X, max(self._neighbor_values))
+                else:
+                    neigh_result = pynndescent(
+                        ad.X, n_neighbors=max(self._neighbor_values), random_state=0, n_jobs=self._n_jobs
+                    )
+                for n in self._neighbor_values:
+                    ad.uns[f"{n}_neighbor_res"] = neigh_result.subset_neighbors(n=n)
+        else:
+            warnings.warn(
+                "Computing Neighbors Skipped",
+                UserWarning,
+            )

         self._prepared = True

@@ -232,6 +247,7 @@ class Benchmarker:
             pbar = tqdm(total=num_metrics, desc="Metrics", position=1, leave=False, colour="blue")
         for metric_type, metric_collection in self._metric_collection_dict.items():
             for metric_name, use_metric_or_kwargs in asdict(metric_collection).items():
+                gc.collect()
                 if use_metric_or_kwargs:
                     pbar.set_postfix_str(f"{metric_type}: {metric_name}") if pbar is not None else None
                     metric_fn = getattr(scib_metrics, metric_name)
@@ -251,7 +267,7 @@ class Benchmarker:

         self._benchmarked = True

-    def get_results(self, min_max_scale: bool = True, clean_names: bool = True) -> pd.DataFrame:
+    def get_results(self, min_max_scale: bool = False, clean_names: bool = True) -> pd.DataFrame:
         """Return the benchmarking results.

         Parameters
@@ -291,7 +307,7 @@ class Benchmarker:
         df.loc[_METRIC_TYPE, per_class_score.columns] = _AGGREGATE_SCORE
         return df

-    def plot_results_table(self, min_max_scale: bool = True, show: bool = True, save_dir: str | None = None) -> Table:
+    def plot_results_table(self, min_max_scale: bool = False, show: bool = True, save_dir: str | None = None) -> Table:
         """Plot the benchmarking results.

         Parameters
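
The new solver argument above is forwarded to sc.tl.pca as svd_solver when no pre-integrated embedding is supplied, so the SVD solver becomes a constructor-level choice. A minimal sketch, reusing the hypothetical adata from the earlier sketch:

from scib_metrics.benchmark import Benchmarker

# "arpack" remains the default; per the new docstring, "randomized" or "auto"
# may help if ARPACK hits stability problems on a given matrix.
bm = Benchmarker(
    adata,
    batch_key="batch",
    label_key="cell_type",
    embedding_obsm_keys=["X_emb"],
    solver="randomized",  # passed through as sc.tl.pca(..., svd_solver="randomized")
)
bm.benchmark()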
src/scib_metrics/metrics/_kbet.py
@@ -138,14 +138,8 @@ def kbet_per_label(
     conn_graph = X.knn_graph_connectivities

     # prepare call of kBET per cluster
-    clusters = []
-    clusters, counts = np.unique(labels, return_counts=True)
-    skipped = clusters[counts > 10]
-    clusters = clusters[counts <= 10]
-    kbet_scores = {"cluster": list(skipped), "kBET": [np.nan] * len(skipped)}
-    logger.info(f"{len(skipped)} clusters consist of a single batch or are too small. Skip.")
-
-    for clus in clusters:
+    kbet_scores = {"cluster": [], "kBET": []}
+    for clus in np.unique(labels):
         # subset by label
         mask = labels == clus
         conn_graph_sub = conn_graph[mask, :][:, mask]
@@ -153,55 +147,60 @@ def kbet_per_label(
         n_obs = conn_graph_sub.shape[0]
         batches_sub = batches[mask]

-        quarter_mean = np.floor(np.mean(pd.Series(batches_sub).value_counts()) / 4).astype("int")
-        k0 = np.min([70, np.max([10, quarter_mean])])
-        # check k0 for reasonability
-        if k0 * n_obs >= size_max:
-            k0 = np.floor(size_max / n_obs).astype("int")
-
-        n_comp, labs = scipy.sparse.csgraph.connected_components(conn_graph_sub, connection="strong")
-
-        if n_comp == 1:  # a single component to compute kBET on
-            try:
-                diffusion_n_comps = np.min([diffusion_n_comps, n_obs - 1])
-                nn_graph_sub = diffusion_nn(conn_graph_sub, k=k0, n_comps=diffusion_n_comps)
-                # call kBET
-                score, _, _ = kbet(
-                    nn_graph_sub,
-                    batches=batches_sub,
-                    alpha=alpha,
-                )
-            except ValueError:
-                logger.info("Diffusion distance failed. Skip.")
-                score = 0  # i.e. 100% rejection
-
+        # check if neighborhood size too small or only one batch in subset
+        if np.logical_or(n_obs < 10, len(np.unique(batches_sub)) == 1):
+            logger.info(f"{clus} consists of a single batch or is too small. Skip.")
+            score = np.nan
         else:
-            # check the number of components where kBET can be computed upon
-            comp_size = pd.Series(labs).value_counts()
-            # check which components are small
-            comp_size_thresh = 3 * k0
-            idx_nonan = np.flatnonzero(np.in1d(labs, comp_size[comp_size >= comp_size_thresh].index))
-
-            # check if 75% of all cells can be used for kBET run
-            if len(idx_nonan) / len(labs) >= 0.75:
-                # create another subset of components, assume they are not visited in a diffusion process
-                conn_graph_sub_sub = conn_graph_sub[idx_nonan, :][:, idx_nonan]
-                conn_graph_sub_sub.sort_indices()
+            quarter_mean = np.floor(np.mean(pd.Series(batches_sub).value_counts()) / 4).astype("int")
+            k0 = np.min([70, np.max([10, quarter_mean])])
+            # check k0 for reasonability
+            if k0 * n_obs >= size_max:
+                k0 = np.floor(size_max / n_obs).astype("int")
+
+            n_comp, labs = scipy.sparse.csgraph.connected_components(conn_graph_sub, connection="strong")

+            if n_comp == 1:  # a single component to compute kBET on
                 try:
-                    diffusion_n_comps = np.min([diffusion_n_comps, conn_graph_sub_sub.shape[0] - 1])
-                    nn_results_sub_sub = diffusion_nn(conn_graph_sub_sub, k=k0, n_comps=diffusion_n_comps)
+                    diffusion_n_comps = np.min([diffusion_n_comps, n_obs - 1])
+                    nn_graph_sub = diffusion_nn(conn_graph_sub, k=k0, n_comps=diffusion_n_comps)
                     # call kBET
                     score, _, _ = kbet(
-                        nn_results_sub_sub,
-                        batches=batches_sub[idx_nonan],
+                        nn_graph_sub,
+                        batches=batches_sub,
                         alpha=alpha,
                     )
                 except ValueError:
                     logger.info("Diffusion distance failed. Skip.")
                     score = 0  # i.e. 100% rejection
-            else:  # if there are too many too small connected components, set kBET score to 0
-                score = 0  # i.e. 100% rejection
+
+            else:
+                # check the number of components where kBET can be computed upon
+                comp_size = pd.Series(labs).value_counts()
+                # check which components are small
+                comp_size_thresh = 3 * k0
+                idx_nonan = np.flatnonzero(np.in1d(labs, comp_size[comp_size >= comp_size_thresh].index))
+
+                # check if 75% of all cells can be used for kBET run
+                if len(idx_nonan) / len(labs) >= 0.75:
+                    # create another subset of components, assume they are not visited in a diffusion process
+                    conn_graph_sub_sub = conn_graph_sub[idx_nonan, :][:, idx_nonan]
+                    conn_graph_sub_sub.sort_indices()
+
+                    try:
+                        diffusion_n_comps = np.min([diffusion_n_comps, conn_graph_sub_sub.shape[0] - 1])
+                        nn_results_sub_sub = diffusion_nn(conn_graph_sub_sub, k=k0, n_comps=diffusion_n_comps)
+                        # call kBET
+                        score, _, _ = kbet(
+                            nn_results_sub_sub,
+                            batches=batches_sub[idx_nonan],
+                            alpha=alpha,
+                        )
+                    except ValueError:
+                        logger.info("Diffusion distance failed. Skip.")
+                        score = 0  # i.e. 100% rejection
+                else:  # if there are too many too small connected components, set kBET score to 0
+                    score = 0  # i.e. 100% rejection

         kbet_scores["cluster"].append(clus)
         kbet_scores["kBET"].append(score)
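
The revert above matters because the removed pre-loop filter had its comparisons inverted: skipped = clusters[counts > 10] marked the large clusters as skipped and kept only the small ones, exactly the wrong selection that {pr}`213` reverts. The restored per-cluster guard is equivalent to this standalone sketch (the helper name is hypothetical, not part of the diff):

import numpy as np

def should_skip_cluster(labels: np.ndarray, batches: np.ndarray, clus) -> bool:
    """True if a cluster is too small for kBET or contains only one batch."""
    mask = labels == clus
    return mask.sum() < 10 or len(np.unique(batches[mask])) == 1

Clusters failing the guard are still recorded with a kBET score of np.nan; only the selection logic changed.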
tests/test_benchmarker.py
@@ -1,4 +1,5 @@
 import pandas as pd
+import pytest

 from scib_metrics.benchmark import BatchCorrection, Benchmarker, BioConservation
 from scib_metrics.nearest_neighbors import jax_approx_min_k
@@ -52,6 +53,7 @@ def test_benchmarker_custom_metric_booleans():
     assert "kbet_per_label" not in results.columns
     assert "graph_connectivity" not in results.columns
     assert "ilisi_knn" in results.columns
+    assert "bras" in results.columns


 def test_benchmarker_custom_metric_callable():
@@ -80,3 +82,13 @@ def test_benchmarker_custom_near_neighs():
     results = bm.get_results()
     assert isinstance(results, pd.DataFrame)
     bm.plot_results_table()
+
+
+@pytest.mark.parametrize("solver", ["arpack", "randomized"])
+def test_benchmarker_different_solvers(solver):
+    ad, emb_keys, batch_key, labels_key = dummy_benchmarker_adata()
+    bm = Benchmarker(ad, batch_key, labels_key, emb_keys, solver=solver)
+    bm.benchmark()
+    results = bm.get_results()
+    assert isinstance(results, pd.DataFrame)
+    bm.plot_results_table()
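
The added test is parametrized over both SVD solvers; it can be run in isolation with:

pytest tests/test_benchmarker.py::test_benchmarker_different_solvers

which pytest expands into one run per entry in the parametrize list.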