scib-metrics 0.5.5__py3-none-any.whl → 0.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -42,6 +42,7 @@ metric_name_cleaner = {
42
42
  "clisi_knn": "cLISI",
43
43
  "ilisi_knn": "iLISI",
44
44
  "kbet_per_label": "KBET",
45
+ "bras": "BRAS",
45
46
  "graph_connectivity": "Graph connectivity",
46
47
  "pcr_comparison": "PCR comparison",
47
48
  }
@@ -72,7 +73,7 @@ class BatchCorrection:
72
73
  parameters, such as `X` or `labels`.
73
74
  """
74
75
 
75
- silhouette_batch: MetricType = True
76
+ bras: MetricType = True
76
77
  ilisi_knn: MetricType = True
77
78
  kbet_per_label: MetricType = True
78
79
  graph_connectivity: MetricType = True
@@ -88,7 +89,7 @@ class MetricAnnDataAPI(Enum):
88
89
  silhouette_label = lambda ad, fn: fn(ad.X, ad.obs[_LABELS])
89
90
  clisi_knn = lambda ad, fn: fn(ad.uns["90_neighbor_res"], ad.obs[_LABELS])
90
91
  graph_connectivity = lambda ad, fn: fn(ad.uns["15_neighbor_res"], ad.obs[_LABELS])
91
- silhouette_batch = lambda ad, fn: fn(ad.X, ad.obs[_LABELS], ad.obs[_BATCH])
92
+ bras = lambda ad, fn: fn(ad.X, ad.obs[_LABELS], ad.obs[_BATCH])
92
93
  pcr_comparison = lambda ad, fn: fn(ad.obsm[_X_PRE], ad.X, ad.obs[_BATCH], categorical=True)
93
94
  ilisi_knn = lambda ad, fn: fn(ad.uns["90_neighbor_res"], ad.obs[_BATCH])
94
95
  kbet_per_label = lambda ad, fn: fn(ad.uns["50_neighbor_res"], ad.obs[_BATCH], ad.obs[_LABELS])
@@ -156,6 +157,7 @@ class Benchmarker:
156
157
  self._label_key = label_key
157
158
  self._n_jobs = n_jobs
158
159
  self._progress_bar = progress_bar
160
+ self._compute_neighbors = True
159
161
 
160
162
  if self._bio_conservation_metrics is None and self._batch_correction_metrics is None:
161
163
  raise ValueError("Either batch or bio metrics must be defined.")
@@ -191,19 +193,25 @@ class Benchmarker:
191
193
  self._emb_adatas[emb_key].obsm[_X_PRE] = self._adata.obsm[self._pre_integrated_embedding_obsm_key]
192
194
 
193
195
  # Compute neighbors
194
- progress = self._emb_adatas.values()
195
- if self._progress_bar:
196
- progress = tqdm(progress, desc="Computing neighbors")
197
-
198
- for ad in progress:
199
- if neighbor_computer is not None:
200
- neigh_result = neighbor_computer(ad.X, max(self._neighbor_values))
201
- else:
202
- neigh_result = pynndescent(
203
- ad.X, n_neighbors=max(self._neighbor_values), random_state=0, n_jobs=self._n_jobs
204
- )
205
- for n in self._neighbor_values:
206
- ad.uns[f"{n}_neighbor_res"] = neigh_result.subset_neighbors(n=n)
196
+ if self._compute_neighbors:
197
+ progress = self._emb_adatas.values()
198
+ if self._progress_bar:
199
+ progress = tqdm(progress, desc="Computing neighbors")
200
+
201
+ for ad in progress:
202
+ if neighbor_computer is not None:
203
+ neigh_result = neighbor_computer(ad.X, max(self._neighbor_values))
204
+ else:
205
+ neigh_result = pynndescent(
206
+ ad.X, n_neighbors=max(self._neighbor_values), random_state=0, n_jobs=self._n_jobs
207
+ )
208
+ for n in self._neighbor_values:
209
+ ad.uns[f"{n}_neighbor_res"] = neigh_result.subset_neighbors(n=n)
210
+ else:
211
+ warnings.warn(
212
+ "Computing Neighbors Skipped",
213
+ UserWarning,
214
+ )
207
215
 
208
216
  self._prepared = True
209
217
 
@@ -251,7 +259,7 @@ class Benchmarker:
251
259
 
252
260
  self._benchmarked = True
253
261
 
254
- def get_results(self, min_max_scale: bool = True, clean_names: bool = True) -> pd.DataFrame:
262
+ def get_results(self, min_max_scale: bool = False, clean_names: bool = True) -> pd.DataFrame:
255
263
  """Return the benchmarking results.
256
264
 
257
265
  Parameters
@@ -291,7 +299,7 @@ class Benchmarker:
291
299
  df.loc[_METRIC_TYPE, per_class_score.columns] = _AGGREGATE_SCORE
292
300
  return df
293
301
 
294
- def plot_results_table(self, min_max_scale: bool = True, show: bool = True, save_dir: str | None = None) -> Table:
302
+ def plot_results_table(self, min_max_scale: bool = False, show: bool = True, save_dir: str | None = None) -> Table:
295
303
  """Plot the benchmarking results.
296
304
 
297
305
  Parameters
@@ -138,14 +138,8 @@ def kbet_per_label(
138
138
  conn_graph = X.knn_graph_connectivities
139
139
 
140
140
  # prepare call of kBET per cluster
141
- clusters = []
142
- clusters, counts = np.unique(labels, return_counts=True)
143
- skipped = clusters[counts > 10]
144
- clusters = clusters[counts <= 10]
145
- kbet_scores = {"cluster": list(skipped), "kBET": [np.nan] * len(skipped)}
146
- logger.info(f"{len(skipped)} clusters consist of a single batch or are too small. Skip.")
147
-
148
- for clus in clusters:
141
+ kbet_scores = {"cluster": [], "kBET": []}
142
+ for clus in np.unique(labels):
149
143
  # subset by label
150
144
  mask = labels == clus
151
145
  conn_graph_sub = conn_graph[mask, :][:, mask]
@@ -153,55 +147,60 @@ def kbet_per_label(
153
147
  n_obs = conn_graph_sub.shape[0]
154
148
  batches_sub = batches[mask]
155
149
 
156
- quarter_mean = np.floor(np.mean(pd.Series(batches_sub).value_counts()) / 4).astype("int")
157
- k0 = np.min([70, np.max([10, quarter_mean])])
158
- # check k0 for reasonability
159
- if k0 * n_obs >= size_max:
160
- k0 = np.floor(size_max / n_obs).astype("int")
161
-
162
- n_comp, labs = scipy.sparse.csgraph.connected_components(conn_graph_sub, connection="strong")
163
-
164
- if n_comp == 1: # a single component to compute kBET on
165
- try:
166
- diffusion_n_comps = np.min([diffusion_n_comps, n_obs - 1])
167
- nn_graph_sub = diffusion_nn(conn_graph_sub, k=k0, n_comps=diffusion_n_comps)
168
- # call kBET
169
- score, _, _ = kbet(
170
- nn_graph_sub,
171
- batches=batches_sub,
172
- alpha=alpha,
173
- )
174
- except ValueError:
175
- logger.info("Diffusion distance failed. Skip.")
176
- score = 0 # i.e. 100% rejection
177
-
150
+ # check if neighborhood size too small or only one batch in subset
151
+ if np.logical_or(n_obs < 10, len(np.unique(batches_sub)) == 1):
152
+ logger.info(f"{clus} consists of a single batch or is too small. Skip.")
153
+ score = np.nan
178
154
  else:
179
- # check the number of components where kBET can be computed upon
180
- comp_size = pd.Series(labs).value_counts()
181
- # check which components are small
182
- comp_size_thresh = 3 * k0
183
- idx_nonan = np.flatnonzero(np.in1d(labs, comp_size[comp_size >= comp_size_thresh].index))
184
-
185
- # check if 75% of all cells can be used for kBET run
186
- if len(idx_nonan) / len(labs) >= 0.75:
187
- # create another subset of components, assume they are not visited in a diffusion process
188
- conn_graph_sub_sub = conn_graph_sub[idx_nonan, :][:, idx_nonan]
189
- conn_graph_sub_sub.sort_indices()
155
+ quarter_mean = np.floor(np.mean(pd.Series(batches_sub).value_counts()) / 4).astype("int")
156
+ k0 = np.min([70, np.max([10, quarter_mean])])
157
+ # check k0 for reasonability
158
+ if k0 * n_obs >= size_max:
159
+ k0 = np.floor(size_max / n_obs).astype("int")
160
+
161
+ n_comp, labs = scipy.sparse.csgraph.connected_components(conn_graph_sub, connection="strong")
190
162
 
163
+ if n_comp == 1: # a single component to compute kBET on
191
164
  try:
192
- diffusion_n_comps = np.min([diffusion_n_comps, conn_graph_sub_sub.shape[0] - 1])
193
- nn_results_sub_sub = diffusion_nn(conn_graph_sub_sub, k=k0, n_comps=diffusion_n_comps)
165
+ diffusion_n_comps = np.min([diffusion_n_comps, n_obs - 1])
166
+ nn_graph_sub = diffusion_nn(conn_graph_sub, k=k0, n_comps=diffusion_n_comps)
194
167
  # call kBET
195
168
  score, _, _ = kbet(
196
- nn_results_sub_sub,
197
- batches=batches_sub[idx_nonan],
169
+ nn_graph_sub,
170
+ batches=batches_sub,
198
171
  alpha=alpha,
199
172
  )
200
173
  except ValueError:
201
174
  logger.info("Diffusion distance failed. Skip.")
202
175
  score = 0 # i.e. 100% rejection
203
- else: # if there are too many too small connected components, set kBET score to 0
204
- score = 0 # i.e. 100% rejection
176
+
177
+ else:
178
+ # check the number of components where kBET can be computed upon
179
+ comp_size = pd.Series(labs).value_counts()
180
+ # check which components are small
181
+ comp_size_thresh = 3 * k0
182
+ idx_nonan = np.flatnonzero(np.in1d(labs, comp_size[comp_size >= comp_size_thresh].index))
183
+
184
+ # check if 75% of all cells can be used for kBET run
185
+ if len(idx_nonan) / len(labs) >= 0.75:
186
+ # create another subset of components, assume they are not visited in a diffusion process
187
+ conn_graph_sub_sub = conn_graph_sub[idx_nonan, :][:, idx_nonan]
188
+ conn_graph_sub_sub.sort_indices()
189
+
190
+ try:
191
+ diffusion_n_comps = np.min([diffusion_n_comps, conn_graph_sub_sub.shape[0] - 1])
192
+ nn_results_sub_sub = diffusion_nn(conn_graph_sub_sub, k=k0, n_comps=diffusion_n_comps)
193
+ # call kBET
194
+ score, _, _ = kbet(
195
+ nn_results_sub_sub,
196
+ batches=batches_sub[idx_nonan],
197
+ alpha=alpha,
198
+ )
199
+ except ValueError:
200
+ logger.info("Diffusion distance failed. Skip.")
201
+ score = 0 # i.e. 100% rejection
202
+ else: # if there are too many too small connected components, set kBET score to 0
203
+ score = 0 # i.e. 100% rejection
205
204
 
206
205
  kbet_scores["cluster"].append(clus)
207
206
  kbet_scores["kBET"].append(score)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scib-metrics
3
- Version: 0.5.5
3
+ Version: 0.5.6
4
4
  Summary: Accelerated and Python-only scIB metrics
5
5
  Project-URL: Documentation, https://scib-metrics.readthedocs.io/
6
6
  Project-URL: Source, https://github.com/yoseflab/scib-metrics
@@ -2,11 +2,11 @@ scib_metrics/__init__.py,sha256=Vejvv3Nhi8fTlIKij3CAMe484URT9quMUD9MlwVvZBg,971
2
2
  scib_metrics/_settings.py,sha256=Rd4ymmbFoNTOfxnB87TU2-CFfQ4OJDZ94mJxhJTyS6A,4261
3
3
  scib_metrics/_types.py,sha256=yp76iBm1XjWhTmU_cbufJwrgiwUz4-L3J7DV3imiAhk,201
4
4
  scib_metrics/benchmark/__init__.py,sha256=HVUERJlRrEZV7BhpKJIsmOd1D_4XPoG2bp48AMlR3wY,130
5
- scib_metrics/benchmark/_core.py,sha256=HOI9gG-TGt1nSadtdSgK9oRz0ZKel_iCrUfskwV03nk,16253
5
+ scib_metrics/benchmark/_core.py,sha256=lEGxUZnjTrWHrz2T5i4txO222soTKCd5aqYbHH30rG4,16505
6
6
  scib_metrics/metrics/__init__.py,sha256=coYlfeP21IrXdB4KG9T5K4i8C3WTl_uyX2ANu23sIkg,685
7
7
  scib_metrics/metrics/_graph_connectivity.py,sha256=jFc10EINB4AohbgAjV7-m1SGfAgu5tBeUk9ZcuoaSwY,1076
8
8
  scib_metrics/metrics/_isolated_labels.py,sha256=HkZKRop-I561rm_2H_23hsLFgCT36BHIyEWjzq2GYOc,2457
9
- scib_metrics/metrics/_kbet.py,sha256=fgVMO0RbevGvtR0hOSIWyrr4aw93qCCNcFVf_7PFIuE,8241
9
+ scib_metrics/metrics/_kbet.py,sha256=JAmudHHv2EZ8l09fVTgFL_mgs99lgwHidx3zmKMEMvU,8419
10
10
  scib_metrics/metrics/_lisi.py,sha256=Le-1qCeJP_O1gnFgxaNXhirJU6QcK1j6km4nb28Oa7k,3503
11
11
  scib_metrics/metrics/_nmi_ari.py,sha256=kNKxPeEJKPVQXpMHFrENCTQdHLH84zdthq84xslsecg,4683
12
12
  scib_metrics/metrics/_pcr_comparison.py,sha256=dxj8uKhsMMmD-GrryfVgJyTlzhdRu08kujd8-e9SC8g,1983
@@ -24,7 +24,7 @@ scib_metrics/utils/_pca.py,sha256=uXYwX9gZkA1bCKGpOtbe6aKgnfMoL-SK3u5q7CACXWA,42
24
24
  scib_metrics/utils/_pcr.py,sha256=wORbVtZrStbhFHWNMuY5F6qajyn3jnESoc5oTT9sjXI,2466
25
25
  scib_metrics/utils/_silhouette.py,sha256=JlxlEDW55KgrlxJSCYCukg1MBnILnFFludcjQyn9Z7Q,6644
26
26
  scib_metrics/utils/_utils.py,sha256=CRzgQykQPK0XTM46Ukpr7nI2yE5_rIAgZbotj5ZqDJs,1999
27
- scib_metrics-0.5.5.dist-info/METADATA,sha256=BYZZUQNVwbadSvUdD_KfdxdDURI4999mh81cSYmp6b8,8210
28
- scib_metrics-0.5.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
29
- scib_metrics-0.5.5.dist-info/licenses/LICENSE,sha256=GRRQaq9hdMbxLTKedbegvvcbeF-Vh8UeHYIXoYUXXKM,1519
30
- scib_metrics-0.5.5.dist-info/RECORD,,
27
+ scib_metrics-0.5.6.dist-info/METADATA,sha256=F_rf6lsKCIN25eA0anYEprtCbMJOSvoSTZOKFAi-uKw,8210
28
+ scib_metrics-0.5.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
29
+ scib_metrics-0.5.6.dist-info/licenses/LICENSE,sha256=GRRQaq9hdMbxLTKedbegvvcbeF-Vh8UeHYIXoYUXXKM,1519
30
+ scib_metrics-0.5.6.dist-info/RECORD,,