sclab 0.1.7__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. sclab/__init__.py +3 -1
  2. sclab/_io.py +83 -12
  3. sclab/_methods_registry.py +65 -0
  4. sclab/_sclab.py +241 -21
  5. sclab/dataset/_dataset.py +4 -6
  6. sclab/dataset/processor/_processor.py +41 -19
  7. sclab/dataset/processor/_results_panel.py +94 -0
  8. sclab/dataset/processor/step/_processor_step_base.py +12 -6
  9. sclab/examples/processor_steps/__init__.py +8 -0
  10. sclab/examples/processor_steps/_cluster.py +2 -2
  11. sclab/examples/processor_steps/_differential_expression.py +329 -0
  12. sclab/examples/processor_steps/_doublet_detection.py +68 -0
  13. sclab/examples/processor_steps/_gene_expression.py +125 -0
  14. sclab/examples/processor_steps/_integration.py +116 -0
  15. sclab/examples/processor_steps/_neighbors.py +26 -6
  16. sclab/examples/processor_steps/_pca.py +13 -8
  17. sclab/examples/processor_steps/_preprocess.py +52 -25
  18. sclab/examples/processor_steps/_qc.py +24 -8
  19. sclab/examples/processor_steps/_umap.py +2 -2
  20. sclab/gui/__init__.py +0 -0
  21. sclab/gui/components/__init__.py +7 -0
  22. sclab/gui/components/_guided_pseudotime.py +482 -0
  23. sclab/gui/components/_transfer_metadata.py +186 -0
  24. sclab/methods/__init__.py +50 -0
  25. sclab/preprocess/__init__.py +26 -0
  26. sclab/preprocess/_cca.py +176 -0
  27. sclab/preprocess/_cca_integrate.py +109 -0
  28. sclab/preprocess/_filter_obs.py +42 -0
  29. sclab/preprocess/_harmony.py +421 -0
  30. sclab/preprocess/_harmony_integrate.py +53 -0
  31. sclab/preprocess/_normalize_weighted.py +65 -0
  32. sclab/preprocess/_pca.py +51 -0
  33. sclab/preprocess/_preprocess.py +155 -0
  34. sclab/preprocess/_qc.py +38 -0
  35. sclab/preprocess/_rpca.py +116 -0
  36. sclab/preprocess/_subset.py +208 -0
  37. sclab/preprocess/_transfer_metadata.py +196 -0
  38. sclab/preprocess/_transform.py +82 -0
  39. sclab/preprocess/_utils.py +96 -0
  40. sclab/scanpy/__init__.py +0 -0
  41. sclab/scanpy/_compat.py +92 -0
  42. sclab/scanpy/_settings.py +526 -0
  43. sclab/scanpy/logging.py +290 -0
  44. sclab/scanpy/plotting/__init__.py +0 -0
  45. sclab/scanpy/plotting/_rcmod.py +73 -0
  46. sclab/scanpy/plotting/palettes.py +221 -0
  47. sclab/scanpy/readwrite.py +1108 -0
  48. sclab/tools/__init__.py +0 -0
  49. sclab/tools/cellflow/__init__.py +0 -0
  50. sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
  51. sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
  52. sclab/tools/cellflow/pseudotime/__init__.py +0 -0
  53. sclab/tools/cellflow/pseudotime/_pseudotime.py +336 -0
  54. sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
  55. sclab/tools/cellflow/utils/__init__.py +0 -0
  56. sclab/tools/cellflow/utils/density_nd.py +215 -0
  57. sclab/tools/cellflow/utils/interpolate.py +334 -0
  58. sclab/tools/cellflow/utils/periodic_genes.py +106 -0
  59. sclab/tools/cellflow/utils/smoothen.py +124 -0
  60. sclab/tools/cellflow/utils/times.py +55 -0
  61. sclab/tools/differential_expression/__init__.py +7 -0
  62. sclab/tools/differential_expression/_pseudobulk_edger.py +309 -0
  63. sclab/tools/differential_expression/_pseudobulk_helpers.py +290 -0
  64. sclab/tools/differential_expression/_pseudobulk_limma.py +257 -0
  65. sclab/tools/doublet_detection/__init__.py +5 -0
  66. sclab/tools/doublet_detection/_scrublet.py +64 -0
  67. sclab/tools/embedding/__init__.py +0 -0
  68. sclab/tools/imputation/__init__.py +0 -0
  69. sclab/tools/imputation/_alra.py +135 -0
  70. sclab/tools/labeling/__init__.py +6 -0
  71. sclab/tools/labeling/sctype.py +233 -0
  72. sclab/tools/utils/__init__.py +5 -0
  73. sclab/tools/utils/_aggregate_and_filter.py +290 -0
  74. sclab/utils/__init__.py +5 -0
  75. sclab/utils/_write_excel.py +510 -0
  76. {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/METADATA +29 -12
  77. sclab-0.3.4.dist-info/RECORD +93 -0
  78. {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/WHEEL +1 -1
  79. sclab-0.3.4.dist-info/licenses/LICENSE +29 -0
  80. sclab-0.1.7.dist-info/RECORD +0 -30
@@ -0,0 +1,155 @@
1
+ import warnings
2
+ from typing import Literal
3
+
4
+ import numpy as np
5
+ from anndata import AnnData, ImplicitModificationWarning
6
+ from tqdm.auto import tqdm
7
+
8
+
9
def preprocess(
    adata: AnnData,
    counts_layer: str = "counts",
    group_by: str | None = None,
    min_cells: int = 5,
    min_genes: int = 5,
    compute_hvg: bool = True,
    regress_total_counts: bool = False,
    regress_n_genes: bool = False,
    normalization_method: Literal["library", "weighted", "none"] = "library",
    target_scale: float = 1e4,
    weighted_norm_quantile: float = 0.9,
    log1p: bool = True,
    scale: bool = True,
):
    """Filter, normalize and transform the counts of ``adata`` in place.

    The steps run in this order, each one advancing a progress bar:

    1. Make sure ``adata.layers[counts_layer]`` exists (a copy of ``adata.X``
       is stored if it does not) and that a ``f"{counts_layer}_log1p"`` layer
       exists.
    2. Reset ``adata.X`` to a copy of the counts layer, compute QC metrics,
       filter cells and genes, and recompute the QC metrics.
    3. Optionally flag highly variable genes as the union of the ``"seurat"``
       selection (on the log1p layer) and the ``"seurat_v3_paper"`` selection
       (on the counts layer, asking for as many genes as the first selection
       returned).
    4. Normalize, log-transform, regress out covariates and scale, depending
       on the flags below.
    5. Store the resulting matrix in a layer named after the applied steps
       and cast ``adata.X`` to ``float32``.

    Parameters
    ----------
    adata : AnnData
        Annotated data matrix. Modified in place.
    counts_layer : str
        Name of the layer holding the raw counts.
    group_by : str | None
        Column of ``adata.obs``. When given, highly variable genes are
        selected per group (stored in ``adata.var[f"highly_variable_{name}"]``
        and combined with a logical OR into ``adata.var["highly_variable"]``),
        scaling is done per group, and the column is forwarded as
        ``batch_key`` to the weighted normalization.
    min_cells : int
        Passed to ``sc.pp.filter_genes``.
    min_genes : int
        Passed to ``sc.pp.filter_cells``.
    compute_hvg : bool
        Whether to flag highly variable genes.
    regress_total_counts : bool
        Regress out the total counts (``log1p_total_counts`` when ``log1p`` is
        set, ``total_counts`` otherwise).
    regress_n_genes : bool
        Regress out ``n_genes_by_counts``.
    normalization_method : {"library", "weighted", "none"}
        ``"library"`` uses ``sc.pp.normalize_total``; ``"weighted"`` uses
        ``normalize_weighted``; ``"none"`` (or ``None``) skips normalization.
    target_scale : float
        Target sum / scale handed to the normalization routine.
    weighted_norm_quantile : float
        Forwarded as ``q`` to ``normalize_weighted``.
    log1p : bool
        Whether to apply ``sc.pp.log1p`` to ``adata.X``.
    scale : bool
        Whether to apply ``sc.pp.scale`` with ``zero_center=False``.

    Raises
    ------
    ValueError
        If ``normalization_method`` is not one of the supported values.

    Notes
    -----
    The output layer name is ``counts_layer`` followed by the suffixes of the
    steps that ran: ``_normt`` / ``_normw``, ``_log1p``, ``_regr``, ``_scale``.
    """
    # Fail fast, before touching ``adata``: an unrecognised method used to be
    # skipped silently, leaving un-normalized data downstream.
    if normalization_method is None:
        normalization_method = "none"
    if normalization_method not in ("library", "weighted", "none"):
        raise ValueError(
            f"Unknown normalization_method {normalization_method!r}; "
            "expected 'library', 'weighted' or 'none'"
        )

    import scanpy as sc

    from ._normalize_weighted import normalize_weighted

    # NOTE(review): nesting below was reconstructed from a flattened diff
    # view. The bar has total=100 and there are exactly ten 10% ticks, so all
    # ticks are assumed to be unconditional -- confirm against upstream.
    with tqdm(total=100, bar_format="{percentage:3.0f}%|{bar}|") as pbar:
        if counts_layer not in adata.layers:
            adata.layers[counts_layer] = adata.X.copy()

        if f"{counts_layer}_log1p" not in adata.layers:
            adata.layers[f"{counts_layer}_log1p"] = sc.pp.log1p(
                adata.layers[counts_layer].copy()
            )
        pbar.update(10)

        # Always start from the raw counts, whatever X currently holds.
        adata.X = adata.layers[counts_layer].copy()
        sc.pp.calculate_qc_metrics(
            adata,
            percent_top=None,
            log1p=False,
            inplace=True,
        )
        sc.pp.filter_cells(adata, min_genes=min_genes)
        sc.pp.filter_genes(adata, min_cells=min_cells)
        pbar.update(10)

        # Recompute the metrics on the filtered matrix.
        sc.pp.calculate_qc_metrics(
            adata,
            percent_top=None,
            log1p=False,
            inplace=True,
        )
        pbar.update(10)

        if compute_hvg:
            if group_by is not None:
                adata.var["highly_variable"] = False
                for name, idx in adata.obs.groupby(
                    group_by, observed=True
                ).groups.items():
                    hvg_seurat = sc.pp.highly_variable_genes(
                        adata[idx],
                        layer=f"{counts_layer}_log1p",
                        flavor="seurat",
                        inplace=False,
                    )["highly_variable"]

                    hvg_seurat_v3 = sc.pp.highly_variable_genes(
                        adata[idx],
                        layer=counts_layer,
                        flavor="seurat_v3_paper",
                        n_top_genes=hvg_seurat.sum(),
                        inplace=False,
                    )["highly_variable"]

                    adata.var[f"highly_variable_{name}"] = hvg_seurat | hvg_seurat_v3
                    adata.var["highly_variable"] |= adata.var[f"highly_variable_{name}"]

            else:
                sc.pp.highly_variable_genes(
                    adata, layer=f"{counts_layer}_log1p", flavor="seurat"
                )
                # Snapshot the mask: the next call rewrites the same column.
                hvg_seurat = adata.var["highly_variable"].copy()

                sc.pp.highly_variable_genes(
                    adata,
                    layer=counts_layer,
                    flavor="seurat_v3_paper",
                    n_top_genes=hvg_seurat.sum(),
                )
                hvg_seurat_v3 = adata.var["highly_variable"]

                adata.var["highly_variable"] = hvg_seurat | hvg_seurat_v3

        pbar.update(10)
        pbar.update(10)

        # ``new_layer`` accumulates one suffix per transformation applied.
        new_layer = counts_layer
        if normalization_method == "library":
            new_layer += "_normt"
            sc.pp.normalize_total(adata, target_sum=target_scale)
        elif normalization_method == "weighted":
            new_layer += "_normw"
            normalize_weighted(
                adata,
                target_scale=target_scale,
                batch_key=group_by,
                q=weighted_norm_quantile,
            )

        pbar.update(10)
        pbar.update(10)

        if log1p:
            new_layer += "_log1p"
            # Drop any stale "log1p" entry in ``uns`` before transforming X.
            adata.uns.pop("log1p", None)
            sc.pp.log1p(adata)
        pbar.update(10)

        vars_to_regress = []
        if regress_n_genes:
            vars_to_regress.append("n_genes_by_counts")

        if regress_total_counts and log1p:
            # Match the covariate to the log scale of the data.
            adata.obs["log1p_total_counts"] = np.log1p(adata.obs["total_counts"])
            vars_to_regress.append("log1p_total_counts")
        elif regress_total_counts:
            vars_to_regress.append("total_counts")

        if vars_to_regress:
            new_layer += "_regr"
            sc.pp.regress_out(adata, keys=vars_to_regress, n_jobs=1)
        pbar.update(10)

        if scale:
            new_layer += "_scale"
            if group_by is not None:
                for idx in adata.obs.groupby(group_by, observed=True).groups.values():
                    with warnings.catch_warnings():
                        # Writing through the view into the parent is the
                        # intent here, so silence anndata's warning about it.
                        warnings.filterwarnings(
                            "ignore",
                            category=ImplicitModificationWarning,
                            message="Modifying `X` on a view results in data being overridden",
                        )
                        adata[idx].X = sc.pp.scale(adata[idx].X, zero_center=False)
            else:
                sc.pp.scale(adata, zero_center=False)

        adata.layers[new_layer] = adata.X.copy()

        pbar.update(10)

    adata.X = adata.X.astype(np.float32)
@@ -0,0 +1,38 @@
1
+ import numpy as np
2
+ from anndata import AnnData
3
+
4
+
5
def qc(
    adata: AnnData,
    counts_layer: str = "counts",
    min_counts: int = 50,
    min_genes: int = 5,
    min_cells: int = 5,
    max_rank: int = 0,
):
    """Filter low-quality observations and variables of ``adata`` in place.

    The filters are evaluated on the raw counts, but ``adata.X`` is put back
    to whatever it held on entry (subset to the surviving cells and genes)
    before the function returns.

    Parameters
    ----------
    adata : AnnData
        Annotated data matrix. Modified in place.
    counts_layer : str
        Layer holding the raw counts. Created from ``adata.X`` if missing.
    min_counts : int
        Observations whose total count is below this value are dropped.
    min_genes : int
        Passed to ``sc.pp.filter_cells``.
    min_cells : int
        Passed to ``sc.pp.filter_genes``.
    max_rank : int
        When greater than zero, only observations whose ``barcode_rank``
        (rank of ``total_counts``, highest count first) is strictly less than
        ``max_rank`` are kept.
    """
    import scanpy as sc

    if counts_layer not in adata.layers:
        adata.layers[counts_layer] = adata.X.copy()

    # Park the current X in a temporary layer so that it is subset together
    # with the rest of the object while the filters run on the counts.
    adata.layers["qc_tmp_current_X"] = adata.X
    try:
        adata.X = adata.layers[counts_layer].copy()
        # ravel() keeps the result 1-D even with a single observation, where
        # squeeze() would collapse it to a 0-d array.
        rowsums = np.asarray(adata.X.sum(axis=1)).ravel()

        obs_idx = adata.obs_names[rowsums >= min_counts]
        adata._inplace_subset_obs(obs_idx)

        sc.pp.calculate_qc_metrics(adata, percent_top=None, log1p=False, inplace=True)

        sc.pp.filter_cells(adata, min_genes=min_genes)
        sc.pp.filter_genes(adata, min_cells=min_cells)
        sc.pp.calculate_qc_metrics(adata, percent_top=None, log1p=False, inplace=True)
        adata.obs["barcode_rank"] = adata.obs["total_counts"].rank(ascending=False)
    finally:
        # Restore the original X even if a step above failed, so neither the
        # temporary layer nor the swapped-in counts are left behind.
        adata.X = adata.layers.pop("qc_tmp_current_X")

    if max_rank > 0:
        series = adata.obs["barcode_rank"]
        index = series.loc[series < max_rank].index
        adata._inplace_subset_obs(index)
@@ -0,0 +1,116 @@
1
+ import numpy as np
2
+ from anndata import AnnData
3
+ from numpy.typing import NDArray
4
+
5
+
6
def rpca(
    adata: AnnData,
    key: str,
    *,
    basis: str = "X",
    adjusted_basis: str | None = None,
    reference_batch: str | list[str] | None = None,
    mask_var: str | None = None,
    n_components: int = 30,
    min_variance_ratio: float = 0.0005,
    svd_solver: str = "arpack",
    normalize: bool = True,
):
    """Project all observations onto the PCA space of each reference batch.

    For every group of ``adata.obs[key]`` listed in ``reference_batch`` a PCA
    is fitted on that group's observations and all observations are projected
    onto it via ``pca_projection``.

    Parameters
    ----------
    adata : AnnData
        Annotated data matrix. Results are written into it.
    key : str
        Column of ``adata.obs`` defining the batches.
    basis : str
        ``"X"``, a layer name or an ``obsm`` key. ``None`` is read as ``"X"``.
    adjusted_basis : str | None
        Prefix for the output keys. Defaults to ``basis + "_rpca"``.
    reference_batch : str | list[str] | None
        Batch or batches to use as reference. ``None`` uses every batch.
    mask_var : str | None
        Column of ``adata.var`` used to select the variables. ``None`` uses
        all variables.
    n_components, min_variance_ratio, svd_solver, normalize
        Forwarded to ``pca_projection``.

    Notes
    -----
    For each reference batch ``gr`` the function writes

    * ``adata.obsm[f"{adjusted_basis}_{gr}"]`` -- the projected coordinates,
    * ``adata.varm[f"{adjusted_basis}_{gr}_PCs"]`` -- the components, with
      zeros for variables outside the mask,

    and collects the number of components and explained variance of every
    batch in ``adata.uns[adjusted_basis]``.
    """
    basis = "X" if basis is None else basis
    adjusted_basis = basis + "_rpca" if adjusted_basis is None else adjusted_basis

    if mask_var is None:
        mask = np.ones(adata.n_vars, dtype=bool)
    else:
        mask = adata.var[mask_var].values

    X = _get_basis(adata[:, mask], basis)
    uns = {}

    groups = adata.obs.groupby(key, observed=True).groups
    if reference_batch is None:
        reference_batch = list(groups.keys())
    elif isinstance(reference_batch, str):
        reference_batch = [reference_batch]

    references = ((gr, idx) for gr, idx in groups.items() if gr in reference_batch)
    for gr, idx in references:
        X_reference = _get_basis(adata[idx, mask], basis)
        scores, loadings, variance_ratio, variance = pca_projection(
            X,
            X_reference,
            n_components=n_components,
            min_variance_ratio=min_variance_ratio,
            svd_solver=svd_solver,
            normalize=normalize,
        )
        res_ncomps = scores.shape[1]

        # Expand the loadings back to all variables; masked-out ones stay 0.
        components = np.zeros((res_ncomps, adata.n_vars))
        components[:, mask] = loadings

        adata.obsm[f"{adjusted_basis}_{gr}"] = scores
        adata.varm[f"{adjusted_basis}_{gr}_PCs"] = components.T

        uns[gr] = {
            "n_components": res_ncomps,
            "explained_variance_ratio": variance_ratio,
            "explained_variance": variance,
        }

    adata.uns[adjusted_basis] = uns
69
+
70
+
71
def pca_projection(
    X: NDArray,
    X_reference: NDArray,
    n_components: int = 30,
    min_variance_ratio: float = 0.0005,
    svd_solver: str = "arpack",
    normalize: bool = False,
) -> tuple[NDArray, NDArray, NDArray, NDArray]:
    """Fit a PCA on ``X_reference`` and project ``X`` onto its components.

    Parameters
    ----------
    X : NDArray
        Data to project, one observation per row.
    X_reference : NDArray
        Data the PCA is fitted on (via ``sc.pp.pca``).
    n_components : int
        Number of components requested from ``sc.pp.pca``.
    min_variance_ratio : float
        Components whose explained variance ratio is not strictly greater
        than this value are discarded.
    svd_solver : str
        Forwarded to ``sc.pp.pca``.
    normalize : bool
        When true, every projected row is divided by its Euclidean norm.
        Rows whose projection is all zeros are left as zeros.

    Returns
    -------
    tuple[NDArray, NDArray, NDArray, NDArray]
        ``(X_pca, components, explained_variance_ratio, explained_variance)``
        restricted to the retained components.
    """
    import scanpy as sc

    pca_kwargs = dict(
        n_comps=n_components,
        svd_solver=svd_solver,
        return_info=True,
    )

    pca_result = sc.pp.pca(X_reference, **pca_kwargs)
    _, components, explained_variance_ratio, explained_variance = pca_result

    components_mask = explained_variance_ratio > min_variance_ratio
    components = components[components_mask]
    explained_variance_ratio = explained_variance_ratio[components_mask]
    explained_variance = explained_variance[components_mask]

    # NOTE(review): X is projected with a plain dot product; it is not shifted
    # by the reference mean here -- confirm this matches the PCA centering.
    X_pca = X.dot(components.T)

    if normalize:
        norms = np.linalg.norm(X_pca, axis=1, keepdims=True)
        # A row that projects to the origin has norm 0; dividing by it would
        # fill the row with NaN, so leave such rows untouched.
        norms[norms == 0] = 1.0
        X_pca = X_pca / norms

    return X_pca, components, explained_variance_ratio, explained_variance
101
+
102
+
103
def _get_basis(adata: AnnData, basis: str):
    """Return the matrix of ``adata`` that ``basis`` refers to.

    ``"X"`` selects ``adata.X``; any other value is looked up first in
    ``adata.layers`` and then in ``adata.obsm``.

    Raises
    ------
    ValueError
        If ``basis`` is neither ``"X"``, a layer name nor an ``obsm`` key.
    """
    if basis == "X":
        return adata.X

    if basis in adata.layers:
        return adata.layers[basis]

    if basis in adata.obsm:
        return adata.obsm[basis]

    raise ValueError(f"Unknown basis {basis}")
@@ -0,0 +1,208 @@
1
+ from typing import Sequence
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from anndata import AnnData
6
+
7
+
8
def subset_obs(
    adata: AnnData,
    subset: pd.Index | Sequence[str | int | bool] | str,
) -> None:
    """Subset observations (rows) in an AnnData object.

    This function modifies the AnnData object in-place by selecting a subset of
    observations based on the provided subset parameter. The subsetting can be done
    using observation names, integer indices, a boolean mask, a query string, or a
    pandas Index.

    Parameters
    ----------
    adata : AnnData
        The annotated data matrix to subset. Will be modified in-place.
    subset : pd.Index | Sequence[str | int | bool] | str
        The subset specification. Can be one of:
        * A pandas Index containing observation names
        * A sequence of observation names (strings)
        * A sequence of integer indices
        * A boolean mask of length `adata.n_obs`
        * A query string to match observations by their metadata columns
        * An empty sequence, which selects no observations

    Raises
    ------
    IndexError
        If a boolean mask does not have length `adata.n_obs`, or an integer
        index is negative or not smaller than `adata.n_obs`.

    Examples
    --------
    >>> # Create an example AnnData object
    >>> import anndata
    >>> import pandas as pd
    >>> import numpy as np
    >>>
    >>> obs = pd.DataFrame(
    ...     index=['A', 'B', 'C'],
    ...     data={'cell_type': ['type1', 'type2', 'type2']})
    >>> adata_ = anndata.AnnData(obs=obs)
    >>>
    >>> # Subset using pandas Index
    >>> adata = adata_.copy()
    >>> subset_obs(adata, pd.Index(['B', 'C']))
    >>> adata.obs_names.tolist()
    ['B', 'C']
    >>>
    >>> # Subset using observation names
    >>> adata = adata_.copy()
    >>> subset_obs(adata, ['A', 'B'])
    >>> adata.obs_names.tolist()
    ['A', 'B']
    >>>
    >>> # Subset using integer indices
    >>> adata = adata_.copy()
    >>> subset_obs(adata, [0, 1])
    >>> adata.obs_names.tolist()
    ['A', 'B']
    >>>
    >>> # Subset using boolean mask
    >>> adata = adata_.copy()
    >>> subset_obs(adata, [True, False, True])
    >>> adata.obs_names.tolist()
    ['A', 'C']
    >>>
    >>> # Subset using query string
    >>> adata = adata_.copy()
    >>> subset_obs(adata, 'cell_type == "type2"')
    >>> adata.obs_names.tolist()
    ['B', 'C']

    Notes
    -----
    - The function modifies the AnnData object in-place
    - A single string is always interpreted as a query, never as a name
    - When using a boolean mask, its length must match the number of observations
    - When using integer indices, they must be valid indices for the observations
    - Invalid observation names or indices will raise KeyError or IndexError respectively
    - The order of observations in the output will match the order in the subset parameter
    """
    if isinstance(subset, str):
        subset = adata.obs.query(subset).index

    if not isinstance(subset, pd.Index):
        subset = np.asarray(subset)
        if subset.size == 0:
            # np.asarray([]) is float64, which is neither a mask, a position
            # nor a name; read an empty selection as "no positions".
            subset = subset.astype(np.intp)

    # Handle boolean mask
    if subset.dtype.kind == "b":
        if len(subset) != adata.n_obs:
            raise IndexError(
                f"Boolean mask length ({len(subset)}) does not match number of "
                f"observations ({adata.n_obs})"
            )
        subset = adata.obs_names[subset]

    # Handle integer indices
    elif subset.dtype.kind in "iu":
        if np.any(subset < 0) or np.any(subset >= adata.n_obs):
            raise IndexError(f"Integer indices must be between 0 and {adata.n_obs - 1}")
        subset = adata.obs_names[subset]

    if adata.n_obs == subset.size and (subset == adata.obs_names).all():
        # No need to subset, avoid making a copy. Useful for large AnnData objects
        return

    adata._inplace_subset_obs(subset)
106
+
107
+
108
def subset_var(
    adata: AnnData,
    subset: pd.Index | Sequence[str | int | bool] | str,
) -> None:
    """Subset variables (columns) in an AnnData object.

    This function modifies the AnnData object in-place by selecting a subset of
    variables based on the provided subset parameter. The subsetting can be done
    using variable names, integer indices, a boolean mask, a query string, or a
    pandas Index.

    Parameters
    ----------
    adata : AnnData
        The annotated data matrix to subset. Will be modified in-place.
    subset : pd.Index | Sequence[str | int | bool] | str
        The subset specification. Can be one of:
        * A pandas Index containing variable names
        * A sequence of variable names (strings)
        * A sequence of integer indices
        * A boolean mask of length `adata.n_vars`
        * A query string to match variables by their metadata columns
        * An empty sequence, which selects no variables

    Raises
    ------
    IndexError
        If a boolean mask does not have length `adata.n_vars`, or an integer
        index is negative or not smaller than `adata.n_vars`.

    Examples
    --------
    >>> # Create an example AnnData object
    >>> import anndata
    >>> import pandas as pd
    >>> import numpy as np
    >>>
    >>> var = pd.DataFrame(
    ...     index=['gene1', 'gene2', 'gene3'],
    ...     data={'gene_type': ['type1', 'type2', 'type1']})
    >>> adata_ = anndata.AnnData(var=var)
    >>>
    >>> # Subset using pandas Index
    >>> adata = adata_.copy()
    >>> subset_var(adata, pd.Index(['gene2', 'gene3']))
    >>> adata.var_names.tolist()
    ['gene2', 'gene3']
    >>>
    >>> # Subset using variable names
    >>> adata = adata_.copy()
    >>> subset_var(adata, ['gene1', 'gene2'])
    >>> adata.var_names.tolist()
    ['gene1', 'gene2']
    >>>
    >>> # Subset using integer indices
    >>> adata = adata_.copy()
    >>> subset_var(adata, [0, 1])
    >>> adata.var_names.tolist()
    ['gene1', 'gene2']
    >>>
    >>> # Subset using boolean mask
    >>> adata = adata_.copy()
    >>> subset_var(adata, [True, False, True])
    >>> adata.var_names.tolist()
    ['gene1', 'gene3']
    >>>
    >>> # Subset using query string
    >>> adata = adata_.copy()
    >>> subset_var(adata, 'gene_type == "type1"')
    >>> adata.var_names.tolist()
    ['gene1', 'gene3']

    Notes
    -----
    - The function modifies the AnnData object in-place
    - A single string is always interpreted as a query, never as a name
    - When using a boolean mask, its length must match the number of variables
    - When using integer indices, they must be valid indices for the variables
    - Invalid variable names or indices will raise KeyError or IndexError respectively
    - The order of variables in the output will match the order in the subset parameter
    """
    if isinstance(subset, str):
        subset = adata.var.query(subset).index

    if not isinstance(subset, pd.Index):
        subset = np.asarray(subset)
        if subset.size == 0:
            # np.asarray([]) is float64, which is neither a mask, a position
            # nor a name; read an empty selection as "no positions".
            subset = subset.astype(np.intp)

    # Handle boolean mask
    if subset.dtype.kind == "b":
        if len(subset) != adata.n_vars:
            raise IndexError(
                f"Boolean mask length ({len(subset)}) does not match number of "
                f"variables ({adata.n_vars})"
            )
        subset = adata.var_names[subset]

    # Handle integer indices
    elif subset.dtype.kind in "iu":
        if np.any(subset < 0) or np.any(subset >= adata.n_vars):
            raise IndexError(
                f"Integer indices must be between 0 and {adata.n_vars - 1}"
            )
        subset = adata.var_names[subset]

    if adata.n_vars == subset.size and (subset == adata.var_names).all():
        # No need to subset, avoid making a copy. Useful for large AnnData objects
        return

    adata._inplace_subset_var(subset)