sclab 0.1.7__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. sclab/__init__.py +3 -1
  2. sclab/_io.py +83 -12
  3. sclab/_methods_registry.py +65 -0
  4. sclab/_sclab.py +241 -21
  5. sclab/dataset/_dataset.py +4 -6
  6. sclab/dataset/processor/_processor.py +41 -19
  7. sclab/dataset/processor/_results_panel.py +94 -0
  8. sclab/dataset/processor/step/_processor_step_base.py +12 -6
  9. sclab/examples/processor_steps/__init__.py +8 -0
  10. sclab/examples/processor_steps/_cluster.py +2 -2
  11. sclab/examples/processor_steps/_differential_expression.py +329 -0
  12. sclab/examples/processor_steps/_doublet_detection.py +68 -0
  13. sclab/examples/processor_steps/_gene_expression.py +125 -0
  14. sclab/examples/processor_steps/_integration.py +116 -0
  15. sclab/examples/processor_steps/_neighbors.py +26 -6
  16. sclab/examples/processor_steps/_pca.py +13 -8
  17. sclab/examples/processor_steps/_preprocess.py +52 -25
  18. sclab/examples/processor_steps/_qc.py +24 -8
  19. sclab/examples/processor_steps/_umap.py +2 -2
  20. sclab/gui/__init__.py +0 -0
  21. sclab/gui/components/__init__.py +7 -0
  22. sclab/gui/components/_guided_pseudotime.py +482 -0
  23. sclab/gui/components/_transfer_metadata.py +186 -0
  24. sclab/methods/__init__.py +50 -0
  25. sclab/preprocess/__init__.py +26 -0
  26. sclab/preprocess/_cca.py +176 -0
  27. sclab/preprocess/_cca_integrate.py +109 -0
  28. sclab/preprocess/_filter_obs.py +42 -0
  29. sclab/preprocess/_harmony.py +421 -0
  30. sclab/preprocess/_harmony_integrate.py +53 -0
  31. sclab/preprocess/_normalize_weighted.py +65 -0
  32. sclab/preprocess/_pca.py +51 -0
  33. sclab/preprocess/_preprocess.py +155 -0
  34. sclab/preprocess/_qc.py +38 -0
  35. sclab/preprocess/_rpca.py +116 -0
  36. sclab/preprocess/_subset.py +208 -0
  37. sclab/preprocess/_transfer_metadata.py +196 -0
  38. sclab/preprocess/_transform.py +82 -0
  39. sclab/preprocess/_utils.py +96 -0
  40. sclab/scanpy/__init__.py +0 -0
  41. sclab/scanpy/_compat.py +92 -0
  42. sclab/scanpy/_settings.py +526 -0
  43. sclab/scanpy/logging.py +290 -0
  44. sclab/scanpy/plotting/__init__.py +0 -0
  45. sclab/scanpy/plotting/_rcmod.py +73 -0
  46. sclab/scanpy/plotting/palettes.py +221 -0
  47. sclab/scanpy/readwrite.py +1108 -0
  48. sclab/tools/__init__.py +0 -0
  49. sclab/tools/cellflow/__init__.py +0 -0
  50. sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
  51. sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
  52. sclab/tools/cellflow/pseudotime/__init__.py +0 -0
  53. sclab/tools/cellflow/pseudotime/_pseudotime.py +336 -0
  54. sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
  55. sclab/tools/cellflow/utils/__init__.py +0 -0
  56. sclab/tools/cellflow/utils/density_nd.py +215 -0
  57. sclab/tools/cellflow/utils/interpolate.py +334 -0
  58. sclab/tools/cellflow/utils/periodic_genes.py +106 -0
  59. sclab/tools/cellflow/utils/smoothen.py +124 -0
  60. sclab/tools/cellflow/utils/times.py +55 -0
  61. sclab/tools/differential_expression/__init__.py +7 -0
  62. sclab/tools/differential_expression/_pseudobulk_edger.py +309 -0
  63. sclab/tools/differential_expression/_pseudobulk_helpers.py +290 -0
  64. sclab/tools/differential_expression/_pseudobulk_limma.py +257 -0
  65. sclab/tools/doublet_detection/__init__.py +5 -0
  66. sclab/tools/doublet_detection/_scrublet.py +64 -0
  67. sclab/tools/embedding/__init__.py +0 -0
  68. sclab/tools/imputation/__init__.py +0 -0
  69. sclab/tools/imputation/_alra.py +135 -0
  70. sclab/tools/labeling/__init__.py +6 -0
  71. sclab/tools/labeling/sctype.py +233 -0
  72. sclab/tools/utils/__init__.py +5 -0
  73. sclab/tools/utils/_aggregate_and_filter.py +290 -0
  74. sclab/utils/__init__.py +5 -0
  75. sclab/utils/_write_excel.py +510 -0
  76. {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/METADATA +29 -12
  77. sclab-0.3.4.dist-info/RECORD +93 -0
  78. {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/WHEEL +1 -1
  79. sclab-0.3.4.dist-info/licenses/LICENSE +29 -0
  80. sclab-0.1.7.dist-info/RECORD +0 -30
@@ -0,0 +1,50 @@
1
+ from importlib.util import find_spec
2
+
3
+ from .._methods_registry import register_sclab_method
4
+ from ..examples.processor_steps import (
5
+ PCA,
6
+ QC,
7
+ UMAP,
8
+ Cluster,
9
+ DifferentialExpression,
10
+ DoubletDetection,
11
+ GeneExpression,
12
+ Integration,
13
+ Neighbors,
14
+ Preprocess,
15
+ )
16
+ from ..gui.components import GuidedPseudotime, TransferMetadata
17
+
18
# Step classes re-exported as the public API of this package.
__all__ = [
    "QC",
    "Preprocess",
    "PCA",
    "Integration",
    "Neighbors",
    "UMAP",
    "Cluster",
    "DoubletDetection",
    "GeneExpression",
    "DifferentialExpression",
    "GuidedPseudotime",
]

# Register the "Processing" methods. The tuple keeps the exact registration
# order of the original one-call-per-line listing.
for _method in (
    QC,
    Preprocess,
    PCA,
    Integration,
    Neighbors,
    TransferMetadata,
    UMAP,
    Cluster,
):
    register_sclab_method("Processing")(_method)

# DoubletDetection is only registered when the optional `scrublet` package
# can be located in the current environment.
if find_spec("scrublet") is not None:
    register_sclab_method("Processing")(DoubletDetection)

# Register the "Analysis" methods, again in the original order.
for _method in (GeneExpression, DifferentialExpression, GuidedPseudotime):
    register_sclab_method("Analysis")(_method)

# Do not leave the loop variable behind in the package namespace.
del _method
@@ -0,0 +1,26 @@
1
+ from ._cca_integrate import cca_integrate, cca_integrate_pair
2
+ from ._filter_obs import filter_obs
3
+ from ._harmony_integrate import harmony_integrate
4
+ from ._normalize_weighted import normalize_weighted
5
+ from ._pca import pca
6
+ from ._preprocess import preprocess
7
+ from ._qc import qc
8
+ from ._subset import subset_obs, subset_var
9
+ from ._transfer_metadata import propagate_metadata, transfer_metadata
10
+ from ._transform import pool_neighbors
11
+
12
+ __all__ = [
13
+ "cca_integrate",
14
+ "cca_integrate_pair",
15
+ "filter_obs",
16
+ "harmony_integrate",
17
+ "normalize_weighted",
18
+ "pca",
19
+ "pool_neighbors",
20
+ "preprocess",
21
+ "propagate_metadata",
22
+ "qc",
23
+ "subset_obs",
24
+ "subset_var",
25
+ "transfer_metadata",
26
+ ]
@@ -0,0 +1,176 @@
1
+ import logging
2
+ import os
3
+ from typing import Literal
4
+
5
+ import numpy as np
6
+ from joblib import Parallel, delayed
7
+ from numpy import matrix
8
+ from numpy.typing import NDArray
9
+ from scipy.linalg import svd
10
+ from scipy.sparse import csc_matrix, csr_matrix, issparse
11
+ from scipy.sparse import vstack as sparse_vstack
12
+ from scipy.sparse.linalg import svds
13
+ from sklearn.utils.extmath import randomized_svd
14
+
15
logger = logging.getLogger(__name__)


# Default worker count used as the `n_jobs` default by the helpers in this
# module. os.cpu_count() may return None when the number of CPUs cannot be
# determined; fall back to a single worker so the value is always a positive
# int (the chunking code does arithmetic on it).
N_CPUS = os.cpu_count() or 1
19
+
20
+
21
def cca(
    X: NDArray | csr_matrix | csc_matrix,
    Y: NDArray | csr_matrix | csc_matrix,
    n_components=None,
    svd_solver: Literal["full", "partial", "randomized"] = "randomized",
    normalize: bool = False,
    random_state=42,
    n_jobs: int = N_CPUS,
) -> tuple[NDArray, NDArray, NDArray]:
    """
    CCA-style integration for two single-cell matrices with unequal numbers of cells.

    The cell-by-cell cross-covariance of ``X`` and ``Y`` is decomposed with an
    SVD; the left and right singular vectors are returned as cell embeddings.

    Parameters
    ----------
    X, Y : array-like or sparse matrix, shape (n_cells, n_features)
        Cell-by-feature matrices sharing the same feature space (variable
        genes/pcs) in the same column order. The number of cells may differ.
        If either input is sparse, both are handled through the sparse code
        path (a dense counterpart is converted to CSR).
    n_components : int or None
        Dimensionality of the canonical space (default = all that the smaller
        dataset allows).
    svd_solver : {'full', 'partial', 'randomized'}
        'randomized' uses Halko et al. algorithm (`sklearn.utils.extmath.randomized_svd`)
        and is strongly recommended when only the leading few components are needed.
    normalize : bool
        If True, every row of ``U`` and ``V`` is divided by its L2 norm.
    random_state : int or None
        Passed through to the randomized SVD for reproducibility.
    n_jobs : int
        Number of workers forwarded to the sparse cross-covariance
        computation; not used for dense inputs.

    Returns
    -------
    U : (n_cells(X), k) ndarray
        Cell-level canonical variables for ``X``.
    s : (k,) ndarray
        Singular values of the cross-covariance, in descending order.
    V : (n_cells(Y), k) ndarray
        Cell-level canonical variables for ``Y``.

    Raises
    ------
    ValueError
        If ``X`` and ``Y`` have a different number of features, or if
        ``svd_solver`` is not a supported value.
    """
    n1, p1 = X.shape
    n2, p2 = Y.shape
    if p1 != p2:
        raise ValueError("The two matrices must have the same number of features.")

    k = n_components or min(n1, n2)

    if issparse(X) or issparse(Y):
        # The sparse helper relies on both operands being scipy sparse
        # matrices, so promote a dense partner instead of letting a mixed
        # pair reach either helper. csr_matrix() of an existing CSR matrix
        # does not copy the data.
        C = _cross_covariance_sparse(csr_matrix(X), csr_matrix(Y), n_jobs=n_jobs)
    else:
        C = _cross_covariance_dense(X, Y)

    logger.info("Cross-covariance computed. Shape: %s", C.shape)

    Uc, s, Vct = _svd_decomposition(C, k, svd_solver, random_state)

    # canonical variables
    # Left and right singular vectors are cell embeddings
    U = Uc  # (n1 x k)
    V = Vct.T  # (n2 x k)

    if normalize:
        logger.info("Normalizing canonical variables...")
        # Row-wise L2 normalization; an all-zero row yields NaN.
        U = U / np.linalg.norm(U, axis=1, keepdims=True)
        V = V / np.linalg.norm(V, axis=1, keepdims=True)

    logger.info("Done.")

    return U, s, V
81
+
82
+
83
+ def _svd_decomposition(
84
+ C: NDArray,
85
+ k: int,
86
+ svd_solver: Literal["full", "partial", "randomized"],
87
+ random_state: int | None,
88
+ ) -> tuple[NDArray, NDArray, NDArray]:
89
+ if svd_solver == "full":
90
+ logger.info("SVD decomposition with full SVD...")
91
+ Uc, s, Vct = svd(C, full_matrices=False)
92
+ Uc, s, Vct = Uc[:, :k], s[:k], Vct[:k, :]
93
+
94
+ elif svd_solver == "partial":
95
+ logger.info("SVD decomposition with partial SVD...")
96
+ Uc, s, Vct = svds(C, k=k)
97
+
98
+ elif svd_solver == "randomized":
99
+ logger.info("SVD decomposition with randomized SVD...")
100
+ Uc, s, Vct = randomized_svd(C, n_components=k, random_state=random_state)
101
+
102
+ else:
103
+ raise ValueError("svd_solver must be 'full' or 'partial'.")
104
+
105
+ order = np.argsort(-s)
106
+ s = s[order]
107
+ Uc = Uc[:, order]
108
+ Vct = Vct[order, :]
109
+
110
+ return Uc, s, Vct
111
+
112
+
113
def _cross_covariance_sparse(X: csr_matrix, Y: csr_matrix, n_jobs=N_CPUS) -> NDArray:
    """
    Cross-covariance between the rows of two sparse matrices.

    The column means are never subtracted from the sparse inputs directly;
    instead the centered product is expanded into four terms so that only the
    sparse product ``X @ Y.T`` and matrix-vector products are needed.
    Unlike the dense variant, the columns are not scaled.

    Parameters
    ----------
    X, Y : sparse matrices with the same number of columns.
    n_jobs : number of workers used for the sparse product.

    Returns
    -------
    ndarray of shape (X.shape[0], Y.shape[0]).

    Raises
    ------
    ValueError
        If the column counts differ.
    """
    _, n_features_x = X.shape
    _, n_features_y = Y.shape
    if n_features_x != n_features_y:
        raise ValueError("The two matrices must have the same number of features.")

    # TODO: incorporate sparse scaling

    logger.info("Computing cross-covariance on sparse matrices...")

    # column means, each a (1 x p) matrix
    mean_x: matrix = X.mean(axis=0)
    mean_y: matrix = Y.mean(axis=0)

    # the four terms of (X - mean_x)(Y - mean_y)^T
    raw_product: csr_matrix = _spmm_parallel(X, Y.T, n_jobs=n_jobs)
    x_times_mean_y: matrix = X.dot(mean_y.T)  # (n1 x 1), broadcast over columns
    mean_x_times_y: matrix = Y.dot(mean_x.T).T  # (1 x n2), broadcast over rows
    mean_product: float = (mean_x @ mean_y.T)[0, 0]

    centered = raw_product - x_times_mean_y - mean_x_times_y + mean_product

    return np.asarray(centered / (n_features_x - 1))
136
+
137
+
138
def _cross_covariance_dense(X: NDArray, Y: NDArray) -> NDArray:
    """
    Cross-covariance between the rows of two dense matrices.

    Each input is scaled column-wise with ``_dense_scale`` and then centered
    by its column means before the row-by-row product is taken.

    Parameters
    ----------
    X, Y : 2-D arrays with the same number of columns.

    Returns
    -------
    ndarray of shape (X.shape[0], Y.shape[0]).

    Raises
    ------
    ValueError
        If the column counts differ.
    """
    _, n_features_x = X.shape
    _, n_features_y = Y.shape
    if n_features_x != n_features_y:
        raise ValueError("The two matrices must have the same number of features.")

    logger.info("Computing cross-covariance on dense matrices...")

    # scale first, then remove the column means of the scaled data
    scaled_x = _dense_scale(X)
    scaled_y = _dense_scale(Y)
    centered_x = scaled_x - scaled_x.mean(axis=0, keepdims=True)
    centered_y = scaled_y - scaled_y.mean(axis=0, keepdims=True)

    return (centered_x @ centered_y.T) / (n_features_x - 1)
156
+
157
+
158
def _dense_scale(A: NDArray) -> NDArray:
    """
    Divide every column of ``A`` by its sample standard deviation.

    Parameters
    ----------
    A : array-like, 2-D
        Integer and boolean inputs are promoted to float64 first, because a
        machine epsilon is only defined for floating-point dtypes.

    Returns
    -------
    ndarray with the shape of ``A``. The dtype's machine epsilon is added to
    each standard deviation so constant columns do not divide by zero.
    """
    A = np.asarray(A)
    if not np.issubdtype(A.dtype, np.inexact):
        # np.finfo raises on non-floating dtypes (e.g. raw integer counts)
        A = A.astype(np.float64)
    eps = np.finfo(A.dtype).eps
    return A / (A.std(axis=0, ddof=1, keepdims=True) + eps)
162
+
163
+
164
def _spmm_chunk(A_csr, X, start, stop):
    """Multiply rows ``start:stop`` of ``A_csr`` by ``X`` and return the product."""
    row_block = A_csr[start:stop, :]
    return row_block @ X
166
+
167
+
168
def _spmm_parallel(A_csr: csr_matrix, X_csc: csc_matrix, n_jobs=N_CPUS):
    """
    Compute ``A_csr @ X_csc`` by splitting the rows of ``A_csr`` into chunks.

    Parameters
    ----------
    A_csr : sparse matrix whose rows are split into contiguous chunks.
    X_csc : right-hand operand, passed unchanged to every worker.
    n_jobs : int or None
        Worker count in joblib's convention (``None`` or negative values are
        resolved with ``joblib.effective_n_jobs``).

    Returns
    -------
    The row-wise stacked chunk products, as returned by
    ``scipy.sparse.vstack`` (a sparse matrix).
    """
    from joblib import effective_n_jobs

    n = A_csr.shape[0]

    # Resolve None / -1 style values to a concrete positive worker count;
    # the chunk arithmetic below needs a real integer.
    n_workers = effective_n_jobs(n_jobs)
    # Never create more chunks than there are rows, but always at least one
    # so that the stack below is not empty.
    n_chunks = max(1, min(n_workers, n))

    bounds = np.linspace(0, n, n_chunks + 1, dtype=int)
    Ys = Parallel(n_jobs=n_workers, prefer="processes")(
        delayed(_spmm_chunk)(A_csr, X_csc, start, stop)
        for start, stop in zip(bounds[:-1], bounds[1:])
    )
    return sparse_vstack(Ys)
@@ -0,0 +1,109 @@
1
+ import numpy as np
2
+ from anndata import AnnData
3
+
4
+ from ._cca import cca
5
+
6
+
7
def cca_integrate(
    adata: AnnData,
    key: str,
    *,
    basis: str = "X",
    adjusted_basis: str | None = None,
    reference_batch: str | list[str] | None = None,
    mask_var: str | None = None,
    n_components: int = 30,
    svd_solver: str = "randomized",
    normalize: bool = True,
    random_state: int | None = None,
):
    """
    Integrate the batches found in ``adata.obs[key]`` with CCA.

    Only the two-batch case is implemented; it delegates to
    ``cca_integrate_pair`` and therefore modifies ``adata`` in place.

    Parameters
    ----------
    adata : AnnData
        Annotated data matrix; modified in place.
    key : str
        Column of ``adata.obs`` holding the batch labels.
    basis, adjusted_basis, mask_var, n_components, svd_solver, normalize, random_state
        Forwarded unchanged to ``cca_integrate_pair``.
    reference_batch : str, list of str or None
        Currently unused.

    Raises
    ------
    NotImplementedError
        If ``adata.obs[key]`` does not contain exactly two distinct
        (non-missing) batch labels.
    """
    # Missing labels are dropped so the labels picked here agree with the
    # count of distinct batches (nunique() ignores NaN, unique() does not).
    batches = adata.obs[key].dropna().unique()
    if len(batches) != 2:
        raise NotImplementedError(
            "cca_integrate currently supports exactly two batches; "
            f"'{key}' contains {len(batches)}."
        )

    group1, group2 = batches
    cca_integrate_pair(
        adata,
        key,
        group1,
        group2,
        basis=basis,
        adjusted_basis=adjusted_basis,
        mask_var=mask_var,
        n_components=n_components,
        svd_solver=svd_solver,
        normalize=normalize,
        random_state=random_state,
    )
37
+
38
+
39
def cca_integrate_pair(
    adata: AnnData,
    key: str,
    group1: str,
    group2: str,
    *,
    basis: str | None = None,
    adjusted_basis: str | None = None,
    mask_var: str | None = None,
    n_components: int = 30,
    svd_solver: str = "randomized",
    normalize: bool = True,
    random_state: int | None = None,
):
    """
    Run CCA between two groups of observations and store the embeddings.

    Parameters
    ----------
    adata : AnnData
        Annotated data matrix; modified in place.
    key : str
        Column of ``adata.obs`` holding the group labels.
    group1, group2
        The two labels of ``key`` to integrate. Other labels present in
        ``key`` are left untouched.
    basis : str or None
        "X", a layer name or an ``obsm`` key (default "X").
    adjusted_basis : str or None
        Name under which results are stored (default ``basis + "_cca"``).
    mask_var : str or None
        Column of ``adata.var`` used to select variables; all variables are
        used when None.
    n_components, svd_solver, normalize, random_state
        Forwarded to ``cca``.

    Notes
    -----
    Results are written to ``adata.obsm[adjusted_basis]`` (rows of
    observations outside the two groups stay NaN when the array is newly
    created) and to ``adata.uns[adjusted_basis]``, which receives the
    singular values under ``"{group1}-{group2}"`` and each group's embedding
    under its label.

    Raises
    ------
    KeyError
        If ``group1`` or ``group2`` is not a label of ``adata.obs[key]``.
    """
    if basis is None:
        basis = "X"

    if adjusted_basis is None:
        adjusted_basis = basis + "_cca"

    if mask_var is not None:
        mask = adata.var[mask_var].values
    else:
        mask = np.ones(adata.n_vars, dtype=bool)

    groups = adata.obs.groupby(key, observed=True).groups
    # Only the requested pair takes part; `key` may hold further groups for
    # which no embedding is computed.
    pair = (group1, group2)

    Xs = {gr: _get_basis(adata[groups[gr], mask], basis) for gr in pair}

    Ys = {}
    Ys[group1], sigma, Ys[group2] = cca(
        Xs[group1],
        Xs[group2],
        n_components=n_components,
        svd_solver=svd_solver,
        normalize=normalize,
        random_state=random_state,
    )

    # (Re)allocate the output when it is missing or has a different width.
    if (
        adjusted_basis not in adata.obsm
        or adata.obsm[adjusted_basis].shape[1] != n_components
    ):
        adata.obsm[adjusted_basis] = np.full((adata.n_obs, n_components), np.nan)

    if adjusted_basis not in adata.uns:
        adata.uns[adjusted_basis] = {}

    uns = adata.uns[adjusted_basis]
    uns[f"{group1}-{group2}"] = {"sigma": sigma}
    for gr in pair:
        # translate observation names into positional row indices
        idx = adata.obs_names.get_indexer(groups[gr])
        adata.obsm[adjusted_basis][idx] = Ys[gr]
        uns[gr] = Ys[gr]
94
+
95
+
96
def _get_basis(adata: AnnData, basis: str):
    """
    Look up the matrix named by ``basis`` on ``adata``.

    ``"X"`` selects ``adata.X``; any other name is searched first in
    ``adata.layers`` and then in ``adata.obsm``.

    Raises
    ------
    ValueError
        If ``basis`` is found in none of these places.
    """
    if basis == "X":
        return adata.X

    if basis in adata.layers:
        return adata.layers[basis]

    if basis in adata.obsm:
        return adata.obsm[basis]

    raise ValueError(f"Unknown basis {basis}")
@@ -0,0 +1,42 @@
1
+ import numpy as np
2
+ from anndata import AnnData
3
+ from scipy.stats import rankdata
4
+
5
+
6
def filter_obs(
    adata: AnnData,
    *,
    layer: str | None = None,
    min_counts: int | None = None,
    min_genes: int | None = None,
    max_counts: int | None = None,
    max_cells: int | None = None,
) -> None:
    """
    Remove observations from ``adata`` in place according to count criteria.

    Parameters
    ----------
    adata : AnnData
        Annotated data matrix; subset in place when anything is removed.
    layer : str or None
        Layer to read values from; ``adata.X`` is used when None.
    min_counts : int or None
        Remove observations whose row sum is below this value.
    min_genes : int or None
        Remove observations with fewer than this many positive entries.
    max_counts : int or None
        Remove observations whose row sum is above this value.
    max_cells : int or None
        Remove observations whose rank by descending row sum exceeds this
        value. Ranks are computed over all observations, and tied
        observations share the lowest rank (``method="min"``).
    """
    X = adata.X if layer is None else adata.layers[layer]

    drop = np.zeros(X.shape[0], dtype=bool)

    if min_genes is not None:
        # number of strictly positive entries per observation
        genes_per_obs = np.asarray((X > 0).sum(axis=1)).squeeze()
        drop[genes_per_obs < min_genes] = True

    counts_needed = not (min_counts is None and max_counts is None and max_cells is None)
    if counts_needed:
        # total per observation, shared by the three count-based criteria
        counts_per_obs = np.asarray(X.sum(axis=1)).squeeze()

        if min_counts is not None:
            drop[counts_per_obs < min_counts] = True

        if max_counts is not None:
            drop[counts_per_obs > max_counts] = True

        if max_cells is not None:
            # negate so that rank 1 is the observation with the largest total
            rank_by_counts = rankdata(-counts_per_obs, method="min")
            drop[rank_by_counts > max_cells] = True

    if drop.any():
        adata._inplace_subset_obs(adata.obs_names[~drop])