sclab 0.1.7__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. sclab/__init__.py +3 -1
  2. sclab/_io.py +83 -12
  3. sclab/_methods_registry.py +65 -0
  4. sclab/_sclab.py +241 -21
  5. sclab/dataset/_dataset.py +4 -6
  6. sclab/dataset/processor/_processor.py +41 -19
  7. sclab/dataset/processor/_results_panel.py +94 -0
  8. sclab/dataset/processor/step/_processor_step_base.py +12 -6
  9. sclab/examples/processor_steps/__init__.py +8 -0
  10. sclab/examples/processor_steps/_cluster.py +2 -2
  11. sclab/examples/processor_steps/_differential_expression.py +329 -0
  12. sclab/examples/processor_steps/_doublet_detection.py +68 -0
  13. sclab/examples/processor_steps/_gene_expression.py +125 -0
  14. sclab/examples/processor_steps/_integration.py +116 -0
  15. sclab/examples/processor_steps/_neighbors.py +26 -6
  16. sclab/examples/processor_steps/_pca.py +13 -8
  17. sclab/examples/processor_steps/_preprocess.py +52 -25
  18. sclab/examples/processor_steps/_qc.py +24 -8
  19. sclab/examples/processor_steps/_umap.py +2 -2
  20. sclab/gui/__init__.py +0 -0
  21. sclab/gui/components/__init__.py +7 -0
  22. sclab/gui/components/_guided_pseudotime.py +482 -0
  23. sclab/gui/components/_transfer_metadata.py +186 -0
  24. sclab/methods/__init__.py +50 -0
  25. sclab/preprocess/__init__.py +26 -0
  26. sclab/preprocess/_cca.py +176 -0
  27. sclab/preprocess/_cca_integrate.py +109 -0
  28. sclab/preprocess/_filter_obs.py +42 -0
  29. sclab/preprocess/_harmony.py +421 -0
  30. sclab/preprocess/_harmony_integrate.py +53 -0
  31. sclab/preprocess/_normalize_weighted.py +65 -0
  32. sclab/preprocess/_pca.py +51 -0
  33. sclab/preprocess/_preprocess.py +155 -0
  34. sclab/preprocess/_qc.py +38 -0
  35. sclab/preprocess/_rpca.py +116 -0
  36. sclab/preprocess/_subset.py +208 -0
  37. sclab/preprocess/_transfer_metadata.py +196 -0
  38. sclab/preprocess/_transform.py +82 -0
  39. sclab/preprocess/_utils.py +96 -0
  40. sclab/scanpy/__init__.py +0 -0
  41. sclab/scanpy/_compat.py +92 -0
  42. sclab/scanpy/_settings.py +526 -0
  43. sclab/scanpy/logging.py +290 -0
  44. sclab/scanpy/plotting/__init__.py +0 -0
  45. sclab/scanpy/plotting/_rcmod.py +73 -0
  46. sclab/scanpy/plotting/palettes.py +221 -0
  47. sclab/scanpy/readwrite.py +1108 -0
  48. sclab/tools/__init__.py +0 -0
  49. sclab/tools/cellflow/__init__.py +0 -0
  50. sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
  51. sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
  52. sclab/tools/cellflow/pseudotime/__init__.py +0 -0
  53. sclab/tools/cellflow/pseudotime/_pseudotime.py +336 -0
  54. sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
  55. sclab/tools/cellflow/utils/__init__.py +0 -0
  56. sclab/tools/cellflow/utils/density_nd.py +215 -0
  57. sclab/tools/cellflow/utils/interpolate.py +334 -0
  58. sclab/tools/cellflow/utils/periodic_genes.py +106 -0
  59. sclab/tools/cellflow/utils/smoothen.py +124 -0
  60. sclab/tools/cellflow/utils/times.py +55 -0
  61. sclab/tools/differential_expression/__init__.py +7 -0
  62. sclab/tools/differential_expression/_pseudobulk_edger.py +309 -0
  63. sclab/tools/differential_expression/_pseudobulk_helpers.py +290 -0
  64. sclab/tools/differential_expression/_pseudobulk_limma.py +257 -0
  65. sclab/tools/doublet_detection/__init__.py +5 -0
  66. sclab/tools/doublet_detection/_scrublet.py +64 -0
  67. sclab/tools/embedding/__init__.py +0 -0
  68. sclab/tools/imputation/__init__.py +0 -0
  69. sclab/tools/imputation/_alra.py +135 -0
  70. sclab/tools/labeling/__init__.py +6 -0
  71. sclab/tools/labeling/sctype.py +233 -0
  72. sclab/tools/utils/__init__.py +5 -0
  73. sclab/tools/utils/_aggregate_and_filter.py +290 -0
  74. sclab/utils/__init__.py +5 -0
  75. sclab/utils/_write_excel.py +510 -0
  76. {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/METADATA +29 -12
  77. sclab-0.3.4.dist-info/RECORD +93 -0
  78. {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/WHEEL +1 -1
  79. sclab-0.3.4.dist-info/licenses/LICENSE +29 -0
  80. sclab-0.1.7.dist-info/RECORD +0 -30
@@ -0,0 +1,196 @@
1
+ from collections import Counter
2
+ from functools import partial
3
+ from typing import Callable, Literal
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ from anndata import AnnData
8
+ from numpy.typing import NDArray
9
+ from pandas.api.types import is_bool_dtype, is_numeric_dtype
10
+ from scipy.sparse import csr_matrix
11
+ from scipy.special import gamma
12
+ from tqdm.auto import tqdm
13
+
14
+
15
def transfer_metadata(
    adata: AnnData,
    group_key: str,
    source_group: str,
    column: str,
    periodic: bool = False,
    vmin: float = 0,
    vmax: float = 1,
    min_neighs: int = 5,
    weight_by: Literal["connectivity", "distance", "constant"] = "connectivity",
):
    """Estimate ``column`` for cells outside ``source_group`` from their neighbors.

    Every cell whose ``adata.obs[group_key]`` differs from ``source_group`` is
    passed to ``_propagate_metadata`` as masked, i.e. its current value is
    hidden and re-estimated from the neighbors that still carry a value.

    The two resulting series are stored in ``adata.obs`` under
    ``transferred_<name>``, where ``<name>`` is the name of each returned
    series (the value column and its companion error/proportion column).
    Nothing is returned.
    """
    # Cells that do not belong to the source group are the transfer targets.
    is_target = adata.obs[group_key] != source_group

    estimates, uncertainties = _propagate_metadata(
        adata,
        column=column,
        periodic=periodic,
        vmin=vmin,
        vmax=vmax,
        min_neighs=min_neighs,
        weight_by=weight_by,
        mask=is_target,
    )

    for result in (estimates, uncertainties):
        adata.obs[f"transferred_{result.name}"] = result
39
+
40
+
41
def propagate_metadata(
    adata: AnnData,
    column: str,
    periodic: bool = False,
    vmin: float = 0,
    vmax: float = 1,
    min_neighs: int = 5,
    weight_by: Literal["connectivity", "distance", "constant"] = "connectivity",
):
    """Fill the missing entries of ``adata.obs[column]`` from neighboring cells.

    Only rows where ``column`` is currently missing are written: the estimate
    goes into ``column`` itself and its uncertainty into the companion column
    named after the error series returned by ``_propagate_metadata``.
    Nothing is returned; ``adata.obs`` is modified in place.
    """
    estimates, uncertainties = _propagate_metadata(
        adata,
        column=column,
        periodic=periodic,
        vmin=vmin,
        vmax=vmax,
        min_neighs=min_neighs,
        weight_by=weight_by,
    )

    # Existing annotations are left untouched; only gaps are filled.
    missing = adata.obs[column].isna()
    error_column = uncertainties.name
    adata.obs.loc[missing, column] = estimates.loc[missing]
    adata.obs.loc[missing, error_column] = uncertainties.loc[missing]
63
+
64
+
65
def _propagate_metadata(
    adata: AnnData,
    column: str,
    periodic: bool = False,
    vmin: float = 0,
    vmax: float = 1,
    min_neighs: int = 5,
    weight_by: Literal["connectivity", "distance", "constant"] = "connectivity",
    mask: np.ndarray | pd.Series | None = None,
) -> tuple[pd.Series, pd.Series]:
    """Estimate missing values of ``adata.obs[column]`` from graph neighbors.

    Parameters
    ----------
    adata
        Object providing ``obs[column]`` and the neighbor graph read by
        ``_get_neighbors_and_weights``.
    column
        Name of the ``obs`` column to propagate.
    periodic, vmin, vmax
        For numeric columns, ``periodic=True`` selects
        ``_assign_numerical_periodic`` with the given value range.
    min_neighs
        Minimum number of neighbors with a non-missing value required to
        assign a value to a cell.
    weight_by
        Weighting scheme forwarded to ``_get_neighbors_and_weights``.
    mask
        Optional selector of cells whose current value is hidden (set to
        missing) before propagation, so that it gets re-estimated.

    Returns
    -------
    tuple[pd.Series, pd.Series]
        The value series (assigned value where one was computed, otherwise the
        possibly masked input value) and a float series named
        ``<column>_proportion`` (categorical/bool) or ``<column>_error``
        (numeric) that is only filled for cells that received an assignment.

    Raises
    ------
    ValueError
        If the column dtype is neither categorical, bool nor numeric.
    """
    D, W = _get_neighbors_and_weights(adata, weight_by=weight_by)

    # Pick the aggregation routine from the column dtype.
    assign_value_fn: Callable
    series = adata.obs[column]
    if isinstance(series.dtype, pd.CategoricalDtype) or is_bool_dtype(series.dtype):
        assign_value_fn = _assign_categorical
    elif is_numeric_dtype(series.dtype) and periodic:
        assign_value_fn = partial(_assign_numerical_periodic, vmin=vmin, vmax=vmax)
    elif is_numeric_dtype(series.dtype):
        assign_value_fn = _assign_numerical
    else:
        raise ValueError(f"Unsupported dtype {series.dtype} for column {column}")

    # Name of the companion column: vote share for labels, error for numbers.
    if isinstance(series.dtype, pd.CategoricalDtype) or is_bool_dtype(series.dtype):
        column_err = f"{column}_proportion"
    else:
        column_err = f"{column}_error"

    # Work on a copy so the caller's column is never modified here.
    meta_values: pd.Series = series.copy()
    if mask is not None:
        meta_values[mask] = pd.NA

    # NOTE(review): these start out empty with the source dtype; how pandas
    # fills an empty bool/int Series is dtype dependent — confirm the behavior
    # for non-float, non-categorical columns.
    new_values = pd.Series(index=series.index, dtype=series.dtype, name=column)
    new_values_err = pd.Series(index=series.index, dtype=float, name=column_err)

    # One row of D and W per cell.
    for i, (d, w) in tqdm(enumerate(zip(D, W)), total=D.shape[0]):
        # Cells that already have a value are not re-estimated.
        if not pd.isna(meta_values.iloc[i]):
            continue

        d = d.tocoo()
        w = w.toarray().ravel()
        # Column indices of the stored entries in this row of D.
        # NOTE(review): the ``coords`` attribute presumably requires a recent
        # SciPy release — confirm the minimum supported version.
        neighs = d.coords[1]

        # Neighbor values are read from meta_values, not new_values, so values
        # assigned earlier in this loop are not propagated further.
        values: pd.Series = meta_values.iloc[neighs]
        msk = pd.notna(values)
        if msk.sum() < min_neighs:
            continue

        values = values.loc[msk]
        weights = w[neighs][msk]

        # Skip cells whose informative neighbors all carry (near-)zero weight.
        if np.allclose(weights, 0):
            continue

        assigned_value, assigned_value_err = assign_value_fn(values, weights)
        new_values.iloc[i] = assigned_value
        new_values_err.iloc[i] = assigned_value_err

    # Fall back to the (possibly masked) input value wherever nothing was
    # assigned: back-fill column 0 from column 1 and keep column 0.
    new_values = pd.concat([new_values, meta_values], axis=1).bfill(axis=1).iloc[:, 0]

    return new_values, new_values_err
126
+
127
+
128
+ def _get_neighbors_and_weights(
129
+ adata: AnnData,
130
+ weight_by: Literal["connectivity", "distance", "constant"] = "connectivity",
131
+ ):
132
+ D: csr_matrix = adata.obsp["distances"].copy()
133
+ C: csr_matrix = adata.obsp["connectivities"].copy()
134
+ D = D.tocsr()
135
+ W: csr_matrix
136
+
137
+ match weight_by:
138
+ case "connectivity":
139
+ W = C.tocsr().copy()
140
+ case "distance":
141
+ W = D.tocsr().copy()
142
+ W.data = 1.0 / W.data
143
+ case "constant":
144
+ W = D.tocsr().copy()
145
+ W.data[:] = 1.0
146
+ case _:
147
+ raise ValueError(f"Unsupported weight_by {weight_by}")
148
+
149
+ return D, W
150
+
151
+
152
+ def _assign_categorical(values: pd.Series, weights: NDArray):
153
+ # weighted majority and proportion of votes
154
+ tally = Counter()
155
+ for v, w in zip(values, weights):
156
+ tally[v] += w
157
+
158
+ winner, shares = tally.most_common()[0]
159
+ return winner, shares / weights.sum()
160
+
161
+
162
+ def _assign_numerical(values: pd.Series, weights: NDArray):
163
+ # weighted mean and standard error
164
+ sum_w: float = weights.sum()
165
+ sum2_w: float = weights.sum() ** 2
166
+ sum_w2: float = (weights**2).sum()
167
+ n_eff: float = sum2_w / sum_w2
168
+
169
+ mean_x: float = (values * weights).sum() / sum_w
170
+ var_x: float = ((values - mean_x) ** 2 * weights).sum() * sum_w / (sum2_w - sum_w2)
171
+ err_x: float = np.sqrt(var_x / n_eff)
172
+
173
+ return mean_x, err_x
174
+
175
+
176
def _assign_numerical_periodic(
    values: pd.Series, weights: NDArray, vmin: float, vmax: float
):
    """Weighted mean and standard error for values on a ``[vmin, vmax)`` cycle.

    The values are shifted so that their median sits in the middle of the
    period, wrapped into one period, averaged with ``_assign_numerical`` and
    the resulting mean is shifted back and wrapped into the original range.
    The error is returned as computed on the re-centered values.
    """
    period = vmax - vmin
    half_period = period / 2

    shifted = values - vmin
    center = np.median(shifted)
    # Put the median at mid-period so the wrap point is far from the bulk.
    recentered = (shifted - center + half_period) % period

    estimate, estimate_err = _assign_numerical(recentered, weights)

    # Undo the re-centering and map back into [vmin, vmax).
    estimate = (estimate + center - half_period) % period
    estimate = estimate + vmin

    return estimate, estimate_err
191
+
192
+
193
+ def _c4(n: float):
194
+ # correct for bias
195
+ nm1 = n - 1
196
+ return np.sqrt(2 / nm1) * gamma(n / 2) / gamma(nm1 / 2)
@@ -0,0 +1,82 @@
1
+ from typing import Optional
2
+
3
+ from anndata import AnnData
4
+ from numpy import ndarray
5
+ from scipy.sparse import csr_matrix
6
+
7
+ from ._utils import get_neighbors_adjacency_matrix
8
+
9
+
10
def pool_neighbors(
    adata: AnnData,
    *,
    layer: Optional[str] = None,
    n_neighbors: Optional[int] = None,
    neighbors_key: Optional[str] = None,
    weighted: bool = False,
    directed: bool = True,
    key_added: Optional[str] = None,
    copy: bool = False,
) -> csr_matrix | ndarray | None:
    """
    Pool cell features as a row-normalized weighted sum over each cell and its
    neighbors.

    The neighbor adjacency matrix is obtained from
    ``get_neighbors_adjacency_matrix``, its diagonal is set to 1 so that every
    cell contributes to its own pooled profile, and each row is divided by its
    sum before being multiplied with the feature matrix.

    Parameters
    ----------
    adata : AnnData
        Annotated data matrix.
    layer : str, optional
        Layer in AnnData object to use for pooling. ``None`` or ``"X"`` selects
        ``adata.X``. Defaults to None.
    n_neighbors : int, optional
        Number of neighbors to consider. Defaults to None, which is forwarded
        unchanged to ``get_neighbors_adjacency_matrix``.
    neighbors_key : str, optional
        Key in AnnData object to use for neighbors. Defaults to None, which
        selects ``"neighbors"``.
    weighted : bool, optional
        Whether to weight neighbors by their connectivities in the adjacency
        matrix. Defaults to False.
    directed : bool, optional
        Whether to use directed or undirected neighbors. Defaults to True.
    key_added : str, optional
        Layer name under which the pooled features are stored. Defaults to
        None, in which case the source (``adata.X`` or ``layer``) is
        overwritten.
    copy : bool, optional
        Whether to return the pooled features instead of modifying the
        original AnnData object. Defaults to False.

    Returns
    -------
    csr_matrix | ndarray | None
        The pooled features if copy is True, otherwise None.
    """
    use_x = layer is None or layer == "X"
    X = adata.X if use_x else adata.layers[layer]

    adjacency = get_neighbors_adjacency_matrix(
        adata,
        # Forwarding None would make the helper look up ``adata.uns[None]``;
        # fall back to the conventional key instead.
        key="neighbors" if neighbors_key is None else neighbors_key,
        n_neighbors=n_neighbors,
        weighted=weighted,
        directed=directed,
    )

    # LIL format allows setting the diagonal on a sparse matrix.
    W = adjacency.tolil()
    W.setdiag(1)

    # Row-normalize so that the weights of each cell sum to one.
    W = W / W.sum(axis=1)

    pooled = W.dot(X)

    if copy:
        return pooled

    if key_added is not None:
        adata.layers[key_added] = pooled
        return

    if use_x:
        adata.X = pooled
    else:
        adata.layers[layer] = pooled
@@ -0,0 +1,96 @@
1
+ from typing import Literal, Optional
2
+
3
+ import numpy as np
4
+ from anndata import AnnData
5
+ from scanpy import Neighbors
6
+ from scipy.sparse import coo_matrix, csr_matrix
7
+
8
+
9
def get_neighbors_adjacency_matrix(
    adata: AnnData,
    *,
    key: Optional[str] = "neighbors",
    n_neighbors: Optional[int] = None,
    weighted: bool = False,
    directed: bool = True,
) -> csr_matrix:
    """Build a neighbor adjacency matrix from a stored neighbors result.

    Parameters
    ----------
    adata
        Object holding the neighbors result under ``adata.uns[key]``.
    key
        Key of the neighbors result. ``None`` is treated as ``"neighbors"``.
    n_neighbors
        Number of neighbors to use. ``None`` keeps the stored value. A smaller
        value with ``weighted=False`` trims the stored distance matrix; any
        other differing value recomputes the neighbors with the stored
        parameters and the new ``n_neighbors``.
    weighted
        If True, the binary adjacency is multiplied element-wise with the
        connectivities; otherwise all stored entries are 1.
    directed
        If False, the adjacency pattern is made symmetric.

    Returns
    -------
    csr_matrix
        The adjacency matrix.
    """
    # ``key`` is typed Optional, but ``adata.uns[None]`` would raise KeyError.
    if key is None:
        key = "neighbors"

    # get the current neighbors
    neigh = Neighbors(adata, neighbors_key=key)
    params = adata.uns[key]["params"]

    if n_neighbors is None:
        n_neighbors = neigh.n_neighbors

    if n_neighbors < neigh.n_neighbors and not weighted:
        # Fewer neighbors and no weights needed: trimming the stored
        # distances avoids recomputing the graph.
        distances = _filter_knn_matrix(
            neigh.distances, n_neighbors=n_neighbors, mode="distances"
        )

    elif n_neighbors != neigh.n_neighbors:
        neigh.compute_neighbors(**{**params, "n_neighbors": n_neighbors})
        distances = neigh.distances

    else:
        distances = neigh.distances

    # Binary adjacency with the sparsity pattern of the distance matrix.
    adjacency = distances.copy()
    adjacency.data = np.ones_like(adjacency.data)

    if not directed:
        # make the adjacency matrix symmetric
        adjacency = _symmetrize_sparse_matrix(adjacency)

    if weighted:
        # use the connectivities to assign weights
        adjacency = adjacency.multiply(neigh.connectivities)

    return adjacency
48
+
49
+
50
+ def _filter_knn_matrix(
51
+ matrix: csr_matrix, *, n_neighbors: int, mode: Literal["distances", "weights"]
52
+ ) -> csr_matrix:
53
+ assert mode in ["distances", "weights"]
54
+ nrows, _ = matrix.shape
55
+
56
+ # Initialize arrays for new sparse matrix with pre-allocated size
57
+ indptr = np.arange(0, (n_neighbors - 1) * (nrows + 1), n_neighbors - 1)
58
+ data = np.zeros(nrows * (n_neighbors - 1), dtype=float)
59
+ indices = np.zeros(nrows * (n_neighbors - 1), dtype=int)
60
+
61
+ # Process each row to keep top n_neighbors-1 connections
62
+ for i in range(nrows):
63
+ start, end = matrix.indptr[i : i + 2]
64
+ idxs = matrix.indices[start:end]
65
+ vals = matrix.data[start:end]
66
+
67
+ # Sort by values and keep top n_neighbors-1
68
+ if mode == "weights":
69
+ # Sort in descending order (keep largest weights)
70
+ o = np.argsort(-vals)[: n_neighbors - 1]
71
+ else:
72
+ # Sort in ascending order (keep smallest distances)
73
+ o = np.argsort(vals)[: n_neighbors - 1]
74
+
75
+ # Maintain original order within top neighbors
76
+ oo = np.argsort(idxs[o])
77
+ start, end = indptr[i : i + 2]
78
+ indices[start:end] = idxs[o][oo]
79
+ data[start:end] = vals[o][oo]
80
+
81
+ return csr_matrix((data, indices, indptr))
82
+
83
+
84
+ def _symmetrize_sparse_matrix(matrix: csr_matrix) -> csr_matrix:
85
+ A = matrix.tocoo()
86
+
87
+ # Make matrix symmetric by duplicating entries in both directions
88
+ coords = np.array([[*A.row, *A.col], [*A.col, *A.row]])
89
+ data = np.array([*A.data, *A.data])
90
+
91
+ # Remove duplicate entries that might occur in symmetrization
92
+ idxs = np.unique(coords, axis=1, return_index=True)[1]
93
+ coords, data = coords[:, idxs], data[idxs]
94
+ A = coo_matrix((data, coords), shape=matrix.shape)
95
+
96
+ return A.tocsr()
File without changes
@@ -0,0 +1,92 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ from dataclasses import dataclass, field
5
+ from functools import cache, partial
6
+ from importlib.util import find_spec
7
+ from pathlib import Path
8
+ from typing import TYPE_CHECKING
9
+
10
+ from packaging.version import Version
11
+
12
+ if TYPE_CHECKING:
13
+ from importlib.metadata import PackageMetadata
14
+
15
+
16
# Optional array backends: bind the real class when the package can be found,
# otherwise define an empty placeholder class so the name always exists.
if TYPE_CHECKING:
    # type checkers are confused and can only see …core.Array
    from dask.array.core import Array as DaskArray
elif find_spec("dask"):
    from dask.array import Array as DaskArray
else:

    # Placeholder used when dask is not installed.
    class DaskArray:
        pass


if find_spec("zappy") or TYPE_CHECKING:
    from zappy.base import ZappyArray
else:

    # Placeholder used when zappy is not installed.
    class ZappyArray:
        pass


# Names exported by ``from ... import *``.
__all__ = [
    "DaskArray",
    "ZappyArray",
    "fullname",
    "pkg_metadata",
    "pkg_version",
]
42
+
43
+
44
def fullname(typ: type) -> str:
    """Return the qualified name of ``typ``, prefixed with its module.

    The module prefix is omitted for builtins and for types whose
    ``__module__`` is ``None``.
    """
    owner = typ.__module__
    qualified = typ.__qualname__
    has_prefix = owner is not None and owner != "builtins"
    return f"{owner}.{qualified}" if has_prefix else qualified
50
+
51
+
52
# ``contextlib.chdir`` is imported on Python 3.11+; older interpreters get a
# small stand-in defined here.
if sys.version_info < (3, 11):
    import os
    from contextlib import AbstractContextManager

    @dataclass
    class chdir(AbstractContextManager):
        # Directory to switch to while the context is active.
        path: Path
        # Stack of previous working directories, one entry per active entry
        # into the context.
        _old_cwd: list[Path] = field(default_factory=list)

        def __enter__(self) -> None:
            previous = Path.cwd()
            self._old_cwd.append(previous)
            os.chdir(self.path)

        def __exit__(self, *_excinfo) -> None:
            previous = self._old_cwd.pop()
            os.chdir(previous)

else:
    from contextlib import chdir
69
+
70
+
71
+ def pkg_metadata(package: str) -> PackageMetadata:
72
+ from importlib.metadata import metadata
73
+
74
+ return metadata(package)
75
+
76
+
77
@cache
def pkg_version(package: str) -> Version:
    """Return the installed version of ``package`` as a ``Version`` object.

    Results are cached per package name.
    """
    # Imported lazily, only when the function is called.
    import importlib.metadata

    raw_version = importlib.metadata.version(package)
    return Version(raw_version)
82
+
83
+
84
_has_legacy_api_wrap = find_spec("legacy_api_wrap")
if _has_legacy_api_wrap or TYPE_CHECKING:
    from legacy_api_wrap import legacy_api  # noqa: TID251

    old_positionals = partial(legacy_api, category=FutureWarning)
else:
    # legacy_api_wrap is currently a hard dependency,
    # but this code makes it possible to run scanpy without it.
    def old_positionals(*old_positionals: str):
        # Fallback: a decorator factory that returns the function unchanged.
        def passthrough(func):
            return func

        return passthrough