sclab-0.2.5-py3-none-any.whl → sclab-0.3.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sclab might be problematic. Click here for more details.

Files changed (53)
  1. sclab/__init__.py +1 -1
  2. sclab/_sclab.py +7 -3
  3. sclab/dataset/_dataset.py +1 -1
  4. sclab/dataset/processor/_processor.py +19 -4
  5. sclab/examples/processor_steps/__init__.py +2 -0
  6. sclab/examples/processor_steps/_doublet_detection.py +68 -0
  7. sclab/examples/processor_steps/_integration.py +47 -20
  8. sclab/examples/processor_steps/_neighbors.py +24 -4
  9. sclab/examples/processor_steps/_pca.py +11 -6
  10. sclab/examples/processor_steps/_preprocess.py +14 -1
  11. sclab/examples/processor_steps/_qc.py +22 -6
  12. sclab/gui/__init__.py +0 -0
  13. sclab/gui/components/__init__.py +7 -0
  14. sclab/gui/components/_guided_pseudotime.py +482 -0
  15. sclab/gui/components/_transfer_metadata.py +186 -0
  16. sclab/methods/__init__.py +16 -0
  17. sclab/preprocess/__init__.py +19 -0
  18. sclab/preprocess/_cca.py +154 -0
  19. sclab/preprocess/_cca_integrate.py +109 -0
  20. sclab/preprocess/_filter_obs.py +42 -0
  21. sclab/preprocess/_harmony.py +421 -0
  22. sclab/preprocess/_harmony_integrate.py +53 -0
  23. sclab/preprocess/_normalize_weighted.py +61 -0
  24. sclab/preprocess/_subset.py +208 -0
  25. sclab/preprocess/_transfer_metadata.py +137 -0
  26. sclab/preprocess/_transform.py +82 -0
  27. sclab/preprocess/_utils.py +96 -0
  28. sclab/tools/__init__.py +0 -0
  29. sclab/tools/cellflow/__init__.py +0 -0
  30. sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
  31. sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
  32. sclab/tools/cellflow/pseudotime/__init__.py +0 -0
  33. sclab/tools/cellflow/pseudotime/_pseudotime.py +332 -0
  34. sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
  35. sclab/tools/cellflow/utils/__init__.py +0 -0
  36. sclab/tools/cellflow/utils/density_nd.py +215 -0
  37. sclab/tools/cellflow/utils/interpolate.py +334 -0
  38. sclab/tools/cellflow/utils/smoothen.py +124 -0
  39. sclab/tools/cellflow/utils/times.py +55 -0
  40. sclab/tools/differential_expression/__init__.py +5 -0
  41. sclab/tools/differential_expression/_pseudobulk_edger.py +304 -0
  42. sclab/tools/differential_expression/_pseudobulk_helpers.py +277 -0
  43. sclab/tools/doublet_detection/__init__.py +5 -0
  44. sclab/tools/doublet_detection/_scrublet.py +64 -0
  45. sclab/tools/labeling/__init__.py +6 -0
  46. sclab/tools/labeling/sctype.py +233 -0
  47. sclab/utils/__init__.py +5 -0
  48. sclab/utils/_write_excel.py +510 -0
  49. {sclab-0.2.5.dist-info → sclab-0.3.1.dist-info}/METADATA +6 -2
  50. sclab-0.3.1.dist-info/RECORD +82 -0
  51. sclab-0.2.5.dist-info/RECORD +0 -45
  52. {sclab-0.2.5.dist-info → sclab-0.3.1.dist-info}/WHEEL +0 -0
  53. {sclab-0.2.5.dist-info → sclab-0.3.1.dist-info}/licenses/LICENSE +0 -0
sclab/__init__.py CHANGED
@@ -6,4 +6,4 @@ __all__ = [
6
6
  "SCLabDashboard",
7
7
  ]
8
8
 
9
- __version__ = "0.2.5"
9
+ __version__ = "0.3.1"
sclab/_sclab.py CHANGED
@@ -171,7 +171,11 @@ class DataLoader(VBox):
171
171
  self.upload = FileUpload(layout=Layout(width="200px"))
172
172
  self.upload_info = Output(layout=Layout(width="95%"))
173
173
  self.upload_row = HBox(
174
- [self.upload_row_label, self.upload, self.upload_info],
174
+ [
175
+ self.upload_row_label,
176
+ self.upload,
177
+ self.upload_info,
178
+ ],
175
179
  layout=Layout(width="100%"),
176
180
  )
177
181
  self.upload.observe(self.on_upload, "value")
@@ -214,8 +218,8 @@ class DataLoader(VBox):
214
218
  VBox.__init__(
215
219
  self,
216
220
  [
217
- self.url_row,
218
- self.upload_row,
221
+ # self.url_row,
222
+ # self.upload_row,
219
223
  self.defined_adatas_row,
220
224
  self.progress_output,
221
225
  ],
sclab/dataset/_dataset.py CHANGED
@@ -365,7 +365,7 @@ class SCLabDataset(EventClient):
365
365
  if not index.isin(self.metadata.index).all():
366
366
  raise InvalidRowSubset("index contains invalid values")
367
367
 
368
- self.adata = self.adata[index].copy()
368
+ self.adata._inplace_subset_obs(index)
369
369
 
370
370
  self.broker.publish("dset_total_rows_change", self.metadata)
371
371
 
@@ -1025,22 +1025,37 @@ class Processor(EventClient):
1025
1025
  else:
1026
1026
  control.value = current_value
1027
1027
 
1028
- def dset_anndata_layers_change_callback(self, layers):
1029
- options = {layer: layer for layer in layers}
1028
+ def dset_anndata_layers_change_callback(self, *args, **kwargs):
1029
+ layer_options = {key: key for key in self.dataset.adata.layers.keys()}
1030
+ obsm_options = {key: key for key in self.dataset.adata.obsm.keys()}
1031
+
1030
1032
  for control in self.all_controls_list:
1031
1033
  if not isinstance(control, Dropdown):
1032
1034
  continue
1033
1035
  description: str = control.description
1036
+
1034
1037
  if description.lower().strip(" :.") == "layer":
1035
1038
  current_value = control.value
1036
- control.options = options
1039
+ control.options = layer_options
1040
+ if current_value not in control.options:
1041
+ control.value = None
1042
+ else:
1043
+ control.value = current_value
1044
+
1045
+ if description.lower().strip(" :.") == "use rep":
1046
+ current_value = control.value
1047
+ control.options = {**layer_options, **obsm_options}
1037
1048
  if current_value not in control.options:
1038
1049
  control.value = None
1039
1050
  else:
1040
1051
  control.value = current_value
1041
1052
 
1042
1053
  def dset_data_dict_change_callback(self, *args, **kwargs):
1043
- options = {v: v for v in self.dataset.adata.obsm.keys()}
1054
+ options = [
1055
+ *self.dataset.adata.layers.keys(),
1056
+ *self.dataset.adata.obsm.keys(),
1057
+ ]
1058
+ options = {v: v for v in options}
1044
1059
  for control in self.all_controls_list:
1045
1060
  if not isinstance(control, Dropdown):
1046
1061
  continue
@@ -1,5 +1,6 @@
1
1
  from ._cluster import Cluster
2
2
  from ._differential_expression import DifferentialExpression
3
+ from ._doublet_detection import DoubletDetection
3
4
  from ._gene_expression import GeneExpression
4
5
  from ._integration import Integration
5
6
  from ._neighbors import Neighbors
@@ -16,6 +17,7 @@ __all__ = [
16
17
  "Neighbors",
17
18
  "UMAP",
18
19
  "Cluster",
20
+ "DoubletDetection",
19
21
  "GeneExpression",
20
22
  "DifferentialExpression",
21
23
  ]
@@ -0,0 +1,68 @@
1
+ from ipywidgets import Dropdown
2
+
3
+ from sclab.dataset.processor import Processor
4
+ from sclab.dataset.processor.step import ProcessorStepBase
5
+ from sclab.tools.doublet_detection import scrublet
6
+
7
+ # from sclab.tools.doublet_detection import doubletdetection
8
+ # from sclab.tools.doublet_detection import scdblfinder
9
+
10
+
11
+ class DoubletDetection(ProcessorStepBase):
12
+ parent: Processor
13
+ name: str = "doublet_detection"
14
+ description: str = "Doublet Detection"
15
+
16
+ def __init__(self, parent: Processor) -> None:
17
+ variable_controls = dict(
18
+ layer=Dropdown(
19
+ options=tuple(parent.dataset.adata.layers.keys()),
20
+ value=None,
21
+ description="Layer",
22
+ ),
23
+ flavor=Dropdown(
24
+ options=[
25
+ "scrublet",
26
+ # "doubletdetection",
27
+ # "scDblFinder",
28
+ ],
29
+ description="Flavor",
30
+ ),
31
+ )
32
+
33
+ super().__init__(
34
+ parent=parent,
35
+ fixed_params={},
36
+ variable_controls=variable_controls,
37
+ )
38
+
39
+ def function(self, layer: str, flavor: str):
40
+ adata = self.parent.dataset.adata
41
+
42
+ kvargs = {"adata": adata, "layer": layer, "key_added": flavor}
43
+
44
+ self.broker.std_output.clear_output(wait=False)
45
+ with self.broker.std_output:
46
+ match flavor:
47
+ # case "scDblFinder":
48
+ # scdblfinder(**kvargs, clusters_col="leiden")
49
+
50
+ # case "doubletdetection":
51
+ # doubletdetection(
52
+ # **kvargs,
53
+ # pseudocount=1,
54
+ # clustering_algorithm="leiden",
55
+ # clustering_kwargs=dict(resolution=5.0),
56
+ # )
57
+
58
+ case "scrublet":
59
+ scrublet(**kvargs)
60
+
61
+ case _:
62
+ raise ValueError(f"Unknown flavor: {flavor}")
63
+
64
+ self.broker.publish(
65
+ "dset_metadata_change",
66
+ self.parent.dataset.metadata,
67
+ f"{flavor}_label",
68
+ )
@@ -1,4 +1,4 @@
1
- from ipywidgets import Dropdown
1
+ from ipywidgets import Dropdown, IntText
2
2
 
3
3
  from sclab.dataset.processor import Processor
4
4
  from sclab.dataset.processor.step import ProcessorStepBase
@@ -10,20 +10,6 @@ class Integration(ProcessorStepBase):
10
10
  description: str = "Integration"
11
11
 
12
12
  def __init__(self, parent: Processor) -> None:
13
- try:
14
- from scanpy.external.pp import harmony_integrate # noqa
15
- except ImportError:
16
- try:
17
- from scanpy.external.pp import scanorama_integrate # noqa
18
- except ImportError:
19
- raise ImportError(
20
- "Integration requires scanorama or harmony to be installed.\n"
21
- "\nInstall with one of:\n"
22
- "\npip install harmony"
23
- "\npip install scanorama"
24
- "\n"
25
- )
26
-
27
13
  cat_metadata = parent.dataset._metadata.select_dtypes(
28
14
  include=["object", "category"]
29
15
  )
@@ -40,20 +26,49 @@ class Integration(ProcessorStepBase):
40
26
  value="batch" if "batch" in cat_options else None,
41
27
  description="GroupBy",
42
28
  ),
29
+ reference_batch=Dropdown(
30
+ description="Reference Batch",
31
+ ),
43
32
  flavor=Dropdown(
44
- options=["harmony", "scanorama"],
45
- value="harmony",
33
+ options=["cca", "harmony", "scanorama"],
34
+ value="cca",
46
35
  description="Flavor",
47
36
  ),
37
+ max_iters=IntText(
38
+ value=20,
39
+ description="Max iters",
40
+ ),
48
41
  )
49
42
 
43
+ def update_reference_batch(*args, **kwargs):
44
+ group_by = variable_controls["group_by"].value
45
+ options = {
46
+ "": None,
47
+ **{
48
+ c: c
49
+ for c in self.parent.dataset.adata.obs[group_by]
50
+ .sort_values()
51
+ .unique()
52
+ },
53
+ }
54
+ variable_controls["reference_batch"].options = options
55
+
56
+ variable_controls["group_by"].observe(update_reference_batch, names="value")
57
+
50
58
  super().__init__(
51
59
  parent=parent,
52
60
  fixed_params={},
53
61
  variable_controls=variable_controls,
54
62
  )
55
63
 
56
- def function(self, use_rep: str, group_by: str, flavor: str):
64
+ def function(
65
+ self,
66
+ use_rep: str,
67
+ group_by: str,
68
+ flavor: str,
69
+ reference_batch: str | None,
70
+ max_iters: int,
71
+ ):
57
72
  adata = self.parent.dataset.adata
58
73
 
59
74
  key_added = f"{use_rep}_{flavor}"
@@ -67,10 +82,22 @@ class Integration(ProcessorStepBase):
67
82
  self.broker.std_output.clear_output(wait=False)
68
83
  with self.broker.std_output:
69
84
  match flavor:
85
+ case "cca":
86
+ from sclab.preprocess import cca_integrate
87
+
88
+ cca_integrate(
89
+ **kvargs,
90
+ reference_batch=reference_batch,
91
+ )
92
+
70
93
  case "harmony":
71
- from scanpy.external.pp import harmony_integrate
94
+ from sclab.preprocess import harmony_integrate
72
95
 
73
- harmony_integrate(**kvargs)
96
+ harmony_integrate(
97
+ **kvargs,
98
+ reference_batch=reference_batch,
99
+ max_iter_harmony=max_iters,
100
+ )
74
101
 
75
102
  case "scanorama":
76
103
  from scanpy.external.pp import scanorama_integrate
@@ -1,4 +1,4 @@
1
- from ipywidgets import Dropdown, IntText
1
+ from ipywidgets import Dropdown, IntRangeSlider, IntText
2
2
 
3
3
  from sclab.dataset.processor import Processor
4
4
  from sclab.dataset.processor.step import ProcessorStepBase
@@ -22,7 +22,12 @@ class Neighbors(ProcessorStepBase):
22
22
  description="Use rep.",
23
23
  ),
24
24
  n_neighbors=IntText(value=20, description="N neighbors"),
25
- n_dims=IntText(value=10, description="N Dims"),
25
+ dims=IntRangeSlider(
26
+ min=1,
27
+ max=30,
28
+ value=(1, 10),
29
+ description="Use dims",
30
+ ),
26
31
  metric=Dropdown(
27
32
  options=["euclidean", "cosine"],
28
33
  value="euclidean",
@@ -31,6 +36,14 @@ class Neighbors(ProcessorStepBase):
31
36
  **parent.make_groupbybatch_checkbox(),
32
37
  )
33
38
 
39
+ def update_dims_range(*args, **kwargs):
40
+ adata = self.parent.dataset.adata
41
+ use_rep = variable_controls["use_rep"].value
42
+ max_dim = adata.obsm[use_rep].shape[1]
43
+ variable_controls["dims"].max = max_dim
44
+
45
+ variable_controls["use_rep"].observe(update_dims_range, names="value")
46
+
34
47
  super().__init__(
35
48
  parent=parent,
36
49
  fixed_params={},
@@ -41,13 +54,20 @@ class Neighbors(ProcessorStepBase):
41
54
  self,
42
55
  n_neighbors: int = 20,
43
56
  use_rep: str = "X_pca",
44
- n_dims: int = 10,
57
+ dims: tuple[int, int] = (1, 10),
45
58
  metric: str = "euclidean",
46
59
  group_by_batch: bool = False,
47
60
  ):
48
61
  import scanpy as sc
49
62
 
50
63
  adata = self.parent.dataset.adata
64
+ min_dim, max_dim = dims
65
+ min_dim = min_dim - 1
66
+
67
+ if min_dim > 0:
68
+ adata.obsm[use_rep + "_trimmed"] = adata.obsm[use_rep][:, min_dim:max_dim]
69
+ use_rep = use_rep + "_trimmed"
70
+ n_dims = max_dim - min_dim
51
71
 
52
72
  if group_by_batch and self.parent.batch_key:
53
73
  group_by = self.parent.batch_key
@@ -58,7 +78,7 @@ class Neighbors(ProcessorStepBase):
58
78
  n_pcs=n_dims,
59
79
  use_annoy=False,
60
80
  metric=metric,
61
- pynndescent_n_neighbors=n_neighbors,
81
+ neighbors_within_batch=n_neighbors,
62
82
  )
63
83
  else:
64
84
  sc.pp.neighbors(
@@ -1,7 +1,6 @@
1
- import numpy as np
2
1
  import pandas as pd
3
2
  import plotly.express as px
4
- from ipywidgets import Button, Dropdown, IntText
3
+ from ipywidgets import Button, Checkbox, Dropdown, IntText
5
4
 
6
5
  from sclab.dataset.processor import Processor
7
6
  from sclab.dataset.processor.step import ProcessorStepBase
@@ -27,6 +26,7 @@ class PCA(ProcessorStepBase):
27
26
  n_comps=IntText(value=30, description="N comps."),
28
27
  mask_var=Dropdown(options=mask_var_options, description="Genes mask"),
29
28
  **parent.make_selectbatch_drowpdown(description="Reference Batch"),
29
+ zero_center=Checkbox(value=False, description="Zero center"),
30
30
  )
31
31
 
32
32
  super().__init__(
@@ -57,6 +57,7 @@ class PCA(ProcessorStepBase):
57
57
  n_comps: int = 30,
58
58
  mask_var: str | None = None,
59
59
  reference_batch: str | None = None,
60
+ zero_center: bool = False,
60
61
  ):
61
62
  import scanpy as sc
62
63
 
@@ -64,7 +65,9 @@ class PCA(ProcessorStepBase):
64
65
  counts_layer = self.parent.dataset.counts_layer
65
66
 
66
67
  if reference_batch:
67
- obs_mask = adata.obs[self.parent.batch_key] == reference_batch
68
+ batch_key = self.parent.batch_key
69
+
70
+ obs_mask = adata.obs[batch_key] == reference_batch
68
71
  adata_ref = adata[obs_mask].copy()
69
72
  if mask_var == "highly_variable":
70
73
  sc.pp.highly_variable_genes(
@@ -85,13 +88,15 @@ class PCA(ProcessorStepBase):
85
88
  uns_pca = adata_ref.uns["pca"]
86
89
  uns_pca["reference_batch"] = reference_batch
87
90
  PCs = adata_ref.varm["PCs"]
88
- X_pca: np.ndarray = adata.X.dot(PCs)
89
- X_pca = X_pca - X_pca.mean(axis=0, keepdims=True)
90
- adata.obsm["X_pca"] = X_pca
91
+ adata.obsm["X_pca"] = adata.X.dot(PCs)
91
92
  adata.uns["pca"] = uns_pca
92
93
  adata.varm["PCs"] = PCs
93
94
  else:
94
95
  sc.pp.pca(adata, n_comps=n_comps, mask_var=mask_var, svd_solver="arpack")
96
+ adata.obsm["X_pca"] = adata.X.dot(adata.varm["PCs"])
97
+
98
+ if zero_center:
99
+ adata.obsm["X_pca"] -= adata.obsm["X_pca"].mean(axis=0, keepdims=True)
95
100
 
96
101
  self.plot_variance_ratio_button.disabled = False
97
102
  self.broker.publish(
@@ -1,4 +1,7 @@
1
+ import warnings
2
+
1
3
  import numpy as np
4
+ from anndata import ImplicitModificationWarning
2
5
  from ipywidgets import Checkbox, Dropdown
3
6
  from tqdm.auto import tqdm
4
7
 
@@ -156,7 +159,17 @@ class Preprocess(ProcessorStepBase):
156
159
 
157
160
  if scale:
158
161
  new_layer += "_scale"
159
- sc.pp.scale(adata, zero_center=False)
162
+ if group_by is not None:
163
+ for _, idx in adata.obs.groupby(group_by, observed=True).groups.items():
164
+ with warnings.catch_warnings():
165
+ warnings.filterwarnings(
166
+ "ignore",
167
+ category=ImplicitModificationWarning,
168
+ message="Modifying `X` on a view results in data being overridden",
169
+ )
170
+ adata[idx].X = sc.pp.scale(adata[idx].X, zero_center=False)
171
+ else:
172
+ sc.pp.scale(adata, zero_center=False)
160
173
 
161
174
  adata.layers[new_layer] = adata.X.copy()
162
175
 
@@ -1,3 +1,4 @@
1
+ import numpy as np
1
2
  from ipywidgets import Dropdown, IntText
2
3
 
3
4
  from sclab.dataset.processor import Processor
@@ -21,6 +22,7 @@ class QC(ProcessorStepBase):
21
22
  value="counts",
22
23
  description="Layer",
23
24
  ),
25
+ min_counts=IntText(value=50, description="Min. Counts"),
24
26
  min_genes=IntText(value=5, description="Min. Genes"),
25
27
  min_cells=IntText(value=0, description="Min. Cells"),
26
28
  max_rank=IntText(value=0, description="Max. Rank"),
@@ -45,8 +47,10 @@ class QC(ProcessorStepBase):
45
47
  def compute_qc_metrics(
46
48
  self,
47
49
  layer: str | None = None,
50
+ min_counts: int = 50,
48
51
  min_genes: int = 5,
49
52
  min_cells: int = 5,
53
+ max_rank: int = 0,
50
54
  ):
51
55
  import scanpy as sc
52
56
 
@@ -58,6 +62,11 @@ class QC(ProcessorStepBase):
58
62
 
59
63
  adata.layers["qc_tmp_current_X"] = adata.X
60
64
  adata.X = adata.layers[layer].copy()
65
+ rowsums = np.asarray(adata.X.sum(axis=1)).squeeze()
66
+
67
+ obs_idx = adata.obs_names[rowsums >= min_counts]
68
+ adata._inplace_subset_obs(obs_idx)
69
+
61
70
  sc.pp.calculate_qc_metrics(adata, percent_top=None, log1p=False, inplace=True)
62
71
 
63
72
  sc.pp.filter_cells(adata, min_genes=min_genes)
@@ -68,19 +77,26 @@ class QC(ProcessorStepBase):
68
77
  # Restore original X
69
78
  adata.X = adata.layers.pop("qc_tmp_current_X")
70
79
 
80
+ if max_rank > 0:
81
+ series = self.parent.dataset.adata.obs["barcode_rank"]
82
+ index = series.loc[series < max_rank].index
83
+ self.parent.dataset.filter_rows(index)
84
+
71
85
  def function(
72
86
  self,
73
87
  layer: str | None = None,
88
+ min_counts: int = 50,
74
89
  min_genes: int = 5,
75
90
  min_cells: int = 5,
76
91
  max_rank: int = 0,
77
92
  ):
78
- self.compute_qc_metrics(layer, min_genes, min_cells)
79
-
80
- if max_rank > 0:
81
- series = self.parent.dataset.adata.obs["barcode_rank"]
82
- index = series.loc[series < max_rank].index
83
- self.parent.dataset.filter_rows(index)
93
+ self.compute_qc_metrics(
94
+ layer,
95
+ min_counts,
96
+ min_genes,
97
+ min_cells,
98
+ max_rank,
99
+ )
84
100
 
85
101
  self.broker.publish("dset_metadata_change", self.parent.dataset.metadata)
86
102
  self.broker.publish(
sclab/gui/__init__.py ADDED
File without changes
@@ -0,0 +1,7 @@
1
+ from ._guided_pseudotime import GuidedPseudotime
2
+ from ._transfer_metadata import TransferMetadata
3
+
4
+ __all__ = [
5
+ "GuidedPseudotime",
6
+ "TransferMetadata",
7
+ ]