sclab 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sclab might be problematic. Click here for more details.

Files changed (51)
  1. sclab/__init__.py +1 -1
  2. sclab/_sclab.py +10 -3
  3. sclab/dataset/_dataset.py +1 -1
  4. sclab/examples/processor_steps/__init__.py +2 -0
  5. sclab/examples/processor_steps/_doublet_detection.py +68 -0
  6. sclab/examples/processor_steps/_integration.py +37 -4
  7. sclab/examples/processor_steps/_neighbors.py +24 -4
  8. sclab/examples/processor_steps/_pca.py +5 -5
  9. sclab/examples/processor_steps/_preprocess.py +14 -1
  10. sclab/examples/processor_steps/_qc.py +22 -6
  11. sclab/gui/__init__.py +0 -0
  12. sclab/gui/components/__init__.py +5 -0
  13. sclab/gui/components/_guided_pseudotime.py +482 -0
  14. sclab/methods/__init__.py +25 -1
  15. sclab/preprocess/__init__.py +18 -0
  16. sclab/preprocess/_cca.py +154 -0
  17. sclab/preprocess/_cca_integrate.py +77 -0
  18. sclab/preprocess/_filter_obs.py +42 -0
  19. sclab/preprocess/_harmony.py +421 -0
  20. sclab/preprocess/_harmony_integrate.py +50 -0
  21. sclab/preprocess/_normalize_weighted.py +61 -0
  22. sclab/preprocess/_subset.py +208 -0
  23. sclab/preprocess/_transfer_metadata.py +137 -0
  24. sclab/preprocess/_transform.py +82 -0
  25. sclab/preprocess/_utils.py +96 -0
  26. sclab/tools/__init__.py +0 -0
  27. sclab/tools/cellflow/__init__.py +0 -0
  28. sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
  29. sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
  30. sclab/tools/cellflow/pseudotime/__init__.py +0 -0
  31. sclab/tools/cellflow/pseudotime/_pseudotime.py +332 -0
  32. sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
  33. sclab/tools/cellflow/utils/__init__.py +0 -0
  34. sclab/tools/cellflow/utils/density_nd.py +136 -0
  35. sclab/tools/cellflow/utils/interpolate.py +334 -0
  36. sclab/tools/cellflow/utils/smoothen.py +124 -0
  37. sclab/tools/cellflow/utils/times.py +55 -0
  38. sclab/tools/differential_expression/__init__.py +5 -0
  39. sclab/tools/differential_expression/_pseudobulk_edger.py +304 -0
  40. sclab/tools/differential_expression/_pseudobulk_helpers.py +277 -0
  41. sclab/tools/doublet_detection/__init__.py +5 -0
  42. sclab/tools/doublet_detection/_scrublet.py +64 -0
  43. sclab/tools/labeling/__init__.py +6 -0
  44. sclab/tools/labeling/sctype.py +233 -0
  45. sclab/utils/__init__.py +5 -0
  46. sclab/utils/_write_excel.py +510 -0
  47. {sclab-0.2.4.dist-info → sclab-0.3.0.dist-info}/METADATA +7 -2
  48. sclab-0.3.0.dist-info/RECORD +81 -0
  49. sclab-0.2.4.dist-info/RECORD +0 -45
  50. {sclab-0.2.4.dist-info → sclab-0.3.0.dist-info}/WHEEL +0 -0
  51. {sclab-0.2.4.dist-info → sclab-0.3.0.dist-info}/licenses/LICENSE +0 -0
sclab/__init__.py CHANGED
@@ -6,4 +6,4 @@ __all__ = [
6
6
  "SCLabDashboard",
7
7
  ]
8
8
 
9
- __version__ = "0.2.4"
9
+ __version__ = "0.3.0"
sclab/_sclab.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import inspect
2
2
  from io import BytesIO
3
3
  from pathlib import Path
4
+ import tempfile
4
5
 
5
6
  from anndata import AnnData
6
7
  from IPython.display import display
@@ -256,14 +257,20 @@ class DataLoader(VBox):
256
257
 
257
258
  match path.suffix:
258
259
  case ".h5":
259
- adata = read_10x_h5(contents)
260
+ with tempfile.NamedTemporaryFile(suffix=".h5") as tmp:
261
+ tmp.write(contents.getbuffer())
262
+ tmp.flush()
263
+ adata = read_10x_h5(tmp.name)
260
264
  case ".h5ad":
261
- adata = read_h5ad(contents)
265
+ with tempfile.NamedTemporaryFile(suffix=".h5ad") as tmp:
266
+ tmp.write(contents.getbuffer())
267
+ tmp.flush()
268
+ adata = read_h5ad(tmp.name)
262
269
  case _:
263
270
  self.upload_info.clear_output()
264
271
  with self.upload_info:
265
272
  print(f"`{filename}` is not valid")
266
- print("Please upload a 10x h5 or h5ad file")
273
+ print("Please upload a 10x .h5 or .h5ad file")
267
274
  return
268
275
 
269
276
  if var_names in adata.var:
sclab/dataset/_dataset.py CHANGED
@@ -365,7 +365,7 @@ class SCLabDataset(EventClient):
365
365
  if not index.isin(self.metadata.index).all():
366
366
  raise InvalidRowSubset("index contains invalid values")
367
367
 
368
- self.adata = self.adata[index].copy()
368
+ self.adata._inplace_subset_obs(index)
369
369
 
370
370
  self.broker.publish("dset_total_rows_change", self.metadata)
371
371
 
sclab/examples/processor_steps/__init__.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from ._cluster import Cluster
2
2
  from ._differential_expression import DifferentialExpression
3
+ from ._doublet_detection import DoubletDetection
3
4
  from ._gene_expression import GeneExpression
4
5
  from ._integration import Integration
5
6
  from ._neighbors import Neighbors
@@ -16,6 +17,7 @@ __all__ = [
16
17
  "Neighbors",
17
18
  "UMAP",
18
19
  "Cluster",
20
+ "DoubletDetection",
19
21
  "GeneExpression",
20
22
  "DifferentialExpression",
21
23
  ]
sclab/examples/processor_steps/_doublet_detection.py ADDED
@@ -0,0 +1,68 @@
1
+ from ipywidgets import Dropdown
2
+
3
+ from sclab.dataset.processor import Processor
4
+ from sclab.dataset.processor.step import ProcessorStepBase
5
+ from sclab.tools.doublet_detection import scrublet
6
+
7
+ # from sclab.tools.doublet_detection import doubletdetection
8
+ # from sclab.tools.doublet_detection import scdblfinder
9
+
10
+
11
+ class DoubletDetection(ProcessorStepBase):
12
+ parent: Processor
13
+ name: str = "doublet_detection"
14
+ description: str = "Doublet Detection"
15
+
16
+ def __init__(self, parent: Processor) -> None:
17
+ variable_controls = dict(
18
+ layer=Dropdown(
19
+ options=tuple(parent.dataset.adata.layers.keys()),
20
+ value=None,
21
+ description="Layer",
22
+ ),
23
+ flavor=Dropdown(
24
+ options=[
25
+ "scrublet",
26
+ # "doubletdetection",
27
+ # "scDblFinder",
28
+ ],
29
+ description="Flavor",
30
+ ),
31
+ )
32
+
33
+ super().__init__(
34
+ parent=parent,
35
+ fixed_params={},
36
+ variable_controls=variable_controls,
37
+ )
38
+
39
+ def function(self, layer: str, flavor: str):
40
+ adata = self.parent.dataset.adata
41
+
42
+ kvargs = {"adata": adata, "layer": layer, "key_added": flavor}
43
+
44
+ self.broker.std_output.clear_output(wait=False)
45
+ with self.broker.std_output:
46
+ match flavor:
47
+ # case "scDblFinder":
48
+ # scdblfinder(**kvargs, clusters_col="leiden")
49
+
50
+ # case "doubletdetection":
51
+ # doubletdetection(
52
+ # **kvargs,
53
+ # pseudocount=1,
54
+ # clustering_algorithm="leiden",
55
+ # clustering_kwargs=dict(resolution=5.0),
56
+ # )
57
+
58
+ case "scrublet":
59
+ scrublet(**kvargs)
60
+
61
+ case _:
62
+ raise ValueError(f"Unknown flavor: {flavor}")
63
+
64
+ self.broker.publish(
65
+ "dset_metadata_change",
66
+ self.parent.dataset.metadata,
67
+ f"{flavor}_label",
68
+ )
sclab/examples/processor_steps/_integration.py CHANGED
@@ -1,4 +1,4 @@
1
- from ipywidgets import Dropdown
1
+ from ipywidgets import Dropdown, IntText
2
2
 
3
3
  from sclab.dataset.processor import Processor
4
4
  from sclab.dataset.processor.step import ProcessorStepBase
@@ -40,20 +40,49 @@ class Integration(ProcessorStepBase):
40
40
  value="batch" if "batch" in cat_options else None,
41
41
  description="GroupBy",
42
42
  ),
43
+ reference_batch=Dropdown(
44
+ description="Reference Batch",
45
+ ),
43
46
  flavor=Dropdown(
44
47
  options=["harmony", "scanorama"],
45
48
  value="harmony",
46
49
  description="Flavor",
47
50
  ),
51
+ max_iters=IntText(
52
+ value=20,
53
+ description="Max iters",
54
+ ),
48
55
  )
49
56
 
57
+ def update_reference_batch(*args, **kwargs):
58
+ group_by = variable_controls["group_by"].value
59
+ options = {
60
+ "": None,
61
+ **{
62
+ c: c
63
+ for c in self.parent.dataset.adata.obs[group_by]
64
+ .sort_values()
65
+ .unique()
66
+ },
67
+ }
68
+ variable_controls["reference_batch"].options = options
69
+
70
+ variable_controls["group_by"].observe(update_reference_batch, names="value")
71
+
50
72
  super().__init__(
51
73
  parent=parent,
52
74
  fixed_params={},
53
75
  variable_controls=variable_controls,
54
76
  )
55
77
 
56
- def function(self, use_rep: str, group_by: str, flavor: str):
78
+ def function(
79
+ self,
80
+ use_rep: str,
81
+ group_by: str,
82
+ flavor: str,
83
+ reference_batch: str | None,
84
+ max_iters: int,
85
+ ):
57
86
  adata = self.parent.dataset.adata
58
87
 
59
88
  key_added = f"{use_rep}_{flavor}"
@@ -68,9 +97,13 @@ class Integration(ProcessorStepBase):
68
97
  with self.broker.std_output:
69
98
  match flavor:
70
99
  case "harmony":
71
- from scanpy.external.pp import harmony_integrate
100
+ from sclab.preprocess import harmony_integrate
72
101
 
73
- harmony_integrate(**kvargs)
102
+ harmony_integrate(
103
+ **kvargs,
104
+ reference_batch=reference_batch,
105
+ max_iter_harmony=max_iters,
106
+ )
74
107
 
75
108
  case "scanorama":
76
109
  from scanpy.external.pp import scanorama_integrate
sclab/examples/processor_steps/_neighbors.py CHANGED
@@ -1,4 +1,4 @@
1
- from ipywidgets import Dropdown, IntText
1
+ from ipywidgets import Dropdown, IntRangeSlider, IntText
2
2
 
3
3
  from sclab.dataset.processor import Processor
4
4
  from sclab.dataset.processor.step import ProcessorStepBase
@@ -22,7 +22,12 @@ class Neighbors(ProcessorStepBase):
22
22
  description="Use rep.",
23
23
  ),
24
24
  n_neighbors=IntText(value=20, description="N neighbors"),
25
- n_dims=IntText(value=10, description="N Dims"),
25
+ dims=IntRangeSlider(
26
+ min=1,
27
+ max=30,
28
+ value=(1, 10),
29
+ description="Use dims",
30
+ ),
26
31
  metric=Dropdown(
27
32
  options=["euclidean", "cosine"],
28
33
  value="euclidean",
@@ -31,6 +36,14 @@ class Neighbors(ProcessorStepBase):
31
36
  **parent.make_groupbybatch_checkbox(),
32
37
  )
33
38
 
39
+ def update_dims_range(*args, **kwargs):
40
+ adata = self.parent.dataset.adata
41
+ use_rep = variable_controls["use_rep"].value
42
+ max_dim = adata.obsm[use_rep].shape[1]
43
+ variable_controls["dims"].max = max_dim
44
+
45
+ variable_controls["use_rep"].observe(update_dims_range, names="value")
46
+
34
47
  super().__init__(
35
48
  parent=parent,
36
49
  fixed_params={},
@@ -41,13 +54,20 @@ class Neighbors(ProcessorStepBase):
41
54
  self,
42
55
  n_neighbors: int = 20,
43
56
  use_rep: str = "X_pca",
44
- n_dims: int = 10,
57
+ dims: tuple[int, int] = (1, 10),
45
58
  metric: str = "euclidean",
46
59
  group_by_batch: bool = False,
47
60
  ):
48
61
  import scanpy as sc
49
62
 
50
63
  adata = self.parent.dataset.adata
64
+ min_dim, max_dim = dims
65
+ min_dim = min_dim - 1
66
+
67
+ if min_dim > 0:
68
+ adata.obsm[use_rep + "_trimmed"] = adata.obsm[use_rep][:, min_dim:max_dim]
69
+ use_rep = use_rep + "_trimmed"
70
+ n_dims = max_dim - min_dim
51
71
 
52
72
  if group_by_batch and self.parent.batch_key:
53
73
  group_by = self.parent.batch_key
@@ -58,7 +78,7 @@ class Neighbors(ProcessorStepBase):
58
78
  n_pcs=n_dims,
59
79
  use_annoy=False,
60
80
  metric=metric,
61
- pynndescent_n_neighbors=n_neighbors,
81
+ neighbors_within_batch=n_neighbors,
62
82
  )
63
83
  else:
64
84
  sc.pp.neighbors(
sclab/examples/processor_steps/_pca.py CHANGED
@@ -1,4 +1,3 @@
1
- import numpy as np
2
1
  import pandas as pd
3
2
  import plotly.express as px
4
3
  from ipywidgets import Button, Dropdown, IntText
@@ -64,7 +63,9 @@ class PCA(ProcessorStepBase):
64
63
  counts_layer = self.parent.dataset.counts_layer
65
64
 
66
65
  if reference_batch:
67
- obs_mask = adata.obs[self.parent.batch_key] == reference_batch
66
+ batch_key = self.parent.batch_key
67
+
68
+ obs_mask = adata.obs[batch_key] == reference_batch
68
69
  adata_ref = adata[obs_mask].copy()
69
70
  if mask_var == "highly_variable":
70
71
  sc.pp.highly_variable_genes(
@@ -85,13 +86,12 @@ class PCA(ProcessorStepBase):
85
86
  uns_pca = adata_ref.uns["pca"]
86
87
  uns_pca["reference_batch"] = reference_batch
87
88
  PCs = adata_ref.varm["PCs"]
88
- X_pca: np.ndarray = adata.X.dot(PCs)
89
- X_pca = X_pca - X_pca.mean(axis=0, keepdims=True)
90
- adata.obsm["X_pca"] = X_pca
89
+ adata.obsm["X_pca"] = adata.X.dot(PCs)
91
90
  adata.uns["pca"] = uns_pca
92
91
  adata.varm["PCs"] = PCs
93
92
  else:
94
93
  sc.pp.pca(adata, n_comps=n_comps, mask_var=mask_var, svd_solver="arpack")
94
+ adata.obsm["X_pca"] = adata.X.dot(adata.varm["PCs"])
95
95
 
96
96
  self.plot_variance_ratio_button.disabled = False
97
97
  self.broker.publish(
sclab/examples/processor_steps/_preprocess.py CHANGED
@@ -1,4 +1,7 @@
1
+ import warnings
2
+
1
3
  import numpy as np
4
+ from anndata import ImplicitModificationWarning
2
5
  from ipywidgets import Checkbox, Dropdown
3
6
  from tqdm.auto import tqdm
4
7
 
@@ -156,7 +159,17 @@ class Preprocess(ProcessorStepBase):
156
159
 
157
160
  if scale:
158
161
  new_layer += "_scale"
159
- sc.pp.scale(adata, zero_center=False)
162
+ if group_by is not None:
163
+ for _, idx in adata.obs.groupby(group_by, observed=True).groups.items():
164
+ with warnings.catch_warnings():
165
+ warnings.filterwarnings(
166
+ "ignore",
167
+ category=ImplicitModificationWarning,
168
+ message="Modifying `X` on a view results in data being overridden",
169
+ )
170
+ adata[idx].X = sc.pp.scale(adata[idx].X, zero_center=False)
171
+ else:
172
+ sc.pp.scale(adata, zero_center=False)
160
173
 
161
174
  adata.layers[new_layer] = adata.X.copy()
162
175
 
sclab/examples/processor_steps/_qc.py CHANGED
@@ -1,3 +1,4 @@
1
+ import numpy as np
1
2
  from ipywidgets import Dropdown, IntText
2
3
 
3
4
  from sclab.dataset.processor import Processor
@@ -21,6 +22,7 @@ class QC(ProcessorStepBase):
21
22
  value="counts",
22
23
  description="Layer",
23
24
  ),
25
+ min_counts=IntText(value=50, description="Min. Counts"),
24
26
  min_genes=IntText(value=5, description="Min. Genes"),
25
27
  min_cells=IntText(value=0, description="Min. Cells"),
26
28
  max_rank=IntText(value=0, description="Max. Rank"),
@@ -45,8 +47,10 @@ class QC(ProcessorStepBase):
45
47
  def compute_qc_metrics(
46
48
  self,
47
49
  layer: str | None = None,
50
+ min_counts: int = 50,
48
51
  min_genes: int = 5,
49
52
  min_cells: int = 5,
53
+ max_rank: int = 0,
50
54
  ):
51
55
  import scanpy as sc
52
56
 
@@ -58,6 +62,11 @@ class QC(ProcessorStepBase):
58
62
 
59
63
  adata.layers["qc_tmp_current_X"] = adata.X
60
64
  adata.X = adata.layers[layer].copy()
65
+ rowsums = np.asarray(adata.X.sum(axis=1)).squeeze()
66
+
67
+ obs_idx = adata.obs_names[rowsums >= min_counts]
68
+ adata._inplace_subset_obs(obs_idx)
69
+
61
70
  sc.pp.calculate_qc_metrics(adata, percent_top=None, log1p=False, inplace=True)
62
71
 
63
72
  sc.pp.filter_cells(adata, min_genes=min_genes)
@@ -68,19 +77,26 @@ class QC(ProcessorStepBase):
68
77
  # Restore original X
69
78
  adata.X = adata.layers.pop("qc_tmp_current_X")
70
79
 
80
+ if max_rank > 0:
81
+ series = self.parent.dataset.adata.obs["barcode_rank"]
82
+ index = series.loc[series < max_rank].index
83
+ self.parent.dataset.filter_rows(index)
84
+
71
85
  def function(
72
86
  self,
73
87
  layer: str | None = None,
88
+ min_counts: int = 50,
74
89
  min_genes: int = 5,
75
90
  min_cells: int = 5,
76
91
  max_rank: int = 0,
77
92
  ):
78
- self.compute_qc_metrics(layer, min_genes, min_cells)
79
-
80
- if max_rank > 0:
81
- series = self.parent.dataset.adata.obs["barcode_rank"]
82
- index = series.loc[series < max_rank].index
83
- self.parent.dataset.filter_rows(index)
93
+ self.compute_qc_metrics(
94
+ layer,
95
+ min_counts,
96
+ min_genes,
97
+ min_cells,
98
+ max_rank,
99
+ )
84
100
 
85
101
  self.broker.publish("dset_metadata_change", self.parent.dataset.metadata)
86
102
  self.broker.publish(
sclab/gui/__init__.py ADDED
File without changes
sclab/gui/components/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from ._guided_pseudotime import GuidedPseudotime
2
+
3
+ __all__ = [
4
+ "GuidedPseudotime",
5
+ ]