sclab 0.1.7__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. sclab/__init__.py +3 -1
  2. sclab/_io.py +83 -12
  3. sclab/_methods_registry.py +65 -0
  4. sclab/_sclab.py +241 -21
  5. sclab/dataset/_dataset.py +4 -6
  6. sclab/dataset/processor/_processor.py +41 -19
  7. sclab/dataset/processor/_results_panel.py +94 -0
  8. sclab/dataset/processor/step/_processor_step_base.py +12 -6
  9. sclab/examples/processor_steps/__init__.py +8 -0
  10. sclab/examples/processor_steps/_cluster.py +2 -2
  11. sclab/examples/processor_steps/_differential_expression.py +329 -0
  12. sclab/examples/processor_steps/_doublet_detection.py +68 -0
  13. sclab/examples/processor_steps/_gene_expression.py +125 -0
  14. sclab/examples/processor_steps/_integration.py +116 -0
  15. sclab/examples/processor_steps/_neighbors.py +26 -6
  16. sclab/examples/processor_steps/_pca.py +13 -8
  17. sclab/examples/processor_steps/_preprocess.py +52 -25
  18. sclab/examples/processor_steps/_qc.py +24 -8
  19. sclab/examples/processor_steps/_umap.py +2 -2
  20. sclab/gui/__init__.py +0 -0
  21. sclab/gui/components/__init__.py +7 -0
  22. sclab/gui/components/_guided_pseudotime.py +482 -0
  23. sclab/gui/components/_transfer_metadata.py +186 -0
  24. sclab/methods/__init__.py +50 -0
  25. sclab/preprocess/__init__.py +26 -0
  26. sclab/preprocess/_cca.py +176 -0
  27. sclab/preprocess/_cca_integrate.py +109 -0
  28. sclab/preprocess/_filter_obs.py +42 -0
  29. sclab/preprocess/_harmony.py +421 -0
  30. sclab/preprocess/_harmony_integrate.py +53 -0
  31. sclab/preprocess/_normalize_weighted.py +65 -0
  32. sclab/preprocess/_pca.py +51 -0
  33. sclab/preprocess/_preprocess.py +155 -0
  34. sclab/preprocess/_qc.py +38 -0
  35. sclab/preprocess/_rpca.py +116 -0
  36. sclab/preprocess/_subset.py +208 -0
  37. sclab/preprocess/_transfer_metadata.py +196 -0
  38. sclab/preprocess/_transform.py +82 -0
  39. sclab/preprocess/_utils.py +96 -0
  40. sclab/scanpy/__init__.py +0 -0
  41. sclab/scanpy/_compat.py +92 -0
  42. sclab/scanpy/_settings.py +526 -0
  43. sclab/scanpy/logging.py +290 -0
  44. sclab/scanpy/plotting/__init__.py +0 -0
  45. sclab/scanpy/plotting/_rcmod.py +73 -0
  46. sclab/scanpy/plotting/palettes.py +221 -0
  47. sclab/scanpy/readwrite.py +1108 -0
  48. sclab/tools/__init__.py +0 -0
  49. sclab/tools/cellflow/__init__.py +0 -0
  50. sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
  51. sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
  52. sclab/tools/cellflow/pseudotime/__init__.py +0 -0
  53. sclab/tools/cellflow/pseudotime/_pseudotime.py +336 -0
  54. sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
  55. sclab/tools/cellflow/utils/__init__.py +0 -0
  56. sclab/tools/cellflow/utils/density_nd.py +215 -0
  57. sclab/tools/cellflow/utils/interpolate.py +334 -0
  58. sclab/tools/cellflow/utils/periodic_genes.py +106 -0
  59. sclab/tools/cellflow/utils/smoothen.py +124 -0
  60. sclab/tools/cellflow/utils/times.py +55 -0
  61. sclab/tools/differential_expression/__init__.py +7 -0
  62. sclab/tools/differential_expression/_pseudobulk_edger.py +309 -0
  63. sclab/tools/differential_expression/_pseudobulk_helpers.py +290 -0
  64. sclab/tools/differential_expression/_pseudobulk_limma.py +257 -0
  65. sclab/tools/doublet_detection/__init__.py +5 -0
  66. sclab/tools/doublet_detection/_scrublet.py +64 -0
  67. sclab/tools/embedding/__init__.py +0 -0
  68. sclab/tools/imputation/__init__.py +0 -0
  69. sclab/tools/imputation/_alra.py +135 -0
  70. sclab/tools/labeling/__init__.py +6 -0
  71. sclab/tools/labeling/sctype.py +233 -0
  72. sclab/tools/utils/__init__.py +5 -0
  73. sclab/tools/utils/_aggregate_and_filter.py +290 -0
  74. sclab/utils/__init__.py +5 -0
  75. sclab/utils/_write_excel.py +510 -0
  76. {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/METADATA +29 -12
  77. sclab-0.3.4.dist-info/RECORD +93 -0
  78. {sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/WHEEL +1 -1
  79. sclab-0.3.4.dist-info/licenses/LICENSE +29 -0
  80. sclab-0.1.7.dist-info/RECORD +0 -30
@@ -0,0 +1,125 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import plotly.colors as pc
4
+ import plotly.express as px
5
+ from ipywidgets import Combobox, Dropdown
6
+
7
+ from sclab.dataset.processor import Processor
8
+ from sclab.dataset.processor.step import ProcessorStepBase
9
+
10
# Names offered in the "Col. scale" dropdown: every attribute of
# ``plotly.colors.sequential`` that is neither private (leading underscore)
# nor has "swatch" in its name.
colorscales = [
    scale_name
    for scale_name in dir(pc.sequential)
    if not ("swatch" in scale_name or scale_name.startswith("_"))
]
13
+
14
+
15
class GeneExpression(ProcessorStepBase):
    """Processor step that plots the expression of a single gene.

    The step exposes four controls: a gene picker, the data layer to read
    from, an optional metadata column for the horizontal axis and a colour
    scale.  Changing any control (or pressing the run button) re-sends the
    plot through the broker.
    """

    parent: Processor
    name: str = "gene_expression"
    description: str = "Gene Expression"

    run_button_description = "Plot Expression"

    # Separator between the gene id and its annotation in the picker options.
    # ``send_plot`` relies on it to recover the id from the selected option.
    _OPTION_SEPARATOR = " - "

    def __init__(self, parent: Processor) -> None:
        """Create the controls and subscribe ``send_plot`` to their changes.

        Args:
            parent: Processor owning the dataset whose genes are plotted.
        """
        metadata = parent.dataset.metadata
        axis_key_options = {"": None, **{c: c for c in metadata.columns}}

        adata = parent.dataset.adata
        genes_df = adata.var
        gene_input_options = adata.var_names

        # Append every annotation-like column of ``adata.var`` to the option
        # label.  The match is case-insensitive on the column name.
        info_cols = ["name", "symbol", "description"]
        annotation_cols = [
            c for c in genes_df.columns if any(s in str(c).lower() for s in info_cols)
        ]
        for col in annotation_cols:
            values = genes_df[col]
            # Blank out missing entries only; real text containing "nan"
            # must be left untouched.
            new_info = values.astype(str).where(values.notna(), "")
            gene_input_options = (
                gene_input_options + self._OPTION_SEPARATOR + new_info
            )

        variable_controls = dict(
            gene_input=Combobox(
                placeholder="Type gene name",
                options=gene_input_options.to_list(),
                description="Gene",
                ensure_option=True,
            ),
            layer=Dropdown(
                options=tuple(adata.layers.keys()),
                value=None,
                description="Layer",
            ),
            time_key=Dropdown(
                options=axis_key_options,
                value=None,
                description="Horiz. axis",
            ),
            colorscale=Dropdown(
                options=colorscales, value="Oryel", description="Col. scale"
            ),
        )

        super().__init__(
            parent=parent,
            fixed_params={},
            variable_controls=variable_controls,
        )

        # Refresh the plot whenever any control changes value.
        for control_name in ("gene_input", "layer", "time_key", "colorscale"):
            self.variable_controls[control_name].observe(
                self.send_plot, names="value"
            )

    def function(self, *pargs, **kwargs):
        """Run-button entry point: plot using the current control values."""
        self.send_plot({})

    def send_plot(self, change: dict):
        """Publish a plot of the selected gene's expression.

        Nothing is plotted (and the output text is cleared) until both a gene
        and a layer have been chosen.  Without a horizontal-axis key the
        metadata is published with ``colorby`` set to the gene; otherwise a
        scatter figure of expression against that key is published.

        Args:
            change: Widget change notification; its content is not used.
        """
        adata = self.parent.dataset.adata
        metadata = self.parent.dataset.metadata
        selected_cells = self.parent.dataset.selected_rows

        gene_input: str = self.variable_controls["gene_input"].value
        layer: str = self.variable_controls["layer"].value
        time_key: str = self.variable_controls["time_key"].value
        colorscale: str = self.variable_controls["colorscale"].value

        if not gene_input or not layer:
            self.update_output("")
            return

        # Options look like "<gene id> - <annotation> - ..."; cut at the
        # separator rather than at the first space so ids with spaces survive.
        gene_id = gene_input.partition(self._OPTION_SEPARATOR)[0]

        if layer == "X":
            X = adata[:, gene_id].X
        else:
            X = adata[:, gene_id].layers[layer]

        # Collapse the (cells x 1) slice into a flat per-cell vector.
        E = np.asarray(X.sum(axis=1)).flatten()

        self.update_output(f"Showing gene: {gene_id}")

        df = pd.DataFrame({gene_id: E}, index=adata.obs.index)
        metadata = metadata.join(df)
        if selected_cells.size > 0:
            metadata = metadata.loc[selected_cells]

        if time_key is None:
            self.broker.publish(
                "dplt_plot_figure_request",
                metadata=metadata,
                colorby=gene_id,
                color_continuous_scale=colorscale,
            )
            return

        fig = px.scatter(
            metadata,
            x=time_key,
            y=gene_id,
            color=gene_id,
            color_continuous_scale=colorscale,
            # Must have one entry per plotted row: use the (possibly
            # subset) metadata index, not the full ``adata.obs`` index.
            hover_name=metadata.index,
            title=f"Gene: {gene_id}, Layer: {layer}",
        )
        self.broker.publish("dplt_plot_figure_request", figure=fig)
@@ -0,0 +1,116 @@
1
+ from ipywidgets import Dropdown, IntText
2
+
3
+ from sclab.dataset.processor import Processor
4
+ from sclab.dataset.processor.step import ProcessorStepBase
5
+
6
+
7
+ class Integration(ProcessorStepBase):
8
+ parent: Processor
9
+ name: str = "integration"
10
+ description: str = "Integration"
11
+
12
+ def __init__(self, parent: Processor) -> None:
13
+ cat_metadata = parent.dataset._metadata.select_dtypes(
14
+ include=["object", "category"]
15
+ )
16
+ cat_options = {"": None, **{c: c for c in cat_metadata.columns}}
17
+
18
+ variable_controls = dict(
19
+ use_rep=Dropdown(
20
+ options=tuple(parent.dataset.adata.obsm.keys()),
21
+ value=None,
22
+ description="Use rep.",
23
+ ),
24
+ group_by=Dropdown(
25
+ options=cat_options,
26
+ value="batch" if "batch" in cat_options else None,
27
+ description="GroupBy",
28
+ ),
29
+ reference_batch=Dropdown(
30
+ description="Reference Batch",
31
+ ),
32
+ flavor=Dropdown(
33
+ options=["cca", "harmony", "scanorama"],
34
+ value="cca",
35
+ description="Flavor",
36
+ ),
37
+ max_iters=IntText(
38
+ value=20,
39
+ description="Max iters",
40
+ ),
41
+ )
42
+
43
+ def update_reference_batch(*args, **kwargs):
44
+ group_by = variable_controls["group_by"].value
45
+ options = {
46
+ "": None,
47
+ **{
48
+ c: c
49
+ for c in self.parent.dataset.adata.obs[group_by]
50
+ .sort_values()
51
+ .unique()
52
+ },
53
+ }
54
+ variable_controls["reference_batch"].options = options
55
+
56
+ variable_controls["group_by"].observe(update_reference_batch, names="value")
57
+
58
+ super().__init__(
59
+ parent=parent,
60
+ fixed_params={},
61
+ variable_controls=variable_controls,
62
+ )
63
+
64
+ def function(
65
+ self,
66
+ use_rep: str | None,
67
+ group_by: str,
68
+ flavor: str,
69
+ reference_batch: str | None,
70
+ max_iters: int,
71
+ ):
72
+ adata = self.parent.dataset.adata
73
+
74
+ if use_rep is None:
75
+ use_rep = "X"
76
+
77
+ key_added = f"{use_rep}_{flavor}"
78
+ kvargs = {
79
+ "adata": adata,
80
+ "key": group_by,
81
+ "basis": use_rep,
82
+ "adjusted_basis": key_added,
83
+ }
84
+
85
+ self.broker.std_output.clear_output(wait=False)
86
+ with self.broker.std_output:
87
+ match flavor:
88
+ case "cca":
89
+ from sclab.preprocess import cca_integrate
90
+
91
+ cca_integrate(
92
+ **kvargs,
93
+ reference_batch=reference_batch,
94
+ )
95
+
96
+ case "harmony":
97
+ from sclab.preprocess import harmony_integrate
98
+
99
+ harmony_integrate(
100
+ **kvargs,
101
+ reference_batch=reference_batch,
102
+ max_iter_harmony=max_iters,
103
+ )
104
+
105
+ case "scanorama":
106
+ from scanpy.external.pp import scanorama_integrate
107
+
108
+ scanorama_integrate(**kvargs)
109
+ case _:
110
+ raise ValueError(f"Unknown flavor: {flavor}")
111
+
112
+ self.broker.publish(
113
+ "dset_data_dict_change",
114
+ self.parent.dataset.data_dict,
115
+ key_added,
116
+ )
@@ -1,4 +1,4 @@
1
- from ipywidgets import Dropdown, IntText
1
+ from ipywidgets import Dropdown, IntRangeSlider, IntText
2
2
 
3
3
  from sclab.dataset.processor import Processor
4
4
  from sclab.dataset.processor.step import ProcessorStepBase
@@ -6,6 +6,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
6
6
 
7
7
  class Neighbors(ProcessorStepBase):
8
8
  parent: Processor
9
+ name: str = "neighbors"
10
+ description: str = "Neighbors"
9
11
 
10
12
  def __init__(self, parent: Processor) -> None:
11
13
  try:
@@ -20,7 +22,12 @@ class Neighbors(ProcessorStepBase):
20
22
  description="Use rep.",
21
23
  ),
22
24
  n_neighbors=IntText(value=20, description="N neighbors"),
23
- n_dims=IntText(value=10, description="N Dims"),
25
+ dims=IntRangeSlider(
26
+ min=1,
27
+ max=30,
28
+ value=(1, 10),
29
+ description="Use dims",
30
+ ),
24
31
  metric=Dropdown(
25
32
  options=["euclidean", "cosine"],
26
33
  value="euclidean",
@@ -29,10 +36,16 @@ class Neighbors(ProcessorStepBase):
29
36
  **parent.make_groupbybatch_checkbox(),
30
37
  )
31
38
 
39
+ def update_dims_range(*args, **kwargs):
40
+ adata = self.parent.dataset.adata
41
+ use_rep = variable_controls["use_rep"].value
42
+ max_dim = adata.obsm[use_rep].shape[1]
43
+ variable_controls["dims"].max = max_dim
44
+
45
+ variable_controls["use_rep"].observe(update_dims_range, names="value")
46
+
32
47
  super().__init__(
33
48
  parent=parent,
34
- name="neighbors",
35
- description="Neighbors",
36
49
  fixed_params={},
37
50
  variable_controls=variable_controls,
38
51
  )
@@ -41,13 +54,20 @@ class Neighbors(ProcessorStepBase):
41
54
  self,
42
55
  n_neighbors: int = 20,
43
56
  use_rep: str = "X_pca",
44
- n_dims: int = 10,
57
+ dims: tuple[int, int] = (1, 10),
45
58
  metric: str = "euclidean",
46
59
  group_by_batch: bool = False,
47
60
  ):
48
61
  import scanpy as sc
49
62
 
50
63
  adata = self.parent.dataset.adata
64
+ min_dim, max_dim = dims
65
+ min_dim = min_dim - 1
66
+
67
+ if min_dim > 0:
68
+ adata.obsm[use_rep + "_trimmed"] = adata.obsm[use_rep][:, min_dim:max_dim]
69
+ use_rep = use_rep + "_trimmed"
70
+ n_dims = max_dim - min_dim
51
71
 
52
72
  if group_by_batch and self.parent.batch_key:
53
73
  group_by = self.parent.batch_key
@@ -58,7 +78,7 @@ class Neighbors(ProcessorStepBase):
58
78
  n_pcs=n_dims,
59
79
  use_annoy=False,
60
80
  metric=metric,
61
- pynndescent_n_neighbors=n_neighbors,
81
+ neighbors_within_batch=n_neighbors,
62
82
  )
63
83
  else:
64
84
  sc.pp.neighbors(
@@ -1,7 +1,6 @@
1
- import numpy as np
2
1
  import pandas as pd
3
2
  import plotly.express as px
4
- from ipywidgets import Button, Dropdown, IntText
3
+ from ipywidgets import Button, Checkbox, Dropdown, IntText
5
4
 
6
5
  from sclab.dataset.processor import Processor
7
6
  from sclab.dataset.processor.step import ProcessorStepBase
@@ -9,6 +8,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
9
8
 
10
9
  class PCA(ProcessorStepBase):
11
10
  parent: Processor
11
+ name: str = "pca"
12
+ description: str = "PCA"
12
13
 
13
14
  def __init__(self, parent: Processor) -> None:
14
15
  try:
@@ -25,12 +26,11 @@ class PCA(ProcessorStepBase):
25
26
  n_comps=IntText(value=30, description="N comps."),
26
27
  mask_var=Dropdown(options=mask_var_options, description="Genes mask"),
27
28
  **parent.make_selectbatch_drowpdown(description="Reference Batch"),
29
+ zero_center=Checkbox(value=False, description="Zero center"),
28
30
  )
29
31
 
30
32
  super().__init__(
31
33
  parent=parent,
32
- name="pca",
33
- description="PCA",
34
34
  fixed_params={},
35
35
  variable_controls=variable_controls,
36
36
  )
@@ -57,6 +57,7 @@ class PCA(ProcessorStepBase):
57
57
  n_comps: int = 30,
58
58
  mask_var: str | None = None,
59
59
  reference_batch: str | None = None,
60
+ zero_center: bool = False,
60
61
  ):
61
62
  import scanpy as sc
62
63
 
@@ -64,7 +65,9 @@ class PCA(ProcessorStepBase):
64
65
  counts_layer = self.parent.dataset.counts_layer
65
66
 
66
67
  if reference_batch:
67
- obs_mask = adata.obs[self.parent.batch_key] == reference_batch
68
+ batch_key = self.parent.batch_key
69
+
70
+ obs_mask = adata.obs[batch_key] == reference_batch
68
71
  adata_ref = adata[obs_mask].copy()
69
72
  if mask_var == "highly_variable":
70
73
  sc.pp.highly_variable_genes(
@@ -85,13 +88,15 @@ class PCA(ProcessorStepBase):
85
88
  uns_pca = adata_ref.uns["pca"]
86
89
  uns_pca["reference_batch"] = reference_batch
87
90
  PCs = adata_ref.varm["PCs"]
88
- X_pca: np.ndarray = adata.X.dot(PCs)
89
- X_pca = X_pca - X_pca.mean(axis=0, keepdims=True)
90
- adata.obsm["X_pca"] = X_pca
91
+ adata.obsm["X_pca"] = adata.X.dot(PCs)
91
92
  adata.uns["pca"] = uns_pca
92
93
  adata.varm["PCs"] = PCs
93
94
  else:
94
95
  sc.pp.pca(adata, n_comps=n_comps, mask_var=mask_var, svd_solver="arpack")
96
+ adata.obsm["X_pca"] = adata.X.dot(adata.varm["PCs"])
97
+
98
+ if zero_center:
99
+ adata.obsm["X_pca"] -= adata.obsm["X_pca"].mean(axis=0, keepdims=True)
95
100
 
96
101
  self.plot_variance_ratio_button.disabled = False
97
102
  self.broker.publish(
@@ -1,4 +1,7 @@
1
+ import warnings
2
+
1
3
  import numpy as np
4
+ from anndata import ImplicitModificationWarning
2
5
  from ipywidgets import Checkbox, Dropdown
3
6
  from tqdm.auto import tqdm
4
7
 
@@ -8,6 +11,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
8
11
 
9
12
  class Preprocess(ProcessorStepBase):
10
13
  parent: Processor
14
+ name: str = "preprocess"
15
+ description: str = "Preprocess"
11
16
 
12
17
  def __init__(self, parent: Processor) -> None:
13
18
  try:
@@ -50,8 +55,6 @@ class Preprocess(ProcessorStepBase):
50
55
 
51
56
  super().__init__(
52
57
  parent=parent,
53
- name="preprocess",
54
- description="Preprocess",
55
58
  fixed_params={},
56
59
  variable_controls=variable_controls,
57
60
  )
@@ -103,25 +106,41 @@ class Preprocess(ProcessorStepBase):
103
106
  )
104
107
  pbar.update(10)
105
108
 
106
- sc.pp.highly_variable_genes(
107
- adata,
108
- layer=f"{layer}_log1p",
109
- flavor="seurat",
110
- batch_key=group_by,
111
- )
112
- hvg_seurat = adata.var["highly_variable"]
113
- sc.pp.highly_variable_genes(
114
- adata,
115
- layer=layer,
116
- flavor="seurat_v3_paper",
117
- batch_key=group_by,
118
- n_top_genes=hvg_seurat.sum(),
119
- )
120
- hvg_seurat_v3 = adata.var["highly_variable"]
109
+ if group_by is not None:
110
+ adata.var["highly_variable"] = False
111
+ for name, idx in adata.obs.groupby(group_by, observed=True).groups.items():
112
+ hvg_seurat = sc.pp.highly_variable_genes(
113
+ adata[idx],
114
+ layer=f"{layer}_log1p",
115
+ flavor="seurat",
116
+ inplace=False,
117
+ )["highly_variable"]
118
+
119
+ hvg_seurat_v3 = sc.pp.highly_variable_genes(
120
+ adata[idx],
121
+ layer=layer,
122
+ flavor="seurat_v3_paper",
123
+ n_top_genes=hvg_seurat.sum(),
124
+ inplace=False,
125
+ )["highly_variable"]
126
+
127
+ adata.var[f"highly_variable_{name}"] = hvg_seurat | hvg_seurat_v3
128
+ adata.var["highly_variable"] |= adata.var[f"highly_variable_{name}"]
129
+
130
+ else:
131
+ sc.pp.highly_variable_genes(adata, layer=f"{layer}_log1p", flavor="seurat")
132
+ hvg_seurat = adata.var["highly_variable"]
133
+
134
+ sc.pp.highly_variable_genes(
135
+ adata,
136
+ layer=layer,
137
+ flavor="seurat_v3_paper",
138
+ n_top_genes=hvg_seurat.sum(),
139
+ )
140
+ hvg_seurat_v3 = adata.var["highly_variable"]
141
+
142
+ adata.var["highly_variable"] = hvg_seurat | hvg_seurat_v3
121
143
 
122
- adata.var["highly_variable"] = hvg_seurat | hvg_seurat_v3
123
- adata.var["highly_variable_seurat"] = hvg_seurat
124
- adata.var["highly_variable_seurat_v3"] = hvg_seurat_v3
125
144
  pbar.update(10)
126
145
  pbar.update(10)
127
146
 
@@ -129,7 +148,6 @@ class Preprocess(ProcessorStepBase):
129
148
  if normalize_total:
130
149
  new_layer += "_normt"
131
150
  sc.pp.normalize_total(adata, target_sum=1e4)
132
- adata.layers[new_layer] = adata.X.copy()
133
151
 
134
152
  pbar.update(10)
135
153
  pbar.update(10)
@@ -138,7 +156,6 @@ class Preprocess(ProcessorStepBase):
138
156
  new_layer += "_log1p"
139
157
  adata.uns.pop("log1p", None)
140
158
  sc.pp.log1p(adata)
141
- adata.layers[new_layer] = adata.X.copy()
142
159
  pbar.update(10)
143
160
 
144
161
  vars_to_regress = []
@@ -154,13 +171,23 @@ class Preprocess(ProcessorStepBase):
154
171
  if vars_to_regress:
155
172
  new_layer += "_regr"
156
173
  sc.pp.regress_out(adata, keys=vars_to_regress, n_jobs=1)
157
- adata.layers[new_layer] = adata.X.copy()
158
174
  pbar.update(10)
159
175
 
160
176
  if scale:
161
177
  new_layer += "_scale"
162
- sc.pp.scale(adata, zero_center=False)
163
- adata.layers[new_layer] = adata.X.copy()
178
+ if group_by is not None:
179
+ for _, idx in adata.obs.groupby(group_by, observed=True).groups.items():
180
+ with warnings.catch_warnings():
181
+ warnings.filterwarnings(
182
+ "ignore",
183
+ category=ImplicitModificationWarning,
184
+ message="Modifying `X` on a view results in data being overridden",
185
+ )
186
+ adata[idx].X = sc.pp.scale(adata[idx].X, zero_center=False)
187
+ else:
188
+ sc.pp.scale(adata, zero_center=False)
189
+
190
+ adata.layers[new_layer] = adata.X.copy()
164
191
 
165
192
  pbar.update(10)
166
193
 
@@ -1,3 +1,4 @@
1
+ import numpy as np
1
2
  from ipywidgets import Dropdown, IntText
2
3
 
3
4
  from sclab.dataset.processor import Processor
@@ -6,6 +7,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
6
7
 
7
8
  class QC(ProcessorStepBase):
8
9
  parent: Processor
10
+ name: str = "qc"
11
+ description: str = "QC"
9
12
 
10
13
  def __init__(self, parent: Processor) -> None:
11
14
  try:
@@ -19,6 +22,7 @@ class QC(ProcessorStepBase):
19
22
  value="counts",
20
23
  description="Layer",
21
24
  ),
25
+ min_counts=IntText(value=50, description="Min. Counts"),
22
26
  min_genes=IntText(value=5, description="Min. Genes"),
23
27
  min_cells=IntText(value=0, description="Min. Cells"),
24
28
  max_rank=IntText(value=0, description="Max. Rank"),
@@ -36,8 +40,6 @@ class QC(ProcessorStepBase):
36
40
 
37
41
  super().__init__(
38
42
  parent=parent,
39
- name="qc",
40
- description="QC",
41
43
  fixed_params={},
42
44
  variable_controls=variable_controls,
43
45
  )
@@ -45,8 +47,10 @@ class QC(ProcessorStepBase):
45
47
  def compute_qc_metrics(
46
48
  self,
47
49
  layer: str | None = None,
50
+ min_counts: int = 50,
48
51
  min_genes: int = 5,
49
52
  min_cells: int = 5,
53
+ max_rank: int = 0,
50
54
  ):
51
55
  import scanpy as sc
52
56
 
@@ -58,6 +62,11 @@ class QC(ProcessorStepBase):
58
62
 
59
63
  adata.layers["qc_tmp_current_X"] = adata.X
60
64
  adata.X = adata.layers[layer].copy()
65
+ rowsums = np.asarray(adata.X.sum(axis=1)).squeeze()
66
+
67
+ obs_idx = adata.obs_names[rowsums >= min_counts]
68
+ adata._inplace_subset_obs(obs_idx)
69
+
61
70
  sc.pp.calculate_qc_metrics(adata, percent_top=None, log1p=False, inplace=True)
62
71
 
63
72
  sc.pp.filter_cells(adata, min_genes=min_genes)
@@ -68,19 +77,26 @@ class QC(ProcessorStepBase):
68
77
  # Restore original X
69
78
  adata.X = adata.layers.pop("qc_tmp_current_X")
70
79
 
80
+ if max_rank > 0:
81
+ series = self.parent.dataset.adata.obs["barcode_rank"]
82
+ index = series.loc[series < max_rank].index
83
+ self.parent.dataset.filter_rows(index)
84
+
71
85
  def function(
72
86
  self,
73
87
  layer: str | None = None,
88
+ min_counts: int = 50,
74
89
  min_genes: int = 5,
75
90
  min_cells: int = 5,
76
91
  max_rank: int = 0,
77
92
  ):
78
- self.compute_qc_metrics(layer, min_genes, min_cells)
79
-
80
- if max_rank > 0:
81
- series = self.parent.dataset.adata.obs["barcode_rank"]
82
- index = series.loc[series < max_rank].index
83
- self.parent.dataset.filter_rows(index)
93
+ self.compute_qc_metrics(
94
+ layer,
95
+ min_counts,
96
+ min_genes,
97
+ min_cells,
98
+ max_rank,
99
+ )
84
100
 
85
101
  self.broker.publish("dset_metadata_change", self.parent.dataset.metadata)
86
102
  self.broker.publish(
@@ -7,6 +7,8 @@ from sclab.dataset.processor.step import ProcessorStepBase
7
7
 
8
8
  class UMAP(ProcessorStepBase):
9
9
  parent: Processor
10
+ name: str = "umap"
11
+ description: str = "UMAP"
10
12
 
11
13
  def __init__(self, parent: Processor) -> None:
12
14
  try:
@@ -21,8 +23,6 @@ class UMAP(ProcessorStepBase):
21
23
 
22
24
  super().__init__(
23
25
  parent=parent,
24
- name="umap",
25
- description="UMAP",
26
26
  fixed_params={},
27
27
  variable_controls=variable_controls,
28
28
  )
sclab/gui/__init__.py ADDED
File without changes
@@ -0,0 +1,7 @@
1
+ from ._guided_pseudotime import GuidedPseudotime
2
+ from ._transfer_metadata import TransferMetadata
3
+
4
+ __all__ = [
5
+ "GuidedPseudotime",
6
+ "TransferMetadata",
7
+ ]