PyPI - sclab - Versions diffs - 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl - Mend

sclab 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sclab might be problematic. Click here for more details.

Files changed (53)

sclab/__init__.py +1 -1
sclab/_sclab.py +7 -3
sclab/dataset/_dataset.py +1 -1
sclab/dataset/processor/_processor.py +19 -4
sclab/examples/processor_steps/__init__.py +2 -0
sclab/examples/processor_steps/_doublet_detection.py +68 -0
sclab/examples/processor_steps/_integration.py +47 -20
sclab/examples/processor_steps/_neighbors.py +24 -4
sclab/examples/processor_steps/_pca.py +11 -6
sclab/examples/processor_steps/_preprocess.py +14 -1
sclab/examples/processor_steps/_qc.py +22 -6
sclab/gui/__init__.py +0 -0
sclab/gui/components/__init__.py +7 -0
sclab/gui/components/_guided_pseudotime.py +482 -0
sclab/gui/components/_transfer_metadata.py +186 -0
sclab/methods/__init__.py +16 -0
sclab/preprocess/__init__.py +19 -0
sclab/preprocess/_cca.py +154 -0
sclab/preprocess/_cca_integrate.py +109 -0
sclab/preprocess/_filter_obs.py +42 -0
sclab/preprocess/_harmony.py +421 -0
sclab/preprocess/_harmony_integrate.py +53 -0
sclab/preprocess/_normalize_weighted.py +61 -0
sclab/preprocess/_subset.py +208 -0
sclab/preprocess/_transfer_metadata.py +137 -0
sclab/preprocess/_transform.py +82 -0
sclab/preprocess/_utils.py +96 -0
sclab/tools/__init__.py +0 -0
sclab/tools/cellflow/__init__.py +0 -0
sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
sclab/tools/cellflow/pseudotime/__init__.py +0 -0
sclab/tools/cellflow/pseudotime/_pseudotime.py +332 -0
sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
sclab/tools/cellflow/utils/__init__.py +0 -0
sclab/tools/cellflow/utils/density_nd.py +215 -0
sclab/tools/cellflow/utils/interpolate.py +334 -0
sclab/tools/cellflow/utils/smoothen.py +124 -0
sclab/tools/cellflow/utils/times.py +55 -0
sclab/tools/differential_expression/__init__.py +5 -0
sclab/tools/differential_expression/_pseudobulk_edger.py +304 -0
sclab/tools/differential_expression/_pseudobulk_helpers.py +277 -0
sclab/tools/doublet_detection/__init__.py +5 -0
sclab/tools/doublet_detection/_scrublet.py +64 -0
sclab/tools/labeling/__init__.py +6 -0
sclab/tools/labeling/sctype.py +233 -0
sclab/utils/__init__.py +5 -0
sclab/utils/_write_excel.py +510 -0
{sclab-0.2.5.dist-info → sclab-0.3.1.dist-info}/METADATA +6 -2
sclab-0.3.1.dist-info/RECORD +82 -0
sclab-0.2.5.dist-info/RECORD +0 -45
{sclab-0.2.5.dist-info → sclab-0.3.1.dist-info}/WHEEL +0 -0
{sclab-0.2.5.dist-info → sclab-0.3.1.dist-info}/licenses/LICENSE +0 -0

sclab/__init__.py CHANGED Viewed

@@ -6,4 +6,4 @@ __all__ = [
     "SCLabDashboard",
 ]
-__version__ = "0.2.5"
+__version__ = "0.3.1"

sclab/_sclab.py CHANGED Viewed

@@ -171,7 +171,11 @@ class DataLoader(VBox):
         self.upload = FileUpload(layout=Layout(width="200px"))
         self.upload_info = Output(layout=Layout(width="95%"))
         self.upload_row = HBox(
-            [self.upload_row_label, self.upload, self.upload_info],
+            [
+                self.upload_row_label,
+                self.upload,
+                self.upload_info,
+            ],
             layout=Layout(width="100%"),
         )
         self.upload.observe(self.on_upload, "value")
@@ -214,8 +218,8 @@ class DataLoader(VBox):
         VBox.__init__(
             self,
             [
-                self.url_row,
-                self.upload_row,
+                # self.url_row,
+                # self.upload_row,
                 self.defined_adatas_row,
                 self.progress_output,
             ],

sclab/dataset/_dataset.py CHANGED Viewed

@@ -365,7 +365,7 @@ class SCLabDataset(EventClient):
         if not index.isin(self.metadata.index).all():
             raise InvalidRowSubset("index contains invalid values")
-        self.adata = self.adata[index].copy()
+        self.adata._inplace_subset_obs(index)
         self.broker.publish("dset_total_rows_change", self.metadata)

sclab/dataset/processor/_processor.py CHANGED Viewed

@@ -1025,22 +1025,37 @@ class Processor(EventClient):
                 else:
                     control.value = current_value
-    def dset_anndata_layers_change_callback(self, layers):
-        options = {layer: layer for layer in layers}
+    def dset_anndata_layers_change_callback(self, *args, **kwargs):
+        layer_options = {key: key for key in self.dataset.adata.layers.keys()}
+        obsm_options = {key: key for key in self.dataset.adata.obsm.keys()}
         for control in self.all_controls_list:
             if not isinstance(control, Dropdown):
                 continue
             description: str = control.description
             if description.lower().strip(" :.") == "layer":
                 current_value = control.value
-                control.options = options
+                control.options = layer_options
+                if current_value not in control.options:
+                    control.value = None
+                else:
+                    control.value = current_value
+            if description.lower().strip(" :.") == "use rep":
+                current_value = control.value
+                control.options = {**layer_options, **obsm_options}
                 if current_value not in control.options:
                     control.value = None
                 else:
                     control.value = current_value
     def dset_data_dict_change_callback(self, *args, **kwargs):
-        options = {v: v for v in self.dataset.adata.obsm.keys()}
+        options = [
+            *self.dataset.adata.layers.keys(),
+            *self.dataset.adata.obsm.keys(),
+        ]
+        options = {v: v for v in options}
         for control in self.all_controls_list:
             if not isinstance(control, Dropdown):
                 continue

sclab/examples/processor_steps/__init__.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from ._cluster import Cluster
 from ._differential_expression import DifferentialExpression
+from ._doublet_detection import DoubletDetection
 from ._gene_expression import GeneExpression
 from ._integration import Integration
 from ._neighbors import Neighbors
@@ -16,6 +17,7 @@ __all__ = [
     "Neighbors",
     "UMAP",
     "Cluster",
+    "DoubletDetection",
     "GeneExpression",
     "DifferentialExpression",
 ]

sclab/examples/processor_steps/_doublet_detection.py ADDED Viewed

@@ -0,0 +1,68 @@
+from ipywidgets import Dropdown
+from sclab.dataset.processor import Processor
+from sclab.dataset.processor.step import ProcessorStepBase
+from sclab.tools.doublet_detection import scrublet
+# from sclab.tools.doublet_detection import doubletdetection
+# from sclab.tools.doublet_detection import scdblfinder
+class DoubletDetection(ProcessorStepBase):
+    parent: Processor
+    name: str = "doublet_detection"
+    description: str = "Doublet Detection"
+    def __init__(self, parent: Processor) -> None:
+        variable_controls = dict(
+            layer=Dropdown(
+                options=tuple(parent.dataset.adata.layers.keys()),
+                value=None,
+                description="Layer",
+            ),
+            flavor=Dropdown(
+                options=[
+                    "scrublet",
+                    # "doubletdetection",
+                    # "scDblFinder",
+                ],
+                description="Flavor",
+            ),
+        )
+        super().__init__(
+            parent=parent,
+            fixed_params={},
+            variable_controls=variable_controls,
+        )
+    def function(self, layer: str, flavor: str):
+        adata = self.parent.dataset.adata
+        kvargs = {"adata": adata, "layer": layer, "key_added": flavor}
+        self.broker.std_output.clear_output(wait=False)
+        with self.broker.std_output:
+            match flavor:
+                # case "scDblFinder":
+                #     scdblfinder(**kvargs, clusters_col="leiden")
+                # case "doubletdetection":
+                #     doubletdetection(
+                #         **kvargs,
+                #         pseudocount=1,
+                #         clustering_algorithm="leiden",
+                #         clustering_kwargs=dict(resolution=5.0),
+                #     )
+                case "scrublet":
+                    scrublet(**kvargs)
+                case _:
+                    raise ValueError(f"Unknown flavor: {flavor}")
+        self.broker.publish(
+            "dset_metadata_change",
+            self.parent.dataset.metadata,
+            f"{flavor}_label",
+        )

sclab/examples/processor_steps/_integration.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from ipywidgets import Dropdown
+from ipywidgets import Dropdown, IntText
 from sclab.dataset.processor import Processor
 from sclab.dataset.processor.step import ProcessorStepBase
@@ -10,20 +10,6 @@ class Integration(ProcessorStepBase):
     description: str = "Integration"
     def __init__(self, parent: Processor) -> None:
-        try:
-            from scanpy.external.pp import harmony_integrate  # noqa
-        except ImportError:
-            try:
-                from scanpy.external.pp import scanorama_integrate  # noqa
-            except ImportError:
-                raise ImportError(
-                    "Integration requires scanorama or harmony to be installed.\n"
-                    "\nInstall with one of:\n"
-                    "\npip install harmony"
-                    "\npip install scanorama"
-                    "\n"
-                )
         cat_metadata = parent.dataset._metadata.select_dtypes(
             include=["object", "category"]
         )
@@ -40,20 +26,49 @@ class Integration(ProcessorStepBase):
                 value="batch" if "batch" in cat_options else None,
                 description="GroupBy",
             ),
+            reference_batch=Dropdown(
+                description="Reference Batch",
+            ),
             flavor=Dropdown(
-                options=["harmony", "scanorama"],
-                value="harmony",
+                options=["cca", "harmony", "scanorama"],
+                value="cca",
                 description="Flavor",
             ),
+            max_iters=IntText(
+                value=20,
+                description="Max iters",
+            ),
         )
+        def update_reference_batch(*args, **kwargs):
+            group_by = variable_controls["group_by"].value
+            options = {
+                "": None,
+                **{
+                    c: c
+                    for c in self.parent.dataset.adata.obs[group_by]
+                    .sort_values()
+                    .unique()
+                },
+            }
+            variable_controls["reference_batch"].options = options
+        variable_controls["group_by"].observe(update_reference_batch, names="value")
         super().__init__(
             parent=parent,
             fixed_params={},
             variable_controls=variable_controls,
         )
-    def function(self, use_rep: str, group_by: str, flavor: str):
+    def function(
+        self,
+        use_rep: str,
+        group_by: str,
+        flavor: str,
+        reference_batch: str | None,
+        max_iters: int,
+    ):
         adata = self.parent.dataset.adata
         key_added = f"{use_rep}_{flavor}"
@@ -67,10 +82,22 @@ class Integration(ProcessorStepBase):
         self.broker.std_output.clear_output(wait=False)
         with self.broker.std_output:
             match flavor:
+                case "cca":
+                    from sclab.preprocess import cca_integrate
+                    cca_integrate(
+                        **kvargs,
+                        reference_batch=reference_batch,
+                    )
                 case "harmony":
-                    from scanpy.external.pp import harmony_integrate
+                    from sclab.preprocess import harmony_integrate
-                    harmony_integrate(**kvargs)
+                    harmony_integrate(
+                        **kvargs,
+                        reference_batch=reference_batch,
+                        max_iter_harmony=max_iters,
+                    )
                 case "scanorama":
                     from scanpy.external.pp import scanorama_integrate

sclab/examples/processor_steps/_neighbors.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from ipywidgets import Dropdown, IntText
+from ipywidgets import Dropdown, IntRangeSlider, IntText
 from sclab.dataset.processor import Processor
 from sclab.dataset.processor.step import ProcessorStepBase
@@ -22,7 +22,12 @@ class Neighbors(ProcessorStepBase):
                 description="Use rep.",
             ),
             n_neighbors=IntText(value=20, description="N neighbors"),
-            n_dims=IntText(value=10, description="N Dims"),
+            dims=IntRangeSlider(
+                min=1,
+                max=30,
+                value=(1, 10),
+                description="Use dims",
+            ),
             metric=Dropdown(
                 options=["euclidean", "cosine"],
                 value="euclidean",
@@ -31,6 +36,14 @@ class Neighbors(ProcessorStepBase):
             **parent.make_groupbybatch_checkbox(),
         )
+        def update_dims_range(*args, **kwargs):
+            adata = self.parent.dataset.adata
+            use_rep = variable_controls["use_rep"].value
+            max_dim = adata.obsm[use_rep].shape[1]
+            variable_controls["dims"].max = max_dim
+        variable_controls["use_rep"].observe(update_dims_range, names="value")
         super().__init__(
             parent=parent,
             fixed_params={},
@@ -41,13 +54,20 @@ class Neighbors(ProcessorStepBase):
         self,
         n_neighbors: int = 20,
         use_rep: str = "X_pca",
-        n_dims: int = 10,
+        dims: tuple[int, int] = (1, 10),
         metric: str = "euclidean",
         group_by_batch: bool = False,
     ):
         import scanpy as sc
         adata = self.parent.dataset.adata
+        min_dim, max_dim = dims
+        min_dim = min_dim - 1
+        if min_dim > 0:
+            adata.obsm[use_rep + "_trimmed"] = adata.obsm[use_rep][:, min_dim:max_dim]
+            use_rep = use_rep + "_trimmed"
+        n_dims = max_dim - min_dim
         if group_by_batch and self.parent.batch_key:
             group_by = self.parent.batch_key
@@ -58,7 +78,7 @@ class Neighbors(ProcessorStepBase):
                 n_pcs=n_dims,
                 use_annoy=False,
                 metric=metric,
-                pynndescent_n_neighbors=n_neighbors,
+                neighbors_within_batch=n_neighbors,
             )
         else:
             sc.pp.neighbors(

sclab/examples/processor_steps/_pca.py CHANGED Viewed

@@ -1,7 +1,6 @@
-import numpy as np
 import pandas as pd
 import plotly.express as px
-from ipywidgets import Button, Dropdown, IntText
+from ipywidgets import Button, Checkbox, Dropdown, IntText
 from sclab.dataset.processor import Processor
 from sclab.dataset.processor.step import ProcessorStepBase
@@ -27,6 +26,7 @@ class PCA(ProcessorStepBase):
             n_comps=IntText(value=30, description="N comps."),
             mask_var=Dropdown(options=mask_var_options, description="Genes mask"),
             **parent.make_selectbatch_drowpdown(description="Reference Batch"),
+            zero_center=Checkbox(value=False, description="Zero center"),
         )
         super().__init__(
@@ -57,6 +57,7 @@ class PCA(ProcessorStepBase):
         n_comps: int = 30,
         mask_var: str | None = None,
         reference_batch: str | None = None,
+        zero_center: bool = False,
     ):
         import scanpy as sc
@@ -64,7 +65,9 @@ class PCA(ProcessorStepBase):
         counts_layer = self.parent.dataset.counts_layer
         if reference_batch:
-            obs_mask = adata.obs[self.parent.batch_key] == reference_batch
+            batch_key = self.parent.batch_key
+            obs_mask = adata.obs[batch_key] == reference_batch
             adata_ref = adata[obs_mask].copy()
             if mask_var == "highly_variable":
                 sc.pp.highly_variable_genes(
@@ -85,13 +88,15 @@ class PCA(ProcessorStepBase):
             uns_pca = adata_ref.uns["pca"]
             uns_pca["reference_batch"] = reference_batch
             PCs = adata_ref.varm["PCs"]
-            X_pca: np.ndarray = adata.X.dot(PCs)
-            X_pca = X_pca - X_pca.mean(axis=0, keepdims=True)
-            adata.obsm["X_pca"] = X_pca
+            adata.obsm["X_pca"] = adata.X.dot(PCs)
             adata.uns["pca"] = uns_pca
             adata.varm["PCs"] = PCs
         else:
             sc.pp.pca(adata, n_comps=n_comps, mask_var=mask_var, svd_solver="arpack")
+            adata.obsm["X_pca"] = adata.X.dot(adata.varm["PCs"])
+        if zero_center:
+            adata.obsm["X_pca"] -= adata.obsm["X_pca"].mean(axis=0, keepdims=True)
         self.plot_variance_ratio_button.disabled = False
         self.broker.publish(

sclab/examples/processor_steps/_preprocess.py CHANGED Viewed

@@ -1,4 +1,7 @@
+import warnings
 import numpy as np
+from anndata import ImplicitModificationWarning
 from ipywidgets import Checkbox, Dropdown
 from tqdm.auto import tqdm
@@ -156,7 +159,17 @@ class Preprocess(ProcessorStepBase):
         if scale:
             new_layer += "_scale"
-            sc.pp.scale(adata, zero_center=False)
+            if group_by is not None:
+                for _, idx in adata.obs.groupby(group_by, observed=True).groups.items():
+                    with warnings.catch_warnings():
+                        warnings.filterwarnings(
+                            "ignore",
+                            category=ImplicitModificationWarning,
+                            message="Modifying `X` on a view results in data being overridden",
+                        )
+                        adata[idx].X = sc.pp.scale(adata[idx].X, zero_center=False)
+            else:
+                sc.pp.scale(adata, zero_center=False)
         adata.layers[new_layer] = adata.X.copy()

sclab/examples/processor_steps/_qc.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import numpy as np
 from ipywidgets import Dropdown, IntText
 from sclab.dataset.processor import Processor
@@ -21,6 +22,7 @@ class QC(ProcessorStepBase):
                 value="counts",
                 description="Layer",
             ),
+            min_counts=IntText(value=50, description="Min. Counts"),
             min_genes=IntText(value=5, description="Min. Genes"),
             min_cells=IntText(value=0, description="Min. Cells"),
             max_rank=IntText(value=0, description="Max. Rank"),
@@ -45,8 +47,10 @@ class QC(ProcessorStepBase):
     def compute_qc_metrics(
         self,
         layer: str | None = None,
+        min_counts: int = 50,
         min_genes: int = 5,
         min_cells: int = 5,
+        max_rank: int = 0,
     ):
         import scanpy as sc
@@ -58,6 +62,11 @@ class QC(ProcessorStepBase):
         adata.layers["qc_tmp_current_X"] = adata.X
         adata.X = adata.layers[layer].copy()
+        rowsums = np.asarray(adata.X.sum(axis=1)).squeeze()
+        obs_idx = adata.obs_names[rowsums >= min_counts]
+        adata._inplace_subset_obs(obs_idx)
         sc.pp.calculate_qc_metrics(adata, percent_top=None, log1p=False, inplace=True)
         sc.pp.filter_cells(adata, min_genes=min_genes)
@@ -68,19 +77,26 @@ class QC(ProcessorStepBase):
         # Restore original X
         adata.X = adata.layers.pop("qc_tmp_current_X")
+        if max_rank > 0:
+            series = self.parent.dataset.adata.obs["barcode_rank"]
+            index = series.loc[series < max_rank].index
+            self.parent.dataset.filter_rows(index)
     def function(
         self,
         layer: str | None = None,
+        min_counts: int = 50,
         min_genes: int = 5,
         min_cells: int = 5,
         max_rank: int = 0,
     ):
-        self.compute_qc_metrics(layer, min_genes, min_cells)
-        if max_rank > 0:
-            series = self.parent.dataset.adata.obs["barcode_rank"]
-            index = series.loc[series < max_rank].index
-            self.parent.dataset.filter_rows(index)
+        self.compute_qc_metrics(
+            layer,
+            min_counts,
+            min_genes,
+            min_cells,
+            max_rank,
+        )
         self.broker.publish("dset_metadata_change", self.parent.dataset.metadata)
         self.broker.publish(

sclab/gui/__init__.py ADDED Viewed

File without changes

sclab/gui/components/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+from ._guided_pseudotime import GuidedPseudotime
+from ._transfer_metadata import TransferMetadata
+__all__ = [
+    "GuidedPseudotime",
+    "TransferMetadata",
+]

sclab 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl

Potentially problematic release.

sclab 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl