PyPI - sclab - Versions diffs - 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

sclab 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sclab might be problematic. Click here for more details.

Files changed (51)

sclab/__init__.py +1 -1
sclab/_sclab.py +10 -3
sclab/dataset/_dataset.py +1 -1
sclab/examples/processor_steps/__init__.py +2 -0
sclab/examples/processor_steps/_doublet_detection.py +68 -0
sclab/examples/processor_steps/_integration.py +37 -4
sclab/examples/processor_steps/_neighbors.py +24 -4
sclab/examples/processor_steps/_pca.py +5 -5
sclab/examples/processor_steps/_preprocess.py +14 -1
sclab/examples/processor_steps/_qc.py +22 -6
sclab/gui/__init__.py +0 -0
sclab/gui/components/__init__.py +5 -0
sclab/gui/components/_guided_pseudotime.py +482 -0
sclab/methods/__init__.py +25 -1
sclab/preprocess/__init__.py +18 -0
sclab/preprocess/_cca.py +154 -0
sclab/preprocess/_cca_integrate.py +77 -0
sclab/preprocess/_filter_obs.py +42 -0
sclab/preprocess/_harmony.py +421 -0
sclab/preprocess/_harmony_integrate.py +50 -0
sclab/preprocess/_normalize_weighted.py +61 -0
sclab/preprocess/_subset.py +208 -0
sclab/preprocess/_transfer_metadata.py +137 -0
sclab/preprocess/_transform.py +82 -0
sclab/preprocess/_utils.py +96 -0
sclab/tools/__init__.py +0 -0
sclab/tools/cellflow/__init__.py +0 -0
sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
sclab/tools/cellflow/pseudotime/__init__.py +0 -0
sclab/tools/cellflow/pseudotime/_pseudotime.py +332 -0
sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
sclab/tools/cellflow/utils/__init__.py +0 -0
sclab/tools/cellflow/utils/density_nd.py +136 -0
sclab/tools/cellflow/utils/interpolate.py +334 -0
sclab/tools/cellflow/utils/smoothen.py +124 -0
sclab/tools/cellflow/utils/times.py +55 -0
sclab/tools/differential_expression/__init__.py +5 -0
sclab/tools/differential_expression/_pseudobulk_edger.py +304 -0
sclab/tools/differential_expression/_pseudobulk_helpers.py +277 -0
sclab/tools/doublet_detection/__init__.py +5 -0
sclab/tools/doublet_detection/_scrublet.py +64 -0
sclab/tools/labeling/__init__.py +6 -0
sclab/tools/labeling/sctype.py +233 -0
sclab/utils/__init__.py +5 -0
sclab/utils/_write_excel.py +510 -0
{sclab-0.2.4.dist-info → sclab-0.3.0.dist-info}/METADATA +7 -2
sclab-0.3.0.dist-info/RECORD +81 -0
sclab-0.2.4.dist-info/RECORD +0 -45
{sclab-0.2.4.dist-info → sclab-0.3.0.dist-info}/WHEEL +0 -0
{sclab-0.2.4.dist-info → sclab-0.3.0.dist-info}/licenses/LICENSE +0 -0

sclab/__init__.py CHANGED Viewed

@@ -6,4 +6,4 @@ __all__ = [
     "SCLabDashboard",
 ]
-__version__ = "0.2.4"
+__version__ = "0.3.0"

sclab/_sclab.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import inspect
 from io import BytesIO
 from pathlib import Path
+import tempfile
 from anndata import AnnData
 from IPython.display import display
@@ -256,14 +257,20 @@ class DataLoader(VBox):
         match path.suffix:
             case ".h5":
-                adata = read_10x_h5(contents)
+                with tempfile.NamedTemporaryFile(suffix=".h5") as tmp:
+                    tmp.write(contents.getbuffer())
+                    tmp.flush()
+                    adata = read_10x_h5(tmp.name)
             case ".h5ad":
-                adata = read_h5ad(contents)
+                with tempfile.NamedTemporaryFile(suffix=".h5ad") as tmp:
+                    tmp.write(contents.getbuffer())
+                    tmp.flush()
+                    adata = read_h5ad(tmp.name)
             case _:
                 self.upload_info.clear_output()
                 with self.upload_info:
                     print(f"`{filename}` is not valid")
-                    print("Please upload a 10x h5 or h5ad file")
+                    print("Please upload a 10x .h5 or .h5ad file")
                 return
         if var_names in adata.var:

sclab/dataset/_dataset.py CHANGED Viewed

@@ -365,7 +365,7 @@ class SCLabDataset(EventClient):
         if not index.isin(self.metadata.index).all():
             raise InvalidRowSubset("index contains invalid values")
-        self.adata = self.adata[index].copy()
+        self.adata._inplace_subset_obs(index)
         self.broker.publish("dset_total_rows_change", self.metadata)

sclab/examples/processor_steps/__init__.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from ._cluster import Cluster
 from ._differential_expression import DifferentialExpression
+from ._doublet_detection import DoubletDetection
 from ._gene_expression import GeneExpression
 from ._integration import Integration
 from ._neighbors import Neighbors
@@ -16,6 +17,7 @@ __all__ = [
     "Neighbors",
     "UMAP",
     "Cluster",
+    "DoubletDetection",
     "GeneExpression",
     "DifferentialExpression",
 ]

sclab/examples/processor_steps/_doublet_detection.py ADDED Viewed

@@ -0,0 +1,68 @@
+from ipywidgets import Dropdown
+from sclab.dataset.processor import Processor
+from sclab.dataset.processor.step import ProcessorStepBase
+from sclab.tools.doublet_detection import scrublet
+# from sclab.tools.doublet_detection import doubletdetection
+# from sclab.tools.doublet_detection import scdblfinder
+class DoubletDetection(ProcessorStepBase):
+    parent: Processor
+    name: str = "doublet_detection"
+    description: str = "Doublet Detection"
+    def __init__(self, parent: Processor) -> None:
+        variable_controls = dict(
+            layer=Dropdown(
+                options=tuple(parent.dataset.adata.layers.keys()),
+                value=None,
+                description="Layer",
+            ),
+            flavor=Dropdown(
+                options=[
+                    "scrublet",
+                    # "doubletdetection",
+                    # "scDblFinder",
+                ],
+                description="Flavor",
+            ),
+        )
+        super().__init__(
+            parent=parent,
+            fixed_params={},
+            variable_controls=variable_controls,
+        )
+    def function(self, layer: str, flavor: str):
+        adata = self.parent.dataset.adata
+        kvargs = {"adata": adata, "layer": layer, "key_added": flavor}
+        self.broker.std_output.clear_output(wait=False)
+        with self.broker.std_output:
+            match flavor:
+                # case "scDblFinder":
+                #     scdblfinder(**kvargs, clusters_col="leiden")
+                # case "doubletdetection":
+                #     doubletdetection(
+                #         **kvargs,
+                #         pseudocount=1,
+                #         clustering_algorithm="leiden",
+                #         clustering_kwargs=dict(resolution=5.0),
+                #     )
+                case "scrublet":
+                    scrublet(**kvargs)
+                case _:
+                    raise ValueError(f"Unknown flavor: {flavor}")
+        self.broker.publish(
+            "dset_metadata_change",
+            self.parent.dataset.metadata,
+            f"{flavor}_label",
+        )

sclab/examples/processor_steps/_integration.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from ipywidgets import Dropdown
+from ipywidgets import Dropdown, IntText
 from sclab.dataset.processor import Processor
 from sclab.dataset.processor.step import ProcessorStepBase
@@ -40,20 +40,49 @@ class Integration(ProcessorStepBase):
                 value="batch" if "batch" in cat_options else None,
                 description="GroupBy",
             ),
+            reference_batch=Dropdown(
+                description="Reference Batch",
+            ),
             flavor=Dropdown(
                 options=["harmony", "scanorama"],
                 value="harmony",
                 description="Flavor",
             ),
+            max_iters=IntText(
+                value=20,
+                description="Max iters",
+            ),
         )
+        def update_reference_batch(*args, **kwargs):
+            group_by = variable_controls["group_by"].value
+            options = {
+                "": None,
+                **{
+                    c: c
+                    for c in self.parent.dataset.adata.obs[group_by]
+                    .sort_values()
+                    .unique()
+                },
+            }
+            variable_controls["reference_batch"].options = options
+        variable_controls["group_by"].observe(update_reference_batch, names="value")
         super().__init__(
             parent=parent,
             fixed_params={},
             variable_controls=variable_controls,
         )
-    def function(self, use_rep: str, group_by: str, flavor: str):
+    def function(
+        self,
+        use_rep: str,
+        group_by: str,
+        flavor: str,
+        reference_batch: str | None,
+        max_iters: int,
+    ):
         adata = self.parent.dataset.adata
         key_added = f"{use_rep}_{flavor}"
@@ -68,9 +97,13 @@ class Integration(ProcessorStepBase):
         with self.broker.std_output:
             match flavor:
                 case "harmony":
-                    from scanpy.external.pp import harmony_integrate
+                    from sclab.preprocess import harmony_integrate
-                    harmony_integrate(**kvargs)
+                    harmony_integrate(
+                        **kvargs,
+                        reference_batch=reference_batch,
+                        max_iter_harmony=max_iters,
+                    )
                 case "scanorama":
                     from scanpy.external.pp import scanorama_integrate

sclab/examples/processor_steps/_neighbors.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from ipywidgets import Dropdown, IntText
+from ipywidgets import Dropdown, IntRangeSlider, IntText
 from sclab.dataset.processor import Processor
 from sclab.dataset.processor.step import ProcessorStepBase
@@ -22,7 +22,12 @@ class Neighbors(ProcessorStepBase):
                 description="Use rep.",
             ),
             n_neighbors=IntText(value=20, description="N neighbors"),
-            n_dims=IntText(value=10, description="N Dims"),
+            dims=IntRangeSlider(
+                min=1,
+                max=30,
+                value=(1, 10),
+                description="Use dims",
+            ),
             metric=Dropdown(
                 options=["euclidean", "cosine"],
                 value="euclidean",
@@ -31,6 +36,14 @@ class Neighbors(ProcessorStepBase):
             **parent.make_groupbybatch_checkbox(),
         )
+        def update_dims_range(*args, **kwargs):
+            adata = self.parent.dataset.adata
+            use_rep = variable_controls["use_rep"].value
+            max_dim = adata.obsm[use_rep].shape[1]
+            variable_controls["dims"].max = max_dim
+        variable_controls["use_rep"].observe(update_dims_range, names="value")
         super().__init__(
             parent=parent,
             fixed_params={},
@@ -41,13 +54,20 @@ class Neighbors(ProcessorStepBase):
         self,
         n_neighbors: int = 20,
         use_rep: str = "X_pca",
-        n_dims: int = 10,
+        dims: tuple[int, int] = (1, 10),
         metric: str = "euclidean",
         group_by_batch: bool = False,
     ):
         import scanpy as sc
         adata = self.parent.dataset.adata
+        min_dim, max_dim = dims
+        min_dim = min_dim - 1
+        if min_dim > 0:
+            adata.obsm[use_rep + "_trimmed"] = adata.obsm[use_rep][:, min_dim:max_dim]
+            use_rep = use_rep + "_trimmed"
+        n_dims = max_dim - min_dim
         if group_by_batch and self.parent.batch_key:
             group_by = self.parent.batch_key
@@ -58,7 +78,7 @@ class Neighbors(ProcessorStepBase):
                 n_pcs=n_dims,
                 use_annoy=False,
                 metric=metric,
-                pynndescent_n_neighbors=n_neighbors,
+                neighbors_within_batch=n_neighbors,
             )
         else:
             sc.pp.neighbors(

sclab/examples/processor_steps/_pca.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import numpy as np
 import pandas as pd
 import plotly.express as px
 from ipywidgets import Button, Dropdown, IntText
@@ -64,7 +63,9 @@ class PCA(ProcessorStepBase):
         counts_layer = self.parent.dataset.counts_layer
         if reference_batch:
-            obs_mask = adata.obs[self.parent.batch_key] == reference_batch
+            batch_key = self.parent.batch_key
+            obs_mask = adata.obs[batch_key] == reference_batch
             adata_ref = adata[obs_mask].copy()
             if mask_var == "highly_variable":
                 sc.pp.highly_variable_genes(
@@ -85,13 +86,12 @@ class PCA(ProcessorStepBase):
             uns_pca = adata_ref.uns["pca"]
             uns_pca["reference_batch"] = reference_batch
             PCs = adata_ref.varm["PCs"]
-            X_pca: np.ndarray = adata.X.dot(PCs)
-            X_pca = X_pca - X_pca.mean(axis=0, keepdims=True)
-            adata.obsm["X_pca"] = X_pca
+            adata.obsm["X_pca"] = adata.X.dot(PCs)
             adata.uns["pca"] = uns_pca
             adata.varm["PCs"] = PCs
         else:
             sc.pp.pca(adata, n_comps=n_comps, mask_var=mask_var, svd_solver="arpack")
+            adata.obsm["X_pca"] = adata.X.dot(adata.varm["PCs"])
         self.plot_variance_ratio_button.disabled = False
         self.broker.publish(

sclab/examples/processor_steps/_preprocess.py CHANGED Viewed

@@ -1,4 +1,7 @@
+import warnings
 import numpy as np
+from anndata import ImplicitModificationWarning
 from ipywidgets import Checkbox, Dropdown
 from tqdm.auto import tqdm
@@ -156,7 +159,17 @@ class Preprocess(ProcessorStepBase):
         if scale:
             new_layer += "_scale"
-            sc.pp.scale(adata, zero_center=False)
+            if group_by is not None:
+                for _, idx in adata.obs.groupby(group_by, observed=True).groups.items():
+                    with warnings.catch_warnings():
+                        warnings.filterwarnings(
+                            "ignore",
+                            category=ImplicitModificationWarning,
+                            message="Modifying `X` on a view results in data being overridden",
+                        )
+                        adata[idx].X = sc.pp.scale(adata[idx].X, zero_center=False)
+            else:
+                sc.pp.scale(adata, zero_center=False)
         adata.layers[new_layer] = adata.X.copy()

sclab/examples/processor_steps/_qc.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import numpy as np
 from ipywidgets import Dropdown, IntText
 from sclab.dataset.processor import Processor
@@ -21,6 +22,7 @@ class QC(ProcessorStepBase):
                 value="counts",
                 description="Layer",
             ),
+            min_counts=IntText(value=50, description="Min. Counts"),
             min_genes=IntText(value=5, description="Min. Genes"),
             min_cells=IntText(value=0, description="Min. Cells"),
             max_rank=IntText(value=0, description="Max. Rank"),
@@ -45,8 +47,10 @@ class QC(ProcessorStepBase):
     def compute_qc_metrics(
         self,
         layer: str | None = None,
+        min_counts: int = 50,
         min_genes: int = 5,
         min_cells: int = 5,
+        max_rank: int = 0,
     ):
         import scanpy as sc
@@ -58,6 +62,11 @@ class QC(ProcessorStepBase):
         adata.layers["qc_tmp_current_X"] = adata.X
         adata.X = adata.layers[layer].copy()
+        rowsums = np.asarray(adata.X.sum(axis=1)).squeeze()
+        obs_idx = adata.obs_names[rowsums >= min_counts]
+        adata._inplace_subset_obs(obs_idx)
         sc.pp.calculate_qc_metrics(adata, percent_top=None, log1p=False, inplace=True)
         sc.pp.filter_cells(adata, min_genes=min_genes)
@@ -68,19 +77,26 @@ class QC(ProcessorStepBase):
         # Restore original X
         adata.X = adata.layers.pop("qc_tmp_current_X")
+        if max_rank > 0:
+            series = self.parent.dataset.adata.obs["barcode_rank"]
+            index = series.loc[series < max_rank].index
+            self.parent.dataset.filter_rows(index)
     def function(
         self,
         layer: str | None = None,
+        min_counts: int = 50,
         min_genes: int = 5,
         min_cells: int = 5,
         max_rank: int = 0,
     ):
-        self.compute_qc_metrics(layer, min_genes, min_cells)
-        if max_rank > 0:
-            series = self.parent.dataset.adata.obs["barcode_rank"]
-            index = series.loc[series < max_rank].index
-            self.parent.dataset.filter_rows(index)
+        self.compute_qc_metrics(
+            layer,
+            min_counts,
+            min_genes,
+            min_cells,
+            max_rank,
+        )
         self.broker.publish("dset_metadata_change", self.parent.dataset.metadata)
         self.broker.publish(

sclab/gui/__init__.py ADDED Viewed

File without changes

sclab/gui/components/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from ._guided_pseudotime import GuidedPseudotime
+__all__ = [
+    "GuidedPseudotime",
+]

sclab 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

Potentially problematic release.

sclab 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl