sclab 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sclab might be problematic.

Files changed (53)
  1. sclab/__init__.py +1 -1
  2. sclab/_sclab.py +7 -3
  3. sclab/dataset/_dataset.py +1 -1
  4. sclab/dataset/processor/_processor.py +19 -4
  5. sclab/examples/processor_steps/__init__.py +2 -0
  6. sclab/examples/processor_steps/_doublet_detection.py +68 -0
  7. sclab/examples/processor_steps/_integration.py +47 -20
  8. sclab/examples/processor_steps/_neighbors.py +24 -4
  9. sclab/examples/processor_steps/_pca.py +11 -6
  10. sclab/examples/processor_steps/_preprocess.py +14 -1
  11. sclab/examples/processor_steps/_qc.py +22 -6
  12. sclab/gui/__init__.py +0 -0
  13. sclab/gui/components/__init__.py +7 -0
  14. sclab/gui/components/_guided_pseudotime.py +482 -0
  15. sclab/gui/components/_transfer_metadata.py +186 -0
  16. sclab/methods/__init__.py +16 -0
  17. sclab/preprocess/__init__.py +19 -0
  18. sclab/preprocess/_cca.py +154 -0
  19. sclab/preprocess/_cca_integrate.py +109 -0
  20. sclab/preprocess/_filter_obs.py +42 -0
  21. sclab/preprocess/_harmony.py +421 -0
  22. sclab/preprocess/_harmony_integrate.py +53 -0
  23. sclab/preprocess/_normalize_weighted.py +61 -0
  24. sclab/preprocess/_subset.py +208 -0
  25. sclab/preprocess/_transfer_metadata.py +137 -0
  26. sclab/preprocess/_transform.py +82 -0
  27. sclab/preprocess/_utils.py +96 -0
  28. sclab/tools/__init__.py +0 -0
  29. sclab/tools/cellflow/__init__.py +0 -0
  30. sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
  31. sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
  32. sclab/tools/cellflow/pseudotime/__init__.py +0 -0
  33. sclab/tools/cellflow/pseudotime/_pseudotime.py +332 -0
  34. sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
  35. sclab/tools/cellflow/utils/__init__.py +0 -0
  36. sclab/tools/cellflow/utils/density_nd.py +215 -0
  37. sclab/tools/cellflow/utils/interpolate.py +334 -0
  38. sclab/tools/cellflow/utils/smoothen.py +124 -0
  39. sclab/tools/cellflow/utils/times.py +55 -0
  40. sclab/tools/differential_expression/__init__.py +5 -0
  41. sclab/tools/differential_expression/_pseudobulk_edger.py +304 -0
  42. sclab/tools/differential_expression/_pseudobulk_helpers.py +277 -0
  43. sclab/tools/doublet_detection/__init__.py +5 -0
  44. sclab/tools/doublet_detection/_scrublet.py +64 -0
  45. sclab/tools/labeling/__init__.py +6 -0
  46. sclab/tools/labeling/sctype.py +233 -0
  47. sclab/utils/__init__.py +5 -0
  48. sclab/utils/_write_excel.py +510 -0
  49. {sclab-0.2.5.dist-info → sclab-0.3.1.dist-info}/METADATA +6 -2
  50. sclab-0.3.1.dist-info/RECORD +82 -0
  51. sclab-0.2.5.dist-info/RECORD +0 -45
  52. {sclab-0.2.5.dist-info → sclab-0.3.1.dist-info}/WHEEL +0 -0
  53. {sclab-0.2.5.dist-info → sclab-0.3.1.dist-info}/licenses/LICENSE +0 -0
sclab/methods/__init__.py CHANGED
@@ -1,3 +1,5 @@
+from importlib.util import find_spec
+
 from .._methods_registry import register_sclab_method
 from ..examples.processor_steps import (
     PCA,
@@ -5,11 +7,13 @@ from ..examples.processor_steps import (
     UMAP,
     Cluster,
     DifferentialExpression,
+    DoubletDetection,
     GeneExpression,
     Integration,
     Neighbors,
     Preprocess,
 )
+from ..gui.components import GuidedPseudotime, TransferMetadata

 __all__ = [
     "QC",
@@ -19,8 +23,10 @@ __all__ = [
     "Neighbors",
     "UMAP",
     "Cluster",
+    "DoubletDetection",
     "GeneExpression",
     "DifferentialExpression",
+    "GuidedPseudotime",
 ]

 register_sclab_method("Processing")(QC)
@@ -28,7 +34,17 @@ register_sclab_method("Processing")(Preprocess)
 register_sclab_method("Processing")(PCA)
 register_sclab_method("Processing")(Integration)
 register_sclab_method("Processing")(Neighbors)
+register_sclab_method("Processing")(TransferMetadata)
 register_sclab_method("Processing")(UMAP)
 register_sclab_method("Processing")(Cluster)
+
+if any(
+    [
+        find_spec("scrublet"),
+    ]
+):
+    register_sclab_method("Processing")(DoubletDetection)
+
 register_sclab_method("Analysis")(GeneExpression)
 register_sclab_method("Analysis")(DifferentialExpression)
+register_sclab_method("Analysis")(GuidedPseudotime)
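Note: DoubletDetection is only registered when its optional backend is installed; importlib.util.find_spec probes availability without importing the module, so a missing dependency cannot raise at registration time. A minimal sketch of the same guard (the print messages are illustrative, not part of the package):

from importlib.util import find_spec

# find_spec returns None when the module is not installed and does not
# execute the module, so this check is safe even without scrublet.
if find_spec("scrublet") is not None:
    print("scrublet found: DoubletDetection will appear under 'Processing'")
else:
    print("scrublet missing: DoubletDetection is skipped")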
sclab/preprocess/__init__.py ADDED
@@ -0,0 +1,19 @@
+from ._cca_integrate import cca_integrate, cca_integrate_pair
+from ._filter_obs import filter_obs
+from ._harmony_integrate import harmony_integrate
+from ._normalize_weighted import normalize_weighted
+from ._subset import subset_obs, subset_var
+from ._transfer_metadata import transfer_metadata
+from ._transform import pool_neighbors
+
+__all__ = [
+    "cca_integrate",
+    "cca_integrate_pair",
+    "filter_obs",
+    "harmony_integrate",
+    "normalize_weighted",
+    "pool_neighbors",
+    "subset_obs",
+    "subset_var",
+    "transfer_metadata",
+]
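Note: the new __init__ re-exports each helper from its private module, so callers import from the subpackage directly, e.g.:

from sclab.preprocess import (
    cca_integrate,
    filter_obs,
    normalize_weighted,
    transfer_metadata,
)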
sclab/preprocess/_cca.py ADDED
@@ -0,0 +1,154 @@
+import logging
+from typing import Literal
+
+import numpy as np
+from numpy import matrix
+from numpy.typing import NDArray
+from scipy.linalg import svd
+from scipy.sparse import csc_matrix, csr_matrix, issparse
+from scipy.sparse.linalg import svds
+from sklearn.utils.extmath import randomized_svd
+
+logger = logging.getLogger(__name__)
+
+
+def cca(
+    X: NDArray | csr_matrix | csc_matrix,
+    Y: NDArray | csr_matrix | csc_matrix,
+    n_components=None,
+    svd_solver: Literal["full", "partial", "randomized"] = "partial",
+    normalize: bool = False,
+    random_state=42,
+) -> tuple[NDArray, NDArray, NDArray]:
+    """
+    CCA-style integration for two single-cell matrices with unequal numbers of cells.
+
+    Parameters
+    ----------
+    X, Y : array-like, shape (n_cells, n_features)
+        Cell-by-feature matrices sharing the same feature space (variable
+        genes/PCs) in the same order.
+    n_components : int or None
+        Dimensionality of the canonical space (default = all that the smaller
+        dataset allows).
+    svd_solver : {'full', 'partial', 'randomized'}
+        'randomized' uses the Halko et al. algorithm
+        (`sklearn.utils.extmath.randomized_svd`) and is strongly recommended
+        when only the leading few components are needed.
+    normalize : bool
+        If True, scale each cell's canonical variables to unit L2 norm.
+    random_state : int or None
+        Passed through to the randomized SVD for reproducibility.
+
+    Returns
+    -------
+    U : (n_cells(X), k) ndarray
+        Cell-level canonical variables for X.
+    s : (k,) ndarray
+        Singular values of the cross-covariance matrix.
+    V : (n_cells(Y), k) ndarray
+        Cell-level canonical variables for Y.
+    """
+    n1, p1 = X.shape
+    n2, p2 = Y.shape
+    if p1 != p2:
+        raise ValueError("The two matrices must have the same number of features.")
+
+    k = n_components or min(n1, n2)
+
+    if issparse(X):
+        C = _cross_covariance_sparse(X, Y)
+    else:
+        C = _cross_covariance_dense(X, Y)
+
+    logger.info(f"Cross-covariance computed. Shape: {C.shape}")
+
+    Uc, s, Vct = _svd_decomposition(C, k, svd_solver, random_state)
+
+    # Canonical variables: the left and right singular vectors of the
+    # cell-by-cell cross-covariance are the cell embeddings.
+    U = Uc  # (n1 x k)
+    V = Vct.T  # (n2 x k)
+
+    if normalize:
+        logger.info("Normalizing canonical variables...")
+        U = U / np.linalg.norm(U, axis=1, keepdims=True)
+        V = V / np.linalg.norm(V, axis=1, keepdims=True)
+
+    logger.info("Done.")
+
+    return U, s, V
+
+
+def _svd_decomposition(
+    C: NDArray,
+    k: int,
+    svd_solver: Literal["full", "partial", "randomized"],
+    random_state: int | None,
+) -> tuple[NDArray, NDArray, NDArray]:
+    if svd_solver == "full":
+        logger.info("SVD decomposition with full SVD...")
+        Uc, s, Vct = svd(C, full_matrices=False)
+        Uc, s, Vct = Uc[:, :k], s[:k], Vct[:k, :]
+
+    elif svd_solver == "partial":
+        logger.info("SVD decomposition with partial SVD...")
+        Uc, s, Vct = svds(C, k=k)
+
+    elif svd_solver == "randomized":
+        logger.info("SVD decomposition with randomized SVD...")
+        Uc, s, Vct = randomized_svd(C, n_components=k, random_state=random_state)
+
+    else:
+        raise ValueError("svd_solver must be 'full', 'partial', or 'randomized'.")
+
+    # svds returns components in ascending order; sort every solver's output
+    # by decreasing singular value for a consistent component order.
+    order = np.argsort(-s)
+    s = s[order]
+    Uc = Uc[:, order]
+    Vct = Vct[order, :]
+
+    return Uc, s, Vct
+
+
+def _cross_covariance_sparse(X: csr_matrix, Y: csr_matrix) -> NDArray:
+    _, p1 = X.shape
+    _, p2 = Y.shape
+    if p1 != p2:
+        raise ValueError("The two matrices must have the same number of features.")
+
+    p = p1
+
+    # TODO: incorporate sparse scaling
+
+    logger.info("Computing cross-covariance on sparse matrices...")
+
+    mux: matrix = X.mean(axis=0)
+    muy: matrix = Y.mean(axis=0)
+
+    # Expand (X - mux)(Y - muy)^T into four terms so X and Y stay sparse.
+    XYt: csr_matrix = X.dot(Y.T)
+    Xmuyt: matrix = X.dot(muy.T)
+    muxYt: matrix = Y.dot(mux.T).T
+    muxmuyt: float = (mux @ muy.T)[0, 0]
+
+    C = (XYt - Xmuyt - muxYt + muxmuyt) / (p - 1)
+
+    return np.asarray(C)
+
+
+def _cross_covariance_dense(X: NDArray, Y: NDArray) -> NDArray:
+    _, p1 = X.shape
+    _, p2 = Y.shape
+    if p1 != p2:
+        raise ValueError("The two matrices must have the same number of features.")
+
+    p = p1
+
+    logger.info("Computing cross-covariance on dense matrices...")
+    X = _dense_scale(X)
+    Y = _dense_scale(Y)
+
+    X = X - X.mean(axis=0, keepdims=True)
+    Y = Y - Y.mean(axis=0, keepdims=True)
+
+    C: NDArray = (X @ Y.T) / (p - 1)
+
+    return C
+
+
+def _dense_scale(A: NDArray) -> NDArray:
+    A = np.asarray(A)
+    eps = np.finfo(A.dtype).eps
+    return A / (A.std(axis=0, ddof=1, keepdims=True) + eps)
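Note: a quick shape check of cca on synthetic dense data (hypothetical sizes, importing from the private module path added above; not from the package's tests):

import numpy as np
from sclab.preprocess._cca import cca

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 50))  # 200 cells x 50 shared features
Y = rng.normal(size=(120, 50))  # 120 cells x the same 50 features

U, s, V = cca(X, Y, n_components=10, svd_solver="randomized", random_state=0)
assert U.shape == (200, 10) and V.shape == (120, 10) and s.shape == (10,)
assert np.all(np.diff(s) <= 0)  # singular values come back in decreasing order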
sclab/preprocess/_cca_integrate.py ADDED
@@ -0,0 +1,109 @@
+import numpy as np
+from anndata import AnnData
+
+from ._cca import cca
+
+
+def cca_integrate(
+    adata: AnnData,
+    key: str,
+    *,
+    basis: str = "X",
+    adjusted_basis: str | None = None,
+    reference_batch: str | list[str] | None = None,
+    mask_var: str | None = None,
+    n_components: int = 30,
+    svd_solver: str = "partial",
+    normalize: bool = False,
+    random_state: int | None = None,
+):
+    # Only the two-batch case is implemented; multi-batch integration
+    # (and the reference_batch option) is not supported yet.
+    n_groups = adata.obs[key].nunique()
+    if n_groups == 2:
+        cca_integrate_pair(
+            adata,
+            key,
+            adata.obs[key].unique()[0],
+            adata.obs[key].unique()[1],
+            basis=basis,
+            adjusted_basis=adjusted_basis,
+            mask_var=mask_var,
+            n_components=n_components,
+            svd_solver=svd_solver,
+            normalize=normalize,
+            random_state=random_state,
+        )
+    else:
+        raise NotImplementedError
+
+
+def cca_integrate_pair(
+    adata: AnnData,
+    key: str,
+    group1: str,
+    group2: str,
+    *,
+    basis: str | None = None,
+    adjusted_basis: str | None = None,
+    mask_var: str | None = None,
+    n_components: int = 30,
+    svd_solver: str = "partial",
+    normalize: bool = False,
+    random_state: int | None = None,
+):
+    if basis is None:
+        basis = "X"
+
+    if adjusted_basis is None:
+        adjusted_basis = basis + "_cca"
+
+    if mask_var is not None:
+        mask = adata.var[mask_var].values
+    else:
+        mask = np.ones(adata.n_vars, dtype=bool)
+
+    Xs = {}
+    groups = adata.obs.groupby(key, observed=True).groups
+    for gr, idx in groups.items():
+        Xs[gr] = _get_basis(adata[idx, mask], basis)
+
+    Ys = {}
+    Ys[group1], sigma, Ys[group2] = cca(
+        Xs[group1],
+        Xs[group2],
+        n_components=n_components,
+        svd_solver=svd_solver,
+        normalize=normalize,
+        random_state=random_state,
+    )
+
+    if (
+        adjusted_basis not in adata.obsm
+        or adata.obsm[adjusted_basis].shape[1] != n_components
+    ):
+        adata.obsm[adjusted_basis] = np.full((adata.n_obs, n_components), np.nan)
+
+    if adjusted_basis not in adata.uns:
+        adata.uns[adjusted_basis] = {}
+
+    # Scatter each group's canonical variables back into the shared obsm
+    # array, and keep per-pair singular values in uns.
+    uns = adata.uns[adjusted_basis]
+    uns[f"{group1}-{group2}"] = {"sigma": sigma}
+    for gr, obs_names in groups.items():
+        idx = adata.obs_names.get_indexer(obs_names)
+        adata.obsm[adjusted_basis][idx] = Ys[gr]
+        uns[gr] = Ys[gr]
+
+
+def _get_basis(adata: AnnData, basis: str):
+    if basis == "X":
+        X = adata.X
+
+    elif basis in adata.layers:
+        X = adata.layers[basis]
+
+    elif basis in adata.obsm:
+        X = adata.obsm[basis]
+
+    else:
+        raise ValueError(f"Unknown basis {basis}")
+
+    return X
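Note: a hypothetical call, assuming adata.obs["batch"] has exactly two levels (the only case currently implemented) and a boolean adata.var["highly_variable"] column:

from sclab.preprocess import cca_integrate

cca_integrate(
    adata,
    "batch",
    mask_var="highly_variable",  # restrict CCA to the shared variable genes
    n_components=30,
)
# With the default basis "X", embeddings land in adata.obsm["X_cca"] and the
# singular values in adata.uns["X_cca"]["<group1>-<group2>"]["sigma"].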
sclab/preprocess/_filter_obs.py ADDED
@@ -0,0 +1,42 @@
+import numpy as np
+from anndata import AnnData
+from scipy.stats import rankdata
+
+
+def filter_obs(
+    adata: AnnData,
+    *,
+    layer: str | None = None,
+    min_counts: int | None = None,
+    min_genes: int | None = None,
+    max_counts: int | None = None,
+    max_cells: int | None = None,
+) -> None:
+    if layer is not None:
+        X = adata.layers[layer]
+    else:
+        X = adata.X
+
+    remove_mask = np.zeros(X.shape[0], dtype=bool)
+
+    if min_genes is not None:
+        M = X > 0
+        rowsums = np.asarray(M.sum(axis=1)).squeeze()
+        remove_mask[rowsums < min_genes] = True
+
+    if min_counts is not None or max_counts is not None or max_cells is not None:
+        rowsums = np.asarray(X.sum(axis=1)).squeeze()
+
+        if min_counts is not None:
+            remove_mask[rowsums < min_counts] = True
+
+        if max_counts is not None:
+            remove_mask[rowsums > max_counts] = True
+
+        if max_cells is not None:
+            # rank cells by decreasing total counts; keep the top max_cells
+            ranks = rankdata(-rowsums, method="min")
+            remove_mask[ranks > max_cells] = True
+
+    if remove_mask.any():
+        obs_idx = adata.obs_names[~remove_mask]
+        adata._inplace_subset_obs(obs_idx)
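Note: filtering happens in place via AnnData's _inplace_subset_obs. For example (hypothetical thresholds), to drop cells with fewer than 500 total counts or fewer than 200 detected genes while keeping at most the 5000 highest-count cells:

from sclab.preprocess import filter_obs

filter_obs(adata, min_counts=500, min_genes=200, max_cells=5000)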