PyPI - sclab - Versions diffs - 0.1.7__py3-none-any.whl → 0.3.4__py3-none-any.whl - Mend

sclab 0.1.7__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80) hide show

sclab/__init__.py +3 -1
sclab/_io.py +83 -12
sclab/_methods_registry.py +65 -0
sclab/_sclab.py +241 -21
sclab/dataset/_dataset.py +4 -6
sclab/dataset/processor/_processor.py +41 -19
sclab/dataset/processor/_results_panel.py +94 -0
sclab/dataset/processor/step/_processor_step_base.py +12 -6
sclab/examples/processor_steps/__init__.py +8 -0
sclab/examples/processor_steps/_cluster.py +2 -2
sclab/examples/processor_steps/_differential_expression.py +329 -0
sclab/examples/processor_steps/_doublet_detection.py +68 -0
sclab/examples/processor_steps/_gene_expression.py +125 -0
sclab/examples/processor_steps/_integration.py +116 -0
sclab/examples/processor_steps/_neighbors.py +26 -6
sclab/examples/processor_steps/_pca.py +13 -8
sclab/examples/processor_steps/_preprocess.py +52 -25
sclab/examples/processor_steps/_qc.py +24 -8
sclab/examples/processor_steps/_umap.py +2 -2
sclab/gui/__init__.py +0 -0
sclab/gui/components/__init__.py +7 -0
sclab/gui/components/_guided_pseudotime.py +482 -0
sclab/gui/components/_transfer_metadata.py +186 -0
sclab/methods/__init__.py +50 -0
sclab/preprocess/__init__.py +26 -0
sclab/preprocess/_cca.py +176 -0
sclab/preprocess/_cca_integrate.py +109 -0
sclab/preprocess/_filter_obs.py +42 -0
sclab/preprocess/_harmony.py +421 -0
sclab/preprocess/_harmony_integrate.py +53 -0
sclab/preprocess/_normalize_weighted.py +65 -0
sclab/preprocess/_pca.py +51 -0
sclab/preprocess/_preprocess.py +155 -0
sclab/preprocess/_qc.py +38 -0
sclab/preprocess/_rpca.py +116 -0
sclab/preprocess/_subset.py +208 -0
sclab/preprocess/_transfer_metadata.py +196 -0
sclab/preprocess/_transform.py +82 -0
sclab/preprocess/_utils.py +96 -0
sclab/scanpy/__init__.py +0 -0
sclab/scanpy/_compat.py +92 -0
sclab/scanpy/_settings.py +526 -0
sclab/scanpy/logging.py +290 -0
sclab/scanpy/plotting/__init__.py +0 -0
sclab/scanpy/plotting/_rcmod.py +73 -0
sclab/scanpy/plotting/palettes.py +221 -0
sclab/scanpy/readwrite.py +1108 -0
sclab/tools/__init__.py +0 -0
sclab/tools/cellflow/__init__.py +0 -0
sclab/tools/cellflow/density_dynamics/__init__.py +0 -0
sclab/tools/cellflow/density_dynamics/_density_dynamics.py +349 -0
sclab/tools/cellflow/pseudotime/__init__.py +0 -0
sclab/tools/cellflow/pseudotime/_pseudotime.py +336 -0
sclab/tools/cellflow/pseudotime/timeseries.py +226 -0
sclab/tools/cellflow/utils/__init__.py +0 -0
sclab/tools/cellflow/utils/density_nd.py +215 -0
sclab/tools/cellflow/utils/interpolate.py +334 -0
sclab/tools/cellflow/utils/periodic_genes.py +106 -0
sclab/tools/cellflow/utils/smoothen.py +124 -0
sclab/tools/cellflow/utils/times.py +55 -0
sclab/tools/differential_expression/__init__.py +7 -0
sclab/tools/differential_expression/_pseudobulk_edger.py +309 -0
sclab/tools/differential_expression/_pseudobulk_helpers.py +290 -0
sclab/tools/differential_expression/_pseudobulk_limma.py +257 -0
sclab/tools/doublet_detection/__init__.py +5 -0
sclab/tools/doublet_detection/_scrublet.py +64 -0
sclab/tools/embedding/__init__.py +0 -0
sclab/tools/imputation/__init__.py +0 -0
sclab/tools/imputation/_alra.py +135 -0
sclab/tools/labeling/__init__.py +6 -0
sclab/tools/labeling/sctype.py +233 -0
sclab/tools/utils/__init__.py +5 -0
sclab/tools/utils/_aggregate_and_filter.py +290 -0
sclab/utils/__init__.py +5 -0
sclab/utils/_write_excel.py +510 -0
{sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/METADATA +29 -12
sclab-0.3.4.dist-info/RECORD +93 -0
{sclab-0.1.7.dist-info → sclab-0.3.4.dist-info}/WHEEL +1 -1
sclab-0.3.4.dist-info/licenses/LICENSE +29 -0
sclab-0.1.7.dist-info/RECORD +0 -30

sclab/methods/__init__.py ADDED Viewed

@@ -0,0 +1,50 @@
+from importlib.util import find_spec
+from .._methods_registry import register_sclab_method
+from ..examples.processor_steps import (
+    PCA,
+    QC,
+    UMAP,
+    Cluster,
+    DifferentialExpression,
+    DoubletDetection,
+    GeneExpression,
+    Integration,
+    Neighbors,
+    Preprocess,
+)
+from ..gui.components import GuidedPseudotime, TransferMetadata
+__all__ = [
+    "QC",
+    "Preprocess",
+    "PCA",
+    "Integration",
+    "Neighbors",
+    "UMAP",
+    "Cluster",
+    "DoubletDetection",
+    "GeneExpression",
+    "DifferentialExpression",
+    "GuidedPseudotime",
+]
+# Register the step classes under the "Processing" category. The calls run at
+# import time, one after another, in the order written here.
+# NOTE(review): whether the registry preserves this order for display is not
+# visible from this module - confirm in _methods_registry before reordering.
+register_sclab_method("Processing")(QC)
+register_sclab_method("Processing")(Preprocess)
+register_sclab_method("Processing")(PCA)
+register_sclab_method("Processing")(Integration)
+register_sclab_method("Processing")(Neighbors)
+register_sclab_method("Processing")(TransferMetadata)
+register_sclab_method("Processing")(UMAP)
+register_sclab_method("Processing")(Cluster)
+# DoubletDetection is only registered when the optional "scrublet" package can
+# be located by importlib; the list form leaves room for further candidates.
+if any(
+    [
+        find_spec("scrublet"),
+    ]
+):
+    register_sclab_method("Processing")(DoubletDetection)
+# Register the step classes under the "Analysis" category.
+register_sclab_method("Analysis")(GeneExpression)
+register_sclab_method("Analysis")(DifferentialExpression)
+register_sclab_method("Analysis")(GuidedPseudotime)

sclab/preprocess/__init__.py ADDED Viewed

@@ -0,0 +1,26 @@
+from ._cca_integrate import cca_integrate, cca_integrate_pair
+from ._filter_obs import filter_obs
+from ._harmony_integrate import harmony_integrate
+from ._normalize_weighted import normalize_weighted
+from ._pca import pca
+from ._preprocess import preprocess
+from ._qc import qc
+from ._subset import subset_obs, subset_var
+from ._transfer_metadata import propagate_metadata, transfer_metadata
+from ._transform import pool_neighbors
+# Public API of the preprocess subpackage: exactly the names imported above,
+# listed alphabetically.
+__all__ = [
+    "cca_integrate",
+    "cca_integrate_pair",
+    "filter_obs",
+    "harmony_integrate",
+    "normalize_weighted",
+    "pca",
+    "pool_neighbors",
+    "preprocess",
+    "propagate_metadata",
+    "qc",
+    "subset_obs",
+    "subset_var",
+    "transfer_metadata",
+]

sclab/preprocess/_cca.py ADDED Viewed

@@ -0,0 +1,176 @@
+import logging
+import os
+from typing import Literal
+import numpy as np
+from joblib import Parallel, delayed
+from numpy import matrix
+from numpy.typing import NDArray
+from scipy.linalg import svd
+from scipy.sparse import csc_matrix, csr_matrix, issparse
+from scipy.sparse import vstack as sparse_vstack
+from scipy.sparse.linalg import svds
+from sklearn.utils.extmath import randomized_svd
+logger = logging.getLogger(__name__)
+N_CPUS = os.cpu_count()
+def cca(
+    X: NDArray | csr_matrix | csc_matrix,
+    Y: NDArray | csr_matrix | csc_matrix,
+    n_components=None,
+    svd_solver: Literal["full", "partial", "randomized"] = "randomized",
+    normalize: bool = False,
+    random_state=42,
+    n_jobs: int = N_CPUS,
+) -> tuple[NDArray, NDArray, NDArray]:
+    """
+    CCA-style integration for two single-cell matrices with unequal numbers of cells.
+    Parameters
+    ----------
+    X, Y : array-like, shape (n_cells, n_features)
+        feature-by-cell matrices with same column space (variable genes/pcs) in the same order.
+    n_components : int or None
+        Dimensionality of the canonical space (default = all that the smaller
+        dataset allows).
+    svd_solver : {'full', 'partial', 'randomized'}
+        'randomized' uses Halko et al. algorithm (`sklearn.utils.extmath.randomized_svd`)
+        and is strongly recommended when only the leading few components are needed.
+    random_state : int or None
+        Passed through to the randomized SVD for reproducibility.
+    Returns
+    -------
+    U : (n_cells(X), k) ndarray
+    V : (n_cells(Y), k) ndarray
+        Cell-level canonical variables.
+    """
+    n1, p1 = X.shape
+    n2, p2 = Y.shape
+    if p1 != p2:
+        raise ValueError("The two matrices must have the same number of features.")
+    k = n_components or min(n1, n2)
+    if issparse(X):
+        C = _cross_covariance_sparse(X, Y, n_jobs=n_jobs)
+    else:
+        C = _cross_covariance_dense(X, Y)
+    logger.info(f"Cross-covariance computed. Shape: {C.shape}")
+    Uc, s, Vct = _svd_decomposition(C, k, svd_solver, random_state)
+    # canonical variables
+    # Left and right singular vectors are cell embeddings
+    U = Uc  # (n1 x k)
+    V = Vct.T  # (n2 x k)
+    if normalize:
+        logger.info("Normalizing canonical variables...")
+        U = U / np.linalg.norm(U, axis=1, keepdims=True)
+        V = V / np.linalg.norm(V, axis=1, keepdims=True)
+    logger.info("Done.")
+    return U, s, V
+def _svd_decomposition(
+    C: NDArray,
+    k: int,
+    svd_solver: Literal["full", "partial", "randomized"],
+    random_state: int | None,
+) -> tuple[NDArray, NDArray, NDArray]:
+    if svd_solver == "full":
+        logger.info("SVD decomposition with full SVD...")
+        Uc, s, Vct = svd(C, full_matrices=False)
+        Uc, s, Vct = Uc[:, :k], s[:k], Vct[:k, :]
+    elif svd_solver == "partial":
+        logger.info("SVD decomposition with partial SVD...")
+        Uc, s, Vct = svds(C, k=k)
+    elif svd_solver == "randomized":
+        logger.info("SVD decomposition with randomized SVD...")
+        Uc, s, Vct = randomized_svd(C, n_components=k, random_state=random_state)
+    else:
+        raise ValueError("svd_solver must be 'full' or 'partial'.")
+    order = np.argsort(-s)
+    s = s[order]
+    Uc = Uc[:, order]
+    Vct = Vct[order, :]
+    return Uc, s, Vct
+def _cross_covariance_sparse(X: csr_matrix, Y: csr_matrix, n_jobs=N_CPUS) -> NDArray:
+    """
+    Cross-covariance between the rows of ``X`` and the rows of ``Y``.
+    The column means are not subtracted from the inputs directly; the centered
+    product is expanded as ``X Y^T - X muy^T - mux Y^T + mux muy^T`` and only
+    ``X Y^T`` is computed in parallel chunks. Unlike the dense variant, no
+    per-feature scaling is applied (see the TODO below).
+    Parameters
+    ----------
+    X, Y : sparse matrix, shape (n_rows, p)
+        Must share the same number of columns ``p``.
+        NOTE(review): the broadcasting below assumes ``mean(axis=0)`` returns a
+        2-D ``numpy.matrix`` (as annotated) - confirm for sparse *array* inputs.
+    n_jobs : int
+        Forwarded to ``_spmm_parallel``.
+    Returns
+    -------
+    ndarray
+        The result divided by ``p - 1``, converted with ``np.asarray``.
+    Raises
+    ------
+    ValueError
+        If the column counts differ.
+    """
+    _, p1 = X.shape
+    _, p2 = Y.shape
+    if p1 != p2:
+        raise ValueError("The two matrices must have the same number of features.")
+    p = p1
+    # TODO: incorporate sparse scaling
+    logger.info("Computing cross-covariance on sparse matrices...")
+    # Column (per-feature) means of each input.
+    mux: matrix = X.mean(axis=0)
+    muy: matrix = Y.mean(axis=0)
+    XYt: csr_matrix = _spmm_parallel(X, Y.T, n_jobs=n_jobs)
+    # X muy^T: one value per row of X, broadcast across columns below.
+    Xmuyt: matrix = X.dot(muy.T)
+    # mux Y^T: one value per row of Y, laid out as a row vector.
+    muxYt: matrix = Y.dot(mux.T).T
+    # mux muy^T is 1x1; take the scalar.
+    muxmuyt: float = (mux @ muy.T)[0, 0]
+    C = (XYt - Xmuyt - muxYt + muxmuyt) / (p - 1)
+    return np.asarray(C)
+def _cross_covariance_dense(X: NDArray, Y: NDArray) -> NDArray:
+    _, p1 = X.shape
+    _, p2 = Y.shape
+    if p1 != p2:
+        raise ValueError("The two matrices must have the same number of features.")
+    p = p1
+    logger.info("Computing cross-covariance on dense matrices...")
+    X = _dense_scale(X)
+    Y = _dense_scale(Y)
+    X = X - X.mean(axis=0, keepdims=True)
+    Y = Y - Y.mean(axis=0, keepdims=True)
+    C: NDArray = (X @ Y.T) / (p - 1)
+    return C
+def _dense_scale(A: NDArray) -> NDArray:
+    A = np.asarray(A)
+    eps = np.finfo(A.dtype).eps
+    return A / (A.std(axis=0, ddof=1, keepdims=True) + eps)
+def _spmm_chunk(A_csr, X, start, stop):
+    return A_csr[start:stop, :] @ X
+def _spmm_parallel(A_csr: csr_matrix, X_csc: csc_matrix, n_jobs=N_CPUS):
+    n = A_csr.shape[0]
+    bounds = np.linspace(0, n, n_jobs + 1, dtype=int)
+    Ys = Parallel(n_jobs=n_jobs, prefer="processes")(
+        delayed(_spmm_chunk)(A_csr, X_csc, bounds[i], bounds[i + 1])
+        for i in range(n_jobs)
+    )
+    return sparse_vstack(Ys)  # result is sparse if X is sparse, dense otherwise

sclab/preprocess/_cca_integrate.py ADDED Viewed

@@ -0,0 +1,109 @@
+import numpy as np
+from anndata import AnnData
+from ._cca import cca
+def cca_integrate(
+    adata: AnnData,
+    key: str,
+    *,
+    basis: str = "X",
+    adjusted_basis: str | None = None,
+    reference_batch: str | list[str] | None = None,
+    mask_var: str | None = None,
+    n_components: int = 30,
+    svd_solver: str = "randomized",
+    normalize: bool = True,
+    random_state: int | None = None,
+):
+    n_groups = adata.obs[key].nunique()
+    if n_groups == 2:
+        cca_integrate_pair(
+            adata,
+            key,
+            adata.obs[key].unique()[0],
+            adata.obs[key].unique()[1],
+            basis=basis,
+            adjusted_basis=adjusted_basis,
+            mask_var=mask_var,
+            n_components=n_components,
+            svd_solver=svd_solver,
+            normalize=normalize,
+            random_state=random_state,
+        )
+    else:
+        raise NotImplementedError
+def cca_integrate_pair(
+    adata: AnnData,
+    key: str,
+    group1: str,
+    group2: str,
+    *,
+    basis: str | None = None,
+    adjusted_basis: str | None = None,
+    mask_var: str | None = None,
+    n_components: int = 30,
+    svd_solver: str = "randomized",
+    normalize: bool = True,
+    random_state: int | None = None,
+):
+    if basis is None:
+        basis = "X"
+    if adjusted_basis is None:
+        adjusted_basis = basis + "_cca"
+    if mask_var is not None:
+        mask = adata.var[mask_var].values
+    else:
+        mask = np.ones(adata.n_vars, dtype=bool)
+    Xs = {}
+    groups = adata.obs.groupby(key, observed=True).groups
+    for gr, idx in groups.items():
+        Xs[gr] = _get_basis(adata[idx, mask], basis)
+    Ys = {}
+    Ys[group1], sigma, Ys[group2] = cca(
+        Xs[group1],
+        Xs[group2],
+        n_components=n_components,
+        svd_solver=svd_solver,
+        normalize=normalize,
+        random_state=random_state,
+    )
+    if (
+        adjusted_basis not in adata.obsm
+        or adata.obsm[adjusted_basis].shape[1] != n_components
+    ):
+        adata.obsm[adjusted_basis] = np.full((adata.n_obs, n_components), np.nan)
+    if adjusted_basis not in adata.uns:
+        adata.uns[adjusted_basis] = {}
+    uns = adata.uns[adjusted_basis]
+    uns[f"{group1}-{group2}"] = {"sigma": sigma}
+    for gr, obs_names in groups.items():
+        idx = adata.obs_names.get_indexer(obs_names)
+        adata.obsm[adjusted_basis][idx] = Ys[gr]
+        uns[gr] = Ys[gr]
+def _get_basis(adata: AnnData, basis: str):
+    if basis == "X":
+        X = adata.X
+    elif basis in adata.layers:
+        X = adata.layers[basis]
+    elif basis in adata.obsm:
+        X = adata.obsm[basis]
+    else:
+        raise ValueError(f"Unknown basis {basis}")
+    return X

sclab/preprocess/_filter_obs.py ADDED Viewed

@@ -0,0 +1,42 @@
+import numpy as np
+from anndata import AnnData
+from scipy.stats import rankdata
+def filter_obs(
+    adata: AnnData,
+    *,
+    layer: str | None = None,
+    min_counts: int | None = None,
+    min_genes: int | None = None,
+    max_counts: int | None = None,
+    max_cells: int | None = None,
+) -> None:
+    if layer is not None:
+        X = adata.layers[layer]
+    else:
+        X = adata.X
+    remove_mask = np.zeros(X.shape[0], dtype=bool)
+    if min_genes is not None:
+        M = X > 0
+        rowsums = np.asarray(M.sum(axis=1)).squeeze()
+        remove_mask[rowsums < min_genes] = True
+    if min_counts is not None or max_counts is not None or max_cells is not None:
+        rowsums = np.asarray(X.sum(axis=1)).squeeze()
+        if min_counts is not None:
+            remove_mask[rowsums < min_counts] = True
+        if max_counts is not None:
+            remove_mask[rowsums > max_counts] = True
+        if max_cells is not None:
+            ranks = rankdata(-rowsums, method="min")
+            remove_mask[ranks > max_cells] = True
+    if remove_mask.any():
+        obs_idx = adata.obs_names[~remove_mask]
+        adata._inplace_subset_obs(obs_idx)

sclab 0.1.7__py3-none-any.whl → 0.3.4__py3-none-any.whl

sclab 0.1.7__py3-none-any.whl → 0.3.4__py3-none-any.whl