PyPI - sclab - Versions diffs - 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl - Mend

sclab: sclab-0.3.2-py3-none-any.whl → sclab-0.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sclab might be problematic. Click here for more details.

Files changed (17)

sclab/__init__.py +1 -1
sclab/preprocess/__init__.py +6 -0
sclab/preprocess/_cca.py +26 -4
sclab/preprocess/_cca_integrate.py +4 -4
sclab/preprocess/_normalize_weighted.py +5 -1
sclab/preprocess/_pca.py +51 -0
sclab/preprocess/_preprocess.py +155 -0
sclab/preprocess/_qc.py +38 -0
sclab/preprocess/_transfer_metadata.py +6 -5
sclab/tools/cellflow/pseudotime/_pseudotime.py +5 -1
sclab/tools/differential_expression/__init__.py +2 -0
sclab/tools/differential_expression/_pseudobulk_edger.py +24 -21
sclab/tools/differential_expression/_pseudobulk_limma.py +257 -0
{sclab-0.3.2.dist-info → sclab-0.3.3.dist-info}/METADATA +5 -5
{sclab-0.3.2.dist-info → sclab-0.3.3.dist-info}/RECORD +17 -13
{sclab-0.3.2.dist-info → sclab-0.3.3.dist-info}/WHEEL +0 -0
{sclab-0.3.2.dist-info → sclab-0.3.3.dist-info}/licenses/LICENSE +0 -0

sclab/__init__.py CHANGED Viewed

@@ -6,4 +6,4 @@ __all__ = [
     "SCLabDashboard",
 ]
-__version__ = "0.3.2"
+__version__ = "0.3.3"

sclab/preprocess/__init__.py CHANGED Viewed

@@ -2,6 +2,9 @@ from ._cca_integrate import cca_integrate, cca_integrate_pair
 from ._filter_obs import filter_obs
 from ._harmony_integrate import harmony_integrate
 from ._normalize_weighted import normalize_weighted
+from ._pca import pca
+from ._preprocess import preprocess
+from ._qc import qc
 from ._subset import subset_obs, subset_var
 from ._transfer_metadata import transfer_metadata
 from ._transform import pool_neighbors
@@ -12,7 +15,10 @@ __all__ = [
     "filter_obs",
     "harmony_integrate",
     "normalize_weighted",
+    "pca",
     "pool_neighbors",
+    "preprocess",
+    "qc",
     "subset_obs",
     "subset_var",
     "transfer_metadata",

sclab/preprocess/_cca.py CHANGED Viewed

@@ -1,24 +1,31 @@
 import logging
+import os
 from typing import Literal
 import numpy as np
+from joblib import Parallel, delayed
 from numpy import matrix
 from numpy.typing import NDArray
 from scipy.linalg import svd
 from scipy.sparse import csc_matrix, csr_matrix, issparse
+from scipy.sparse import vstack as sparse_vstack
 from scipy.sparse.linalg import svds
 from sklearn.utils.extmath import randomized_svd
 logger = logging.getLogger(__name__)
+N_CPUS = os.cpu_count()
 def cca(
     X: NDArray | csr_matrix | csc_matrix,
     Y: NDArray | csr_matrix | csc_matrix,
     n_components=None,
-    svd_solver: Literal["full", "partial", "randomized"] = "partial",
+    svd_solver: Literal["full", "partial", "randomized"] = "randomized",
     normalize: bool = False,
     random_state=42,
+    n_jobs: int = N_CPUS,
 ) -> tuple[NDArray, NDArray, NDArray]:
     """
     CCA-style integration for two single-cell matrices with unequal numbers of cells.
@@ -50,7 +57,7 @@ def cca(
     k = n_components or min(n1, n2)
     if issparse(X):
-        C = _cross_covariance_sparse(X, Y)
+        C = _cross_covariance_sparse(X, Y, n_jobs=n_jobs)
     else:
         C = _cross_covariance_dense(X, Y)
@@ -103,7 +110,7 @@ def _svd_decomposition(
     return Uc, s, Vct
-def _cross_covariance_sparse(X: csr_matrix, Y: csr_matrix) -> NDArray:
+def _cross_covariance_sparse(X: csr_matrix, Y: csr_matrix, n_jobs=N_CPUS) -> NDArray:
     _, p1 = X.shape
     _, p2 = Y.shape
     if p1 != p2:
@@ -118,7 +125,7 @@ def _cross_covariance_sparse(X: csr_matrix, Y: csr_matrix) -> NDArray:
     mux: matrix = X.mean(axis=0)
     muy: matrix = Y.mean(axis=0)
-    XYt: csr_matrix = X.dot(Y.T)
+    XYt: csr_matrix = _spmm_parallel(X, Y.T, n_jobs=n_jobs)
     Xmuyt: matrix = X.dot(muy.T)
     muxYt: matrix = Y.dot(mux.T).T
     muxmuyt: float = (mux @ muy.T)[0, 0]
@@ -152,3 +159,18 @@ def _dense_scale(A: NDArray) -> NDArray:
     A = np.asarray(A)
     eps = np.finfo(A.dtype).eps
     return A / (A.std(axis=0, ddof=1, keepdims=True) + eps)
+def _spmm_chunk(A_csr, X, start, stop):
+    return A_csr[start:stop, :] @ X
+def _spmm_parallel(A_csr: csr_matrix, X_csc: csc_matrix, n_jobs=N_CPUS):
+    n = A_csr.shape[0]
+    bounds = np.linspace(0, n, n_jobs + 1, dtype=int)
+    Ys = Parallel(n_jobs=n_jobs, prefer="processes")(
+        delayed(_spmm_chunk)(A_csr, X_csc, bounds[i], bounds[i + 1])
+        for i in range(n_jobs)
+    )
+    return sparse_vstack(Ys)  # result is sparse if X is sparse, dense otherwise

sclab/preprocess/_cca_integrate.py CHANGED Viewed

@@ -13,8 +13,8 @@ def cca_integrate(
     reference_batch: str | list[str] | None = None,
     mask_var: str | None = None,
     n_components: int = 30,
-    svd_solver: str = "partial",
-    normalize: bool = False,
+    svd_solver: str = "randomized",
+    normalize: bool = True,
     random_state: int | None = None,
 ):
     n_groups = adata.obs[key].nunique()
@@ -46,8 +46,8 @@ def cca_integrate_pair(
     adjusted_basis: str | None = None,
     mask_var: str | None = None,
     n_components: int = 30,
-    svd_solver: str = "partial",
-    normalize: bool = False,
+    svd_solver: str = "randomized",
+    normalize: bool = True,
     random_state: int | None = None,
 ):
     if basis is None:

sclab/preprocess/_normalize_weighted.py CHANGED Viewed

@@ -9,6 +9,7 @@ def normalize_weighted(
     adata: AnnData,
     target_scale: float | None = None,
     batch_key: str | None = None,
+    q: float = 0.99,
 ) -> None:
     if batch_key is not None:
         for _, idx in adata.obs.groupby(batch_key, observed=True).groups.items():
@@ -22,6 +23,8 @@ def normalize_weighted(
         return
+    target_scale = None
     X: csr_matrix
     Y: csr_matrix
     Z: csr_matrix
@@ -38,6 +41,7 @@ def normalize_weighted(
     Y.eliminate_zeros()
     Y.data = -Y.data * np.log(Y.data)
     entropy = Y.sum(axis=0)
+    entropy[:, entropy.A1 < np.quantile(entropy.A1, q)] *= 0.0
     Z = X.multiply(entropy)
     Z = Z.tocsr()
@@ -48,7 +52,7 @@ def normalize_weighted(
             "ignore", category=RuntimeWarning, message="divide by zero"
         )
         scale = Z.sum(axis=1)
-        Z = Z.multiply(1 / scale)
+        Z = X.multiply(1 / scale)
     Z = Z.tocsr()
     if target_scale is None:

sclab/preprocess/_pca.py ADDED Viewed

@@ -0,0 +1,51 @@
+from anndata import AnnData
+def pca(
+    adata: AnnData,
+    layer: str | None = None,
+    n_comps: int = 30,
+    mask_var: str | None = None,
+    batch_key: str | None = None,
+    reference_batch: str | None = None,
+    zero_center: bool = False,
+):
+    import scanpy as sc
+    pca_kwargs = dict(
+        n_comps=n_comps,
+        layer=layer,
+        mask_var=mask_var,
+        svd_solver="arpack",
+    )
+    if reference_batch:
+        obs_mask = adata.obs[batch_key] == reference_batch
+        adata_ref = adata[obs_mask].copy()
+        if mask_var == "highly_variable":
+            sc.pp.highly_variable_genes(
+                adata_ref, layer=f"{layer if layer else 'X'}_log1p", flavor="seurat"
+            )
+            hvg_seurat = adata_ref.var["highly_variable"]
+            sc.pp.highly_variable_genes(
+                adata_ref,
+                layer=layer,
+                flavor="seurat_v3_paper",
+                n_top_genes=hvg_seurat.sum(),
+            )
+            hvg_seurat_v3 = adata_ref.var["highly_variable"]
+            adata_ref.var["highly_variable"] = hvg_seurat | hvg_seurat_v3
+        sc.pp.pca(adata_ref, **pca_kwargs)
+        uns_pca = adata_ref.uns["pca"]
+        uns_pca["reference_batch"] = reference_batch
+        PCs = adata_ref.varm["PCs"]
+        adata.obsm["X_pca"] = adata.X.dot(PCs)
+        adata.uns["pca"] = uns_pca
+        adata.varm["PCs"] = PCs
+    else:
+        sc.pp.pca(adata, **pca_kwargs)
+        adata.obsm["X_pca"] = adata.X.dot(adata.varm["PCs"])
+    if zero_center:
+        adata.obsm["X_pca"] -= adata.obsm["X_pca"].mean(axis=0, keepdims=True)

sclab/preprocess/_preprocess.py ADDED Viewed

@@ -0,0 +1,155 @@
+import warnings
+from typing import Literal
+import numpy as np
+from anndata import AnnData, ImplicitModificationWarning
+from tqdm.auto import tqdm
+def preprocess(
+    adata: AnnData,
+    counts_layer: str = "counts",
+    group_by: str | None = None,
+    min_cells: int = 5,
+    min_genes: int = 5,
+    compute_hvg: bool = True,
+    regress_total_counts: bool = False,
+    regress_n_genes: bool = False,
+    normalization_method: Literal["library", "weighted", "none"] = "library",
+    target_scale: float = 1e4,
+    weighted_norm_quantile: float = 0.9,
+    log1p: bool = True,
+    scale: bool = True,
+):
+    import scanpy as sc
+    from ._normalize_weighted import normalize_weighted
+    with tqdm(total=100, bar_format="{percentage:3.0f}%|{bar}|") as pbar:
+        if counts_layer not in adata.layers:
+            adata.layers[counts_layer] = adata.X.copy()
+        if f"{counts_layer}_log1p" not in adata.layers:
+            adata.layers[f"{counts_layer}_log1p"] = sc.pp.log1p(
+                adata.layers[counts_layer].copy()
+            )
+        pbar.update(10)
+        adata.X = adata.layers[counts_layer].copy()
+        sc.pp.calculate_qc_metrics(
+            adata,
+            percent_top=None,
+            log1p=False,
+            inplace=True,
+        )
+        sc.pp.filter_cells(adata, min_genes=min_genes)
+        sc.pp.filter_genes(adata, min_cells=min_cells)
+        pbar.update(10)
+        sc.pp.calculate_qc_metrics(
+            adata,
+            percent_top=None,
+            log1p=False,
+            inplace=True,
+        )
+        pbar.update(10)
+        if compute_hvg:
+            if group_by is not None:
+                adata.var["highly_variable"] = False
+                for name, idx in adata.obs.groupby(
+                    group_by, observed=True
+                ).groups.items():
+                    hvg_seurat = sc.pp.highly_variable_genes(
+                        adata[idx],
+                        layer=f"{counts_layer}_log1p",
+                        flavor="seurat",
+                        inplace=False,
+                    )["highly_variable"]
+                    hvg_seurat_v3 = sc.pp.highly_variable_genes(
+                        adata[idx],
+                        layer=counts_layer,
+                        flavor="seurat_v3_paper",
+                        n_top_genes=hvg_seurat.sum(),
+                        inplace=False,
+                    )["highly_variable"]
+                    adata.var[f"highly_variable_{name}"] = hvg_seurat | hvg_seurat_v3
+                    adata.var["highly_variable"] |= adata.var[f"highly_variable_{name}"]
+            else:
+                sc.pp.highly_variable_genes(
+                    adata, layer=f"{counts_layer}_log1p", flavor="seurat"
+                )
+                hvg_seurat = adata.var["highly_variable"]
+                sc.pp.highly_variable_genes(
+                    adata,
+                    layer=counts_layer,
+                    flavor="seurat_v3_paper",
+                    n_top_genes=hvg_seurat.sum(),
+                )
+                hvg_seurat_v3 = adata.var["highly_variable"]
+                adata.var["highly_variable"] = hvg_seurat | hvg_seurat_v3
+        pbar.update(10)
+        pbar.update(10)
+        new_layer = counts_layer
+        if normalization_method == "library":
+            new_layer += "_normt"
+            sc.pp.normalize_total(adata, target_sum=target_scale)
+        elif normalization_method == "weighted":
+            new_layer += "_normw"
+            normalize_weighted(
+                adata,
+                target_scale=target_scale,
+                batch_key=group_by,
+                q=weighted_norm_quantile,
+            )
+        pbar.update(10)
+        pbar.update(10)
+        if log1p:
+            new_layer += "_log1p"
+            adata.uns.pop("log1p", None)
+            sc.pp.log1p(adata)
+        pbar.update(10)
+        vars_to_regress = []
+        if regress_n_genes:
+            vars_to_regress.append("n_genes_by_counts")
+        if regress_total_counts and log1p:
+            adata.obs["log1p_total_counts"] = np.log1p(adata.obs["total_counts"])
+            vars_to_regress.append("log1p_total_counts")
+        elif regress_total_counts:
+            vars_to_regress.append("total_counts")
+        if vars_to_regress:
+            new_layer += "_regr"
+            sc.pp.regress_out(adata, keys=vars_to_regress, n_jobs=1)
+        pbar.update(10)
+        if scale:
+            new_layer += "_scale"
+            if group_by is not None:
+                for _, idx in adata.obs.groupby(group_by, observed=True).groups.items():
+                    with warnings.catch_warnings():
+                        warnings.filterwarnings(
+                            "ignore",
+                            category=ImplicitModificationWarning,
+                            message="Modifying `X` on a view results in data being overridden",
+                        )
+                        adata[idx].X = sc.pp.scale(adata[idx].X, zero_center=False)
+            else:
+                sc.pp.scale(adata, zero_center=False)
+        adata.layers[new_layer] = adata.X.copy()
+        pbar.update(10)
+        adata.X = adata.X.astype(np.float32)

sclab/preprocess/_qc.py ADDED Viewed

@@ -0,0 +1,38 @@
+import numpy as np
+from anndata import AnnData
+def qc(
+    adata: AnnData,
+    counts_layer: str = "counts",
+    min_counts: int = 50,
+    min_genes: int = 5,
+    min_cells: int = 5,
+    max_rank: int = 0,
+):
+    import scanpy as sc
+    if counts_layer not in adata.layers:
+        adata.layers[counts_layer] = adata.X.copy()
+    adata.layers["qc_tmp_current_X"] = adata.X
+    adata.X = adata.layers[counts_layer].copy()
+    rowsums = np.asarray(adata.X.sum(axis=1)).squeeze()
+    obs_idx = adata.obs_names[rowsums >= min_counts]
+    adata._inplace_subset_obs(obs_idx)
+    sc.pp.calculate_qc_metrics(adata, percent_top=None, log1p=False, inplace=True)
+    sc.pp.filter_cells(adata, min_genes=min_genes)
+    sc.pp.filter_genes(adata, min_cells=min_cells)
+    sc.pp.calculate_qc_metrics(adata, percent_top=None, log1p=False, inplace=True)
+    adata.obs["barcode_rank"] = adata.obs["total_counts"].rank(ascending=False)
+    # Restore original X
+    adata.X = adata.layers.pop("qc_tmp_current_X")
+    if max_rank > 0:
+        series = adata.obs["barcode_rank"]
+        index = series.loc[series < max_rank].index
+        adata._inplace_subset_obs(index)

sclab/preprocess/_transfer_metadata.py CHANGED Viewed

@@ -23,18 +23,19 @@ def transfer_metadata(
     min_neighs: int = 5,
     weight_by: Literal["connectivity", "distance", "constant"] = "connectivity",
 ):
-    D: csr_matrix = adata.obsp["distances"]
-    C: csr_matrix = adata.obsp["connectivities"]
+    D: csr_matrix = adata.obsp["distances"].copy()
+    C: csr_matrix = adata.obsp["connectivities"].copy()
     D = D.tocsr()
+    W: csr_matrix
     match weight_by:
         case "connectivity":
-            W = C.tocsr()
+            W = C.tocsr().copy()
         case "distance":
-            W = D.tocsr()
+            W = D.tocsr().copy()
             W.data = 1.0 / W.data
         case "constant":
-            W = D.tocsr()
+            W = D.tocsr().copy()
             W.data[:] = 1.0
         case _:
             raise ValueError(f"Unsupported weight_by {weight_by}")

sclab/tools/cellflow/pseudotime/_pseudotime.py CHANGED Viewed

@@ -280,6 +280,7 @@ def estimate_periodic_pseudotime_start(
     time_key: str = "pseudotime",
     bandwidth: float = 1 / 64,
     show_plot: bool = False,
+    nth_root: int = 1,
 ):
     # TODO: Test implementation
     pseudotime = adata.obs[time_key].values.copy()
@@ -316,7 +317,10 @@ def estimate_periodic_pseudotime_start(
     roots = (x[idx] + x[1:][idx]) / 2
     heights = yp[idx]
-    max_peak_x = roots[heights.argmin()]
+    roots = roots[heights.argsort()]
+    heights = heights[heights.argsort()]
+    max_peak_x = roots[nth_root - 1]
     if show_plot:
         plt.hist(

sclab/tools/differential_expression/__init__.py CHANGED Viewed

@@ -1,5 +1,7 @@
 from ._pseudobulk_edger import pseudobulk_edger
+from ._pseudobulk_limma import pseudobulk_limma
 __all__ = [
     "pseudobulk_edger",
+    "pseudobulk_limma",
 ]

sclab/tools/differential_expression/_pseudobulk_edger.py CHANGED Viewed

@@ -12,9 +12,9 @@ def pseudobulk_edger(
     cell_identity_key: str | None = None,
     batch_key: str | None = None,
     layer: str | None = None,
-    replicas_per_group: int = 10,
+    replicas_per_group: int = 5,
     min_cells_per_group: int = 30,
-    bootstrap_sampling: bool = True,
+    bootstrap_sampling: bool = False,
     use_cells: dict[str, list[str]] | None = None,
     aggregate: bool = True,
     verbosity: int = 0,
@@ -134,7 +134,7 @@ def pseudobulk_edger(
         try:
             R(f"""
-                outs <- fit_model(aggr_adata, "{gk}", "{cell_identity_key}", "{batch_key}", verbosity = {verbosity})
+                outs <- fit_edger_model(aggr_adata, "{gk}", "{cell_identity_key}", "{batch_key}", verbosity = {verbosity})
                 fit <- outs$fit
                 y <- outs$y
             """)
@@ -214,33 +214,20 @@ suppressPackageStartupMessages({
     library(MAST)
 })
-fit_model <- function(adata_, group_key, cell_identity_key = "None", batch_key = "None", verbosity = 0){
+fit_edger_model <- function(adata_, group_key, cell_identity_key = "None", batch_key = "None", verbosity = 0){
     if (verbosity > 0){
         cat("Group key:", group_key, "\n")
         cat("Cell identity key:", cell_identity_key, "\n")
     }
-    # create an edgeR object with counts and grouping factor
-    y <- DGEList(assay(adata_, "X"), group = colData(adata_)[[group_key]])
-    # filter out genes with low counts
-    if (verbosity > 1){
-        cat("Dimensions before subsetting:", dim(y), "\n")
-    }
-    keep <- filterByExpr(y)
-    y <- y[keep, , keep.lib.sizes=FALSE]
-    if (verbosity > 1){
-        cat("Dimensions after subsetting:", dim(y), "\n")
-    }
-    # normalize
-    y <- calcNormFactors(y)
     # create a vector that is concatentation of condition and cell type that we will later use with contrasts
     if (cell_identity_key == "None"){
         group <- colData(adata_)[[group_key]]
     } else {
         group <- paste0(colData(adata_)[[group_key]], "_", colData(adata_)[[cell_identity_key]])
     }
     if (verbosity > 1){
         cat("Group(s):", group, "\n")
     }
@@ -255,10 +242,28 @@ fit_model <- function(adata_, group_key, cell_identity_key = "None", batch_key =
         design <- model.matrix(~ 0 + group + replica + batch)
     }
+    # create an edgeR object with counts and grouping factor
+    y <- DGEList(assay(adata_, "X"), group = colData(adata_)[[group_key]])
+    # filter out genes with low counts
+    if (verbosity > 1){
+        cat("Dimensions before subsetting:", dim(y), "\n")
+    }
+    keep <- filterByExpr(y)
+    y <- y[keep, , keep.lib.sizes=FALSE]
+    if (verbosity > 1){
+        cat("Dimensions after subsetting:", dim(y), "\n")
+    }
+    # normalize
+    y <- calcNormFactors(y)
     # estimate dispersion
     y <- estimateDisp(y, design = design)
     # fit the model
     fit <- glmQLFit(y, design)
     return(list("fit"=fit, "design"=design, "y"=y))
 }
 """
@@ -282,9 +287,7 @@ def _try_imports():
     except ModuleNotFoundError:
         message = (
             "edger_pseudobulk requires rpy2 and anndata2ri to be installed.\n"
-            "or\n"
-            "$ pip install rpy2 sclab-tools[r]\n"
-            "or\n"
+            "please install with one of the following:\n"
             "$ pip install rpy2 anndata2ri\n"
             "or\n"
             "$ conda install -c conda-forge rpy2 anndata2ri\n"

sclab/tools/differential_expression/_pseudobulk_limma.py ADDED Viewed

@@ -0,0 +1,257 @@
+import pandas as pd
+from anndata import AnnData
+from ._pseudobulk_helpers import aggregate_and_filter
+def pseudobulk_limma(
+    adata_: AnnData,
+    group_key: str,
+    condition_group: str | list[str] | None = None,
+    reference_group: str | None = None,
+    cell_identity_key: str | None = None,
+    batch_key: str | None = None,
+    layer: str | None = None,
+    replicas_per_group: int = 5,
+    min_cells_per_group: int = 30,
+    bootstrap_sampling: bool = False,
+    use_cells: dict[str, list[str]] | None = None,
+    aggregate: bool = True,
+    verbosity: int = 0,
+) -> dict[str, pd.DataFrame]:
+    _try_imports()
+    import anndata2ri  # noqa: F401
+    import rpy2.robjects as robjects
+    from rpy2.rinterface_lib.embedded import RRuntimeError  # noqa: F401
+    from rpy2.robjects import pandas2ri  # noqa: F401
+    from rpy2.robjects.conversion import localconverter  # noqa: F401
+    R = robjects.r
+    if aggregate:
+        aggr_adata = aggregate_and_filter(
+            adata_,
+            group_key,
+            cell_identity_key,
+            layer,
+            replicas_per_group,
+            min_cells_per_group,
+            bootstrap_sampling,
+            use_cells,
+        )
+    else:
+        aggr_adata = adata_.copy()
+    with localconverter(anndata2ri.converter):
+        R.assign("aggr_adata", aggr_adata)
+    # defines the R function for fitting the model with limma
+    R(_fit_model_r_script)
+    if condition_group is None:
+        condition_group_list = aggr_adata.obs[group_key].unique()
+    elif isinstance(condition_group, str):
+        condition_group_list = [condition_group]
+    else:
+        condition_group_list = condition_group
+    if cell_identity_key is not None:
+        cids = aggr_adata.obs[cell_identity_key].unique()
+    else:
+        cids = [""]
+    tt_dict = {}
+    for condition_group in condition_group_list:
+        if reference_group is not None and condition_group == reference_group:
+            continue
+        if verbosity > 0:
+            print(f"Fitting model for {condition_group}...")
+        if reference_group is not None:
+            gk = group_key
+        else:
+            gk = f"{group_key}_{condition_group}"
+        try:
+            R(f"""
+                outs <- fit_limma_model(aggr_adata, "{gk}", "{cell_identity_key}", verbosity = {verbosity})
+                fit <- outs$fit
+                v <- outs$v
+            """)
+        except RRuntimeError as e:
+            print("Error fitting model for", condition_group)
+            print("Error:", e)
+            print("Skipping...", flush=True)
+            continue
+        if reference_group is None:
+            new_contrasts_tuples = [
+                (
+                    condition_group,  # common prefix
+                    "",  # condition group
+                    "not",  # reference group
+                    cid,  # cell identity
+                )
+                for cid in cids
+            ]
+        else:
+            new_contrasts_tuples = [
+                (
+                    "",  # common prefix
+                    condition_group,  # condition group
+                    reference_group,  # reference group
+                    cid,  # cell identity
+                )
+                for cid in cids
+            ]
+        new_contrasts = [
+            f"group{cnd}{prefix}_{cid}".strip("_")
+            + "-"
+            + f"group{ref}{prefix}_{cid}".strip("_")
+            for prefix, cnd, ref, cid in new_contrasts_tuples
+        ]
+        for contrast, contrast_tuple in zip(new_contrasts, new_contrasts_tuples):
+            prefix, cnd, ref, cid = contrast_tuple
+            if ref == "not":
+                cnd, ref = "", "rest"
+            contrast_key = f"{prefix}{cnd}_vs_{ref}"
+            if cid:
+                contrast_key = f"{cell_identity_key}:{cid}|{contrast_key}"
+            if verbosity > 0:
+                print(f"Computing contrast: {contrast_key}... ({contrast})")
+            R(f"myContrast <- makeContrasts('{contrast}', levels = v$design)")
+            R("fit2 <- contrasts.fit(fit, myContrast)")
+            R("fit2 <- eBayes(fit2)")
+            R("tt <- topTable(fit2, n = Inf)")
+            tt: pd.DataFrame = pandas2ri.rpy2py(R("tt"))
+            tt.index.name = "gene_ids"
+            genes = tt.index
+            cnd, ref = [c[5:] for c in contrast.split("-")]
+            tt["pct_expr_cnd"] = aggr_adata.var[f"pct_expr_{cnd}"].loc[genes]
+            tt["pct_expr_ref"] = aggr_adata.var[f"pct_expr_{ref}"].loc[genes]
+            tt["num_expr_cnd"] = aggr_adata.var[f"num_expr_{cnd}"].loc[genes]
+            tt["num_expr_ref"] = aggr_adata.var[f"num_expr_{ref}"].loc[genes]
+            tt["tot_expr_cnd"] = aggr_adata.var[f"tot_expr_{cnd}"].loc[genes]
+            tt["tot_expr_ref"] = aggr_adata.var[f"tot_expr_{ref}"].loc[genes]
+            tt["mean_cnd"] = tt["tot_expr_cnd"] / tt["num_expr_cnd"]
+            tt["mean_ref"] = tt["tot_expr_ref"] / tt["num_expr_ref"]
+            tt_dict[contrast_key] = tt
+    return tt_dict
+_fit_model_r_script = """
+suppressPackageStartupMessages({
+    library(edgeR)
+    library(limma)
+    library(MAST)
+})
+fit_limma_model <- function(adata_, group_key, cell_identity_key = "None", batch_key = "None", verbosity = 0){
+    if (verbosity > 0){
+        cat("Group key:", group_key, "\n")
+        cat("Cell identity key:", cell_identity_key, "\n")
+    }
+    # create a vector that is concatentation of condition and cell type that we will later use with contrasts
+    if (cell_identity_key == "None"){
+        group <- colData(adata_)[[group_key]]
+    } else {
+        group <- paste0(colData(adata_)[[group_key]], "_", colData(adata_)[[cell_identity_key]])
+    }
+    if (verbosity > 1){
+        cat("Group(s):", group, "\n")
+    }
+    group   <- factor(group)
+    replica <- factor(colData(adata_)$replica)
+    # create a design matrix
+    if (batch_key == "None"){
+        design <- model.matrix(~ 0 + group + replica)
+    } else {
+        batch  <- factor(colData(adata_)[[batch_key]])
+        design <- model.matrix(~ 0 + group + replica + batch)
+    }
+    colnames(design) <- make.names(colnames(design))
+    # create an edgeR object with counts and grouping factor
+    y <- DGEList(assay(adata_, "X"), group = group)
+    # filter out genes with low counts
+    if (verbosity > 1){
+        cat("Dimensions before subsetting:", dim(y), "\n")
+    }
+    keep <- filterByExpr(y, design = design)
+    y <- y[keep, , keep.lib.sizes=FALSE]
+    if (verbosity > 1){
+        cat("Dimensions after subsetting:", dim(y), "\n")
+    }
+    # normalize
+    y <- calcNormFactors(y)
+    # Apply voom transformation to prepare for linear modeling
+    v <- voom(y, design, plot = verbosity > 1)
+    # Fit the linear model
+    fit <- lmFit(v, design)
+    ne <- limma::nonEstimable(design)
+    if (!is.null(ne) && verbosity > 0) cat("Non-estimable:", ne, "\n")
+    fit <- eBayes(fit)
+    return(list("fit"=fit, "design"=design, "v"=v))
+}
+"""
+def _try_imports():
+    try:
+        import rpy2.robjects as robjects
+        from rpy2.robjects.packages import PackageNotInstalledError, importr
+        robjects.r("options(warn=-1)")
+        import anndata2ri  # noqa: F401
+        from rpy2.rinterface_lib.embedded import RRuntimeError  # noqa: F401
+        from rpy2.robjects import numpy2ri, pandas2ri  # noqa: F401
+        from rpy2.robjects.conversion import localconverter  # noqa: F401
+        importr("edgeR")
+        importr("limma")
+        importr("MAST")
+        importr("SingleCellExperiment")
+    except ModuleNotFoundError:
+        message = (
+            "pseudobulk_limma requires rpy2 and anndata2ri to be installed.\n"
+            "please install with one of the following:\n"
+            "$ pip install rpy2 anndata2ri\n"
+            "or\n"
+            "$ conda install -c conda-forge rpy2 anndata2ri\n"
+        )
+        print(message)
+        raise ModuleNotFoundError(message)
+    except PackageNotInstalledError:
+        message = (
+            "pseudobulk_limma requires the following R packages to be installed: limma, edgeR, MAST, and SingleCellExperiment.\n"
+            "> \n"
+            "> if (!require('BiocManager', quietly = TRUE)) install.packages('BiocManager');\n"
+            "> BiocManager::install(c('limma', 'edgeR', 'MAST', 'SingleCellExperiment'));\n"
+            "> \n"
+        )
+        print(message)
+        raise ImportError(message)

{sclab-0.3.2.dist-info → sclab-0.3.3.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sclab
-Version: 0.3.2
+Version: 0.3.3
 Summary: sclab
 Author-email: Argenis Arriojas <ArriojasMaldonado001@umb.edu>
 Requires-Python: >=3.10,<3.13
@@ -65,7 +65,6 @@ Open a Jupyter Notebook and run the following:
 ```python
 from IPython.display import display
 from sclab import SCLabDashboard
-from sclab.examples.processor_steps import QC, Preprocess, PCA, Neighbors, UMAP, Cluster
 import scanpy as sc
 # Load your data
@@ -73,8 +72,6 @@ adata = sc.read_10x_h5("your_data.h5")
 # Create dashboard
 dashboard = SCLabDashboard(adata, name="My Analysis")
-# Add desired processing steps to the interface
-dashboard.pr.add_steps({"Processing": [QC, Preprocess, PCA, Neighbors, UMAP, Cluster]})
 # Display dashboard
 display(dashboard)
@@ -84,8 +81,10 @@ display(dashboard)
 # dashboard.pl  # Plotter
 # dashboard.pr  # Processor
-# the resulting AnnData object is found within the dataset object:
+# the active AnnData object is found within the dataset object:
 # dashboard.ds.adata
+# by default, the dashboard will update the loaded AnnData object in-place
 ```
 ## Components
@@ -94,6 +93,7 @@ display(dashboard)
 The main interface that integrates all components with a tabbed layout:
 - Main graph for visualizations
+- Results panel
 - Observations table
 - Genes table
 - Event logs

{sclab-0.3.2.dist-info → sclab-0.3.3.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-sclab/__init__.py,sha256=3ni3MpADkty43xRMRFsxvB_jIrmOjxyMKFGhHsYx8Ho,132
+sclab/__init__.py,sha256=HL5i9EwSQftjS4e4qKLVbTHNbKXTGgFhLoY0r1pf938,132
 sclab/_io.py,sha256=5ISxIPbE233UiOt3QEs9fkLO8DLLEe5HrMnZoR-KLYE,2662
 sclab/_methods_registry.py,sha256=RcffyRuuLzHqsnAdbBL4W1GmZx80d9AxdGjUnx1mbNg,1704
 sclab/_sclab.py,sha256=m9y2EgDxFO5JHZAZIK1098bHdrZxaeWfBZNyGQkFCdA,9143
@@ -36,15 +36,18 @@ sclab/gui/components/__init__.py,sha256=X0-cGJmII76qpWHEBe49miS2gPw3esMAwp61z23w
 sclab/gui/components/_guided_pseudotime.py,sha256=sxI0jmZxD4fxV9CsTJONhINWzIL--YF3nDg0nku2yp8,17670
 sclab/gui/components/_transfer_metadata.py,sha256=o9t9bQ7tr3G2-vFptUu1IiMxGhvMq-QJb_lF7AsDhQQ,6236
 sclab/methods/__init__.py,sha256=d_n5SCyzwMEBZttXwnhgkx2FnD7AxhAK9yBre6Rynfk,1215
-sclab/preprocess/__init__.py,sha256=zGt-TIDRx3qoKDpFxqnZ9yf2AfP4HBy8ZZSFLx-rGj0,547
-sclab/preprocess/_cca.py,sha256=etDk1s3rKX-r0nkRuUaLTJ7NyalauCpWXyKRq1_EwTc,4400
-sclab/preprocess/_cca_integrate.py,sha256=14Tu6TyH7wfZYAM2EsII1R92PTxfhaYbGx4bLWlMLP0,2706
+sclab/preprocess/__init__.py,sha256=NrOFnk9olVkwC9mR5orduyY7rMxAgi9Bgywo9-_Sfkk,664
+sclab/preprocess/_cca.py,sha256=77J_v5IJnHsLJnnhYmKtq2e_mJwQlsdGgB6lSIftf4Q,5080
+sclab/preprocess/_cca_integrate.py,sha256=eIvEdUon7OkNY-kbEqRJeuIaj_m6wk72PcRFow1kH9g,2710
 sclab/preprocess/_filter_obs.py,sha256=uYlcljuaq85G44Si8oxrNRcCCX2nFRdT3RN3ArqnwaY,1166
 sclab/preprocess/_harmony.py,sha256=wpFQXpr13BvljT04I_Rw5JdBvhzvAuinkrRs152CfvQ,13747
 sclab/preprocess/_harmony_integrate.py,sha256=cKN_MyYq9FtwgQZyhgxiFNTZl36YuKLQdEoc7ky-ea4,1737
-sclab/preprocess/_normalize_weighted.py,sha256=h86rQakNoXoRKOepAEoBbBtHV_F-VMG2-uW_LcaSdWs,1587
+sclab/preprocess/_normalize_weighted.py,sha256=Y_Tk3Dvv-Dd4s6D7JkAsuPLCCucrgmnAZBSrDssNFMU,1696
+sclab/preprocess/_pca.py,sha256=fFR03B1_V7CLcZXSn5Ek9HqG_zSLwVK7DUf8XgPZIoQ,1606
+sclab/preprocess/_preprocess.py,sha256=-Ve6HLbyzuqxwoUW1rd1JGb_ZWRwo4KV6ri7l-hvVjY,5200
+sclab/preprocess/_qc.py,sha256=CblkoK0CB2bkjgLuGAxcyYb89ZjhhS5NEhO4RgIQI4I,1159
 sclab/preprocess/_subset.py,sha256=8Vc5jty8WzIf8NZ1mleqNJLAp5CRWvEXGVevlT6ekNk,7066
-sclab/preprocess/_transfer_metadata.py,sha256=HA11JHHpq4ueFTeXlU4K3kHDDUzcUjyvfxpzdPBRNTo,4307
+sclab/preprocess/_transfer_metadata.py,sha256=loNsBG2HnQCE2-miu7cVPD6AHfj7yAMgxKWWndoDfA8,4360
 sclab/preprocess/_transform.py,sha256=n2xHJR3T-rRxZneCFH2oMw9RaQcGGBfHpOu1-YP1c1E,2312
 sclab/preprocess/_utils.py,sha256=dLeS_fIvQGZZfesEtbJKtnPmqjqy3fmyTC4GewRD3Fc,3078
 sclab/scanpy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -60,23 +63,24 @@ sclab/tools/cellflow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 sclab/tools/cellflow/density_dynamics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sclab/tools/cellflow/density_dynamics/_density_dynamics.py,sha256=xzmeIAHLV5xIVERpWMClZViDpJcge_dPsx6GWI0j0R8,11038
 sclab/tools/cellflow/pseudotime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sclab/tools/cellflow/pseudotime/_pseudotime.py,sha256=RiwbOduu0InNdh2Rp8DUXG6-r5wL_vVUGTyTkwnYhTY,9842
+sclab/tools/cellflow/pseudotime/_pseudotime.py,sha256=Wud0ooPtNmManLsNiUgMHtx5TzIHs6vIXsWTu2M1hE0,9940
 sclab/tools/cellflow/pseudotime/timeseries.py,sha256=ZuMAm9LOKksJy2FzsQg3rdYKtLm1G0rbgO6dOdQuIV0,6326
 sclab/tools/cellflow/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sclab/tools/cellflow/utils/density_nd.py,sha256=wwYoXOcF2CRxOArW_CQxJjWDE90wiui4NO7EIBG2RGM,6648
 sclab/tools/cellflow/utils/interpolate.py,sha256=HnpYEBdc4KSPC4QYOglJ2MpipLx3a5ENJQ8uhMnuwRc,9755
 sclab/tools/cellflow/utils/smoothen.py,sha256=yg2_zBrYKGRmXZY8C3pKmX3xGm0GGMI365IKqhgCmP0,3738
 sclab/tools/cellflow/utils/times.py,sha256=lV5ZRjCdBaYELGJ1pGdEBeA0w-WD77lOzPC6R7_kUxo,1811
-sclab/tools/differential_expression/__init__.py,sha256=KKaDzeGGgE1LxnC5aBcPQYcVX_e2h8qAbfMPQVvYlSA,87
-sclab/tools/differential_expression/_pseudobulk_edger.py,sha256=WWqKEA8I1I_YsgjafeznX3tyZoXyu0HV13QkxiYGFgQ,10538
+sclab/tools/differential_expression/__init__.py,sha256=xdmdaCYdJDxiY5g4o8mQHCk4Z6gUZv92gvLiF81nA-M,159
+sclab/tools/differential_expression/_pseudobulk_edger.py,sha256=W5rKnFuLCKTRiuUe3ugRaKfXUvmMS0uAFrYD2bNAq9w,10525
 sclab/tools/differential_expression/_pseudobulk_helpers.py,sha256=raQ0DBBrmrxBbGTKhOyZpZLmeJRX_tWcn3_mzuQctkw,8424
+sclab/tools/differential_expression/_pseudobulk_limma.py,sha256=Hf864a424CGvPBmogjcwEA-7eJKLeVFU44JFWKX51cY,8416
 sclab/tools/doublet_detection/__init__.py,sha256=zWyAPScrHVRaBqWaizVsm2H3oi6yr0OQ5gF-fGY2ZrA,63
 sclab/tools/doublet_detection/_scrublet.py,sha256=koi6MRUS1lWVvdpeNbzpR8njqVFrWEuWoKNMFXQLFec,1953
 sclab/tools/labeling/__init__.py,sha256=o-FJWonGNr2h_pB0o3YfnGl_y1kKU06_rYLmTt8ktlQ,57
 sclab/tools/labeling/sctype.py,sha256=jCsCFnqUgb_s1nTSK-N_5pEL_ZvZw-zUo12fUy9RLfs,8164
 sclab/utils/__init__.py,sha256=Py3dPN9ptMs6D-f7IGYisoxOS2YuX0O1oyw75nci3Os,72
 sclab/utils/_write_excel.py,sha256=DBZg9Kx7Ex6VqFrZFDZbSgvzMtu84iEwKo4nI3I2AT0,17017
-sclab-0.3.2.dist-info/licenses/LICENSE,sha256=LO7qldZoHIo9hc-HMBqclBh5800kZ9US9xTbLAQdHpg,1523
-sclab-0.3.2.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
-sclab-0.3.2.dist-info/METADATA,sha256=YOX9WvDuWL1ew3JOIaAwY6MvpCHCEHxOdTc6dTCp9jM,4437
-sclab-0.3.2.dist-info/RECORD,,
+sclab-0.3.3.dist-info/licenses/LICENSE,sha256=LO7qldZoHIo9hc-HMBqclBh5800kZ9US9xTbLAQdHpg,1523
+sclab-0.3.3.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+sclab-0.3.3.dist-info/METADATA,sha256=E1rrLRdCRHpdTXbHDas0mhtJwO50BsfDgGJCcISo8Q0,4301
+sclab-0.3.3.dist-info/RECORD,,

{sclab-0.3.2.dist-info → sclab-0.3.3.dist-info}/WHEEL RENAMED Viewed

File without changes

{sclab-0.3.2.dist-info → sclab-0.3.3.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sclab 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

Potentially problematic release.

sclab 0.3.2py3-none-any.whl → 0.3.3py3-none-any.whl