PyPI - biopipen - Versions diffs - 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl - Mend

biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (150) hide show

biopipen/__init__.py +1 -1
biopipen/core/filters.py +10 -183
biopipen/core/proc.py +5 -3
biopipen/core/testing.py +8 -1
biopipen/ns/bam.py +40 -4
biopipen/ns/cnv.py +1 -1
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/delim.py +1 -1
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +38 -0
biopipen/ns/plot.py +8 -0
biopipen/ns/scrna.py +307 -288
biopipen/ns/scrna_metabolic_landscape.py +207 -366
biopipen/ns/tcr.py +165 -97
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
biopipen/reports/snp/PlinkCallRate.svelte +2 -2
biopipen/reports/snp/PlinkFreq.svelte +1 -1
biopipen/reports/snp/PlinkHWE.svelte +1 -1
biopipen/reports/snp/PlinkHet.svelte +1 -1
biopipen/reports/snp/PlinkIBD.svelte +1 -1
biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
biopipen/scripts/bam/CNAClinic.R +41 -6
biopipen/scripts/bam/CNVpytor.py +2 -1
biopipen/scripts/bam/ControlFREEC.py +2 -3
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/cnv/AneuploidyScore.R +25 -13
biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
biopipen/scripts/cnv/TMADScore.R +4 -4
biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +14 -2
biopipen/scripts/gene/GeneNameConversion.R +14 -12
biopipen/scripts/gsea/Enrichr.R +2 -2
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/PreRank.R +3 -3
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/plot/VennDiagram.R +2 -2
biopipen/scripts/protein/ProdigySummary.R +34 -27
biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
biopipen/scripts/regulatory/motifs-common.R +10 -9
biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
biopipen/scripts/rnaseq/Simulation.R +0 -2
biopipen/scripts/rnaseq/UnitConversion.R +6 -5
biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
biopipen/scripts/scrna/CellCellCommunication.py +1 -1
biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
biopipen/scripts/scrna/CellsDistribution.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MarkersFinder.R +348 -217
biopipen/scripts/scrna/MetaMarkers.R +3 -3
biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
biopipen/scripts/scrna/RadarPlots.R +1 -1
biopipen/scripts/scrna/ScFGSEA.R +157 -75
biopipen/scripts/scrna/ScSimulation.R +11 -10
biopipen/scripts/scrna/ScVelo.py +605 -0
biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
biopipen/scripts/scrna/SeuratClustering.R +31 -48
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
biopipen/scripts/scrna/SeuratPreparing.R +76 -24
biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
biopipen/scripts/snp/MatrixEQTL.R +39 -20
biopipen/scripts/snp/PlinkCallRate.R +43 -34
biopipen/scripts/snp/PlinkFreq.R +34 -41
biopipen/scripts/snp/PlinkHWE.R +23 -18
biopipen/scripts/snp/PlinkHet.R +26 -22
biopipen/scripts/snp/PlinkIBD.R +30 -34
biopipen/scripts/stats/ChowTest.R +9 -8
biopipen/scripts/stats/DiffCoexpr.R +13 -11
biopipen/scripts/stats/LiquidAssoc.R +7 -8
biopipen/scripts/stats/Mediation.R +8 -8
biopipen/scripts/stats/MetaPvalue.R +11 -13
biopipen/scripts/stats/MetaPvalue1.R +6 -5
biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
biopipen/scripts/tcr/ClonalStats.R +5 -4
biopipen/scripts/tcr/CloneResidency.R +3 -3
biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
biopipen/scripts/tcr/ScRepLoading.R +114 -92
biopipen/scripts/tcr/TCRClusterStats.R +2 -2
biopipen/scripts/tcr/TCRClustering.R +86 -97
biopipen/scripts/tcr/TESSA.R +65 -115
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
biopipen/utils/common_docstrs.py +66 -63
biopipen/utils/reporter.py +177 -0
{biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
{biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/RECORD +131 -144
{biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
biopipen/utils/caching.R +0 -44
biopipen/utils/gene.R +0 -95
biopipen/utils/gsea.R +0 -329
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -602
biopipen/utils/mutate_helpers.R +0 -581
biopipen/utils/plot.R +0 -209
biopipen/utils/repr.R +0 -146
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -207
{biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0

biopipen/scripts/scrna/SeuratMap2Ref.R CHANGED Viewed

@@ -1,19 +1,12 @@
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
-library(parallel)
 library(Seurat)
-library(SeuratDisk)
 library(rlang)
-library(dplyr)
-library(tidyr)
-library(ggplot2)
-library(ggprism)
+library(biopipen.utils)
 set.seed(8525)
-theme_set(theme_prism())
 sobjfile = {{in.sobjfile | r}}
 outfile = {{out.outfile | r}}
+joboutdir = {{job.outdir | r}}
 use = {{envs.use | r}}
 ident = {{envs.ident | r}}
 ref = {{envs.ref | r}}
@@ -25,8 +18,16 @@ skip_if_normalized = {{envs.skip_if_normalized | r}}
 sctransform_args = {{envs.SCTransform | r: todot="-"}}
 normalizedata_args = {{envs.NormalizeData | r: todot="-"}}
 findtransferanchors_args = {{envs.FindTransferAnchors | r: todot="-"}}
-mappingscore_args = {{envs.MappingScore | r: todot="-"}}
 mapquery_args = {{envs.MapQuery | r: todot="-"}}
+cache = {{envs.cache | r}}
+plots = {{envs.plots | r}}
+log <- get_logger()
+reporter <- get_reporter()
+options(future.globals.maxSize = 8 * 1024 ^ 4)
+options(future.rng.onMisuse="ignore")
+options(Seurat.object.assay.version = "v5")
 # See if we have a reference
 if (is.null(ref)) {
@@ -37,376 +38,74 @@ if (is.null(use)) {
     stop("No use provided (envs.use), don't know which column to transfer as cluster")
 }
-if (is.null(mapquery_args$refdata) || length(mapquery_args$refdata) == 0) {
-    mapquery_args$refdata = list()
-}
-mapquery_args$refdata[[use]] = use
 outdir = dirname(outfile)
+if (isTRUE(cache)) {
+    cache = joboutdir
+}
 if (is.null(split_by)) {
     options(future.globals.maxSize = 8 * 1024 ^ 4)
     future::plan(strategy = "multicore", workers = ncores)
 }
-.is_sct <- function(x) {
-    return(Seurat:::IsSCT(assay = x@assays[[DefaultAssay(x)]]))
-}
-.expand_dims = function(args, name = "dims") {
-    # Expand dims from 30 to 1:30
-    if (is.numeric(args[[name]]) && length(args[[name]] == 1)) {
-        args[[name]] = 1:args[[name]]
-    }
-    args
-}
-findtransferanchors_args = .expand_dims(findtransferanchors_args)
-# Load reference
-log_info("- Loading reference")
-if (endsWith(ref, ".rds") || endsWith(ref, ".RDS")) {
-    reference = readRDS(ref)
-} else if (endsWith(ref, ".h5ad") || endsWith(ref, ".H5AD")) {
-    reference = ReadH5AD(ref)
+log$info("Loading reference ...")
+if (endsWith(ref, ".rds") || endsWith(ref, ".RDS") || endsWith(ref, ".qs") || endsWith(ref, ".qs2")) {
+    reference <- read_obj(ref)
+} else if (endsWith(ref, ".h5seurat") || endsWith(ref, ".H5Seurat")) {
+    reference <- SeuratDisk::LoadH5Seurat(ref)
 } else {
-    reference = LoadH5Seurat(ref)
-}
-reference = UpdateSeuratObject(reference)
-reference = UpdateSCTAssays(reference)
-# check if refdata exists in the reference
-for (rname in names(mapquery_args$refdata)) {
-    use_name <- mapquery_args$refdata[[rname]]
-    # transferring an assay
-    if (use_name %in% names(reference)) { next }
-    # transferring a metadata column
-    if (!use_name %in% colnames(reference@meta.data)) {
-        stop(paste0(
-            "The reference does not have the column '",
-            use_name,
-            "' in either assays or metadata. "
-        ))
-        if (startsWith(use_name, "predicted.")) {
-            stop(paste0(
-                "Do you mean: ", substring(use_name, 11),
-            ))
-        }
-    }
-}
-if (refnorm == "auto") {
-    refnorm = ifelse (.is_sct(reference), "SCTransform", "NormalizeData")
-}
-if (refnorm == "SCTransform") {
-    # Check if the reference is SCTransform'ed
-    if (!.is_sct(reference)) {
-        stop("Reference is not SCTransform'ed")
-    }
-    n_models = length(x = slot(object = reference[[DefaultAssay(reference)]], name = "SCTModel.list"))
-    if (n_models == 0) {
-        stop("Reference doesn't contain SCTModel.")
-    }
-}
-log_info("  Normalization method used: {refnorm}")
-if (refnorm == "SCTransform") {
-    findtransferanchors_args$normalization.method = "SCT"
-} else if (refnorm == "NormalizeData") {
-    findtransferanchors_args$normalization.method = "LogNormalize"
-} else {
-    stop(paste0("Unknown normalization method: ", refnorm))
-}
-# Load Seurat object
-log_info("- Loading Seurat object")
-sobj = readRDS(sobjfile)
-defassay <- DefaultAssay(sobj)
-if (!is.null(mutaters) && length(mutaters) > 0) {
-    log_info("- Applying mutaters")
-    sobj@meta.data <- sobj@meta.data %>% mutate(!!!lapply(mutaters, parse_expr))
-}
-if (!is.null(split_by)) {
-    # check if each split has more than 100 cells
-    cellno = table(sobj@meta.data[[split_by]])
-    cellno = cellno[cellno < 100]
-    if (length(cellno) > 0) {
-        # stop and print the splits with # cells
-        stop(paste0(
-            "The following splits have less than 100 cells: \n",
-            paste0("- ", names(cellno), ": ", cellno, collapse = "\n"),
-            "\n\n",
-            "You can use `envs.mutaters` to merge these splits and use `newsplit` as `envs.split_by`: \n",
-            "> mutaters = {\n",
-            ">   newsplit = \"if_else(oldsplit %in% c('split1', 'split2'), 'mergedsplit', oldsplit)\"\n",
-            "> }\n"
-        ))
-    }
-    sobj = SplitObject(sobj, split.by = split_by)
-}
-# Normalize data
-log_info("- Normalizing data")
-if (refnorm == "SCTransform") {
-    if (defassay == "SCT" && skip_if_normalized) {
-        log_warn("  Skipping normalization as the object is already SCTransform'ed")
-    } else {
-        log_info("  Using SCTransform normalization")
-        sctransform_args$residual.features = rownames(x = reference)
-        if (is.null(split_by)) {
-            sctransform_args$object = sobj
-            sobj = do_call(SCTransform, sctransform_args)
-            sctransform_args$object <- NULL
-            rm(sctransform_args)
-            gc()
-        } else {
-            sobj = mclapply(
-                X = sobj,
-                FUN = function(x) {
-                    sctransform_args$object = x
-                    do_call(SCTransform, sctransform_args)
-                },
-                mc.cores = ncores
-            )
-            if (any(unlist(lapply(sobj, class)) == "try-error")) {
-                stop(paste0("\nmclapply (SCTransform) error:", sobj))
-            }
-        }
-    }
-} else {
-    if (defassay == "RNA" && skip_if_normalized) {
-        log_warn("  Skipping normalization as the object is already LogNormalize'd")
-    } else {
-        log_info("  Using NormalizeData normalization")
-        if (is.null(split_by)) {
-            normalizedata_args$object = sobj
-            sobj = do_call(NormalizeData, normalizedata_args)
-        } else {
-            sobj = mclapply(
-                X = sobj,
-                FUN = function(x) {
-                    normalizedata_args$object = x
-                    do_call(NormalizeData, normalizedata_args)
-                },
-                mc.cores = ncores
-            )
-            if (any(unlist(lapply(sobj, class)) == "try-error")) {
-                stop(paste0("\nmclapply (NormalizeData) error:", sobj))
-            }
-        }
-        normalizedata_args$object <- NULL
-        rm(normalizedata_args)
-        gc()
-    }
-}
-# Find anchors between query and reference
-log_info("- Finding anchors")
-findtransferanchors_args$reference = reference
-if (is.null(split_by)) {
-    findtransferanchors_args$query = sobj
-    anchors = do_call(FindTransferAnchors, findtransferanchors_args)
-    findtransferanchors_args$reference = NULL
-    findtransferanchors_args$query = NULL
-    rm(findtransferanchors_args)
-    gc()
-} else {
-    anchors = mclapply(
-        X = sobj,
-        FUN = function(x) {
-            findtransferanchors_args$query = x
-            do_call(FindTransferAnchors, findtransferanchors_args)
-        },
-        mc.cores = ncores
-    )
-    if (any(unlist(lapply(anchors, class)) == "try-error")) {
-        stop(paste0("\nmclapply (FindTransferAnchors) error:", anchors))
-    }
-}
-# Map query to reference
-log_info("- Mapping query to reference")
-mapquery_args$reference = reference
-if (is.null(split_by)) {
-    mapquery_args$query = sobj
-    mapquery_args$anchorset = anchors
-    sobj = do_call(MapQuery, mapquery_args)
-    mapquery_args$reference = NULL
-    mapquery_args$query = NULL
-    mapquery_args$anchorset = NULL
-    gc()
-} else {
-    sobj = mclapply(
-        X = seq_along(sobj),
-        FUN = function(i) {
-            mapquery_args$query = sobj[[i]]
-            mapquery_args$anchorset = anchors[[i]]
-            do_call(MapQuery, mapquery_args)
-        },
-        mc.cores = ncores
-    )
-    if (any(unlist(lapply(sobj, class)) == "try-error")) {
-        stop(paste0("\nmclapply (MapQuery) error:", sobj))
-    }
-}
-# Calculating mapping score
-log_info("- Calculating mapping score")
-mappingscore_sob_msg = paste0(
-    "While calculating mapping score, the following error was encountered: \n",
-    "subscript out of bounds.  \n\n",
-    "You may want to try a smaller `ndim` (default: 50) in `envs.MappingScore`."
-)
-if (is.null(split_by)) {
-    mappingscore_args$anchors = anchors
-    mappingscore = tryCatch({
-        do_call(MappingScore, mappingscore_args)
-    }, error = function(e) {
-        if (e$message == "subscript out of bounds") stop(mappingscore_sob_msg)
-        stop(e)
-    })
-    mappingscore_args$anchors = NULL
-    rm(mappingscore_args)
-    gc()
-} else {
-    mappingscore = mclapply(
-        X = seq_along(sobj),
-        FUN = function(i) {
-            mappingscore_args$anchors = anchors[[i]]
-            tryCatch({
-                do_call(MappingScore, mappingscore_args)
-            }, error = function(e) {
-                if (e$message == "subscript out of bounds") stop(mappingscore_sob_msg)
-                stop(e)
-            })
-        },
-        mc.cores = ncores
-    )
-    if (any(unlist(lapply(mappingscore, class)) == "try-error")) {
-        stop(paste0("\nmclapply (MappingScore) error:", mappingscore))
-    }
-}
-# Calculate mapping score and add to metadata
-log_info("- Adding mapping score to metadata")
-if (is.null(split_by)) {
-    sobj = AddMetaData(
-        object = sobj,
-        metadata = mappingscore,
-        col.name = "mapping.score"
-    )
-} else {
-    sobj = mclapply(
-        X = seq_along(sobj),
-        FUN = function(i) {
-            AddMetaData(
-                object = sobj[[i]],
-                metadata = mappingscore[[i]],
-                col.name = "mapping.score"
-            )
-        },
-        mc.cores = ncores
-    )
-    if (any(unlist(lapply(sobj, class)) == "try-error")) {
-        stop(paste0("\nmclapply (AddMetaData) error:", sobj))
-    }
-    # Combine the results
-    log_info("- Merging the results")
-    gc()
-    # Memory efficient way to merge the results
-    # query = Reduce(function(x, y) merge(x, y, merge.dr = "ref.umap"), query)
-    sobj = merge(sobj[[1]], sobj[2:length(sobj)], merge.dr = "ref.umap")
-}
-# Add the alias to the metadata for the clusters
-log_info("- Adding ident to metadata and set as ident")
-sobj@meta.data = sobj@meta.data %>% mutate(
-    !!sym(ident) := as.factor(!!parse_expr(paste0("predicted.", use)))
+    stop("Reference file must be .qs, .qs2, .rds, .RDS, .h5seurat or .H5Seurat")
+}
+reference <- tryCatch(JoinLayers(reference), error = function(e) {reference})
+Idents(reference) <- reference@meta.data[[use]]
+log$info("Loading query data ...")
+sobj <- read_obj(sobjfile)
+sobj <- RunSeuratMap2Ref(
+    object = sobj, ref = reference, use = use,
+    ident = ident, refnorm = refnorm, skip_if_normalized = skip_if_normalized,
+    split_by = split_by, ncores = ncores,
+    SCTransformArgs = sctransform_args,
+    NormalizeDataArgs = normalizedata_args,
+    FindTransferAnchorsArgs = findtransferanchors_args,
+    MapQueryArgs = mapquery_args,
+    log = log, cache = cache
 )
-Idents(sobj) = ident
-# Check if PrepSCTFindMarkers is done
-if (.is_sct(sobj) && is.null(sobj@commands$PrepSCTFindMarkers)) {
-    log_info("- Running PrepSCTFindMarkers ...")
-    sobj <- PrepSCTFindMarkers(sobj)
-    # compose a new SeuratCommand to record it to sobj@commands
-    commands <- names(pbmc_small@commands)
-    scommand <- pbmc_small@commands[[commands[length(commands)]]]
-    scommand@time.stamp <- Sys.time()
-    scommand@assay.used <- DefaultAssay(sobj)
-    scommand@call.string <- "PrepSCTFindMarkers(object = sobj)"
-    scommand@params <- list()
-    sobj@commands$PrepSCTFindMarkers <- scommand
-}
 # Save
-log_info("- Saving result ...")
-saveRDS(sobj, file = outfile)
-# ############################
-# Some plots
-# ############################
-log_info("- Plotting mapping score ...")
-p <- FeaturePlot(
-    object = sobj,
-    reduction = "ref.umap",
-    features = "mapping.score",
-    cols = c("white", "blue"),
-    pt.size = 0.5
-) + ggtitle("Mapping score for query cells")
-save_plot(p, file.path(outdir, "mapping_score"), list(width = 800, height = 600, res = 100))
+gc()
+log$info("Saving result ...")
+save_obj(sobj, file = outfile)
-log_info("- Plotting for transferred data ...")
-ref.reduction = mapquery_args$reduction.model %||% "wnn.umap"
-for (qname in names(mapquery_args$refdata)) {
-    rname <- mapquery_args$refdata[[qname]]
-    if (grepl("Array", class(reference[[rname]])) && grepl("Array", class(sobj[[qname]]))) {
-        log_warn("  Skipping transferred array: {qname} -> {rname}")
+### Plotting
+log$info("Plotting features ...")
+for (name in names(plots)) {
+    if (is.null(plots[[name]])) {
         next
     }
-    log_info("  UMAP for transferred data: {qname} -> {rname}")
-    ref_p <- DimPlot(
-        object = reference,
-        reduction = ref.reduction,
-        group.by = rname,
-        label = TRUE,
-        label.size = 3,
-        repel = TRUE,
-    ) + NoLegend()
-    query_p <- DimPlot(
-        object = sobj,
-        reduction = "ref.umap",
-        group.by = paste0("predicted.", qname),
-        label = TRUE,
-        label.size = 3,
-        repel = TRUE,
-    ) + NoLegend()
-    p <- ref_p | query_p
-    prefix <- file.path(outdir, paste0("UMAPs-", slugify(qname)))
-    save_plot(p, prefix, list(width = 1500, height = 700, res = 100))
-    # summarize the stats
-    log_info("  Summarizing stats: {qname} -> {rname}")
-    ref_stats <- as.data.frame(table(reference@meta.data[[rname]]))
-    colnames(ref_stats) <- c("CellType", "Count_Ref")
-    query_stats <- as.data.frame(table(sobj@meta.data[[paste0("predicted.", qname)]]))
-    colnames(query_stats) <- c("CellType", "Count_Query")
-    stats <- left_join(ref_stats, query_stats, by = "CellType") %>%
-        replace_na(list(Count_Query = 0)) %>%
-        arrange(desc(Count_Query), desc(Count_Ref))
-    write.table(
-        stats,
-        file = file.path(outdir, paste0("stats-", slugify(qname), ".txt")),
-        row.names = FALSE,
-        quote = FALSE,
-        sep = "\t"
+    log$info("- {name} ...")
+    plots[[name]]$features <- gsub("{use}", use, plots[[name]]$features, fixed = TRUE)
+    plots[[name]]$features <- gsub("{ident}", ident, plots[[name]]$features, fixed = TRUE)
+    plots[[name]]$devpars <- plots[[name]]$devpars %||% list()
+    plots[[name]]$devpars$res <- plots[[name]]$devpars$res %||% 100
+    plots[[name]]$devpars$width <- plots[[name]]$devpars$width %||% 1200
+    plots[[name]]$devpars$height <- plots[[name]]$devpars$height %||% 720
+    plots[[name]]$more_formats <- plots[[name]]$more_formats %||% character()
+    plots[[name]]$save_code <- FALSE
+    plots[[name]]$descr <- plots[[name]]$descr %||% name
+    extract_vars(plots[[name]], "devpars", "more_formats", "save_code", "descr")
+    plot_fn <- gglogger::register(VizSeuratMap2Ref)
+    p <- do_call(plot_fn, c(list(query = sobj, ref = reference), plots[[name]]))
+    prefix <- file.path(outdir, paste0(slugify(name), ".map2ref"))
+    save_plot(p, prefix, devpars, formats = c("png", more_formats))
+    reporter$add(
+        reporter$image(prefix, more_formats, save_code = FALSE, kind = "image"),
+        h1 = name
     )
 }
+reporter$save(joboutdir)

biopipen/scripts/scrna/SeuratMetadataMutater.R CHANGED Viewed

@@ -1,17 +1,15 @@
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
-{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
 library(rlang)
 library(tibble)
 library(dplyr)
 library(Seurat)
+library(biopipen.utils)
-srtobj = {{in.srtobj | quote}}
+srtobj = {{in.srtobj | r}}
 metafile = {{in.metafile | r}}
 mutaters = {{envs.mutaters | r}}
-rdsfile = {{out.rdsfile | quote}}
+outfile = {{out.outfile | r}}
-srt = readRDS(srtobj)
+srt = read_obj(srtobj)
 metadata = srt@meta.data
 if (!is.null(metafile)) {
@@ -40,4 +38,4 @@ if (!is.null(expr) && length(expr) > 0) {
     srt@meta.data = metadata
 }
-saveRDS(srt, rdsfile)
+save_obj(srt, outfile)

biopipen/scripts/scrna/SeuratPreparing.R CHANGED Viewed

@@ -5,9 +5,9 @@ library(dplyr)
 library(glue)
 library(biopipen.utils)
-metafile <- {{in.metafile | quote}}
-rdsfile <- {{out.rdsfile | quote}}
-joboutdir <- {{job.outdir | quote}}
+metafile <- {{in.metafile | r}}
+outfile <- {{out.outfile | r}}
+joboutdir <- {{job.outdir | r}}
 envs <- {{envs | r: todot = "-", skip = 1}}
 if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
@@ -30,7 +30,9 @@ reporter$add(
             "<p>Cell filters: ", html_escape(envs$cell_qc), "</p>",
             "<p>Gene filters: </p>",
             "<p>- Min Cells: ", envs$gene_qc$min_cells, "</p>",
-            "<p>- Excludes: ", html_escape(envs$gene_qc$excludes %||% "Not set"), "</p>"
+            "<p>- Excludes: ",
+            ifelse(is.null(envs$gene_qc$excludes), "Not set", paste(envs$gene_qc$excludes, collapse = ", ")),
+            "</p>"
         )
     ),
     h1 = "Filters and QC"
@@ -57,43 +59,77 @@ dir.create(qcdir, showWarnings = FALSE, recursive = TRUE)
 sobj <- LoadSeuratAndPerformQC(
     metadata,
-    per_sample_qc = envs$cell_qc_per_sample,
+    min_cells = envs$min_cells,
+    min_features = envs$min_features,
     cell_qc = envs$cell_qc,
     gene_qc = envs$gene_qc,
     tmpdir = joboutdir,
     log = log,
     cache = envs$cache)
-log$info("Saving dimension table ...")
-dim_df <- data.frame(
-    when = c("Before QC", "After QC"),
-    nCells = c(nrow(sobj@misc$cell_qc_df), sum(sobj@misc$cell_qc_df$.QC)),
-    nGenes = c(sobj@misc$gene_qc$before, sobj@misc$gene_qc$after)
-)
-write.table(dim_df, file = file.path(qcdir, "dim.txt"),
+log$info("Saving and visualizing QC results ...")
+cell_qc_df <- VizSeuratCellQC(sobj, plot_type = "table")
+write.table(cell_qc_df, file = file.path(qcdir, "cell_qc.txt"),
             row.names = FALSE, quote = FALSE, sep = "\t")
 reporter$add(
     list(
-        kind = "descr",
-        content = "The dimension table for the Seurat object. The table contains the number of cells and genes before and after QC. Note that the cell QC is performed before gene QC."
+        name = "Cell QC metrics",
+        contents = list(
+            list(
+                kind = "descr",
+                content = paste0(
+                    "The table below show the number of cells in each sample that failed and passed the QC filters. ",
+                    "The last row shows the total number of cells that failed and passed the QC filters across all samples. "
+                )
+            ),
+            list(kind = "table", src = file.path(qcdir, "cell_qc.txt"))
+        )
     ),
+    h1 = "Filters and QC",
+    h2 = "Cell-level Quality Control",
+    ui = "tabs"
+)
+gene_qc_df <- VizSeuratGeneQC(sobj, plot_type = "table")
+write.table(gene_qc_df, file = file.path(qcdir, "gene_qc.txt"),
+            row.names = FALSE, quote = FALSE, sep = "\t")
+reporter$add(
     list(
-        kind = "table",
-        data = list(path = file.path(qcdir, "dim.txt"))
+        name = "Gene QC metrics",
+        contents = list(
+            list(
+                kind = "descr",
+                content = paste0(
+                    "The table below show the number of genes in each sample that failed and passed the QC filters. ",
+                    "The last row shows the final number of genes that failed and passed the QC filters across all samples. ",
+                    "Any gene that failed the QC filters will be excluded in the merged Seurat object."
+                )
+            ),
+            list(kind = "table", src = file.path(qcdir, "gene_qc.txt")),
+            list(kind = "list", items = list(paste0(
+                "We may still end up with features slightly less than the final passed ones. ",
+                "For example, when SCTransform is used, the number of features may be less than the number of genes that passed the QC filters. ",
+                "This is because SCTransform selects the top N features based on variance. "
+            )))
+        )
     ),
     h1 = "Filters and QC",
-    h2 = "Dimension table"
+    h2 = "Gene-level Quality Control",
+    ui = "tabs"
 )
-log$info("Visualizing QC metrics ...")
 for (pname in names(envs$qc_plots)) {
+    if (is.null(envs$qc_plots[[pname]])) next
+    log$info("- {pname} ...")
     args <- envs$qc_plots[[pname]]
     args$kind <- args$kind %||% "cell"
     args$devpars <- args$devpars %||% list()
     args$more_formats <- args$more_formats %||% character()
     args$save_code <- args$save_code %||% FALSE
-    extract_vars(args, "kind", "devpars", "more_formats", "save_code")
+    args$descr <- args$descr %||% pname
+    extract_vars(args, "kind", "devpars", "more_formats", "save_code", "descr")
     if (kind == "gene") kind <- "gene_qc"
     if (kind == "cell") kind <- "cell_qc"
     args$object <- sobj
@@ -103,21 +139,31 @@ for (pname in names(envs$qc_plots)) {
         gglogger::register(VizSeuratGeneQC)
     }
     p <- do_call(plot_fn, args)
-    prefix <- file.path(qcdir, paste0(slugify(pname), "_", kind))
+    prefix <- file.path(qcdir, paste0(slugify(pname), ".", kind))
     save_plot(p, prefix, devpars, formats = c("png", more_formats))
     if (save_code) {
         save_plotcode(p, prefix,
-            setup = c("library(biopipen.utils)", "load('data.RData')", "invisible(list2env('args'))"),
+            setup = c("library(biopipen.utils)", "load('data.RData')", "invisible(list2env(args, envir = .GlobalEnv))"),
             "args",
             auto_data_setup = FALSE)
     }
     reporter$add(
-        reporter$image(prefix, more_formats, save_code, kind = "image"),
+        list(
+            name = pname,
+            contents = list(
+                list(kind = "descr", content = descr),
+                reporter$image(prefix, more_formats, save_code, kind = "image")
+            )
+        ),
         h1 = "Filters and QC",
-        h2 = html_escape(pname)
+        h2 = ifelse(kind == "cell_qc", "Cell-level Quality Control", "Gene-level Quality Control"),
+        ui = "tabs"
     )
 }
+log$info("Filtering with QC criteria ...")
+sobj <- FinishSeuratQC(sobj)
 sobj <- RunSeuratTransformation(
     sobj,
     use_sct = envs$use_sct,
@@ -194,6 +240,12 @@ if (!identical(envs$doublet_detector, "none")) {
     sobj <- subset(sobj, subset = !!sym(paste0(sobj@misc$doublets$tool, "_DropletType")) != "doublet")
 }
+if (!is.null(envs$mutaters) && length(envs$mutaters) > 0) {
+    log$info("Mutating metadata ...")
+    sobj@meta.data <- sobj@meta.data %>%
+        mutate(!!!lapply(envs$mutaters, rlang::parse_expr))
+}
 log$info("Saving QC'ed seurat object ...")
 reporter$save(joboutdir)
-saveRDS(sobj, rdsfile)
+save_obj(sobj, outfile)

biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl