PyPI - biopipen - Versions diffs - 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl - Mend

biopipen-0.21.0-py3-none-any.whl → biopipen-0.34.26-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290) [hide | show]

biopipen/__init__.py +1 -1
biopipen/core/config.toml +28 -0
biopipen/core/filters.py +79 -4
biopipen/core/proc.py +12 -3
biopipen/core/testing.py +75 -3
biopipen/ns/bam.py +148 -6
biopipen/ns/bed.py +75 -0
biopipen/ns/cellranger.py +186 -0
biopipen/ns/cellranger_pipeline.py +126 -0
biopipen/ns/cnv.py +19 -3
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/cnvkit_pipeline.py +20 -12
biopipen/ns/delim.py +34 -35
biopipen/ns/gene.py +68 -23
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +39 -14
biopipen/ns/plot.py +304 -1
biopipen/ns/protein.py +183 -0
biopipen/ns/regulatory.py +290 -0
biopipen/ns/rnaseq.py +142 -5
biopipen/ns/scrna.py +2053 -473
biopipen/ns/scrna_metabolic_landscape.py +228 -382
biopipen/ns/snp.py +659 -0
biopipen/ns/stats.py +484 -0
biopipen/ns/tcr.py +683 -98
biopipen/ns/vcf.py +236 -2
biopipen/ns/web.py +97 -6
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/common.svelte +15 -0
biopipen/reports/protein/ProdigySummary.svelte +16 -0
biopipen/reports/scrna/CellsDistribution.svelte +4 -39
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna/MarkersFinder.svelte +6 -126
biopipen/reports/scrna/MetaMarkers.svelte +3 -75
biopipen/reports/scrna/RadarPlots.svelte +4 -20
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
biopipen/reports/snp/PlinkCallRate.svelte +24 -0
biopipen/reports/snp/PlinkFreq.svelte +18 -0
biopipen/reports/snp/PlinkHWE.svelte +18 -0
biopipen/reports/snp/PlinkHet.svelte +18 -0
biopipen/reports/snp/PlinkIBD.svelte +18 -0
biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
biopipen/reports/tcr/ClonalStats.svelte +16 -0
biopipen/reports/tcr/CloneResidency.svelte +3 -93
biopipen/reports/tcr/Immunarch.svelte +4 -155
biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
biopipen/reports/tcr/TESSA.svelte +11 -28
biopipen/reports/utils/misc.liq +22 -7
biopipen/scripts/bam/BamMerge.py +11 -15
biopipen/scripts/bam/BamSampling.py +90 -0
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +38 -0
biopipen/scripts/bam/CNAClinic.R +41 -5
biopipen/scripts/bam/CNVpytor.py +153 -54
biopipen/scripts/bam/ControlFREEC.py +13 -14
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +138 -0
biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
biopipen/scripts/cnv/AneuploidyScore.R +55 -20
biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
biopipen/scripts/cnv/TMADScore.R +25 -9
biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +116 -118
biopipen/scripts/gene/GeneNameConversion.R +67 -0
biopipen/scripts/gene/GenePromoters.R +61 -0
biopipen/scripts/gsea/Enrichr.R +5 -5
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/GSEA.R +2 -2
biopipen/scripts/gsea/PreRank.R +5 -5
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/misc/Shell.sh +15 -0
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/plot/Heatmap.R +3 -3
biopipen/scripts/plot/Manhattan.R +147 -0
biopipen/scripts/plot/QQPlot.R +146 -0
biopipen/scripts/plot/ROC.R +88 -0
biopipen/scripts/plot/Scatter.R +112 -0
biopipen/scripts/plot/VennDiagram.R +5 -9
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +119 -0
biopipen/scripts/protein/ProdigySummary.R +140 -0
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
biopipen/scripts/regulatory/MotifScan.py +159 -0
biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
biopipen/scripts/regulatory/motifs-common.R +324 -0
biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
biopipen/scripts/rnaseq/Simulation.R +21 -0
biopipen/scripts/rnaseq/UnitConversion.R +325 -54
biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
biopipen/scripts/scrna/CellCellCommunication.py +150 -0
biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
biopipen/scripts/scrna/CellSNPLite.py +30 -0
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
biopipen/scripts/scrna/CellsDistribution.R +456 -167
biopipen/scripts/scrna/DimPlots.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
biopipen/scripts/scrna/ExprImputation.R +7 -0
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MQuad.py +25 -0
biopipen/scripts/scrna/MarkersFinder.R +679 -400
biopipen/scripts/scrna/MetaMarkers.R +265 -161
biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
biopipen/scripts/scrna/RadarPlots.R +355 -134
biopipen/scripts/scrna/ScFGSEA.R +298 -100
biopipen/scripts/scrna/ScSimulation.R +65 -0
biopipen/scripts/scrna/ScVelo.py +617 -0
biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
biopipen/scripts/scrna/SeuratClustering.R +36 -233
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
biopipen/scripts/scrna/SeuratPreparing.R +223 -173
biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
biopipen/scripts/scrna/SeuratTo10X.R +27 -0
biopipen/scripts/scrna/Slingshot.R +65 -0
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
biopipen/scripts/scrna/scvelo_paga.py +313 -0
biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
biopipen/scripts/snp/MatrixEQTL.R +217 -0
biopipen/scripts/snp/Plink2GTMat.py +148 -0
biopipen/scripts/snp/PlinkCallRate.R +199 -0
biopipen/scripts/snp/PlinkFilter.py +100 -0
biopipen/scripts/snp/PlinkFreq.R +291 -0
biopipen/scripts/snp/PlinkFromVcf.py +81 -0
biopipen/scripts/snp/PlinkHWE.R +85 -0
biopipen/scripts/snp/PlinkHet.R +96 -0
biopipen/scripts/snp/PlinkIBD.R +196 -0
biopipen/scripts/snp/PlinkSimulation.py +124 -0
biopipen/scripts/snp/PlinkUpdateName.py +124 -0
biopipen/scripts/stats/ChowTest.R +146 -0
biopipen/scripts/stats/DiffCoexpr.R +152 -0
biopipen/scripts/stats/LiquidAssoc.R +135 -0
biopipen/scripts/stats/Mediation.R +108 -0
biopipen/scripts/stats/MetaPvalue.R +130 -0
biopipen/scripts/stats/MetaPvalue1.R +74 -0
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/Attach2Seurat.R +3 -2
biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
biopipen/scripts/tcr/CDR3Clustering.R +343 -0
biopipen/scripts/tcr/ClonalStats.R +526 -0
biopipen/scripts/tcr/CloneResidency.R +255 -131
biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
biopipen/scripts/tcr/GIANA/query.py +164 -162
biopipen/scripts/tcr/Immunarch-basic.R +31 -9
biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
biopipen/scripts/tcr/Immunarch.R +63 -11
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
biopipen/scripts/tcr/SampleDiversity.R +1 -1
biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
biopipen/scripts/tcr/ScRepLoading.R +166 -0
biopipen/scripts/tcr/TCRClusterStats.R +176 -22
biopipen/scripts/tcr/TCRDock.py +110 -0
biopipen/scripts/tcr/TESSA.R +102 -118
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/tcr/immunarch-patched.R +142 -0
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +113 -0
biopipen/scripts/vcf/BcftoolsView.py +73 -0
biopipen/scripts/vcf/TruvariBench.sh +14 -7
biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
biopipen/scripts/vcf/TruvariConsistency.R +1 -1
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +13 -4
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +52 -0
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
biopipen/scripts/web/gcloud_common.py +49 -0
biopipen/utils/gene.py +108 -60
biopipen/utils/misc.py +146 -20
biopipen/utils/reference.py +64 -20
biopipen/utils/reporter.py +177 -0
biopipen/utils/vcf.py +1 -1
biopipen-0.34.26.dist-info/METADATA +27 -0
biopipen-0.34.26.dist-info/RECORD +292 -0
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
biopipen/ns/bcftools.py +0 -111
biopipen/ns/scrna_basic.py +0 -255
biopipen/reports/delim/SampleInfo.svelte +0 -36
biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
biopipen/reports/scrna/ScFGSEA.svelte +0 -35
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
biopipen/scripts/gene/GeneNameConversion.py +0 -66
biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
biopipen/scripts/scrna/ExprImpution.R +0 -7
biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
biopipen/scripts/scrna/Write10X.R +0 -11
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
biopipen/scripts/tcr/TCRClustering.R +0 -280
biopipen/utils/common_docstrs.py +0 -61
biopipen/utils/gene.R +0 -49
biopipen/utils/gsea.R +0 -193
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -114
biopipen/utils/mutate_helpers.R +0 -433
biopipen/utils/plot.R +0 -173
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -115
biopipen-0.21.0.dist-info/METADATA +0 -22
biopipen-0.21.0.dist-info/RECORD +0 -218

biopipen/scripts/scrna/MetaMarkers.R CHANGED Viewed

@@ -1,5 +1,5 @@
-source("{{biopipen_dir}}/utils/misc.R")
-source("{{biopipen_dir}}/utils/mutate_helpers.R")
+{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
+{{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
 library(rlang)
 library(dplyr)
@@ -15,13 +15,15 @@ library(tidyseurat)
 setEnrichrSite("Enrichr")
-srtfile <- {{ in.srtobj | quote }}
-outdir <- {{ out.outdir | quote }}
+srtfile <- {{ in.srtobj | r }}
+outdir <- {{ out.outdir | r }}
+joboutdir <- {{ job.outdir | r }}
 ncores <- {{ envs.ncores | int }}
 mutaters <- {{ envs.mutaters | r }}
 idents <- {{ envs.idents | r }}
 group_by <- {{ envs["group-by"] | r }}
 each <- {{ envs.each | r }}
+subset <- {{ envs.subset | r }}
 prefix_each <- {{ envs.prefix_each | r }}
 p_adjust <- {{ envs.p_adjust | r }}
 section <- {{ envs.section | r }}
@@ -32,65 +34,72 @@ cases <- {{ envs.cases | r: todot = "-" }}
 set.seed(8525)
-print("- Reading Seurat object ...")
+log_info("- Reading Seurat object ...")
 srtobj <- readRDS(srtfile)
+if (DefaultAssay(srtobj) == "SCT" && !"PrepSCTFindMarkers" %in% names(srtobj@commands)) {
+    log_warn("- SCTransform used but PrepSCTFindMarkers not applied, running ...")
-print("- Mutate meta data if needed ...")
+    srtobj <- PrepSCTFindMarkers(srtobj)
+    # compose a new SeuratCommand to record it to srtobj@commands
+    commands <- names(pbmc_small@commands)
+    scommand <- pbmc_small@commands[[commands[length(commands)]]]
+    scommand@name <- "PrepSCTFindMarkers"
+    scommand@time.stamp <- Sys.time()
+    scommand@assay.used <- "SCT"
+    scommand@call.string <- "PrepSCTFindMarkers(object = srtobj)"
+    scommand@params <- list()
+    srtobj@commands$PrepSCTFindMarkers <- scommand
+}
+log_info("- Mutate meta data if needed ...")
 if (!is.null(mutaters) && length(mutaters)) {
     srtobj@meta.data <- srtobj@meta.data %>% mutate(!!!lapply(mutaters, parse_expr))
 }
-print("- Expanding cases ...")
-if (is.null(cases) || length(cases) == 0) {
-    cases <- list(
-        DEFAULT = list(
-            idents = idents,
-            group_by = group_by,
-            each = each,
-            prefix_each = prefix_each,
-            p_adjust = p_adjust,
-            section = section,
-            dbs = dbs,
-            sigmarkers = sigmarkers,
-            method = method
-        )
-    )
-} else {
-    for (name in names(cases)) {
-        case <- list_setdefault(
-            cases[[name]],
-            idents = idents,
-            group_by = group_by,
-            each = each,
-            prefix_each = prefix_each,
-            p_adjust = p_adjust,
-            section = section,
-            dbs = dbs,
-            sigmarkers = sigmarkers,
-            method = method
-        )
-        cases[[name]] <- case
-    }
-}
+defaults <- list(
+    idents = idents,
+    group_by = group_by,
+    each = each,
+    prefix_each = prefix_each,
+    p_adjust = p_adjust,
+    subset = subset,
+    section = section,
+    dbs = dbs,
+    sigmarkers = sigmarkers,
+    method = method
+)
-newcases <- list()
-for (name in names(cases)) {
-    case <- cases[[name]]
-    if (is.null(case$each)) {
-        newcases[[paste0(case$section, ":", name)]] <- case
+expand_each <- function(name, case) {
+    outcases <- list()
+    if (is.null(case$each) || nchar(case$each) == 0) {
+        if (is.null(case$section) || case$section == "DEFAULT") {
+            outcases[[name]] <- case
+        } else {
+            outcases[[paste0(case$section, "::", name)]] <- case
+        }
     } else {
-        eachs <- srtobj@meta.data %>% pull(case$each) %>% unique() %>% na.omit()
+        if (!is.null(case$section) && case$section != "DEFAULT") {
+            log_warn("  Ignoring `section` in case `{name}` when `each` is set.")
+            case$section <- NULL
+        }
+        if (is.null(case$subset)) {
+            eachs <- srtobj@meta.data %>%
+                pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
+        } else {
+            eachs <- srtobj@meta.data %>% filter(!!parse_expr(case$subset)) %>%
+                pull(case$each) %>% unique() %>% na.omit()
+        }
         for (each in eachs) {
-            by = make.names(paste0(".", name, "_", case$each, "_", each))
+            by = make.names(paste0("..", name, "_", case$each, "_", each))
             idents <- case$idents
             if (is.null(idents) || length(idents) == 0) {
-                srtobj@meta.data = srtobj@meta.data %>%
+                srtobj@meta.data <<- srtobj@meta.data %>%
                     mutate(
                         !!sym(by) := if_else(!!sym(case$each) == each, !!sym(case$group_by), NA)
                     )
                 idents <- srtobj@meta.data %>% pull(case$group_by) %>% unique() %>% na.omit()
             } else {
-                srtobj@meta.data = srtobj@meta.data %>%
+                srtobj@meta.data <<- srtobj@meta.data %>%
                     mutate(
                         !!sym(by) := if_else(
                             !!sym(case$each) == each & !!sym(case$group_by) %in% case$idents,
@@ -100,158 +109,253 @@ for (name in names(cases)) {
                     )
             }
-            key <- paste0(case$each, ":", each)
-            if (name != "DEFAULT") {
-                key <- paste0(key, " - ", name)
+            if (isTRUE(case$prefix_each)) {
+                key <- paste0(name, "::", case$each, " - ", each)
+            } else {
+                key <- paste0(name, "::", each)
             }
-            newcases[[key]] <- case
-            newcases[[key]]$group_by <- by
-            newcases[[key]]$idents <- idents
+            outcases[[key]] <- case
+            outcases[[key]]$section <- name
+            outcases[[key]]$group_by <- by
         }
     }
+    outcases
 }
-cases <- newcases
+log_info("- Expanding cases ...")
+cases <- expand_cases(cases, defaults, expand_each)
 # Do enrichment analysis for a case using Enrichr
 # Args:
 #   case: case name
 #   markers: markers dataframe
 #   sig: The expression to filter significant markers
-do_enrich <- function(case, markers, sig) {
-    print(paste("  Running enrichment for case:", case))
-    parts <- strsplit(case, ":")[[1]]
-    sec <- parts[1]
-    case <- paste0(parts[-1], collapse = ":")
-    casedir <- file.path(outdir, sec, case)
-    dir.create(casedir, showWarnings = FALSE, recursive = TRUE)
+do_enrich <- function(info, markers, sig) {
+    log_info("  Running enrichment for case: {info$casename}")
     if (nrow(markers) == 0) {
-        print(paste("  No markers found for case:", case))
-        cat("No markers found.", file = file.path(casedir, "error.txt"))
-        return()
+        msg <- paste0("No markers found for case: ", info$casename)
+        return(msg)
     }
     markers_sig <- markers %>% filter(!!parse_expr(sig))
     if (nrow(markers_sig) == 0) {
-        print(paste("  No significant markers found for case:", case))
-        cat("No significant markers.", file = file.path(casedir, "error.txt"))
-        return()
+        msg <- paste0("No significant markers found for case: ", info$casename)
+        return(msg)
     }
     write.table(
         markers_sig,
-        file.path(casedir, "markers.txt"),
+        file.path(info$casedir, "markers.txt"),
         sep = "\t",
         row.names = FALSE,
         col.names = TRUE,
         quote = FALSE
     )
     if (nrow(markers_sig) < 5) {
-        for (db in dbs) {
-            write.table(
-                data.frame(Warning = "Not enough significant markers."),
-                file.path(casedir, paste0("Enrichr-", db, ".txt")),
-                sep = "\t",
-                row.names = FALSE,
-                col.names = TRUE,
-                quote = FALSE
-            )
-            png(
-                file.path(casedir, paste0("Enrichr-", db, ".png")),
-                res = 100, height = 200, width = 1000
-            )
-            print(
-                ggplot() +
-                    annotate(
-                        "text",
-                        x = 1,
-                        y = 1,
-                        label = "Not enough significant markers."
-                    ) +
-                    theme_classic()
-            )
-            dev.off()
-        }
-    } else {
-        enriched <- enrichr(markers_sig$gene, dbs)
-        for (db in dbs) {
-            write.table(
-                enriched[[db]],
-                file.path(casedir, paste0("Enrichr-", db, ".txt")),
-                sep = "\t",
-                row.names = FALSE,
-                col.names = TRUE,
-                quote = FALSE
-            )
-            png(
-                file.path(casedir, paste0("Enrichr-", db, ".png")),
-                res = 100, height = 1000, width = 1000
-            )
-            print(plotEnrich(enriched[[db]], showTerms = 20, title = db))
-            dev.off()
+        msg <- paste0("Too few significant markers found for case: ", info$casename)
+        return(msg)
+    }
+    enriched <- enrichr(markers_sig$gene, dbs)
+    for (db in dbs) {
+        write.table(
+            enriched[[db]],
+            file.path(info$casedir, paste0("Enrichr-", db, ".txt")),
+            sep = "\t",
+            row.names = FALSE,
+            col.names = TRUE,
+            quote = FALSE
+        )
+        if (nrow(enriched[[db]]) == 0) {
+            log_info(paste0("  No enriched terms for ", db))
+            next
         }
+        p <- plotEnrich(enriched[[db]], showTerms = 20, title = db) +
+            theme_prism()
+        plotfile <- file.path(info$casedir, paste0("Enrichr-", db, ".png"))
+        png(plotfile, res = 100, height = 600, width = 800)
+        print(p)
+        dev.off()
+        plotfile_pdf <- gsub(".png$", ".pdf", plotfile)
+        pdf(plotfile_pdf, height = 6, width = 8)
+        print(p)
+        dev.off()
     }
 }
+ensure_sobj <- function(expr, allow_empty) {
+    tryCatch({ expr }, error = function(e) {
+        if (allow_empty) {
+            log_warn("  Ignoring this case: {e$message}")
+            return(NULL)
+        } else {
+            stop(e)
+        }
+    })
+}
 do_case <- function(casename) {
-    cat(paste("- Dealing with case:", casename, "...\n"))
+    log_info("- Dealing with case: {casename} ...")
+    info <- casename_info(casename, cases, outdir, create = TRUE)
     case <- cases[[casename]]
-    sobj <- srtobj %>% filter(!is.na(!!sym(case$group_by)))
-    df <- GetAssayData(sobj, slot = "data", assay = "RNA")
-    genes <- rownames(df)
-    # rows: cells, cols: genes
-    df <- cbind(as.data.frame(scale(Matrix::t(df))), sobj@meta.data[, case$group_by])
-    colnames(df)[ncol(df)] <- "GROUP"
+    allow_empty = startsWith(case$group_by, "..")
-    cat(paste("  Running tests for case...\n"))
-    test_result <- mclapply(genes, function(gene) {
-        fm <- as.formula(paste(bQuote(gene), "~ GROUP"))
-        res <- tryCatch({
-            if (case$method == "anova") {
-                r <- summary(aov(fm, data = df))[[1]]
-                data.frame(
-                    statistic = r[1, "F value"],
-                    p.value = r[1, "Pr(>F)"],
-                    sumsq = r[1, "Sum Sq"],
-                    meansq = r[1, "Mean Sq"]
-                )
+    if (sum(!is.na(srtobj@meta.data[[case$group_by]])) == 0) {
+        msg = "Not enough cells to run tests."
+    } else {
+        sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group_by))) }, allow_empty)
+        if (is.null(sobj)) { return() }
+        if (!is.null(case$subset)) {
+            sobj <- ensure_sobj({ sobj %>% filter(!!parse_expr(case$subset)) }, allow_empty)
+            if (is.null(sobj)) { return() }
+        }
+        df <- tryCatch({
+                GetAssayData(sobj, layer = "data")
+            }, error = function(e) {
+                log_warn("  Error when fetching assay data: {e}")
+                NULL
+            })
+        if (is.null(df)) {
+            msg <- "No markers found. May be due to too few cells or features."
+        } else {
+            df <- df[
+                apply(df, 1, function(x) !all(is.na(x)) && !all(x == x[1])), ,
+                drop = FALSE
+            ]
+            genes <- rownames(df)
+            # rows: cells, cols: genes
+            df <- cbind(as.data.frame(scale(Matrix::t(df))), sobj@meta.data[, case$group_by])
+            colnames(df)[ncol(df)] <- "GROUP"
+            log_info("  Running tests for case...")
+            warn_count <- 0
+            test_result <- mclapply(genes, function(gene) {
+                fm <- as.formula(paste(bQuote(gene), "~ GROUP"))
+                res <- tryCatch({
+                    if (case$method == "anova") {
+                        r <- summary(aov(fm, data = df))[[1]]
+                        data.frame(
+                            statistic = r[1, "F value"],
+                            p.value = r[1, "Pr(>F)"],
+                            sumsq = r[1, "Sum Sq"],
+                            meansq = r[1, "Mean Sq"]
+                        )
+                    } else {
+                        r <- kruskal.test(fm, data = df)
+                        data.frame(statistic = r$statistic, p.value = r$p.value)
+                    }
+                }, error = function(e) {
+                    warn_count <<- warn_count + 1
+                    if (warn_count < 10) {
+                        log_warn("  Error when testing gene: {gene}")
+                        log_warn("  {e}")
+                    } else if (warn_count == 10) {
+                        log_warn("  Too many errors, will not print more.")
+                    }
+                    NULL
+                })
+                if (is.null(res)) {
+                    return(NULL)
+                }
+                res$gene <- gene
+                res$method <- case$method
+                rownames(res) <- NULL
+                res
+            }, mc.cores = ncores)
+            markers <- do_call(rbind, test_result)
+            if (is.null(markers)) {
+                msg <- "No markers found. May be due to too few cells."
             } else {
-                r <- kruskal.test(fm, data = df)
-                data.frame(statistic = r$statistic, p.value = r$p.value)
+                markers$p_adjust <- p.adjust(markers$p.value, method = case$p_adjust)
+                markers <- markers %>% arrange(p_adjust)
+                msg <- do_enrich(info, markers, case$sigmarkers)
             }
-        }, error = function(e) NULL)
-        if (is.null(res)) {
-            return(NULL)
         }
-        res$gene <- gene
-        res$method <- case$method
-        rownames(res) <- NULL
-        res
-    }, mc.cores = ncores)
-    markers <- do_call(rbind, test_result)
-    markers$p_adjust <- p.adjust(markers$p.value, method = case$p_adjust)
-    markers <- markers %>% arrange(p_adjust)
-    do_enrich(casename, markers, case$sigmarkers)
+    }
+    if (is.null(msg)) {
+        log_info("  Plotting top 10 genes ...")
+        markers <- markers %>% head(10)
+        plotdir <- file.path(info$casedir, "expr_plots")
+        dir.create(plotdir, showWarnings = FALSE, recursive = TRUE)
-    print(paste("  Plotting top 10 genes ...\n"))
-    markers <- markers %>% head(10)
-    parts <- strsplit(casename, ":")[[1]]
-    sec <- parts[1]
-    casename <- paste0(parts[-1], collapse = ":")
-    plotdir <- file.path(outdir, sec, casename, "plots")
-    dir.create(plotdir, showWarnings = FALSE, recursive = TRUE)
+        # Plot the top 10 genes in each group with violin plots
+        geneplots <- list()
+        for (gene in markers$gene) {
+            outfile <- file.path(plotdir, paste0(slugify(gene), ".png"))
+            p <- ggplot(df, aes_string(x="GROUP", y=bQuote(gene), fill="GROUP")) +
+                geom_violin(alpha = .8) +
+                geom_boxplot(width=0.1, fill="white") +
+                theme_prism() +
+                ylab(paste0("Expression of ", gene))
+            png(outfile, res = 100, height = 600, width = 800)
+            print(p)
+            dev.off()
-    # Plot the top 10 genes in each group with violin plots
-    for (gene in markers$gene) {
-        outfile = file.path(plotdir, paste0(gene, ".png"))
-        p = ggplot(df, aes_string(x="GROUP", y=bQuote(gene), fill="GROUP")) +
-            geom_violin(alpha = .8) +
-            geom_boxplot(width=0.1, fill="white") +
-            theme_prism() +
-            ylab(paste0("Expression of ", gene))
-        png(outfile, res = 100, height = 800, width = 1000)
-        print(p)
-        dev.off()
+            outfile_pdf <- gsub(".png$", ".pdf", outfile)
+            pdf(outfile_pdf, height = 6, width = 8)
+            print(p)
+            dev.off()
+            geneplots[[length(geneplots) + 1]] <- list(
+                kind = "table_image",
+                src = outfile,
+                download = outfile_pdf,
+                name = gene
+            )
+        }
+        add_report(
+            list(
+                kind = "descr",
+                content = paste0(
+                    "Top 100 genes selected by ",
+                    "<code>", case$method, "</code> across ",
+                    "<code>", case$group_by, "</code> and filtered by ",
+                    "<code>", html_escape(case$sigmarkers), "</code>"
+                )
+            ),
+            h1 = info$h1,
+            h2 = ifelse(info$h2 == "#", "Meta-Markers", info$h2),
+            h3 = ifelse(info$h2 == "#", "#", "Meta-Markers")
+        )
+        add_report(
+            list(
+                name = "Meta-Markers",
+                contents = list(list(
+                    kind = "table",
+                    src = file.path(info$casedir, "markers.txt"),
+                    data = list(nrows = 100)
+                ))
+            ),
+            list(
+                name = "Volin Plots (Top 10)",
+                ui = "table_of_images:4",
+                contents = geneplots
+            ),
+            h1 = info$h1,
+            h2 = ifelse(info$h2 == "#", "Meta-Markers", info$h2),
+            h3 = ifelse(info$h2 == "#", "#", "Meta-Markers"),
+            ui = "tabs"
+        )
+        add_report(
+            list(kind = "enrichr", dir = info$casedir),
+            h1 = info$h1,
+            h2 = ifelse(info$h2 == "#", "Enrichment Analysis", info$h2),
+            h3 = ifelse(info$h2 == "#", "#", "Enrichment Analysis")
+        )
+    } else {
+        log_warn("  {msg}")
+        add_report(
+            list(kind = "error", content = msg),
+            h1 = info$h1,
+            h2 = info$h2
+        )
     }
 }
 sapply(sort(names(cases)), do_case)
+save_report(joboutdir)

biopipen/scripts/scrna/ModuleScoreCalculator.R CHANGED Viewed

@@ -1,15 +1,19 @@
-source("{{biopipen_dir}}/utils/misc.R")
-library(Seurat)
+library(rlang)
 library(dplyr)
+library(Seurat)
+library(biopipen.utils)
 sobjfile <- {{in.srtobj | r}}
 outfile <- {{out.rdsfile | r}}
 defaults <- {{envs.defaults | r}}
 modules <- {{envs.modules | r}}
+post_mutaters <- {{envs.post_mutaters | r}}
+log <- get_logger()
 # load seurat object
-print("Loading Seurat object ...")
-sobj <- readRDS(sobjfile)
+log$info("Loading Seurat object ...")
+sobj <- read_obj(sobjfile)
 aggs <- list(
     mean = mean,
@@ -27,7 +31,6 @@ for (key in names(modules)) {
     }
     module <- list_update(defaults, modules[[key]])
-    module$object <- sobj
     if (is.null(module$features) || length(module$features) == 0) {
         stop(paste0("Module '", key, "' has no features"))
     }
@@ -36,21 +39,67 @@ for (key in names(modules)) {
     agg <- aggs[[module$agg]]
     module$keep <- NULL
     module$agg <- NULL
-    print(paste0("Calculating module '", key, "' ..."))
+    log$info("Calculating module '{key}' ...")
     is_cc <- FALSE
-    if (module$features == "cc.genes") {
+    if (!is.null(module$kind) && module$kind %in% c("diffmap", "diffusion_map")) {
+        library(destiny)
+        features <- module$features
+        if (is.null(features)) { features <- 2 }
+        if (is.null(module$verbose)) { module$verbose <- TRUE }
+        module$features <- NULL
+        module$kind <- NULL
+        if (!is.null(module$n_pcs)) {
+            log$info("- Using cell embeddings from PCA reduction ...")
+            module$data <- Embeddings(sobj, reduction = "pca")
+            if (module$n_pcs > ncol(module$data)) {
+                log$warn("- `n_pcs` ({module$n_pcs}) is larger than the number of PCs, using all {ncol(module$data)} PCs ...")
+            }
+            module$data <- module$data[, 1:min(module$n_pcs, ncol(module$data))]
+            module$n_pcs <- NULL
+        } else {
+            log$info("- Using assay data ...")
+            module$data <- GetAssayData(sobj, layer = "data")
+        }
+        log$info("- Calculating diffusion map ...")
+        dm <- do_call(DiffusionMap, module)
+        ev <- eigenvectors(dm)
+        log$info("- Creating DimReduc object ...")
+        sobj[[key]] <- CreateDimReducObject(
+            embeddings = data.matrix(as.data.frame(ev[, 1:features])),
+            key = paste0(key, "_")
+        )
+        # add to meta.data
+        log$info("- Adding to meta.data ...")
+        sobj <- AddMetaData(
+            sobj,
+            sobj[[key]]@cell.embeddings,
+            col.name = colnames(sobj[[key]]@cell.embeddings)
+        )
+        next
+    }
+    module$object <- sobj
+    if (length(module$features) == 1 && module$features == "cc.genes") {
         is_cc <- TRUE
         module$features <- NULL
         module$s.features <- cc.genes$s.genes
         module$g2m.features <- cc.genes$g2m.genes
-    } else if (module$features == "cc.genes.updated.2019") {
+    } else if (length(module$features) == 1 && module$features == "cc.genes.updated.2019") {
         is_cc <- TRUE
         module$features <- NULL
         module$s.features <- cc.genes.updated.2019$s.genes
         module$g2m.features <- cc.genes.updated.2019$g2m.genes
     } else {
         module$name <- key
-        module$features <- trimws(strsplit(module$features, ",")[[1]])
+        if (length(module$features) == 1) {
+            module$features <- trimws(strsplit(module$features, ",")[[1]])
+        }
+        module$features <- list(module$features)
     }
     if (isTRUE(is_cc)) {
         sobj <- do_call(CellCycleScoring, module)
@@ -87,6 +136,12 @@ for (key in names(modules)) {
     }
 }
+if (!is.null(post_mutaters) && length(post_mutaters) > 0) {
+    log$info("Applying post mutaters ...")
+    sobj@meta.data <- sobj@meta.data %>%
+        mutate(!!!lapply(post_mutaters, parse_expr))
+}
 # save seurat object
-print("Saving Seurat object ...")
-saveRDS(sobj, outfile)
+log$info("Saving Seurat object ...")
+save_obj(sobj, outfile)

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl