PyPI - biopipen - Versions diffs - 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl - Mend

biopipen 0.21.0 (py3-none-any.whl) → 0.34.26 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (290)

biopipen/__init__.py +1 -1
biopipen/core/config.toml +28 -0
biopipen/core/filters.py +79 -4
biopipen/core/proc.py +12 -3
biopipen/core/testing.py +75 -3
biopipen/ns/bam.py +148 -6
biopipen/ns/bed.py +75 -0
biopipen/ns/cellranger.py +186 -0
biopipen/ns/cellranger_pipeline.py +126 -0
biopipen/ns/cnv.py +19 -3
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/cnvkit_pipeline.py +20 -12
biopipen/ns/delim.py +34 -35
biopipen/ns/gene.py +68 -23
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +39 -14
biopipen/ns/plot.py +304 -1
biopipen/ns/protein.py +183 -0
biopipen/ns/regulatory.py +290 -0
biopipen/ns/rnaseq.py +142 -5
biopipen/ns/scrna.py +2053 -473
biopipen/ns/scrna_metabolic_landscape.py +228 -382
biopipen/ns/snp.py +659 -0
biopipen/ns/stats.py +484 -0
biopipen/ns/tcr.py +683 -98
biopipen/ns/vcf.py +236 -2
biopipen/ns/web.py +97 -6
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cellranger/CellRangerCount.svelte +18 -0
biopipen/reports/cellranger/CellRangerSummary.svelte +16 -0
biopipen/reports/cellranger/CellRangerVdj.svelte +18 -0
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/common.svelte +15 -0
biopipen/reports/protein/ProdigySummary.svelte +16 -0
biopipen/reports/scrna/CellsDistribution.svelte +4 -39
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna/MarkersFinder.svelte +6 -126
biopipen/reports/scrna/MetaMarkers.svelte +3 -75
biopipen/reports/scrna/RadarPlots.svelte +4 -20
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +61 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +88 -82
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +70 -10
biopipen/reports/snp/PlinkCallRate.svelte +24 -0
biopipen/reports/snp/PlinkFreq.svelte +18 -0
biopipen/reports/snp/PlinkHWE.svelte +18 -0
biopipen/reports/snp/PlinkHet.svelte +18 -0
biopipen/reports/snp/PlinkIBD.svelte +18 -0
biopipen/reports/tcr/CDR3AAPhyschem.svelte +19 -66
biopipen/reports/tcr/ClonalStats.svelte +16 -0
biopipen/reports/tcr/CloneResidency.svelte +3 -93
biopipen/reports/tcr/Immunarch.svelte +4 -155
biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
biopipen/reports/tcr/TESSA.svelte +11 -28
biopipen/reports/utils/misc.liq +22 -7
biopipen/scripts/bam/BamMerge.py +11 -15
biopipen/scripts/bam/BamSampling.py +90 -0
biopipen/scripts/bam/BamSort.py +141 -0
biopipen/scripts/bam/BamSplitChroms.py +10 -10
biopipen/scripts/bam/BamSubsetByBed.py +38 -0
biopipen/scripts/bam/CNAClinic.R +41 -5
biopipen/scripts/bam/CNVpytor.py +153 -54
biopipen/scripts/bam/ControlFREEC.py +13 -14
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/bed/Bed2Vcf.py +5 -5
biopipen/scripts/bed/BedConsensus.py +5 -5
biopipen/scripts/bed/BedLiftOver.sh +6 -4
biopipen/scripts/bed/BedtoolsIntersect.py +54 -0
biopipen/scripts/bed/BedtoolsMakeWindows.py +47 -0
biopipen/scripts/bed/BedtoolsMerge.py +4 -4
biopipen/scripts/cellranger/CellRangerCount.py +138 -0
biopipen/scripts/cellranger/CellRangerSummary.R +181 -0
biopipen/scripts/cellranger/CellRangerVdj.py +112 -0
biopipen/scripts/cnv/AneuploidyScore.R +55 -20
biopipen/scripts/cnv/AneuploidyScoreSummary.R +221 -163
biopipen/scripts/cnv/TMADScore.R +25 -9
biopipen/scripts/cnv/TMADScoreSummary.R +57 -86
biopipen/scripts/cnvkit/CNVkitAccess.py +7 -6
biopipen/scripts/cnvkit/CNVkitAutobin.py +26 -18
biopipen/scripts/cnvkit/CNVkitBatch.py +6 -6
biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
biopipen/scripts/cnvkit/CNVkitCoverage.py +4 -3
biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +12 -8
biopipen/scripts/cnvkit/CNVkitHeatmap.py +5 -5
biopipen/scripts/cnvkit/CNVkitReference.py +6 -5
biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
biopipen/scripts/cnvkit/guess_baits.py +166 -93
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +116 -118
biopipen/scripts/gene/GeneNameConversion.R +67 -0
biopipen/scripts/gene/GenePromoters.R +61 -0
biopipen/scripts/gsea/Enrichr.R +5 -5
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/GSEA.R +2 -2
biopipen/scripts/gsea/PreRank.R +5 -5
biopipen/scripts/misc/Config2File.py +2 -2
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/misc/Shell.sh +15 -0
biopipen/scripts/misc/Str2File.py +2 -2
biopipen/scripts/plot/Heatmap.R +3 -3
biopipen/scripts/plot/Manhattan.R +147 -0
biopipen/scripts/plot/QQPlot.R +146 -0
biopipen/scripts/plot/ROC.R +88 -0
biopipen/scripts/plot/Scatter.R +112 -0
biopipen/scripts/plot/VennDiagram.R +5 -9
biopipen/scripts/protein/MMCIF2PDB.py +33 -0
biopipen/scripts/protein/PDB2Fasta.py +60 -0
biopipen/scripts/protein/Prodigy.py +119 -0
biopipen/scripts/protein/ProdigySummary.R +140 -0
biopipen/scripts/protein/RMSD.py +178 -0
biopipen/scripts/regulatory/MotifAffinityTest.R +102 -0
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +127 -0
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +104 -0
biopipen/scripts/regulatory/MotifScan.py +159 -0
biopipen/scripts/regulatory/VariantMotifPlot.R +78 -0
biopipen/scripts/regulatory/motifs-common.R +324 -0
biopipen/scripts/rnaseq/Simulation-ESCO.R +180 -0
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +45 -0
biopipen/scripts/rnaseq/Simulation.R +21 -0
biopipen/scripts/rnaseq/UnitConversion.R +325 -54
biopipen/scripts/scrna/AnnData2Seurat.R +40 -0
biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
biopipen/scripts/scrna/CellCellCommunication.py +150 -0
biopipen/scripts/scrna/CellCellCommunicationPlots.R +93 -0
biopipen/scripts/scrna/CellSNPLite.py +30 -0
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +185 -0
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +68 -31
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +27 -22
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +28 -20
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +48 -25
biopipen/scripts/scrna/CellTypeAnnotation.R +37 -1
biopipen/scripts/scrna/CellsDistribution.R +456 -167
biopipen/scripts/scrna/DimPlots.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +109 -0
biopipen/scripts/scrna/ExprImputation-rmagic.R +256 -0
biopipen/scripts/scrna/{ExprImpution-scimpute.R → ExprImputation-scimpute.R} +8 -5
biopipen/scripts/scrna/ExprImputation.R +7 -0
biopipen/scripts/scrna/LoomTo10X.R +51 -0
biopipen/scripts/scrna/MQuad.py +25 -0
biopipen/scripts/scrna/MarkersFinder.R +679 -400
biopipen/scripts/scrna/MetaMarkers.R +265 -161
biopipen/scripts/scrna/ModuleScoreCalculator.R +66 -11
biopipen/scripts/scrna/PseudoBulkDEG.R +678 -0
biopipen/scripts/scrna/RadarPlots.R +355 -134
biopipen/scripts/scrna/ScFGSEA.R +298 -100
biopipen/scripts/scrna/ScSimulation.R +65 -0
biopipen/scripts/scrna/ScVelo.py +617 -0
biopipen/scripts/scrna/Seurat2AnnData.R +7 -0
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +87 -0
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +36 -30
biopipen/scripts/scrna/SeuratClusterStats-features.R +138 -187
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +81 -0
biopipen/scripts/scrna/SeuratClusterStats-stats.R +78 -89
biopipen/scripts/scrna/SeuratClusterStats.R +47 -10
biopipen/scripts/scrna/SeuratClustering.R +36 -233
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +84 -113
biopipen/scripts/scrna/SeuratMetadataMutater.R +16 -6
biopipen/scripts/scrna/SeuratPreparing.R +223 -173
biopipen/scripts/scrna/SeuratSubClustering.R +64 -0
biopipen/scripts/scrna/SeuratTo10X.R +27 -0
biopipen/scripts/scrna/Slingshot.R +65 -0
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +169 -135
biopipen/scripts/scrna/celltypist-wrapper.py +195 -0
biopipen/scripts/scrna/scvelo_paga.py +313 -0
biopipen/scripts/scrna/seurat_anndata_conversion.py +98 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +447 -82
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +348 -241
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +188 -166
biopipen/scripts/snp/MatrixEQTL.R +217 -0
biopipen/scripts/snp/Plink2GTMat.py +148 -0
biopipen/scripts/snp/PlinkCallRate.R +199 -0
biopipen/scripts/snp/PlinkFilter.py +100 -0
biopipen/scripts/snp/PlinkFreq.R +291 -0
biopipen/scripts/snp/PlinkFromVcf.py +81 -0
biopipen/scripts/snp/PlinkHWE.R +85 -0
biopipen/scripts/snp/PlinkHet.R +96 -0
biopipen/scripts/snp/PlinkIBD.R +196 -0
biopipen/scripts/snp/PlinkSimulation.py +124 -0
biopipen/scripts/snp/PlinkUpdateName.py +124 -0
biopipen/scripts/stats/ChowTest.R +146 -0
biopipen/scripts/stats/DiffCoexpr.R +152 -0
biopipen/scripts/stats/LiquidAssoc.R +135 -0
biopipen/scripts/stats/Mediation.R +108 -0
biopipen/scripts/stats/MetaPvalue.R +130 -0
biopipen/scripts/stats/MetaPvalue1.R +74 -0
biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
biopipen/scripts/tcr/Attach2Seurat.R +3 -2
biopipen/scripts/tcr/CDR3AAPhyschem.R +211 -143
biopipen/scripts/tcr/CDR3Clustering.R +343 -0
biopipen/scripts/tcr/ClonalStats.R +526 -0
biopipen/scripts/tcr/CloneResidency.R +255 -131
biopipen/scripts/tcr/CloneSizeQQPlot.R +4 -4
biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
biopipen/scripts/tcr/GIANA/GIANA4.py +1362 -789
biopipen/scripts/tcr/GIANA/query.py +164 -162
biopipen/scripts/tcr/Immunarch-basic.R +31 -9
biopipen/scripts/tcr/Immunarch-clonality.R +25 -5
biopipen/scripts/tcr/Immunarch-diversity.R +352 -134
biopipen/scripts/tcr/Immunarch-geneusage.R +45 -5
biopipen/scripts/tcr/Immunarch-kmer.R +68 -8
biopipen/scripts/tcr/Immunarch-overlap.R +84 -4
biopipen/scripts/tcr/Immunarch-spectratyping.R +35 -6
biopipen/scripts/tcr/Immunarch-tracking.R +38 -6
biopipen/scripts/tcr/Immunarch-vjjunc.R +165 -0
biopipen/scripts/tcr/Immunarch.R +63 -11
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +4 -4
biopipen/scripts/tcr/ImmunarchLoading.R +38 -29
biopipen/scripts/tcr/SampleDiversity.R +1 -1
biopipen/scripts/tcr/ScRepCombiningExpression.R +40 -0
biopipen/scripts/tcr/ScRepLoading.R +166 -0
biopipen/scripts/tcr/TCRClusterStats.R +176 -22
biopipen/scripts/tcr/TCRDock.py +110 -0
biopipen/scripts/tcr/TESSA.R +102 -118
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/tcr/immunarch-patched.R +142 -0
biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
biopipen/scripts/vcf/BcftoolsAnnotate.py +91 -0
biopipen/scripts/vcf/BcftoolsFilter.py +90 -0
biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
biopipen/scripts/vcf/BcftoolsSort.py +113 -0
biopipen/scripts/vcf/BcftoolsView.py +73 -0
biopipen/scripts/vcf/TruvariBench.sh +14 -7
biopipen/scripts/vcf/TruvariBenchSummary.R +16 -13
biopipen/scripts/vcf/TruvariConsistency.R +1 -1
biopipen/scripts/vcf/Vcf2Bed.py +2 -2
biopipen/scripts/vcf/VcfAnno.py +11 -11
biopipen/scripts/vcf/VcfDownSample.sh +22 -10
biopipen/scripts/vcf/VcfFilter.py +5 -5
biopipen/scripts/vcf/VcfFix.py +7 -7
biopipen/scripts/vcf/VcfFix_utils.py +13 -4
biopipen/scripts/vcf/VcfIndex.py +3 -3
biopipen/scripts/vcf/VcfIntersect.py +3 -3
biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
biopipen/scripts/vcf/bcftools_utils.py +52 -0
biopipen/scripts/web/Download.py +8 -4
biopipen/scripts/web/DownloadList.py +5 -5
biopipen/scripts/web/GCloudStorageDownloadBucket.py +82 -0
biopipen/scripts/web/GCloudStorageDownloadFile.py +23 -0
biopipen/scripts/web/gcloud_common.py +49 -0
biopipen/utils/gene.py +108 -60
biopipen/utils/misc.py +146 -20
biopipen/utils/reference.py +64 -20
biopipen/utils/reporter.py +177 -0
biopipen/utils/vcf.py +1 -1
biopipen-0.34.26.dist-info/METADATA +27 -0
biopipen-0.34.26.dist-info/RECORD +292 -0
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
{biopipen-0.21.0.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +6 -2
biopipen/ns/bcftools.py +0 -111
biopipen/ns/scrna_basic.py +0 -255
biopipen/reports/delim/SampleInfo.svelte +0 -36
biopipen/reports/scrna/GeneExpressionInvistigation.svelte +0 -32
biopipen/reports/scrna/ScFGSEA.svelte +0 -35
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -82
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -20
biopipen/reports/scrna/SeuratPreparing.svelte +0 -38
biopipen/reports/scrna/TopExpressingGenes.svelte +0 -55
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -31
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/bcftools/BcftoolsAnnotate.py +0 -42
biopipen/scripts/bcftools/BcftoolsFilter.py +0 -79
biopipen/scripts/bcftools/BcftoolsSort.py +0 -19
biopipen/scripts/gene/GeneNameConversion.py +0 -66
biopipen/scripts/scrna/ExprImpution-alra.R +0 -32
biopipen/scripts/scrna/ExprImpution-rmagic.R +0 -29
biopipen/scripts/scrna/ExprImpution.R +0 -7
biopipen/scripts/scrna/GeneExpressionInvistigation.R +0 -132
biopipen/scripts/scrna/Write10X.R +0 -11
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -150
biopipen/scripts/tcr/TCRClustering.R +0 -280
biopipen/utils/common_docstrs.py +0 -61
biopipen/utils/gene.R +0 -49
biopipen/utils/gsea.R +0 -193
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -114
biopipen/utils/mutate_helpers.R +0 -433
biopipen/utils/plot.R +0 -173
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -115
biopipen-0.21.0.dist-info/METADATA +0 -22
biopipen-0.21.0.dist-info/RECORD +0 -218

biopipen/scripts/gene/GeneNameConversion.R ADDED Viewed

@@ -0,0 +1,67 @@
+library(biopipen.utils)
+infile <- {{in.infile | r}}
+outfile <- {{out.outfile | r}}
+notfound <- {{envs.notfound | r}}
+genecol <- {{envs.genecol | r}}
+output <- {{envs.output | r}}
+dup <- {{envs.dup | r}}
+infmt <- {{envs.infmt | r}}
+outfmt <- {{envs.outfmt | r}}
+species <- {{envs.species | r}}
+log <- get_logger()
+if (is.na(notfound)) {
+    notfound = "na"
+}
+df <- read.table(infile, header=TRUE, sep="\t", check.names=FALSE)
+if (genecol == 0) {
+    log$warn("envs.genecol should be 1-based, but 0 was given. Using 1 instead.")
+    genecol <- 1
+}
+if (is.numeric(genecol)) { genecol <- colnames(df)[genecol] }
+if (dup == "combine") { dup <- ";" }
+genes <- df[[genecol]]
+converted <- gene_name_conversion(
+    genes = genes,
+    species = species,
+    infmt = infmt,
+    outfmt = outfmt,
+    notfound = notfound,
+    dup = dup,
+    suppress_messages = FALSE
+)
+#    <genecol> <outfmt>
+# 1  1255_g_at   GUCA1A
+# 2    1316_at     THRA
+# 3    1320_at   PTPN21
+# 4    1294_at  MIR5193
+# order the converted dataframe by the original gene column
+converted <- converted[order(match(converted$query, genes)), , drop=FALSE]
+outcol <- outfmt
+if (notfound == "skip" || notfound == "ignore") {
+    df <- df[df[[genecol]] %in% converted$query, , drop=FALSE]
+}
+if (output == "append") {
+    if (outfmt %in% colnames(df)) {
+        log$warn("The output column name already exists in the input dataframe. Appending with a suffix `_1`.")
+        outcol <- paste(outfmt, "_1", sep="")
+    }
+    df[[outcol]] <- converted[[outfmt]]
+} else if (output == "replace") {
+    df[[genecol]] <- converted[[outfmt]]
+} else if (output == "with-query") {
+    df <- converted
+} else {
+    df <- converted[, outfmt, drop=FALSE]
+}
+write.table(df, file=outfile, sep="\t", quote=FALSE, row.names=FALSE)

biopipen/scripts/gene/GenePromoters.R ADDED Viewed

@@ -0,0 +1,61 @@
+library(rlang)
+library(rtracklayer)
+infile <- {{in.infile | r}}
+outfile <- {{out.outfile | r}}
+up <- {{envs.up | r}}
+down <- {{envs.down | r}}
+notfound <- {{envs.notfound | r}}
+refgene <- {{envs.refgene | r}}
+header <- {{envs.header | r}}
+genecol <- {{envs.genecol | r}}
+match_id <- {{envs.match_id | r}}
+sort_ <- {{envs.sort | r}}
+chrsize <- {{envs.chrsize | r}}
+down <- down %||% up
+refgenes <- readGFF(refgene)
+refcol <- ifelse(match_id, "gene_id", "gene_name")
+if (infile == "/dev/null") {
+    genes <- unique(refgenes[[refcol]])
+} else {
+    data <- read.table(infile, header=header, sep="\t", stringsAsFactors=FALSE, check.names=FALSE)
+    genes <- data[[genecol]]
+    rm(data)
+}
+notfound_genes <- setdiff(genes, refgenes[[refcol]])
+if (notfound == "error" && length(notfound_genes) > 0) {
+    stop(paste(
+        "The following genes were not found in the reference annotation:",
+        paste(notfound_genes, collapse=", ")
+    ))
+} else if (notfound == 'skip') {
+    genes <- genes[!genes %in% notfound_genes]
+}
+# Select the genes that are in the reference annotation and keep the order
+# of the records in genes
+refgenes <- refgenes[match(genes, refgenes[[refcol]]), , drop = FALSE]
+refgenes <- unique(makeGRangesFromDataFrame(refgenes, keep.extra.columns=TRUE))
+proms <- promoters(refgenes, up=up, down=down)
+# Scores must be non-NA numeric values
+elementMetadata(proms)$name <- elementMetadata(proms)[[refcol]]
+score(proms) <- 0
+start(proms) <- pmax(1, start(proms))
+if (sort_) {
+    chrom_sizes <- read.table(chrsize, header=FALSE, stringsAsFactors=FALSE, sep="\t")
+    common_chroms <- intersect(chrom_sizes$V1, seqlevels(proms))
+    if (length(common_chroms) == 0) {
+        stop("No common chromosomes found between the promoters and the chromosome sizes. Do you use the correct chromosome sizes file?")
+    }
+    proms <- keepSeqlevels(proms, common_chroms, pruning.mode="coarse")
+    seqlevels(proms) <- common_chroms
+    proms <- sort(proms, ignore.strand = TRUE)
+}
+export.bed(proms, outfile)

biopipen/scripts/gsea/Enrichr.R CHANGED Viewed

@@ -1,9 +1,9 @@
-source("{{biopipen_dir}}/utils/io.R")
-source("{{biopipen_dir}}/utils/gene.R")
-source("{{biopipen_dir}}/utils/gsea.R")
+{{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
+{{ biopipen_dir | joinpaths: "utils", "gene.R" | source_r }}
+{{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
-infile = {{in.infile | quote}}
-outdir = {{out.outdir | quote}}
+infile = {{in.infile | r}}
+outdir = {{out.outdir | r}}
 genecol = {{envs.genecol | r}}
 genename = {{envs.genename | r}}
 dbs = {{envs.dbs | r}}

biopipen/scripts/gsea/FGSEA.R CHANGED Viewed

@@ -1,58 +1,192 @@
-# PreRank the genes for GSEA analysis
-# See: https://gseapy.readthedocs.io/en/latest/_modules/gseapy/algorithm.html#ranking_metric
-source("{{biopipen_dir}}/utils/io.R")
-source("{{biopipen_dir}}/utils/gsea.R")
-infile = {{in.infile | quote}}
-metafile = {{in.metafile | quote}}
-gmtfile = {{in.gmtfile | quote}}
-{% if in.configfile %}
-config = {{in.config | read | toml_loads | r}}
-{% else %}
-config = list()
-{% endif %}
-outdir = {{out.outdir | quote}}
-envs = {{envs | r}}
-clscol <- if (is.null(config$clscol)) envs$clscol else config$clscol
-classes <- if (is.null(config$classes)) envs$classes else config$classes
-if (is.null(clscol)) {
-    stop("No `clscol` specified.")
-}
+library(rlang)
+library(biopipen.utils)
-if (is.null(classes) || length(classes) != 2) {
-    stop(paste("`classes` must be a pair of labels."))
-}
+# input & output
+infile = {{in.infile | r}}
+metafile = {{in.metafile | r}}
+outdir = {{out.outdir | r}}
+joboutdir = {{job.outdir | r}}
+# envs
+ncores = {{envs.ncores | r}}
+case = {{envs.case | r}}
+control = {{envs.control | r}}
+gmtfile = {{envs.gmtfile | r}}
+method = {{envs.method | r}}
+clscol = {{envs.clscol | r}}
+top = {{envs.top | r}}
+eps = {{envs.eps | r}}
+minsize = {{envs.minSize | default: envs.minsize | r}}
+maxsize = {{envs.maxSize | default: envs.maxsize | r}}
+rest = {{envs.rest | r}}
+cases = {{envs.cases | r}}
+log <- get_logger()
+reporter <- get_reporter()
+defaults <- list(
+    case = case,
+    control = control,
+    gmtfile = gmtfile,
+    method = method,
+    clscol = clscol,
+    top = top,
+    eps = eps,
+    minsize = minsize,
+    maxsize = maxsize,
+    rest = rest
+)
+cases <- expand_cases(cases, defaults, default_case = "GSEA")
+log$info("Reading input file ...")
+indata <- read.table(infile, header=TRUE, stringsAsFactors=FALSE, row.names=1, sep="\t", quote="", check.names=FALSE)
-if (is.character(envs$inopts) && inopts == "rds") {
-    indata = readRDS(infile)
+if (!is.null(metafile)) {
+    log$info("Reading metadata file ...")
+    metadata <- read.table(metafile, header=TRUE, stringsAsFactors=FALSE, row.names=NULL, sep="\t", quote="", check.names=FALSE)
 } else {
-    indata = read.table.opts(infile, envs$inopts)
+    metadata <- NULL
 }
-metadata = read.table.opts(metafile, envs$metaopts)
-allclasses = metadata[colnames(indata), clscol]
+do_case <- function(name) {
+    log$info("Processing case: {name} ...")
+    case <- cases[[name]]
+    info <- case_info(name, outdir, create = TRUE)
-ranks = prerank(indata, classes[1], classes[2], allclasses, envs$method)
+    if (is.null(case$case) && is.null(case$control)) {
+        stop("Either `case` or `control` must be specified in the case.")
+    }
+    if (is.null(case$gmtfile)) {
+        stop("`gmtfile` must be specified in the case.")
+    }
+    if (is.null(case$clscol)) {
+        stop("`clscol` must be specified in the case.")
+    }
+    if (!is.null(metadata) && length(case$clscol) > 1) {
+        stop("When `in.metafile` is specified, `envs.clscol` must be a single column name.")
+    }
+    if (!is.null(metadata)) {
+        samples <- colnames(indata)
+        if (!"Sample" %in% colnames(metadata)) {
+            colnames(metadata)[1] <- "Sample"
+        }
+        metadata <- metadata[match(samples, metadata$Sample), , drop=FALSE]
+        case$clscol <- as.character(metadata[[case$clscol]])
+    }
+    if (length(unique(case$clscol)) < 2) {
+        stop("The `clscol` must have at least two unique values.")
+    }
+    if (length(unique(case$clscol)) == 2) {
+        case$case <- case$case %||% setdiff(unique(case$clscol), case$control)
+        case$control <- case$control %||% setdiff(unique(case$clscol), case$case)
+    } else {
+        if (is.null(case$case) || is.null(case$control)) {
+            stop("When `clscol` has more than two unique values, both `case` and `control` must be specified.")
+        }
+    }
+    log$info("- Running pre-ranking ...")
+    ranks <- RunGSEAPreRank(
+        indata,
+        classes = case$clscol,
+        case = case$case,
+        control = case$control,
+        method = case$method
+    )
+    if (all(is.na(ranks))) {
+        if (length(case$clscol) < 10) {
+            log$warn("  Ignoring this case because all gene ranks are NA and there are <10 samples.")
+            reporter$add2(
+                list(
+                    kind = "error",
+                    content = paste0("Not enough samples (n = ", length(case$clscol), ") to run fgsea.")
+                ),
+                hs = c(info$section, info$name)
+            )
+            return(NULL)
+        } else {
+            stop(paste0(
+                "All gene ranks are NA (# samples = ",
+                length(case$clscol),
+                "). ",
+                "It's probably due to high missing rate in the data. ",
+                "You may want to try a different `envs$method` for pre-ranking."
+            ))
+        }
+    }
-write.table(
-    ranks,
-    file.path(outdir, "fgsea.rank"),
-    row.names=F,
-    col.names=T,
-    sep="\t",
-    quote=F
-)
+    log$info("- Running GSEA ...")
+    case$rest$ranks <- ranks
+    case$rest$genesets <- ParseGMT(case$gmtfile)
+    case$rest$minSize <- case$rest$minSize %||% case$rest$minsize %||% case$minsize
+    case$rest$maxSize <- case$rest$maxSize %||% case$rest$maxsize %||% case$maxsize
+    case$rest$eps <- case$eps
+    case$rest$nproc <- case$ncores
+    case$rest$minsize <- NULL
+    case$rest$maxsize <- NULL
+    result <- do_call(RunGSEA, case$rest)
+    write.table(
+        result,
+        file.path(info$prefix, "fgsea.tsv"),
+        row.names = FALSE,
+        col.names = TRUE,
+        sep = "\t",
+        quote = FALSE
+    )
+    p_summary <- VizGSEA(
+        result,
+        plot_type = "summary",
+        top_term = case$top
+    )
+    save_plot(
+        p_summary,
+        file.path(info$prefix, "summary"),
+        devpars = list(res = 100, height = attr(p_summary, "height") * 100, width = attr(p_summary, "width") * 100),
+        formats = "png"
+    )
+    p_gsea <- VizGSEA(
+        result,
+        plot_type = "gsea",
+        gs = result$pathway[1:min(case$top, nrow(result))]
+    )
+    save_plot(
+        p_gsea,
+        file.path(info$prefix, "pathways"),
+        devpars = list(res = 100, height = attr(p_gsea, "height") * 100, width = attr(p_gsea, "width") * 100),
+        formats = "png"
+    )
+    reporter$add2(
+        list(
+            name = "Table",
+            contents = list(
+                list(kind = "descr", content = paste0(
+                    "Showing top 50 pathways by padj in descending order. ",
+                    "Use 'Download the entire data' button to download all pathways."
+                )),
+                list(kind = "table", src = file.path(info$prefix, "fgsea"), data = list(nrows = 50))
+            )
+        ),
+        list(
+            name = "Summary Plot",
+            contents = list(
+                list(kind = "descr", content = paste0("Showing top ", case$top, " pathways.")),
+                list(kind = "image", src = file.path(info$prefix, "summary.png"))
+            )
+        ),
+        list(
+            name = "GSEA Plots",
+            contents = list(
+                list(kind = "descr", content = paste0("Showing top ", case$top, " pathways.")),
+                list(kind = "image", src = file.path(info$prefix, "pathways.png"))
+            )
+        ),
+        hs = c(info$section, info$name),
+        ui = "tabs"
+    )
+}
-top = envs$top
-envs$nproc = envs$ncores
-envs$inopts = NULL
-envs$metaopts = NULL
-envs$method = NULL
-envs$clscol = NULL
-envs$classes = NULL
-envs$ncores = NULL
-envs$top = NULL
-# the rest are the arguments for `fgsea()`
-runFGSEA(ranks, gmtfile, top, outdir, envs)
+sapply(names(cases), do_case)
+reporter$save(joboutdir)

biopipen/scripts/gsea/GSEA.R CHANGED Viewed

@@ -1,7 +1,7 @@
 # devtools::install_github("GSEA-MSigDB/GSEA_R")
-source("{{biopipen_dir}}/utils/io.R")
-source("{{biopipen_dir}}/utils/gsea.R")
+{{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
+{{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
 library(dplyr)
 library(tibble)

biopipen/scripts/gsea/PreRank.R CHANGED Viewed

@@ -1,16 +1,16 @@
 # PreRank the genes for GSEA analysis
 # See: https://gseapy.readthedocs.io/en/latest/_modules/gseapy/algorithm.html#ranking_metric
-source("{{biopipen_dir}}/utils/io.R")
-source("{{biopipen_dir}}/utils/gsea.R")
+{{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
+{{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
-infile = {{in.infile | quote}}
-metafile = {{in.metafile | quote}}
+infile = {{in.infile | r}}
+metafile = {{in.metafile | r}}
 {% if in.configfile %}
 config = {{in.config | read | toml_loads | r}}
 {% else %}
 config = list()
 {% endif %}
-outfile = {{out.outfile | quote}}
+outfile = {{out.outfile | r}}
 envs = {{envs | r}}
 clscol <- if (is.null(config$clscol)) envs$clscol else config$clscol
 classes <- if (is.null(config$classes)) envs$classes else config$classes

biopipen/scripts/misc/Config2File.py CHANGED Viewed

@@ -1,8 +1,8 @@
 import json
 import rtoml
-configstr = {{in.config | repr}}  # pyright: ignore
-outfile = {{out.outfile | quote}}  # pyright: ignore
+configstr: str = {{in.config | quote}}  # pyright: ignore  # noqa
+outfile: str = {{out.outfile | quote}}  # pyright: ignore
 infmt = {{envs.infmt | quote}}  # pyright: ignore
 outfmt = {{envs.outfmt | quote}}  # pyright: ignore

biopipen/scripts/misc/Plot.R ADDED Viewed

@@ -0,0 +1,80 @@
+library(gglogger)
+library(plotthis)
+library(rlang)
+library(biopipen.utils)
+datafile <- {{in.datafile | r}}
+plotfile <- {{out.plotfile | r}}
+plotprefix <- {{out.plotfile | prefix | r}}
+read_opts <- {{envs.read_opts | r: todot="-"}}
+envs <- {{envs | r}}
+fn <- envs$fn
+envs$fn <- NULL
+devpars <- envs$devpars
+envs$devpars <- NULL
+more_formats <- envs$more_formats
+envs$more_formats <- NULL
+save_code <- envs$save_code
+envs$save_code <- NULL
+envs$read_opts <- NULL
+if (endsWith(datafile, ".qs") || endsWith(datafile, ".qs2") ||
+    endsWith(datafile, ".rds") || endsWith(datafile, ".RDS")) {
+    envs$data <- read_obj(datafile)
+} else {
+    read_opts <- read_opts %||% list()
+    read_opts$file <- datafile
+    envs$data <- do.call(read.table, read_opts)
+}
+if (fn == "ManhattanPlot" && !is.null(envs$chromosomes)) {
+    norm_chroms <- function(chrs) {
+        chrs <- as.character(chrs)
+        if (length(chrs) == 1 && grepl(",", chrs)) {
+            chrs <- trimws(unlist(strsplit(chrs, ",")))
+        }
+        if (length(chrs) > 1) {
+            return(unique(unlist(sapply(chrs, function(chr) norm_chroms(chr)))))
+        }
+        if (!grepl("-", chrs)) { return(chrs) }
+        # expand chr1-22 -> chr1, chr2, ..., chr22
+        # chr1-22 -> 'chr1', '22'
+        chrs <- unlist(strsplit(chrs, "-"))
+        if (length(chrs) != 2) {
+            stop(paste0("Invalid chroms: ", chrs))
+        }
+        # detect prefix
+        prefix1 <- gsub("[0-9]", "", chrs[1])
+        prefix2 <- gsub("[0-9]", "", chrs[2])
+        if (nchar(prefix2) > 0 && prefix1 != prefix2) {
+            stop(paste0("Invalid chroms: ", chrs, " (prefix mismatch)"))
+        }
+        chr_a <- as.integer(substring(chrs[1], nchar(prefix1) + 1))
+        chr_b <- as.integer(substring(chrs[2], nchar(prefix2) + 1))
+        chr_min <- min(chr_a, chr_b)
+        chr_max <- max(chr_a, chr_b)
+        return(paste0(prefix1, chr_min:chr_max))
+    }
+    envs$chromosomes <- norm_chroms(envs$chromosomes)
+}
+plotfn <- utils::getFromNamespace(fn, "plotthis")
+if (save_code) {
+    plotfn <- gglogger::register(plotfn, name = fn)
+}
+p <- do_call(plotfn, envs)
+save_plot(p, plotprefix, devpars, formats = unique(c("png", more_formats)))
+if (save_code) {
+    save_plotcode(
+        p,
+        setup = c('library(plotthis)', '', 'load("data.RData")', 'list2env(envs, envir = .GlobalEnv)'),
+        prefix = plotprefix,
+        "envs",
+        auto_data_setup = FALSE
+    )
+}

biopipen/scripts/misc/Shell.sh ADDED Viewed

@@ -0,0 +1,15 @@
+# shellcheck disable=all
+export infile={{in.infile | quote}}
+export outfile={{out.outfile | quote}}
+is_outdir={{envs.outdir | int}}
+cmd_given={{envs.cmd | bool | int}}
+{% set _ = out.outfile | dirname | joinpath: "cmd.sh" | as_path | attr: 'write_text' | call: envs.cmd %}
+cmd="{{proc.lang}} {{out.outfile | dirname | joinpath: 'cmd.sh'}}"
+if [[ "$cmd_given" -eq 0 ]]; then
+    echo "No command given." 1>&2
+    exit 1
+fi
+if [[ $is_outdir -eq 1 ]]; then
+    mkdir -p "$outfile"
+fi
+eval "$cmd"

biopipen/scripts/misc/Str2File.py CHANGED Viewed

@@ -1,6 +1,6 @@
-instr = {{in.str | repr}}  # pyright: ignore
+instr: str = {{in.str | quote}}  # pyright: ignore  # noqa
 name = {{repr(in.name or envs.name)}}  # pyright: ignore
-outfile = {{out.outfile | quote}}  # pyright: ignore
+outfile: str = {{out.outfile | quote}}  # pyright: ignore
 with open(outfile, "wt") as fout:
     fout.write(instr)

biopipen/scripts/plot/Heatmap.R CHANGED Viewed

@@ -1,6 +1,6 @@
-source("{{biopipen_dir}}/utils/io.R")
-source("{{biopipen_dir}}/utils/misc.R")
-source("{{biopipen_dir}}/utils/plot.R")
+{{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
+{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
+{{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
 # to compile the expressions
 library(ComplexHeatmap)

biopipen 0.21.0__py3-none-any.whl → 0.34.26__py3-none-any.whl

biopipen 0.21.0 (py3-none-any.whl) → 0.34.26 (py3-none-any.whl)