PyPI - biopipen - Versions diffs - 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl - Mend

biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biopipen might be problematic. Click here for more details.

Files changed (149)

biopipen/__init__.py +1 -1
biopipen/core/filters.py +10 -183
biopipen/core/proc.py +5 -3
biopipen/core/testing.py +8 -1
biopipen/ns/bam.py +40 -4
biopipen/ns/cnv.py +1 -1
biopipen/ns/cnvkit.py +1 -1
biopipen/ns/delim.py +1 -1
biopipen/ns/gsea.py +63 -37
biopipen/ns/misc.py +38 -0
biopipen/ns/plot.py +8 -0
biopipen/ns/scrna.py +290 -288
biopipen/ns/scrna_metabolic_landscape.py +207 -366
biopipen/ns/tcr.py +165 -97
biopipen/reports/bam/CNVpytor.svelte +4 -9
biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
biopipen/reports/scrna/DimPlots.svelte +1 -1
biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
biopipen/reports/snp/PlinkCallRate.svelte +2 -2
biopipen/reports/snp/PlinkFreq.svelte +1 -1
biopipen/reports/snp/PlinkHWE.svelte +1 -1
biopipen/reports/snp/PlinkHet.svelte +1 -1
biopipen/reports/snp/PlinkIBD.svelte +1 -1
biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
biopipen/scripts/bam/CNAClinic.R +41 -6
biopipen/scripts/bam/CNVpytor.py +2 -1
biopipen/scripts/bam/ControlFREEC.py +2 -3
biopipen/scripts/bam/SamtoolsView.py +33 -0
biopipen/scripts/cnv/AneuploidyScore.R +25 -13
biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
biopipen/scripts/cnv/TMADScore.R +4 -4
biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
biopipen/scripts/delim/RowsBinder.R +1 -1
biopipen/scripts/delim/SampleInfo.R +4 -1
biopipen/scripts/gene/GeneNameConversion.R +14 -12
biopipen/scripts/gsea/Enrichr.R +2 -2
biopipen/scripts/gsea/FGSEA.R +184 -50
biopipen/scripts/gsea/PreRank.R +3 -3
biopipen/scripts/misc/Plot.R +80 -0
biopipen/scripts/plot/VennDiagram.R +2 -2
biopipen/scripts/protein/ProdigySummary.R +34 -27
biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
biopipen/scripts/regulatory/motifs-common.R +10 -9
biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
biopipen/scripts/rnaseq/Simulation.R +0 -2
biopipen/scripts/rnaseq/UnitConversion.R +6 -5
biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
biopipen/scripts/scrna/CellCellCommunication.py +1 -1
biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
biopipen/scripts/scrna/CellsDistribution.R +1 -1
biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
biopipen/scripts/scrna/MarkersFinder.R +348 -217
biopipen/scripts/scrna/MetaMarkers.R +3 -3
biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
biopipen/scripts/scrna/RadarPlots.R +1 -1
biopipen/scripts/scrna/ScFGSEA.R +157 -75
biopipen/scripts/scrna/ScSimulation.R +11 -10
biopipen/scripts/scrna/ScVelo.py +605 -0
biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
biopipen/scripts/scrna/SeuratClustering.R +31 -48
biopipen/scripts/scrna/SeuratLoading.R +2 -2
biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
biopipen/scripts/scrna/SeuratPreparing.R +76 -24
biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
biopipen/scripts/scrna/Subset10X.R +2 -2
biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
biopipen/scripts/snp/MatrixEQTL.R +39 -20
biopipen/scripts/snp/PlinkCallRate.R +43 -34
biopipen/scripts/snp/PlinkFreq.R +34 -41
biopipen/scripts/snp/PlinkHWE.R +23 -18
biopipen/scripts/snp/PlinkHet.R +26 -22
biopipen/scripts/snp/PlinkIBD.R +30 -34
biopipen/scripts/stats/ChowTest.R +9 -8
biopipen/scripts/stats/DiffCoexpr.R +13 -11
biopipen/scripts/stats/LiquidAssoc.R +7 -8
biopipen/scripts/stats/Mediation.R +8 -8
biopipen/scripts/stats/MetaPvalue.R +11 -13
biopipen/scripts/stats/MetaPvalue1.R +6 -5
biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
biopipen/scripts/tcr/ClonalStats.R +5 -4
biopipen/scripts/tcr/CloneResidency.R +3 -3
biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
biopipen/scripts/tcr/ScRepLoading.R +114 -92
biopipen/scripts/tcr/TCRClusterStats.R +2 -2
biopipen/scripts/tcr/TCRClustering.R +86 -97
biopipen/scripts/tcr/TESSA.R +65 -115
biopipen/scripts/tcr/VJUsage.R +5 -5
biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
biopipen/utils/common_docstrs.py +66 -63
biopipen/utils/reporter.py +177 -0
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
biopipen/reports/utils/gsea.liq +0 -110
biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
biopipen/utils/caching.R +0 -44
biopipen/utils/gene.R +0 -95
biopipen/utils/gsea.R +0 -329
biopipen/utils/io.R +0 -20
biopipen/utils/misc.R +0 -602
biopipen/utils/mutate_helpers.R +0 -581
biopipen/utils/plot.R +0 -209
biopipen/utils/repr.R +0 -146
biopipen/utils/rnaseq.R +0 -48
biopipen/utils/single_cell.R +0 -207
{biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0

biopipen/scripts/cnv/TMADScoreSummary.R CHANGED Viewed

@@ -1,9 +1,7 @@
-library(ggplot2)
-library(ggprism)
 library(dplyr)
 library(tidyr)
 library(tibble)
-library(patchwork)
+library(plotthis)
 tmadfiles = {{in.tmadfiles | r}}
 metafile  = {{in.metafile | r}}
@@ -47,7 +45,7 @@ if (!is.null(group_cols)) {
 }
 data = data.frame(Sample = sams, tMAD = tmads)
-if (file.exists(metafile) && length(meta_cols) > 0) {
+if (is.character(metafile) && file.exists(metafile) && length(meta_cols) > 0) {
     metadf = read.table(metafile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
     if (!is.null(metadf$Sample)) {
         metadf$Sample = as.character(metadf$Sample)
@@ -63,20 +61,12 @@ if (file.exists(metafile) && length(meta_cols) > 0) {
 write.table(data, file=file.path(outdir, "tMAD.txt"), sep="\t", quote=F, row.names=F)
 # bar plot for all samples without grouping
-p = ggplot(data, aes(x=Sample, y=tMAD)) +
-    geom_bar(stat="identity", fill="steelblue") +
-    theme_prism() +
-    theme(
-        axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
-        axis.title.x = element_blank(),
-        axis.title.y = element_text(size=12),
-        axis.text.y = element_text(size=12),
-        legend.position = "none",
-    ) +
-    labs(
-        x = NULL,
-        y = "tMAD",
-    )
+p <- BarPlot(
+    data = data,
+    x = "Sample",
+    y = "tMAD",
+    x_text_angle = 90
+)
 png(file.path(outdir, "tMAD.png"), width=400 + length(sams) * 12, height=800, res=100)
 print(p)
@@ -88,41 +78,30 @@ if (!is.null(group_cols)) {
         if (!grepl(",", group_col, fixed = TRUE)) {
             # Bar plot with this group_col, but with different fill colors
             # for each group, and samples from the same group are next to each other
-            p = ggplot(
-                    data %>% arrange(!!sym(group_col)) %>% mutate(Sample=factor(Sample, levels=Sample)),
-                    aes(x=Sample, y=tMAD, fill=!!sym(group_col))
-                ) +
-                geom_bar(stat="identity") +
-                theme_prism() +
-                theme(
-                    axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
-                    axis.title.x = element_blank(),
-                    axis.title.y = element_text(size=12),
-                    axis.text.y = element_text(size=12),
-                ) +
-                labs(
-                    x = NULL,
-                    y = "tMAD",
-                )
+            gdata <- data %>% arrange(!!sym(group_col)) %>% mutate(Sample=factor(Sample, levels=unique(Sample)))
+            p <- BarPlot(
+                data = gdata,
+                x = "Sample",
+                y = "tMAD",
+                fill = group_col,
+                x_text_angle = 90
+            )
             png(file.path(outdir, paste0("tMAD_", group_col, "_bar.png")), width=400 + length(sams) * 12, height=600, res=100)
             print(p)
             dev.off()
             # Box plot overlays with violin plot with this group_col
-            p = ggplot(data, aes(x=!!sym(group_col), y=tMAD)) +
-                geom_boxplot(outlier.shape=NA, fill="white", color="black") +
-                geom_violin(fill="steelblue", alpha=0.5) +
-                theme_prism() +
-                theme(
-                    axis.title.x = element_text(size=12),
-                    axis.title.y = element_text(size=12),
-                    axis.text.y = element_text(size=12),
-                ) +
-                labs(
-                    x = group_col,
-                    y = "tMAD",
-                )
+            p <- ViolinPlot(
+                data = gdata,
+                x = group_col,
+                y = "tMAD",
+                x_text_angle = 90,
+                add_box = TRUE,
+                add_point = TRUE,
+                comparisons = TRUE,
+                sig_label = "p.format"
+            )
             png(file.path(outdir, paste0("tMAD_", group_col, "_box_violin.png")), width=1000, height=600, res=100)
             print(p)
@@ -137,25 +116,17 @@ if (!is.null(group_cols)) {
             # concatenate them together using patch work, with ncol=2
             # calcuate the height and width of the plot based on the number of
             # groups
-            ps = data %>%
-                group_by(!!sym(group_col1)) %>%
-                group_map(function(.x, .y) {
-                    p = ggplot(
-                            .x %>% arrange(!!sym(group_col2)) %>% mutate(Sample=factor(Sample, levels=Sample)),
-                            aes(x=Sample, y=tMAD, fill=!!sym(group_col2))
-                        ) +
-                        geom_bar(stat="identity") +
-                        theme_prism() +
-                        theme(
-                            axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
-                            axis.title.x = element_blank(),
-                            axis.title.y = element_text(size=12),
-                            axis.text.y = element_text(size=12),
-                        ) +
-                        labs(x = NULL, y = "tMAD") +
-                        ggtitle(.y[[group_col1]][1])
-                    p
-                })
+            gdata <- data %>% arrange(!!sym(group_col1), !!sym(group_col2)) %>%
+                mutate(Sample=factor(Sample, levels=unique(Sample)))
+            p <- BarPlot(
+                data = gdata,
+                x = "Sample",
+                y = "tMAD",
+                split_by = group_col1,
+                fill = group_col2,
+                x_text_angle = 90,
+                ncol = 2
+            )
             png(
                 file.path(outdir, paste0("tMAD_", group_col, "_bar.png")),
@@ -163,26 +134,22 @@ if (!is.null(group_cols)) {
                 height=length(unique(data[[group_col1]])) * 200,
                 res=100
             )
-            print(wrap_plots(ps, ncol=2))
+            print(p)
             dev.off()
             # Do the same for Voilin + boxplot
-            ps = data %>%
-                group_by(!!sym(group_col1)) %>%
-                group_map(function(.x, .y) {
-                    p = ggplot(.x, aes(x=!!sym(group_col2), y=tMAD)) +
-                        geom_boxplot(outlier.shape=NA, fill="white", color="black") +
-                        geom_violin(fill="steelblue", alpha=0.5) +
-                        theme_prism() +
-                        theme(
-                            axis.title.x = element_text(size=12),
-                            axis.title.y = element_text(size=12),
-                            axis.text.y = element_text(size=12),
-                        ) +
-                        labs(x = group_col2, y = "tMAD") +
-                        ggtitle(.y[[group_col1]][1])
-                    p
-                })
+            p <- ViolinPlot(
+                data = gdata,
+                x = group_col2,
+                y = "tMAD",
+                split_by = group_col1,
+                x_text_angle = 90,
+                add_box = TRUE,
+                add_point = TRUE,
+                comparisons = TRUE,
+                sig_label = "p.format",
+                ncol = 2
+            )
             png(
                 file.path(outdir, paste0("tMAD_", group_col, "_box_violin.png")),
@@ -190,7 +157,7 @@ if (!is.null(group_cols)) {
                 height=length(unique(data[[group_col1]])) * 200,
                 res=100
             )
-            print(wrap_plots(ps, ncol=2))
+            print(p)
             dev.off()
         }
     }

biopipen/scripts/cnvkit/CNVkitGuessBaits.py CHANGED Viewed

@@ -5,10 +5,10 @@ from pathlib import Path, PosixPath  # for as_path
 from biopipen.utils.misc import run_command, dict_to_cli_args
-bamfiles = {{in.bamfiles | repr}}  # pyright: ignore  # noqa
-atfile = {{in.atfile | repr}}  # pyright: ignore
+bamfiles = {{in.bamfiles | each: str | repr}}  # pyright: ignore  # noqa
+atfile = {{in.atfile | quote}}  # pyright: ignore
-targetfile = {{out.targetfile | repr}}  # pyright: ignore
+targetfile = {{out.targetfile | quote}}  # pyright: ignore
 covfile = {{out.targetfile | as_path | attr: "with_suffix" | call: ".cnn" | repr}}  # pyright: ignore
 cnvkit: str = {{envs.cnvkit | repr}}  # pyright: ignore

biopipen/scripts/cnvkit/CNVkitHeatmap.py CHANGED Viewed

@@ -1,12 +1,12 @@
 from pathlib import Path
-from diot import Diot
+from diot import Diot  # type: ignore[import]
 from biopipen.utils.misc import run_command, dict_to_cli_args
-segfiles = {{in.segfiles | repr}}  # pyright: ignore # noqa  # noqa
+segfiles = {{in.segfiles | default: [] | each: str | repr}}  # pyright: ignore # noqa  # noqa
 sample_sex = {{in.sample_sex | repr}}  # pyright: ignore
-outdir: str = {{out.outdir | repr}}  # pyright: ignore
+outdir: str = {{out.outdir | quote}}  # pyright: ignore
 cnvkit = {{envs.cnvkit | quote}}  # pyright: ignore
 convert = {{envs.convert | quote}}  # pyright: ignore
 convert_args = {{envs.convert_args | repr}}  # pyright: ignore

biopipen/scripts/cnvkit/CNVkitReference.py CHANGED Viewed

@@ -1,9 +1,9 @@
 from pathlib import Path
 from biopipen.utils.misc import run_command, dict_to_cli_args
-covfiles = {{in.covfiles | repr}}  # pyright: ignore  # noqa
-target_file = {{in.target_file | repr}}  # pyright: ignore
-antitarget_file = {{in.antitarget_file | repr}}  # pyright: ignore
+covfiles = {{in.covfiles | default: [] | each: str | repr}}  # pyright: ignore  # noqa
+target_file = {{in.target_file | quote: quote_none=False}}  # pyright: ignore
+antitarget_file = {{in.antitarget_file | quote: quote_none=False}}  # pyright: ignore
 sample_sex = {{in.sample_sex | repr}}  # pyright: ignore
 outfile = {{out.outfile | quote}}  # pyright: ignore
 reffile: str = {{envs.ref | quote}}  # pyright: ignore

biopipen/scripts/delim/RowsBinder.R CHANGED Viewed

@@ -1,4 +1,4 @@
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
+library(biopipen.utils)
 infiles <- {{in.infiles | r}}
 outfile <- {{out.outfile | r}}

biopipen/scripts/delim/SampleInfo.R CHANGED Viewed

@@ -1,5 +1,6 @@
 library(rlang)
 library(dplyr)
+library(gglogger)
 library(biopipen.utils)
 library(plotthis)
@@ -132,7 +133,7 @@ if (length(stats) > 0) {
             case$data <- mutdata
         }
-        p <- do_call(gglogger::register(plot_fn, name = plot_type), case)
+        p <- do_call(plot_fn, case)
         save_plot(p, info$prefix, devpars, formats = more_formats)
         if (save_code) {
             save_plotcode(
@@ -155,3 +156,5 @@ if (length(stats) > 0) {
         )
     }
 }
+reporter$save(joboutdir)

biopipen/scripts/gene/GeneNameConversion.R CHANGED Viewed

@@ -1,8 +1,7 @@
-{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
-{{ biopipen_dir | joinpaths: "utils", "gene.R" | source_r }}
+library(biopipen.utils)
-infile <- {{in.infile | quote}}
-outfile <- {{out.outfile | quote}}
+infile <- {{in.infile | r}}
+outfile <- {{out.outfile | r}}
 notfound <- {{envs.notfound | r}}
 genecol <- {{envs.genecol | r}}
 output <- {{envs.output | r}}
@@ -11,6 +10,8 @@ infmt <- {{envs.infmt | r}}
 outfmt <- {{envs.outfmt | r}}
 species <- {{envs.species | r}}
+log <- get_logger()
 if (is.na(notfound)) {
     notfound = "na"
 }
@@ -18,7 +19,7 @@ if (is.na(notfound)) {
 df <- read.table(infile, header=TRUE, sep="\t", check.names=FALSE)
 if (genecol == 0) {
-    log_warn("envs.genecol should be 1-based, but 0 was given. Using 1 instead.")
+    log$warn("envs.genecol should be 1-based, but 0 was given. Using 1 instead.")
     genecol <- 1
 }
@@ -27,12 +28,13 @@ if (dup == "combine") { dup <- ";" }
 genes <- df[[genecol]]
 converted <- gene_name_conversion(
-    genes=genes,
-    species=species,
-    infmt=infmt,
-    outfmt=outfmt,
-    notfound=notfound,
-    dup=dup
+    genes = genes,
+    species = species,
+    infmt = infmt,
+    outfmt = outfmt,
+    notfound = notfound,
+    dup = dup,
+    suppress_messages = FALSE
 )
 #    <genecol> <outfmt>
 # 1  1255_g_at   GUCA1A
@@ -50,7 +52,7 @@ if (notfound == "skip" || notfound == "ignore") {
 if (output == "append") {
     if (outfmt %in% colnames(df)) {
-        log_warn("The output column name already exists in the input dataframe. Appending with a suffix `_1`.")
+        log$warn("The output column name already exists in the input dataframe. Appending with a suffix `_1`.")
         outcol <- paste(outfmt, "_1", sep="")
     }
     df[[outcol]] <- converted[[outfmt]]

biopipen/scripts/gsea/Enrichr.R CHANGED Viewed

@@ -2,8 +2,8 @@
 {{ biopipen_dir | joinpaths: "utils", "gene.R" | source_r }}
 {{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
-infile = {{in.infile | quote}}
-outdir = {{out.outdir | quote}}
+infile = {{in.infile | r}}
+outdir = {{out.outdir | r}}
 genecol = {{envs.genecol | r}}
 genename = {{envs.genename | r}}
 dbs = {{envs.dbs | r}}

biopipen/scripts/gsea/FGSEA.R CHANGED Viewed

@@ -1,58 +1,192 @@
-# PreRank the genes for GSEA analysis
-# See: https://gseapy.readthedocs.io/en/latest/_modules/gseapy/algorithm.html#ranking_metric
-{{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
-{{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
-infile = {{in.infile | quote}}
-metafile = {{in.metafile | quote}}
-gmtfile = {{in.gmtfile | quote}}
-{% if in.configfile %}
-config = {{in.config | read | toml_loads | r}}
-{% else %}
-config = list()
-{% endif %}
-outdir = {{out.outdir | quote}}
-envs = {{envs | r}}
-clscol <- if (is.null(config$clscol)) envs$clscol else config$clscol
-classes <- if (is.null(config$classes)) envs$classes else config$classes
-if (is.null(clscol)) {
-    stop("No `clscol` specified.")
-}
+library(rlang)
+library(biopipen.utils)
-if (is.null(classes) || length(classes) != 2) {
-    stop(paste("`classes` must be a pair of labels."))
-}
+# input & output
+infile = {{in.infile | r}}
+metafile = {{in.metafile | r}}
+outdir = {{out.outdir | r}}
+joboutdir = {{job.outdir | r}}
+# envs
+ncores = {{envs.ncores | r}}
+case = {{envs.case | r}}
+control = {{envs.control | r}}
+gmtfile = {{envs.gmtfile | r}}
+method = {{envs.method | r}}
+clscol = {{envs.clscol | r}}
+top = {{envs.top | r}}
+eps = {{envs.eps | r}}
+minsize = {{envs.minSize | default: envs.minsize | r}}
+maxsize = {{envs.maxSize | default: envs.maxsize | r}}
+rest = {{envs.rest | r}}
+cases = {{envs.cases | r}}
+log <- get_logger()
+reporter <- get_reporter()
+defaults <- list(
+    case = case,
+    control = control,
+    gmtfile = gmtfile,
+    method = method,
+    clscol = clscol,
+    top = top,
+    eps = eps,
+    minsize = minsize,
+    maxsize = maxsize,
+    rest = rest
+)
+cases <- expand_cases(cases, defaults, default_case = "GSEA")
+log$info("Reading input file ...")
+indata <- read.table(infile, header=TRUE, stringsAsFactors=FALSE, row.names=1, sep="\t", quote="", check.names=FALSE)
-if (is.character(envs$inopts) && inopts == "rds") {
-    indata = readRDS(infile)
+if (!is.null(metafile)) {
+    log$info("Reading metadata file ...")
+    metadata <- read.table(metafile, header=TRUE, stringsAsFactors=FALSE, row.names=NULL, sep="\t", quote="", check.names=FALSE)
 } else {
-    indata = read.table.opts(infile, envs$inopts)
+    metadata <- NULL
 }
-metadata = read.table.opts(metafile, envs$metaopts)
-allclasses = metadata[colnames(indata), clscol]
+do_case <- function(name) {
+    log$info("Processing case: {name} ...")
+    case <- cases[[name]]
+    info <- case_info(name, outdir, create = TRUE)
-ranks = prerank(indata, classes[1], classes[2], allclasses, envs$method)
+    if (is.null(case$case) && is.null(case$control)) {
+        stop("Either `case` or `control` must be specified in the case.")
+    }
+    if (is.null(case$gmtfile)) {
+        stop("`gmtfile` must be specified in the case.")
+    }
+    if (is.null(case$clscol)) {
+        stop("`clscol` must be specified in the case.")
+    }
+    if (!is.null(metadata) && length(case$clscol) > 1) {
+        stop("When `in.metafile` is specified, `envs.clscol` must be a single column name.")
+    }
+    if (!is.null(metadata)) {
+        samples <- colnames(indata)
+        if (!"Sample" %in% colnames(metadata)) {
+            colnames(metadata)[1] <- "Sample"
+        }
+        metadata <- metadata[match(samples, metadata$Sample), , drop=FALSE]
+        case$clscol <- as.character(metadata[[case$clscol]])
+    }
+    if (length(unique(case$clscol)) < 2) {
+        stop("The `clscol` must have at least two unique values.")
+    }
+    if (length(unique(case$clscol)) == 2) {
+        case$case <- case$case %||% setdiff(unique(case$clscol), case$control)
+        case$control <- case$control %||% setdiff(unique(case$clscol), case$case)
+    } else {
+        if (is.null(case$case) || is.null(case$control)) {
+            stop("When `clscol` has more than two unique values, both `case` and `control` must be specified.")
+        }
+    }
+    log$info("- Running pre-ranking ...")
+    ranks <- RunGSEAPreRank(
+        indata,
+        classes = case$clscol,
+        case = case$case,
+        control = case$control,
+        method = case$method
+    )
+    if (all(is.na(ranks))) {
+        if (length(case$clscol) < 10) {
+            log$warn("  Ignoring this case because all gene ranks are NA and there are <10 samples.")
+            reporter$add2(
+                list(
+                    kind = "error",
+                    content = paste0("Not enough samples (n = ", length(case$clscol), ") to run fgsea.")
+                ),
+                hs = c(info$section, info$name)
+            )
+            return(NULL)
+        } else {
+            stop(paste0(
+                "All gene ranks are NA (# samples = ",
+                length(case$clscol),
+                "). ",
+                "It's probably due to high missing rate in the data. ",
+                "You may want to try a different `envs$method` for pre-ranking."
+            ))
+        }
+    }
-write.table(
-    ranks,
-    file.path(outdir, "fgsea.rank"),
-    row.names=F,
-    col.names=T,
-    sep="\t",
-    quote=F
-)
+    log$info("- Running GSEA ...")
+    case$rest$ranks <- ranks
+    case$rest$genesets <- ParseGMT(case$gmtfile)
+    case$rest$minSize <- case$rest$minSize %||% case$rest$minsize %||% case$minsize
+    case$rest$maxSize <- case$rest$maxSize %||% case$rest$maxsize %||% case$maxsize
+    case$rest$eps <- case$eps
+    case$rest$nproc <- case$ncores
+    case$rest$minsize <- NULL
+    case$rest$maxsize <- NULL
+    result <- do_call(RunGSEA, case$rest)
+    write.table(
+        result,
+        file.path(info$prefix, "fgsea.tsv"),
+        row.names = FALSE,
+        col.names = TRUE,
+        sep = "\t",
+        quote = FALSE
+    )
+    p_summary <- VizGSEA(
+        result,
+        plot_type = "summary",
+        top_term = case$top
+    )
+    save_plot(
+        p_summary,
+        file.path(info$prefix, "summary"),
+        devpars = list(res = 100, height = attr(p_summary, "height") * 100, width = attr(p_summary, "width") * 100),
+        formats = "png"
+    )
+    p_gsea <- VizGSEA(
+        result,
+        plot_type = "gsea",
+        gs = result$pathway[1:min(case$top, nrow(result))]
+    )
+    save_plot(
+        p_gsea,
+        file.path(info$prefix, "pathways"),
+        devpars = list(res = 100, height = attr(p_gsea, "height") * 100, width = attr(p_gsea, "width") * 100),
+        formats = "png"
+    )
+    reporter$add2(
+        list(
+            name = "Table",
+            contents = list(
+                list(kind = "descr", content = paste0(
+                    "Showing top 50 pathways by padj in descending order. ",
+                    "Use 'Download the entire data' button to download all pathways."
+                )),
+                list(kind = "table", src = file.path(info$prefix, "fgsea"), data = list(nrows = 50))
+            )
+        ),
+        list(
+            name = "Summary Plot",
+            contents = list(
+                list(kind = "descr", content = paste0("Showing top ", case$top, " pathways.")),
+                list(kind = "image", src = file.path(info$prefix, "summary.png"))
+            )
+        ),
+        list(
+            name = "GSEA Plots",
+            contents = list(
+                list(kind = "descr", content = paste0("Showing top ", case$top, " pathways.")),
+                list(kind = "image", src = file.path(info$prefix, "pathways.png"))
+            )
+        ),
+        hs = c(info$section, info$name),
+        ui = "tabs"
+    )
+}
-top = envs$top
-envs$nproc = envs$ncores
-envs$inopts = NULL
-envs$metaopts = NULL
-envs$method = NULL
-envs$clscol = NULL
-envs$classes = NULL
-envs$ncores = NULL
-envs$top = NULL
-# the rest are the arguments for `fgsea()`
-runFGSEA(ranks, gmtfile, top, outdir, envs)
+sapply(names(cases), do_case)
+reporter$save(joboutdir)

biopipen/scripts/gsea/PreRank.R CHANGED Viewed

@@ -3,14 +3,14 @@
 {{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
 {{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
-infile = {{in.infile | quote}}
-metafile = {{in.metafile | quote}}
+infile = {{in.infile | r}}
+metafile = {{in.metafile | r}}
 {% if in.configfile %}
 config = {{in.config | read | toml_loads | r}}
 {% else %}
 config = list()
 {% endif %}
-outfile = {{out.outfile | quote}}
+outfile = {{out.outfile | r}}
 envs = {{envs | r}}
 clscol <- if (is.null(config$clscol)) envs$clscol else config$clscol
 classes <- if (is.null(config$classes)) envs$classes else config$classes

biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

Potentially problematic release.

biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl