biopipen 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (150)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +307 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +14 -2
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  73. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  74. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  75. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  76. biopipen/scripts/scrna/RadarPlots.R +1 -1
  77. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  78. biopipen/scripts/scrna/ScSimulation.R +11 -10
  79. biopipen/scripts/scrna/ScVelo.py +605 -0
  80. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  81. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  82. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  83. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  84. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  85. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  86. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  87. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  88. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  89. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  90. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  91. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  92. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  93. biopipen/scripts/scrna/Subset10X.R +2 -2
  94. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  95. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  96. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  99. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  100. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  101. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  102. biopipen/scripts/snp/PlinkFreq.R +34 -41
  103. biopipen/scripts/snp/PlinkHWE.R +23 -18
  104. biopipen/scripts/snp/PlinkHet.R +26 -22
  105. biopipen/scripts/snp/PlinkIBD.R +30 -34
  106. biopipen/scripts/stats/ChowTest.R +9 -8
  107. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  108. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  109. biopipen/scripts/stats/Mediation.R +8 -8
  110. biopipen/scripts/stats/MetaPvalue.R +11 -13
  111. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  112. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  113. biopipen/scripts/tcr/ClonalStats.R +5 -4
  114. biopipen/scripts/tcr/CloneResidency.R +3 -3
  115. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  116. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  117. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  118. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  119. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  120. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  121. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  122. biopipen/scripts/tcr/TCRClustering.R +86 -97
  123. biopipen/scripts/tcr/TESSA.R +65 -115
  124. biopipen/scripts/tcr/VJUsage.R +5 -5
  125. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  126. biopipen/utils/common_docstrs.py +66 -63
  127. biopipen/utils/reporter.py +177 -0
  128. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  129. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/RECORD +131 -144
  130. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  131. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  132. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  133. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  134. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  135. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  136. biopipen/reports/utils/gsea.liq +0 -110
  137. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  138. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  139. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  140. biopipen/utils/caching.R +0 -44
  141. biopipen/utils/gene.R +0 -95
  142. biopipen/utils/gsea.R +0 -329
  143. biopipen/utils/io.R +0 -20
  144. biopipen/utils/misc.R +0 -602
  145. biopipen/utils/mutate_helpers.R +0 -581
  146. biopipen/utils/plot.R +0 -209
  147. biopipen/utils/repr.R +0 -146
  148. biopipen/utils/rnaseq.R +0 -48
  149. biopipen/utils/single_cell.R +0 -207
  150. {biopipen-0.33.0.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
@@ -1,165 +1,471 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
3
-
4
- library(parallel)
1
+ library(rlang)
5
2
  library(Seurat)
3
+ library(biopipen.utils)
4
+ library(enrichit)
5
+ library(tidyseurat)
6
6
 
7
7
  sobjfile <- {{ in.sobjfile | r }}
8
8
  outdir <- {{ out.outdir | r }}
9
- joboutdir <- {{ job.outdir | r }}
10
- gmtfile <- {{ envs.gmtfile | r }}
11
9
  ncores <- {{ envs.ncores | r }}
12
- fgsea <- {{ envs.fgsea | r }}
13
- top <- {{ envs.top | r }}
14
10
  prerank_method <- {{ envs.prerank_method | r }}
15
- grouping <- {{ envs.grouping | r }}
16
- grouping_prefix <- {{ envs.grouping_prefix | r }}
17
- subsetting_cols <- {{ envs.subsetting | r }}
18
- subsetting_prefix <- {{ envs.subsetting_prefix | r }}
11
+ gmtfile <- {{ envs.gmtfile | r }}
12
+ subset_by <- {{ envs.subset_by | r }}
13
+ group_by <- {{ envs.group_by | r }}
14
+ comparisons <- {{ envs.comparisons | r }}
15
+ fgsea_args <- {{ envs.fgsea_args | r }}
16
+ plots <- {{ envs.plots | r }}
17
+ cases <- {{ envs.cases | r }}
19
18
 
20
- if (!is.null(grouping_prefix) && nchar(grouping_prefix) > 0) {
21
- grouping_prefix = paste0(grouping_prefix, "_")
22
- }
19
+ set.seed(8525)
23
20
 
24
- if (!is.null(subsetting_prefix) && nchar(subsetting_prefix) > 0) {
25
- subsetting_prefix = paste0(subsetting_prefix, "_")
26
- }
21
+ log <- get_logger()
22
+ reporter <- get_reporter()
27
23
 
28
- set.seed(8525)
24
+ log$info("Loading Seurat object ...")
25
+ sobj <- read_obj(sobjfile)
29
26
 
30
- ## gmt_pathways is copied from fgsea package.
31
- gmt_pathways <- function(gmt_file) {
32
- pathway_lines <- strsplit(readLines(gmt_file), "\t")
33
- pathways <- lapply(pathway_lines, tail, -2)
34
- names(pathways) <- sapply(pathway_lines, head, 1)
35
- pathways
36
- }
27
+ defaults <- list(
28
+ prerank_method = prerank_method,
29
+ subset_by = subset_by,
30
+ group_by = group_by,
31
+ comparisons = comparisons,
32
+ fgsea_args = fgsea_args,
33
+ plots = plots
34
+ )
35
+ log$info("Expanding cases ...")
36
+ default_case <- subset_by %||% "DEFAULT"
37
+ cases <- expand_cases(
38
+ cases,
39
+ defaults,
40
+ function(name, case) {
41
+ if (is.null(case$group_by)) {
42
+ stop("'group_by' is required in case: ", name)
43
+ }
44
+ stats::setNames(list(case), name)
45
+ },
46
+ default_case = default_case)
37
47
 
38
- gmtfile <- localizeGmtfile(gmtfile)
39
- pathways <- gmt_pathways(gmtfile)
48
+ log$info("Loading metabolic pathways ...")
49
+ pathways <- ParseGMT(gmtfile)
50
+ pathway_names <- names(pathways)
40
51
  metabolics <- unique(as.vector(unname(unlist(pathways))))
41
- sobj <- readRDS(sobjfile)
42
52
 
43
- do_one_group <- function(obj, features, group, outputdir, h1) {
44
- log_info(paste("- Processing group", grouping, ":", group))
45
- groupname = paste0(grouping_prefix, group)
46
- odir = file.path(outputdir, slugify(groupname))
47
- dir.create(odir, showWarnings = FALSE)
48
53
 
49
- classes = as.character(obj@meta.data[[grouping]])
50
- classes[classes != group] <- "_REST"
51
- classes[classes == group] <- groupname
54
+ do_comparison <- function(object, caseinfo, subset_by, subset_val, group_by, group1, group2, prerank_method, plots, fgsea_args) {
55
+ log$info(" {group_by}: {group1} vs {group2} ...")
56
+ if (!is.null(group2)) {
57
+ # object <- subset(object, subset = !!sym(group_by) %in% c(group1, group2))
58
+ object <- tryCatch(
59
+ filter(object, !!sym(group_by) %in% c(group1, group2)),
60
+ error = function(e) NULL
61
+ )
62
+ }
63
+
64
+ if (!is.null(subset_by)) {
65
+ if (length(cases) == 1 && identical(caseinfo$name, subset_by)) {
66
+ # No need to show the case name in report
67
+ h1 <- paste0(subset_by, ": ", subset_val)
68
+ h2 <- paste0(group_by, ": ", group1, " vs ", group2 %||% "REST")
69
+ h3 <- "#"
70
+ } else {
71
+ h1 <- caseinfo$name
72
+ h2 <- paste0(subset_by, ": ", subset_val)
73
+ h3 <- paste0(group_by, ": ", group1, " vs ", group2 %||% "REST")
74
+ }
75
+ odir <- file.path(caseinfo$prefix, slugify(paste0(subset_by, "_", subset_val)))
76
+ } else if (length(cases) > 1) {
77
+ h1 <- caseinfo$name
78
+ h2 <- "#"
79
+ h3 <- paste0(group_by, ": ", group1, " vs ", group2 %||% "REST")
80
+ odir <- file.path(caseinfo$prefix, "No_Subsetting")
81
+ } else {
82
+ h1 <- paste0(group_by, ": ", group1, " vs ", group2 %||% "REST")
83
+ h2 <- "#"
84
+ h3 <- "#"
85
+ odir <- caseinfo$prefix
86
+ }
87
+
88
+ if (is.null(object) || ncol(object) < 10) {
89
+ msg <- paste0(" ! skipped. Groups together have less than 10 cells: ", group_by, " = ", group1, " vs ", group2)
90
+ log$warn(msg)
91
+ reporter$add(
92
+ list(kind = "error", content = msg),
93
+ h1 = h1,
94
+ h2 = h2,
95
+ h3 = h3
96
+ )
97
+ return(invisible())
98
+ }
99
+
100
+ classes <- as.character(object@meta.data[[group_by]])
101
+ classes[classes != group1] <- "_REST"
52
102
  if (any(table(classes) < 5)) {
53
- msg <- paste(" Skipped. One of the groups has less than 5 cells.")
54
- log_warn(msg)
55
- # write a warning.txt to odir with the message and table(classes)
56
- write(paste0(msg, "\n\n"), file = file.path(odir, "warning.txt"))
57
- write.table(
58
- table(classes),
59
- file = file.path(odir, "warning.txt"),
60
- sep = "\t",
61
- quote = FALSE,
62
- row.names = FALSE,
63
- append = TRUE
103
+ msg <- paste0(
104
+ " ! skipped. Group has less than 5 cells: ",
105
+ paste(names(table(classes)[table(classes) < 5]), collapse = ", ")
64
106
  )
65
- return(
66
- list(
67
- list(kind = "error", content = msg),
68
- h1 = ifelse(is.null(h1), groupname, h1),
69
- h2 = ifelse(is.null(h1), "#", groupname)
70
- )
107
+ log$warn(msg)
108
+
109
+ reporter$add(
110
+ list(kind = "error", content = msg),
111
+ h1 = h1,
112
+ h2 = h2,
113
+ h3 = h3
71
114
  )
115
+ return(invisible())
116
+ }
117
+
118
+ features = intersect(rownames(object), metabolics)
119
+ ranks <- RunGSEAPreRank(
120
+ GetAssayData(object)[features, , drop = FALSE],
121
+ classes = object@meta.data[[group_by]],
122
+ case = group1,
123
+ control = group2,
124
+ method = prerank_method
125
+ )
126
+
127
+ fgsea_args <- fgsea_args %||% list()
128
+ fgsea_args$ranks <- ranks
129
+ fgsea_args$genesets <- pathways
130
+ fgsea_args$nproc <- fgsea_args$nproc %||% ncores
131
+ result <- do_call(RunGSEA, fgsea_args)
132
+
133
+ if (is.null(group2)) {
134
+ odir <- file.path(odir, slugify(paste0(group_by, "_", group1, "_vs_REST")))
135
+ } else {
136
+ odir <- file.path(odir, slugify(paste0(group_by, "_", group1, "_vs_", group2)))
72
137
  }
73
138
 
74
- exprs = GetAssayData(obj)[features, , drop = FALSE]
75
- tryCatch({
76
- if (fgsea) {
77
- runFGSEA(
78
- prerank(exprs, groupname, "_REST", classes, method = prerank_method),
79
- gmtfile,
80
- top = top,
81
- outdir = odir
139
+ dir.create(odir, showWarnings = FALSE, recursive = TRUE)
140
+ write.table(as.data.frame(result), file = file.path(odir, "fgsea_results.txt"), sep = "\t", quote = FALSE, row.names = FALSE)
141
+ write.table(data.frame(Gene = names(ranks), Rank = ranks), file = file.path(odir, "fgsea_ranks.txt"), sep = "\t", quote = FALSE, row.names = FALSE)
142
+
143
+ reporter$add(
144
+ list(kind = "descr", content = "A summary table of the GSEA results"),
145
+ list(kind = "table", src = file.path(odir, "fgsea_results.txt")),
146
+ h1 = h1,
147
+ h2 = h2,
148
+ h3 = h3
149
+ )
150
+
151
+ for (plot in names(plots)) {
152
+ plotargs <- plots[[plot]]
153
+ plotargs$level <- plotargs$level %||% "group"
154
+ if (plotargs$level != "group") { next }
155
+ plotargs$devpars <- plotargs$devpars %||% list()
156
+ plotargs$devpars$res <- plotargs$devpars$res %||% 100
157
+
158
+ if (identical(plotargs$plot_type, "summary")) {
159
+ p <- do_call(VizGSEA, c(list(result), plotargs))
160
+ plotprefix <- file.path(odir, slugify(plot))
161
+ plotargs$devpars$width <- plotargs$devpars$width %||% (attr(p, "width") * plotargs$devpars$res) %||% 800
162
+ plotargs$devpars$height <- plotargs$devpars$height %||% (attr(p, "height") * plotargs$devpars$res) %||% 600
163
+ png(
164
+ filename = paste0(plotprefix, ".png"),
165
+ width = plotargs$devpars$width,
166
+ height = plotargs$devpars$height,
167
+ res = plotargs$devpars$res
168
+ )
169
+ print(p)
170
+ dev.off()
171
+
172
+ reporter$add(
173
+ list(
174
+ name = plot,
175
+ contents = list(
176
+ list(kind = "descr", content = plotargs$descr %||% plot),
177
+ reporter$image(plotprefix, "png", FALSE, kind = "image")
178
+ )
179
+ ),
180
+ h1 = h1,
181
+ h2 = h2,
182
+ h3 = h3,
183
+ ui = "tabs"
82
184
  )
83
185
  } else {
84
- runGSEA(
85
- exprs,
86
- classes,
87
- gmtfile,
88
- odir
186
+ plotargs$combine = FALSE
187
+ plotargs$top_term = plotargs$top_term %||% 10
188
+ plotargs$gs <- result$pathway[1:plotargs$top_term]
189
+
190
+ ps <- do_call(VizGSEA, c(list(result), plotargs))
191
+ plotprefix <- file.path(odir, slugify(plot))
192
+ devpars <- plotargs$devpars
193
+ images <- list()
194
+ for (pname in names(ps)) {
195
+ p <- ps[[pname]]
196
+ devpars$width <- devpars$width %||% (attr(p, "width") * devpars$res) %||% 800
197
+ devpars$height <- devpars$height %||% (attr(p, "height") * devpars$res) %||% 600
198
+ prefix <- paste0(plotprefix, ".", slugify(pname))
199
+ images[[length(images) + 1]] <- reporter$image(prefix, c(), FALSE, kind = "table_image")
200
+ png(
201
+ filename = paste0(prefix, ".png"),
202
+ width = devpars$width,
203
+ height = devpars$height,
204
+ res = devpars$res
205
+ )
206
+ print(p)
207
+ dev.off()
208
+ }
209
+
210
+ reporter$add(
211
+ list(
212
+ name = plot,
213
+ ui = "table_of_images:2",
214
+ contents = images
215
+ ),
216
+ h1 = h1,
217
+ h2 = h2,
218
+ h3 = h3,
219
+ ui = "tabs"
89
220
  )
90
221
  }
222
+ }
223
+ result$comparison <- paste0(group1, " vs ", group2 %||% "REST")
224
+ return(result)
225
+ }
91
226
 
92
- # Can't add report directly, mclapply can't modify global variables.
93
- report = list(
94
- list(kind = "fgsea", dir = odir),
95
- h1 = ifelse(is.null(h1), groupname, h1),
96
- h2 = ifelse(is.null(h1), "#", groupname)
97
- )
98
- }, error=function(e) {
99
- unlink(odir, recursive = T, force = T)
100
- log_warn(paste("Unable to run for", group))
101
- log_warn(e$message)
102
-
103
- report = list(
104
- list(
105
- kind = "error",
106
- content = paste0("Error running GSEA for ", group, ": ", e$message)
107
- ),
108
- h1 = ifelse(is.null(h1), groupname, h1),
109
- h2 = ifelse(is.null(h1), "#", groupname)
227
+
228
+ do_subset <- function(object, caseinfo, subset_by, subset_val, group_by, comparisons, prerank_method, plots, fgsea_args) {
229
+ if (!is.null(subset_by)) {
230
+ log$info("- Handling subset: {subset_by} = {subset_val} ...")
231
+ # object <- subset(object, subset = !!sym(subset_by) == subset_val)
232
+ object <- tryCatch(
233
+ filter(object, !!sym(subset_by) == subset_val & !is.na(!!sym(group_by))),
234
+ error = function(e) NULL
110
235
  )
111
- })
112
236
 
113
- report
114
- }
237
+ if (is.null(object) || ncol(object) < 5) {
238
+ if (length(cases) == 1 && identical(caseinfo$name, subset_by)) {
239
+ # No need to show case name in report
240
+ h1 <- paste0(subset_by, ": ", subset_val)
241
+ h2 <- "#"
242
+ } else {
243
+ h1 <- caseinfo$name
244
+ h2 <- paste0(subset_by, ": ", subset_val)
245
+ }
115
246
 
116
- do_one_subset <- function(s, subset_col, subset_prefix) {
117
- log_info(paste("Processing subset", subset_col, ":", s))
118
- if (is.null(s)) {
119
- outputdir <- file.path(outdir, "ALL")
120
- subset_obj <- sobj
247
+ msg <- paste0(" ! skipped. Subset has less than 5 cells: ", subset_by, " = ", subset_val)
248
+ log$warn(msg)
249
+ reporter$add(list(kind = "error", content = msg), h1 = h1)
250
+ return(NULL)
251
+ }
252
+ }
253
+
254
+ groups <- unique(object@meta.data[[group_by]])
255
+ if (length(comparisons) == 0) {
256
+ result <- do_call(
257
+ rbind, lapply(
258
+ as.character(groups),
259
+ function(group) {
260
+ do_comparison(object, caseinfo, subset_by, subset_val, group_by, group, NULL, prerank_method, plots, fgsea_args)
261
+ }
262
+ )
263
+ )
121
264
  } else {
122
- outputdir <- file.path(outdir, slugify(paste0(subset_prefix, s)))
123
- subset_code <- paste0("subset(sobj, subset = ", subset_col, "=='", s, "')")
124
- subset_obj <- eval(parse(text = subset_code))
265
+ result <- do_call(
266
+ rbind, lapply(
267
+ as.character(comparisons),
268
+ function(comparison) {
269
+ if (grepl(",", comparison)) {
270
+ group1 <- trimws(unlist(strsplit(comparison, ",")))
271
+ group2 <- group1[2]
272
+ group1 <- group1[1]
273
+ } else {
274
+ group1 <- comparison
275
+ group2 <- NULL
276
+ }
277
+ do_comparison(object, caseinfo, subset_by, subset_val, group_by, group1, group2, prerank_method, plots, fgsea_args)
278
+ }
279
+ )
280
+ )
125
281
  }
126
- dir.create(outputdir, showWarnings = FALSE)
127
282
 
128
- # subset_obj <- subset(subset_obj, features = intersect(rownames(subset_obj), metabolics))
129
- features = intersect(rownames(subset_obj), metabolics)
283
+ result[["-log10(pval)"]] <- -log10(result$pval)
284
+ result[["-log10(padj)"]] <- -log10(result$padj)
130
285
 
131
- h1 <- NULL
132
- if (!is.null(s)) {
133
- h1 <- paste0(subset_prefix, s)
286
+ odir <- NULL
287
+ if (!is.null(subset_by)) {
288
+ if (length(cases) == 1 && identical(caseinfo$name, subset_by)) {
289
+ # No need to show case name in report
290
+ h1 <- paste0(subset_by, ": ", subset_val)
291
+ h2 <- "Summary plots for all comparisons"
292
+ h3 <- "#"
293
+ } else {
294
+ h1 <- caseinfo$name
295
+ h2 <- paste0(subset_by, ": ", subset_val)
296
+ h3 <- "Summary plots for all comparisons"
297
+ }
298
+ odir <- file.path(caseinfo$prefix, slugify(paste0(subset_by, "_", subset_val)))
299
+ } else if (length(cases) > 1) {
300
+ h1 <- caseinfo$name
301
+ h2 <- "Summary plots for all comparisons"
302
+ h3 <- "#"
303
+ odir <- file.path(caseinfo$prefix, "No_Subsetting")
134
304
  }
135
- groups = subset_obj@meta.data[[grouping]]
136
- x = mclapply(as.character(unique(groups)), function(group) {
137
- do_one_group(subset_obj, features, group, outputdir, h1)
138
- }, mc.cores = ncores)
139
- if (any(unlist(lapply(x, class)) == "try-error")) {
140
- stop("mclapply error")
305
+
306
+ if (!is.null(odir)) {
307
+ dir.create(odir, showWarnings = FALSE, recursive = TRUE)
141
308
  }
142
- for (r in x) {
143
- if (!is.null(r)) {
144
- do.call(add_report, r)
309
+
310
+ for (plot in names(plots)) {
311
+ plotargs <- plots[[plot]]
312
+ plotargs$level <- plotargs$level %||% "group"
313
+ if (plotargs$level != "subset") { next }
314
+ if (is.null(odir)) {
315
+ stop("'subset_by' is NULL but plot level is 'subset': ", plot, ", use level = 'case' instead.")
145
316
  }
317
+ plotargs$devpars <- plotargs$devpars %||% list()
318
+ plotargs$devpars$res <- plotargs$devpars$res %||% 100
319
+ plotargs$plot_type <- plotargs$plot_type %||% "dot"
320
+
321
+ if (identical(plotargs$plot_type, "dot")) {
322
+ plotargs$x <- plotargs$x %||% "comparison"
323
+ plotargs$y <- plotargs$y %||% "pathway"
324
+ plotargs$size_by <- plotargs$size_by %||% "NES"
325
+ plotargs$fill_by <- plotargs$fill_by %||% "-log10(padj)"
326
+ plotargs$fill_cutoff <- plotargs$fill_cutoff %||% -log10(0.05)
327
+ plotargs$fill_cutoff_name <- plotargs$fill_cutoff_name %||% "Insignificant"
328
+ plotargs$aspect.ratio <- plotargs$aspect.ratio %||% (length(unique(result$pathway)) / length(unique(result$comparison)) / 4)
329
+ plotargs$x_text_angle <- plotargs$x_text_angle %||% 90
330
+ p <- do_call(plotthis::DotPlot, c(list(result), plotargs))
331
+ plotprefix <- file.path(odir, slugify(plot))
332
+ plotargs$devpars$width <- plotargs$devpars$width %||% (attr(p, "width") * plotargs$devpars$res) %||% 800
333
+ plotargs$devpars$height <- plotargs$devpars$height %||% (attr(p, "height") * plotargs$devpars$res) %||% 600
334
+ png(
335
+ filename = paste0(plotprefix, ".png"),
336
+ width = plotargs$devpars$width,
337
+ height = plotargs$devpars$height,
338
+ res = plotargs$devpars$res
339
+ )
340
+ print(p)
341
+ dev.off()
342
+
343
+ reporter$add(
344
+ list(kind = "descr", content = plotargs$descr %||% plot),
345
+ reporter$image(plotprefix, "png", FALSE, kind = "image"),
346
+ h1 = h1,
347
+ h2 = h2,
348
+ h3 = h3
349
+ )
350
+ } else {
351
+ stop("`subset` level plot type not supported yet: ", plotargs$plot_type)
352
+ }
353
+ }
354
+ if (!is.null(subset_by)) {
355
+ result[[subset_by]] <- subset_val
146
356
  }
357
+
358
+ return(result)
147
359
  }
148
360
 
149
- do_one_subset_col <- function(subset_col, subset_prefix) {
150
- if (is.null(subset_col)) {
151
- do_one_subset(NULL, subset_col = NULL, subset_prefix = NULL)
361
+
362
+ do_case <- function(casename) {
363
+ log$info("Processing case: {casename} ...")
364
+ case <- cases[[casename]]
365
+ caseinfo <- case_info(casename, outdir, create = TRUE)
366
+
367
+ if (is.null(case$subset_by)) {
368
+ result <- do_subset(
369
+ sobj,
370
+ caseinfo,
371
+ subset_by = NULL,
372
+ subset_val = NULL,
373
+ group_by = case$group_by,
374
+ comparisons = case$comparisons,
375
+ prerank_method = case$prerank_method,
376
+ plots = case$plots,
377
+ fgsea_args = case$fgsea_args
378
+ )
379
+ } else {
380
+ sobj_avail <- filter(sobj, !is.na(!!sym(case$subset_by)))
381
+ subsets <- if (is.factor(sobj_avail@meta.data[[case$subset_by]])) {
382
+ levels(sobj_avail@meta.data[[case$subset_by]])
383
+ } else {
384
+ unique(sobj_avail@meta.data[[case$subset_by]])
385
+ }
386
+ result <- do_call(
387
+ rbind, lapply(
388
+ as.character(subsets),
389
+ function(subset_val) {
390
+ do_subset(
391
+ sobj_avail,
392
+ caseinfo,
393
+ subset_by = case$subset_by,
394
+ subset_val = subset_val,
395
+ group_by = case$group_by,
396
+ comparisons = case$comparisons,
397
+ prerank_method = case$prerank_method,
398
+ plots = case$plots,
399
+ fgsea_args = case$fgsea_args
400
+ )
401
+ }
402
+ )
403
+ )
404
+ result[[case$subset_by]] <- factor(result[[case$subset_by]], levels = subsets)
152
405
  }
153
- subsets <- na.omit(unique(sobj@meta.data[[subset_col]]))
154
- lapply(subsets, do_one_subset, subset_col = subset_col, subset_prefix = subset_prefix)
155
- }
406
+ result$pathway <- factor(result$pathway, levels = pathway_names)
156
407
 
157
- if (is.null(subsetting_cols)) {
158
- do_one_subset_col(NULL)
159
- } else {
160
- for (i in seq_along(subsetting_cols)) {
161
- do_one_subset_col(subsetting_cols[i], subsetting_prefix[i])
408
+ if (!is.null(case$subset_by)) {
409
+ if (length(cases) == 1 && identical(caseinfo$name, case$subset_by)) {
410
+ h1 <- "Summary plots for all subsets"
411
+ h2 <- "#"
412
+ } else {
413
+ h1 <- caseinfo$name
414
+ h2 <- "Summary plots for all subsets"
415
+ }
416
+ } else if (length(cases) > 1) {
417
+ h1 <- caseinfo$name
418
+ h2 <- "Summary plots for all comparisons"
419
+ } else {
420
+ h1 <- "Summary plots for all comparisons"
421
+ h2 <- "#"
422
+ }
423
+
424
+ for (plot in names(plots)) {
425
+ plotargs <- plots[[plot]]
426
+ plotargs$level <- plotargs$level %||% "group"
427
+ if (plotargs$level != "case") { next }
428
+ plotargs$devpars <- plotargs$devpars %||% list()
429
+ plotargs$devpars$res <- plotargs$devpars$res %||% 100
430
+ plotargs$plot_type <- plotargs$plot_type %||% "dot"
431
+
432
+ if (identical(plotargs$plot_type, "dot")) {
433
+ plotargs$x <- plotargs$x %||% "comparison"
434
+ plotargs$y <- plotargs$y %||% "pathway"
435
+ plotargs$size_by <- plotargs$size_by %||% "NES"
436
+ plotargs$fill_by <- plotargs$fill_by %||% "-log10(padj)"
437
+ plotargs$fill_cutoff <- plotargs$fill_cutoff %||% -log10(0.05)
438
+ plotargs$fill_cutoff_name <- plotargs$fill_cutoff_name %||% "Insignificant"
439
+ plotargs$aspect.ratio <- plotargs$aspect.ratio %||% (length(unique(result$pathway)) / length(unique(result$comparison)) / 4)
440
+ plotargs$x_text_angle <- plotargs$x_text_angle %||% 90
441
+ if (!is.null(subset_by)) {
442
+ plotargs$split_by <- plotargs$split_by %||% subset_by
443
+ }
444
+ p <- do_call(plotthis::DotPlot, c(list(result), plotargs))
445
+ plotprefix <- file.path(caseinfo$prefix, slugify(plot))
446
+ plotargs$devpars$width <- plotargs$devpars$width %||% (attr(p, "width") * plotargs$devpars$res) %||% 800
447
+ plotargs$devpars$height <- plotargs$devpars$height %||% (attr(p, "height") * plotargs$devpars$res) %||% 600
448
+ png(
449
+ filename = paste0(plotprefix, ".png"),
450
+ width = plotargs$devpars$width,
451
+ height = plotargs$devpars$height,
452
+ res = plotargs$devpars$res
453
+ )
454
+ print(p)
455
+ dev.off()
456
+
457
+ reporter$add(
458
+ list(kind = "descr", content = plotargs$descr %||% plot),
459
+ reporter$image(plotprefix, "png", FALSE, kind = "image"),
460
+ h1 = h1,
461
+ h2 = h2
462
+ )
463
+ } else {
464
+ stop("`case` level plot type not supported yet: ", plotargs$plot_type)
465
+ }
162
466
  }
163
467
  }
164
468
 
165
- save_report(joboutdir)
469
+ sapply(names(cases), do_case)
470
+
471
+ reporter$save(dirname(outdir))