biopipen 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (150)
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +328 -292
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +481 -215
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +231 -76
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +43 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +144 -185
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +6 -5
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/RECORD +130 -145
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/ScFGSEA.svelte +0 -16
  132. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  133. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  134. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  135. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  136. biopipen/reports/utils/gsea.liq +0 -110
  137. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  138. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  139. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  140. biopipen/utils/caching.R +0 -44
  141. biopipen/utils/gene.R +0 -95
  142. biopipen/utils/gsea.R +0 -329
  143. biopipen/utils/io.R +0 -20
  144. biopipen/utils/misc.R +0 -602
  145. biopipen/utils/mutate_helpers.R +0 -581
  146. biopipen/utils/plot.R +0 -209
  147. biopipen/utils/repr.R +0 -146
  148. biopipen/utils/rnaseq.R +0 -48
  149. biopipen/utils/single_cell.R +0 -207
  150. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/entry_points.txt +0 -0
@@ -15,9 +15,9 @@ library(tidyseurat)
15
15
 
16
16
  setEnrichrSite("Enrichr")
17
17
 
18
- srtfile <- {{ in.srtobj | quote }}
19
- outdir <- {{ out.outdir | quote }}
20
- joboutdir <- {{ job.outdir | quote }}
18
+ srtfile <- {{ in.srtobj | r }}
19
+ outdir <- {{ out.outdir | r }}
20
+ joboutdir <- {{ job.outdir | r }}
21
21
  ncores <- {{ envs.ncores | int }}
22
22
  mutaters <- {{ envs.mutaters | r }}
23
23
  idents <- {{ envs.idents | r }}
@@ -1,16 +1,17 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(Seurat)
4
2
  library(dplyr)
3
+ library(biopipen.utils)
5
4
 
6
5
  sobjfile <- {{in.srtobj | r}}
7
6
  outfile <- {{out.rdsfile | r}}
8
7
  defaults <- {{envs.defaults | r}}
9
8
  modules <- {{envs.modules | r}}
10
9
 
10
+ log <- get_logger()
11
+
11
12
  # load seurat object
12
- log_info("Loading Seurat object ...")
13
- sobj <- readRDS(sobjfile)
13
+ log$info("Loading Seurat object ...")
14
+ sobj <- read_obj(sobjfile)
14
15
 
15
16
  aggs <- list(
16
17
  mean = mean,
@@ -36,7 +37,7 @@ for (key in names(modules)) {
36
37
  agg <- aggs[[module$agg]]
37
38
  module$keep <- NULL
38
39
  module$agg <- NULL
39
- log_info("Calculating module '{key}' ...")
40
+ log$info("Calculating module '{key}' ...")
40
41
  is_cc <- FALSE
41
42
  if (!is.null(module$kind) && module$kind %in% c("diffmap", "diffusion_map")) {
42
43
  library(destiny)
@@ -47,30 +48,30 @@ for (key in names(modules)) {
47
48
  module$kind <- NULL
48
49
 
49
50
  if (!is.null(module$n_pcs)) {
50
- log_info("- Using cell embeddings from PCA reduction ...")
51
+ log$info("- Using cell embeddings from PCA reduction ...")
51
52
  module$data <- Embeddings(sobj, reduction = "pca")
52
53
  if (module$n_pcs > ncol(module$data)) {
53
- log_warn("- `n_pcs` ({module$n_pcs}) is larger than the number of PCs, using all {ncol(module$data)} PCs ...")
54
+ log$warn("- `n_pcs` ({module$n_pcs}) is larger than the number of PCs, using all {ncol(module$data)} PCs ...")
54
55
  }
55
56
  module$data <- module$data[, 1:min(module$n_pcs, ncol(module$data))]
56
57
  module$n_pcs <- NULL
57
58
  } else {
58
- log_info("- Using assay data ...")
59
+ log$info("- Using assay data ...")
59
60
  module$data <- GetAssayData(sobj, layer = "data")
60
61
  }
61
62
 
62
- log_info("- Calculating diffusion map ...")
63
+ log$info("- Calculating diffusion map ...")
63
64
  dm <- do_call(DiffusionMap, module)
64
65
  ev <- eigenvectors(dm)
65
66
 
66
- log_info("- Creating DimReduc object ...")
67
+ log$info("- Creating DimReduc object ...")
67
68
  sobj[[key]] <- CreateDimReducObject(
68
69
  embeddings = data.matrix(as.data.frame(ev[, 1:features])),
69
70
  key = paste0(key, "_")
70
71
  )
71
72
 
72
73
  # add to meta.data
73
- log_info("- Adding to meta.data ...")
74
+ log$info("- Adding to meta.data ...")
74
75
  sobj <- AddMetaData(
75
76
  sobj,
76
77
  sobj[[key]]@cell.embeddings,
@@ -134,5 +135,5 @@ for (key in names(modules)) {
134
135
  }
135
136
 
136
137
  # save seurat object
137
- print("Saving Seurat object ...")
138
- saveRDS(sobj, outfile)
138
+ log$info("Saving Seurat object ...")
139
+ save_obj(sobj, outfile)
@@ -40,7 +40,7 @@ cases = {{envs.cases | r}}
40
40
  # sections = c()
41
41
 
42
42
  log_info("- Reading srtobj ...")
43
- srtobj = readRDS(srtfile)
43
+ srtobj = biopipen.utils::read_obj(srtfile)
44
44
  meta = srtobj@meta.data
45
45
 
46
46
  log_info("- Mutating meta data if needed ...")
@@ -1,10 +1,7 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
3
- {{ biopipen_dir | joinpaths: "utils", "mutate_helpers.R" | source_r }}
4
-
5
1
  library(rlang)
6
2
  library(Seurat)
7
3
  library(tidyseurat)
4
+ library(biopipen.utils)
8
5
 
9
6
  srtfile <- {{in.srtobj | r}} # nolint
10
7
  outdir <- {{out.outdir | r}} # nolint
@@ -14,23 +11,34 @@ group.by <- {{envs["group-by"] | r}} # nolint
14
11
  ident.1 <- {{envs["ident-1"] | r}} # nolint
15
12
  ident.2 <- {{envs["ident-2"] | r}} # nolint
16
13
  each <- {{envs.each | r}} # nolint
17
- prefix_each <- {{envs.prefix_each | r}} # nolint
18
14
  subset <- {{envs.subset | r}} # nolint
19
- section <- {{envs.section | r}} # nolint
20
15
  gmtfile <- {{envs.gmtfile | r}} # nolint
21
16
  method <- {{envs.method | r}} # nolint
22
17
  top <- {{envs.top | r}} # nolint
23
- minsize <- {{envs.minsize | r}} # nolint
24
- maxsize <- {{envs.maxsize | r}} # nolint
18
+ minsize <- {{envs.minSize | default: envs.minsize | r}} # nolint
19
+ maxsize <- {{envs.maxSize | default: envs.maxsize | r}} # nolint
25
20
  eps <- {{envs.eps | r}} # nolint
21
+ allpathway_plots_defaults <- {{envs.allpathway_plots_defaults | r}} # nolint
22
+ allpathway_plots <- {{envs.allpathway_plots | r}} #
26
23
  ncores <- {{envs.ncores | r}} # nolint
27
24
  rest <- {{envs.rest | r: todot="-"}} # nolint
28
25
  cases <- {{envs.cases | r: todot="-"}} # nolint
29
26
 
30
- log_info("- Reading srtobj...")
27
+ log <- get_logger()
28
+ reporter <- get_reporter()
29
+
30
+ allpathway_plots <- lapply(allpathway_plots, function(x) {
31
+ list_update(allpathway_plots_defaults, x)
32
+ })
33
+
34
+ log$info("Reading Seurat object ...")
35
+ srtobj <- read_obj(srtfile)
36
+ if (!"Identity" %in% colnames(srtobj@meta.data)) {
37
+ srtobj@meta.data$Identity <- Idents(srtobj)
38
+ }
31
39
 
32
- srtobj <- readRDS(srtfile)
33
40
  if (!is.null(mutaters) && length(mutaters) > 0) {
41
+ log$info("Mutating metadata columns ...")
34
42
  srtobj@meta.data <- srtobj@meta.data %>% mutate(!!!lapply(mutaters, parse_expr))
35
43
  }
36
44
 
@@ -39,69 +47,87 @@ defaults <- list(
39
47
  ident.1 = ident.1,
40
48
  ident.2 = ident.2,
41
49
  each = each,
42
- prefix_each = prefix_each,
43
50
  subset = subset,
44
- section = section,
45
51
  gmtfile = gmtfile,
46
52
  method = method,
47
53
  top = top,
48
54
  minsize = minsize,
49
55
  maxsize = maxsize,
50
56
  eps = eps,
57
+ allpathway_plots_defaults = allpathway_plots_defaults,
58
+ allpathway_plots = allpathway_plots,
51
59
  ncores = ncores,
52
60
  rest = rest
53
61
  )
54
62
 
55
63
  expand_each <- function(name, case) {
56
64
  outcases <- list()
57
- if (is.null(case$each) || nchar(case$each) == 0) {
58
- if (is.null(case$section) || case$section == "DEFAULT") {
59
- outcases[[name]] <- case
60
- } else {
61
- outcases[[paste0(case$section, "::", name)]] <- case
65
+
66
+ case$group.by <- case$group.by %||% "Identity"
67
+
68
+ if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
69
+ if (length(case$allpathway_plots) > 0) {
70
+ stop("Cannot perform `allpathway_plots` without `each` defined.")
62
71
  }
72
+
73
+ outcases[[name]] <- case
63
74
  } else {
64
- if (!is.null(case$section) && case$section != "DEFAULT") {
65
- log_warn(" Ignoring `section` in case `{name}` when `each` is set.")
66
- case$section <- NULL
67
- }
68
- if (is.null(case$subset)) {
69
- eachs <- srtobj@meta.data %>%
75
+ eachs <- if (!is.null(case$subset)) {
76
+ srtobj@meta.data %>%
77
+ filter(!!parse_expr(case$subset)) %>%
70
78
  pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
71
79
  } else {
72
- eachs <- srtobj@meta.data %>% dplyr::filter(!!!parse_exprs(case$subset)) %>%
80
+ srtobj@meta.data %>%
73
81
  pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
74
82
  }
83
+
84
+ if (length(cases) == 0 && name == "GSEA") {
85
+ name <- case$each
86
+ }
87
+
75
88
  for (each in eachs) {
76
- by <- make.names(paste0("..", name, "_", case$each,"_", each))
77
- srtobj@meta.data <<- srtobj@meta.data %>%
78
- mutate(!!sym(by) := if_else(
79
- !!sym(case$each) == each,
80
- !!sym(case$group.by),
81
- NA
82
- ))
83
-
84
- if (isTRUE(case$prefix_each)) {
85
- key <- paste0(name, "::", case$each, " - ", each)
89
+ newname <- paste0(case$each, "::", each)
90
+ newcase <- case
91
+
92
+ newcase$original_case <- paste0(name, " (all ", case$each,")")
93
+ newcase$each_name <- case$each
94
+ newcase$each <- each
95
+
96
+ newcase$allpathway_plots_defaults <- NULL
97
+ newcase$allpathway_plots <- NULL
98
+
99
+ if (!is.null(case$subset)) {
100
+ newcase$subset <- paste0(case$subset, " & ", bQuote(case$each), " == '", each, "'")
86
101
  } else {
87
- key <- paste0(name, "::", each)
102
+ newcase$subset <- paste0(bQuote(case$each), " == '", each, "'")
88
103
  }
89
- outcases[[key]] <- case
90
- outcases[[key]]$section <- name
91
- outcases[[key]]$group.by <- by
104
+
105
+ outcases[[newname]] <- newcase
106
+ }
107
+
108
+ if (length(case$allpathway_plots) > 0) {
109
+ newcase <- case
110
+
111
+ newcase$gseas <- list()
112
+ newcase$allpathway_plots <- lapply(
113
+ newcase$allpathway_plots,
114
+ function(x) { list_update(newcase$allpathway_plots_defaults, x) }
115
+ )
116
+
117
+ outcases[[paste0(name, " (all ", case$each,")")]] <- newcase
92
118
  }
93
119
  }
94
120
  outcases
95
121
  }
96
122
 
97
- log_info("- Expanding cases...")
98
- cases <- expand_cases(cases, defaults, expand_each)
123
+ log$info("Expanding cases...")
124
+ cases <- expand_cases(cases, defaults, expand_each, default_case = "GSEA")
99
125
 
100
126
 
101
127
  ensure_sobj <- function(expr, allow_empty) {
102
128
  tryCatch({ expr }, error = function(e) {
103
129
  if (allow_empty) {
104
- log_warn(" Ignoring this case: {e$message}")
130
+ log$warn(" Ignoring this case: {e$message}")
105
131
  return(NULL)
106
132
  } else {
107
133
  stop(e)
@@ -109,60 +135,121 @@ ensure_sobj <- function(expr, allow_empty) {
109
135
  })
110
136
  }
111
137
 
138
+ do_case <- function(name) {
139
+ log$info("- Processing case: {name} ...")
140
+ case <- cases[[name]]
141
+ info <- case_info(name, outdir, create = TRUE)
142
+
143
+ if (!is.null(case$gseas)) {
112
144
 
113
- do_case <- function(name, case) {
114
- log_info("- Handling case: {name} ...")
115
- info <- casename_info(name, cases, outdir, create = TRUE)
145
+ each_levels <- names(case$gseas)
146
+ gseas <- do_call(rbind, lapply(each_levels, function(x) {
147
+ gsea_df <- case$gseas[[x]]
148
+ if (nrow(gsea_df) > 0) {
149
+ gsea_df[[case$each]] <- x
150
+ } else {
151
+ gsea_df[[case$each]] <- character(0) # Empty case
152
+ }
153
+ gsea_df
154
+ }))
155
+ gseas[[case$each]] <- factor(gseas[[case$each]], levels = each_levels)
156
+
157
+ for (plotname in names(case$allpathway_plots)) {
158
+ plotargs <- case$allpathway_plots[[plotname]]
159
+ plotargs <- extract_vars(plotargs, "devpars")
160
+ plotargs$gsea_results <- gseas
161
+ plotargs$group_by <- case$each
162
+ if (plotargs$plot_type == "heatmap") {
163
+ plotargs$show_row_names <- plotargs$show_row_names %||% TRUE
164
+ plotargs$show_column_names <- plotargs$show_column_names %||% TRUE
165
+ }
116
166
 
117
- allow_empty = startsWith(case$group.by, "..")
167
+ p <- do_call(VizGSEA, plotargs)
168
+
169
+ outprefix <- file.path(info$prefix, paste0("all.", slugify(plotname)))
170
+ save_plot(p, outprefix, devpars, formats = "png")
171
+ reporter$add2(
172
+ list(kind = "descr", content = paste0("Pathways for all ", case$each, ".")),
173
+ list(kind = "image", src = paste0(outprefix, ".png")),
174
+ hs = c(info$section, info$name),
175
+ hs2 = plotname
176
+ )
177
+ }
178
+
179
+ return(invisible(NULL))
180
+ }
181
+
182
+ allow_empty = !is.null(case$each)
118
183
  # prepare expression matrix
119
- log_info(" Preparing expression matrix...")
184
+ log$info(" Preparing expression matrix...")
120
185
  sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group.by))) }, allow_empty)
121
- if (is.null(sobj)) { return() }
186
+ if (is.null(sobj)) {
187
+ reporter$add2(
188
+ list(
189
+ kind = "error",
190
+ content = paste0("No cells with non-NA `", case$group.by, "` in the Seurat object.")
191
+ ),
192
+ hs = c(info$section, info$name)
193
+ )
194
+ return(NULL)
195
+ }
122
196
 
123
197
  if (!is.null(case$subset)) {
124
198
  sobj <- ensure_sobj({ sobj %>% filter(!!!parse_exprs(case$subset)) }, allow_empty)
125
- if (is.null(sobj)) { return() }
199
+ if (is.null(sobj)) {
200
+ reporter$add2(
201
+ list(
202
+ kind = "error",
203
+ content = paste0("No cells with non-NA `", case$group.by, "` in the Seurat object.")
204
+ ),
205
+ hs = c(info$section, info$name)
206
+ )
207
+ return(NULL)
208
+ }
126
209
  }
127
210
  if (!is.null(case$ident.2)) {
128
211
  sobj <- ensure_sobj({ sobj %>% filter(!!sym(case$group.by) %in% c(case$ident.1, case$ident.2)) }, allow_empty)
129
- if (is.null(sobj)) { return() }
212
+ if (is.null(sobj)) {
213
+ reporter$add2(
214
+ list(
215
+ kind = "error",
216
+ content = paste0("No cells with non-NA `", case$group.by, "` in the Seurat object.")
217
+ ),
218
+ hs = c(info$section, info$name)
219
+ )
220
+ return(NULL)
221
+ }
130
222
  }
131
223
 
132
224
  allclasses <- sobj@meta.data[, case$group.by, drop = TRUE]
133
225
  if (is.null(case$ident.2)) {
134
- case$ident.2 <- ".rest"
135
- allclasses[allclasses != case$ident.1] <- ".rest"
226
+ case$ident.2 <- "Other"
227
+ allclasses[allclasses != case$ident.1] <- "Other"
136
228
  }
137
229
  exprs <- GetAssayData(sobj, layer = "data")
138
230
 
139
231
  # get preranks
140
- log_info(" Getting preranks...")
141
- ranks <- prerank(exprs, case$ident.1, case$ident.2, allclasses, case$method)
232
+ log$info(" Getting preranks...")
233
+ ranks <- RunGSEAPreRank(exprs, allclasses, case$ident.1, case$ident.2, case$method)
142
234
  write.table(
143
- ranks,
144
- file.path(info$casedir, "fgsea.rank"),
145
- row.names = FALSE,
235
+ as.data.frame(ranks),
236
+ file.path(info$prefix, "fgsea.rank.txt"),
237
+ row.names = TRUE,
146
238
  col.names = TRUE,
147
239
  sep = "\t",
148
240
  quote = FALSE
149
241
  )
150
- if (sum(is.na(ranks[, 2])) == nrow(ranks)) {
242
+ if (all(is.na(ranks))) {
151
243
  if (length(allclasses) < 100) {
152
- log_warn(" Ignoring this case because all gene ranks are NA and there are <100 cells.")
153
- cat(
154
- paste0("Not enough cells (n = ", length(allclasses), ") to run fgsea."),
155
- file = file.path(info$casedir, "fgsea.log")
156
- )
157
- add_report(
244
+ log$warn(" Ignoring this case because all gene ranks are NA and there are <100 cells.")
245
+ reporter$add2(
158
246
  list(
159
247
  kind = "error",
160
248
  content = paste0("Not enough cells (n = ", length(allclasses), ") to run fgsea.")
161
249
  ),
162
- h1 = info$h1,
163
- h2 = info$h2
250
+ hs = c(info$section, info$name)
164
251
  )
165
- return()
252
+ return(NULL)
166
253
  } else {
167
254
  stop(paste0(
168
255
  "All gene ranks are NA (# cells = ",
@@ -175,20 +262,88 @@ do_case <- function(name, case) {
175
262
  }
176
263
 
177
264
  # run fgsea
178
- log_info(" Running fgsea...")
179
- case$rest$minSize <- case$minsize
180
- case$rest$maxSize <- case$maxsize
265
+ log$info(" Running fgsea...")
266
+ case$rest$ranks <- ranks
267
+ case$rest$genesets <- ParseGMT(case$gmtfile)
268
+ case$rest$minSize <- case$rest$minSize %||% case$rest$minsize %||% case$minsize
269
+ case$rest$maxSize <- case$rest$maxSize %||% case$rest$maxsize %||% case$maxsize
181
270
  case$rest$eps <- case$eps
182
271
  case$rest$nproc <- case$ncores
183
- runFGSEA(ranks, case$gmtfile, case$top, info$casedir, case$rest)
272
+ case$rest$minsize <- NULL
273
+ case$rest$maxsize <- NULL
274
+ result <- do_call(RunGSEA, case$rest)
275
+ write.table(
276
+ result,
277
+ file.path(info$prefix, "fgsea.tsv"),
278
+ row.names = FALSE,
279
+ col.names = TRUE,
280
+ sep = "\t",
281
+ quote = FALSE
282
+ )
283
+
284
+ aspect.ratio <- sqrt(case$top) / sqrt(10)
285
+ p_summary <- VizGSEA(
286
+ result,
287
+ plot_type = "summary",
288
+ top_term = case$top,
289
+ aspect.ratio = aspect.ratio
290
+ )
291
+ save_plot(
292
+ p_summary,
293
+ file.path(info$prefix, "summary"),
294
+ devpars = list(res = 100, height = attr(p_summary, "height") * 100 / 1.5, width = attr(p_summary, "width") * 100),
295
+ formats = "png"
296
+ )
297
+
298
+ p_gsea <- VizGSEA(
299
+ result,
300
+ plot_type = "gsea",
301
+ gs = result$pathway[1:min(case$top, nrow(result))]
302
+ )
303
+ save_plot(
304
+ p_gsea,
305
+ file.path(info$prefix, "pathways"),
306
+ devpars = list(res = 100, height = attr(p_gsea, "height") * 100, width = attr(p_gsea, "width") * 100),
307
+ formats = "png"
308
+ )
309
+
184
310
 
185
- add_report(
186
- list(kind = "fgsea", dir = info$casedir),
187
- h1 = info$h1,
188
- h2 = info$h2
311
+ reporter$add2(
312
+ list(
313
+ name = paste0("Table (", case$ident.1, " vs ", case$ident.2, ")"),
314
+ contents = list(
315
+ list(kind = "descr", content = paste0(
316
+ "Showing top 50 pathways by padj in descending order. ",
317
+ "Use 'Download the entire data' button to download all pathways."
318
+ )),
319
+ list(kind = "table", src = file.path(info$prefix, "fgsea.tsv"), data = list(nrows = 50))
320
+ )
321
+ ),
322
+ list(
323
+ name = "Summary Plot",
324
+ contents = list(
325
+ list(kind = "descr", content = paste0("Showing top ", case$top, " pathways.")),
326
+ list(kind = "image", src = file.path(info$prefix, "summary.png"))
327
+ )
328
+ ),
329
+ list(
330
+ name = "GSEA Plots",
331
+ contents = list(
332
+ list(kind = "descr", content = paste0("Showing top ", case$top, " pathways.")),
333
+ list(kind = "image", src = file.path(info$prefix, "pathways.png"))
334
+ )
335
+ ),
336
+ hs = c(info$section, info$name),
337
+ ui = "tabs"
189
338
  )
339
+
340
+ if (!is.null(case$original_case) && !is.null(cases[[case$original_case]])) {
341
+ cases[[case$original_case]]$gseas[[case$each]] <<- result
342
+ }
343
+
344
+ invisible()
190
345
  }
191
346
 
192
- sapply(sort(names(cases)), function(name) do_case(name, cases[[name]]))
347
+ sapply(names(cases), function(name) do_case(name))
193
348
 
194
- save_report(joboutdir)
349
+ reporter$save(joboutdir)
@@ -1,8 +1,7 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(rlang)
4
2
  library(splatter)
5
3
  library(scater)
4
+ library(biopipen.utils)
6
5
 
7
6
  # Load template variables
8
7
  seed <- {{ in.seed | r }}
@@ -14,11 +13,13 @@ outtype <- {{ envs.outtype | r }}
14
13
  method <- {{ envs.method | r }}
15
14
  user_params <- {{ envs.params | r: todot="-" }}
16
15
 
17
- log_info("Generating simulation parameters ...")
16
+ log <- get_logger()
17
+
18
+ log$info("Generating simulation parameters ...")
18
19
 
19
20
  seed <- seed %||% 1
20
21
  if (length(seed) > 1) {
21
- log_warn("- multiple seeds provided, using the first one")
22
+ log$warn("- multiple seeds provided, using the first one")
22
23
  seed <- seed[1]
23
24
  }
24
25
  if (is.character(seed)) {
@@ -41,7 +42,7 @@ user_params$object = params
41
42
  do_call(setParams, user_params)
42
43
 
43
44
 
44
- log_info("Saving simulation parameters to file ...")
45
+ log$info("Saving simulation parameters to file ...")
45
46
 
46
47
  sim <- splatSimulate(params, method = method, verbose = TRUE)
47
48
 
@@ -49,16 +50,16 @@ outtype <- tolower(outtype)
49
50
  if (outtype == "sce") outtype <- "singlecellexperiment"
50
51
 
51
52
  if (outtype == "singlecellexperiment") {
52
- log_info("Saving simulation to file ...")
53
- saveRDS(sim, file = outfile)
53
+ log$info("Saving simulation to file ...")
54
+ save_obj(sim, file = outfile)
54
55
  } else {
55
- log_info("Converting simulation to Seurat object ...")
56
+ log$info("Converting simulation to Seurat object ...")
56
57
  cnts <- SingleCellExperiment::counts(sim)
57
58
  sobj <- Seurat::CreateSeuratObject(counts = cnts, project = proj)
58
59
  rm(sim)
59
60
  rm(cnts)
60
61
  gc()
61
62
 
62
- log_info("Saving simulation to file ...")
63
- saveRDS(sobj, file = outfile)
63
+ log$info("Saving simulation to file ...")
64
+ save_obj(sobj, file = outfile)
64
65
  }