biopipen 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (150) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +328 -292
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +481 -215
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +231 -76
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +43 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +144 -185
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +6 -5
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/RECORD +130 -145
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/ScFGSEA.svelte +0 -16
  132. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  133. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  134. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  135. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  136. biopipen/reports/utils/gsea.liq +0 -110
  137. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  138. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  139. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  140. biopipen/utils/caching.R +0 -44
  141. biopipen/utils/gene.R +0 -95
  142. biopipen/utils/gsea.R +0 -329
  143. biopipen/utils/io.R +0 -20
  144. biopipen/utils/misc.R +0 -602
  145. biopipen/utils/mutate_helpers.R +0 -581
  146. biopipen/utils/plot.R +0 -209
  147. biopipen/utils/repr.R +0 -146
  148. biopipen/utils/rnaseq.R +0 -48
  149. biopipen/utils/single_cell.R +0 -207
  150. {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/entry_points.txt +0 -0
@@ -1,9 +1,7 @@
1
- library(ggplot2)
2
- library(ggprism)
3
1
  library(dplyr)
4
2
  library(tidyr)
5
3
  library(tibble)
6
- library(patchwork)
4
+ library(plotthis)
7
5
 
8
6
  tmadfiles = {{in.tmadfiles | r}}
9
7
  metafile = {{in.metafile | r}}
@@ -47,7 +45,7 @@ if (!is.null(group_cols)) {
47
45
  }
48
46
 
49
47
  data = data.frame(Sample = sams, tMAD = tmads)
50
- if (file.exists(metafile) && length(meta_cols) > 0) {
48
+ if (is.character(metafile) && file.exists(metafile) && length(meta_cols) > 0) {
51
49
  metadf = read.table(metafile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
52
50
  if (!is.null(metadf$Sample)) {
53
51
  metadf$Sample = as.character(metadf$Sample)
@@ -63,20 +61,12 @@ if (file.exists(metafile) && length(meta_cols) > 0) {
63
61
  write.table(data, file=file.path(outdir, "tMAD.txt"), sep="\t", quote=F, row.names=F)
64
62
 
65
63
  # bar plot for all samples without grouping
66
- p = ggplot(data, aes(x=Sample, y=tMAD)) +
67
- geom_bar(stat="identity", fill="steelblue") +
68
- theme_prism() +
69
- theme(
70
- axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
71
- axis.title.x = element_blank(),
72
- axis.title.y = element_text(size=12),
73
- axis.text.y = element_text(size=12),
74
- legend.position = "none",
75
- ) +
76
- labs(
77
- x = NULL,
78
- y = "tMAD",
79
- )
64
+ p <- BarPlot(
65
+ data = data,
66
+ x = "Sample",
67
+ y = "tMAD",
68
+ x_text_angle = 90
69
+ )
80
70
 
81
71
  png(file.path(outdir, "tMAD.png"), width=400 + length(sams) * 12, height=800, res=100)
82
72
  print(p)
@@ -88,41 +78,30 @@ if (!is.null(group_cols)) {
88
78
  if (!grepl(",", group_col, fixed = TRUE)) {
89
79
  # Bar plot with this group_col, but with different fill colors
90
80
  # for each group, and samples from the same group are next to each other
91
- p = ggplot(
92
- data %>% arrange(!!sym(group_col)) %>% mutate(Sample=factor(Sample, levels=Sample)),
93
- aes(x=Sample, y=tMAD, fill=!!sym(group_col))
94
- ) +
95
- geom_bar(stat="identity") +
96
- theme_prism() +
97
- theme(
98
- axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
99
- axis.title.x = element_blank(),
100
- axis.title.y = element_text(size=12),
101
- axis.text.y = element_text(size=12),
102
- ) +
103
- labs(
104
- x = NULL,
105
- y = "tMAD",
106
- )
81
+ gdata <- data %>% arrange(!!sym(group_col)) %>% mutate(Sample=factor(Sample, levels=unique(Sample)))
82
+ p <- BarPlot(
83
+ data = gdata,
84
+ x = "Sample",
85
+ y = "tMAD",
86
+ fill = group_col,
87
+ x_text_angle = 90
88
+ )
107
89
 
108
90
  png(file.path(outdir, paste0("tMAD_", group_col, "_bar.png")), width=400 + length(sams) * 12, height=600, res=100)
109
91
  print(p)
110
92
  dev.off()
111
93
 
112
94
  # Box plot overlays with violin plot with this group_col
113
- p = ggplot(data, aes(x=!!sym(group_col), y=tMAD)) +
114
- geom_boxplot(outlier.shape=NA, fill="white", color="black") +
115
- geom_violin(fill="steelblue", alpha=0.5) +
116
- theme_prism() +
117
- theme(
118
- axis.title.x = element_text(size=12),
119
- axis.title.y = element_text(size=12),
120
- axis.text.y = element_text(size=12),
121
- ) +
122
- labs(
123
- x = group_col,
124
- y = "tMAD",
125
- )
95
+ p <- ViolinPlot(
96
+ data = gdata,
97
+ x = group_col,
98
+ y = "tMAD",
99
+ x_text_angle = 90,
100
+ add_box = TRUE,
101
+ add_point = TRUE,
102
+ comparisons = TRUE,
103
+ sig_label = "p.format"
104
+ )
126
105
 
127
106
  png(file.path(outdir, paste0("tMAD_", group_col, "_box_violin.png")), width=1000, height=600, res=100)
128
107
  print(p)
@@ -137,25 +116,17 @@ if (!is.null(group_cols)) {
137
116
  # concatenate them together using patch work, with ncol=2
138
117
  # calculate the height and width of the plot based on the number of
139
118
  # groups
140
- ps = data %>%
141
- group_by(!!sym(group_col1)) %>%
142
- group_map(function(.x, .y) {
143
- p = ggplot(
144
- .x %>% arrange(!!sym(group_col2)) %>% mutate(Sample=factor(Sample, levels=Sample)),
145
- aes(x=Sample, y=tMAD, fill=!!sym(group_col2))
146
- ) +
147
- geom_bar(stat="identity") +
148
- theme_prism() +
149
- theme(
150
- axis.text.x = element_text(angle=90, hjust=1, vjust=0.5),
151
- axis.title.x = element_blank(),
152
- axis.title.y = element_text(size=12),
153
- axis.text.y = element_text(size=12),
154
- ) +
155
- labs(x = NULL, y = "tMAD") +
156
- ggtitle(.y[[group_col1]][1])
157
- p
158
- })
119
+ gdata <- data %>% arrange(!!sym(group_col1), !!sym(group_col2)) %>%
120
+ mutate(Sample=factor(Sample, levels=unique(Sample)))
121
+ p <- BarPlot(
122
+ data = gdata,
123
+ x = "Sample",
124
+ y = "tMAD",
125
+ split_by = group_col1,
126
+ fill = group_col2,
127
+ x_text_angle = 90,
128
+ ncol = 2
129
+ )
159
130
 
160
131
  png(
161
132
  file.path(outdir, paste0("tMAD_", group_col, "_bar.png")),
@@ -163,26 +134,22 @@ if (!is.null(group_cols)) {
163
134
  height=length(unique(data[[group_col1]])) * 200,
164
135
  res=100
165
136
  )
166
- print(wrap_plots(ps, ncol=2))
137
+ print(p)
167
138
  dev.off()
168
139
 
169
140
  # Do the same for Violin + boxplot
170
- ps = data %>%
171
- group_by(!!sym(group_col1)) %>%
172
- group_map(function(.x, .y) {
173
- p = ggplot(.x, aes(x=!!sym(group_col2), y=tMAD)) +
174
- geom_boxplot(outlier.shape=NA, fill="white", color="black") +
175
- geom_violin(fill="steelblue", alpha=0.5) +
176
- theme_prism() +
177
- theme(
178
- axis.title.x = element_text(size=12),
179
- axis.title.y = element_text(size=12),
180
- axis.text.y = element_text(size=12),
181
- ) +
182
- labs(x = group_col2, y = "tMAD") +
183
- ggtitle(.y[[group_col1]][1])
184
- p
185
- })
141
+ p <- ViolinPlot(
142
+ data = gdata,
143
+ x = group_col2,
144
+ y = "tMAD",
145
+ split_by = group_col1,
146
+ x_text_angle = 90,
147
+ add_box = TRUE,
148
+ add_point = TRUE,
149
+ comparisons = TRUE,
150
+ sig_label = "p.format",
151
+ ncol = 2
152
+ )
186
153
 
187
154
  png(
188
155
  file.path(outdir, paste0("tMAD_", group_col, "_box_violin.png")),
@@ -190,7 +157,7 @@ if (!is.null(group_cols)) {
190
157
  height=length(unique(data[[group_col1]])) * 200,
191
158
  res=100
192
159
  )
193
- print(wrap_plots(ps, ncol=2))
160
+ print(p)
194
161
  dev.off()
195
162
  }
196
163
  }
@@ -5,10 +5,10 @@ from pathlib import Path, PosixPath # for as_path
5
5
 
6
6
  from biopipen.utils.misc import run_command, dict_to_cli_args
7
7
 
8
- bamfiles = {{in.bamfiles | repr}} # pyright: ignore # noqa
9
- atfile = {{in.atfile | repr}} # pyright: ignore
8
+ bamfiles = {{in.bamfiles | each: str | repr}} # pyright: ignore # noqa
9
+ atfile = {{in.atfile | quote}} # pyright: ignore
10
10
 
11
- targetfile = {{out.targetfile | repr}} # pyright: ignore
11
+ targetfile = {{out.targetfile | quote}} # pyright: ignore
12
12
  covfile = {{out.targetfile | as_path | attr: "with_suffix" | call: ".cnn" | repr}} # pyright: ignore
13
13
 
14
14
  cnvkit: str = {{envs.cnvkit | repr}} # pyright: ignore
@@ -1,12 +1,12 @@
1
1
  from pathlib import Path
2
2
 
3
- from diot import Diot
3
+ from diot import Diot # type: ignore[import]
4
4
 
5
5
  from biopipen.utils.misc import run_command, dict_to_cli_args
6
6
 
7
- segfiles = {{in.segfiles | repr}} # pyright: ignore # noqa # noqa
7
+ segfiles = {{in.segfiles | default: [] | each: str | repr}} # pyright: ignore # noqa # noqa
8
8
  sample_sex = {{in.sample_sex | repr}} # pyright: ignore
9
- outdir: str = {{out.outdir | repr}} # pyright: ignore
9
+ outdir: str = {{out.outdir | quote}} # pyright: ignore
10
10
  cnvkit = {{envs.cnvkit | quote}} # pyright: ignore
11
11
  convert = {{envs.convert | quote}} # pyright: ignore
12
12
  convert_args = {{envs.convert_args | repr}} # pyright: ignore
@@ -1,9 +1,9 @@
1
1
  from pathlib import Path
2
2
  from biopipen.utils.misc import run_command, dict_to_cli_args
3
3
 
4
- covfiles = {{in.covfiles | repr}} # pyright: ignore # noqa
5
- target_file = {{in.target_file | repr}} # pyright: ignore
6
- antitarget_file = {{in.antitarget_file | repr}} # pyright: ignore
4
+ covfiles = {{in.covfiles | default: [] | each: str | repr}} # pyright: ignore # noqa
5
+ target_file = {{in.target_file | quote: quote_none=False}} # pyright: ignore
6
+ antitarget_file = {{in.antitarget_file | quote: quote_none=False}} # pyright: ignore
7
7
  sample_sex = {{in.sample_sex | repr}} # pyright: ignore
8
8
  outfile = {{out.outfile | quote}} # pyright: ignore
9
9
  reffile: str = {{envs.ref | quote}} # pyright: ignore
@@ -1,4 +1,4 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
1
+ library(biopipen.utils)
2
2
 
3
3
  infiles <- {{in.infiles | r}}
4
4
  outfile <- {{out.outfile | r}}
@@ -1,5 +1,6 @@
1
1
  library(rlang)
2
2
  library(dplyr)
3
+ library(gglogger)
3
4
  library(biopipen.utils)
4
5
  library(plotthis)
5
6
 
@@ -132,7 +133,7 @@ if (length(stats) > 0) {
132
133
  case$data <- mutdata
133
134
  }
134
135
 
135
- p <- do_call(gglogger::register(plot_fn, name = plot_type), case)
136
+ p <- do_call(plot_fn, case)
136
137
  save_plot(p, info$prefix, devpars, formats = more_formats)
137
138
  if (save_code) {
138
139
  save_plotcode(
@@ -155,3 +156,5 @@ if (length(stats) > 0) {
155
156
  )
156
157
  }
157
158
  }
159
+
160
+ reporter$save(joboutdir)
@@ -1,8 +1,7 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "gene.R" | source_r }}
1
+ library(biopipen.utils)
3
2
 
4
- infile <- {{in.infile | quote}}
5
- outfile <- {{out.outfile | quote}}
3
+ infile <- {{in.infile | r}}
4
+ outfile <- {{out.outfile | r}}
6
5
  notfound <- {{envs.notfound | r}}
7
6
  genecol <- {{envs.genecol | r}}
8
7
  output <- {{envs.output | r}}
@@ -11,6 +10,8 @@ infmt <- {{envs.infmt | r}}
11
10
  outfmt <- {{envs.outfmt | r}}
12
11
  species <- {{envs.species | r}}
13
12
 
13
+ log <- get_logger()
14
+
14
15
  if (is.na(notfound)) {
15
16
  notfound = "na"
16
17
  }
@@ -18,7 +19,7 @@ if (is.na(notfound)) {
18
19
  df <- read.table(infile, header=TRUE, sep="\t", check.names=FALSE)
19
20
 
20
21
  if (genecol == 0) {
21
- log_warn("envs.genecol should be 1-based, but 0 was given. Using 1 instead.")
22
+ log$warn("envs.genecol should be 1-based, but 0 was given. Using 1 instead.")
22
23
  genecol <- 1
23
24
  }
24
25
 
@@ -27,12 +28,13 @@ if (dup == "combine") { dup <- ";" }
27
28
 
28
29
  genes <- df[[genecol]]
29
30
  converted <- gene_name_conversion(
30
- genes=genes,
31
- species=species,
32
- infmt=infmt,
33
- outfmt=outfmt,
34
- notfound=notfound,
35
- dup=dup
31
+ genes = genes,
32
+ species = species,
33
+ infmt = infmt,
34
+ outfmt = outfmt,
35
+ notfound = notfound,
36
+ dup = dup,
37
+ suppress_messages = FALSE
36
38
  )
37
39
  # <genecol> <outfmt>
38
40
  # 1 1255_g_at GUCA1A
@@ -50,7 +52,7 @@ if (notfound == "skip" || notfound == "ignore") {
50
52
 
51
53
  if (output == "append") {
52
54
  if (outfmt %in% colnames(df)) {
53
- log_warn("The output column name already exists in the input dataframe. Appending with a suffix `_1`.")
55
+ log$warn("The output column name already exists in the input dataframe. Appending with a suffix `_1`.")
54
56
  outcol <- paste(outfmt, "_1", sep="")
55
57
  }
56
58
  df[[outcol]] <- converted[[outfmt]]
@@ -2,8 +2,8 @@
2
2
  {{ biopipen_dir | joinpaths: "utils", "gene.R" | source_r }}
3
3
  {{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
4
4
 
5
- infile = {{in.infile | quote}}
6
- outdir = {{out.outdir | quote}}
5
+ infile = {{in.infile | r}}
6
+ outdir = {{out.outdir | r}}
7
7
  genecol = {{envs.genecol | r}}
8
8
  genename = {{envs.genename | r}}
9
9
  dbs = {{envs.dbs | r}}
@@ -1,58 +1,192 @@
1
- # PreRank the genes for GSEA analysis
2
- # See: https://gseapy.readthedocs.io/en/latest/_modules/gseapy/algorithm.html#ranking_metric
3
- {{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
4
- {{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
5
-
6
- infile = {{in.infile | quote}}
7
- metafile = {{in.metafile | quote}}
8
- gmtfile = {{in.gmtfile | quote}}
9
- {% if in.configfile %}
10
- config = {{in.config | read | toml_loads | r}}
11
- {% else %}
12
- config = list()
13
- {% endif %}
14
- outdir = {{out.outdir | quote}}
15
- envs = {{envs | r}}
16
- clscol <- if (is.null(config$clscol)) envs$clscol else config$clscol
17
- classes <- if (is.null(config$classes)) envs$classes else config$classes
18
-
19
- if (is.null(clscol)) {
20
- stop("No `clscol` specified.")
21
- }
1
+ library(rlang)
2
+ library(biopipen.utils)
22
3
 
23
- if (is.null(classes) || length(classes) != 2) {
24
- stop(paste("`classes` must be a pair of labels."))
25
- }
4
+ # input & output
5
+ infile = {{in.infile | r}}
6
+ metafile = {{in.metafile | r}}
7
+ outdir = {{out.outdir | r}}
8
+ joboutdir = {{job.outdir | r}}
9
+
10
+ # envs
11
+ ncores = {{envs.ncores | r}}
12
+ case = {{envs.case | r}}
13
+ control = {{envs.control | r}}
14
+ gmtfile = {{envs.gmtfile | r}}
15
+ method = {{envs.method | r}}
16
+ clscol = {{envs.clscol | r}}
17
+ top = {{envs.top | r}}
18
+ eps = {{envs.eps | r}}
19
+ minsize = {{envs.minSize | default: envs.minsize | r}}
20
+ maxsize = {{envs.maxSize | default: envs.maxsize | r}}
21
+ rest = {{envs.rest | r}}
22
+ cases = {{envs.cases | r}}
23
+
24
+ log <- get_logger()
25
+ reporter <- get_reporter()
26
+
27
+ defaults <- list(
28
+ case = case,
29
+ control = control,
30
+ gmtfile = gmtfile,
31
+ method = method,
32
+ clscol = clscol,
33
+ top = top,
34
+ eps = eps,
35
+ minsize = minsize,
36
+ maxsize = maxsize,
37
+ rest = rest
38
+ )
39
+ cases <- expand_cases(cases, defaults, default_case = "GSEA")
40
+
41
+ log$info("Reading input file ...")
42
+ indata <- read.table(infile, header=TRUE, stringsAsFactors=FALSE, row.names=1, sep="\t", quote="", check.names=FALSE)
26
43
 
27
- if (is.character(envs$inopts) && inopts == "rds") {
28
- indata = readRDS(infile)
44
+ if (!is.null(metafile)) {
45
+ log$info("Reading metadata file ...")
46
+ metadata <- read.table(metafile, header=TRUE, stringsAsFactors=FALSE, row.names=NULL, sep="\t", quote="", check.names=FALSE)
29
47
  } else {
30
- indata = read.table.opts(infile, envs$inopts)
48
+ metadata <- NULL
31
49
  }
32
50
 
33
- metadata = read.table.opts(metafile, envs$metaopts)
34
- allclasses = metadata[colnames(indata), clscol]
51
+ do_case <- function(name) {
52
+ log$info("Processing case: {name} ...")
53
+ case <- cases[[name]]
54
+ info <- case_info(name, outdir, create = TRUE)
35
55
 
36
- ranks = prerank(indata, classes[1], classes[2], allclasses, envs$method)
56
+ if (is.null(case$case) && is.null(case$control)) {
57
+ stop("Either `case` or `control` must be specified in the case.")
58
+ }
59
+ if (is.null(case$gmtfile)) {
60
+ stop("`gmtfile` must be specified in the case.")
61
+ }
62
+ if (is.null(case$clscol)) {
63
+ stop("`clscol` must be specified in the case.")
64
+ }
65
+ if (!is.null(metadata) && length(case$clscol) > 1) {
66
+ stop("When `in.metafile` is specified, `envs.clscol` must be a single column name.")
67
+ }
68
+ if (!is.null(metadata)) {
69
+ samples <- colnames(indata)
70
+ if (!"Sample" %in% colnames(metadata)) {
71
+ colnames(metadata)[1] <- "Sample"
72
+ }
73
+ metadata <- metadata[match(samples, metadata$Sample), , drop=FALSE]
74
+ case$clscol <- as.character(metadata[[case$clscol]])
75
+ }
76
+ if (length(unique(case$clscol)) < 2) {
77
+ stop("The `clscol` must have at least two unique values.")
78
+ }
79
+ if (length(unique(case$clscol)) == 2) {
80
+ case$case <- case$case %||% setdiff(unique(case$clscol), case$control)
81
+ case$control <- case$control %||% setdiff(unique(case$clscol), case$case)
82
+ } else {
83
+ if (is.null(case$case) || is.null(case$control)) {
84
+ stop("When `clscol` has more than two unique values, both `case` and `control` must be specified.")
85
+ }
86
+ }
87
+ log$info("- Running pre-ranking ...")
88
+ ranks <- RunGSEAPreRank(
89
+ indata,
90
+ classes = case$clscol,
91
+ case = case$case,
92
+ control = case$control,
93
+ method = case$method
94
+ )
95
+ if (all(is.na(ranks))) {
96
+ if (length(case$clscol) < 10) {
97
+ log$warn(" Ignoring this case because all gene ranks are NA and there are <10 samples.")
98
+ reporter$add2(
99
+ list(
100
+ kind = "error",
101
+ content = paste0("Not enough samples (n = ", length(case$clscol), ") to run fgsea.")
102
+ ),
103
+ hs = c(info$section, info$name)
104
+ )
105
+ return(NULL)
106
+ } else {
107
+ stop(paste0(
108
+ "All gene ranks are NA (# samples = ",
109
+ length(case$clscol),
110
+ "). ",
111
+ "It's probably due to high missing rate in the data. ",
112
+ "You may want to try a different `envs$method` for pre-ranking."
113
+ ))
114
+ }
115
+ }
37
116
 
38
- write.table(
39
- ranks,
40
- file.path(outdir, "fgsea.rank"),
41
- row.names=F,
42
- col.names=T,
43
- sep="\t",
44
- quote=F
45
- )
117
+ log$info("- Running GSEA ...")
118
+ case$rest$ranks <- ranks
119
+ case$rest$genesets <- ParseGMT(case$gmtfile)
120
+ case$rest$minSize <- case$rest$minSize %||% case$rest$minsize %||% case$minsize
121
+ case$rest$maxSize <- case$rest$maxSize %||% case$rest$maxsize %||% case$maxsize
122
+ case$rest$eps <- case$eps
123
+ case$rest$nproc <- case$ncores
124
+ case$rest$minsize <- NULL
125
+ case$rest$maxsize <- NULL
126
+ result <- do_call(RunGSEA, case$rest)
127
+ write.table(
128
+ result,
129
+ file.path(info$prefix, "fgsea.tsv"),
130
+ row.names = FALSE,
131
+ col.names = TRUE,
132
+ sep = "\t",
133
+ quote = FALSE
134
+ )
135
+
136
+ p_summary <- VizGSEA(
137
+ result,
138
+ plot_type = "summary",
139
+ top_term = case$top
140
+ )
141
+ save_plot(
142
+ p_summary,
143
+ file.path(info$prefix, "summary"),
144
+ devpars = list(res = 100, height = attr(p_summary, "height") * 100, width = attr(p_summary, "width") * 100),
145
+ formats = "png"
146
+ )
147
+
148
+ p_gsea <- VizGSEA(
149
+ result,
150
+ plot_type = "gsea",
151
+ gs = result$pathway[1:min(case$top, nrow(result))]
152
+ )
153
+ save_plot(
154
+ p_gsea,
155
+ file.path(info$prefix, "pathways"),
156
+ devpars = list(res = 100, height = attr(p_gsea, "height") * 100, width = attr(p_gsea, "width") * 100),
157
+ formats = "png"
158
+ )
159
+
160
+
161
+ reporter$add2(
162
+ list(
163
+ name = "Table",
164
+ contents = list(
165
+ list(kind = "descr", content = paste0(
166
+ "Showing top 50 pathways by padj in descending order. ",
167
+ "Use 'Download the entire data' button to download all pathways."
168
+ )),
169
+ list(kind = "table", src = file.path(info$prefix, "fgsea"), data = list(nrows = 50))
170
+ )
171
+ ),
172
+ list(
173
+ name = "Summary Plot",
174
+ contents = list(
175
+ list(kind = "descr", content = paste0("Showing top ", case$top, " pathways.")),
176
+ list(kind = "image", src = file.path(info$prefix, "summary.png"))
177
+ )
178
+ ),
179
+ list(
180
+ name = "GSEA Plots",
181
+ contents = list(
182
+ list(kind = "descr", content = paste0("Showing top ", case$top, " pathways.")),
183
+ list(kind = "image", src = file.path(info$prefix, "pathways.png"))
184
+ )
185
+ ),
186
+ hs = c(info$section, info$name),
187
+ ui = "tabs"
188
+ )
189
+ }
46
190
 
47
- top = envs$top
48
- envs$nproc = envs$ncores
49
- envs$inopts = NULL
50
- envs$metaopts = NULL
51
- envs$method = NULL
52
- envs$clscol = NULL
53
- envs$classes = NULL
54
- envs$ncores = NULL
55
- envs$top = NULL
56
- # the rest are the arguments for `fgsea()`
57
-
58
- runFGSEA(ranks, gmtfile, top, outdir, envs)
191
+ sapply(names(cases), do_case)
192
+ reporter$save(joboutdir)
@@ -3,14 +3,14 @@
3
3
  {{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
4
4
  {{ biopipen_dir | joinpaths: "utils", "gsea.R" | source_r }}
5
5
 
6
- infile = {{in.infile | quote}}
7
- metafile = {{in.metafile | quote}}
6
+ infile = {{in.infile | r}}
7
+ metafile = {{in.metafile | r}}
8
8
  {% if in.configfile %}
9
9
  config = {{in.config | read | toml_loads | r}}
10
10
  {% else %}
11
11
  config = list()
12
12
  {% endif %}
13
- outfile = {{out.outfile | quote}}
13
+ outfile = {{out.outfile | r}}
14
14
  envs = {{envs | r}}
15
15
  clscol <- if (is.null(config$clscol)) envs$clscol else config$clscol
16
16
  classes <- if (is.null(config$classes)) envs$classes else config$classes