biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (149) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +290 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +5 -4
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  132. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  133. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  134. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  135. biopipen/reports/utils/gsea.liq +0 -110
  136. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  137. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  138. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  139. biopipen/utils/caching.R +0 -44
  140. biopipen/utils/gene.R +0 -95
  141. biopipen/utils/gsea.R +0 -329
  142. biopipen/utils/io.R +0 -20
  143. biopipen/utils/misc.R +0 -602
  144. biopipen/utils/mutate_helpers.R +0 -581
  145. biopipen/utils/plot.R +0 -209
  146. biopipen/utils/repr.R +0 -146
  147. biopipen/utils/rnaseq.R +0 -48
  148. biopipen/utils/single_cell.R +0 -207
  149. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
@@ -1,150 +1,172 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
3
-
4
- library(ggplot2)
5
- library(ggprism)
6
1
  library(dplyr)
7
2
  library(tidyr)
8
3
  library(tibble)
9
- library(patchwork)
4
+ library(plotthis)
5
+ library(biopipen.utils)
10
6
 
11
- asdirs = {{in.asdirs | r}}
12
- metafile = {{in.metafile | r}}
13
- outdir = {{out.outdir | r}}
14
- group_cols = {{envs.group_cols | r}}
15
- sample_name_fun = {{envs.sample_name | r}}
16
- heatmap_cases = {{envs.heatmap_cases | r}}
7
+ asdirs <- {{in.asdirs | r}}
8
+ metafile <- {{in.metafile | r}}
9
+ outdir <- {{out.outdir | r}}
10
+ group_cols <- {{envs.group_cols | r}}
11
+ sample_name_fun <- {{envs.sample_name | r}}
12
+ heatmap_cases <- {{envs.heatmap_cases | r}}
17
13
 
18
14
  if (!is.null(sample_name_fun)) {
19
- sample_name_fun = eval(parse(text=sample_name_fun))
15
+ sample_name_fun <- eval(parse(text = sample_name_fun))
20
16
  }
21
17
 
22
- get_sample_from_asdir = function(asdir) {
23
- x = basename(asdir)
18
+ get_sample_from_asdir <- function(asdir) {
19
+ x <- basename(asdir)
24
20
  if (endsWith(x, ".aneuploidy_score")) {
25
- x = substr(x, 1, nchar(x) - 17)
21
+ x <- substr(x, 1, nchar(x) - 17)
26
22
  }
27
23
  if (endsWith(x, ".call")) {
28
- x = substr(x, 1, nchar(x) - 5)
24
+ x <- substr(x, 1, nchar(x) - 5)
29
25
  }
30
26
  if (!is.null(sample_name_fun)) {
31
- x = sample_name_fun(x)
27
+ x <- sample_name_fun(x)
32
28
  }
33
29
  x
34
30
  }
35
31
 
36
- sams = sapply(asdirs, get_sample_from_asdir)
32
+ asdir_to_sample <- lapply(asdirs, get_sample_from_asdir)
33
+ names(asdir_to_sample) <- asdirs
34
+ table_sams <- table(unlist(asdir_to_sample))
35
+ if (any(table_sams > 1)) {
36
+ log_warn("Duplicate sample names found in asdirs: ")
37
+ dup_sams <- names(table_sams[table_sams > 1])
38
+ for (dup_sam in dup_sams) {
39
+ i <- 1
40
+ for (asdir in asdirs) {
41
+ if (asdir_to_sample[[asdir]] == dup_sam) {
42
+ dedup_sam <- paste0(dup_sam, "_", i)
43
+ log_warn(paste0("- Changing ", dup_sam, "(", asdir, ") to ", dedup_sam))
44
+ asdir_to_sample[[asdir]] <- paste0(dup_sam, "_", i)
45
+ i <- i + 1
46
+ }
47
+ }
48
+ }
49
+ }
50
+ sams <- unlist(asdir_to_sample)
37
51
 
38
- meta_cols = c()
52
+ meta_cols <- c()
39
53
  if (!is.null(group_cols)) {
40
54
  for (group_col in group_cols) {
41
55
  if (grepl(",", group_col, fixed = TRUE)) {
42
- subcols = strsplit(group_col, ",")[[1]]
56
+ subcols <- strsplit(group_col, ",")[[1]]
43
57
  if (length(subcols) > 2) {
44
58
  stop("Only support 2 columns combined for group_cols")
45
59
  }
46
- meta_cols = union(meta_cols, subcols)
60
+ meta_cols <- union(meta_cols, subcols)
47
61
  } else {
48
- meta_cols = union(meta_cols, group_col)
62
+ meta_cols <- union(meta_cols, group_col)
49
63
  }
50
64
  }
51
65
  }
52
66
 
53
67
  if (!is.null(metafile)) {
54
- metadf = read.table(metafile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
68
+ metadf <- read.table(metafile, header=T, row.names=NULL, sep="\t", stringsAsFactors=F)
55
69
  if (!is.null(metadf$Sample)) {
56
- metadf$Sample = as.character(metadf$Sample)
70
+ metadf$Sample <- as.character(metadf$Sample)
57
71
  } else {
58
- colnames(metadf)[1] = "Sample"
72
+ colnames(metadf)[1] <- "Sample"
59
73
  }
60
- metadf = metadf[metadf$Sample %in% sams, c("Sample", meta_cols), drop=FALSE]
74
+ metadf <- metadf[metadf$Sample %in% sams, c("Sample", meta_cols), drop=FALSE]
75
+ rownames(metadf) <- metadf$Sample
61
76
  if (nrow(metadf) != length(sams)) {
62
77
  stop(paste("Not all samples in metafile:", paste(setdiff(sams, metadf$Sample), collapse=", ")))
63
78
  }
64
79
  } else {
65
- metadf = NULL
80
+ metadf <- NULL
66
81
  if (!is.null(group_cols) && length(group_cols) > 0) {
67
82
  stop("`envs.group_cols` given but no metafile provided")
68
83
  }
69
84
  }
70
85
 
71
-
72
-
73
- read_caa = function(asdir) {
86
+ read_caa <- function(asdir) {
74
87
  # Sample Arms arm seg
75
- sample = get_sample_from_asdir(asdir)
76
- caa = read.table(
88
+ sample <- asdir_to_sample[[asdir]]
89
+ caa <- read.table(
77
90
  file.path(asdir, "CAA.txt"),
78
91
  header=T,
79
92
  row.names=NULL,
80
93
  sep="\t",
81
94
  stringsAsFactors=F,
82
95
  )
83
- caa$Sample = sample
96
+ caa$Sample <- sample
84
97
  caa
85
98
  }
86
99
 
87
- read_as = function(asdir) {
100
+ read_as <- function(asdir) {
88
101
  # Sample SignalType Signal
89
- sample = get_sample_from_asdir(asdir)
90
- as = read.table(
102
+ sample <- asdir_to_sample[[asdir]]
103
+ as <- read.table(
91
104
  file.path(asdir, "AS.txt"),
92
105
  header=F,
93
106
  row.names=NULL,
94
107
  sep="\t",
95
108
  stringsAsFactors=F,
96
109
  )
97
- colnames(as) = c("SignalType", "Signal")
98
- as$Sample = sample
110
+ colnames(as) <- c("SignalType", "Signal")
111
+ as$Sample <- sample
99
112
  as
100
113
  }
101
114
 
102
115
  # Sample Arms arm seg
103
- caa = do_call(rbind, lapply(asdirs, read_caa))
116
+ caa <- do_call(rbind, lapply(asdirs, read_caa))
104
117
  # Sample SignalType Signal
105
- as = do_call(rbind, lapply(asdirs, read_as))
118
+ as <- do_call(rbind, lapply(asdirs, read_as))
106
119
 
107
120
  # Sample chr1_p chr1_q chr2_p chr2_q ...
108
- caa_arm = caa %>%
121
+ caa_arm <- caa %>%
109
122
  select(-"seg") %>%
110
123
  pivot_wider(names_from="Arms", values_from="arm")
111
124
 
112
125
  # Sample chr1_p chr1_q chr2_p chr2_q ...
113
- caa_seg = caa %>%
126
+ caa_seg <- caa %>%
114
127
  select(-"arm") %>%
115
128
  pivot_wider(names_from="Arms", values_from="seg")
116
129
 
117
130
  # Sample SignalType Signal
118
- as_arm = as %>% filter(SignalType == "arm") %>% select(-"SignalType")
119
- as_seg = as %>% filter(SignalType == "seg") %>% select(-"SignalType")
131
+ as_arm <- as %>% filter(SignalType == "arm") %>% select(-"SignalType")
132
+ as_seg <- as %>% filter(SignalType == "seg") %>% select(-"SignalType")
120
133
 
121
134
  if (!is.null(metadf)) {
122
- caa_arm = caa_arm %>% left_join(metadf, by="Sample")
123
- caa_seg = caa_seg %>% left_join(metadf, by="Sample")
124
- as_arm = as_arm %>% left_join(metadf, by="Sample")
125
- as_seg = as_seg %>% left_join(metadf, by="Sample")
135
+ caa_arm <- caa_arm %>% left_join(metadf, by="Sample")
136
+ caa_seg <- caa_seg %>% left_join(metadf, by="Sample")
137
+ as_arm <- as_arm %>% left_join(metadf, by="Sample")
138
+ as_seg <- as_seg %>% left_join(metadf, by="Sample")
126
139
  }
127
140
 
128
-
129
141
  write.table(caa_arm, file.path(outdir, "CAA_arm.txt"), sep="\t", quote=F, row.names=F, col.names=T)
130
142
  write.table(caa_seg, file.path(outdir, "CAA_seg.txt"), sep="\t", quote=F, row.names=F, col.names=T)
131
143
  write.table(as_arm, file.path(outdir, "AS_arm.txt"), sep="\t", quote=F, row.names=F, col.names=T)
132
144
  write.table(as_seg, file.path(outdir, "AS_seg.txt"), sep="\t", quote=F, row.names=F, col.names=T)
133
145
 
134
146
  # Plot AS without grouping
135
- p_as_arm = ggplot(as_arm) +
136
- geom_bar(aes(x=Sample, y=Signal), stat="identity") +
137
- theme_prism() +
138
- theme(axis.text.x = element_text(angle = 90, hjust = 1))
147
+ p_as_arm <- BarPlot(
148
+ as_arm,
149
+ x="Sample",
150
+ y="Signal",
151
+ title="Aneuploidy Score (Arm)",
152
+ xlab="Sample",
153
+ ylab="Aneuploidy Score",
154
+ x_text_angle = 90
155
+ )
139
156
 
140
157
  png(file.path(outdir, "AS_arm.png"), width=400 + nrow(caa_arm) * 12, height=600, res=100)
141
158
  print(p_as_arm)
142
159
  dev.off()
143
160
 
144
- p_as_seg = ggplot(as_seg) +
145
- geom_bar(aes(x=Sample, y=Signal), stat="identity") +
146
- theme_prism() +
147
- theme(axis.text.x = element_text(angle = 90, hjust = 1))
161
+ p_as_seg <- BarPlot(
162
+ as_seg,
163
+ x="Sample",
164
+ y="Signal",
165
+ title="Aneuploidy Score (Segment)",
166
+ xlab="Sample",
167
+ ylab="Aneuploidy Score",
168
+ x_text_angle = 90
169
+ )
148
170
 
149
171
  png(file.path(outdir, "AS_seg.png"), width=400 + nrow(caa_seg) * 12, height=600, res=100)
150
172
  print(p_as_seg)
@@ -154,49 +176,70 @@ dev.off()
154
176
  if (!is.null(group_cols)) {
155
177
  for (group_col in group_cols) {
156
178
  if (!grepl(",", group_col, fixed = TRUE)) {
157
- # Single layer with group_col
158
- p_as_arm_bar_group = ggplot(
159
- as_arm %>% arrange(!!sym(group_col)) %>% mutate(Sample=factor(Sample, levels=Sample))
160
- ) +
161
- geom_bar(aes(x=Sample, y=Signal, fill=!!sym(group_col)), stat="identity") +
162
- theme_prism() +
163
- theme(axis.text.x = element_text(angle = 90, hjust = 1))
179
+
180
+ p_as_arm_bar_group <- BarPlot(
181
+ as_arm,
182
+ x="Sample",
183
+ y="Signal",
184
+ fill=group_col,
185
+ title=paste0("Aneuploidy Score (Arm) - ", group_col),
186
+ xlab="Sample",
187
+ ylab="Aneuploidy Score",
188
+ x_text_angle = 90
189
+ )
164
190
 
165
191
  png(file.path(outdir, paste0("AS_arm_bar_", group_col, ".png")), width=400 + nrow(caa_arm) * 12, height=600, res=100)
166
192
  print(p_as_arm_bar_group)
167
193
  dev.off()
168
194
 
169
- p_as_seg_bar_group = ggplot(
170
- as_seg %>% arrange(!!sym(group_col)) %>% mutate(Sample=factor(Sample, levels=Sample))
171
- ) +
172
- geom_bar(aes(x=Sample, y=Signal, fill=!!sym(group_col)), stat="identity") +
173
- theme_prism() +
174
- theme(axis.text.x = element_text(angle = 90, hjust = 1))
195
+ p_as_seg_bar_group <- BarPlot(
196
+ as_seg,
197
+ x="Sample",
198
+ y="Signal",
199
+ fill=group_col,
200
+ title=paste0("Aneuploidy Score (Segment) - ", group_col),
201
+ xlab="Sample",
202
+ ylab="Aneuploidy Score",
203
+ x_text_angle = 90
204
+ )
175
205
 
176
206
  png(file.path(outdir, paste0("AS_seg_bar_", group_col, ".png")), width=400 + nrow(caa_seg) * 12, height=600, res=100)
177
207
  print(p_as_seg_bar_group)
178
208
  dev.off()
179
209
 
180
210
  # Voilin + boxplot
181
- p_as_arm_violin_group = ggplot(
182
- as_arm %>% arrange(!!sym(group_col)) %>% mutate(Sample=factor(Sample, levels=Sample))
183
- ) +
184
- geom_violin(aes(x=!!sym(group_col), y=Signal), fill="steelblue", trim=FALSE) +
185
- geom_boxplot(aes(x=!!sym(group_col), y=Signal), width=0.1, outlier.shape=NA) +
186
- theme_prism() +
187
- theme(axis.text.x = element_text(angle = 90, hjust = 1))
211
+
212
+ p_as_arm_violin_group <- ViolinPlot(
213
+ as_arm,
214
+ x=group_col,
215
+ y="Signal",
216
+ title=paste0("Aneuploidy Score (Arm) - ", group_col),
217
+ xlab=group_col,
218
+ ylab="Aneuploidy Score",
219
+ x_text_angle = 90,
220
+ comparisons = TRUE,
221
+ sig_label = "p.format",
222
+ add_point = TRUE,
223
+ add_box = TRUE
224
+ )
188
225
 
189
226
  png(file.path(outdir, paste0("AS_arm_violin_", group_col, ".png")), width=1000, height=600, res=100)
190
227
  print(p_as_arm_violin_group)
191
228
  dev.off()
192
229
 
193
- p_as_seg_violin_group = ggplot(
194
- as_seg %>% arrange(!!sym(group_col)) %>% mutate(Sample=factor(Sample, levels=Sample))
195
- ) +
196
- geom_violin(aes(x=!!sym(group_col), y=Signal), fill="steelblue", trim=FALSE) +
197
- geom_boxplot(aes(x=!!sym(group_col), y=Signal), width=0.1, outlier.shape=NA) +
198
- theme_prism() +
199
- theme(axis.text.x = element_text(angle = 90, hjust = 1))
230
+ p_as_seg_violin_group <- ViolinPlot(
231
+ as_seg,
232
+ x=group_col,
233
+ y="Signal",
234
+ title=paste0("Aneuploidy Score (Segment) - ", group_col),
235
+ xlab=group_col,
236
+ ylab="Aneuploidy Score",
237
+ x_text_angle = 90,
238
+ comparisons = TRUE,
239
+ sig_label = "p.format",
240
+ add_point = TRUE,
241
+ add_box = TRUE
242
+ )
200
243
 
201
244
  png(file.path(outdir, paste0("AS_seg_violin_", group_col, ".png")), width=1000, height=600, res=100)
202
245
  print(p_as_seg_violin_group)
@@ -204,26 +247,27 @@ if (!is.null(group_cols)) {
204
247
 
205
248
  } else {
206
249
  # Multiple layers with group_col
207
- group_cols = strsplit(group_col, ",")[[1]]
208
- group_col1 = group_cols[1]
209
- group_col2 = group_cols[2]
250
+ group_cols <- strsplit(group_col, ",")[[1]]
251
+ group_col1 <- group_cols[1]
252
+ group_col2 <- group_cols[2]
210
253
 
211
254
  # For each group_col1, plot a barplot with group_col2 as fill, and
212
255
  # concatenate them together using patch work, with ncol=2
213
256
  # calcuate the height and width of the plot based on the number of
214
257
  # groups
215
- ps = as_arm %>%
216
- group_by(!!sym(group_col1)) %>%
217
- group_map(function(.x, .y) {
218
- p = ggplot(.x %>% arrange(!!sym(group_col2)) %>% mutate(Sample=factor(Sample, levels=Sample))) +
219
- geom_bar(aes(x=Sample, y=Signal, fill=!!sym(group_col2)), stat="identity") +
220
- theme_prism() +
221
- theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
222
- ggtitle(.y[[group_col1]][1])
223
- p
224
- })
225
-
226
- p = wrap_plots(ps, ncol=2)
258
+ as_arm <- as_arm %>% arrange(!!sym(group_col1), !!sym(group_col2)) %>% mutate(Sample=factor(Sample, levels=Sample))
259
+ p <- BarPlot(
260
+ as_arm,
261
+ x="Sample",
262
+ y="Signal",
263
+ split_by=group_col1,
264
+ fill=group_col2,
265
+ xlab="Sample",
266
+ ylab="Aneuploidy Score",
267
+ x_text_angle = 90,
268
+ ncol = 2
269
+ )
270
+
227
271
  png(
228
272
  file.path(outdir, paste0("AS_arm_bar_", group_col, ".png")),
229
273
  width=1000,
@@ -233,18 +277,18 @@ if (!is.null(group_cols)) {
233
277
  print(p)
234
278
  dev.off()
235
279
 
236
- ps = as_seg %>%
237
- group_by(!!sym(group_col1)) %>%
238
- group_map(function(.x, .y) {
239
- p = ggplot(.x %>% arrange(!!sym(group_col2)) %>% mutate(Sample=factor(Sample, levels=Sample))) +
240
- geom_bar(aes(x=Sample, y=Signal, fill=!!sym(group_col2)), stat="identity") +
241
- theme_prism() +
242
- theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
243
- ggtitle(.y[[group_col1]][1])
244
- p
245
- })
246
-
247
- p = wrap_plots(ps, ncol=2)
280
+ as_seg <- as_seg %>% arrange(!!sym(group_col1), !!sym(group_col2)) %>% mutate(Sample=factor(Sample, levels=Sample))
281
+ p <- BarPlot(
282
+ as_seg,
283
+ x="Sample",
284
+ y="Signal",
285
+ split_by=group_col1,
286
+ fill=group_col2,
287
+ xlab="Sample",
288
+ ylab="Aneuploidy Score",
289
+ x_text_angle = 90,
290
+ ncol = 2
291
+ )
248
292
  png(
249
293
  file.path(outdir, paste0("AS_seg_bar_", group_col, ".png")),
250
294
  width=1000,
@@ -255,19 +299,21 @@ if (!is.null(group_cols)) {
255
299
  dev.off()
256
300
 
257
301
  # Do the same for Voilin + boxplot
258
- ps = as_arm %>%
259
- group_by(!!sym(group_col1)) %>%
260
- group_map(function(.x, .y) {
261
- p = ggplot(.x %>% arrange(!!sym(group_col2)) %>% mutate(Sample=factor(Sample, levels=Sample))) +
262
- geom_violin(aes(x=!!sym(group_col2), y=Signal), fill="steelblue", trim=FALSE) +
263
- geom_boxplot(aes(x=!!sym(group_col2), y=Signal), width=0.1, outlier.shape=NA) +
264
- theme_prism() +
265
- theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
266
- ggtitle(.y[[group_col1]][1])
267
- p
268
- })
269
-
270
- p = wrap_plots(ps, ncol=2)
302
+ p <- ViolinPlot(
303
+ as_arm,
304
+ x=group_col2,
305
+ y="Signal",
306
+ split_by = group_col1,
307
+ xlab=group_col2,
308
+ ylab="Aneuploidy Score",
309
+ x_text_angle = 90,
310
+ comparisons = TRUE,
311
+ sig_label = "p.format",
312
+ add_point = TRUE,
313
+ add_box = TRUE,
314
+ ncol = 2
315
+ )
316
+
271
317
  png(
272
318
  file.path(outdir, paste0("AS_arm_violin_", group_col, ".png")),
273
319
  width=1000,
@@ -277,19 +323,21 @@ if (!is.null(group_cols)) {
277
323
  print(p)
278
324
  dev.off()
279
325
 
280
- ps = as_seg %>%
281
- group_by(!!sym(group_col1)) %>%
282
- group_map(function(.x, .y) {
283
- p = ggplot(.x %>% arrange(!!sym(group_col2)) %>% mutate(Sample=factor(Sample, levels=Sample))) +
284
- geom_violin(aes(x=!!sym(group_col2), y=Signal), fill="steelblue", trim=FALSE) +
285
- geom_boxplot(aes(x=!!sym(group_col2), y=Signal), width=0.1, outlier.shape=NA) +
286
- theme_prism() +
287
- theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
288
- ggtitle(.y[[group_col1]][1])
289
- p
290
- })
291
-
292
- p = wrap_plots(ps, ncol=2)
326
+ p <- ViolinPlot(
327
+ as_seg,
328
+ x=group_col2,
329
+ y="Signal",
330
+ split_by = group_col1,
331
+ xlab=group_col2,
332
+ ylab="Aneuploidy Score",
333
+ x_text_angle = 90,
334
+ comparisons = TRUE,
335
+ sig_label = "p.format",
336
+ add_point = TRUE,
337
+ add_box = TRUE,
338
+ ncol = 2
339
+ )
340
+
293
341
  png(
294
342
  file.path(outdir, paste0("AS_seg_violin_", group_col, ".png")),
295
343
  width=1000,
@@ -304,39 +352,46 @@ if (!is.null(group_cols)) {
304
352
 
305
353
  # Heatmaps
306
354
  for (heatmap_name in names(heatmap_cases)) {
307
- arms = heatmap_cases[[heatmap_name]]
355
+ arms <- heatmap_cases[[heatmap_name]]
308
356
  if (all(arms != "ALL")) {
309
- caa_df = caa_arm %>% select(Sample, !!meta_cols, !!arms)
357
+ caa_df <- caa_arm %>% select(Sample, !!meta_cols, !!arms)
310
358
  } else {
311
- caa_df = caa_arm
359
+ caa_df <- caa_arm
312
360
  }
313
- caa_df = caa_df %>% column_to_rownames("Sample")
361
+ caa_df <- caa_df %>% column_to_rownames("Sample")
314
362
  if (!is.null(metadf)) {
315
- caa_df = caa_df %>% select(-!!meta_cols)
363
+ caa_df <- caa_df %>% select(-!!meta_cols)
316
364
  }
317
-
318
- width = 300 + 20 * ncol(caa_df) # all arms: 300 + 30 * 46 = 1680
319
- height = 300 + 25 * nrow(caa_df) # 10 samples: 300 + 30 * 10 = 600
320
- args = list(
365
+ caa_df <- caa_df %>%
366
+ rownames_to_column("Sample") %>%
367
+ pivot_longer(cols=-"Sample", names_to="Arms", values_to="Signal") %>%
368
+ pivot_wider(names_from="Sample", values_from="Signal")
369
+
370
+ height <- 300 + 20 * ncol(caa_df) # all arms: 300 + 30 * 46 = 1680
371
+ width <- 300 + 25 * nrow(caa_df) # 10 samples: 300 + 30 * 10 = 600
372
+ # print(caa_df)
373
+ hmp <- Heatmap(
374
+ caa_df,
375
+ rows_data = metadf,
321
376
  name = "CAA",
322
- cluster_columns = FALSE,
377
+ rows_by = setdiff(colnames(caa_df), "Arms"),
378
+ columns_by = "Arms",
323
379
  cluster_rows = FALSE,
380
+ cluster_columns = FALSE,
324
381
  row_names_side = "left",
325
- rect_gp = grid::gpar(col = "#FFFFFF", lwd = 1)
382
+ show_row_names = TRUE,
383
+ show_column_names = TRUE,
384
+ row_annotation = colnames(metadf),
385
+ lower_cutoff = -1,
386
+ upper_cutoff = 1
326
387
  )
327
- if (!is.null(metadf)) {
328
- row_annos = list()
329
- for (meta_col in meta_cols) {
330
- row_annos[[meta_col]] = metadf[[meta_col]]
331
- }
332
- if (length(row_annos) > 0) {
333
- args$right_annotation = do_call(ComplexHeatmap::rowAnnotation, row_annos)
334
- }
335
- }
336
- plotHeatmap(
337
- caa_df,
338
- args = args,
339
- devpars = list(width=width, height=height, res=100),
340
- outfile = file.path(outdir, paste0("Heatmap_", heatmap_name, ".png"))
388
+
389
+ png(
390
+ file.path(outdir, paste0("Heatmap_", heatmap_name, ".png")),
391
+ width=width,
392
+ height=height,
393
+ res=100
341
394
  )
395
+ plot(hmp)
396
+ dev.off()
342
397
  }
@@ -1,10 +1,10 @@
1
1
  library(dplyr)
2
2
 
3
- segfile = {{in.segfile | quote}}
4
- outfile = {{out.outfile | quote}}
5
- chrom_col = {{envs.chrom_col | quote}}
3
+ segfile = {{in.segfile | r}}
4
+ outfile = {{out.outfile | r}}
5
+ chrom_col = {{envs.chrom_col | r}}
6
6
  excl_chroms = {{envs.excl_chroms | r}}
7
- seg_col = {{envs.seg_col | quote}}
7
+ seg_col = {{envs.seg_col | r}}
8
8
  segmean_transform = {{envs.segmean_transform | r}}
9
9
 
10
10
  if (is.character(segmean_transform)) {