biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (149) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +290 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +5 -4
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  132. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  133. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  134. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  135. biopipen/reports/utils/gsea.liq +0 -110
  136. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  137. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  138. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  139. biopipen/utils/caching.R +0 -44
  140. biopipen/utils/gene.R +0 -95
  141. biopipen/utils/gsea.R +0 -329
  142. biopipen/utils/io.R +0 -20
  143. biopipen/utils/misc.R +0 -602
  144. biopipen/utils/mutate_helpers.R +0 -581
  145. biopipen/utils/plot.R +0 -209
  146. biopipen/utils/repr.R +0 -146
  147. biopipen/utils/rnaseq.R +0 -48
  148. biopipen/utils/single_cell.R +0 -207
  149. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
@@ -1,7 +1,5 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
3
- library(ggprism)
4
- theme_set(theme_prism())
1
+ library(plotthis)
2
+ library(biopipen.utils)
5
3
 
6
4
  indir <- {{in.indir | r}}
7
5
  outdir <- {{out.outdir | r}}
@@ -13,11 +11,13 @@ samplecr <- {{envs.samplecr | r}}
13
11
  varcr <- {{envs.varcr | r}}
14
12
  max_iter <- {{envs.max_iter | r}}
15
13
 
14
+ log <- get_logger()
15
+
16
16
  bedfile = Sys.glob(file.path(indir, '*.bed'))
17
17
  if (length(bedfile) == 0)
18
18
  stop("No bed files found in the input directory.")
19
19
  if (length(bedfile) > 1) {
20
- log_warn("Multiple bed files found in the input directory. Using the first one.")
20
+ log$warn("Multiple bed files found in the input directory. Using the first one.")
21
21
  bedfile <- bedfile[1]
22
22
  }
23
23
  input <- tools::file_path_sans_ext(bedfile)
@@ -30,7 +30,7 @@ all_varcr_fail_file = paste0(output, '.varcr.fail')
30
30
  if (file.exists(all_smiss_file)) invisible(file.remove(all_smiss_file))
31
31
  if (file.exists(all_vmiss_file)) invisible(file.remove(all_vmiss_file))
32
32
  for (i in 1:max_iter) {
33
- log_info("Iteration {i} ...")
33
+ log$info("Iteration {i} ...")
34
34
  # iter_out <- paste0(output, "-", i)
35
35
  iter_dir <- file.path(outdir, paste0("iter", i))
36
36
  dir.create(iter_dir, showWarnings = FALSE)
@@ -152,39 +152,48 @@ vmiss <- read.table(
152
152
  vmiss$Callrate <- 1 - vmiss$F_MISS
153
153
 
154
154
  if (doplot) {
155
- log_info("Plotting ...")
155
+ log$info("Plotting ...")
156
156
  callrate.sample$Status <- "Pass"
157
157
  callrate.sample[callrate.sample.fail, "Status"] <- "Fail"
158
- plotGG(
159
- data = callrate.sample,
160
- geom = "histogram",
161
- outfile = paste0(output, '.samplecr.png'),
162
- args = list(aes(fill = Status, x = Callrate), alpha = 0.8, bins = 50),
163
- ggs = c(
164
- 'xlab("Sample Call Rate")',
165
- 'ylab("Count")',
166
- 'geom_vline(xintercept = samplecr, color = "red", linetype="dashed")',
167
- 'theme(legend.position = "none")',
168
- 'geom_text(aes(x = samplecr, y = Inf, label = samplecr), colour="red", angle=90, vjust = 1.2, hjust = 1.2)',
169
- 'scale_fill_manual(values = c("Pass" = "blue3", "Fail" = "red3"))'
170
- )
158
+ callrate.sample$Status <- factor(callrate.sample$Status, levels = c("Fail", "Pass"))
159
+
160
+ p_callrate_file <- paste0(output, '.samplecr.png')
161
+ p_callrate <- Histogram(
162
+ callrate.sample,
163
+ x = "Callrate",
164
+ group_by = "Status",
165
+ xlab = "Sample Call Rate",
166
+ ylab = "Count",
167
+ palette = "Set1",
168
+ alpha = 0.8,
169
+ bins = 50
171
170
  )
171
+ res <- 70
172
+ height <- attr(p_callrate, "height") * res
173
+ width <- attr(p_callrate, "width") * res
174
+ png(p_callrate_file, width = width, height = height, res = res)
175
+ print(p_callrate)
176
+ dev.off()
172
177
 
173
178
  vmiss$Status <- "Pass"
174
179
  vmiss[which(vmiss$Callrate < varcr), "Status"] <- "Fail"
175
- plotGG(
176
- data = vmiss,
177
- geom = "histogram",
178
- outfile = paste0(output, '.varcr.png'),
179
- args = list(aes(fill = Status, x = Callrate), alpha = 0.8, bins = 50),
180
- ggs = c(
181
- 'xlab("Variant Call Rate")',
182
- 'ylab("Count")',
183
- 'geom_vline(xintercept = varcr, color = "red", linetype="dashed")',
184
- 'theme(legend.position = "none")',
185
- 'geom_text(aes(x = varcr, y = Inf, label = varcr), colour="red", angle=90, vjust = 1.2, hjust = 1.2)',
186
- 'scale_fill_manual(values = c("Pass" = "blue3", "Fail" = "red3"))'
187
- ),
188
- devpars = devpars
180
+ vmiss$Status <- factor(vmiss$Status, levels = c("Fail", "Pass"))
181
+
182
+ p_varcr_file <- paste0(output, '.varcr.png')
183
+ p_varcr <- Histogram(
184
+ vmiss,
185
+ x = "Callrate",
186
+ group_by = "Status",
187
+ xlab = "Variant Call Rate",
188
+ ylab = "Count",
189
+ palette = "Set1",
190
+ alpha = 0.8,
191
+ bins = 50
189
192
  )
193
+ res <- 70
194
+ height <- attr(p_varcr, "height") * res
195
+ width <- attr(p_varcr, "width") * res
196
+ png(p_varcr_file, width = width, height = height, res = res)
197
+ print(p_varcr)
198
+ dev.off()
190
199
  }
@@ -1,8 +1,6 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
3
1
  library(rlang)
4
- library(ggprism)
5
- theme_set(theme_prism())
2
+ library(plotthis)
3
+ library(biopipen.utils)
6
4
 
7
5
  indir <- {{in.indir | r}}
8
6
  outdir <- {{out.outdir | r}}
@@ -92,7 +90,7 @@ post_process <- function(suffix, snp_col = "ID", sep = "\t", modifier = NULL) {
92
90
  lt_flag <- paste0(metric_col, " < ", cutoff)
93
91
  freq$GE <- freq[[metric_col]] >= cutoff
94
92
  freq$Flag <- ifelse(freq$GE, ge_flag, lt_flag)
95
- freq$Flag <- factor(freq$Flag, levels = c(ge_flag, lt_flag))
93
+ freq$Flag <- factor(freq$Flag, levels = c(lt_flag, ge_flag))
96
94
  write.table(
97
95
  freq[[snp_col]][freq$GE],
98
96
  file = paste0(output, suffix, ".", metric_col, ".ge"),
@@ -109,26 +107,22 @@ post_process <- function(suffix, snp_col = "ID", sep = "\t", modifier = NULL) {
109
107
  )
110
108
 
111
109
  if (doplot) {
112
- plotGG(
113
- data = freq,
114
- geom = "histogram",
115
- outfile = paste0(output, suffix, ".", metric_col, ".png"),
116
- args = list(aes(x = !!sym(metric_col), fill = Flag), alpha = 0.8, bins = 50),
117
- ggs = c(
118
- sprintf('xlab("%s")', metric_col),
119
- 'ylab("Count")',
120
- sprintf('geom_vline(xintercept = %.3f, color = "red", linetype="dashed")', cutoff),
121
- sprintf(
122
- 'geom_text(aes(x = %.3f, y = Inf, label = as.character(%.3f)), colour="blue", vjust = 1.5, hjust = -.1)',
123
- cutoff, cutoff
124
- ),
125
- sprintf(
126
- 'scale_fill_manual(values = c("%s" = "blue3", "%s" = "green3"))',
127
- ge_flag, lt_flag
128
- )
129
- ),
130
- devpars = devpars
110
+ p <- Histogram(
111
+ freq,
112
+ x = metric_col,
113
+ group_by = "Flag",
114
+ alpha = 0.8,
115
+ bins = 50,
116
+ xlab = metric_col,
117
+ ylab = "Count",
118
+ palette = "Set1"
131
119
  )
120
+ res <- 70
121
+ height <- attr(p, "height") * res
122
+ width <- attr(p, "width") * res
123
+ png(paste0(output, suffix, ".", metric_col, ".png"), width = width, height = height, res = res)
124
+ print(p)
125
+ dev.off()
132
126
  }
133
127
  } else {
134
128
  iter_dir <- file.path(outdir, paste0(metric_col, "_filtered"))
@@ -148,6 +142,7 @@ post_process <- function(suffix, snp_col = "ID", sep = "\t", modifier = NULL) {
148
142
  }
149
143
  }
150
144
  freq$Flag <- ifelse(indicate(freq), "Fail", "Pass")
145
+ freq$Flag <- factor(freq$Flag, levels = c("Fail", "Pass"))
151
146
  failfile <- paste0(output, suffix, ".", metric_col, ".fail")
152
147
  write.table(
153
148
  freq[[snp_col]][freq$Flag == "Fail"],
@@ -158,24 +153,22 @@ post_process <- function(suffix, snp_col = "ID", sep = "\t", modifier = NULL) {
158
153
  )
159
154
 
160
155
  if (doplot) {
161
- plotGG(
162
- data = freq,
163
- geom = "histogram",
164
- outfile = paste0(output, suffix, ".", metric_col, ".png"),
165
- args = list(aes(x = !!sym(metric_col), fill = Flag), alpha = 0.8, bins = 50),
166
- ggs = c(
167
- sprintf('xlab("%s")', metric_col),
168
- 'ylab("Count")',
169
- sprintf('geom_vline(xintercept = %.3f, color = "blue", linetype="dashed")', cutoff),
170
- sprintf(
171
- 'geom_text(aes(x = %.3f, y = Inf, label = as.character(%.3f)), colour="blue", vjust = 1.5, hjust = -.1)',
172
- cutoff, cutoff
173
- ),
174
- 'theme(legend.position = "none")',
175
- 'scale_fill_manual(values = c("Pass" = "blue3", "Fail" = "red3"))'
176
- ),
177
- devpars = devpars
156
+ p <- Histogram(
157
+ freq,
158
+ x = metric_col,
159
+ group_by = "Flag",
160
+ alpha = 0.8,
161
+ bins = 50,
162
+ xlab = metric_col,
163
+ ylab = "Count",
164
+ palette = "Set1"
178
165
  )
166
+ res <- 70
167
+ height <- attr(p, "height") * res
168
+ width <- attr(p, "width") * res
169
+ png(paste0(output, suffix, ".", metric_col, ".png"), width = width, height = height, res = res)
170
+ print(p)
171
+ dev.off()
179
172
  }
180
173
 
181
174
  filter_cmd <- c(
@@ -1,7 +1,5 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
3
- library(ggprism)
4
- theme_set(theme_prism())
1
+ library(plotthis)
2
+ library(biopipen.utils)
5
3
 
6
4
  indir <- {{in.indir | r}}
7
5
  outdir <- {{out.outdir | r}}
@@ -51,22 +49,29 @@ if (doplot) {
51
49
  hardy$Pval <- -log10(hardy$P)
52
50
  hardy$Status <- "Pass"
53
51
  hardy[which(hardy$SNP %in% hardy.fail$SNP), "Status"] <- "Fail"
52
+ hardy$Status <- factor(hardy$Status, levels = c("Fail", "Pass"))
54
53
 
55
- plotGG(
56
- data = hardy,
57
- geom = "histogram",
58
- outfile = paste0(output, '.hardy.png'),
59
- args = list(aes(x = Pval, fill = Status), alpha = 0.8, bins = 50),
60
- ggs = c(
61
- 'xlab("-log10(HWE p-value)")',
62
- 'ylab("Count")',
63
- 'geom_vline(xintercept = -log10(cutoff), color = "red", linetype="dashed")',
64
- 'theme(legend.position = "none")',
65
- 'geom_text(aes(x = -log10(cutoff), y = Inf, label = cutoff), colour="red", angle=90, vjust = 1.2, hjust = 1.2)',
66
- 'scale_fill_manual(values = c("Pass" = "blue3", "Fail" = "red3"))' # Added line to set "Fail" color to red
67
- ),
68
- devpars = devpars
54
+ p <- Histogram(
55
+ hardy,
56
+ x = "Pval",
57
+ group_by = "Status",
58
+ alpha = 0.8,
59
+ bins = 50,
60
+ xlab = "-log10(HWE p-value)",
61
+ ylab = "Count",
62
+ palette = "Set1"
69
63
  )
64
+ res <- 70
65
+ height <- attr(p, "height") * res
66
+ width <- attr(p, "width") * res
67
+ png(
68
+ filename = paste0(output, '.hardy.png'),
69
+ width = width,
70
+ height = height,
71
+ res = res
72
+ )
73
+ print(p)
74
+ dev.off()
70
75
  }
71
76
 
72
77
  cmd <- c(
@@ -1,7 +1,5 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
3
- library(ggprism)
4
- theme_set(theme_prism())
1
+ library(plotthis)
2
+ library(biopipen.utils)
5
3
 
6
4
  indir <- {{in.indir | r}}
7
5
  outdir <- {{out.outdir | r}}
@@ -11,11 +9,13 @@ cutoff <- {{envs.cutoff | r}}
11
9
  doplot <- {{envs.plot | r}}
12
10
  devpars <- {{envs.devpars | r}}
13
11
 
12
+ log <- get_logger()
13
+
14
14
  bedfile = Sys.glob(file.path(indir, '*.bed'))
15
15
  if (length(bedfile) == 0)
16
16
  stop("No bed files found in the input directory.")
17
17
  if (length(bedfile) > 1) {
18
- log_warn("Multiple bed files found in the input directory. Using the first one.")
18
+ log$warn("Multiple bed files found in the input directory. Using the first one.")
19
19
  bedfile <- bedfile[1]
20
20
  }
21
21
  input <- tools::file_path_sans_ext(bedfile)
@@ -60,25 +60,29 @@ writeLines(het.fail, con = file(paste0(output, '.het.fail')))
60
60
  if (doplot) {
61
61
  het$Status <- "Pass"
62
62
  het[het.fail, "Status"] <- "Fail"
63
+ het$Status <- factor(het$Status, levels = c("Fail", "Pass"))
63
64
 
64
- plotGG(
65
- data = het,
66
- geom = "histogram",
67
- outfile = paste0(output, '.het.png'),
68
- args = list(aes(fill = Status, x = Het), alpha = 0.8, bins = 50),
69
- ggs = c(
70
- 'xlab("Sample Heterozygosity")',
71
- 'ylab("Count")',
72
- 'geom_vline(xintercept = c(het.mean-cutoff*het.sd, het.mean+cutoff*het.sd), color = "red", linetype="dashed")',
73
- 'geom_vline(xintercept = het.mean, color = "blue", linetype="dashed")',
74
- 'theme(legend.position = "none")',
75
- 'geom_text(aes(x = het.mean-cutoff*het.sd, y = Inf, label = sprintf("mean - %ssd (%.3f)", cutoff, het.mean - cutoff*het.sd)), colour="red", angle=90, vjust = 1.2, hjust = 1.2)',
76
- 'geom_text(aes(x = het.mean+cutoff*het.sd, y = Inf, label = sprintf("mean + %ssd (%.3f)", cutoff, het.mean + cutoff*het.sd)), colour="red", angle=90, vjust = 1.2, hjust = 1.2)',
77
- 'geom_text(aes(x = het.mean, y = Inf, label = sprintf("mean (%.3f)", het.mean)), colour="blue", vjust = 1.5, hjust = -.1)',
78
- 'scale_fill_manual(values = c("Pass" = "blue3", "Fail" = "red3"))'
79
- ),
80
- devpars = devpars
65
+ p <- Histogram(
66
+ het,
67
+ x = "Het",
68
+ group_by = "Status",
69
+ alpha = 0.8,
70
+ bins = 50,
71
+ xlab = "Sample Heterozygosity",
72
+ ylab = "Count",
73
+ palette = "Set1"
74
+ )
75
+ res <- 70
76
+ height <- attr(p, "height") * res
77
+ width <- attr(p, "width") * res
78
+ png(
79
+ filename = paste0(output, '.het.png'),
80
+ width = width,
81
+ height = height,
82
+ res = res
81
83
  )
84
+ print(p)
85
+ dev.off()
82
86
  }
83
87
 
84
88
  cmd <- c(
@@ -1,9 +1,9 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
3
1
  suppressPackageStartupMessages({
4
2
  library(dplyr)
5
3
  library(tidyr)
6
4
  library(tibble)
5
+ library(plotthis)
6
+ library(biopipen.utils)
7
7
  })
8
8
 
9
9
  indir <- {{in.indir | r}}
@@ -19,11 +19,13 @@ doplot <- {{envs.plot | r}}
19
19
  seed <- {{envs.seed | r}}
20
20
  ncores <- {{envs.ncores | r}}
21
21
 
22
+ log <- get_logger()
23
+
22
24
  bedfile <- Sys.glob(file.path(indir, '*.bed'))
23
25
  if (length(bedfile) == 0)
24
26
  stop("No bed files found in the input directory.")
25
27
  if (length(bedfile) > 1) {
26
- log_warn("Multiple bed files found in the input directory. Using the first one.")
28
+ log$warn("Multiple bed files found in the input directory. Using the first one.")
27
29
  bedfile <- bedfile[1]
28
30
  }
29
31
  input <- tools::file_path_sans_ext(bedfile)
@@ -153,48 +155,42 @@ if (doplot) {
153
155
  rownames(similarity) <- samids
154
156
  colnames(similarity) <- samids
155
157
 
156
- annos <- list()
158
+ andata <- NULL
159
+ column_annotation <- NULL
157
160
  if (!is.null(annofile) && !isFALSE(annofile)) {
158
161
  options(stringsAsFactors = TRUE)
159
162
  andata <- read.table(annofile, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE)
160
163
  andata <- andata[samids, , drop = FALSE]
161
164
  for (anname in colnames(andata)) {
162
- annos[[anname]] <- as.matrix(andata[, anname])
165
+ column_annotation <- c(column_annotation, anname)
163
166
  }
164
- annos$annotation_name_gp <- fontsize8
165
- annos <- do.call(HeatmapAnnotation, annos)
166
167
  }
167
168
 
168
- args <- list(
169
+ p <- plotthis::Heatmap(
170
+ similarity,
169
171
  name = "PI_HAT",
170
- cell_fun = function(j, i, x, y, width, height, fill) {
171
- if (similarity[i, j] > pihat && i != j)
172
- grid.points(x, y, pch = 4, size = unit(.5, "char"))
173
- },
174
- #heatmap_legend_param = list(
175
- # title_gp = fontsize9,
176
- # labels_gp = fontsize8
177
- #),
172
+ in_form = "matrix",
173
+ cell_type = "label",
174
+ rows_data = andata,
175
+ label = function(x) ifelse (x > pihat, '*', NA),
176
+ title = paste0("(*) PI_HAT > ", pihat),
178
177
  clustering_distance_rows = function(m) as.dist(1-m),
179
178
  clustering_distance_columns = function(m) as.dist(1-m),
180
- top_annotation = if (length(annos) == 0) NULL else annos
179
+ show_row_names = TRUE,
180
+ show_column_names = TRUE,
181
+ column_annotation = column_annotation
181
182
  )
182
183
 
183
- plotHeatmap(
184
- similarity,
185
- outfile = paste0(output, '.ibd.png'),
186
- args = args,
187
- draw = list(
188
- annotation_legend_list = list(
189
- Legend(
190
- labels = paste(">", pihat),
191
- title = "",
192
- type = "points",
193
- pch = 4,
194
- title_gp = fontsize9,
195
- labels_gp = fontsize8)),
196
- merge_legend = TRUE
197
- ),
198
- devpars = devpars
199
- )
184
+ res <- 100
185
+ height <- attr(p, "height") * res
186
+ width <- attr(p, "width") * res
187
+ png(
188
+ filename = paste0(output, '.ibd.png'),
189
+ width = width,
190
+ height = height,
191
+ res = res
192
+ )
193
+ print(p)
194
+ dev.off()
195
+
200
196
  }
@@ -1,7 +1,6 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(rlang)
4
2
  library(dplyr)
3
+ library(biopipen.utils)
5
4
 
6
5
  infile <- {{in.infile | r}}
7
6
  groupfile <- {{in.groupfile | r}}
@@ -11,7 +10,9 @@ padj <- {{envs.padj | r}}
11
10
  transpose_input <- {{envs.transpose_input | r}}
12
11
  transpose_group <- {{envs.transpose_group | r}}
13
12
 
14
- log_info("Reading input files ...")
13
+ log <- get_logger()
14
+
15
+ log$info("Reading input files ...")
15
16
  indata <- read.table(infile, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE)
16
17
  if (transpose_input) {
17
18
  indata <- t(indata)
@@ -105,16 +106,16 @@ formatlm <- function(m, g = NULL, type = "coeff") {
105
106
  }
106
107
  }
107
108
 
108
- log_info("Running Chow tests ...")
109
+ log$info("Running Chow tests ...")
109
110
  ncases <- nrow(fmldata)
110
111
  results <- do_call(rbind, lapply(
111
112
  seq_len(ncases),
112
113
  function(i) {
113
114
  fmlrow <- fmldata[i, , drop=TRUE]
114
115
  if (i %% 100 == 0) {
115
- log_info("- {i} / {ncases} ...")
116
+ log$info("- {i} / {ncases} ...")
116
117
  }
117
- log_debug(" Running Chow test for formula: {fmlrow$Formula} (grouping = {fmlrow$Group})")
118
+ log$debug(" Running Chow test for formula: {fmlrow$Formula} (grouping = {fmlrow$Group})")
118
119
 
119
120
  res <- chow.test(fmlrow$Formula, fmlrow$Group)
120
121
  fmlrow$Pooled_Coef <- formatlm(res$pooled.lm)
@@ -135,11 +136,11 @@ results <- do_call(rbind, lapply(
135
136
  )) %>% as.data.frame()
136
137
 
137
138
  if (padj != "none") {
138
- log_info("Adjusting p-values ...")
139
+ log$info("Adjusting p-values ...")
139
140
  results$Padj <- p.adjust(results$Pval, method = padj)
140
141
  }
141
142
 
142
- log_info("Writing output ...")
143
+ log$info("Writing output ...")
143
144
  # unimplemented type 'list' in 'EncodeElement'
144
145
  results <- apply(results, 2, as.character)
145
146
  write.table(results, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)
@@ -1,4 +1,3 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
1
  library(dcanr)
3
2
  library(scuttle)
4
3
  library(doRNG)
@@ -6,6 +5,7 @@ library(doParallel)
6
5
  library(snpStats)
7
6
  library(rlang)
8
7
  library(dplyr)
8
+ library(biopipen.utils)
9
9
 
10
10
  infile <- {{in.infile | r}}
11
11
  groupfile <- {{in.groupfile | r}}
@@ -19,12 +19,14 @@ ncores <- {{envs.ncores | r}}
19
19
  transpose_input <- {{envs.transpose_input | r}}
20
20
  transpose_group <- {{envs.transpose_group | r}}
21
21
 
22
- log_info("Setting seed and parallel backend ...")
22
+ log <- get_logger()
23
+
24
+ log$info("Setting seed and parallel backend ...")
23
25
  set.seed(seed)
24
26
  registerDoParallel(cores = ncores)
25
27
  registerDoRNG(seed)
26
28
 
27
- log_info("Reading input files ...")
29
+ log$info("Reading input files ...")
28
30
  indata <- read.table(infile, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE)
29
31
  if (transpose_input) {
30
32
  indata <- t(indata)
@@ -42,21 +44,21 @@ diffcoex_score <- function(group) {
42
44
 
43
45
  gvals <- unique(gdata[, group, drop = TRUE])
44
46
  if (length(gvals) < 2) {
45
- log_debug(" Less than 2 groups in the input. Skipping ...")
47
+ log$debug(" Less than 2 groups in the input. Skipping ...")
46
48
  return(NULL)
47
49
  }
48
50
  rs <- lapply(gvals, function(gval) {
49
51
  samples <- rownames(gdata[gdata[[group]] == gval, , drop = FALSE])
50
52
  expr <- indata[samples, , drop = FALSE]
51
53
  if (length(samples) < 3) {
52
- log_debug(" Less than 3 samples in one of the groups. Skipping ...")
54
+ log$debug(" Less than 3 samples in one of the groups. Skipping ...")
53
55
  return(NULL)
54
56
  }
55
57
  cor.pairs(as.matrix(expr), cor.method = method)
56
58
  })
57
59
  rs[sapply(rs, is.null)] <- NULL
58
60
  if (length(rs) < 2) {
59
- log_debug(" Less than 2 groups with at least 3 samples. Skipping ...")
61
+ log$debug(" Less than 2 groups with at least 3 samples. Skipping ...")
60
62
  return(NULL)
61
63
  }
62
64
  N <- length(rs)
@@ -130,21 +132,21 @@ perm_test <- function(dcscores, group, B = perm_batch) {
130
132
 
131
133
  do_one_group <- function(i) {
132
134
  group <- colnames(gdata)[i]
133
- log_info("- Processing group {i}/{ngroups}: {group} ...")
134
- log_info(" Calculating differential co-expression scores ...")
135
+ log$info("- Processing group {i}/{ngroups}: {group} ...")
136
+ log$info(" Calculating differential co-expression scores ...")
135
137
  dcscores <- diffcoex_score(group)
136
138
 
137
139
  if (!is.null(dcscores)) {
138
- log_info(" Calculating p-values ...")
140
+ log$info(" Calculating p-values ...")
139
141
  perm_test(dcscores, group)
140
142
  }
141
143
  }
142
144
 
143
145
  trios <- do_call(rbind, lapply(seq_len(ngroups), do_one_group))
144
146
  if (padj != "none") {
145
- log_info("Correcting p-values ...")
147
+ log$info("Correcting p-values ...")
146
148
  trios$Padj <- p.adjust(trios$Pval, method = padj)
147
149
  }
148
150
 
149
- log_info("Writing output ...")
151
+ log$info("Writing output ...")
150
152
  write.table(trios, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)
@@ -1,12 +1,11 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(rlang)
4
2
  library(dplyr)
5
3
  library(tidyr)
6
4
  library(fastLiquidAssociation)
5
+ library(biopipen.utils)
7
6
 
8
7
  infile <- {{in.infile | r}}
9
- covfile <- {{in.covfile | r}}
8
+ covfile <- {{in.covfile | r: quote_none=False | r}}
10
9
  groupfile <- {{in.groupfile | r}}
11
10
  fmlfile <- {{in.fmlfile | r}}
12
11
  outfile <- {{out.outfile | r}}
@@ -32,7 +31,7 @@ if (!is.null(groupfile) && !is.null(nvec)) {
32
31
  stop("Must provide either in.groupfile or envs.nvec, not both")
33
32
  }
34
33
 
35
- log_info("Reading and preparing data ...")
34
+ log$info("Reading and preparing data ...")
36
35
  indata <- read.table(infile, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE)
37
36
  if (transpose_input) {
38
37
  indata <- t(indata)
@@ -76,7 +75,7 @@ if (!is.null(groupfile)) {
76
75
  nvec <- expand_range(nvec)
77
76
  }
78
77
 
79
- log_info("Running fastLiquidAssociation ...")
78
+ log$info("Running fastLiquidAssociation ...")
80
79
  indata <- as.matrix(indata)
81
80
  mla <- fastMLA(
82
81
  data = indata,
@@ -88,7 +87,7 @@ mla <- fastMLA(
88
87
  )
89
88
 
90
89
  if (nrow(mla) == 0) {
91
- log_warn("No significant associations found")
90
+ log$warn("No significant associations found")
92
91
  out <- data.frame(
93
92
  X12 = character(),
94
93
  X21 = character(),
@@ -128,9 +127,9 @@ if (!is.null(xyz_names)) {
128
127
  }
129
128
 
130
129
  if (padj != "none") {
131
- log_info("Calculating adjusted p-values ...")
130
+ log$info("Calculating adjusted p-values ...")
132
131
  out$Padj <- p.adjust(out$Pval, method = padj)
133
132
  }
134
133
 
135
- log_info("Writing output ...")
134
+ log$info("Writing output ...")
136
135
  write.table(out, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)