biopipen 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (149) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/filters.py +10 -183
  3. biopipen/core/proc.py +5 -3
  4. biopipen/core/testing.py +8 -1
  5. biopipen/ns/bam.py +40 -4
  6. biopipen/ns/cnv.py +1 -1
  7. biopipen/ns/cnvkit.py +1 -1
  8. biopipen/ns/delim.py +1 -1
  9. biopipen/ns/gsea.py +63 -37
  10. biopipen/ns/misc.py +38 -0
  11. biopipen/ns/plot.py +8 -0
  12. biopipen/ns/scrna.py +290 -288
  13. biopipen/ns/scrna_metabolic_landscape.py +207 -366
  14. biopipen/ns/tcr.py +165 -97
  15. biopipen/reports/bam/CNVpytor.svelte +4 -9
  16. biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
  17. biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
  18. biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
  19. biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
  20. biopipen/reports/scrna/DimPlots.svelte +1 -1
  21. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
  22. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
  23. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
  24. biopipen/reports/snp/PlinkCallRate.svelte +2 -2
  25. biopipen/reports/snp/PlinkFreq.svelte +1 -1
  26. biopipen/reports/snp/PlinkHWE.svelte +1 -1
  27. biopipen/reports/snp/PlinkHet.svelte +1 -1
  28. biopipen/reports/snp/PlinkIBD.svelte +1 -1
  29. biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
  30. biopipen/scripts/bam/CNAClinic.R +41 -6
  31. biopipen/scripts/bam/CNVpytor.py +2 -1
  32. biopipen/scripts/bam/ControlFREEC.py +2 -3
  33. biopipen/scripts/bam/SamtoolsView.py +33 -0
  34. biopipen/scripts/cnv/AneuploidyScore.R +25 -13
  35. biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
  36. biopipen/scripts/cnv/TMADScore.R +4 -4
  37. biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
  38. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
  39. biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
  41. biopipen/scripts/delim/RowsBinder.R +1 -1
  42. biopipen/scripts/delim/SampleInfo.R +4 -1
  43. biopipen/scripts/gene/GeneNameConversion.R +14 -12
  44. biopipen/scripts/gsea/Enrichr.R +2 -2
  45. biopipen/scripts/gsea/FGSEA.R +184 -50
  46. biopipen/scripts/gsea/PreRank.R +3 -3
  47. biopipen/scripts/misc/Plot.R +80 -0
  48. biopipen/scripts/plot/VennDiagram.R +2 -2
  49. biopipen/scripts/protein/ProdigySummary.R +34 -27
  50. biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
  51. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
  52. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
  53. biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
  54. biopipen/scripts/regulatory/motifs-common.R +10 -9
  55. biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
  56. biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
  57. biopipen/scripts/rnaseq/Simulation.R +0 -2
  58. biopipen/scripts/rnaseq/UnitConversion.R +6 -5
  59. biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
  60. biopipen/scripts/scrna/CellCellCommunication.py +1 -1
  61. biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
  62. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
  63. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
  64. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
  65. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
  66. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
  67. biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
  68. biopipen/scripts/scrna/CellsDistribution.R +1 -1
  69. biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
  70. biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
  71. biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
  72. biopipen/scripts/scrna/MarkersFinder.R +348 -217
  73. biopipen/scripts/scrna/MetaMarkers.R +3 -3
  74. biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
  75. biopipen/scripts/scrna/RadarPlots.R +1 -1
  76. biopipen/scripts/scrna/ScFGSEA.R +157 -75
  77. biopipen/scripts/scrna/ScSimulation.R +11 -10
  78. biopipen/scripts/scrna/ScVelo.py +605 -0
  79. biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
  80. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  81. biopipen/scripts/scrna/SeuratClusterStats-features.R +39 -30
  82. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
  83. biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
  84. biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
  85. biopipen/scripts/scrna/SeuratClustering.R +31 -48
  86. biopipen/scripts/scrna/SeuratLoading.R +2 -2
  87. biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
  88. biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
  89. biopipen/scripts/scrna/SeuratPreparing.R +76 -24
  90. biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
  91. biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
  92. biopipen/scripts/scrna/Subset10X.R +2 -2
  93. biopipen/scripts/scrna/TopExpressingGenes.R +141 -184
  94. biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
  95. biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
  96. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
  97. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
  98. biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
  99. biopipen/scripts/snp/MatrixEQTL.R +39 -20
  100. biopipen/scripts/snp/PlinkCallRate.R +43 -34
  101. biopipen/scripts/snp/PlinkFreq.R +34 -41
  102. biopipen/scripts/snp/PlinkHWE.R +23 -18
  103. biopipen/scripts/snp/PlinkHet.R +26 -22
  104. biopipen/scripts/snp/PlinkIBD.R +30 -34
  105. biopipen/scripts/stats/ChowTest.R +9 -8
  106. biopipen/scripts/stats/DiffCoexpr.R +13 -11
  107. biopipen/scripts/stats/LiquidAssoc.R +7 -8
  108. biopipen/scripts/stats/Mediation.R +8 -8
  109. biopipen/scripts/stats/MetaPvalue.R +11 -13
  110. biopipen/scripts/stats/MetaPvalue1.R +6 -5
  111. biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
  112. biopipen/scripts/tcr/ClonalStats.R +5 -4
  113. biopipen/scripts/tcr/CloneResidency.R +3 -3
  114. biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
  115. biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
  116. biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
  117. biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
  118. biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
  119. biopipen/scripts/tcr/ScRepLoading.R +114 -92
  120. biopipen/scripts/tcr/TCRClusterStats.R +2 -2
  121. biopipen/scripts/tcr/TCRClustering.R +86 -97
  122. biopipen/scripts/tcr/TESSA.R +65 -115
  123. biopipen/scripts/tcr/VJUsage.R +5 -5
  124. biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
  125. biopipen/utils/common_docstrs.py +66 -63
  126. biopipen/utils/reporter.py +177 -0
  127. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/METADATA +2 -1
  128. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/RECORD +130 -144
  129. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/WHEEL +1 -1
  130. biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
  131. biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
  132. biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
  133. biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
  134. biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
  135. biopipen/reports/utils/gsea.liq +0 -110
  136. biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
  137. biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
  138. biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
  139. biopipen/utils/caching.R +0 -44
  140. biopipen/utils/gene.R +0 -95
  141. biopipen/utils/gsea.R +0 -329
  142. biopipen/utils/io.R +0 -20
  143. biopipen/utils/misc.R +0 -602
  144. biopipen/utils/mutate_helpers.R +0 -581
  145. biopipen/utils/plot.R +0 -209
  146. biopipen/utils/repr.R +0 -146
  147. biopipen/utils/rnaseq.R +0 -48
  148. biopipen/utils/single_cell.R +0 -207
  149. {biopipen-0.33.1.dist-info → biopipen-0.34.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,80 @@
1
+ library(gglogger)
2
+ library(plotthis)
3
+ library(rlang)
4
+ library(biopipen.utils)
5
+
6
+ datafile <- {{in.datafile | r}}
7
+ plotfile <- {{out.plotfile | r}}
8
+ plotprefix <- {{out.plotfile | prefix | r}}
9
+ read_opts <- {{envs.read_opts | r: todot="-"}}
10
+ envs <- {{envs | r}}
11
+
12
+ fn <- envs$fn
13
+ envs$fn <- NULL
14
+ devpars <- envs$devpars
15
+ envs$devpars <- NULL
16
+ more_formats <- envs$more_formats
17
+ envs$more_formats <- NULL
18
+ save_code <- envs$save_code
19
+ envs$save_code <- NULL
20
+ envs$read_opts <- NULL
21
+
22
+ if (endsWith(datafile, ".qs") || endsWith(datafile, ".qs2") ||
23
+ endsWith(datafile, ".rds") || endsWith(datafile, ".RDS")) {
24
+ envs$data <- read_obj(datafile)
25
+ } else {
26
+ read_opts <- read_opts %||% list()
27
+ read_opts$file <- datafile
28
+ envs$data <- do.call(read.table, read_opts)
29
+ }
30
+
31
+ if (fn == "ManhattanPlot" && !is.null(envs$chromosomes)) {
32
+ norm_chroms <- function(chrs) {
33
+ chrs <- as.character(chrs)
34
+ if (length(chrs) == 1 && grepl(",", chrs)) {
35
+ chrs <- trimws(unlist(strsplit(chrs, ",")))
36
+ }
37
+ if (length(chrs) > 1) {
38
+ return(unique(unlist(sapply(chrs, function(chr) norm_chroms(chr)))))
39
+ }
40
+ if (!grepl("-", chrs)) { return(chrs) }
41
+
42
+ # expand chr1-22 -> chr1, chr2, ..., chr22
43
+ # chr1-22 -> 'chr1', '22'
44
+ chrs <- unlist(strsplit(chrs, "-"))
45
+ if (length(chrs) != 2) {
46
+ stop(paste0("Invalid chroms: ", chrs))
47
+ }
48
+ # detect prefix
49
+ prefix1 <- gsub("[0-9]", "", chrs[1])
50
+ prefix2 <- gsub("[0-9]", "", chrs[2])
51
+ if (nchar(prefix2) > 0 && prefix1 != prefix2) {
52
+ stop(paste0("Invalid chroms: ", chrs, " (prefix mismatch)"))
53
+ }
54
+ chr_a <- as.integer(substring(chrs[1], nchar(prefix1) + 1))
55
+ chr_b <- as.integer(substring(chrs[2], nchar(prefix2) + 1))
56
+ chr_min <- min(chr_a, chr_b)
57
+ chr_max <- max(chr_a, chr_b)
58
+ return(paste0(prefix1, chr_min:chr_max))
59
+ }
60
+
61
+ envs$chromosomes <- norm_chroms(envs$chromosomes)
62
+ }
63
+
64
+ plotfn <- utils::getFromNamespace(fn, "plotthis")
65
+ if (save_code) {
66
+ plotfn <- gglogger::register(plotfn, name = fn)
67
+ }
68
+
69
+ p <- do_call(plotfn, envs)
70
+ save_plot(p, plotprefix, devpars, formats = unique(c("png", more_formats)))
71
+
72
+ if (save_code) {
73
+ save_plotcode(
74
+ p,
75
+ setup = c('library(plotthis)', '', 'load("data.RData")', 'list2env(envs, envir = .GlobalEnv)'),
76
+ prefix = plotprefix,
77
+ "envs",
78
+ auto_data_setup = FALSE
79
+ )
80
+ }
@@ -1,8 +1,8 @@
1
1
  {{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
2
2
  {{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
3
3
 
4
- infile = {{in.infile | quote}}
5
- outfile = {{out.outfile | quote}}
4
+ infile = {{in.infile | r}}
5
+ outfile = {{out.outfile | r}}
6
6
  inopts = {{envs.inopts | r}}
7
7
  intype = {{envs.intype | r}}
8
8
  devpars = {{envs.devpars | r}}
@@ -1,11 +1,7 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(rlang)
4
2
  library(dplyr)
5
- library(ggplot2)
6
- library(ggprism)
7
-
8
- theme_set(theme_prism())
3
+ library(biopipen.utils)
4
+ library(plotthis)
9
5
 
10
6
  infiles <- {{in.infiles | r}}
11
7
  outdir <- {{out.outdir | r}}
@@ -24,12 +20,15 @@ if (is.character(group)) {
24
20
  stop(paste0("Invalid group: ", paste0(group, collapse = ", ")))
25
21
  }
26
22
 
27
- log_info("Reading and merging metrics for each sample ...")
23
+ log <- get_logger()
24
+ reporter <- get_reporter()
25
+
26
+ log$info("Reading and merging metrics for each sample ...")
28
27
  metrics <- NULL
29
28
 
30
29
  for (infile in infiles) {
31
30
  sample <- sub("_prodigy$", "", basename(dirname(infile)))
32
- log_debug("- Reading metrics from {sample}")
31
+ log$debug("- Reading metrics from {sample}")
33
32
  metric <- read.table(
34
33
  infile,
35
34
  header = TRUE,
@@ -55,7 +54,7 @@ write.table(
55
54
  row.names = FALSE
56
55
  )
57
56
 
58
- add_report(
57
+ reporter$add(
59
58
  list(kind = "descr", content = "Metrics for all samples"),
60
59
  list(kind = "table", src = file.path(outdir, "metrics.txt")),
61
60
  h1 = "Metrics of all samples"
@@ -76,17 +75,17 @@ METRIC_DESCR = list(
76
75
  )
77
76
 
78
77
  if (!is.null(group)) {
79
- log_info("Merging group information ...")
78
+ log$info("Merging group information ...")
80
79
  metrics <- group %>%
81
80
  left_join(metrics, by = "Sample") %>%
82
81
  mutate(Group = factor(Group, levels = unique(Group)))
83
82
  }
84
83
 
85
- log_info("Plotting Prodigy metrics ...")
84
+ log$info("Plotting Prodigy metrics ...")
86
85
  for (metric in names(METRIC_DESCR)) {
87
- log_info("- {metric}: {METRIC_DESCR[[metric]]}")
86
+ log$info("- {metric}: {METRIC_DESCR[[metric]]}")
88
87
 
89
- add_report(
88
+ reporter$add(
90
89
  list(
91
90
  kind = "descr",
92
91
  content = METRIC_DESCR[[metric]] %||% paste0("Metric: ", metric)
@@ -94,18 +93,22 @@ for (metric in names(METRIC_DESCR)) {
94
93
  h1 = metric
95
94
  )
96
95
 
97
- # barplot
98
- p <- ggplot(metrics, aes(x = Sample, y = !!sym(metric))) +
99
- geom_bar(stat = "identity", fill = "steelblue") +
100
- labs(x = "Sample", y = metric) +
101
- theme(axis.text.x = element_text(angle = 90, hjust = 1))
96
+ p <- plotthis::BarPlot(
97
+ x = "Sample",
98
+ y = metric,
99
+ x_text_angle = 90,
100
+ fill = "Group",
101
+ data = metrics
102
+ )
102
103
 
103
104
  figfile <- file.path(outdir, paste0(slugify(metric), ".barplot.png"))
104
- png(figfile, height = 600, res = 100, width = nrow(metrics) * 30 + 200)
105
+ height <- attr(p, "height") %||% 6
106
+ width <- attr(p, "width") %||% (nrow(metrics) * .3 + 2)
107
+ png(figfile, height = height * 100, res = 100, width = width * 100)
105
108
  print(p)
106
109
  dev.off()
107
110
 
108
- add_report(
111
+ reporter$add(
109
112
  list(src = figfile, name = "By Sample"),
110
113
  ui = "table_of_images",
111
114
  h1 = metric
@@ -113,21 +116,25 @@ for (metric in names(METRIC_DESCR)) {
113
116
 
114
117
  if (is.null(group)) { next }
115
118
  # group: Sample, Group
116
- p <- ggplot(metrics, aes(x = Group, y = !!sym(metric))) +
117
- geom_boxplot(fill = "steelblue") +
118
- labs(x = "Group", y = metric) +
119
- theme(axis.text.x = element_text(angle = 90, hjust = 1))
119
+ p <- plotthis::BarPlot(
120
+ data = metrics,
121
+ x = "Group",
122
+ y = metric,
123
+ x_text_angle = 90
124
+ )
120
125
 
121
126
  figfile <- file.path(outdir, paste0(slugify(metric), ".boxplot.png"))
122
- png(figfile, height = 600, res = 100, width = length(unique(metrics$Group)) * 30 + 200)
127
+ height <- attr(p, "height") %||% 6
128
+ width <- attr(p, "width") %||% (length(unique(metrics$Group)) * 0.3 + 2)
129
+ png(figfile, height = height * 100, res = 100, width = width * 100)
123
130
  print(p)
124
131
  dev.off()
125
132
 
126
- add_report(
133
+ reporter$add(
127
134
  list(src = figfile, name = "By Group"),
128
135
  ui = "table_of_images",
129
136
  h1 = metric
130
137
  )
131
138
  }
132
139
 
133
- save_report(joboutdir)
140
+ reporter$save(joboutdir)
@@ -1,9 +1,9 @@
1
1
  # Script for regulatory.MotifAffinityTest
2
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
3
- {{ biopipen_dir | joinpaths: "scripts", "regulatory", "motifs-common.R" | source_r }}
2
+ {% include biopipen_dir + "/scripts/regulatory/motifs-common.R" %}
4
3
 
5
4
  library(BiocParallel)
6
5
  library(BSgenome)
6
+ library(biopipen.utils)
7
7
 
8
8
  motiffile <- {{in.motiffile | r}}
9
9
  varfile <- {{in.varfile | r}}
@@ -42,16 +42,18 @@ if (is.null(motif_col) && is.null(regulator_col)) {
42
42
  stop("Either motif (envs.motif_col) or regulator (envs.regulator_col) column must be provided")
43
43
  }
44
44
 
45
- log_info("Reading input regulator/motif file ...")
45
+ log <- get_logger()
46
+
47
+ log$info("Reading input regulator/motif file ...")
46
48
  in_motifs <- read.table(motiffile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
47
49
 
48
- log_info("Ensuring motifs and regulators in the input data ...")
50
+ log$info("Ensuring motifs and regulators in the input data ...")
49
51
  in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
50
52
  genome_pkg <- get_genome_pkg(genome)
51
53
 
52
- log_info("Reading variant file ...")
54
+ log$info("Reading variant file ...")
53
55
  if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
54
- log_info("Converting VCF file to BED file ...")
56
+ log$info("Converting VCF file to BED file ...")
55
57
  varfile_bed <- file.path(outdir, gsub("\\.vcf(\\.gz)?$", ".bed", basename(varfile)))
56
58
  cmd <- c(
57
59
  bcftools, "query",
@@ -69,7 +71,7 @@ if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
69
71
  snpinfo <- read.table(varfile, header=FALSE, stringsAsFactors=FALSE)
70
72
  colnames(snpinfo) <- c("chrom", "start", "end", "name", "score", "strand", "ref", "alt")
71
73
 
72
- log_info("Reading motif database ...")
74
+ log$info("Reading motif database ...")
73
75
  mdb <- read_meme_to_motifdb(motifdb, in_motifs, motif_col, regulator_col, notfound, outdir)
74
76
 
75
77
  tool <- tolower(tool)
@@ -77,8 +79,8 @@ tool <- match.arg(tool, c("motifbreakr", "atsnp"))
77
79
 
78
80
  if (tool == "motifbreakr") {
79
81
  motifbreakr_args <- {{envs.motifbreakr_args | r}}
80
- {{ biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_MotifBreakR.R" | source_r }}
82
+ {% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_MotifBreakR.R" %}
81
83
  } else { # atsnp
82
84
  atsnp_args <- {{envs.atsnp_args | r}}
83
- {{ biopipen_dir | joinpaths: "scripts", "regulatory", "MotifAffinityTest_AtSNP.R" | source_r }}
85
+ {% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_AtSNP.R" %}
84
86
  }
@@ -1,7 +1,7 @@
1
1
  library(atSNP)
2
2
  library(rtracklayer)
3
3
 
4
- log_info("Converting snpinfo to atSNP object ...")
4
+ log$info("Converting snpinfo to atSNP object ...")
5
5
 
6
6
  # c("chrom", "start", "end", "name", "score", "strand", "ref", "alt", "ref_seq", "alt_seq")
7
7
  if (any(nchar(snpinfo$ref) != 1) || any(nchar(snpinfo$alt) != 1)) {
@@ -34,10 +34,10 @@ snps <- LoadSNPData(
34
34
  half.window.size = k
35
35
  )
36
36
 
37
- log_info("Running atSNP ...")
37
+ log$info("Running atSNP ...")
38
38
  atsnp_scores <- ComputeMotifScore(motif_lib, snps, ncores = ncores)
39
39
 
40
- log_info("Calculating p values ...")
40
+ log$info("Calculating p values ...")
41
41
  atsnp_result <- ComputePValues(
42
42
  motif.lib = motif_lib,
43
43
  snp.info = snps,
@@ -85,7 +85,7 @@ write.table(
85
85
  sep = "\t", quote = FALSE, row.names = FALSE
86
86
  )
87
87
 
88
- log_info("Plotting variants ...")
88
+ log$info("Plotting variants ...")
89
89
  # Convert result to GRanges object
90
90
  atsnp_result$alleleDiff <- -atsnp_result[[cutoff_col]]
91
91
  atsnp_result$effect <- "strong"
@@ -103,7 +103,7 @@ if (is.null(plots) || length(plots) == 0) {
103
103
  variants <- names(plots)
104
104
  }
105
105
  for (variant in variants) {
106
- log_info("- Variant: {variant}")
106
+ log$info("- Variant: {variant}")
107
107
  if (is.null(plots[[variant]])) {
108
108
  plots[[variant]] <- list(devpars = devpars, which = "TRUE")
109
109
  }
@@ -36,7 +36,7 @@ get_bkg <- function(base) {
36
36
  bkg <- c(A = get_bkg("A"), C = get_bkg("C"), G = get_bkg("G"), T = get_bkg("T"))
37
37
 
38
38
  # run motifbreakR
39
- log_info("Running motifbreakR ...")
39
+ log$info("Running motifbreakR ...")
40
40
  results <- motifbreakR(
41
41
  snpList = snps,
42
42
  pwmList = mdb,
@@ -48,7 +48,7 @@ results <- motifbreakR(
48
48
  BPPARAM = MulticoreParam(ncores)
49
49
  )
50
50
 
51
- log_info("Calculating p values ...")
51
+ log$info("Calculating p values ...")
52
52
  results <- calculatePvalue(results)
53
53
  results_to_save <- as.data.frame(unname(results))
54
54
  results_to_save$motifPos <- lapply(results_to_save$motifPos, function(x) paste(x, collapse = ","))
@@ -69,7 +69,7 @@ write.table(
69
69
  )
70
70
  rm(results_to_save)
71
71
 
72
- log_info("Plotting variants ...")
72
+ log$info("Plotting variants ...")
73
73
  if (is.null(plots) || length(plots) == 0) {
74
74
  results <- results[order(-abs(results$alleleDiff)), , drop = FALSE]
75
75
  results <- results[1:min(plot_nvars, length(results)), , drop = FALSE]
@@ -78,7 +78,7 @@ if (is.null(plots) || length(plots) == 0) {
78
78
  variants <- names(plots)
79
79
  }
80
80
  for (variant in variants) {
81
- log_info("- Variant: {variant}")
81
+ log$info("- Variant: {variant}")
82
82
  if (is.null(plots[[variant]])) {
83
83
  plots[[variant]] <- list(devpars = devpars, which = "TRUE")
84
84
  }
@@ -1,8 +1,8 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
- {{ biopipen_dir | joinpaths: "scripts", "regulatory", "motifs-common.R" | source_r }}
1
+ {% include biopipen_dir + "/scripts/regulatory/motifs-common.R" %}
3
2
 
4
3
  library(BSgenome)
5
4
  library(GenomicRanges)
5
+ library(biopipen.utils)
6
6
 
7
7
  infile <- {{in.infile | r}}
8
8
  outdir <- {{out.outdir | r}}
@@ -27,17 +27,19 @@ if (is.null(motif_col) && is.null(regulator_col)) {
27
27
  stop("Either motif (envs.motif_col) or regulator (envs.regulator_col) column must be provided")
28
28
  }
29
29
 
30
- log_info("Reading input data ...")
30
+ log <- get_logger()
31
+
32
+ log$info("Reading input data ...")
31
33
  indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
32
34
 
33
- log_info("Ensuring regulators in the input data ...")
35
+ log$info("Ensuring regulators in the input data ...")
34
36
  indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
35
37
  genome_pkg <- get_genome_pkg(genome)
36
38
 
37
- log_info("Reading motif database ...")
39
+ log$info("Reading motif database ...")
38
40
  meme <- read_meme_to_motifdb(motifdb, indata, motif_col, regulator_col, notfound, outdir)
39
41
 
40
- log_info("Composing motifbreakR results from input data ...")
42
+ log$info("Composing motifbreakR results from input data ...")
41
43
  indata$chr <- indata$chrom %||% indata$chr %||% indata$seqnames
42
44
  indata$seqnames <- NULL
43
45
  indata$strand <- indata$strand %||% "+"
@@ -62,7 +64,7 @@ genome(indata) <- genome
62
64
  attributes(indata)$genome.package <- genome_pkg
63
65
  attributes(indata)$motifs <- meme
64
66
 
65
- log_info("Plotting variants ...")
67
+ log$info("Plotting variants ...")
66
68
  if (is.null(plot_vars)) {
67
69
  plot_vars <- unique(indata$SNP_id)
68
70
  } else if (length(plot_vars) > 1) {
@@ -71,6 +73,6 @@ if (is.null(plot_vars)) {
71
73
  plot_vars <- strsplit(plot_vars, ",")[[1]]
72
74
  }
73
75
  for (pvar in plot_vars) {
74
- log_info("- Variant: {pvar}")
76
+ log$info("- Variant: {pvar}")
75
77
  plot_variant_motifs(indata, pvar, devpars, outdir)
76
78
  }
@@ -1,8 +1,7 @@
1
- # make sure biopipen/utils/misc.R is loaded, log_warn is defined, and slugify is defined
2
-
3
1
  library(rlang)
4
2
  library(universalmotif)
5
3
  library(MotifDb)
4
+ library(biopipen.utils)
6
5
 
7
6
  #' @title Common functions for regulatory analysis
8
7
  #' @name regulatory-common
@@ -144,11 +143,12 @@ motifdb_to_motiflib <- function(motifdb) {
144
143
  #' @param notfound Action to take if regulators are not found in the mapping file
145
144
  #' @return Data frame with regulators and motifs
146
145
  #' @export
147
- ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, regmotifs, log_indent = "", notfound = "error") {
146
+ ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, regmotifs, log_indent = "", notfound = "error", log = NULL) {
148
147
  if (is.null(motif_col)) {
149
148
  if (is.null(regmotifs)) {
150
149
  stop("Regulator-motif mapping file (envs.regmotifs) is required when no motif column (envs.motif_col) is provided")
151
150
  }
151
+ log <- log %||% get_logger()
152
152
  regmotifs <- .read_regmotifs(regmotifs)
153
153
  rm_motif_col <- colnames(regmotifs)[1]
154
154
  rm_reg_col <- colnames(regmotifs)[2]
@@ -158,7 +158,7 @@ ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, r
158
158
  notfound_regs <- setdiff(regulators, rm_regs)
159
159
  .handle_notfound_items(
160
160
  notfound_regs,
161
- log_warn,
161
+ log$warn,
162
162
  "The following regulators were not found in the regulator-motif mapping file",
163
163
  notfound,
164
164
  file.path(outdir, "notfound_regulators.txt"),
@@ -185,7 +185,7 @@ ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, r
185
185
  notfound_motifs <- setdiff(motifs, rm_motifs)
186
186
  .handle_notfound_items(
187
187
  notfound_motifs,
188
- log_warn,
188
+ log$warn,
189
189
  "The following motifs were not found in the regulator-motif mapping file",
190
190
  notfound,
191
191
  file.path(outdir, "notfound_motifs.txt"),
@@ -232,7 +232,8 @@ get_genome_pkg <- function(genome) {
232
232
  #' @param outdir Output directory, used to save un-matched motifs
233
233
  #' @return Motifs that are found
234
234
  #' @export
235
- check_motifs <- function(motifs, all_motifs, notfound, outdir) {
235
+ check_motifs <- function(motifs, all_motifs, notfound, outdir, log = NULL) {
236
+ log <- log %||% get_logger()
236
237
  notfound_motifs <- setdiff(motifs, all_motifs)
237
238
  if (length(notfound_motifs) > 0) {
238
239
  first_notfound <- head(notfound_motifs, 3)
@@ -246,15 +247,15 @@ check_motifs <- function(motifs, all_motifs, notfound, outdir) {
246
247
  if (notfound == "error") {
247
248
  stop(msg1, "\n", msg2)
248
249
  } else if (notfound == "ignore") {
249
- log_warn(msg1)
250
- log_warn(msg2)
250
+ log$warn(msg1)
251
+ log$warn(msg2)
251
252
  }
252
253
  } else {
253
254
  msg <- paste0("The following motifs were not found in the motif database: ", paste(first_notfound, collapse = ", "))
254
255
  if (notfound == "error") {
255
256
  stop(msg)
256
257
  } else if (notfound == "ignore") {
257
- log_warn(msg)
258
+ log$warn(msg)
258
259
  }
259
260
  }
260
261
 
@@ -2,6 +2,7 @@
2
2
  library(ESCO)
3
3
  library(rlang)
4
4
  library(glue)
5
+ library(biopipen.utils)
5
6
 
6
7
  args <- {{envs.esco_args | r: todot="-"}}
7
8
  args <- args %||% list()
@@ -9,6 +10,8 @@ args <- args %||% list()
9
10
  save <- args$save
10
11
  args$save <- NULL
11
12
 
13
+ log <- get_logger()
14
+
12
15
  if (!is.null(seed)) {
13
16
  set.seed(seed)
14
17
  args$seed <- seed
@@ -20,12 +23,12 @@ args$verbose <- TRUE
20
23
  args$numCores <- ncores
21
24
  type <- args$type
22
25
 
23
- log_info("Running simulation ...")
26
+ log$info("Running simulation ...")
24
27
  sim <- do_call(escoSimulate, args)
25
28
  attributes(sim) <- c(attributes(sim), c(simulation_tool = "ESCO"))
26
- saveRDS(sim, file.path(outdir, "sim.rds"))
29
+ save_obj(sim, file.path(outdir, "sim.rds"))
27
30
 
28
- log_info("Plotting ...")
31
+ log$info("Plotting ...")
29
32
  if (type == "single") {
30
33
  asys <- assays(sim)
31
34
  datalist = list(`simulated-truth` = asys$TrueCounts)
@@ -36,7 +39,7 @@ if (type == "single") {
36
39
  datalist$`down-sampled` = asys$observedcounts
37
40
  }
38
41
 
39
- log_info("- Plotting the data ...")
42
+ log$info("- Plotting the data ...")
40
43
  dataplot <- file.path(outdir, "data.png")
41
44
  png(dataplot, width=length(datalist) * 600, height=1200, res=30)
42
45
  heatdata(datalist, norm = FALSE, size = 2, ncol = 3)
@@ -44,7 +47,7 @@ if (type == "single") {
44
47
 
45
48
  rholist <- metadata(sim)$Params@corr
46
49
  if (length(rholist) > 0) {
47
- log_info("- Plotting the GCN ...")
50
+ log$info("- Plotting the GCN ...")
48
51
  corrgenes <- rownames(rholist[[1]])
49
52
  gcnlist = lapply(datalist, function(data)gcn(data, genes = corrgenes))
50
53
  gcnlist = append(gcnlist, list("given truth" = rholist[[1]]), 1)
@@ -75,13 +78,13 @@ if (type == "single") {
75
78
  datalist$`down-sampled` = asys$observedcounts
76
79
  }
77
80
 
78
- log_info("- Plotting the data ...")
81
+ log$info("- Plotting the data ...")
79
82
  dataplot <- file.path(outdir, "data.png")
80
83
  png(dataplot, width=length(datalist) * 600, height=1200, res=30)
81
84
  heatdata(datalist, cellinfo = cellinfo, geneinfo = geneinfo, size = 1, ncol = 3)
82
85
  dev.off()
83
86
 
84
- log_info("- Plotting the GCN for all marker genes (i.e. DE genes) across all cell groups ...")
87
+ log$info("- Plotting the GCN for all marker genes (i.e. DE genes) across all cell groups ...")
85
88
  degeneinfo = geneinfo[which(geneinfo$newcelltype!="None"),]
86
89
  degeneinfo$newcelltype = droplevels(degeneinfo$newcelltype)
87
90
  degcnlist = lapply(datalist, function(data)gcn(data, genes = degeneinfo$genes))
@@ -90,7 +93,7 @@ if (type == "single") {
90
93
  heatgcn(degcnlist, geneinfo = degeneinfo, size = 2, ncol = 3)
91
94
  dev.off()
92
95
 
93
- log_info("- Plotting the GCN for marker genes within one cell group ...")
96
+ log$info("- Plotting the GCN for marker genes within one cell group ...")
94
97
  rholist = metadata(sim)$Params@corr
95
98
  group2_gcnlist = lapply(datalist,
96
99
  function(data){
@@ -126,7 +129,7 @@ if (type == "single") {
126
129
  DEgene.name = as.character(rowData(sim)$Gene[which(group.facs.gene[,1]>1)])
127
130
  degeneinfo = geneinfo[match(DEgene.name, geneinfo$genes),]
128
131
 
129
- log_info("- Plotting the data ...")
132
+ log$info("- Plotting the data ...")
130
133
  dataplot <- file.path(outdir, "data.png")
131
134
  png(dataplot, width=2000, height=1200, res=30)
132
135
  # plot the data
@@ -151,7 +154,7 @@ if (type == "single") {
151
154
  # get the geneinfo
152
155
  degenes = which(metadata(sim)$Params@paths.DEgenes==1)
153
156
 
154
- log_info("- Plotting the trajectory ...")
157
+ log$info("- Plotting the trajectory ...")
155
158
  trajplot <- file.path(outdir, "traj.png")
156
159
  png(trajplot, width=1600, height=1200, res=30)
157
160
  # plot the data
@@ -160,7 +163,7 @@ if (type == "single") {
160
163
  labels = levels(as.factor(colData(sim)$Path)))
161
164
  dev.off()
162
165
 
163
- log_info("- Plotting the data ...")
166
+ log$info("- Plotting the data ...")
164
167
  dataplot <- file.path(outdir, "data.png")
165
168
  heatdata(list("simulated truth" = datatrue[degenes,]),
166
169
  cellinfo = cellinfo,
@@ -1,6 +1,9 @@
1
1
 
2
2
  library(rlang)
3
3
  library(RUVcorr)
4
+ library(biopipen.utils)
5
+
6
+ log <- get_logger()
4
7
 
5
8
  args <- {{envs.ruvcorr_args | r: todot="-"}}
6
9
  if (!is.null(seed)) { set.seed(seed) }
@@ -17,7 +20,7 @@ args$check <- args$check %||% TRUE
17
20
  args$n = ngenes
18
21
  args$m = nsamples
19
22
 
20
- log_info("Running simulation ...")
23
+ log$info("Running simulation ...")
21
24
  sim <- do_call(simulateGEdata, args)
22
25
  attributes(sim) <- c(attributes(sim), c(simulation_tool = "RUVcorr"))
23
26
  genes <- paste0("Gene", 1:ngenes)
@@ -35,8 +38,8 @@ sim$Noise <- t(sim$Noise)
35
38
  colnames(sim$Sigma) <- genes
36
39
  rownames(sim$Sigma) <- genes
37
40
 
38
- log_info("Saving results ...")
39
- saveRDS(sim, file.path(outdir, "sim.rds"))
40
- saveRDS(sim$Truth, file.path(outdir, "Truth.rds"))
41
+ log$info("Saving results ...")
42
+ save_obj(sim, file.path(outdir, "sim.rds"))
43
+ save_obj(sim$Truth, file.path(outdir, "Truth.rds"))
41
44
 
42
45
  simulated <- sim$Y
@@ -1,5 +1,3 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  ngenes <- {{in.ngenes | r}}
4
2
  nsamples <- {{in.nsamples | r}}
5
3
  outfile <- {{out.outfile | r}}
@@ -1,7 +1,6 @@
1
- {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
1
  library(rlang)
4
2
  library(glue)
3
+ library(biopipen.utils)
5
4
 
6
5
  infile <- {{in.infile | r}}
7
6
  outfile <- {{out.outfile | r}}
@@ -11,7 +10,9 @@ refexon <- {{envs.refexon | r}}
11
10
  meanfl <- {{envs.meanfl | r}}
12
11
  nreads <- {{envs.nreads | r}}
13
12
 
14
- log_info("Reading input data ...")
13
+ log <- get_logger()
14
+
15
+ log$info("Reading input data ...")
15
16
  indata = read.table(infile, header = TRUE, sep = "\t", row.names = 1, check.names = F)
16
17
  samples = colnames(indata)
17
18
 
@@ -326,7 +327,7 @@ if (grepl('rawcounts|rawcount|counts|count', outunit)) {
326
327
  stop(glue("Can't find a supported unit in the outunit: {outunit}\n"))
327
328
  }
328
329
 
329
- log_info("Transforming data by resolving {inunit} ...")
330
+ log$info("Transforming data by resolving {inunit} ...")
330
331
  if (intype == outtype) {
331
332
  fun <- identity
332
333
  } else {
@@ -339,5 +340,5 @@ if (intype == outtype) {
339
340
  assign(outtype, fun(indata))
340
341
  out <- eval(parse_expr(outunit))
341
342
 
342
- log_info("Saving output data ...")
343
+ log$info("Saving output data ...")
343
344
  write.table(out, outfile, quote=FALSE, row.names=TRUE, col.names=TRUE, sep="\t")