biopipen 0.33.1__py3-none-any.whl → 0.34.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +10 -183
- biopipen/core/proc.py +5 -3
- biopipen/core/testing.py +8 -1
- biopipen/ns/bam.py +40 -4
- biopipen/ns/cnv.py +1 -1
- biopipen/ns/cnvkit.py +1 -1
- biopipen/ns/delim.py +1 -1
- biopipen/ns/gsea.py +63 -37
- biopipen/ns/misc.py +38 -0
- biopipen/ns/plot.py +8 -0
- biopipen/ns/scrna.py +328 -292
- biopipen/ns/scrna_metabolic_landscape.py +207 -366
- biopipen/ns/tcr.py +165 -97
- biopipen/reports/bam/CNVpytor.svelte +4 -9
- biopipen/reports/cnvkit/CNVkitDiagram.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitHeatmap.svelte +1 -1
- biopipen/reports/cnvkit/CNVkitScatter.svelte +1 -1
- biopipen/reports/{delim/SampleInfo.svelte → common.svelte} +2 -3
- biopipen/reports/scrna/DimPlots.svelte +1 -1
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +51 -22
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +46 -42
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +63 -6
- biopipen/reports/snp/PlinkCallRate.svelte +2 -2
- biopipen/reports/snp/PlinkFreq.svelte +1 -1
- biopipen/reports/snp/PlinkHWE.svelte +1 -1
- biopipen/reports/snp/PlinkHet.svelte +1 -1
- biopipen/reports/snp/PlinkIBD.svelte +1 -1
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +1 -1
- biopipen/scripts/bam/CNAClinic.R +41 -6
- biopipen/scripts/bam/CNVpytor.py +2 -1
- biopipen/scripts/bam/ControlFREEC.py +2 -3
- biopipen/scripts/bam/SamtoolsView.py +33 -0
- biopipen/scripts/cnv/AneuploidyScore.R +25 -13
- biopipen/scripts/cnv/AneuploidyScoreSummary.R +218 -163
- biopipen/scripts/cnv/TMADScore.R +4 -4
- biopipen/scripts/cnv/TMADScoreSummary.R +51 -84
- biopipen/scripts/cnvkit/CNVkitGuessBaits.py +3 -3
- biopipen/scripts/cnvkit/CNVkitHeatmap.py +3 -3
- biopipen/scripts/cnvkit/CNVkitReference.py +3 -3
- biopipen/scripts/delim/RowsBinder.R +1 -1
- biopipen/scripts/delim/SampleInfo.R +4 -1
- biopipen/scripts/gene/GeneNameConversion.R +14 -12
- biopipen/scripts/gsea/Enrichr.R +2 -2
- biopipen/scripts/gsea/FGSEA.R +184 -50
- biopipen/scripts/gsea/PreRank.R +3 -3
- biopipen/scripts/misc/Plot.R +80 -0
- biopipen/scripts/plot/VennDiagram.R +2 -2
- biopipen/scripts/protein/ProdigySummary.R +34 -27
- biopipen/scripts/regulatory/MotifAffinityTest.R +11 -9
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +5 -5
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +4 -4
- biopipen/scripts/regulatory/VariantMotifPlot.R +10 -8
- biopipen/scripts/regulatory/motifs-common.R +10 -9
- biopipen/scripts/rnaseq/Simulation-ESCO.R +14 -11
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +7 -4
- biopipen/scripts/rnaseq/Simulation.R +0 -2
- biopipen/scripts/rnaseq/UnitConversion.R +6 -5
- biopipen/scripts/scrna/AnnData2Seurat.R +25 -73
- biopipen/scripts/scrna/CellCellCommunication.py +1 -1
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +51 -168
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +99 -150
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +11 -9
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +12 -9
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +14 -11
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +19 -16
- biopipen/scripts/scrna/CellTypeAnnotation.R +10 -2
- biopipen/scripts/scrna/CellsDistribution.R +1 -1
- biopipen/scripts/scrna/ExprImputation-alra.R +87 -11
- biopipen/scripts/scrna/ExprImputation-rmagic.R +247 -21
- biopipen/scripts/scrna/ExprImputation-scimpute.R +8 -5
- biopipen/scripts/scrna/MarkersFinder.R +481 -215
- biopipen/scripts/scrna/MetaMarkers.R +3 -3
- biopipen/scripts/scrna/ModuleScoreCalculator.R +14 -13
- biopipen/scripts/scrna/RadarPlots.R +1 -1
- biopipen/scripts/scrna/ScFGSEA.R +231 -76
- biopipen/scripts/scrna/ScSimulation.R +11 -10
- biopipen/scripts/scrna/ScVelo.py +605 -0
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -3
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +43 -30
- biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +56 -65
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +4 -4
- biopipen/scripts/scrna/SeuratClusterStats.R +9 -6
- biopipen/scripts/scrna/SeuratClustering.R +31 -48
- biopipen/scripts/scrna/SeuratLoading.R +2 -2
- biopipen/scripts/scrna/SeuratMap2Ref.R +66 -367
- biopipen/scripts/scrna/SeuratMetadataMutater.R +5 -7
- biopipen/scripts/scrna/SeuratPreparing.R +76 -24
- biopipen/scripts/scrna/SeuratSubClustering.R +46 -185
- biopipen/scripts/scrna/{SlingShot.R → Slingshot.R} +12 -16
- biopipen/scripts/scrna/Subset10X.R +2 -2
- biopipen/scripts/scrna/TopExpressingGenes.R +144 -185
- biopipen/scripts/scrna/celltypist-wrapper.py +6 -4
- biopipen/scripts/scrna/seurat_anndata_conversion.py +81 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +429 -123
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +346 -245
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +182 -173
- biopipen/scripts/snp/MatrixEQTL.R +39 -20
- biopipen/scripts/snp/PlinkCallRate.R +43 -34
- biopipen/scripts/snp/PlinkFreq.R +34 -41
- biopipen/scripts/snp/PlinkHWE.R +23 -18
- biopipen/scripts/snp/PlinkHet.R +26 -22
- biopipen/scripts/snp/PlinkIBD.R +30 -34
- biopipen/scripts/stats/ChowTest.R +9 -8
- biopipen/scripts/stats/DiffCoexpr.R +13 -11
- biopipen/scripts/stats/LiquidAssoc.R +7 -8
- biopipen/scripts/stats/Mediation.R +8 -8
- biopipen/scripts/stats/MetaPvalue.R +11 -13
- biopipen/scripts/stats/MetaPvalue1.R +6 -5
- biopipen/scripts/tcr/CDR3AAPhyschem.R +105 -164
- biopipen/scripts/tcr/ClonalStats.R +6 -5
- biopipen/scripts/tcr/CloneResidency.R +3 -3
- biopipen/scripts/tcr/CloneSizeQQPlot.R +2 -2
- biopipen/scripts/tcr/Immunarch2VDJtools.R +2 -2
- biopipen/scripts/tcr/ImmunarchFilter.R +3 -3
- biopipen/scripts/tcr/ImmunarchLoading.R +5 -5
- biopipen/scripts/tcr/ScRepCombiningExpression.R +39 -0
- biopipen/scripts/tcr/ScRepLoading.R +114 -92
- biopipen/scripts/tcr/TCRClusterStats.R +2 -2
- biopipen/scripts/tcr/TCRClustering.R +86 -97
- biopipen/scripts/tcr/TESSA.R +65 -115
- biopipen/scripts/tcr/VJUsage.R +5 -5
- biopipen/scripts/vcf/TruvariBenchSummary.R +15 -11
- biopipen/utils/common_docstrs.py +66 -63
- biopipen/utils/reporter.py +177 -0
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/METADATA +2 -1
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/RECORD +130 -145
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/WHEEL +1 -1
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +0 -14
- biopipen/reports/scrna/ScFGSEA.svelte +0 -16
- biopipen/reports/scrna/SeuratClusterStats.svelte +0 -16
- biopipen/reports/scrna/SeuratMap2Ref.svelte +0 -37
- biopipen/reports/scrna/SeuratPreparing.svelte +0 -15
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +0 -28
- biopipen/reports/utils/gsea.liq +0 -110
- biopipen/scripts/scrna/CellTypeAnnotation-common.R +0 -10
- biopipen/scripts/scrna/SeuratClustering-common.R +0 -213
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +0 -193
- biopipen/utils/caching.R +0 -44
- biopipen/utils/gene.R +0 -95
- biopipen/utils/gsea.R +0 -329
- biopipen/utils/io.R +0 -20
- biopipen/utils/misc.R +0 -602
- biopipen/utils/mutate_helpers.R +0 -581
- biopipen/utils/plot.R +0 -209
- biopipen/utils/repr.R +0 -146
- biopipen/utils/rnaseq.R +0 -48
- biopipen/utils/single_cell.R +0 -207
- {biopipen-0.33.1.dist-info → biopipen-0.34.1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
library(gglogger)
|
|
2
|
+
library(plotthis)
|
|
3
|
+
library(rlang)
|
|
4
|
+
library(biopipen.utils)
|
|
5
|
+
|
|
6
|
+
datafile <- {{in.datafile | r}}
|
|
7
|
+
plotfile <- {{out.plotfile | r}}
|
|
8
|
+
plotprefix <- {{out.plotfile | prefix | r}}
|
|
9
|
+
read_opts <- {{envs.read_opts | r: todot="-"}}
|
|
10
|
+
envs <- {{envs | r}}
|
|
11
|
+
|
|
12
|
+
fn <- envs$fn
|
|
13
|
+
envs$fn <- NULL
|
|
14
|
+
devpars <- envs$devpars
|
|
15
|
+
envs$devpars <- NULL
|
|
16
|
+
more_formats <- envs$more_formats
|
|
17
|
+
envs$more_formats <- NULL
|
|
18
|
+
save_code <- envs$save_code
|
|
19
|
+
envs$save_code <- NULL
|
|
20
|
+
envs$read_opts <- NULL
|
|
21
|
+
|
|
22
|
+
if (endsWith(datafile, ".qs") || endsWith(datafile, ".qs2") ||
|
|
23
|
+
endsWith(datafile, ".rds") || endsWith(datafile, ".RDS")) {
|
|
24
|
+
envs$data <- read_obj(datafile)
|
|
25
|
+
} else {
|
|
26
|
+
read_opts <- read_opts %||% list()
|
|
27
|
+
read_opts$file <- datafile
|
|
28
|
+
envs$data <- do.call(read.table, read_opts)
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
if (fn == "ManhattanPlot" && !is.null(envs$chromosomes)) {
|
|
32
|
+
#' Normalize a chromosome specification into a character vector of names.
#'
#' Accepted forms (which may be mixed):
#'   * a vector/list of names:             c("chr1", "chrX")
#'   * a single comma-separated string:    "chr1, chr2, chrX"
#'   * a numeric range, in either order:   "chr1-22", "chr1-chr22", "3-1"
#'
#' @param chrs Chromosome specification; coerced with `as.character()`.
#' @return Character vector of unique chromosome names, in order of first
#'   appearance. A zero-length input yields `character(0)`.
#' Raises an error when a range does not have exactly two endpoints, when an
#' endpoint does not end in an integer, or when the two endpoints carry
#' different non-empty prefixes.
norm_chroms <- function(chrs) {
    chrs <- as.character(chrs)
    # Guard: `if (!grepl(...))` below would fail on a zero-length condition
    if (length(chrs) == 0) {
        return(character(0))
    }
    if (length(chrs) == 1 && grepl(",", chrs, fixed = TRUE)) {
        chrs <- trimws(unlist(strsplit(chrs, ",", fixed = TRUE)))
    }
    if (length(chrs) > 1) {
        # Expand each element on its own, then flatten and de-duplicate.
        # lapply (not sapply) keeps the return type independent of the input.
        expanded <- lapply(chrs, norm_chroms)
        return(unique(unlist(expanded, use.names = FALSE)))
    }
    if (!grepl("-", chrs, fixed = TRUE)) {
        return(chrs)
    }

    # expand chr1-22 -> chr1, chr2, ..., chr22
    # Keep the original text for error messages; the split pieces are held
    # separately so a message never gets built from a multi-element vector.
    spec <- chrs
    ends <- trimws(unlist(strsplit(spec, "-", fixed = TRUE)))
    if (length(ends) != 2) {
        stop("Invalid chroms: ", spec, call. = FALSE)
    }

    # Split every endpoint into <prefix><trailing integer>,
    # e.g. 'chr1' -> 'chr' + '1', '22' -> '' + '22'
    prefixes <- sub("[0-9]+$", "", ends)
    numbers <- substring(ends, nchar(prefixes) + 1)
    if (!all(grepl("^[0-9]+$", numbers))) {
        stop(
            "Invalid chroms: ", spec,
            " (range endpoints must end in a number)",
            call. = FALSE
        )
    }
    # The second endpoint may omit the prefix ('chr1-22'); if given it must match
    if (nzchar(prefixes[2]) && prefixes[1] != prefixes[2]) {
        stop("Invalid chroms: ", spec, " (prefix mismatch)", call. = FALSE)
    }

    # as.integer() yields NA (with a warning) for values beyond integer range
    bounds <- suppressWarnings(as.integer(numbers))
    if (anyNA(bounds)) {
        stop(
            "Invalid chroms: ", spec,
            " (range endpoints out of integer range)",
            call. = FALSE
        )
    }
    paste0(prefixes[1], min(bounds):max(bounds))
}
|
|
60
|
+
|
|
61
|
+
envs$chromosomes <- norm_chroms(envs$chromosomes)
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
plotfn <- utils::getFromNamespace(fn, "plotthis")
|
|
65
|
+
if (save_code) {
|
|
66
|
+
plotfn <- gglogger::register(plotfn, name = fn)
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
p <- do_call(plotfn, envs)
|
|
70
|
+
save_plot(p, plotprefix, devpars, formats = unique(c("png", more_formats)))
|
|
71
|
+
|
|
72
|
+
if (save_code) {
|
|
73
|
+
save_plotcode(
|
|
74
|
+
p,
|
|
75
|
+
setup = c('library(plotthis)', '', 'load("data.RData")', 'list2env(envs, envir = .GlobalEnv)'),
|
|
76
|
+
prefix = plotprefix,
|
|
77
|
+
"envs",
|
|
78
|
+
auto_data_setup = FALSE
|
|
79
|
+
)
|
|
80
|
+
}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{{ biopipen_dir | joinpaths: "utils", "io.R" | source_r }}
|
|
2
2
|
{{ biopipen_dir | joinpaths: "utils", "plot.R" | source_r }}
|
|
3
3
|
|
|
4
|
-
infile = {{in.infile |
|
|
5
|
-
outfile = {{out.outfile |
|
|
4
|
+
infile = {{in.infile | r}}
|
|
5
|
+
outfile = {{out.outfile | r}}
|
|
6
6
|
inopts = {{envs.inopts | r}}
|
|
7
7
|
intype = {{envs.intype | r}}
|
|
8
8
|
devpars = {{envs.devpars | r}}
|
|
@@ -1,11 +1,7 @@
|
|
|
1
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
|
|
3
1
|
library(rlang)
|
|
4
2
|
library(dplyr)
|
|
5
|
-
library(
|
|
6
|
-
library(
|
|
7
|
-
|
|
8
|
-
theme_set(theme_prism())
|
|
3
|
+
library(biopipen.utils)
|
|
4
|
+
library(plotthis)
|
|
9
5
|
|
|
10
6
|
infiles <- {{in.infiles | r}}
|
|
11
7
|
outdir <- {{out.outdir | r}}
|
|
@@ -24,12 +20,15 @@ if (is.character(group)) {
|
|
|
24
20
|
stop(paste0("Invalid group: ", paste0(group, collapse = ", ")))
|
|
25
21
|
}
|
|
26
22
|
|
|
27
|
-
|
|
23
|
+
log <- get_logger()
|
|
24
|
+
reporter <- get_reporter()
|
|
25
|
+
|
|
26
|
+
log$info("Reading and merging metrics for each sample ...")
|
|
28
27
|
metrics <- NULL
|
|
29
28
|
|
|
30
29
|
for (infile in infiles) {
|
|
31
30
|
sample <- sub("_prodigy$", "", basename(dirname(infile)))
|
|
32
|
-
|
|
31
|
+
log$debug("- Reading metrics from {sample}")
|
|
33
32
|
metric <- read.table(
|
|
34
33
|
infile,
|
|
35
34
|
header = TRUE,
|
|
@@ -55,7 +54,7 @@ write.table(
|
|
|
55
54
|
row.names = FALSE
|
|
56
55
|
)
|
|
57
56
|
|
|
58
|
-
|
|
57
|
+
reporter$add(
|
|
59
58
|
list(kind = "descr", content = "Metrics for all samples"),
|
|
60
59
|
list(kind = "table", src = file.path(outdir, "metrics.txt")),
|
|
61
60
|
h1 = "Metrics of all samples"
|
|
@@ -76,17 +75,17 @@ METRIC_DESCR = list(
|
|
|
76
75
|
)
|
|
77
76
|
|
|
78
77
|
if (!is.null(group)) {
|
|
79
|
-
|
|
78
|
+
log$info("Merging group information ...")
|
|
80
79
|
metrics <- group %>%
|
|
81
80
|
left_join(metrics, by = "Sample") %>%
|
|
82
81
|
mutate(Group = factor(Group, levels = unique(Group)))
|
|
83
82
|
}
|
|
84
83
|
|
|
85
|
-
|
|
84
|
+
log$info("Plotting Prodigy metrics ...")
|
|
86
85
|
for (metric in names(METRIC_DESCR)) {
|
|
87
|
-
|
|
86
|
+
log$info("- {metric}: {METRIC_DESCR[[metric]]}")
|
|
88
87
|
|
|
89
|
-
|
|
88
|
+
reporter$add(
|
|
90
89
|
list(
|
|
91
90
|
kind = "descr",
|
|
92
91
|
content = METRIC_DESCR[[metric]] %||% paste0("Metric: ", metric)
|
|
@@ -94,18 +93,22 @@ for (metric in names(METRIC_DESCR)) {
|
|
|
94
93
|
h1 = metric
|
|
95
94
|
)
|
|
96
95
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
96
|
+
p <- plotthis::BarPlot(
|
|
97
|
+
x = "Sample",
|
|
98
|
+
y = metric,
|
|
99
|
+
x_text_angle = 90,
|
|
100
|
+
fill = "Group",
|
|
101
|
+
data = metrics
|
|
102
|
+
)
|
|
102
103
|
|
|
103
104
|
figfile <- file.path(outdir, paste0(slugify(metric), ".barplot.png"))
|
|
104
|
-
|
|
105
|
+
height <- attr(p, "height") %||% 6
|
|
106
|
+
width <- attr(p, "width") %||% (nrow(metrics) * .3 + 2)
|
|
107
|
+
png(figfile, height = height * 100, res = 100, width = width * 100)
|
|
105
108
|
print(p)
|
|
106
109
|
dev.off()
|
|
107
110
|
|
|
108
|
-
|
|
111
|
+
reporter$add(
|
|
109
112
|
list(src = figfile, name = "By Sample"),
|
|
110
113
|
ui = "table_of_images",
|
|
111
114
|
h1 = metric
|
|
@@ -113,21 +116,25 @@ for (metric in names(METRIC_DESCR)) {
|
|
|
113
116
|
|
|
114
117
|
if (is.null(group)) { next }
|
|
115
118
|
# group: Sample, Group
|
|
116
|
-
p <-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
119
|
+
p <- plotthis::BarPlot(
|
|
120
|
+
data = metrics,
|
|
121
|
+
x = "Group",
|
|
122
|
+
y = metric,
|
|
123
|
+
x_text_angle = 90
|
|
124
|
+
)
|
|
120
125
|
|
|
121
126
|
figfile <- file.path(outdir, paste0(slugify(metric), ".boxplot.png"))
|
|
122
|
-
|
|
127
|
+
height <- attr(p, "height") %||% 6
|
|
128
|
+
width <- attr(p, "width") %||% (length(unique(metrics$Group)) * 0.3 + 2)
|
|
129
|
+
png(figfile, height = height * 100, res = 100, width = width * 100)
|
|
123
130
|
print(p)
|
|
124
131
|
dev.off()
|
|
125
132
|
|
|
126
|
-
|
|
133
|
+
reporter$add(
|
|
127
134
|
list(src = figfile, name = "By Group"),
|
|
128
135
|
ui = "table_of_images",
|
|
129
136
|
h1 = metric
|
|
130
137
|
)
|
|
131
138
|
}
|
|
132
139
|
|
|
133
|
-
|
|
140
|
+
reporter$save(joboutdir)
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# Script for regulatory.MotifAffinityTest
|
|
2
|
-
{
|
|
3
|
-
{{ biopipen_dir | joinpaths: "scripts", "regulatory", "motifs-common.R" | source_r }}
|
|
2
|
+
{% include biopipen_dir + "/scripts/regulatory/motifs-common.R" %}
|
|
4
3
|
|
|
5
4
|
library(BiocParallel)
|
|
6
5
|
library(BSgenome)
|
|
6
|
+
library(biopipen.utils)
|
|
7
7
|
|
|
8
8
|
motiffile <- {{in.motiffile | r}}
|
|
9
9
|
varfile <- {{in.varfile | r}}
|
|
@@ -42,16 +42,18 @@ if (is.null(motif_col) && is.null(regulator_col)) {
|
|
|
42
42
|
stop("Either motif (envs.motif_col) or regulator (envs.regulator_col) column must be provided")
|
|
43
43
|
}
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
log <- get_logger()
|
|
46
|
+
|
|
47
|
+
log$info("Reading input regulator/motif file ...")
|
|
46
48
|
in_motifs <- read.table(motiffile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
|
|
47
49
|
|
|
48
|
-
|
|
50
|
+
log$info("Ensuring motifs and regulators in the input data ...")
|
|
49
51
|
in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
|
|
50
52
|
genome_pkg <- get_genome_pkg(genome)
|
|
51
53
|
|
|
52
|
-
|
|
54
|
+
log$info("Reading variant file ...")
|
|
53
55
|
if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
|
|
54
|
-
|
|
56
|
+
log$info("Converting VCF file to BED file ...")
|
|
55
57
|
varfile_bed <- file.path(outdir, gsub("\\.vcf(\\.gz)?$", ".bed", basename(varfile)))
|
|
56
58
|
cmd <- c(
|
|
57
59
|
bcftools, "query",
|
|
@@ -69,7 +71,7 @@ if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
|
|
|
69
71
|
snpinfo <- read.table(varfile, header=FALSE, stringsAsFactors=FALSE)
|
|
70
72
|
colnames(snpinfo) <- c("chrom", "start", "end", "name", "score", "strand", "ref", "alt")
|
|
71
73
|
|
|
72
|
-
|
|
74
|
+
log$info("Reading motif database ...")
|
|
73
75
|
mdb <- read_meme_to_motifdb(motifdb, in_motifs, motif_col, regulator_col, notfound, outdir)
|
|
74
76
|
|
|
75
77
|
tool <- tolower(tool)
|
|
@@ -77,8 +79,8 @@ tool <- match.arg(tool, c("motifbreakr", "atsnp"))
|
|
|
77
79
|
|
|
78
80
|
if (tool == "motifbreakr") {
|
|
79
81
|
motifbreakr_args <- {{envs.motifbreakr_args | r}}
|
|
80
|
-
{
|
|
82
|
+
{% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_MotifBreakR.R" %}
|
|
81
83
|
} else { # atsnp
|
|
82
84
|
atsnp_args <- {{envs.atsnp_args | r}}
|
|
83
|
-
{
|
|
85
|
+
{% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_AtSNP.R" %}
|
|
84
86
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
library(atSNP)
|
|
2
2
|
library(rtracklayer)
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
log$info("Converting snpinfo to atSNP object ...")
|
|
5
5
|
|
|
6
6
|
# c("chrom", "start", "end", "name", "score", "strand", "ref", "alt", "ref_seq", "alt_seq")
|
|
7
7
|
if (any(nchar(snpinfo$ref) != 1) || any(nchar(snpinfo$alt) != 1)) {
|
|
@@ -34,10 +34,10 @@ snps <- LoadSNPData(
|
|
|
34
34
|
half.window.size = k
|
|
35
35
|
)
|
|
36
36
|
|
|
37
|
-
|
|
37
|
+
log$info("Running atSNP ...")
|
|
38
38
|
atsnp_scores <- ComputeMotifScore(motif_lib, snps, ncores = ncores)
|
|
39
39
|
|
|
40
|
-
|
|
40
|
+
log$info("Calculating p values ...")
|
|
41
41
|
atsnp_result <- ComputePValues(
|
|
42
42
|
motif.lib = motif_lib,
|
|
43
43
|
snp.info = snps,
|
|
@@ -85,7 +85,7 @@ write.table(
|
|
|
85
85
|
sep = "\t", quote = FALSE, row.names = FALSE
|
|
86
86
|
)
|
|
87
87
|
|
|
88
|
-
|
|
88
|
+
log$info("Plotting variants ...")
|
|
89
89
|
# Convert result to GRanges object
|
|
90
90
|
atsnp_result$alleleDiff <- -atsnp_result[[cutoff_col]]
|
|
91
91
|
atsnp_result$effect <- "strong"
|
|
@@ -103,7 +103,7 @@ if (is.null(plots) || length(plots) == 0) {
|
|
|
103
103
|
variants <- names(plots)
|
|
104
104
|
}
|
|
105
105
|
for (variant in variants) {
|
|
106
|
-
|
|
106
|
+
log$info("- Variant: {variant}")
|
|
107
107
|
if (is.null(plots[[variant]])) {
|
|
108
108
|
plots[[variant]] <- list(devpars = devpars, which = "TRUE")
|
|
109
109
|
}
|
|
@@ -36,7 +36,7 @@ get_bkg <- function(base) {
|
|
|
36
36
|
bkg <- c(A = get_bkg("A"), C = get_bkg("C"), G = get_bkg("G"), T = get_bkg("T"))
|
|
37
37
|
|
|
38
38
|
# run motifbreakR
|
|
39
|
-
|
|
39
|
+
log$info("Running motifbreakR ...")
|
|
40
40
|
results <- motifbreakR(
|
|
41
41
|
snpList = snps,
|
|
42
42
|
pwmList = mdb,
|
|
@@ -48,7 +48,7 @@ results <- motifbreakR(
|
|
|
48
48
|
BPPARAM = MulticoreParam(ncores)
|
|
49
49
|
)
|
|
50
50
|
|
|
51
|
-
|
|
51
|
+
log$info("Calculating p values ...")
|
|
52
52
|
results <- calculatePvalue(results)
|
|
53
53
|
results_to_save <- as.data.frame(unname(results))
|
|
54
54
|
results_to_save$motifPos <- lapply(results_to_save$motifPos, function(x) paste(x, collapse = ","))
|
|
@@ -69,7 +69,7 @@ write.table(
|
|
|
69
69
|
)
|
|
70
70
|
rm(results_to_save)
|
|
71
71
|
|
|
72
|
-
|
|
72
|
+
log$info("Plotting variants ...")
|
|
73
73
|
if (is.null(plots) || length(plots) == 0) {
|
|
74
74
|
results <- results[order(-abs(results$alleleDiff)), , drop = FALSE]
|
|
75
75
|
results <- results[1:min(plot_nvars, length(results)), , drop = FALSE]
|
|
@@ -78,7 +78,7 @@ if (is.null(plots) || length(plots) == 0) {
|
|
|
78
78
|
variants <- names(plots)
|
|
79
79
|
}
|
|
80
80
|
for (variant in variants) {
|
|
81
|
-
|
|
81
|
+
log$info("- Variant: {variant}")
|
|
82
82
|
if (is.null(plots[[variant]])) {
|
|
83
83
|
plots[[variant]] <- list(devpars = devpars, which = "TRUE")
|
|
84
84
|
}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
{
|
|
2
|
-
{{ biopipen_dir | joinpaths: "scripts", "regulatory", "motifs-common.R" | source_r }}
|
|
1
|
+
{% include biopipen_dir + "/scripts/regulatory/motifs-common.R" %}
|
|
3
2
|
|
|
4
3
|
library(BSgenome)
|
|
5
4
|
library(GenomicRanges)
|
|
5
|
+
library(biopipen.utils)
|
|
6
6
|
|
|
7
7
|
infile <- {{in.infile | r}}
|
|
8
8
|
outdir <- {{out.outdir | r}}
|
|
@@ -27,17 +27,19 @@ if (is.null(motif_col) && is.null(regulator_col)) {
|
|
|
27
27
|
stop("Either motif (envs.motif_col) or regulator (envs.regulator_col) column must be provided")
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
-
|
|
30
|
+
log <- get_logger()
|
|
31
|
+
|
|
32
|
+
log$info("Reading input data ...")
|
|
31
33
|
indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
|
|
32
34
|
|
|
33
|
-
|
|
35
|
+
log$info("Ensuring regulators in the input data ...")
|
|
34
36
|
indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
|
|
35
37
|
genome_pkg <- get_genome_pkg(genome)
|
|
36
38
|
|
|
37
|
-
|
|
39
|
+
log$info("Reading motif database ...")
|
|
38
40
|
meme <- read_meme_to_motifdb(motifdb, indata, motif_col, regulator_col, notfound, outdir)
|
|
39
41
|
|
|
40
|
-
|
|
42
|
+
log$info("Composing motifbreakR results from input data ...")
|
|
41
43
|
indata$chr <- indata$chrom %||% indata$chr %||% indata$seqnames
|
|
42
44
|
indata$seqnames <- NULL
|
|
43
45
|
indata$strand <- indata$strand %||% "+"
|
|
@@ -62,7 +64,7 @@ genome(indata) <- genome
|
|
|
62
64
|
attributes(indata)$genome.package <- genome_pkg
|
|
63
65
|
attributes(indata)$motifs <- meme
|
|
64
66
|
|
|
65
|
-
|
|
67
|
+
log$info("Plotting variants ...")
|
|
66
68
|
if (is.null(plot_vars)) {
|
|
67
69
|
plot_vars <- unique(indata$SNP_id)
|
|
68
70
|
} else if (length(plot_vars) > 1) {
|
|
@@ -71,6 +73,6 @@ if (is.null(plot_vars)) {
|
|
|
71
73
|
plot_vars <- strsplit(plot_vars, ",")[[1]]
|
|
72
74
|
}
|
|
73
75
|
for (pvar in plot_vars) {
|
|
74
|
-
|
|
76
|
+
log$info("- Variant: {pvar}")
|
|
75
77
|
plot_variant_motifs(indata, pvar, devpars, outdir)
|
|
76
78
|
}
|
|
@@ -1,8 +1,7 @@
|
|
|
1
|
-
# make sure biopipen/utils/misc.R is loaded, log_warn is defined, and slugify is defined
|
|
2
|
-
|
|
3
1
|
library(rlang)
|
|
4
2
|
library(universalmotif)
|
|
5
3
|
library(MotifDb)
|
|
4
|
+
library(biopipen.utils)
|
|
6
5
|
|
|
7
6
|
#' @title Common functions for regulatory analysis
|
|
8
7
|
#' @name regulatory-common
|
|
@@ -144,11 +143,12 @@ motifdb_to_motiflib <- function(motifdb) {
|
|
|
144
143
|
#' @param notfound Action to take if regulators are not found in the mapping file
|
|
145
144
|
#' @return Data frame with regulators and motifs
|
|
146
145
|
#' @export
|
|
147
|
-
ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, regmotifs, log_indent = "", notfound = "error") {
|
|
146
|
+
ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, regmotifs, log_indent = "", notfound = "error", log = NULL) {
|
|
148
147
|
if (is.null(motif_col)) {
|
|
149
148
|
if (is.null(regmotifs)) {
|
|
150
149
|
stop("Regulator-motif mapping file (envs.regmotifs) is required when no motif column (envs.motif_col) is provided")
|
|
151
150
|
}
|
|
151
|
+
log <- log %||% get_logger()
|
|
152
152
|
regmotifs <- .read_regmotifs(regmotifs)
|
|
153
153
|
rm_motif_col <- colnames(regmotifs)[1]
|
|
154
154
|
rm_reg_col <- colnames(regmotifs)[2]
|
|
@@ -158,7 +158,7 @@ ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, r
|
|
|
158
158
|
notfound_regs <- setdiff(regulators, rm_regs)
|
|
159
159
|
.handle_notfound_items(
|
|
160
160
|
notfound_regs,
|
|
161
|
-
|
|
161
|
+
log$warn,
|
|
162
162
|
"The following regulators were not found in the regulator-motif mapping file",
|
|
163
163
|
notfound,
|
|
164
164
|
file.path(outdir, "notfound_regulators.txt"),
|
|
@@ -185,7 +185,7 @@ ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, r
|
|
|
185
185
|
notfound_motifs <- setdiff(motifs, rm_motifs)
|
|
186
186
|
.handle_notfound_items(
|
|
187
187
|
notfound_motifs,
|
|
188
|
-
|
|
188
|
+
log$warn,
|
|
189
189
|
"The following motifs were not found in the regulator-motif mapping file",
|
|
190
190
|
notfound,
|
|
191
191
|
file.path(outdir, "notfound_motifs.txt"),
|
|
@@ -232,7 +232,8 @@ get_genome_pkg <- function(genome) {
|
|
|
232
232
|
#' @param outdir Output directory, used to save un-matched motifs
|
|
233
233
|
#' @return Motifs that are found
|
|
234
234
|
#' @export
|
|
235
|
-
check_motifs <- function(motifs, all_motifs, notfound, outdir) {
|
|
235
|
+
check_motifs <- function(motifs, all_motifs, notfound, outdir, log = NULL) {
|
|
236
|
+
log <- log %||% get_logger()
|
|
236
237
|
notfound_motifs <- setdiff(motifs, all_motifs)
|
|
237
238
|
if (length(notfound_motifs) > 0) {
|
|
238
239
|
first_notfound <- head(notfound_motifs, 3)
|
|
@@ -246,15 +247,15 @@ check_motifs <- function(motifs, all_motifs, notfound, outdir) {
|
|
|
246
247
|
if (notfound == "error") {
|
|
247
248
|
stop(msg1, "\n", msg2)
|
|
248
249
|
} else if (notfound == "ignore") {
|
|
249
|
-
|
|
250
|
-
|
|
250
|
+
log$warn(msg1)
|
|
251
|
+
log$warn(msg2)
|
|
251
252
|
}
|
|
252
253
|
} else {
|
|
253
254
|
msg <- paste0("The following motifs were not found in the motif database: ", paste(first_notfound, collapse = ", "))
|
|
254
255
|
if (notfound == "error") {
|
|
255
256
|
stop(msg)
|
|
256
257
|
} else if (notfound == "ignore") {
|
|
257
|
-
|
|
258
|
+
log$warn(msg)
|
|
258
259
|
}
|
|
259
260
|
}
|
|
260
261
|
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
library(ESCO)
|
|
3
3
|
library(rlang)
|
|
4
4
|
library(glue)
|
|
5
|
+
library(biopipen.utils)
|
|
5
6
|
|
|
6
7
|
args <- {{envs.esco_args | r: todot="-"}}
|
|
7
8
|
args <- args %||% list()
|
|
@@ -9,6 +10,8 @@ args <- args %||% list()
|
|
|
9
10
|
save <- args$save
|
|
10
11
|
args$save <- NULL
|
|
11
12
|
|
|
13
|
+
log <- get_logger()
|
|
14
|
+
|
|
12
15
|
if (!is.null(seed)) {
|
|
13
16
|
set.seed(seed)
|
|
14
17
|
args$seed <- seed
|
|
@@ -20,12 +23,12 @@ args$verbose <- TRUE
|
|
|
20
23
|
args$numCores <- ncores
|
|
21
24
|
type <- args$type
|
|
22
25
|
|
|
23
|
-
|
|
26
|
+
log$info("Running simulation ...")
|
|
24
27
|
sim <- do_call(escoSimulate, args)
|
|
25
28
|
attributes(sim) <- c(attributes(sim), c(simulation_tool = "ESCO"))
|
|
26
|
-
|
|
29
|
+
save_obj(sim, file.path(outdir, "sim.rds"))
|
|
27
30
|
|
|
28
|
-
|
|
31
|
+
log$info("Plotting ...")
|
|
29
32
|
if (type == "single") {
|
|
30
33
|
asys <- assays(sim)
|
|
31
34
|
datalist = list(`simulated-truth` = asys$TrueCounts)
|
|
@@ -36,7 +39,7 @@ if (type == "single") {
|
|
|
36
39
|
datalist$`down-sampled` = asys$observedcounts
|
|
37
40
|
}
|
|
38
41
|
|
|
39
|
-
|
|
42
|
+
log$info("- Plotting the data ...")
|
|
40
43
|
dataplot <- file.path(outdir, "data.png")
|
|
41
44
|
png(dataplot, width=length(datalist) * 600, height=1200, res=30)
|
|
42
45
|
heatdata(datalist, norm = FALSE, size = 2, ncol = 3)
|
|
@@ -44,7 +47,7 @@ if (type == "single") {
|
|
|
44
47
|
|
|
45
48
|
rholist <- metadata(sim)$Params@corr
|
|
46
49
|
if (length(rholist) > 0) {
|
|
47
|
-
|
|
50
|
+
log$info("- Plotting the GCN ...")
|
|
48
51
|
corrgenes <- rownames(rholist[[1]])
|
|
49
52
|
gcnlist = lapply(datalist, function(data)gcn(data, genes = corrgenes))
|
|
50
53
|
gcnlist = append(gcnlist, list("given truth" = rholist[[1]]), 1)
|
|
@@ -75,13 +78,13 @@ if (type == "single") {
|
|
|
75
78
|
datalist$`down-sampled` = asys$observedcounts
|
|
76
79
|
}
|
|
77
80
|
|
|
78
|
-
|
|
81
|
+
log$info("- Plotting the data ...")
|
|
79
82
|
dataplot <- file.path(outdir, "data.png")
|
|
80
83
|
png(dataplot, width=length(datalist) * 600, height=1200, res=30)
|
|
81
84
|
heatdata(datalist, cellinfo = cellinfo, geneinfo = geneinfo, size = 1, ncol = 3)
|
|
82
85
|
dev.off()
|
|
83
86
|
|
|
84
|
-
|
|
87
|
+
log$info("- Plotting the GCN for all marker genes (i.e. DE genes) across all cell groups ...")
|
|
85
88
|
degeneinfo = geneinfo[which(geneinfo$newcelltype!="None"),]
|
|
86
89
|
degeneinfo$newcelltype = droplevels(degeneinfo$newcelltype)
|
|
87
90
|
degcnlist = lapply(datalist, function(data)gcn(data, genes = degeneinfo$genes))
|
|
@@ -90,7 +93,7 @@ if (type == "single") {
|
|
|
90
93
|
heatgcn(degcnlist, geneinfo = degeneinfo, size = 2, ncol = 3)
|
|
91
94
|
dev.off()
|
|
92
95
|
|
|
93
|
-
|
|
96
|
+
log$info("- Plotting the GCN for marker genes within one cell group ...")
|
|
94
97
|
rholist = metadata(sim)$Params@corr
|
|
95
98
|
group2_gcnlist = lapply(datalist,
|
|
96
99
|
function(data){
|
|
@@ -126,7 +129,7 @@ if (type == "single") {
|
|
|
126
129
|
DEgene.name = as.character(rowData(sim)$Gene[which(group.facs.gene[,1]>1)])
|
|
127
130
|
degeneinfo = geneinfo[match(DEgene.name, geneinfo$genes),]
|
|
128
131
|
|
|
129
|
-
|
|
132
|
+
log$info("- Plotting the data ...")
|
|
130
133
|
dataplot <- file.path(outdir, "data.png")
|
|
131
134
|
png(dataplot, width=2000, height=1200, res=30)
|
|
132
135
|
# plot the data
|
|
@@ -151,7 +154,7 @@ if (type == "single") {
|
|
|
151
154
|
# get the geneinfo
|
|
152
155
|
degenes = which(metadata(sim)$Params@paths.DEgenes==1)
|
|
153
156
|
|
|
154
|
-
|
|
157
|
+
log$info("- Plotting the trajectory ...")
|
|
155
158
|
trajplot <- file.path(outdir, "traj.png")
|
|
156
159
|
png(trajplot, width=1600, height=1200, res=30)
|
|
157
160
|
# plot the data
|
|
@@ -160,7 +163,7 @@ if (type == "single") {
|
|
|
160
163
|
labels = levels(as.factor(colData(sim)$Path)))
|
|
161
164
|
dev.off()
|
|
162
165
|
|
|
163
|
-
|
|
166
|
+
log$info("- Plotting the data ...")
|
|
164
167
|
dataplot <- file.path(outdir, "data.png")
|
|
165
168
|
heatdata(list("simulated truth" = datatrue[degenes,]),
|
|
166
169
|
cellinfo = cellinfo,
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
|
|
2
2
|
library(rlang)
|
|
3
3
|
library(RUVcorr)
|
|
4
|
+
library(biopipen.utils)
|
|
5
|
+
|
|
6
|
+
log <- get_logger()
|
|
4
7
|
|
|
5
8
|
args <- {{envs.ruvcorr_args | r: todot="-"}}
|
|
6
9
|
if (!is.null(seed)) { set.seed(seed) }
|
|
@@ -17,7 +20,7 @@ args$check <- args$check %||% TRUE
|
|
|
17
20
|
args$n = ngenes
|
|
18
21
|
args$m = nsamples
|
|
19
22
|
|
|
20
|
-
|
|
23
|
+
log$info("Running simulation ...")
|
|
21
24
|
sim <- do_call(simulateGEdata, args)
|
|
22
25
|
attributes(sim) <- c(attributes(sim), c(simulation_tool = "RUVcorr"))
|
|
23
26
|
genes <- paste0("Gene", 1:ngenes)
|
|
@@ -35,8 +38,8 @@ sim$Noise <- t(sim$Noise)
|
|
|
35
38
|
colnames(sim$Sigma) <- genes
|
|
36
39
|
rownames(sim$Sigma) <- genes
|
|
37
40
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
+
log$info("Saving results ...")
|
|
42
|
+
save_obj(sim, file.path(outdir, "sim.rds"))
|
|
43
|
+
save_obj(sim$Truth, file.path(outdir, "Truth.rds"))
|
|
41
44
|
|
|
42
45
|
simulated <- sim$Y
|
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
|
|
3
1
|
library(rlang)
|
|
4
2
|
library(glue)
|
|
3
|
+
library(biopipen.utils)
|
|
5
4
|
|
|
6
5
|
infile <- {{in.infile | r}}
|
|
7
6
|
outfile <- {{out.outfile | r}}
|
|
@@ -11,7 +10,9 @@ refexon <- {{envs.refexon | r}}
|
|
|
11
10
|
meanfl <- {{envs.meanfl | r}}
|
|
12
11
|
nreads <- {{envs.nreads | r}}
|
|
13
12
|
|
|
14
|
-
|
|
13
|
+
log <- get_logger()
|
|
14
|
+
|
|
15
|
+
log$info("Reading input data ...")
|
|
15
16
|
indata = read.table(infile, header = TRUE, sep = "\t", row.names = 1, check.names = F)
|
|
16
17
|
samples = colnames(indata)
|
|
17
18
|
|
|
@@ -326,7 +327,7 @@ if (grepl('rawcounts|rawcount|counts|count', outunit)) {
|
|
|
326
327
|
stop(glue("Can't find a supported unit in the outunit: {outunit}\n"))
|
|
327
328
|
}
|
|
328
329
|
|
|
329
|
-
|
|
330
|
+
log$info("Transforming data by resolving {inunit} ...")
|
|
330
331
|
if (intype == outtype) {
|
|
331
332
|
fun <- identity
|
|
332
333
|
} else {
|
|
@@ -339,5 +340,5 @@ if (intype == outtype) {
|
|
|
339
340
|
assign(outtype, fun(indata))
|
|
340
341
|
out <- eval(parse_expr(outunit))
|
|
341
342
|
|
|
342
|
-
|
|
343
|
+
log$info("Saving output data ...")
|
|
343
344
|
write.table(out, outfile, quote=FALSE, row.names=TRUE, col.names=TRUE, sep="\t")
|