biopipen 0.17.6__py3-none-any.whl → 0.18.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +36 -23
- biopipen/ns/delim.py +1 -1
- biopipen/ns/scrna.py +132 -49
- biopipen/ns/tcr.py +62 -0
- biopipen/reports/scrna/MarkersFinder.svelte +30 -8
- biopipen/reports/scrna/SeuratClusterStats.svelte +64 -109
- biopipen/reports/tcr/TESSA.svelte +43 -0
- biopipen/scripts/delim/SampleInfo.R +18 -15
- biopipen/scripts/scrna/MarkersFinder.R +58 -2
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +40 -0
- biopipen/scripts/scrna/SeuratClusterStats-features.R +236 -0
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +105 -0
- biopipen/scripts/scrna/SeuratClusterStats.R +7 -521
- biopipen/scripts/scrna/SeuratClustering.R +20 -1
- biopipen/scripts/tcr/ImmunarchLoading.R +1 -1
- biopipen/scripts/tcr/TESSA.R +198 -0
- biopipen/scripts/tcr/TESSA_source/Atchley_factors.csv +21 -0
- biopipen/scripts/tcr/TESSA_source/BriseisEncoder.py +168 -0
- biopipen/scripts/tcr/TESSA_source/MCMC_control.R +71 -0
- biopipen/scripts/tcr/TESSA_source/TrainedEncoder.h5 +0 -0
- biopipen/scripts/tcr/TESSA_source/fixed_b.csv +31 -0
- biopipen/scripts/tcr/TESSA_source/initialization.R +120 -0
- biopipen/scripts/tcr/TESSA_source/post_analysis.R +124 -0
- biopipen/scripts/tcr/TESSA_source/real_data.R +67 -0
- biopipen/scripts/tcr/TESSA_source/update.R +195 -0
- biopipen/scripts/tcr/TESSA_source/utility.R +18 -0
- {biopipen-0.17.6.dist-info → biopipen-0.18.0.dist-info}/METADATA +8 -8
- {biopipen-0.17.6.dist-info → biopipen-0.18.0.dist-info}/RECORD +31 -16
- {biopipen-0.17.6.dist-info → biopipen-0.18.0.dist-info}/WHEEL +0 -0
- {biopipen-0.17.6.dist-info → biopipen-0.18.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{% from "utils/misc.liq" import report_jobs, table_of_images -%}
|
|
2
|
+
<script>
|
|
3
|
+
import { Image, DataTable } from "$libs";
|
|
4
|
+
import { Tile } from "$ccs";
|
|
5
|
+
</script>
|
|
6
|
+
|
|
7
|
+
<Tile>
|
|
8
|
+
<p><a href="https://github.com/jcao89757/TESSA" target="_blank">Tessa</a> is a Bayesian model to integrate T cell receptor (TCR) sequence profiling with transcriptomes of T cells. Enabled by the recently developed single cell sequencing techniques, which provide both TCR sequences and RNA sequences of each T cell concurrently, Tessa maps the functional landscape of the TCR repertoire, and generates insights into understanding human immune response to diseases. As the first part of tessa, BriseisEncoder is employed prior to the Bayesian algorithm to capture the TCR sequence features and create numerical embeddings. We showed that the reconstructed Atchley Factor matrices and CDR3 sequences, generated through the numerical embeddings, are highly similar to their original counterparts. The CDR3 peptide sequences are constructed via a RandomForest model applied on the reconstructed Atchley Factor matrices.</p>
|
|
9
|
+
|
|
10
|
+
<p>For more information, please refer to the following papers:</p>
|
|
11
|
+
<ul>
|
|
12
|
+
<li>- <a href="https://www.nature.com/articles/s41592-020-01020-3" target="_blank">Mapping the Functional Landscape of TCR Repertoire</a>, Zhang, Z., Xiong, D., Wang, X. et al. 2021.</li>
|
|
13
|
+
<li>- <a href="https://www.nature.com/articles/s42256-021-00383-2" target="_blank">Deep learning-based prediction of the T cell receptor–antigen binding specificity</a>, Lu, T., Zhang, Z., Zhu, J. et al. 2021.</li>
|
|
14
|
+
</ul>
|
|
15
|
+
</Tile>
|
|
16
|
+
<p> </p>
|
|
17
|
+
|
|
18
|
+
{%- macro report_job(job, h=1) -%}
|
|
19
|
+
{{ table_of_images(
|
|
20
|
+
[
|
|
21
|
+
joinpaths(job.outdir, "result", "Cluster_size_dist.png"),
|
|
22
|
+
joinpaths(job.outdir, "result", "clone_size.png"),
|
|
23
|
+
joinpaths(job.outdir, "result", "exp_TCR_pair_plot.png"),
|
|
24
|
+
joinpaths(job.outdir, "result", "TCR_dist_density.png"),
|
|
25
|
+
joinpaths(job.outdir, "result", "TCR_explore.png"),
|
|
26
|
+
joinpaths(job.outdir, "result", "TCR_explore_clusters.png"),
|
|
27
|
+
],
|
|
28
|
+
[
|
|
29
|
+
"TESSA cluster size distribution",
|
|
30
|
+
"Cluster center size vs. non-center cluster size",
|
|
31
|
+
"Expression-TCR distance plot",
|
|
32
|
+
"Density of TCR distances",
|
|
33
|
+
"Exploratory plot at the TCR level",
|
|
34
|
+
"TESSA clusters",
|
|
35
|
+
],
|
|
36
|
+
) }}
|
|
37
|
+
{%- endmacro -%}
|
|
38
|
+
|
|
39
|
+
{%- macro head_job(job) -%}
|
|
40
|
+
<h1>{{job.in.immdata | stem | escape}}</h1>
|
|
41
|
+
{%- endmacro -%}
|
|
42
|
+
|
|
43
|
+
{{ report_jobs(jobs, head_job, report_job) }}
|
|
@@ -5,6 +5,7 @@ library(dplyr)
|
|
|
5
5
|
library(ggplot2)
|
|
6
6
|
library(ggprism)
|
|
7
7
|
library(ggsci)
|
|
8
|
+
library(ggrepel)
|
|
8
9
|
|
|
9
10
|
infile <- {{in.infile | r}}
|
|
10
11
|
outfile <- {{out.outfile | r}}
|
|
@@ -39,8 +40,9 @@ for (name in names(stats)) {
|
|
|
39
40
|
plotfile <- file.path(outdir, paste0(name, ".png"))
|
|
40
41
|
|
|
41
42
|
is_continuous <- FALSE
|
|
42
|
-
if (startsWith(stat$on, "distinct:")) {
|
|
43
|
-
stat$on <-
|
|
43
|
+
if (startsWith(stat$on, "distinct:") || startsWith(stat$on, "unique:")) {
|
|
44
|
+
stat$on <- gsub("distinct:", "", stat$on)
|
|
45
|
+
stat$on <- gsub("unique:", "", stat$on)
|
|
44
46
|
data <- mutdata %>% distinct(!!sym(stat$on), .keep_all = TRUE)
|
|
45
47
|
} else {
|
|
46
48
|
data <- mutdata
|
|
@@ -89,14 +91,14 @@ for (name in names(stats)) {
|
|
|
89
91
|
if (stat$plot == "boxplot" || stat$plot == "box") {
|
|
90
92
|
p <- ggplot(data, aes(x=!!group, y=!!sym(stat$on), fill=!!group)) +
|
|
91
93
|
geom_boxplot(position = "dodge") +
|
|
92
|
-
scale_fill_ucscgb() +
|
|
94
|
+
scale_fill_ucscgb(alpha = .8) +
|
|
93
95
|
xlab("")
|
|
94
96
|
} else if (stat$plot == "violin" ||
|
|
95
97
|
stat$plot == "violinplot" ||
|
|
96
98
|
stat$plot == "vlnplot") {
|
|
97
99
|
p <- ggplot(data, aes(x = !!group, y = !!sym(stat$on), fill=!!group)) +
|
|
98
100
|
geom_violin(position = "dodge") +
|
|
99
|
-
scale_fill_ucscgb() +
|
|
101
|
+
scale_fill_ucscgb(alpha = .8) +
|
|
100
102
|
xlab("")
|
|
101
103
|
} else if (
|
|
102
104
|
(grepl("violin", stat$plot) || grepl("vln", stat$plot)) &&
|
|
@@ -105,12 +107,12 @@ for (name in names(stats)) {
|
|
|
105
107
|
p <- ggplot(data, aes(x = !!group, y = !!sym(stat$on), fill = !!group)) +
|
|
106
108
|
geom_violin(position = "dodge") +
|
|
107
109
|
geom_boxplot(width = 0.1, position = position_dodge(0.9), fill="white") +
|
|
108
|
-
scale_fill_ucscgb() +
|
|
110
|
+
scale_fill_ucscgb(alpha = .8) +
|
|
109
111
|
xlab("")
|
|
110
112
|
} else if (stat$plot == "histogram" || stat$plot == "hist") {
|
|
111
113
|
p <- ggplot(data, aes(x = !!sym(stat$on), fill = !!group)) +
|
|
112
114
|
geom_histogram(bins = 10, position = "dodge", alpha = 0.8, color = "white") +
|
|
113
|
-
scale_fill_ucscgb()
|
|
115
|
+
scale_fill_ucscgb(alpha = .8)
|
|
114
116
|
} else if (stat$plot == "pie" || stat$plot == "piechart") {
|
|
115
117
|
if (is.null(stat$each)) {
|
|
116
118
|
data <- data %>% distinct(!!group, .keep_all = TRUE)
|
|
@@ -120,19 +122,20 @@ for (name in names(stats)) {
|
|
|
120
122
|
group_by(!!sym(stat$each))
|
|
121
123
|
}
|
|
122
124
|
p <- ggplot(
|
|
123
|
-
data %>%
|
|
124
|
-
|
|
125
|
-
..prop = !!sym(count_on) / sum(!!sym(count_on)),
|
|
126
|
-
..ypos = (cumsum(..prop) - 0.5 * ..prop) * sum(!!sym(count_on))
|
|
127
|
-
),
|
|
128
|
-
aes(x = "", y = !!sym(count_on), fill = !!group)
|
|
125
|
+
data %>% arrange(!!group),
|
|
126
|
+
aes(x = "", y = !!sym(count_on), fill = rev(!!group), label = !!sym(count_on))
|
|
129
127
|
) +
|
|
130
128
|
geom_bar(stat="identity", width=1, color="white") +
|
|
131
129
|
coord_polar("y", start = 0) +
|
|
132
130
|
theme_void() +
|
|
133
131
|
theme(plot.title = element_text(hjust = 0.5)) +
|
|
134
|
-
|
|
135
|
-
|
|
132
|
+
geom_label_repel(
|
|
133
|
+
position = position_stack(vjust = 0.5),
|
|
134
|
+
color="#333333",
|
|
135
|
+
fill="#EEEEEE",
|
|
136
|
+
size=4
|
|
137
|
+
) +
|
|
138
|
+
scale_fill_ucscgb(alpha = .8, name = group) +
|
|
136
139
|
ggtitle(paste0("# ", stat$on))
|
|
137
140
|
} else if (stat$plot == "bar" || stat$plot == "barplot") {
|
|
138
141
|
if (is.null(stat$each)) {
|
|
@@ -144,7 +147,7 @@ for (name in names(stats)) {
|
|
|
144
147
|
data,
|
|
145
148
|
aes(x = !!group, y = !!sym(count_on), fill = !!group)) +
|
|
146
149
|
geom_bar(stat = "identity") +
|
|
147
|
-
scale_fill_ucscgb() +
|
|
150
|
+
scale_fill_ucscgb(alpha = .8) +
|
|
148
151
|
ylab(paste0("# ", stat$on))
|
|
149
152
|
} else {
|
|
150
153
|
stop("Unknown plot type: ", stat$plot)
|
|
@@ -8,6 +8,8 @@ library(tibble)
|
|
|
8
8
|
library(Seurat)
|
|
9
9
|
library(enrichR)
|
|
10
10
|
library(ggplot2)
|
|
11
|
+
library(ggprism)
|
|
12
|
+
library(ggrepel)
|
|
11
13
|
library(future)
|
|
12
14
|
library(tidyseurat)
|
|
13
15
|
|
|
@@ -25,9 +27,14 @@ prefix_each <- {{ envs.prefix_each | r }}
|
|
|
25
27
|
section <- {{ envs.section | r }}
|
|
26
28
|
dbs <- {{ envs.dbs | r }}
|
|
27
29
|
sigmarkers <- {{ envs.sigmarkers | r }}
|
|
30
|
+
volcano_genes <- {{ envs.volcano_genes | r }}
|
|
28
31
|
rest <- {{ envs.rest | r: todot="-" }}
|
|
29
32
|
cases <- {{ envs.cases | r: todot="-" }}
|
|
30
33
|
|
|
34
|
+
if (is.character(volcano_genes) && length(volcano_genes) == 1) {
|
|
35
|
+
volcano_genes <- trimws(strsplit(volcano_genes, ",")[[1]])
|
|
36
|
+
}
|
|
37
|
+
|
|
31
38
|
set.seed(8525)
|
|
32
39
|
if (ncores > 1) {
|
|
33
40
|
options(future.globals.maxSize = 80000 * 1024^2)
|
|
@@ -55,6 +62,7 @@ if (is.null(cases) || length(cases) == 0) {
|
|
|
55
62
|
section = section,
|
|
56
63
|
dbs = dbs,
|
|
57
64
|
sigmarkers = sigmarkers,
|
|
65
|
+
volcano_genes = volcano_genes,
|
|
58
66
|
rest = rest
|
|
59
67
|
)
|
|
60
68
|
)
|
|
@@ -70,6 +78,7 @@ if (is.null(cases) || length(cases) == 0) {
|
|
|
70
78
|
section = section,
|
|
71
79
|
dbs = dbs,
|
|
72
80
|
sigmarkers = sigmarkers,
|
|
81
|
+
volcano_genes = volcano_genes,
|
|
73
82
|
rest = rest
|
|
74
83
|
)
|
|
75
84
|
case$rest <- list_setdefault(case$rest, rest)
|
|
@@ -153,13 +162,59 @@ for (name in names(cases)) {
|
|
|
153
162
|
}
|
|
154
163
|
cases <- newcases
|
|
155
164
|
|
|
165
|
+
plot_volcano = function(markers, volfile, sig, volgenes) {
|
|
166
|
+
# markers
|
|
167
|
+
# gene p_val avg_log2FC pct.1 pct.2 p_val_adj
|
|
168
|
+
# 1 CCL5 1.883596e-11 -4.8282535 0.359 0.927 4.332270e-09
|
|
169
|
+
# 2 HLA-DQB1 3.667713e-09 6.1543174 0.718 0.098 8.435740e-07
|
|
170
|
+
# 3 HLA-DRB5 1.242993e-07 3.9032231 0.744 0.195 2.858885e-05
|
|
171
|
+
# 4 CD79B 2.036731e-07 4.2748835 0.692 0.146 4.684482e-05
|
|
172
|
+
markers = markers %>%
|
|
173
|
+
mutate(
|
|
174
|
+
Significant = if_else(
|
|
175
|
+
!!parse_expr(sig),
|
|
176
|
+
if_else(avg_log2FC > 0, "Up", "Down"),
|
|
177
|
+
"No"
|
|
178
|
+
),
|
|
179
|
+
Label = if_else(
|
|
180
|
+
Significant != "No" & (isTRUE(volgenes) | (gene %in% volgenes)),
|
|
181
|
+
gene,
|
|
182
|
+
""
|
|
183
|
+
)
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
p_vol = ggplot(markers, aes(x = avg_log2FC, y = -log10(p_val_adj))) +
|
|
187
|
+
geom_point(aes(color = Significant), alpha = 0.75) +
|
|
188
|
+
scale_color_manual(
|
|
189
|
+
values = c(Up = "#FF3333", Down = "#3333FF", No = "#AAAAAA"),
|
|
190
|
+
labels = c(Up = "Up", Down = "Down", No = "Non-Significant")
|
|
191
|
+
) +
|
|
192
|
+
geom_text_repel(
|
|
193
|
+
aes(label = Label),
|
|
194
|
+
size = 3,
|
|
195
|
+
color = "#000000",
|
|
196
|
+
box.padding = unit(0.35, "lines"),
|
|
197
|
+
point.padding = unit(0.5, "lines"),
|
|
198
|
+
segment.color = "#000000"
|
|
199
|
+
) +
|
|
200
|
+
theme_prism() +
|
|
201
|
+
theme(legend.title=element_blank()) +
|
|
202
|
+
labs(
|
|
203
|
+
x = "log2 Fold Change",
|
|
204
|
+
y = "-log10 Adjusted P-value"
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
png(volfile, res = 100, height = 800, width = 900)
|
|
208
|
+
print(p_vol)
|
|
209
|
+
dev.off()
|
|
210
|
+
}
|
|
156
211
|
|
|
157
212
|
# Do enrichment analysis for a case using Enrichr
|
|
158
213
|
# Args:
|
|
159
214
|
# case: case name
|
|
160
215
|
# markers: markers dataframe
|
|
161
216
|
# sig: The expression to filter significant markers
|
|
162
|
-
do_enrich <- function(case, markers, sig) {
|
|
217
|
+
do_enrich <- function(case, markers, sig, volgenes) {
|
|
163
218
|
print(paste(" Running enrichment for case:", case))
|
|
164
219
|
parts <- strsplit(case, ":")[[1]]
|
|
165
220
|
sec <- parts[1]
|
|
@@ -171,6 +226,7 @@ do_enrich <- function(case, markers, sig) {
|
|
|
171
226
|
cat("No markers found.", file = file.path(casedir, "error.txt"))
|
|
172
227
|
return()
|
|
173
228
|
}
|
|
229
|
+
plot_volcano(markers, file.path(casedir, "volcano.png"), sig, volgenes)
|
|
174
230
|
markers_sig <- markers %>% filter(!!parse_expr(sig))
|
|
175
231
|
if (nrow(markers_sig) == 0) {
|
|
176
232
|
print(paste(" No significant markers found for case:", case))
|
|
@@ -255,7 +311,7 @@ do_case <- function(casename) {
|
|
|
255
311
|
args$object <- srtobj
|
|
256
312
|
}
|
|
257
313
|
markers <- do_call(FindMarkers, args) %>% rownames_to_column("gene")
|
|
258
|
-
do_enrich(casename, markers, case$sigmarkers)
|
|
314
|
+
do_enrich(casename, markers, case$sigmarkers, case$volcano_genes)
|
|
259
315
|
}
|
|
260
316
|
|
|
261
317
|
sapply(sort(names(cases)), do_case)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Loaded variables: srtfile, outdir, srtobj
|
|
2
|
+
|
|
3
|
+
dimplots_defaults = {{envs.dimplots_defaults | r: todot="-"}}
|
|
4
|
+
dimplots = {{envs.dimplots | r: todot="-", skip=1}}
|
|
5
|
+
|
|
6
|
+
odir = file.path(outdir, "dimplots")
|
|
7
|
+
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
8
|
+
report_toc_file = file.path(odir, "report_toc.json")
|
|
9
|
+
# Realname => file
|
|
10
|
+
report_toc = list()
|
|
11
|
+
|
|
12
|
+
do_one_dimplot = function(name) {
|
|
13
|
+
print(paste0("Doing dimplots for: ", name))
|
|
14
|
+
|
|
15
|
+
case = list_update(dimplots_defaults, dimplots[[name]])
|
|
16
|
+
case$devpars = list_update(dimplots_defaults$devpars, dimplots[[name]]$devpars)
|
|
17
|
+
case$object = srtobj
|
|
18
|
+
if (is.null(case$cols)) {
|
|
19
|
+
case$cols = pal_ucscgb()(26)
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
excluded_args = c("devpars", "ident")
|
|
23
|
+
for (arg in excluded_args) {
|
|
24
|
+
assign(arg, case[[arg]])
|
|
25
|
+
case[[arg]] = NULL
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
if (case$reduction %in% c("dim", "auto")) {
|
|
29
|
+
case$reduction = NULL
|
|
30
|
+
}
|
|
31
|
+
report_toc[[name]] <<- paste0(slugify(name), ".dim.png")
|
|
32
|
+
figfile = file.path(odir, report_toc[[name]])
|
|
33
|
+
png(figfile, width=devpars$width, height=devpars$height, res=devpars$res)
|
|
34
|
+
p = do_call(DimPlot, case)
|
|
35
|
+
print(p)
|
|
36
|
+
dev.off()
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
sapply(names(dimplots), do_one_dimplot)
|
|
40
|
+
.save_toc()
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
# Loaded variables: srtfile, outdir, srtobj
|
|
2
|
+
|
|
3
|
+
features_defaults = {{envs.features_defaults | r: todot="-"}}
|
|
4
|
+
features = {{envs.features | r: todot="-", skip=1}}
|
|
5
|
+
|
|
6
|
+
odir = file.path(outdir, "features")
|
|
7
|
+
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
8
|
+
report_toc_file = file.path(odir, "report_toc.json")
|
|
9
|
+
# Section => list(
|
|
10
|
+
# list(name?, kind, file),
|
|
11
|
+
# ...
|
|
12
|
+
# )
|
|
13
|
+
report_toc = list()
|
|
14
|
+
|
|
15
|
+
.add_toc = function(section, toc) {
|
|
16
|
+
if (section %in% names(report_toc)) {
|
|
17
|
+
report_toc[[section]][length(report_toc[[section]])] <<- toc
|
|
18
|
+
} else {
|
|
19
|
+
report_toc[[section]] <<- list(toc)
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
.get_features = function(features) {
|
|
24
|
+
if (is.null(features)) { features = 20 }
|
|
25
|
+
if (is.numeric(features)) {
|
|
26
|
+
return (VariableFeatures(srtobj)[1:features])
|
|
27
|
+
}
|
|
28
|
+
if (is.character(features) && length(features) > 1) {
|
|
29
|
+
return (features)
|
|
30
|
+
}
|
|
31
|
+
if (is.character(features) && startsWith(features, "file://")) {
|
|
32
|
+
return (read.table(
|
|
33
|
+
substring(features, 8),
|
|
34
|
+
header = FALSE,
|
|
35
|
+
row.names = NULL,
|
|
36
|
+
check.names = FALSE
|
|
37
|
+
)$V1)
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
if (is.null(features)) {
|
|
41
|
+
if (is.null(default_features)) {
|
|
42
|
+
return (default[1:20])
|
|
43
|
+
} else {
|
|
44
|
+
return (default_features)
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
return (trimws(unlist(strsplit(features, ","))))
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
do_one_features = function(name) {
|
|
52
|
+
print(paste0("Doing features for: ", name))
|
|
53
|
+
|
|
54
|
+
case = list_update(features_defaults, features[[name]])
|
|
55
|
+
toc = list()
|
|
56
|
+
if (!is.null(case$section)) { toc$name = name }
|
|
57
|
+
case$devpars = list_update(features_defaults$devpars, features[[name]]$devpars)
|
|
58
|
+
excluded_args = c(
|
|
59
|
+
"section",
|
|
60
|
+
"devpars",
|
|
61
|
+
"subset",
|
|
62
|
+
"plus",
|
|
63
|
+
"ident",
|
|
64
|
+
"kind"
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
fn = NULL
|
|
68
|
+
default_devpars = NULL
|
|
69
|
+
if ("ridge" %in% case$kind) {
|
|
70
|
+
case$kind = "ridge"
|
|
71
|
+
if (is.null(case$cols)) {
|
|
72
|
+
case$cols = pal_ucscgb(alpha = .8)(26)
|
|
73
|
+
}
|
|
74
|
+
excluded_args = c(excluded_args, "split.by")
|
|
75
|
+
fn = RidgePlot
|
|
76
|
+
default_devpars = function(features, ncol, uidents) {
|
|
77
|
+
if (is.null(ncol)) { ncol = 1 }
|
|
78
|
+
list(
|
|
79
|
+
width = 400 * ncol,
|
|
80
|
+
height = ceiling(length(features) / ncol) * ifelse(length(uidents) < 10, 300, 400),
|
|
81
|
+
res = 100
|
|
82
|
+
)
|
|
83
|
+
}
|
|
84
|
+
} else if ("vln" %in% case$kind || "violin" %in% case$kind) {
|
|
85
|
+
case$kind = "violin"
|
|
86
|
+
if (is.null(case$cols)) {
|
|
87
|
+
case$cols = pal_ucscgb(alpha = .8)(26)
|
|
88
|
+
}
|
|
89
|
+
fn = VlnPlot
|
|
90
|
+
default_devpars = function(features, ncol, uidents) {
|
|
91
|
+
if (is.null(ncol)) { ncol = 1 }
|
|
92
|
+
list(
|
|
93
|
+
width = 400 * ncol,
|
|
94
|
+
height = ceiling(length(features) / ncol) * 400,
|
|
95
|
+
res = 100
|
|
96
|
+
)
|
|
97
|
+
}
|
|
98
|
+
} else if ("feature" %in% case$kind) {
|
|
99
|
+
case$kind = "feature"
|
|
100
|
+
if (is.null(case$cols)) {
|
|
101
|
+
case$cols = c("lightgrey", pal_ucscgb()(1))
|
|
102
|
+
}
|
|
103
|
+
excluded_args = c(excluded_args, "group.by", "assay")
|
|
104
|
+
case$shape.by = case$group.by
|
|
105
|
+
fn = FeaturePlot
|
|
106
|
+
default_devpars = function(features, ncol, uidents) {
|
|
107
|
+
if (is.null(ncol)) { ncol = 1 }
|
|
108
|
+
list(
|
|
109
|
+
width = 400 * ncol,
|
|
110
|
+
height = ceiling(length(features) / ncol) * 300,
|
|
111
|
+
res = 100
|
|
112
|
+
)
|
|
113
|
+
}
|
|
114
|
+
} else if ("dot" %in% case$kind) {
|
|
115
|
+
case$kind = "dot"
|
|
116
|
+
if (is.null(case$cols)) {
|
|
117
|
+
case$cols = c("lightgrey", pal_ucscgb()(1))
|
|
118
|
+
}
|
|
119
|
+
if (is.null(case$plus)) {
|
|
120
|
+
case$plus = 'theme_prism(axis_text_angle=90)'
|
|
121
|
+
}
|
|
122
|
+
excluded_args = c(excluded_args, "slot", "ncol")
|
|
123
|
+
fn = DotPlot
|
|
124
|
+
default_devpars = function(features, ncol, uidents) {
|
|
125
|
+
list(
|
|
126
|
+
height = max(length(uidents) * 80 + 150, 420),
|
|
127
|
+
width = length(features) * 50 + 150,
|
|
128
|
+
res = 100
|
|
129
|
+
)
|
|
130
|
+
}
|
|
131
|
+
} else if ("heatmap" %in% case$kind) {
|
|
132
|
+
case$kind = "heatmap"
|
|
133
|
+
case = list_update(
|
|
134
|
+
list(
|
|
135
|
+
group.colors = pal_ucscgb(alpha = .8)(26),
|
|
136
|
+
size = 3.5,
|
|
137
|
+
group.bar.height = 0.01
|
|
138
|
+
),
|
|
139
|
+
case
|
|
140
|
+
)
|
|
141
|
+
if (is.null(case$plus)) {
|
|
142
|
+
case$plus = 'scale_fill_gradientn(colors = c("lightgrey", pal_ucscgb()(1)), na.value = "white")'
|
|
143
|
+
}
|
|
144
|
+
excluded_args = c(excluded_args, "group.by", "split.by", "downsample", "ncol")
|
|
145
|
+
fn = DoHeatmap
|
|
146
|
+
default_devpars = function(features, ncol, uidents) {
|
|
147
|
+
list(
|
|
148
|
+
width = length(uidents) * 60 + 150,
|
|
149
|
+
height = length(features) * 40 + 150,
|
|
150
|
+
res = 100
|
|
151
|
+
)
|
|
152
|
+
}
|
|
153
|
+
} else if ("table" %in% case$kind) {
|
|
154
|
+
case$kind = "table"
|
|
155
|
+
excluded_args = c(excluded_args, "group.by", "split.by", "assay")
|
|
156
|
+
case$assays = case$assay
|
|
157
|
+
fn = AverageExpression
|
|
158
|
+
if (is.null(case$slot)) {
|
|
159
|
+
case$slot = "data"
|
|
160
|
+
}
|
|
161
|
+
} else {
|
|
162
|
+
stop("Unknown kind of plot")
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
for (arg in excluded_args) {
|
|
166
|
+
assign(arg, case[[arg]])
|
|
167
|
+
case[[arg]] = NULL
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
if (is.character(subset)) {
|
|
171
|
+
case$object = srtobj %>% tidyseurat::filter(!!rlang::parse_expr(subset))
|
|
172
|
+
} else {
|
|
173
|
+
case$object = srtobj
|
|
174
|
+
}
|
|
175
|
+
if (!is.null(ident)) {
|
|
176
|
+
Idents(case$object) = ident
|
|
177
|
+
}
|
|
178
|
+
case$features = .get_features(case$features)
|
|
179
|
+
if (!is.null(case$ncol)) {
|
|
180
|
+
case$ncol = min(case$ncol, length(case$features))
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
toc$kind = kind
|
|
184
|
+
if (kind == "table") {
|
|
185
|
+
expr = do_call(fn, case)$RNA %>%
|
|
186
|
+
as.data.frame() %>%
|
|
187
|
+
rownames_to_column("Feature") %>%
|
|
188
|
+
select(Feature, everything())
|
|
189
|
+
|
|
190
|
+
toc$file = paste0(slugify(name), ".txt")
|
|
191
|
+
write.table(expr, file.path(odir, toc$file), sep="\t", quote=FALSE, row.names=FALSE)
|
|
192
|
+
} else {
|
|
193
|
+
devpars = list_update(
|
|
194
|
+
default_devpars(case$features, case$ncol, unique(Idents(case$object))),
|
|
195
|
+
devpars
|
|
196
|
+
)
|
|
197
|
+
if (kind == "heatmap") {
|
|
198
|
+
if (!exists("downsample") || is.null(downsample)) {
|
|
199
|
+
downsample = "average"
|
|
200
|
+
}
|
|
201
|
+
if (downsample %in% c("average", "mean")) {
|
|
202
|
+
case$object = AverageExpression(case$object, return.seurat = TRUE)
|
|
203
|
+
} else if (is.integer(downsample)) {
|
|
204
|
+
case$object = subset(case$object, downsample = downsample)
|
|
205
|
+
} else {
|
|
206
|
+
stop("Unknown downsample method.")
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
p = do_call(fn, case)
|
|
210
|
+
if (!is.null(plus)) {
|
|
211
|
+
for (pls in plus) {
|
|
212
|
+
p = p + eval(parse(text = pls))
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
figfile = file.path(odir, paste0(slugify(name), ".", kind, ".png"))
|
|
216
|
+
toc$file = basename(figfile)
|
|
217
|
+
png(figfile, width=devpars$width, height=devpars$height, res=devpars$res)
|
|
218
|
+
tryCatch({
|
|
219
|
+
print(p)
|
|
220
|
+
}, error = function(e) {
|
|
221
|
+
stop(
|
|
222
|
+
paste(
|
|
223
|
+
paste(names(devpars), collapse=" "),
|
|
224
|
+
paste(devpars, collapse=" "),
|
|
225
|
+
e,
|
|
226
|
+
sep = "\n"
|
|
227
|
+
)
|
|
228
|
+
)
|
|
229
|
+
})
|
|
230
|
+
dev.off()
|
|
231
|
+
}
|
|
232
|
+
.add_toc(if (is.null(section)) name else section, toc)
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
sapply(names(features), do_one_features)
|
|
236
|
+
.save_toc()
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# Loaded variables: srtfile, outdir, srtobj
|
|
2
|
+
|
|
3
|
+
stats_defaults = {{envs.stats_defaults | r: todot="-"}}
|
|
4
|
+
stats = {{envs.stats | r: todot="-", skip=1}}
|
|
5
|
+
|
|
6
|
+
odir = file.path(outdir, "stats")
|
|
7
|
+
dir.create(odir, recursive=TRUE, showWarnings=FALSE)
|
|
8
|
+
report_toc_file = file.path(odir, "report_toc.json")
|
|
9
|
+
# Realname => {bar: ..., pie: ..., table: ...}
|
|
10
|
+
report_toc = list()
|
|
11
|
+
|
|
12
|
+
.add_toc = function(name, toc) {
|
|
13
|
+
report_toc[[name]] <<- toc
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
.save_toc = function() {
|
|
17
|
+
writeLines(toJSON(report_toc, pretty = TRUE, auto_unbox = TRUE), report_toc_file)
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
do_one_stats = function(name) {
|
|
21
|
+
print(paste0("Doing stats for: ", name))
|
|
22
|
+
|
|
23
|
+
toc = list()
|
|
24
|
+
|
|
25
|
+
case = list_update(stats_defaults, stats[[name]])
|
|
26
|
+
case$devpars = list_update(stats_defaults$devpars, case$devpars)
|
|
27
|
+
if (isTRUE(case$pie) && !is.null(case$group.by)) {
|
|
28
|
+
stop("pie charts are not supported for group-by")
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
figfile = file.path(odir, paste0(slugify(name), ".bar.png"))
|
|
32
|
+
piefile = file.path(odir, paste0(slugify(name), ".pie.png"))
|
|
33
|
+
tablefile = file.path(odir, paste0(slugify(name), ".txt"))
|
|
34
|
+
|
|
35
|
+
df_cells = srtobj@meta.data
|
|
36
|
+
if (!is.null(case$subset)) {
|
|
37
|
+
df_cells = df_cells %>% filter(!!rlang::parse_expr(case$subset))
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
select_cols = c(case$ident, case$group.by, case$split.by)
|
|
41
|
+
df_cells = df_cells %>%
|
|
42
|
+
select(all_of(select_cols)) %>%
|
|
43
|
+
group_by(!!!syms(select_cols)) %>%
|
|
44
|
+
summarise(.n = n(), .groups = "drop") %>%
|
|
45
|
+
mutate(.frac = .n / sum(.n))
|
|
46
|
+
|
|
47
|
+
if (isTRUE(case$table)) {
|
|
48
|
+
toc$table = basename(tablefile)
|
|
49
|
+
write.table(df_cells, tablefile, sep="\t", quote=FALSE, row.names=FALSE)
|
|
50
|
+
}
|
|
51
|
+
if (isTRUE(case$pie)) {
|
|
52
|
+
p_pie = df_cells %>%
|
|
53
|
+
arrange(!!sym(case$ident)) %>%
|
|
54
|
+
ggplot(aes(x="", y=.n, fill=rev(!!sym(case$ident)))) +
|
|
55
|
+
geom_bar(stat="identity", width=1, alpha=.8) +
|
|
56
|
+
coord_polar("y", start=0) +
|
|
57
|
+
guides(fill = guide_legend(reverse = TRUE, title = NULL)) +
|
|
58
|
+
theme_void() +
|
|
59
|
+
geom_label_repel(
|
|
60
|
+
if (isTRUE(case$frac)) aes(label=sprintf("%.1f%%", .frac * 100)) else aes(label=.n),
|
|
61
|
+
position = position_stack(vjust = 0.5),
|
|
62
|
+
color="#333333",
|
|
63
|
+
fill="#EEEEEE",
|
|
64
|
+
size=5
|
|
65
|
+
) +
|
|
66
|
+
scale_fill_ucscgb(alpha=.8)
|
|
67
|
+
|
|
68
|
+
if (!is.null(case$split.by)) {
|
|
69
|
+
p_pie = p_pie + facet_wrap(case$split.by)
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
toc$pie = basename(piefile)
|
|
73
|
+
png(piefile, width=case$devpars$width, height=case$devpars$height, res=case$devpars$res)
|
|
74
|
+
print(p_pie)
|
|
75
|
+
dev.off()
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
ngroups = ifelse(is.null(case$group.by), 1, length(unique(df_cells[[case$group.by]])))
|
|
79
|
+
nidents = length(unique(df_cells[[case$ident]]))
|
|
80
|
+
bar_position = ifelse(ngroups > 5, "stack", "dodge")
|
|
81
|
+
p = df_cells %>%
|
|
82
|
+
ggplot(aes(
|
|
83
|
+
x=!!sym(case$ident),
|
|
84
|
+
y=if (isTRUE(case$frac)) .frac else .n,
|
|
85
|
+
fill=!!sym(ifelse(is.null(case$group.by), case$ident, case$group.by))
|
|
86
|
+
)) +
|
|
87
|
+
geom_bar(stat="identity", position=bar_position, alpha=.8) +
|
|
88
|
+
theme_prism(axis_text_angle = 90) +
|
|
89
|
+
scale_fill_manual(values=rep(pal_ucscgb(alpha=.8)(26), 10)[1:max(ngroups, nidents)]) +
|
|
90
|
+
ylab(ifelse(isTRUE(case$frac), "Fraction of cells", "Number of cells"))
|
|
91
|
+
|
|
92
|
+
if (!is.null(case$split.by)) {
|
|
93
|
+
p = p + facet_wrap(case$split.by)
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
toc$bar = basename(figfile)
|
|
97
|
+
png(figfile, width=case$devpars$width, height=case$devpars$height, res=case$devpars$res)
|
|
98
|
+
print(p)
|
|
99
|
+
dev.off()
|
|
100
|
+
|
|
101
|
+
.add_toc(name, toc)
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
sapply(names(stats), do_one_stats)
|
|
105
|
+
.save_toc()
|