biopipen 0.21.1__py3-none-any.whl → 0.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +142 -0
- biopipen/ns/scrna.py +19 -1
- biopipen/ns/tcr.py +27 -0
- biopipen/reports/delim/SampleInfo.svelte +2 -22
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna/ScFGSEA.svelte +4 -23
- biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
- biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
- biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -155
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/scripts/delim/SampleInfo.R +41 -7
- biopipen/scripts/scrna/CellsDistribution.R +121 -16
- biopipen/scripts/scrna/MarkersFinder.R +245 -100
- biopipen/scripts/scrna/MetaMarkers.R +163 -82
- biopipen/scripts/scrna/RadarPlots.R +163 -110
- biopipen/scripts/scrna/ScFGSEA.R +51 -11
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
- biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
- biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
- biopipen/scripts/scrna/SeuratClustering.R +73 -26
- biopipen/scripts/scrna/SeuratPreparing.R +93 -19
- biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
- biopipen/scripts/tcr/CDR3AAPhyschem.R +122 -9
- biopipen/scripts/tcr/CloneResidency.R +114 -34
- biopipen/scripts/tcr/Immunarch-basic.R +26 -6
- biopipen/scripts/tcr/Immunarch-clonality.R +22 -2
- biopipen/scripts/tcr/Immunarch-diversity.R +132 -22
- biopipen/scripts/tcr/Immunarch-geneusage.R +33 -4
- biopipen/scripts/tcr/Immunarch-kmer.R +57 -7
- biopipen/scripts/tcr/Immunarch-overlap.R +72 -3
- biopipen/scripts/tcr/Immunarch-spectratyping.R +28 -5
- biopipen/scripts/tcr/Immunarch-tracking.R +33 -4
- biopipen/scripts/tcr/Immunarch-vjjunc.R +118 -0
- biopipen/scripts/tcr/Immunarch.R +17 -0
- biopipen/scripts/tcr/TCRClusterStats.R +124 -11
- biopipen/scripts/tcr/TESSA.R +43 -11
- biopipen/utils/misc.R +96 -1
- {biopipen-0.21.1.dist-info → biopipen-0.22.0.dist-info}/METADATA +1 -1
- {biopipen-0.21.1.dist-info → biopipen-0.22.0.dist-info}/RECORD +58 -57
- {biopipen-0.21.1.dist-info → biopipen-0.22.0.dist-info}/WHEEL +0 -0
- {biopipen-0.21.1.dist-info → biopipen-0.22.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
log_info("")
log_info("#####################################")
log_info("# VJ Junction Circos Plots #")
log_info("#####################################")

# circlize is already required by immunarch, so this adds no new dependency.
library(circlize)

# Settings for this section, rendered into R code from `envs.vj_junc`
# by the template engine before the script runs.
vj_juncs <- {{envs.vj_junc | r}}

log_info("Filling up cases ...")

# Exact `[[` indexing is used on purpose. `$` partially matches list names,
# so `x$by` silently returns `x$by_clones` when `by` itself is absent; a case
# that only sets `by_clones` would then never receive the default `by`.
cases <- vj_juncs[["cases"]]
if (is.null(cases) || length(cases) == 0) {
    # No explicit cases: build a single DEFAULT case from the section-level
    # settings.
    cases <- list(
        DEFAULT = list(
            by = vj_juncs[["by"]],
            by_clones = vj_juncs[["by_clones"]],
            subset = vj_juncs[["subset"]],
            devpars = vj_juncs[["devpars"]]
        )
    )
} else {
    # Explicit cases: every setting a case leaves out falls back to the
    # section-level value.
    for (name in names(cases)) {
        for (key in c("by", "by_clones", "subset", "devpars")) {
            if (is.null(cases[[name]][[key]])) {
                cases[[name]][[key]] <- vj_juncs[[key]]
            }
        }
        # Device parameters are defaulted one by one so that a case may
        # override only some of them.
        for (key in c("width", "height", "res")) {
            if (is.null(cases[[name]][["devpars"]][[key]])) {
                cases[[name]][["devpars"]][[key]] <- vj_juncs[["devpars"]][[key]]
            }
        }
    }
}

# All plots of this section are written below <outdir>/vj_junc
# (`outdir` is defined by the including script).
vjjunc_dir <- file.path(outdir, "vj_junc")
dir.create(vjjunc_dir, showWarnings = FALSE, recursive = TRUE)
|
|
48
|
+
|
|
49
|
+
# Draw the V-J junction circos plots for one case.
#
# The (optionally subset) expanded data is split by the `by` column(s) of the
# case; for each group, V/J gene pairs are counted and drawn as a chord
# diagram saved to <vjjunc_dir>/<case>/<group>.png, which is then registered
# in the report.
#
# @param name Name of the case; "DEFAULT" gets no sub-heading in the report.
# @param case List of settings; this function reads `subset`, `by`,
#   `by_clones` and `devpars` (`width`, `height`, `res`).
# @return A list with one NULL per group (the function is called for its
#   side effects).
#
# NOTE(review): relies on `exdata`, `vjjunc_dir`, `filter_expanded_immdata`,
# `slugify` and `add_report` being defined by the including script.
do_one_case_vjjunc <- function(name, case) {
    log_info("Processing case: {name} ...")
    odir <- file.path(vjjunc_dir, slugify(name, tolower = FALSE))
    dir.create(odir, showWarnings = FALSE)

    if (!is.null(case[["subset"]])) {
        d <- filter_expanded_immdata(exdata, case[["subset"]])
    } else {
        d <- exdata
    }

    # Exact `[[` matching: `case$by` would partially match `by_clones` when
    # `by` is absent and hand a logical to strsplit() below.
    by_spec <- case[["by"]]
    if (is.null(by_spec) || length(by_spec) == 0) {
        by_spec <- "Sample"
    }

    # `by` may name several columns, separated by commas.
    by <- trimws(strsplit(by_spec, ",")[[1]])

    lapply(group_split(d, !!!syms(by)), function(gsd) {
        # Label of this group: the values of the grouping columns, joined.
        by_name <- gsd[1, by, drop = FALSE] %>% unlist() %>% paste0(collapse = "-")
        log_info("- Processing {by_name} ...")

        if (isTRUE(case[["by_clones"]])) {
            # Count each distinct CDR3 amino-acid sequence only once.
            gsd <- gsd %>% distinct(CDR3.aa, .keep_all = TRUE)
        }
        gsd <- gsd %>%
            group_by(V.name, J.name) %>%
            summarise(Size = n(), .groups = "drop") %>%
            filter(!is.na(V.name) & !is.na(J.name) & V.name != "None" & J.name != "None") %>%
            arrange(V.name, J.name)

        if (nrow(gsd) == 0) {
            # Nothing to draw; chordDiagram() cannot handle an empty table.
            log_warn("- No valid V-J pairs for {by_name}, skipped.")
            return(NULL)
        }

        figfile <- file.path(odir, paste0(slugify(by_name), ".png"))
        png(
            figfile,
            width = case[["devpars"]][["width"]],
            height = case[["devpars"]][["height"]],
            res = case[["devpars"]][["res"]]
        )
        tryCatch(
            {
                chordDiagram(
                    gsd,
                    annotationTrack = c("grid", "axis"),
                    preAllocateTracks = list(track.height = 0.25)
                )
                # Write the sector (gene) names into the pre-allocated track.
                circos.track(track.index = 1, panel.fun = function(x, y) {
                    circos.text(
                        CELL_META$xcenter,
                        CELL_META$ylim[1],
                        CELL_META$sector.index,
                        cex = .8,
                        facing = "clockwise",
                        niceFacing = TRUE,
                        adj = c(-0.2, 0.5)
                    )
                }, bg.border = NA) # here set bg.border to NA is important
            },
            finally = {
                # Always reset circlize's global state and close the device,
                # even when plotting fails, so the next group starts clean.
                circos.clear()
                dev.off()
            }
        )

        add_report(
            list(src = figfile, name = by_name),
            h1 = "V-J Junction Circos Plots",
            h2 = if (name == "DEFAULT") "#" else name,
            ui = "table_of_images"
        )

        NULL
    })
}
|
|
109
|
+
|
|
110
|
+
# Section description shown at the top of the report section.
add_report(
    list(
        kind = "descr",
        content = "V-J usage plot displaying the frequency of various V-J junctions."
    ),
    h1 = "V-J Junction Circos Plots"
)

# Run every case for its side effects (plots and report entries). A plain
# loop is used because nothing is collected, and a top-level sapply() would
# print its (all-NULL) result into the job output when run with Rscript.
for (name in names(cases)) {
    do_one_case_vjjunc(name, cases[[name]])
}
|
biopipen/scripts/tcr/Immunarch.R
CHANGED
|
@@ -11,18 +11,26 @@ library(dplyr)
|
|
|
11
11
|
library(glue)
|
|
12
12
|
library(tidyr)
|
|
13
13
|
library(tibble)
|
|
14
|
+
library(logger)
|
|
15
|
+
library(slugify)
|
|
14
16
|
|
|
17
|
+
log_info("Loading arguments ...")
|
|
15
18
|
theme_set(theme_prism())
|
|
16
19
|
|
|
17
20
|
immfile = {{ in.immdata | r }}
|
|
18
21
|
metafile = {{ in.metafile | r }}
|
|
19
22
|
outdir = {{ out.outdir | r }}
|
|
23
|
+
joboutdir = {{ job.outdir | r }}
|
|
20
24
|
mutaters = {{ envs.mutaters | r }}
|
|
21
25
|
prefix = {{ envs.prefix | r }}
|
|
22
26
|
|
|
27
|
+
log_info("Loading immdata ...")
|
|
23
28
|
immdata = readRDS(immfile)
|
|
29
|
+
|
|
30
|
+
log_info("Expanding immdata ...")
|
|
24
31
|
exdata = expand_immdata(immdata)
|
|
25
32
|
|
|
33
|
+
log_info("Loading metadata if provided ...")
|
|
26
34
|
if (endsWith(metafile, ".rds") || endsWith(metafile, ".RDS")) {
|
|
27
35
|
meta = readRDS(metafile)@meta.data
|
|
28
36
|
} else if (!is.null(metafile) && nchar(metafile) > 0) {
|
|
@@ -31,6 +39,7 @@ if (endsWith(metafile, ".rds") || endsWith(metafile, ".RDS")) {
|
|
|
31
39
|
meta = NULL
|
|
32
40
|
}
|
|
33
41
|
|
|
42
|
+
log_info("Merging metadata if provided ...")
|
|
34
43
|
if (!is.null(meta)) {
|
|
35
44
|
cell_ids = glue_data(exdata, paste0(prefix, "{Barcode}"))
|
|
36
45
|
dup_names = intersect(colnames(meta), colnames(exdata))
|
|
@@ -43,6 +52,7 @@ if (!is.null(meta)) {
|
|
|
43
52
|
}
|
|
44
53
|
rm(meta)
|
|
45
54
|
|
|
55
|
+
log_info("Mutating data if `envs.mutaters` is provided ...")
|
|
46
56
|
if (!is.null(mutaters) && length(mutaters) > 0) {
|
|
47
57
|
exdata = mutate(exdata, !!!lapply(mutaters, parse_expr))
|
|
48
58
|
}
|
|
@@ -88,3 +98,10 @@ n_samples = length(immdata$data)
|
|
|
88
98
|
# K-mer analysis #
|
|
89
99
|
######################
|
|
90
100
|
{% include biopipen_dir + "/scripts/tcr/Immunarch-kmer.R" %}
|
|
101
|
+
|
|
102
|
+
######################
|
|
103
|
+
# VJ junction #
|
|
104
|
+
######################
|
|
105
|
+
{% include biopipen_dir + "/scripts/tcr/Immunarch-vjjunc.R" %}
|
|
106
|
+
|
|
107
|
+
save_report(joboutdir)
|
|
@@ -6,13 +6,16 @@ library(dplyr)
|
|
|
6
6
|
library(rlang)
|
|
7
7
|
library(immunarch)
|
|
8
8
|
library(ggprism)
|
|
9
|
+
library(slugify)
|
|
9
10
|
|
|
10
11
|
immfile = {{in.immfile | quote}}
|
|
11
12
|
outdir = {{out.outdir | quote}}
|
|
12
13
|
cluster_size_envs = {{envs.cluster_size | r}}
|
|
13
14
|
shared_clusters_envs = {{envs.shared_clusters | r}}
|
|
14
15
|
sample_diversity_envs = {{envs.sample_diversity | r}}
|
|
16
|
+
joboutdir = {{job.outdir | r}}
|
|
15
17
|
|
|
18
|
+
log_info("Expanding analysis cases ...")
|
|
16
19
|
expand_cases = function(envs) {
|
|
17
20
|
cases = envs$cases
|
|
18
21
|
envs$cases = NULL
|
|
@@ -51,8 +54,9 @@ shared_clusters_cases = expand_cases(shared_clusters_envs)
|
|
|
51
54
|
sample_diversity_cases = expand_cases(sample_diversity_envs)
|
|
52
55
|
|
|
53
56
|
cluster_size_distribution = function(name) {
|
|
54
|
-
|
|
55
|
-
|
|
57
|
+
log_info("- Working on cluster size distribution: {name}")
|
|
58
|
+
|
|
59
|
+
odir = file.path(outdir, "ClusterSizeDistribution", slugify(name, tolower = FALSE))
|
|
56
60
|
dir.create(odir, showWarnings = FALSE, recursive = TRUE)
|
|
57
61
|
case = cluster_size_cases[[name]]
|
|
58
62
|
|
|
@@ -75,16 +79,28 @@ cluster_size_distribution = function(name) {
|
|
|
75
79
|
ggs = c(
|
|
76
80
|
"theme_prism()",
|
|
77
81
|
"scale_y_continuous(trans='log10')",
|
|
78
|
-
"labs(x='TCR cluster size', y='Count')"
|
|
82
|
+
"labs(x='TCR cluster size', y='Count')",
|
|
83
|
+
"scale_fill_biopipen()"
|
|
79
84
|
),
|
|
80
85
|
devpars = case$devpars,
|
|
81
86
|
outfile = outplot
|
|
82
87
|
)
|
|
88
|
+
|
|
89
|
+
add_report(
|
|
90
|
+
list(
|
|
91
|
+
src = outplot,
|
|
92
|
+
name = ifelse(name == "DEFAULT", FALSE, name),
|
|
93
|
+
descr = paste0("Cluster size distribution for each ", case$by)
|
|
94
|
+
),
|
|
95
|
+
ui = "table_of_images",
|
|
96
|
+
h1 = "Cluster Size Distribution"
|
|
97
|
+
)
|
|
83
98
|
}
|
|
84
99
|
|
|
85
100
|
shared_clusters = function(name) {
|
|
86
|
-
|
|
87
|
-
|
|
101
|
+
log_info("- Working on shared clusters: {name}")
|
|
102
|
+
|
|
103
|
+
odir = file.path(outdir, "SharedClusters", slugify(name, tolower = FALSE))
|
|
88
104
|
dir.create(odir, showWarnings = FALSE, recursive = TRUE)
|
|
89
105
|
case = shared_clusters_cases[[name]]
|
|
90
106
|
if (!is.null(case$grouping)) {
|
|
@@ -139,12 +155,21 @@ shared_clusters = function(name) {
|
|
|
139
155
|
),
|
|
140
156
|
devpars = case$devpars,
|
|
141
157
|
outfile = file.path(odir, "shared_clusters.png")
|
|
158
|
+
)
|
|
142
159
|
|
|
160
|
+
add_report(
|
|
161
|
+
list(
|
|
162
|
+
src = file.path(odir, "shared_clusters.png"),
|
|
163
|
+
name = ifelse(name == "DEFAULT", FALSE, name),
|
|
164
|
+
descr = paste0("Shared TCR clusters across samples")
|
|
165
|
+
),
|
|
166
|
+
ui = "table_of_images",
|
|
167
|
+
h1 = "Shared TCR Clusters"
|
|
143
168
|
)
|
|
144
169
|
}
|
|
145
170
|
|
|
146
171
|
shared_clusters_by_grouping = function(name) {
|
|
147
|
-
odir = file.path(outdir, "SharedClusters", name)
|
|
172
|
+
odir = file.path(outdir, "SharedClusters", slugify(name, tolower = FALSE))
|
|
148
173
|
case = shared_clusters_cases[[name]]
|
|
149
174
|
|
|
150
175
|
data = list()
|
|
@@ -176,12 +201,47 @@ shared_clusters_by_grouping = function(name) {
|
|
|
176
201
|
devpars = case$devpars,
|
|
177
202
|
outfile = outfile
|
|
178
203
|
)
|
|
204
|
+
|
|
205
|
+
add_report(
|
|
206
|
+
list(
|
|
207
|
+
src = outfile,
|
|
208
|
+
name = ifelse(name == "DEFAULT", FALSE, name),
|
|
209
|
+
descr = paste0("Shared TCR clusters across ", grouping)
|
|
210
|
+
),
|
|
211
|
+
ui = "table_of_images",
|
|
212
|
+
h1 = "Shared TCR Clusters"
|
|
213
|
+
)
|
|
179
214
|
}
|
|
180
215
|
|
|
181
216
|
|
|
217
|
+
div_methods = list(
|
|
218
|
+
gini = list(
|
|
219
|
+
name = "The Gini coefficient",
|
|
220
|
+
descr = "The Gini coefficient is a measure of statistical dispersion intended to represent the income or wealth distribution of a nation's residents, and is the most commonly used measurement of inequality."
|
|
221
|
+
),
|
|
222
|
+
gini.simp = list(
|
|
223
|
+
name = "The Gini-Simpson index",
|
|
224
|
+
descr = "The Gini-Simpson index is a measure of diversity. It is one of the most commonly used in ecology. It is also known as the Simpson index, the Simpson concentration index, the Simpson dominance index, or the Simpson diversity index."
|
|
225
|
+
),
|
|
226
|
+
inv.simp = list(
|
|
227
|
+
name = "The inverse Simpson index",
|
|
228
|
+
descr = "It is the effective number of types that is obtained when
|
|
229
|
+
the weighted arithmetic mean is used to quantify average
|
|
230
|
+
proportional abundance of types in the dataset of interest."
|
|
231
|
+
),
|
|
232
|
+
div = list(
|
|
233
|
+
name = "The true diversity",
|
|
234
|
+
descr = "It refers to the number of equally abundant types needed
|
|
235
|
+
for the average proportional abundance of the types to
|
|
236
|
+
equal that observed in the dataset of interest where all
|
|
237
|
+
types may not be equally abundant."
|
|
238
|
+
)
|
|
239
|
+
)
|
|
240
|
+
|
|
182
241
|
sample_diversity = function(name) {
|
|
183
|
-
|
|
184
|
-
|
|
242
|
+
log_info("- Working on sample diversity: {name}")
|
|
243
|
+
|
|
244
|
+
odir = file.path(outdir, "SampleDiversity", slugify(name, tolower = FALSE))
|
|
185
245
|
dir.create(odir, showWarnings = FALSE, recursive = TRUE)
|
|
186
246
|
case = sample_diversity_cases[[name]]
|
|
187
247
|
|
|
@@ -192,7 +252,19 @@ sample_diversity = function(name) {
|
|
|
192
252
|
outfile = file.path(odir, "diversity.txt")
|
|
193
253
|
outplot = file.path(odir, "diversity.png")
|
|
194
254
|
div = repDiversity(data, .method = case$method)
|
|
195
|
-
write.table(
|
|
255
|
+
write.table(
|
|
256
|
+
if (ncol(div) == 1) {
|
|
257
|
+
as.data.frame(div) %>% rownames_to_column("Sample")
|
|
258
|
+
} else {
|
|
259
|
+
div
|
|
260
|
+
},
|
|
261
|
+
outfile,
|
|
262
|
+
row.names=TRUE,
|
|
263
|
+
col.names=TRUE,
|
|
264
|
+
quote=FALSE,
|
|
265
|
+
sep="\t"
|
|
266
|
+
)
|
|
267
|
+
|
|
196
268
|
if (case$method == "gini") {
|
|
197
269
|
div = as.data.frame(div) %>% rownames_to_column("Sample")
|
|
198
270
|
colnames(div)[2] = "gini"
|
|
@@ -201,7 +273,8 @@ sample_diversity = function(name) {
|
|
|
201
273
|
mapping = aes(x = Sample, y = gini, fill = Sample)
|
|
202
274
|
ggs = c(
|
|
203
275
|
"theme_prism(axis_text_angle = 90)",
|
|
204
|
-
"labs(title='Gini coefficient', subtitle='Sample diversity estimation using the Gini coefficient')"
|
|
276
|
+
"labs(title='Gini coefficient', subtitle='Sample diversity estimation using the Gini coefficient')",
|
|
277
|
+
"scale_fill_biopipen()"
|
|
205
278
|
)
|
|
206
279
|
if (is.null(case$by) || length(case$by) == 0) {
|
|
207
280
|
|
|
@@ -225,7 +298,6 @@ sample_diversity = function(name) {
|
|
|
225
298
|
devpars = case$devpars,
|
|
226
299
|
outfile = outplot
|
|
227
300
|
)
|
|
228
|
-
|
|
229
301
|
} else {
|
|
230
302
|
if (is.null(case$by) || length(case$by) == 0) {
|
|
231
303
|
p = vis(div)
|
|
@@ -243,6 +315,41 @@ sample_diversity = function(name) {
|
|
|
243
315
|
print(p)
|
|
244
316
|
dev.off()
|
|
245
317
|
}
|
|
318
|
+
|
|
319
|
+
add_report(
|
|
320
|
+
list(
|
|
321
|
+
ui = "flat",
|
|
322
|
+
label = "Diversity Plot",
|
|
323
|
+
contents = list(
|
|
324
|
+
list(
|
|
325
|
+
kind = "descr",
|
|
326
|
+
content = paste(
|
|
327
|
+
div_methods[[case$method]]$name,
|
|
328
|
+
ifelse(
|
|
329
|
+
is.null(case$by) || length(case$by) == 0,
|
|
330
|
+
"",
|
|
331
|
+
paste0(" grouped by ", paste(case$by, collapse = ", "))
|
|
332
|
+
),
|
|
333
|
+
div_methods[[case$method]]$descr
|
|
334
|
+
)
|
|
335
|
+
),
|
|
336
|
+
list(
|
|
337
|
+
kind = "image",
|
|
338
|
+
src = outplot
|
|
339
|
+
)
|
|
340
|
+
)
|
|
341
|
+
),
|
|
342
|
+
list(
|
|
343
|
+
ui = "flat",
|
|
344
|
+
label = "Diversity Table",
|
|
345
|
+
contents = list(
|
|
346
|
+
list(kind = "table", src = outfile, data = list(index_col = 0))
|
|
347
|
+
)
|
|
348
|
+
),
|
|
349
|
+
ui = "tabs",
|
|
350
|
+
h2 = ifelse(name == "DEFAULT", "#", name),
|
|
351
|
+
h1 = "Sample Diversity using TCR clusters"
|
|
352
|
+
)
|
|
246
353
|
}
|
|
247
354
|
|
|
248
355
|
|
|
@@ -250,14 +357,20 @@ sample_diversity = function(name) {
|
|
|
250
357
|
# main
|
|
251
358
|
# --------------------------------------------------
|
|
252
359
|
# Load immunarch data
|
|
360
|
+
log_info("Loading immunarch data ...")
|
|
253
361
|
immdata = readRDS(immfile)
|
|
254
362
|
|
|
255
363
|
# Cluster size distribution
|
|
364
|
+
log_info("Performing cluster size distribution analysis ...")
|
|
256
365
|
sapply(names(cluster_size_cases), cluster_size_distribution)
|
|
257
366
|
|
|
258
367
|
# Shared clusters
|
|
368
|
+
log_info("Performing shared clusters analysis ...")
|
|
259
369
|
sapply(names(shared_clusters_cases), shared_clusters)
|
|
260
370
|
|
|
261
371
|
# Diversity
|
|
372
|
+
log_info("Performing sample diversity analysis ...")
|
|
262
373
|
sapply(names(sample_diversity_cases), sample_diversity)
|
|
374
|
+
|
|
375
|
+
save_report(joboutdir)
|
|
263
376
|
}
|
biopipen/scripts/tcr/TESSA.R
CHANGED
|
@@ -11,6 +11,7 @@ library(ggprism)
|
|
|
11
11
|
immfile <- {{in.immdata | r}}
|
|
12
12
|
exprfile <- {{in.srtobj | r}}
|
|
13
13
|
outfile <- {{out.outfile | r}}
|
|
14
|
+
joboutdir <- {{job.outdir | r}}
|
|
14
15
|
python <- {{envs.python | r}}
|
|
15
16
|
within_sample <- {{envs.within_sample | r}}
|
|
16
17
|
assay <- {{envs.assay | r}}
|
|
@@ -27,7 +28,7 @@ if (!dir.exists(tessa_dir)) dir.create(tessa_dir)
|
|
|
27
28
|
|
|
28
29
|
### Start preparing input files for TESSA
|
|
29
30
|
# Prepare input files
|
|
30
|
-
|
|
31
|
+
log_info("Preparing TCR input file ...")
|
|
31
32
|
immdata <- readRDS(immfile)
|
|
32
33
|
|
|
33
34
|
has_VJ <- "V.name" %in% colnames(immdata$data[[1]]) && "J.name" %in% colnames(immdata$data[[1]])
|
|
@@ -76,7 +77,7 @@ if (has_VJ) {
|
|
|
76
77
|
}
|
|
77
78
|
|
|
78
79
|
|
|
79
|
-
|
|
80
|
+
log_info("Preparing expression input file ...")
|
|
80
81
|
is_seurat <- endsWith(tolower(exprfile), ".rds")
|
|
81
82
|
is_gz <- endsWith(tolower(exprfile), ".gz")
|
|
82
83
|
|
|
@@ -94,10 +95,10 @@ cell_ids <- intersect(tcrdata$contig_id, colnames(expr))
|
|
|
94
95
|
unused_tcr_cells <- setdiff(tcrdata$contig_id, cell_ids)
|
|
95
96
|
unused_expr_cells <- setdiff(colnames(expr), cell_ids)
|
|
96
97
|
if (length(unused_tcr_cells) > 0) {
|
|
97
|
-
|
|
98
|
+
log_warn(glue("{length(unused_tcr_cells)}/{nrow(tcrdata)} TCR cells are not used."))
|
|
98
99
|
}
|
|
99
100
|
if (length(unused_expr_cells) > 0) {
|
|
100
|
-
|
|
101
|
+
log_warn(glue("{length(unused_expr_cells)}/{ncol(expr)} expression cells are not used."))
|
|
101
102
|
}
|
|
102
103
|
if (length(cell_ids) == 0) {
|
|
103
104
|
stop("No common cells between TCR and expression data. Are you using the correct prefix?")
|
|
@@ -106,19 +107,19 @@ tcrdata <- tcrdata[tcrdata$contig_id %in% cell_ids, , drop=FALSE]
|
|
|
106
107
|
expr <- as.matrix(expr)[, tcrdata$contig_id, drop=FALSE]
|
|
107
108
|
|
|
108
109
|
# Write input files
|
|
109
|
-
|
|
110
|
+
log_info("Writing input files ...")
|
|
110
111
|
write.table(tcrdata, file.path(tessa_dir, "tcrdata.txt"), sep=",", quote=FALSE, row.names=FALSE)
|
|
111
112
|
write.table(expr, file.path(tessa_dir, "exprdata.txt"), sep=",", quote=FALSE, row.names=TRUE, col.names=TRUE)
|
|
112
113
|
|
|
113
114
|
### End preparing input files for TESSA
|
|
114
115
|
|
|
115
116
|
### Start running TESSA
|
|
116
|
-
|
|
117
|
+
log_info("Running TESSA ...")
|
|
117
118
|
|
|
118
119
|
# The original TESSA uses a python wrapper to run the encoder and tessa model
|
|
119
120
|
# here we run those two steps directly here
|
|
120
121
|
|
|
121
|
-
|
|
122
|
+
log_info("- Running encoder ...")
|
|
122
123
|
cmd_encoder <- paste(
|
|
123
124
|
python,
|
|
124
125
|
file.path(tessa_srcdir, "BriseisEncoder.py"),
|
|
@@ -140,14 +141,14 @@ if (has_VJ) {
|
|
|
140
141
|
file.path(tessa_dir, "tcr_vj.txt")
|
|
141
142
|
)
|
|
142
143
|
}
|
|
143
|
-
|
|
144
|
+
log_info(paste("- ", cmd_encoder))
|
|
144
145
|
|
|
145
146
|
rc <- system(cmd_encoder)
|
|
146
147
|
if (rc != 0) {
|
|
147
148
|
stop("Error: Failed to run encoder.")
|
|
148
149
|
}
|
|
149
150
|
|
|
150
|
-
|
|
151
|
+
log_info("- Running TESSA model ...")
|
|
151
152
|
source(file.path(tessa_srcdir, "real_data.R"))
|
|
152
153
|
|
|
153
154
|
tessa <- run_tessa(
|
|
@@ -162,7 +163,7 @@ tessa <- run_tessa(
|
|
|
162
163
|
)
|
|
163
164
|
|
|
164
165
|
# Save TESSA results
|
|
165
|
-
|
|
166
|
+
log_info("Saving TESSA results ...")
|
|
166
167
|
if (is_seurat) {
|
|
167
168
|
cells <- rownames(sobj@meta.data)
|
|
168
169
|
sobj@meta.data <- sobj@meta.data %>%
|
|
@@ -187,7 +188,7 @@ if (is_seurat) {
|
|
|
187
188
|
}
|
|
188
189
|
|
|
189
190
|
# Post analysis
|
|
190
|
-
|
|
191
|
+
log_info("Post analysis ...")
|
|
191
192
|
plot_tessa(tessa, result_dir)
|
|
192
193
|
plot_Tessa_clusters(tessa, result_dir)
|
|
193
194
|
|
|
@@ -201,3 +202,34 @@ p <- tessa$meta %>%
|
|
|
201
202
|
png(file.path(result_dir, "Cluster_size_dist.png"), width=8, height=8, units="in", res=100)
|
|
202
203
|
print(p)
|
|
203
204
|
dev.off()
|
|
205
|
+
|
|
206
|
+
add_report(
|
|
207
|
+
list(
|
|
208
|
+
src = file.path(result_dir, "Cluster_size_dist.png"),
|
|
209
|
+
descr = "Histogram of cluster size distribution"
|
|
210
|
+
),
|
|
211
|
+
list(
|
|
212
|
+
src = file.path(result_dir, "clone_size.png"),
|
|
213
|
+
descr = "Center cluster size vs. non-center cluster size"
|
|
214
|
+
),
|
|
215
|
+
list(
|
|
216
|
+
src = file.path(result_dir, "exp_TCR_pair_plot.png"),
|
|
217
|
+
descr = "Expression-TCR distance plot"
|
|
218
|
+
),
|
|
219
|
+
list(
|
|
220
|
+
src = file.path(result_dir, "TCR_dist_density.png"),
|
|
221
|
+
descr = "TCR distance density plot"
|
|
222
|
+
),
|
|
223
|
+
list(
|
|
224
|
+
src = file.path(result_dir, "TCR_explore.png"),
|
|
225
|
+
descr = "Exploratory plot at the TCR level"
|
|
226
|
+
),
|
|
227
|
+
list(
|
|
228
|
+
src = file.path(result_dir, "TCR_explore_clusters.png"),
|
|
229
|
+
descr = "TESSA clusters"
|
|
230
|
+
),
|
|
231
|
+
h1 = "TESSA Results",
|
|
232
|
+
ui = "table_of_images"
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
save_report(joboutdir)
|
biopipen/utils/misc.R
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
# Misc utilities for R
|
|
2
2
|
library(logger)
|
|
3
|
+
library(jsonlite)
|
|
3
4
|
|
|
4
5
|
.logger_layout <- layout_glue_generator(
|
|
5
6
|
format = '{sprintf("%-7s", level)} [{format(time, "%Y-%m-%d %H:%M:%S")}] {msg}'
|
|
6
7
|
)
|
|
7
8
|
log_layout(.logger_layout)
|
|
8
9
|
log_appender(appender_stdout)
|
|
9
|
-
log_errors()
|
|
10
|
+
tryCatch(log_errors(), error = function(e) {})
|
|
10
11
|
|
|
11
12
|
.isBQuoted <- function(x) {
|
|
12
13
|
# Check if x is backtick-quoted
|
|
@@ -112,3 +113,97 @@ list_update <- function(x, y) {
|
|
|
112
113
|
}
|
|
113
114
|
x
|
|
114
115
|
}
|
|
116
|
+
|
|
117
|
+
#' Biopipen palette
#'
#' Build a discrete colour palette function from 16 base colours.
#'
#' When more colours are requested than the palette holds, further copies of
#' the palette are appended with the alpha level lowered in steps of 0.3
#' (0.7, 0.4, 0.1). If that is still not enough, the colours collected so far
#' are recycled instead of stepping to an invalid (negative) alpha level.
#'
#' @param alpha Alpha value applied to the base colours, in (0, 1]
#' @return A palette function taking `n` and returning `n` colours
#'   (a zero-length vector for `n = 0`)
#' @export
pal_biopipen <- function(alpha = 1) {
    if (alpha > 1 || alpha <= 0) stop("alpha must be in (0, 1]")
    base_colors <- c(
        "#ec3f3f", "#009e73", "#008ad8", "#cc79a7",
        "#e69f00", "#50cada", "#f0e442", "#a76ce7",
        "#ff864d", "#45e645", "#3699b5", "#ffdcda",
        "#d55e00", "#778ba6", "#c37b35", "#bc28ff"
    )
    base_colors <- scales::alpha(base_colors, alpha)
    function(n) {
        if (n <= length(base_colors)) {
            # seq_len() rather than 1:n, which would yield c(1, 0) for n = 0
            # and return one colour instead of none.
            return(base_colors[seq_len(n)])
        }
        out_colors <- base_colors
        out_alpha <- 1.0
        # Stop extending before the alpha level drops to zero or below,
        # which scales::alpha() rejects.
        while (length(out_colors) < n && out_alpha - 0.3 > 0) {
            out_alpha <- out_alpha - 0.3
            out_colors <- c(out_colors, scales::alpha(base_colors, out_alpha))
        }
        # Truncates when enough colours were built, recycles otherwise.
        rep_len(out_colors, n)
    }
}
|
|
144
|
+
|
|
145
|
+
# Discrete ggplot2 colour scale backed by the biopipen palette.
#
# @param alpha Alpha value handed to pal_biopipen()
# @param ... Further arguments forwarded to ggplot2::discrete_scale()
scale_color_biopipen <- function(alpha = 1, ...) {
    ggplot2::discrete_scale(
        aesthetics = "colour",
        scale_name = "biopipen",
        palette = pal_biopipen(alpha),
        ...
    )
}

# British-spelling alias of the colour scale.
scale_colour_biopipen <- scale_color_biopipen

# Discrete ggplot2 fill scale backed by the biopipen palette.
#
# @param alpha Alpha value handed to pal_biopipen()
# @param ... Further arguments forwarded to ggplot2::discrete_scale()
scale_fill_biopipen <- function(alpha = 1, ...) {
    ggplot2::discrete_scale(
        aesthetics = "fill",
        scale_name = "biopipen",
        palette = pal_biopipen(alpha),
        ...
    )
}
|
|
154
|
+
|
|
155
|
+
# In-memory report registry, filled by add_report().
# Layout (heading levels, then the UI type, then the content items):
#   h1 => list(
#     h2 => list(
#       h3#1 => list(ui1 => list(content11, content12)),
#       h3#2 => list(ui2 => list(content21, content22))
#     )
#   )
.report <- list()

# Append content items to the report registry.
#
# @param ... Content items; each one becomes one entry under the given
#   heading path and UI type, in the order passed
# @param h1 Level-1 heading (required)
# @param h2 Level-2 heading; defaults to "#"
# @param h3 Level-3 heading; defaults to "#"
# @param ui UI type under which the items are stored; defaults to "flat"
# @return NULL, invisibly; the registry `.report` is updated as a side effect
add_report <- function(..., h1, h2 = "#", h3 = "#", ui = "flat") {
    # Work on a local copy and write it back once at the end.
    registry <- .report

    # Create every missing level of the heading path.
    if (is.null(registry[[h1]])) {
        registry[[h1]] <- list()
    }
    if (is.null(registry[[h1]][[h2]])) {
        registry[[h1]][[h2]] <- list()
    }
    if (is.null(registry[[h1]][[h2]][[h3]])) {
        registry[[h1]][[h2]][[h3]] <- list()
    }

    existing <- registry[[h1]][[h2]][[h3]][[ui]]
    if (is.null(existing)) {
        existing <- list()
    }

    # Items are stored without names, one list element per item.
    registry[[h1]][[h2]][[h3]][[ui]] <- c(existing, unname(list(...)))

    .report <<- registry
    invisible(NULL)
}
|
|
185
|
+
|
|
186
|
+
# Write the report registry to disk as pretty-printed JSON.
#
# @param path Output file, or an existing directory, in which case the
#   report is written to "report.json" inside it
# @param clear Whether to reset the registry to an empty list after writing
save_report <- function(path, clear = TRUE) {
    target <- if (dir.exists(path)) file.path(path, "report.json") else path

    payload <- toJSON(.report, pretty = TRUE, auto_unbox = TRUE)
    writeLines(payload, target)

    if (clear) {
        .report <<- list()
    }
}
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# Escape the HTML special characters in text.
#
# Replaces &, <, >, double quotes and single quotes with their HTML entities.
# The ampersand is replaced first so that the entities produced by the later
# substitutions are not escaped a second time.
#
# @param text Character vector to escape, or NULL
# @return The escaped character vector; "" when `text` is NULL
html_escape <- function(text) {
    if (is.null(text)) {
        return("")
    }
    # fixed = TRUE: the patterns are literal characters, not regular
    # expressions.
    text <- gsub("&", "&amp;", text, fixed = TRUE)
    text <- gsub("<", "&lt;", text, fixed = TRUE)
    text <- gsub(">", "&gt;", text, fixed = TRUE)
    text <- gsub("\"", "&quot;", text, fixed = TRUE)
    text <- gsub("'", "&#39;", text, fixed = TRUE)
    text
}
|