biopipen 0.21.2__py3-none-any.whl → 0.22.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/filters.py +142 -0
- biopipen/ns/scrna.py +19 -1
- biopipen/ns/tcr.py +30 -10
- biopipen/reports/delim/SampleInfo.svelte +2 -22
- biopipen/reports/scrna/CellsDistribution.svelte +4 -39
- biopipen/reports/scrna/MarkersFinder.svelte +6 -126
- biopipen/reports/scrna/MetaMarkers.svelte +3 -75
- biopipen/reports/scrna/RadarPlots.svelte +4 -20
- biopipen/reports/scrna/ScFGSEA.svelte +4 -23
- biopipen/reports/scrna/SeuratClusterStats.svelte +3 -69
- biopipen/reports/scrna/SeuratPreparing.svelte +3 -26
- biopipen/reports/scrna/TopExpressingGenes.svelte +3 -41
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +17 -16
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.svelte +13 -16
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +44 -52
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +2 -8
- biopipen/reports/tcr/CDR3AAPhyschem.svelte +18 -65
- biopipen/reports/tcr/CloneResidency.svelte +3 -93
- biopipen/reports/tcr/Immunarch.svelte +4 -168
- biopipen/reports/tcr/TCRClusterStats.svelte +3 -45
- biopipen/reports/tcr/TESSA.svelte +11 -28
- biopipen/scripts/delim/SampleInfo.R +41 -7
- biopipen/scripts/scrna/CellsDistribution.R +127 -16
- biopipen/scripts/scrna/MarkersFinder.R +245 -100
- biopipen/scripts/scrna/MetaMarkers.R +163 -82
- biopipen/scripts/scrna/RadarPlots.R +163 -110
- biopipen/scripts/scrna/ScFGSEA.R +51 -11
- biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -13
- biopipen/scripts/scrna/SeuratClusterStats-features.R +58 -53
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +39 -21
- biopipen/scripts/scrna/SeuratClusterStats.R +4 -2
- biopipen/scripts/scrna/SeuratClustering.R +73 -26
- biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
- biopipen/scripts/scrna/SeuratPreparing.R +93 -19
- biopipen/scripts/scrna/TopExpressingGenes.R +100 -18
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R +21 -8
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +25 -3
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +1 -0
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R +14 -3
- biopipen/scripts/tcr/Attach2Seurat.R +2 -1
- biopipen/scripts/tcr/CDR3AAPhyschem.R +123 -10
- biopipen/scripts/tcr/CloneResidency.R +114 -34
- biopipen/scripts/tcr/Immunarch-basic.R +18 -4
- biopipen/scripts/tcr/Immunarch-clonality.R +14 -0
- biopipen/scripts/tcr/Immunarch-diversity.R +123 -18
- biopipen/scripts/tcr/Immunarch-geneusage.R +23 -1
- biopipen/scripts/tcr/Immunarch-kmer.R +45 -3
- biopipen/scripts/tcr/Immunarch-overlap.R +62 -0
- biopipen/scripts/tcr/Immunarch-spectratyping.R +18 -2
- biopipen/scripts/tcr/Immunarch-tracking.R +24 -1
- biopipen/scripts/tcr/Immunarch-vjjunc.R +17 -2
- biopipen/scripts/tcr/Immunarch.R +7 -0
- biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
- biopipen/scripts/tcr/TCRClusterStats.R +124 -11
- biopipen/scripts/tcr/TCRClustering.R +8 -9
- biopipen/scripts/tcr/TESSA.R +66 -41
- biopipen/utils/misc.R +96 -1
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/METADATA +1 -1
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/RECORD +62 -62
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/WHEEL +0 -0
- {biopipen-0.21.2.dist-info → biopipen-0.22.1.dist-info}/entry_points.txt +0 -0
|
@@ -6,13 +6,16 @@ library(dplyr)
|
|
|
6
6
|
library(rlang)
|
|
7
7
|
library(immunarch)
|
|
8
8
|
library(ggprism)
|
|
9
|
+
library(slugify)
|
|
9
10
|
|
|
10
11
|
immfile = {{in.immfile | quote}}
|
|
11
12
|
outdir = {{out.outdir | quote}}
|
|
12
13
|
cluster_size_envs = {{envs.cluster_size | r}}
|
|
13
14
|
shared_clusters_envs = {{envs.shared_clusters | r}}
|
|
14
15
|
sample_diversity_envs = {{envs.sample_diversity | r}}
|
|
16
|
+
joboutdir = {{job.outdir | r}}
|
|
15
17
|
|
|
18
|
+
log_info("Expanding analysis cases ...")
|
|
16
19
|
expand_cases = function(envs) {
|
|
17
20
|
cases = envs$cases
|
|
18
21
|
envs$cases = NULL
|
|
@@ -51,8 +54,9 @@ shared_clusters_cases = expand_cases(shared_clusters_envs)
|
|
|
51
54
|
sample_diversity_cases = expand_cases(sample_diversity_envs)
|
|
52
55
|
|
|
53
56
|
cluster_size_distribution = function(name) {
|
|
54
|
-
|
|
55
|
-
|
|
57
|
+
log_info("- Working on cluster size distribution: {name}")
|
|
58
|
+
|
|
59
|
+
odir = file.path(outdir, "ClusterSizeDistribution", slugify(name, tolower = FALSE))
|
|
56
60
|
dir.create(odir, showWarnings = FALSE, recursive = TRUE)
|
|
57
61
|
case = cluster_size_cases[[name]]
|
|
58
62
|
|
|
@@ -75,16 +79,28 @@ cluster_size_distribution = function(name) {
|
|
|
75
79
|
ggs = c(
|
|
76
80
|
"theme_prism()",
|
|
77
81
|
"scale_y_continuous(trans='log10')",
|
|
78
|
-
"labs(x='TCR cluster size', y='Count')"
|
|
82
|
+
"labs(x='TCR cluster size', y='Count')",
|
|
83
|
+
"scale_fill_biopipen()"
|
|
79
84
|
),
|
|
80
85
|
devpars = case$devpars,
|
|
81
86
|
outfile = outplot
|
|
82
87
|
)
|
|
88
|
+
|
|
89
|
+
add_report(
|
|
90
|
+
list(
|
|
91
|
+
src = outplot,
|
|
92
|
+
name = ifelse(name == "DEFAULT", FALSE, name),
|
|
93
|
+
descr = paste0("Cluster size distribution for each ", case$by)
|
|
94
|
+
),
|
|
95
|
+
ui = "table_of_images",
|
|
96
|
+
h1 = "Cluster Size Distribution"
|
|
97
|
+
)
|
|
83
98
|
}
|
|
84
99
|
|
|
85
100
|
shared_clusters = function(name) {
|
|
86
|
-
|
|
87
|
-
|
|
101
|
+
log_info("- Working on shared clusters: {name}")
|
|
102
|
+
|
|
103
|
+
odir = file.path(outdir, "SharedClusters", slugify(name, tolower = FALSE))
|
|
88
104
|
dir.create(odir, showWarnings = FALSE, recursive = TRUE)
|
|
89
105
|
case = shared_clusters_cases[[name]]
|
|
90
106
|
if (!is.null(case$grouping)) {
|
|
@@ -139,12 +155,21 @@ shared_clusters = function(name) {
|
|
|
139
155
|
),
|
|
140
156
|
devpars = case$devpars,
|
|
141
157
|
outfile = file.path(odir, "shared_clusters.png")
|
|
158
|
+
)
|
|
142
159
|
|
|
160
|
+
add_report(
|
|
161
|
+
list(
|
|
162
|
+
src = file.path(odir, "shared_clusters.png"),
|
|
163
|
+
name = ifelse(name == "DEFAULT", FALSE, name),
|
|
164
|
+
descr = paste0("Shared TCR clusters across samples")
|
|
165
|
+
),
|
|
166
|
+
ui = "table_of_images",
|
|
167
|
+
h1 = "Shared TCR Clusters"
|
|
143
168
|
)
|
|
144
169
|
}
|
|
145
170
|
|
|
146
171
|
shared_clusters_by_grouping = function(name) {
|
|
147
|
-
odir = file.path(outdir, "SharedClusters", name)
|
|
172
|
+
odir = file.path(outdir, "SharedClusters", slugify(name, tolower = FALSE))
|
|
148
173
|
case = shared_clusters_cases[[name]]
|
|
149
174
|
|
|
150
175
|
data = list()
|
|
@@ -176,12 +201,47 @@ shared_clusters_by_grouping = function(name) {
|
|
|
176
201
|
devpars = case$devpars,
|
|
177
202
|
outfile = outfile
|
|
178
203
|
)
|
|
204
|
+
|
|
205
|
+
add_report(
|
|
206
|
+
list(
|
|
207
|
+
src = outfile,
|
|
208
|
+
name = ifelse(name == "DEFAULT", FALSE, name),
|
|
209
|
+
descr = paste0("Shared TCR clusters across ", grouping)
|
|
210
|
+
),
|
|
211
|
+
ui = "table_of_images",
|
|
212
|
+
h1 = "Shared TCR Clusters"
|
|
213
|
+
)
|
|
179
214
|
}
|
|
180
215
|
|
|
181
216
|
|
|
217
|
+
div_methods = list(
|
|
218
|
+
gini = list(
|
|
219
|
+
name = "The Gini coefficient",
|
|
220
|
+
descr = "The Gini coefficient is a measure of statistical dispersion intended to represent the income or wealth distribution of a nation's residents, and is the most commonly used measurement of inequality."
|
|
221
|
+
),
|
|
222
|
+
gini.simp = list(
|
|
223
|
+
name = "The Gini-Simpson index",
|
|
224
|
+
descr = "The Gini-Simpson index is a measure of diversity. It is one of the most commonly used in ecology. It is also known as the Simpson index, the Simpson concentration index, the Simpson dominance index, or the Simpson diversity index."
|
|
225
|
+
),
|
|
226
|
+
inv.simp = list(
|
|
227
|
+
name = "The inverse Simpson index",
|
|
228
|
+
descr = "It is the effective number of types that is obtained when
|
|
229
|
+
the weighted arithmetic mean is used to quantify average
|
|
230
|
+
proportional abundance of types in the dataset of interest."
|
|
231
|
+
),
|
|
232
|
+
div = list(
|
|
233
|
+
name = "The true diversity",
|
|
234
|
+
descr = "It refers to the number of equally abundant types needed
|
|
235
|
+
for the average proportional abundance of the types to
|
|
236
|
+
equal that observed in the dataset of interest where all
|
|
237
|
+
types may not be equally abundant."
|
|
238
|
+
)
|
|
239
|
+
)
|
|
240
|
+
|
|
182
241
|
sample_diversity = function(name) {
|
|
183
|
-
|
|
184
|
-
|
|
242
|
+
log_info("- Working on sample diversity: {name}")
|
|
243
|
+
|
|
244
|
+
odir = file.path(outdir, "SampleDiversity", slugify(name, tolower = FALSE))
|
|
185
245
|
dir.create(odir, showWarnings = FALSE, recursive = TRUE)
|
|
186
246
|
case = sample_diversity_cases[[name]]
|
|
187
247
|
|
|
@@ -192,7 +252,19 @@ sample_diversity = function(name) {
|
|
|
192
252
|
outfile = file.path(odir, "diversity.txt")
|
|
193
253
|
outplot = file.path(odir, "diversity.png")
|
|
194
254
|
div = repDiversity(data, .method = case$method)
|
|
195
|
-
write.table(
|
|
255
|
+
write.table(
|
|
256
|
+
if (ncol(div) == 1) {
|
|
257
|
+
as.data.frame(div) %>% rownames_to_column("Sample")
|
|
258
|
+
} else {
|
|
259
|
+
div
|
|
260
|
+
},
|
|
261
|
+
outfile,
|
|
262
|
+
row.names=TRUE,
|
|
263
|
+
col.names=TRUE,
|
|
264
|
+
quote=FALSE,
|
|
265
|
+
sep="\t"
|
|
266
|
+
)
|
|
267
|
+
|
|
196
268
|
if (case$method == "gini") {
|
|
197
269
|
div = as.data.frame(div) %>% rownames_to_column("Sample")
|
|
198
270
|
colnames(div)[2] = "gini"
|
|
@@ -201,7 +273,8 @@ sample_diversity = function(name) {
|
|
|
201
273
|
mapping = aes(x = Sample, y = gini, fill = Sample)
|
|
202
274
|
ggs = c(
|
|
203
275
|
"theme_prism(axis_text_angle = 90)",
|
|
204
|
-
"labs(title='Gini coefficient', subtitle='Sample diversity estimation using the Gini coefficient')"
|
|
276
|
+
"labs(title='Gini coefficient', subtitle='Sample diversity estimation using the Gini coefficient')",
|
|
277
|
+
"scale_fill_biopipen()"
|
|
205
278
|
)
|
|
206
279
|
if (is.null(case$by) || length(case$by) == 0) {
|
|
207
280
|
|
|
@@ -225,7 +298,6 @@ sample_diversity = function(name) {
|
|
|
225
298
|
devpars = case$devpars,
|
|
226
299
|
outfile = outplot
|
|
227
300
|
)
|
|
228
|
-
|
|
229
301
|
} else {
|
|
230
302
|
if (is.null(case$by) || length(case$by) == 0) {
|
|
231
303
|
p = vis(div)
|
|
@@ -243,6 +315,41 @@ sample_diversity = function(name) {
|
|
|
243
315
|
print(p)
|
|
244
316
|
dev.off()
|
|
245
317
|
}
|
|
318
|
+
|
|
319
|
+
add_report(
|
|
320
|
+
list(
|
|
321
|
+
ui = "flat",
|
|
322
|
+
label = "Diversity Plot",
|
|
323
|
+
contents = list(
|
|
324
|
+
list(
|
|
325
|
+
kind = "descr",
|
|
326
|
+
content = paste(
|
|
327
|
+
div_methods[[case$method]]$name,
|
|
328
|
+
ifelse(
|
|
329
|
+
is.null(case$by) || length(case$by) == 0,
|
|
330
|
+
"",
|
|
331
|
+
paste0(" grouped by ", paste(case$by, collapse = ", "))
|
|
332
|
+
),
|
|
333
|
+
div_methods[[case$method]]$descr
|
|
334
|
+
)
|
|
335
|
+
),
|
|
336
|
+
list(
|
|
337
|
+
kind = "image",
|
|
338
|
+
src = outplot
|
|
339
|
+
)
|
|
340
|
+
)
|
|
341
|
+
),
|
|
342
|
+
list(
|
|
343
|
+
ui = "flat",
|
|
344
|
+
label = "Diversity Table",
|
|
345
|
+
contents = list(
|
|
346
|
+
list(kind = "table", src = outfile, data = list(index_col = 0))
|
|
347
|
+
)
|
|
348
|
+
),
|
|
349
|
+
ui = "tabs",
|
|
350
|
+
h2 = ifelse(name == "DEFAULT", "#", name),
|
|
351
|
+
h1 = "Sample Diversity using TCR clusters"
|
|
352
|
+
)
|
|
246
353
|
}
|
|
247
354
|
|
|
248
355
|
|
|
@@ -250,14 +357,20 @@ sample_diversity = function(name) {
|
|
|
250
357
|
# main
|
|
251
358
|
# --------------------------------------------------
|
|
252
359
|
# Load immunarch data
|
|
360
|
+
log_info("Loading immunarch data ...")
|
|
253
361
|
immdata = readRDS(immfile)
|
|
254
362
|
|
|
255
363
|
# Cluster size distribution
|
|
364
|
+
log_info("Performing cluster size distribution analysis ...")
|
|
256
365
|
sapply(names(cluster_size_cases), cluster_size_distribution)
|
|
257
366
|
|
|
258
367
|
# Shared clusters
|
|
368
|
+
log_info("Performing shared clusters analysis ...")
|
|
259
369
|
sapply(names(shared_clusters_cases), shared_clusters)
|
|
260
370
|
|
|
261
371
|
# Diversity
|
|
372
|
+
log_info("Performing sample diversity analysis ...")
|
|
262
373
|
sapply(names(sample_diversity_cases), sample_diversity)
|
|
374
|
+
|
|
375
|
+
save_report(joboutdir)
|
|
263
376
|
}
|
|
@@ -3,11 +3,13 @@
|
|
|
3
3
|
# python = Sys.which({{envs.python | r}})
|
|
4
4
|
# Sys.setenv(RETICULATE_PYTHON = python)
|
|
5
5
|
# library(reticulate)
|
|
6
|
+
source("{{biopipen_dir}}/utils/single_cell.R")
|
|
6
7
|
|
|
7
8
|
library(immunarch)
|
|
8
9
|
library(dplyr)
|
|
9
10
|
library(tidyr)
|
|
10
11
|
library(tibble)
|
|
12
|
+
library(glue)
|
|
11
13
|
|
|
12
14
|
immfile = {{in.immfile | r}}
|
|
13
15
|
outdir = normalizePath({{job.outdir | r}})
|
|
@@ -17,6 +19,7 @@ tool = {{envs.tool | r}}
|
|
|
17
19
|
python = {{envs.python | r}}
|
|
18
20
|
on_multi = {{envs.on_multi | r}}
|
|
19
21
|
args = {{envs.args | r}}
|
|
22
|
+
prefix = {{envs.prefix | r}}
|
|
20
23
|
|
|
21
24
|
setwd(outdir)
|
|
22
25
|
|
|
@@ -26,17 +29,13 @@ if (on_multi) {
|
|
|
26
29
|
} else {
|
|
27
30
|
seqdata = immdata$data
|
|
28
31
|
}
|
|
32
|
+
if (is.null(prefix)) { prefix = immdata$prefix }
|
|
33
|
+
if (is.null(prefix)) { prefix = "" }
|
|
29
34
|
|
|
30
35
|
get_cdr3aa_df = function() {
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
select(Barcode, CDR3.aa) %>%
|
|
35
|
-
separate_rows(Barcode, sep = ";") %>%
|
|
36
|
-
mutate(Barcode = paste0(sample, "_", Barcode))
|
|
37
|
-
out = bind_rows(out, tmpdf)
|
|
38
|
-
}
|
|
39
|
-
out
|
|
36
|
+
expand_immdata(immdata, cell_id = "Barcode") %>%
|
|
37
|
+
mutate(Barcode = glue(paste0(prefix, "{Barcode}"))) %>%
|
|
38
|
+
select(Barcode, CDR3.aa)
|
|
40
39
|
}
|
|
41
40
|
cdr3aa_df = get_cdr3aa_df()
|
|
42
41
|
|
biopipen/scripts/tcr/TESSA.R
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
source("{{biopipen_dir}}/utils/single_cell.R")
|
|
2
3
|
|
|
3
4
|
library(glue)
|
|
4
5
|
library(dplyr)
|
|
5
6
|
library(tidyr)
|
|
7
|
+
library(tibble)
|
|
6
8
|
library(immunarch)
|
|
7
9
|
library(Seurat)
|
|
8
10
|
library(ggplot2)
|
|
@@ -11,7 +13,9 @@ library(ggprism)
|
|
|
11
13
|
immfile <- {{in.immdata | r}}
|
|
12
14
|
exprfile <- {{in.srtobj | r}}
|
|
13
15
|
outfile <- {{out.outfile | r}}
|
|
16
|
+
joboutdir <- {{job.outdir | r}}
|
|
14
17
|
python <- {{envs.python | r}}
|
|
18
|
+
prefix <- {{envs.prefix | r}}
|
|
15
19
|
within_sample <- {{envs.within_sample | r}}
|
|
16
20
|
assay <- {{envs.assay | r}}
|
|
17
21
|
predefined_b <- {{envs.predefined_b | r}}
|
|
@@ -27,35 +31,22 @@ if (!dir.exists(tessa_dir)) dir.create(tessa_dir)
|
|
|
27
31
|
|
|
28
32
|
### Start preparing input files for TESSA
|
|
29
33
|
# Prepare input files
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
# Sample Patient Timepoint Tissue
|
|
46
|
-
# <chr> <chr> <chr> <chr>
|
|
47
|
-
# 1 MC1685Pt011-Baseline-PB MC1685Pt011 Baseline PB
|
|
48
|
-
mdata = as.list(immdata$meta[i, , drop=FALSE])
|
|
49
|
-
for (mname in names(mdata)) {
|
|
50
|
-
assign(mname, mdata[[mname]])
|
|
51
|
-
}
|
|
34
|
+
log_info("Preparing TCR input file ...")
|
|
35
|
+
# If immfile ends with .rds, then it is an immunarch object
|
|
36
|
+
if (endsWith(tolower(immfile), ".rds")) {
|
|
37
|
+
immdata <- readRDS(immfile)
|
|
38
|
+
if (is.null(prefix)) { prefix = immdata$prefix }
|
|
39
|
+
if (is.null(prefix)) { prefix = "" }
|
|
40
|
+
tcrdata <- expand_immdata(immdata) %>%
|
|
41
|
+
mutate(Barcode = glue(paste0(prefix, "{Barcode}")))
|
|
42
|
+
rm(immdata)
|
|
43
|
+
} else {
|
|
44
|
+
tcrdata <- read.table(immfile, sep="\t", header=TRUE, row.names=1) %>%
|
|
45
|
+
rownames_to_column("Barcode")
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
has_VJ <- "V.name" %in% colnames(tcrdata) && "J.name" %in% colnames(tcrdata)
|
|
52
49
|
|
|
53
|
-
cldata %>%
|
|
54
|
-
separate_rows(Barcode, sep=";") %>%
|
|
55
|
-
# Just in case there are duplicated barcodes
|
|
56
|
-
distinct(Barcode, .keep_all = TRUE) %>%
|
|
57
|
-
mutate(Barcode = glue("{{envs.prefix}}{Barcode}"), sample = Sample)
|
|
58
|
-
}))
|
|
59
50
|
if (has_VJ) {
|
|
60
51
|
tcrdata <- tcrdata %>% dplyr::mutate(
|
|
61
52
|
v_gene = sub("-\\d+$", "", V.name),
|
|
@@ -65,18 +56,18 @@ if (has_VJ) {
|
|
|
65
56
|
cdr3 = CDR3.aa,
|
|
66
57
|
v_gene,
|
|
67
58
|
j_gene,
|
|
68
|
-
sample
|
|
59
|
+
sample = Sample
|
|
69
60
|
)
|
|
70
61
|
} else {
|
|
71
62
|
tcrdata <- tcrdata %>% dplyr::select(
|
|
72
63
|
contig_id = Barcode,
|
|
73
64
|
cdr3 = CDR3.aa,
|
|
74
|
-
sample
|
|
65
|
+
sample = Sample
|
|
75
66
|
)
|
|
76
67
|
}
|
|
77
68
|
|
|
78
69
|
|
|
79
|
-
|
|
70
|
+
log_info("Preparing expression input file ...")
|
|
80
71
|
is_seurat <- endsWith(tolower(exprfile), ".rds")
|
|
81
72
|
is_gz <- endsWith(tolower(exprfile), ".gz")
|
|
82
73
|
|
|
@@ -94,31 +85,34 @@ cell_ids <- intersect(tcrdata$contig_id, colnames(expr))
|
|
|
94
85
|
unused_tcr_cells <- setdiff(tcrdata$contig_id, cell_ids)
|
|
95
86
|
unused_expr_cells <- setdiff(colnames(expr), cell_ids)
|
|
96
87
|
if (length(unused_tcr_cells) > 0) {
|
|
97
|
-
|
|
88
|
+
log_warn(glue("{length(unused_tcr_cells)}/{nrow(tcrdata)} TCR cells are not used."))
|
|
98
89
|
}
|
|
99
90
|
if (length(unused_expr_cells) > 0) {
|
|
100
|
-
|
|
91
|
+
log_warn(glue("{length(unused_expr_cells)}/{ncol(expr)} expression cells are not used."))
|
|
101
92
|
}
|
|
102
93
|
if (length(cell_ids) == 0) {
|
|
103
|
-
stop(
|
|
94
|
+
stop(paste0(
|
|
95
|
+
"No common cells between TCR and expression data. ",
|
|
96
|
+
"Are you using the correct `envs.prefix` here or in `ImmunarchLoading`?"
|
|
97
|
+
))
|
|
104
98
|
}
|
|
105
99
|
tcrdata <- tcrdata[tcrdata$contig_id %in% cell_ids, , drop=FALSE]
|
|
106
100
|
expr <- as.matrix(expr)[, tcrdata$contig_id, drop=FALSE]
|
|
107
101
|
|
|
108
102
|
# Write input files
|
|
109
|
-
|
|
103
|
+
log_info("Writing input files ...")
|
|
110
104
|
write.table(tcrdata, file.path(tessa_dir, "tcrdata.txt"), sep=",", quote=FALSE, row.names=FALSE)
|
|
111
105
|
write.table(expr, file.path(tessa_dir, "exprdata.txt"), sep=",", quote=FALSE, row.names=TRUE, col.names=TRUE)
|
|
112
106
|
|
|
113
107
|
### End preparing input files for TESSA
|
|
114
108
|
|
|
115
109
|
### Start running TESSA
|
|
116
|
-
|
|
110
|
+
log_info("Running TESSA ...")
|
|
117
111
|
|
|
118
112
|
# The original TESSA uses a python wrapper to run the encoder and tessa model
|
|
119
113
|
# here we run those two steps directly
|
|
120
114
|
|
|
121
|
-
|
|
115
|
+
log_info("- Running encoder ...")
|
|
122
116
|
cmd_encoder <- paste(
|
|
123
117
|
python,
|
|
124
118
|
file.path(tessa_srcdir, "BriseisEncoder.py"),
|
|
@@ -140,14 +134,14 @@ if (has_VJ) {
|
|
|
140
134
|
file.path(tessa_dir, "tcr_vj.txt")
|
|
141
135
|
)
|
|
142
136
|
}
|
|
143
|
-
|
|
137
|
+
log_info(paste("- ", cmd_encoder))
|
|
144
138
|
|
|
145
139
|
rc <- system(cmd_encoder)
|
|
146
140
|
if (rc != 0) {
|
|
147
141
|
stop("Error: Failed to run encoder.")
|
|
148
142
|
}
|
|
149
143
|
|
|
150
|
-
|
|
144
|
+
log_info("- Running TESSA model ...")
|
|
151
145
|
source(file.path(tessa_srcdir, "real_data.R"))
|
|
152
146
|
|
|
153
147
|
tessa <- run_tessa(
|
|
@@ -162,7 +156,7 @@ tessa <- run_tessa(
|
|
|
162
156
|
)
|
|
163
157
|
|
|
164
158
|
# Save TESSA results
|
|
165
|
-
|
|
159
|
+
log_info("Saving TESSA results ...")
|
|
166
160
|
if (is_seurat) {
|
|
167
161
|
cells <- rownames(sobj@meta.data)
|
|
168
162
|
sobj@meta.data <- sobj@meta.data %>%
|
|
@@ -187,7 +181,7 @@ if (is_seurat) {
|
|
|
187
181
|
}
|
|
188
182
|
|
|
189
183
|
# Post analysis
|
|
190
|
-
|
|
184
|
+
log_info("Post analysis ...")
|
|
191
185
|
plot_tessa(tessa, result_dir)
|
|
192
186
|
plot_Tessa_clusters(tessa, result_dir)
|
|
193
187
|
|
|
@@ -201,3 +195,34 @@ p <- tessa$meta %>%
|
|
|
201
195
|
png(file.path(result_dir, "Cluster_size_dist.png"), width=8, height=8, units="in", res=100)
|
|
202
196
|
print(p)
|
|
203
197
|
dev.off()
|
|
198
|
+
|
|
199
|
+
add_report(
|
|
200
|
+
list(
|
|
201
|
+
src = file.path(result_dir, "Cluster_size_dist.png"),
|
|
202
|
+
descr = "Histogram of cluster size distribution"
|
|
203
|
+
),
|
|
204
|
+
list(
|
|
205
|
+
src = file.path(result_dir, "clone_size.png"),
|
|
206
|
+
descr = "Center cluster size vs. non-center cluster size"
|
|
207
|
+
),
|
|
208
|
+
list(
|
|
209
|
+
src = file.path(result_dir, "exp_TCR_pair_plot.png"),
|
|
210
|
+
descr = "Expression-TCR distance plot"
|
|
211
|
+
),
|
|
212
|
+
list(
|
|
213
|
+
src = file.path(result_dir, "TCR_dist_density.png"),
|
|
214
|
+
descr = "TCR distance density plot"
|
|
215
|
+
),
|
|
216
|
+
list(
|
|
217
|
+
src = file.path(result_dir, "TCR_explore.png"),
|
|
218
|
+
descr = "Exploratory plot at the TCR level"
|
|
219
|
+
),
|
|
220
|
+
list(
|
|
221
|
+
src = file.path(result_dir, "TCR_explore_clusters.png"),
|
|
222
|
+
descr = "TESSA clusters"
|
|
223
|
+
),
|
|
224
|
+
h1 = "TESSA Results",
|
|
225
|
+
ui = "table_of_images"
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
save_report(joboutdir)
|
biopipen/utils/misc.R
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
# Misc utilities for R
|
|
2
2
|
library(logger)
|
|
3
|
+
library(jsonlite)
|
|
3
4
|
|
|
4
5
|
.logger_layout <- layout_glue_generator(
|
|
5
6
|
format = '{sprintf("%-7s", level)} [{format(time, "%Y-%m-%d %H:%M:%S")}] {msg}'
|
|
6
7
|
)
|
|
7
8
|
log_layout(.logger_layout)
|
|
8
9
|
log_appender(appender_stdout)
|
|
9
|
-
log_errors()
|
|
10
|
+
tryCatch(log_errors(), error = function(e) {})
|
|
10
11
|
|
|
11
12
|
.isBQuoted <- function(x) {
|
|
12
13
|
# Check if x is backtick-quoted
|
|
@@ -112,3 +113,97 @@ list_update <- function(x, y) {
|
|
|
112
113
|
}
|
|
113
114
|
x
|
|
114
115
|
}
|
|
116
|
+
|
|
117
|
+
#' Biopipen palette
|
|
118
|
+
#' @param alpha Alpha value
|
|
119
|
+
#' @return A palette function
|
|
120
|
+
#' @export
|
|
121
|
+
pal_biopipen <- function(alpha = 1) {
|
|
122
|
+
if (alpha > 1L | alpha <= 0L) stop("alpha must be in (0, 1]")
|
|
123
|
+
colors <- c(
|
|
124
|
+
"#ec3f3f", "#009e73", "#008ad8", "#cc79a7",
|
|
125
|
+
"#e69f00", "#50cada", "#f0e442", "#a76ce7",
|
|
126
|
+
"#ff864d", "#45e645", "#3699b5", "#ffdcda",
|
|
127
|
+
"#d55e00", "#778ba6", "#c37b35", "#bc28ff"
|
|
128
|
+
)
|
|
129
|
+
colors <- scales::alpha(colors, alpha)
|
|
130
|
+
function(n) {
|
|
131
|
+
if (n <= length(colors)) {
|
|
132
|
+
colors[1:n]
|
|
133
|
+
} else {
|
|
134
|
+
out_colors <- colors
|
|
135
|
+
out_alpha <- 1.0
|
|
136
|
+
while(length(out_colors) < n) {
|
|
137
|
+
out_alpha <- out_alpha - 0.3
|
|
138
|
+
out_colors <- c(out_colors, scales::alpha(colors, out_alpha))
|
|
139
|
+
}
|
|
140
|
+
out_colors[1:n]
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
scale_color_biopipen <- function(alpha = 1, ...) {
|
|
146
|
+
ggplot2::discrete_scale("colour", "biopipen", pal_biopipen(alpha), ...)
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
scale_colour_biopipen <- scale_color_biopipen
|
|
150
|
+
|
|
151
|
+
scale_fill_biopipen <- function(alpha = 1, ...) {
|
|
152
|
+
ggplot2::discrete_scale("fill", "biopipen", pal_biopipen(alpha), ...)
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
.report <- list(
|
|
156
|
+
# h1 => list(
|
|
157
|
+
# h2 => list(
|
|
158
|
+
# h3#1 => list(ui1 => list(content11, content12)),
|
|
159
|
+
# h3#2 => list(ui2 => list(content21, content22))
|
|
160
|
+
# )
|
|
161
|
+
# )
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
add_report <- function(..., h1, h2 = "#", h3 = "#", ui = "flat") {
|
|
165
|
+
if (is.null(.report[[h1]])) {
|
|
166
|
+
.report[[h1]] <<- list()
|
|
167
|
+
}
|
|
168
|
+
if (is.null(.report[[h1]][[h2]])) {
|
|
169
|
+
.report[[h1]][[h2]] <<- list()
|
|
170
|
+
}
|
|
171
|
+
if (is.null(.report[[h1]][[h2]][[h3]])) {
|
|
172
|
+
.report[[h1]][[h2]][[h3]] <<- list()
|
|
173
|
+
}
|
|
174
|
+
if (is.null(.report[[h1]][[h2]][[h3]][[ui]])) {
|
|
175
|
+
.report[[h1]][[h2]][[h3]][[ui]] <<- list()
|
|
176
|
+
}
|
|
177
|
+
content = list(...)
|
|
178
|
+
for (i in seq_along(content)) {
|
|
179
|
+
.report[[h1]][[h2]][[h3]][[ui]] <<- c(
|
|
180
|
+
.report[[h1]][[h2]][[h3]][[ui]],
|
|
181
|
+
list(content[[i]])
|
|
182
|
+
)
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
save_report <- function(path, clear = TRUE) {
|
|
187
|
+
if (dir.exists(path)) {
|
|
188
|
+
path <- file.path(path, "report.json")
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
writeLines(toJSON(.report, pretty = TRUE, auto_unbox = TRUE), path)
|
|
192
|
+
if (clear) {
|
|
193
|
+
.report <<- list()
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# Escape html
|
|
199
|
+
html_escape <- function(text) {
|
|
200
|
+
if (is.null(text)) {
|
|
201
|
+
return("")
|
|
202
|
+
}
|
|
203
|
+
text = gsub("&", "&amp;", text)
|
|
204
|
+
text = gsub("<", "&lt;", text)
|
|
205
|
+
text = gsub(">", "&gt;", text)
|
|
206
|
+
text = gsub("\"", "&quot;", text)
|
|
207
|
+
text = gsub("'", "&#39;", text)
|
|
208
|
+
text
|
|
209
|
+
}
|