biopipen 0.22.0__py3-none-any.whl → 0.22.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +6 -0
- biopipen/core/filters.py +12 -0
- biopipen/ns/cellranger.py +101 -0
- biopipen/ns/scrna.py +2 -0
- biopipen/ns/tcr.py +30 -10
- biopipen/reports/cellranger/CellRangerCount.svelte +16 -0
- biopipen/reports/cellranger/CellRangerVdj.svelte +16 -0
- biopipen/scripts/cellranger/CellRangerCount.py +79 -0
- biopipen/scripts/cellranger/CellRangerVdj.py +79 -0
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +31 -24
- biopipen/scripts/scrna/CellsDistribution.R +9 -8
- biopipen/scripts/scrna/MarkersFinder.R +106 -28
- biopipen/scripts/scrna/SeuratClusterStats-features.R +2 -2
- biopipen/scripts/scrna/SeuratMetadataMutater.R +13 -1
- biopipen/scripts/tcr/Attach2Seurat.R +2 -1
- biopipen/scripts/tcr/CDR3AAPhyschem.R +1 -1
- biopipen/scripts/tcr/Immunarch.R +3 -0
- biopipen/scripts/tcr/ImmunarchLoading.R +22 -23
- biopipen/scripts/tcr/TCRClustering.R +8 -9
- biopipen/scripts/tcr/TESSA.R +23 -30
- biopipen/utils/common_docstrs.py +3 -0
- biopipen/utils/mutate_helpers.R +110 -106
- {biopipen-0.22.0.dist-info → biopipen-0.22.2.dist-info}/METADATA +1 -1
- {biopipen-0.22.0.dist-info → biopipen-0.22.2.dist-info}/RECORD +27 -22
- {biopipen-0.22.0.dist-info → biopipen-0.22.2.dist-info}/entry_points.txt +1 -0
- {biopipen-0.22.0.dist-info → biopipen-0.22.2.dist-info}/WHEEL +0 -0
|
@@ -143,11 +143,13 @@ for (name in names(cases)) {
|
|
|
143
143
|
} else if (is.null(case$each)) {
|
|
144
144
|
# is.null(case$ident.1)
|
|
145
145
|
sections <- c(sections, name)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
146
|
+
newcases[[name]] <- case
|
|
147
|
+
newcases[[name]]$findall <- TRUE
|
|
148
|
+
# idents <- srtobj@meta.data %>% pull(case$group.by) %>% unique() %>% na.omit()
|
|
149
|
+
# for (ident in idents) {
|
|
150
|
+
# newcases[[paste0(name, ":", ident)]] <- case
|
|
151
|
+
# newcases[[paste0(name, ":", ident)]]$ident.1 <- ident
|
|
152
|
+
# }
|
|
151
153
|
} else {
|
|
152
154
|
eachs <- srtobj@meta.data %>% pull(case$each) %>% unique() %>% na.omit()
|
|
153
155
|
for (each in eachs) {
|
|
@@ -160,18 +162,22 @@ for (name in names(cases)) {
|
|
|
160
162
|
)
|
|
161
163
|
)
|
|
162
164
|
if (is.null(case$ident.1)) {
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
}
|
|
165
|
+
kname <- if (name == "DEFAULT") "" else paste0(" - ", name)
|
|
166
|
+
sections <- c(sections, paste0(each, kname))
|
|
167
|
+
key <- paste0(each, kname)
|
|
168
|
+
newcases[[key]] <- case
|
|
169
|
+
newcases[[key]]$group.by <- by
|
|
170
|
+
newcases[[key]]$findall <- TRUE
|
|
171
|
+
# idents <- srtobj@meta.data %>% pull(case$group.by) %>% unique() %>% na.omit()
|
|
172
|
+
# for (ident in idents) {
|
|
173
|
+
# key <- paste0(each, kname, ":", ident)
|
|
174
|
+
# if (case$prefix_each) {
|
|
175
|
+
# key <- paste0(case$each, " - ", key)
|
|
176
|
+
# }
|
|
177
|
+
# newcases[[key]] <- case
|
|
178
|
+
# newcases[[key]]$ident.1 <- ident
|
|
179
|
+
# newcases[[key]]$group.by <- by
|
|
180
|
+
# }
|
|
175
181
|
} else {
|
|
176
182
|
sections <- c(sections, case$each)
|
|
177
183
|
key <- paste0(case$each, ":", each)
|
|
@@ -312,11 +318,11 @@ do_enrich <- function(info, markers, sig, volgenes) {
|
|
|
312
318
|
}
|
|
313
319
|
|
|
314
320
|
|
|
315
|
-
do_dotplot <- function(info, siggenes,
|
|
316
|
-
dotplot_devpars <-
|
|
321
|
+
do_dotplot <- function(info, siggenes, dotplot, args) {
|
|
322
|
+
dotplot_devpars <- dotplot$devpars
|
|
317
323
|
if (is.null(args$ident.2)) {
|
|
318
|
-
|
|
319
|
-
|
|
324
|
+
dotplot$object <- args$object
|
|
325
|
+
dotplot$object@meta.data <- dotplot$object@meta.data %>%
|
|
320
326
|
mutate(
|
|
321
327
|
!!sym(args$group.by) := if_else(
|
|
322
328
|
!!sym(args$group.by) == args$ident.1,
|
|
@@ -329,17 +335,16 @@ do_dotplot <- function(info, siggenes, case, args) {
|
|
|
329
335
|
)
|
|
330
336
|
)
|
|
331
337
|
} else {
|
|
332
|
-
|
|
338
|
+
dotplot$object <- args$object %>%
|
|
333
339
|
filter(!!sym(args$group.by) %in% c(args$ident.1, args$ident.2)) %>%
|
|
334
340
|
mutate(!!sym(args$group.by) := factor(
|
|
335
341
|
!!sym(args$group.by),
|
|
336
342
|
levels = c(args$ident.1, args$ident.2)
|
|
337
343
|
))
|
|
338
344
|
}
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
case$dotplot$assay <- case$assay
|
|
345
|
+
dotplot$devpars <- NULL
|
|
346
|
+
dotplot$features <- siggenes
|
|
347
|
+
dotplot$group.by <- args$group.by
|
|
343
348
|
dotplot_width = ifelse(
|
|
344
349
|
is.null(dotplot_devpars$width),
|
|
345
350
|
if (length(siggenes) <= 20) length(siggenes) * 60 else length(siggenes) * 30,
|
|
@@ -351,7 +356,7 @@ do_dotplot <- function(info, siggenes, case, args) {
|
|
|
351
356
|
png(dotplot_file, res = dotplot_res, width = dotplot_height, height = dotplot_width)
|
|
352
357
|
# rotate x axis labels
|
|
353
358
|
print(
|
|
354
|
-
do_call(DotPlot,
|
|
359
|
+
do_call(DotPlot, dotplot) +
|
|
355
360
|
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
|
|
356
361
|
coord_flip()
|
|
357
362
|
)
|
|
@@ -456,9 +461,79 @@ add_case_report <- function(info, sigmarkers, siggenes) {
|
|
|
456
461
|
}
|
|
457
462
|
|
|
458
463
|
|
|
464
|
+
# Run a "find all markers" case: one FindAllMarkers() call over every identity
# of `case$group.by`, then per-identity enrichment, dot plot and report.
#
# Arguments:
#   casename: name of the case to process; used to look the case up in the
#             file-level `cases` list and as the prefix of the per-identity
#             case names ("<casename>:<ident>").
#
# Returns nothing useful; it is called for its side effects (plots/reports
# produced by the helpers it calls, and updates to the global `overlaps` list).
#
# Relies on names defined elsewhere in this script: `cases`, `srtobj`, `assay`,
# `overlap`, `overlaps`, `log_info`, `log_warn`, `do_call`, `casename_info`,
# `do_enrich`, `do_dotplot` and `add_case_report`.
do_case_findall <- function(casename) {
    log_info("- Using FindAllMarkers for case: {casename}...")

    case <- cases[[casename]]
    args <- case$rest
    args$group.by <- case$group.by

    # Permissive defaults, applied only when the user did not set the option.
    if (is.null(args$logfc.threshold)) {
        # Fixed: this used to assign the misspelled `locfc.threshold`, so the
        # default never reached the `logfc.threshold` argument.
        args$logfc.threshold <- 0
    }
    if (is.null(args$min.cells.group)) {
        args$min.cells.group <- 1
    }
    if (is.null(args$min.cells.feature)) {
        args$min.cells.feature <- 1
    }
    if (is.null(args$min.pct)) {
        args$min.pct <- 0
    }

    # Always drop cells with NA in the grouping column; additionally apply the
    # user-supplied subset expression when there is one.
    if (!is.null(case$subset)) {
        # Fixed: the NA check used to be wrapped in a second, nested `filter()`
        # call inside the predicate, which fails because that inner call is
        # handed a logical vector instead of the object.
        args$object <- srtobj %>%
            filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
    } else {
        args$object <- srtobj %>% filter(!is.na(!!sym(case$group.by)))
    }
    Idents(args$object) <- case$group.by

    markers <- tryCatch({
        do_call(FindAllMarkers, args)
        # gene, p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
    }, error = function(e) {
        # Log the failure and carry on with an empty table of the same shape,
        # so the loop below simply has no identities to process.
        log_warn(e$message)
        data.frame(
            gene = character(),
            p_val = numeric(),
            avg_log2FC = numeric(),
            pct.1 = numeric(),
            pct.2 = numeric(),
            p_val_adj = numeric(),
            cluster = character()
        )
    })

    # Fall back to the script-level assay for the dot plot.
    if (is.null(case$dotplot$assay)) {
        case$dotplot$assay <- assay
    }

    idents <- unique(markers$cluster)
    for (ident in idents) {
        log_info("- Dealing with ident: {ident}...")
        info <- casename_info(paste0(casename, ":", ident), create = TRUE)
        siggenes <- do_enrich(
            info,
            markers %>% filter(cluster == ident),
            case$sigmarkers,
            case$volcano_genes
        )

        if (length(siggenes) > 0) {
            # do_dotplot() reads the identity of interest from args$ident.1
            args$ident.1 <- as.character(ident)
            do_dotplot(info, siggenes, case$dotplot, args)
        }
        add_case_report(info, case$sigmarkers, siggenes)

        # Record the significant genes in the global `overlaps` list for
        # sections named in `overlap`.
        if (info$section %in% overlap) {
            if (is.null(overlaps[[info$section]])) {
                overlaps[[info$section]] <<- list()
            }
            overlaps[[info$section]][[info$case]] <<- siggenes
        }
    }
}
|
|
527
|
+
|
|
528
|
+
|
|
459
529
|
do_case <- function(casename) {
|
|
460
530
|
log_info("Dealing with case: {casename}...")
|
|
461
531
|
|
|
532
|
+
if (isTRUE(cases[[casename]]$findall)) {
|
|
533
|
+
do_case_findall(casename)
|
|
534
|
+
return()
|
|
535
|
+
}
|
|
536
|
+
|
|
462
537
|
info <- casename_info(casename, create = TRUE)
|
|
463
538
|
case <- cases[[casename]]
|
|
464
539
|
# ident1
|
|
@@ -507,7 +582,10 @@ do_case <- function(casename) {
|
|
|
507
582
|
siggenes <- do_enrich(info, markers, case$sigmarkers, case$volcano_genes)
|
|
508
583
|
|
|
509
584
|
if (length(siggenes) > 0) {
|
|
510
|
-
|
|
585
|
+
if (is.null(case$dotplot$assay)) {
|
|
586
|
+
case$dotplot$assay <- assay
|
|
587
|
+
}
|
|
588
|
+
do_dotplot(info, siggenes, case$dotplot, args)
|
|
511
589
|
}
|
|
512
590
|
|
|
513
591
|
if (info$section %in% overlap) {
|
|
@@ -173,8 +173,8 @@ do_one_features = function(name) {
|
|
|
173
173
|
rownames_to_column("Feature") %>%
|
|
174
174
|
select(Feature, everything())
|
|
175
175
|
|
|
176
|
-
exprfile = paste0(slugify(name), ".txt")
|
|
177
|
-
write.table(expr,
|
|
176
|
+
exprfile = file.path(odir, paste0(slugify(name), ".txt"))
|
|
177
|
+
write.table(expr, exprfile, sep="\t", quote=FALSE, row.names=FALSE)
|
|
178
178
|
|
|
179
179
|
add_report(
|
|
180
180
|
list(
|
|
@@ -1,4 +1,6 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
1
2
|
source("{{biopipen_dir}}/utils/mutate_helpers.R")
|
|
3
|
+
|
|
2
4
|
library(rlang)
|
|
3
5
|
library(tibble)
|
|
4
6
|
library(dplyr)
|
|
@@ -14,7 +16,17 @@ metadata = srt@meta.data
|
|
|
14
16
|
|
|
15
17
|
if (!is.null(metafile)) {
|
|
16
18
|
mdata = read.table(metafile, header=TRUE, row.names=1, sep="\t", check.names=FALSE)
|
|
17
|
-
|
|
19
|
+
ov_cols = intersect(colnames(metadata), colnames(mdata))
|
|
20
|
+
if (length(ov_cols) > 0) {
|
|
21
|
+
log_warn(paste0(
|
|
22
|
+
"The following columns are already present in Seurat object and will be ignored: ",
|
|
23
|
+
paste(ov_cols, collapse=', ')
|
|
24
|
+
))
|
|
25
|
+
}
|
|
26
|
+
metadata = cbind(
|
|
27
|
+
metadata,
|
|
28
|
+
mdata[rownames(metadata), setdiff(colnames(mdata), ov_cols), drop=FALSE]
|
|
29
|
+
)
|
|
18
30
|
}
|
|
19
31
|
|
|
20
32
|
expr = list()
|
|
@@ -11,6 +11,7 @@ immfile = {{in.immfile | r}}
|
|
|
11
11
|
sobjfile = {{in.sobjfile | r}}
|
|
12
12
|
outfile = {{out.outfile | r}}
|
|
13
13
|
metacols = {{envs.metacols | r}}
|
|
14
|
+
prefix = {{envs.prefix | r}}
|
|
14
15
|
|
|
15
16
|
immdata = readRDS(immfile)
|
|
16
17
|
sobj = readRDS(sobjfile)
|
|
@@ -31,7 +32,7 @@ metadf = do_call(rbind, lapply(seq_len(nrow(immdata$meta)), function(i) {
|
|
|
31
32
|
|
|
32
33
|
cldata %>%
|
|
33
34
|
separate_rows(Barcode, sep=";") %>%
|
|
34
|
-
mutate(Barcode = glue("{
|
|
35
|
+
mutate(Barcode = glue(paste0(prefix, "{Barcode}")))
|
|
35
36
|
|
|
36
37
|
}))
|
|
37
38
|
|
|
@@ -193,7 +193,7 @@ merge_data = function(sam) {
|
|
|
193
193
|
if (!is.null(prefix) && nchar(prefix) > 0) {
|
|
194
194
|
# Replace the placeholder like {Sample} with the data in other columns
|
|
195
195
|
# in the same row
|
|
196
|
-
sdata = sdata %>% mutate(.prefix_len = nchar(glue(
|
|
196
|
+
sdata = sdata %>% mutate(.prefix_len = nchar(glue(prefix)))
|
|
197
197
|
# Remove the prefix in the rownames of sdata
|
|
198
198
|
rownames(sdata) = substring(rownames(sdata), sdata$.prefix_len + 1)
|
|
199
199
|
sdata = sdata %>% select(-.prefix_len)
|
biopipen/scripts/tcr/Immunarch.R
CHANGED
|
@@ -27,6 +27,9 @@ prefix = {{ envs.prefix | r }}
|
|
|
27
27
|
log_info("Loading immdata ...")
|
|
28
28
|
immdata = readRDS(immfile)
|
|
29
29
|
|
|
30
|
+
if (is.null(prefix)) { prefix = immdata$prefix }
|
|
31
|
+
if (is.null(prefix)) { prefix = "" }
|
|
32
|
+
|
|
30
33
|
log_info("Expanding immdata ...")
|
|
31
34
|
exdata = expand_immdata(immdata)
|
|
32
35
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
source("{{biopipen_dir}}/utils/single_cell.R")
|
|
2
3
|
|
|
3
4
|
# Loading 10x data into immunarch
|
|
4
5
|
library(immunarch)
|
|
@@ -13,7 +14,8 @@ rdsfile = {{ out.rdsfile | quote }}
|
|
|
13
14
|
metatxt = {{ out.metatxt | quote }}
|
|
14
15
|
tmpdir = {{ envs.tmpdir | quote }}
|
|
15
16
|
mode = {{ envs.mode | quote }}
|
|
16
|
-
|
|
17
|
+
extracols = {{ envs.extracols | r}}
|
|
18
|
+
prefix = {{ envs.prefix | r }}
|
|
17
19
|
|
|
18
20
|
metadata = read.table(
|
|
19
21
|
metafile,
|
|
@@ -164,27 +166,24 @@ immdata$meta = left_join(
|
|
|
164
166
|
by = "Sample"
|
|
165
167
|
)
|
|
166
168
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
metadf = do_call(rbind, lapply(seq_len(nrow(immdata$meta)), function(i) {
|
|
170
|
-
# Clones Proportion CDR3.aa Barcode
|
|
171
|
-
# 5 4 0.008583691 CAVRDTGNTPLVF;CASSEYSNQPQHF GTTCGGGCACTTACGA-1;TCTCTAAGTACCAGTT-1
|
|
172
|
-
# 6 4 0.008583691 CALTQAAGNKLTF;CASRPEDLRGQPQHF GCTTGAAGTCGGCACT-1;TACTCGCTCCTAAGTG-1
|
|
173
|
-
cldata = immdata$data[[i]][, unique(c(metacols, "Barcode"))]
|
|
174
|
-
# # A tibble: 4 × 5
|
|
175
|
-
# Sample Patient Timepoint Tissue
|
|
176
|
-
# <chr> <chr> <chr> <chr>
|
|
177
|
-
# 1 MC1685Pt011-Baseline-PB MC1685Pt011 Baseline PB
|
|
178
|
-
mdata = as.list(immdata$meta[i, , drop=FALSE])
|
|
179
|
-
for (mname in names(mdata)) {
|
|
180
|
-
assign(mname, mdata[[mname]])
|
|
181
|
-
}
|
|
169
|
+
immdata$prefix = prefix
|
|
182
170
|
|
|
183
|
-
|
|
184
|
-
separate_rows(Barcode, sep=";") %>%
|
|
185
|
-
distinct(Barcode, .keep_all = TRUE) %>%
|
|
186
|
-
mutate(Barcode = glue("{{envs.prefix}}{Barcode}")) %>%
|
|
187
|
-
column_to_rownames("Barcode")
|
|
171
|
+
saveRDS(immdata, file=rdsfile)
|
|
188
172
|
|
|
189
|
-
|
|
190
|
-
|
|
173
|
+
exdata <- expand_immdata(immdata, cell_id = "Barcode") %>%
|
|
174
|
+
distinct(Sample, Barcode, .keep_all = TRUE) %>%
|
|
175
|
+
mutate(Barcode = glue(paste0(prefix, "{Barcode}"))) %>%
|
|
176
|
+
select(any_of(c(
|
|
177
|
+
colnames(immdata$meta),
|
|
178
|
+
"Barcode",
|
|
179
|
+
"CDR3.aa",
|
|
180
|
+
"Clones",
|
|
181
|
+
"Proportion",
|
|
182
|
+
"V.name",
|
|
183
|
+
"D.name",
|
|
184
|
+
"J.name",
|
|
185
|
+
extracols
|
|
186
|
+
))) %>%
|
|
187
|
+
column_to_rownames("Barcode")
|
|
188
|
+
|
|
189
|
+
write.table(exdata, metatxt, sep="\t", quote=FALSE, row.names=TRUE, col.names=TRUE)
|
|
@@ -3,11 +3,13 @@
|
|
|
3
3
|
# python = Sys.which({{envs.python | r}})
|
|
4
4
|
# Sys.setenv(RETICULATE_PYTHON = python)
|
|
5
5
|
# library(reticulate)
|
|
6
|
+
source("{{biopipen_dir}}/utils/single_cell.R")
|
|
6
7
|
|
|
7
8
|
library(immunarch)
|
|
8
9
|
library(dplyr)
|
|
9
10
|
library(tidyr)
|
|
10
11
|
library(tibble)
|
|
12
|
+
library(glue)
|
|
11
13
|
|
|
12
14
|
immfile = {{in.immfile | r}}
|
|
13
15
|
outdir = normalizePath({{job.outdir | r}})
|
|
@@ -17,6 +19,7 @@ tool = {{envs.tool | r}}
|
|
|
17
19
|
python = {{envs.python | r}}
|
|
18
20
|
on_multi = {{envs.on_multi | r}}
|
|
19
21
|
args = {{envs.args | r}}
|
|
22
|
+
prefix = {{envs.prefix | r}}
|
|
20
23
|
|
|
21
24
|
setwd(outdir)
|
|
22
25
|
|
|
@@ -26,17 +29,13 @@ if (on_multi) {
|
|
|
26
29
|
} else {
|
|
27
30
|
seqdata = immdata$data
|
|
28
31
|
}
|
|
32
|
+
if (is.null(prefix)) { prefix = immdata$prefix }
|
|
33
|
+
if (is.null(prefix)) { prefix = "" }
|
|
29
34
|
|
|
30
35
|
get_cdr3aa_df = function() {
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
select(Barcode, CDR3.aa) %>%
|
|
35
|
-
separate_rows(Barcode, sep = ";") %>%
|
|
36
|
-
mutate(Barcode = paste0(sample, "_", Barcode))
|
|
37
|
-
out = bind_rows(out, tmpdf)
|
|
38
|
-
}
|
|
39
|
-
out
|
|
36
|
+
expand_immdata(immdata, cell_id = "Barcode") %>%
|
|
37
|
+
mutate(Barcode = glue(paste0(prefix, "{Barcode}"))) %>%
|
|
38
|
+
select(Barcode, CDR3.aa)
|
|
40
39
|
}
|
|
41
40
|
cdr3aa_df = get_cdr3aa_df()
|
|
42
41
|
|
biopipen/scripts/tcr/TESSA.R
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
source("{{biopipen_dir}}/utils/single_cell.R")
|
|
2
3
|
|
|
3
4
|
library(glue)
|
|
4
5
|
library(dplyr)
|
|
5
6
|
library(tidyr)
|
|
7
|
+
library(tibble)
|
|
6
8
|
library(immunarch)
|
|
7
9
|
library(Seurat)
|
|
8
10
|
library(ggplot2)
|
|
@@ -13,6 +15,7 @@ exprfile <- {{in.srtobj | r}}
|
|
|
13
15
|
outfile <- {{out.outfile | r}}
|
|
14
16
|
joboutdir <- {{job.outdir | r}}
|
|
15
17
|
python <- {{envs.python | r}}
|
|
18
|
+
prefix <- {{envs.prefix | r}}
|
|
16
19
|
within_sample <- {{envs.within_sample | r}}
|
|
17
20
|
assay <- {{envs.assay | r}}
|
|
18
21
|
predefined_b <- {{envs.predefined_b | r}}
|
|
@@ -29,34 +32,21 @@ if (!dir.exists(tessa_dir)) dir.create(tessa_dir)
|
|
|
29
32
|
### Start preparing input files for TESSA
|
|
30
33
|
# Prepare input files
|
|
31
34
|
log_info("Preparing TCR input file ...")
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
# Sample Patient Timepoint Tissue
|
|
47
|
-
# <chr> <chr> <chr> <chr>
|
|
48
|
-
# 1 MC1685Pt011-Baseline-PB MC1685Pt011 Baseline PB
|
|
49
|
-
mdata = as.list(immdata$meta[i, , drop=FALSE])
|
|
50
|
-
for (mname in names(mdata)) {
|
|
51
|
-
assign(mname, mdata[[mname]])
|
|
52
|
-
}
|
|
35
|
+
# If immfile endswith .rds, then it is an immunarch object
|
|
36
|
+
if (endsWith(tolower(immfile), ".rds")) {
|
|
37
|
+
immdata <- readRDS(immfile)
|
|
38
|
+
if (is.null(prefix)) { prefix = immdata$prefix }
|
|
39
|
+
if (is.null(prefix)) { prefix = "" }
|
|
40
|
+
tcrdata <- expand_immdata(immdata) %>%
|
|
41
|
+
mutate(Barcode = glue(paste0(prefix, "{Barcode}")))
|
|
42
|
+
rm(immdata)
|
|
43
|
+
} else {
|
|
44
|
+
tcrdata <- read.table(immfile, sep="\t", header=TRUE, row.names=1) %>%
|
|
45
|
+
rownames_to_column("Barcode")
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
has_VJ <- "V.name" %in% colnames(tcrdata) && "J.name" %in% colnames(tcrdata)
|
|
53
49
|
|
|
54
|
-
cldata %>%
|
|
55
|
-
separate_rows(Barcode, sep=";") %>%
|
|
56
|
-
# Just in case there are duplicated barcodes
|
|
57
|
-
distinct(Barcode, .keep_all = TRUE) %>%
|
|
58
|
-
mutate(Barcode = glue("{{envs.prefix}}{Barcode}"), sample = Sample)
|
|
59
|
-
}))
|
|
60
50
|
if (has_VJ) {
|
|
61
51
|
tcrdata <- tcrdata %>% dplyr::mutate(
|
|
62
52
|
v_gene = sub("-\\d+$", "", V.name),
|
|
@@ -66,13 +56,13 @@ if (has_VJ) {
|
|
|
66
56
|
cdr3 = CDR3.aa,
|
|
67
57
|
v_gene,
|
|
68
58
|
j_gene,
|
|
69
|
-
sample
|
|
59
|
+
sample = Sample
|
|
70
60
|
)
|
|
71
61
|
} else {
|
|
72
62
|
tcrdata <- tcrdata %>% dplyr::select(
|
|
73
63
|
contig_id = Barcode,
|
|
74
64
|
cdr3 = CDR3.aa,
|
|
75
|
-
sample
|
|
65
|
+
sample = Sample
|
|
76
66
|
)
|
|
77
67
|
}
|
|
78
68
|
|
|
@@ -101,7 +91,10 @@ if (length(unused_expr_cells) > 0) {
|
|
|
101
91
|
log_warn(glue("{length(unused_expr_cells)}/{ncol(expr)} expression cells are not used."))
|
|
102
92
|
}
|
|
103
93
|
if (length(cell_ids) == 0) {
|
|
104
|
-
stop(
|
|
94
|
+
stop(paste0(
|
|
95
|
+
"No common cells between TCR and expression data. ",
|
|
96
|
+
"Are you using the correct `envs.prefix` here or in `ImmunarchLoading`?"
|
|
97
|
+
))
|
|
105
98
|
}
|
|
106
99
|
tcrdata <- tcrdata[tcrdata$contig_id %in% cell_ids, , drop=FALSE]
|
|
107
100
|
expr <- as.matrix(expr)[, tcrdata$contig_id, drop=FALSE]
|
biopipen/utils/common_docstrs.py
CHANGED
|
@@ -46,11 +46,14 @@ Those functions take following arguments:
|
|
|
46
46
|
* `group-by`: The column name in metadata to group the cells.
|
|
47
47
|
* `idents`: The first group or both groups of cells to compare (value in `group-by` column). If only the first group is given, the rest of the cells (with non-NA in `group-by` column) will be used as the second group.
|
|
48
48
|
* `subset`: An expression to subset the cells, will be passed to `dplyr::filter()`. Default is `TRUE` (no filtering).
|
|
49
|
+
* `each`: A column name (without quotes) in metadata to split the cells.
|
|
50
|
+
Each comparison will be done for each value in this column.
|
|
49
51
|
* `id`: The column name in metadata for the group ids (i.e. `CDR3.aa`).
|
|
50
52
|
* `compare`: Either a (numeric) column name (i.e. `Clones`) in metadata to compare between groups, or `.n` to compare the number of cells in each group.
|
|
51
53
|
If numeric column is given, the values should be the same for all cells in the same group.
|
|
52
54
|
This will not be checked (only the first value is used).
|
|
53
55
|
* `uniq`: Whether to return unique ids or not. Default is `TRUE`. If `FALSE`, you can mutate the meta data frame with the returned ids. For example, `df |> mutate(expanded = expanded(...))`.
|
|
56
|
+
* `debug`: Return the data frame with intermediate columns instead of the ids. Default is `FALSE`.
|
|
54
57
|
* `order`: The order of the returned ids. It could be `sum` or `diff`, which is the sum or diff of the `compare` between idents.
|
|
55
58
|
Two kinds of modifiers can be added, including `desc` and `abs`.
|
|
56
59
|
For example, `sum,desc` means the sum of `compare` between idents in descending order.
|