biopipen 0.23.7__py3-none-any.whl → 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/proc.py +7 -0
- biopipen/ns/cellranger.py +2 -2
- biopipen/ns/scrna.py +15 -20
- biopipen/ns/tcr.py +8 -6
- biopipen/scripts/scrna/ScFGSEA.R +6 -0
- biopipen/scripts/scrna/SeuratClustering.R +102 -85
- biopipen/scripts/scrna/SeuratPreparing.R +21 -10
- biopipen/scripts/scrna/SeuratSubClustering.R +81 -97
- biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R +5 -5
- biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R +3 -3
- biopipen/scripts/tcr/Immunarch-basic.R +2 -3
- biopipen/scripts/tcr/Immunarch-clonality.R +2 -3
- biopipen/scripts/tcr/Immunarch-diversity.R +70 -49
- biopipen/scripts/tcr/Immunarch-geneusage.R +2 -3
- biopipen/scripts/tcr/Immunarch-kmer.R +2 -3
- biopipen/scripts/tcr/Immunarch-overlap.R +2 -3
- biopipen/scripts/tcr/Immunarch-spectratyping.R +2 -3
- biopipen/scripts/tcr/Immunarch-tracking.R +2 -3
- biopipen/scripts/tcr/Immunarch-vjjunc.R +2 -3
- biopipen/scripts/tcr/Immunarch.R +1 -1
- biopipen/scripts/tcr/ImmunarchLoading.R +2 -0
- biopipen/scripts/tcr/TCRClustering.R +6 -2
- biopipen/scripts/tcr/TESSA.R +3 -1
- biopipen/scripts/tcr/immunarch-patched.R +142 -0
- biopipen/utils/caching.R +44 -0
- {biopipen-0.23.7.dist-info → biopipen-0.24.0.dist-info}/METADATA +8 -7
- {biopipen-0.23.7.dist-info → biopipen-0.24.0.dist-info}/RECORD +30 -28
- {biopipen-0.23.7.dist-info → biopipen-0.24.0.dist-info}/WHEEL +0 -0
- {biopipen-0.23.7.dist-info → biopipen-0.24.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
source("{{biopipen_dir}}/utils/caching.R")
|
|
2
3
|
|
|
3
4
|
library(Seurat)
|
|
4
5
|
library(future)
|
|
@@ -33,40 +34,10 @@ envs$FindNeighbors <- .expand_dims(envs$FindNeighbors)
|
|
|
33
34
|
log_info("Reading Seurat object ...")
|
|
34
35
|
srtobj <- readRDS(srtfile)
|
|
35
36
|
|
|
36
|
-
if (isTRUE(envs$cache)) {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
if (is.character(envs$cache) && nchar(envs$cache) > 0) {
|
|
41
|
-
log_info("Obtainning the signature ...")
|
|
42
|
-
envs2 <- envs
|
|
43
|
-
envs2$ncores <- NULL
|
|
44
|
-
sig <- c(
|
|
45
|
-
capture.output(str(srtobj)),
|
|
46
|
-
"\n\n-------------------\n\n",
|
|
47
|
-
capture.output(str(envs2)),
|
|
48
|
-
"\n"
|
|
49
|
-
)
|
|
50
|
-
digested_sig <- digest::digest(sig, algo = "md5")
|
|
51
|
-
cached_file <- file.path(envs$cache, paste0(digested_sig, ".cached.RDS"))
|
|
52
|
-
if (file.exists(cached_file)) {
|
|
53
|
-
log_info("Using cached results {cached_file}")
|
|
54
|
-
# copy cached file to rdsfile
|
|
55
|
-
file.copy(cached_file, rdsfile, copy.date = TRUE)
|
|
56
|
-
quit()
|
|
57
|
-
} else {
|
|
58
|
-
log_info("Cached results not found.")
|
|
59
|
-
log_info("- Current signature: {digested_sig}")
|
|
60
|
-
# print(sig)
|
|
61
|
-
# sigfiles <- Sys.glob(file.path(envs$cache, "*.signature.txt"))
|
|
62
|
-
# for (sigfile in sigfiles) {
|
|
63
|
-
# log_info("- Found cached signature file: {sigfile}")
|
|
64
|
-
# cached_sig <- readLines(sigfile)
|
|
65
|
-
# log_info("- Cached signature:")
|
|
66
|
-
# print(cached_sig)
|
|
67
|
-
# }
|
|
68
|
-
writeLines(sig, file.path(envs$cache, paste0(digested_sig, ".signature.txt")))
|
|
69
|
-
}
|
|
37
|
+
if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
|
|
38
|
+
if (length(envs$cache) > 1) {
|
|
39
|
+
log_warn("Multiple cache directories (envs.cache) detected, using the first one.")
|
|
40
|
+
envs$cache <- envs$cache[1]
|
|
70
41
|
}
|
|
71
42
|
|
|
72
43
|
if (!is.null(envs$mutaters) && length(envs$mutaters) > 0) {
|
|
@@ -102,30 +73,66 @@ for (key in names(envs$cases)) {
|
|
|
102
73
|
}
|
|
103
74
|
|
|
104
75
|
log_info("- Subsetting ...")
|
|
105
|
-
sobj <-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
case$FindNeighbors$object <- sobj
|
|
119
|
-
if (is.null(case$FindNeighbors$reduction)) {
|
|
120
|
-
case$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
|
|
76
|
+
sobj <- tryCatch({
|
|
77
|
+
srtobj %>% filter(!!parse_expr(case$subset))
|
|
78
|
+
}, error = function(e) {
|
|
79
|
+
stop(paste0(" Error in subset: ", e$message))
|
|
80
|
+
})
|
|
81
|
+
sobj_sig <- capture.output(str(sobj))
|
|
82
|
+
dig_sig <- digest::digest(sobj_sig, algo = "md5")
|
|
83
|
+
dig_sig <- substr(dig_sig, 1, 8)
|
|
84
|
+
cache_dir <- NULL
|
|
85
|
+
if (is.character(envs$cache)) {
|
|
86
|
+
cache_dir <- file.path(envs$cache, paste0(dig_sig, ".seurat_cache"))
|
|
87
|
+
dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
|
|
88
|
+
writeLines(sobj_sig, file.path(cache_dir, "signature.txt"))
|
|
121
89
|
}
|
|
122
|
-
sobj <- do_call(FindNeighbors, case$FindNeighbors)
|
|
123
90
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
91
|
+
cached <- get_cached(case$RunUMAP, "RunUMAP", cache_dir)
|
|
92
|
+
reduc_name <- case$RunUMAP$reduction.name %||% "umap"
|
|
93
|
+
if (is.null(cached$data)) {
|
|
94
|
+
log_info("- Running RunUMAP ...")
|
|
95
|
+
umap_args <- list_setdefault(
|
|
96
|
+
case$RunUMAP,
|
|
97
|
+
object = sobj,
|
|
98
|
+
dims = 1:30,
|
|
99
|
+
reduction = sobj@misc$integrated_new_reduction %||% "pca"
|
|
100
|
+
)
|
|
101
|
+
ncells <- ncol(sobj)
|
|
102
|
+
umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
|
|
103
|
+
umap_method <- case$RunUMAP$umap.method %||% "uwot"
|
|
104
|
+
if (umap_method == "uwot" && is.null(case$RunUMAP$n.neighbors)) {
|
|
105
|
+
# https://github.com/satijalab/seurat/issues/4312
|
|
106
|
+
umap_args$n.neighbors <- min(ncells - 1, 30)
|
|
107
|
+
}
|
|
108
|
+
sobj <- do_call(RunUMAP, umap_args)
|
|
109
|
+
cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
|
|
110
|
+
save_to_cache(cached, "RunUMAP", cache_dir)
|
|
111
|
+
} else {
|
|
112
|
+
log_info("- Loading cached RunUMAP ...")
|
|
113
|
+
sobj@reductions[[reduc_name]] <- cached$data$reduc
|
|
114
|
+
sobj@commands <- cached$data$commands
|
|
115
|
+
}
|
|
116
|
+
reduc <- cached$data$reduc
|
|
117
|
+
|
|
118
|
+
cached <- get_cached(case$FindNeighbors, "FindNeighbors", cache_dir)
|
|
119
|
+
if (is.null(cached$data)) {
|
|
120
|
+
log_info("- Running FindNeighbors ...")
|
|
121
|
+
case$FindNeighbors$object <- sobj
|
|
122
|
+
if (is.null(case$FindNeighbors$reduction)) {
|
|
123
|
+
case$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
|
|
124
|
+
}
|
|
125
|
+
sobj <- do_call(FindNeighbors, case$FindNeighbors)
|
|
126
|
+
cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
|
|
127
|
+
save_to_cache(cached, "FindNeighbors", cache_dir)
|
|
128
|
+
} else {
|
|
129
|
+
log_info("- Loading cached FindNeighbors ...")
|
|
130
|
+
sobj@graphs <- cached$data$graphs
|
|
131
|
+
sobj@commands <- cached$data$commands
|
|
127
132
|
}
|
|
128
|
-
|
|
133
|
+
|
|
134
|
+
case$FindClusters$random.seed <- case$FindClusters$random.seed %||% 8525
|
|
135
|
+
resolution <- case$FindClusters$resolution %||% 0.8
|
|
129
136
|
if (is.character(resolution)) {
|
|
130
137
|
if (grepl(",", resolution)) {
|
|
131
138
|
resolution <- as.numeric(trimws(unlist(strsplit(resolution, ","))))
|
|
@@ -133,53 +140,30 @@ for (key in names(envs$cases)) {
|
|
|
133
140
|
resolution <- as.numeric(resolution)
|
|
134
141
|
}
|
|
135
142
|
}
|
|
136
|
-
|
|
137
|
-
case$FindClusters$resolution <-
|
|
138
|
-
case$FindClusters
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
log_info("- Found {length(ident_table)} clusters:")
|
|
145
|
-
print(ident_table)
|
|
146
|
-
cat("\n")
|
|
147
|
-
|
|
148
|
-
log_info("- Updating meta.data with subclusters...")
|
|
149
|
-
srtobj <- AddMetaData(srtobj, metadata = sobj@meta.data[, key, drop = FALSE])
|
|
150
|
-
srtobj[[paste0("sub_umap_", key)]] <- sobj@reductions$umap
|
|
151
|
-
} else {
|
|
152
|
-
log_info("- Multiple resolutions detected ...")
|
|
153
|
-
log_info("")
|
|
154
|
-
metadata <- NULL
|
|
155
|
-
for (res in resolution) {
|
|
156
|
-
findclusters_args <- case$FindClusters
|
|
157
|
-
findclusters_args$resolution <- res
|
|
158
|
-
findclusters_args$object <- sobj
|
|
159
|
-
sobj1 <- do_call(FindClusters, findclusters_args)
|
|
160
|
-
res_key <- paste0(key, "_", res)
|
|
143
|
+
for (res in resolution) {
|
|
144
|
+
case$FindClusters$resolution <- res
|
|
145
|
+
cached <- get_cached(case$FindClusters, paste0("FindClusters_", res), cache_dir)
|
|
146
|
+
res_key <- paste0("seurat_clusters_", res)
|
|
147
|
+
if (is.null(cached$data)) {
|
|
148
|
+
log_info("- Running FindClusters at resolution: {res} ...")
|
|
149
|
+
case$FindClusters$object <- sobj
|
|
150
|
+
sobj1 <- do_call(FindClusters, case$FindClusters)
|
|
161
151
|
levels(sobj1$seurat_clusters) <- paste0("s", as.numeric(levels(sobj1$seurat_clusters)) + 1)
|
|
162
|
-
Idents(sobj1) <- "seurat_clusters"
|
|
163
152
|
sobj1[[res_key]] <- sobj1$seurat_clusters
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
log_info("- Updating meta.data with subclusters...")
|
|
170
|
-
metadata <- sobj1@meta.data[, res_key, drop = FALSE]
|
|
171
|
-
srtobj <- AddMetaData(srtobj, metadata = metadata)
|
|
172
|
-
srtobj[[paste0("sub_umap_", res_key)]] <- sobj1@reductions$umap
|
|
153
|
+
cached$data <- sobj1@meta.data[, res_key, drop = FALSE]
|
|
154
|
+
save_to_cache(cached, paste0("FindClusters_", res), cache_dir)
|
|
155
|
+
} else {
|
|
156
|
+
log_info("- Using cached FindClusters at resolution: {res} ...")
|
|
173
157
|
}
|
|
174
|
-
|
|
175
|
-
|
|
158
|
+
ident_table <- table(cached$data[[res_key]])
|
|
159
|
+
log_info(" Found {length(ident_table)} clusters")
|
|
160
|
+
print(ident_table)
|
|
161
|
+
cat("\n")
|
|
176
162
|
}
|
|
163
|
+
log_info("- Updating meta.data with subclusters...")
|
|
164
|
+
srtobj <- AddMetaData(srtobj, metadata = cached$data, col.name = key)
|
|
165
|
+
srtobj[[paste0("sub_umap_", key)]] <- reduc
|
|
177
166
|
}
|
|
178
167
|
|
|
179
168
|
log_info("Saving results ...")
|
|
180
169
|
saveRDS(srtobj, file = rdsfile)
|
|
181
|
-
|
|
182
|
-
if (is.character(envs$cache) && nchar(envs$cache) > 0) {
|
|
183
|
-
log_info("Caching results to {cached_file} ...")
|
|
184
|
-
invisible(file.copy(rdsfile, cached_file, overwrite = TRUE))
|
|
185
|
-
}
|
|
@@ -54,7 +54,7 @@ do_one_comparison <- function(
|
|
|
54
54
|
subset_prefix,
|
|
55
55
|
groupname
|
|
56
56
|
) {
|
|
57
|
-
|
|
57
|
+
log_info(paste(" Design: {compname} ({case}, {control})"))
|
|
58
58
|
case_code = paste0("subset(obj, subset = ", subset_col, " == '", case, "')")
|
|
59
59
|
case_obj = tryCatch({
|
|
60
60
|
eval(parse(text = case_code))
|
|
@@ -62,7 +62,7 @@ do_one_comparison <- function(
|
|
|
62
62
|
NULL
|
|
63
63
|
})
|
|
64
64
|
if (is.null(case_obj)) {
|
|
65
|
-
|
|
65
|
+
log_warn(" Skip (not enough cells in case)")
|
|
66
66
|
return (NULL)
|
|
67
67
|
}
|
|
68
68
|
control_code = paste0("subset(obj, subset = ", subset_col, " == '", control, "')")
|
|
@@ -72,7 +72,7 @@ do_one_comparison <- function(
|
|
|
72
72
|
NULL
|
|
73
73
|
})
|
|
74
74
|
if (is.null(control_obj)) {
|
|
75
|
-
|
|
75
|
+
log_warn(" Skip (not enough cells in control)")
|
|
76
76
|
add_report(
|
|
77
77
|
list(kind = "error", content = "Not enough cells in control"),
|
|
78
78
|
h1 = groupname,
|
|
@@ -86,7 +86,7 @@ do_one_comparison <- function(
|
|
|
86
86
|
odir = file.path(groupdir, paste0(subset_prefix, compname))
|
|
87
87
|
dir.create(odir, showWarnings = FALSE)
|
|
88
88
|
if (ncol(exprs_case) < 3 || ncol(exprs_control) < 3) {
|
|
89
|
-
|
|
89
|
+
log_warn(" Skip (not enough cells)")
|
|
90
90
|
add_report(
|
|
91
91
|
list(kind = "error", content = "Not enough cells"),
|
|
92
92
|
h1 = groupname,
|
|
@@ -131,7 +131,7 @@ do_one_comparison <- function(
|
|
|
131
131
|
}
|
|
132
132
|
|
|
133
133
|
do_one_group <- function(group) {
|
|
134
|
-
|
|
134
|
+
log_info("- Group: {group} ...")
|
|
135
135
|
|
|
136
136
|
genes = intersect(metabolics, rownames(sobj))
|
|
137
137
|
group_code = paste0(
|
|
@@ -71,7 +71,7 @@ num_of_pathways <- function(gmtfile, overlapgenes) {
|
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
do_one_subset <- function(s, subset_col, subset_prefix) {
|
|
74
|
-
|
|
74
|
+
log_info(" Processing subset: {s} ...")
|
|
75
75
|
if (is.null(s)) {
|
|
76
76
|
subset_dir <- file.path(outdir, "ALL")
|
|
77
77
|
dir.create(subset_dir, showWarnings = FALSE)
|
|
@@ -118,7 +118,7 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
|
|
|
118
118
|
|
|
119
119
|
for (pi in seq_along(pathway_names)) {
|
|
120
120
|
p <- pathway_names[pi]
|
|
121
|
-
|
|
121
|
+
log_info(" * Pathway ({pi}): {p} ...")
|
|
122
122
|
genes <- pathways[[p]]
|
|
123
123
|
genes_comm <- intersect(genes, rownames(subset_obj))
|
|
124
124
|
genes_expressed <- names(rowSums(subset_obj)[rowSums(subset_obj) > 0])
|
|
@@ -312,7 +312,7 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
|
|
|
312
312
|
}
|
|
313
313
|
|
|
314
314
|
do_one_subset_col <- function(subset_col, subset_prefix) {
|
|
315
|
-
|
|
315
|
+
log_info("- Handling subset column: {subset_col} ...")
|
|
316
316
|
if (is.null(subset_col)) {
|
|
317
317
|
do_one_subset(NULL, subset_col = NULL, subset_prefix = NULL)
|
|
318
318
|
} else {
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
# immfile, outdir, mutaters, immdata, n_samples
|
|
3
3
|
|
|
4
4
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
5
|
+
log_info("# Basic analysis")
|
|
6
|
+
log_info("-----------------------------------")
|
|
8
7
|
|
|
9
8
|
volumes = {{envs.volumes | r}}
|
|
10
9
|
lens = {{envs.lens | r}}
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
# immfile, outdir, mutaters, immdata, n_samples
|
|
3
3
|
|
|
4
4
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
5
|
+
log_info("# Clonality analysis")
|
|
6
|
+
log_info("-----------------------------------")
|
|
8
7
|
|
|
9
8
|
top_clones = {{envs.top_clones | r}}
|
|
10
9
|
rare_clones = {{envs.rare_clones | r}}
|
|
@@ -1,30 +1,34 @@
|
|
|
1
1
|
# Diversity estimation
|
|
2
|
+
source("{{biopipen_dir}}/scripts/tcr/immunarch-patched.R")
|
|
2
3
|
# https://immunarch.com/articles/web_only/v6_diversity.html
|
|
3
4
|
|
|
4
5
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
6
|
+
log_info("# Diversity estimation")
|
|
7
|
+
log_info("-----------------------------------")
|
|
8
8
|
|
|
9
9
|
# Other variables are loaded in the parent template
|
|
10
10
|
# immdata is already loaded, meta is mutated
|
|
11
|
-
div_method = {{envs.divs.method | r}}
|
|
12
|
-
div_by = {{envs.divs.by | r}}
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
11
|
+
div_method = {{envs.divs.method | default: "gini" | r}}
|
|
12
|
+
div_by = {{envs.divs.by | default: None | r}}
|
|
13
|
+
div_plot_type = {{envs.divs.plot_type | default: "bar" | r}}
|
|
14
|
+
div_order = {{envs.divs.order | default: [] | r}}
|
|
15
|
+
div_args = {{envs.divs.args | default: {} | r: todot="-"}}
|
|
16
|
+
div_test = {{envs.divs.test | default: None | r}}
|
|
17
|
+
div_cases = {{envs.divs.cases | default: {} | r: todot="-"}}
|
|
18
|
+
div_devpars = {{envs.divs.devpars | default: None | r}}
|
|
19
|
+
div_separate_by = {{envs.divs.separate_by | default: None | r}}
|
|
20
|
+
div_split_by = {{envs.divs.split_by | default: None | r}}
|
|
21
|
+
div_split_order = {{envs.divs.split_order | default: None | r}}
|
|
22
|
+
div_align_x = {{envs.divs.align_x | default: False | r}}
|
|
23
|
+
div_align_y = {{envs.divs.align_y | default: False | r}}
|
|
24
|
+
div_subset = {{envs.divs.subset | default: None | r}}
|
|
25
|
+
div_log = {{envs.divs.log | default: False | r}}
|
|
26
|
+
div_ncol = {{envs.divs.ncol | default: 2 | r}}
|
|
27
|
+
div_ymin = {{envs.divs.ymin | default: None | r}}
|
|
28
|
+
div_ymax = {{envs.divs.ymax | default: None | r}}
|
|
29
|
+
|
|
30
|
+
div_test = div_test %||% list(method = "none", padjust = "none")
|
|
31
|
+
div_devpars = div_devpars %||% list(res = 100, width = 800, height = 800)
|
|
28
32
|
|
|
29
33
|
div_dir = file.path(outdir, "diversity")
|
|
30
34
|
dir.create(div_dir, showWarnings = FALSE)
|
|
@@ -38,6 +42,7 @@ update_case = function(case, name) {
|
|
|
38
42
|
if (!is.null(case$by) && nchar(case$by) > 0) {
|
|
39
43
|
case$by = unlist(strsplit(case$by, ",")) %>% trimws()
|
|
40
44
|
}
|
|
45
|
+
case$plot_type <- case$plot_type %||% div_plot_type
|
|
41
46
|
case$order <- case$order %||% div_order
|
|
42
47
|
case$args <- case$args %||% div_args
|
|
43
48
|
for (name in names(case$args)) {
|
|
@@ -85,23 +90,6 @@ update_case = function(case, name) {
|
|
|
85
90
|
return (case)
|
|
86
91
|
}
|
|
87
92
|
|
|
88
|
-
# See https://github.com/immunomind/immunarch/pull/341
|
|
89
|
-
vis.immunr_gini <- function(.data, .by = NA, .meta = NA,
|
|
90
|
-
.errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
|
|
91
|
-
.points = TRUE, .test = TRUE, .signif.label.size = 3.5, ...) {
|
|
92
|
-
# repDiversity(..., .method = "gini") generates a matrix
|
|
93
|
-
.data = data.frame(Sample = rownames(.data), Value = .data[, 1])
|
|
94
|
-
vis_bar(
|
|
95
|
-
.data = .data, .by = .by, .meta = .meta,
|
|
96
|
-
.errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = FALSE,
|
|
97
|
-
.points = .points, .test = .test, .signif.label.size = .signif.label.size,
|
|
98
|
-
.defgroupby = "Sample", .grouping.var = "Group",
|
|
99
|
-
.labs = c(NA, "Gini coefficient"),
|
|
100
|
-
.title = "Gini coefficient", .subtitle = "Sample diversity estimation using the Gini coefficient",
|
|
101
|
-
.legend = NA, .leg.title = NA
|
|
102
|
-
)
|
|
103
|
-
}
|
|
104
|
-
|
|
105
93
|
if (is.null(div_cases) || length(div_cases) == 0) {
|
|
106
94
|
if (is.null(div_method) || length(div_method) == 0 || nchar(div_method) == 0) {
|
|
107
95
|
stop("No method is specified for diversity estimation")
|
|
@@ -176,6 +164,15 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
176
164
|
col.names = TRUE
|
|
177
165
|
)
|
|
178
166
|
|
|
167
|
+
.meta_vals <- function(meta, cols) {
|
|
168
|
+
if (length(cols) == 1) {
|
|
169
|
+
return (meta[[cols]])
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
vlist = lapply(cols, function(.x) meta[[.x]])
|
|
173
|
+
do.call(function(...) paste(..., sep = "; "), vlist)
|
|
174
|
+
}
|
|
175
|
+
|
|
179
176
|
# plot
|
|
180
177
|
# by, order, separate_by, align_y
|
|
181
178
|
n_seps = 1
|
|
@@ -189,11 +186,19 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
189
186
|
metas = metas[intersect(case$split_order, names(metas))]
|
|
190
187
|
}
|
|
191
188
|
ps = lapply(metas, function(meta) {
|
|
192
|
-
.test = length(unique(meta
|
|
193
|
-
p = vis(
|
|
189
|
+
.test = length(unique(.meta_vals(meta, case$by))) > 1
|
|
190
|
+
p = vis(
|
|
191
|
+
filter_div(div, meta$Sample),
|
|
192
|
+
.by = case$by,
|
|
193
|
+
.meta = meta,
|
|
194
|
+
.test = .test,
|
|
195
|
+
.plot.type = case$plot_type
|
|
196
|
+
)
|
|
194
197
|
p = p + xlab(paste0(case$separate_by, ": ", meta[[case$separate_by]][1], ")"))
|
|
195
198
|
if (!is.null(case$order) && length(case$order) > 0) {
|
|
196
|
-
p = p + scale_x_discrete(
|
|
199
|
+
p = p + scale_x_discrete(
|
|
200
|
+
limits = intersect(case$order, unique(.meta_vals(meta, case$by)))
|
|
201
|
+
)
|
|
197
202
|
}
|
|
198
203
|
if (!is.null(case$ymin) && !is.null(case$ymax)) {
|
|
199
204
|
p = p + ylim(c(case$ymin, case$ymax))
|
|
@@ -217,10 +222,18 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
217
222
|
}
|
|
218
223
|
.i = 0
|
|
219
224
|
ps = lapply(metas, function(meta) {
|
|
220
|
-
nby = length(unique(meta
|
|
221
|
-
p = vis(
|
|
225
|
+
nby = length(unique(.meta_vals(meta, case$by)))
|
|
226
|
+
p = vis(
|
|
227
|
+
filter_div(div, meta$Sample),
|
|
228
|
+
.by = case$by,
|
|
229
|
+
.meta = meta,
|
|
230
|
+
.test = nby > 1,
|
|
231
|
+
.plot.type = case$plot_type
|
|
232
|
+
)
|
|
222
233
|
if (!is.null(case$order) && length(case$order) > 0) {
|
|
223
|
-
p = p + scale_x_discrete(
|
|
234
|
+
p = p + scale_x_discrete(
|
|
235
|
+
limits = intersect(case$order, unique(.meta_vals(meta, case$by)))
|
|
236
|
+
)
|
|
224
237
|
}
|
|
225
238
|
p = p + xlab(meta[[case$split_by]][1]) + theme(
|
|
226
239
|
axis.text.x = element_blank(),
|
|
@@ -253,10 +266,10 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
253
266
|
plots = lapply(ps, function(x) x$p + ylim(c(ymin, ymax)))
|
|
254
267
|
p = wrap_plots(plots, widths = widths, guides = "collect")
|
|
255
268
|
} else {
|
|
256
|
-
.test = length(unique(d$meta
|
|
257
|
-
p = vis(div, .by = case$by, .meta = d$meta, .test = .test)
|
|
269
|
+
.test = length(unique(.meta_vals(d$meta, case$by))) > 1
|
|
270
|
+
p = vis(div, .by = case$by, .meta = d$meta, .test = .test, .plot.type = case$plot_type)
|
|
258
271
|
if (!is.null(case$order) && length(case$order) > 0) {
|
|
259
|
-
p = p + scale_x_discrete(limits = intersect(case$order, unique(d$meta
|
|
272
|
+
p = p + scale_x_discrete(limits = intersect(case$order, unique(.meta_vals(d$meta, case$by))))
|
|
260
273
|
}
|
|
261
274
|
}
|
|
262
275
|
} else if (!is.null(case$separate_by)) {
|
|
@@ -333,7 +346,9 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
333
346
|
} else {
|
|
334
347
|
p = vis(div)
|
|
335
348
|
if (!is.null(case$order) && length(case$order) > 0) {
|
|
336
|
-
p = p + scale_x_discrete(
|
|
349
|
+
p = p + scale_x_discrete(
|
|
350
|
+
limits = intersect(case$order, unique(.meta_vals(d$meta, case$by)))
|
|
351
|
+
)
|
|
337
352
|
}
|
|
338
353
|
}
|
|
339
354
|
|
|
@@ -351,7 +366,7 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
351
366
|
}
|
|
352
367
|
if (is.null(width)) {
|
|
353
368
|
if (!is.null(case$by) && length(case$by) > 0) {
|
|
354
|
-
width = 200 * length(unique(d$meta
|
|
369
|
+
width = 200 * length(unique(.meta_vals(d$meta, case$by))) + 120
|
|
355
370
|
} else {
|
|
356
371
|
width = 100 * length(unique(d$meta$Sample)) + 120
|
|
357
372
|
}
|
|
@@ -400,7 +415,11 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
|
|
|
400
415
|
"where all values are the same (for example, where everyone has ",
|
|
401
416
|
"the same income). A Gini coefficient of one (or 100 percents ) ",
|
|
402
417
|
"expresses maximal inequality among values (for example where only ",
|
|
403
|
-
"one person has all the income).")
|
|
418
|
+
"one person has all the income)."),
|
|
419
|
+
d50 = paste0(
|
|
420
|
+
"the D50 index. ",
|
|
421
|
+
"It is the number of types that are needed to cover 50% of the total
|
|
422
|
+
abundance.")
|
|
404
423
|
)
|
|
405
424
|
)
|
|
406
425
|
),
|
|
@@ -705,6 +724,8 @@ run_div_case = function(casename) {
|
|
|
705
724
|
run_general(casename, d, case, ddir)
|
|
706
725
|
} else if (case$method == "gini") {
|
|
707
726
|
run_general(casename, d, case, ddir, "V1")
|
|
727
|
+
} else if (case$method == "d50") {
|
|
728
|
+
run_general(casename, d, case, ddir, "Clones")
|
|
708
729
|
} else {
|
|
709
730
|
stop(paste0("Unknown diversity method: ", case$method))
|
|
710
731
|
}
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
# immfile, outdir, mutaters, immdata, n_samples
|
|
3
3
|
|
|
4
4
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
5
|
+
log_info("# Gene usage analysis")
|
|
6
|
+
log_info("-----------------------------------")
|
|
8
7
|
|
|
9
8
|
gene_usages = {{ envs.gene_usages | r: todot="-" }}
|
|
10
9
|
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
# immfile, outdir, mutaters, immdata, n_samples
|
|
3
3
|
|
|
4
4
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
5
|
+
log_info("# K-mer analysis")
|
|
6
|
+
log_info("-----------------------------------")
|
|
8
7
|
|
|
9
8
|
kmers = {{ envs.kmers | r: todot="-" }}
|
|
10
9
|
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
# immfile, outdir, mutaters, immdata, n_samples
|
|
3
3
|
|
|
4
4
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
5
|
+
log_info("# Overlap analysis")
|
|
6
|
+
log_info("-----------------------------------")
|
|
8
7
|
|
|
9
8
|
overlaps = {{ envs.overlaps | r: todot="-" }}
|
|
10
9
|
|
|
@@ -2,9 +2,8 @@
|
|
|
2
2
|
# immfile, outdir, mutaters, immdata, n_samples
|
|
3
3
|
|
|
4
4
|
log_info("")
|
|
5
|
-
log_info("
|
|
6
|
-
log_info("
|
|
7
|
-
log_info("#####################################")
|
|
5
|
+
log_info("# Spectratyping analysis")
|
|
6
|
+
log_info("-----------------------------------")
|
|
8
7
|
|
|
9
8
|
spects = {{ envs.spects | r }}
|
|
10
9
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
log_info("")
|
|
2
|
-
log_info("
|
|
3
|
-
log_info("
|
|
4
|
-
log_info("#####################################")
|
|
2
|
+
log_info("# Clonotype tracking")
|
|
3
|
+
log_info("-----------------------------------")
|
|
5
4
|
|
|
6
5
|
trackings = {{ envs.trackings | r }}
|
|
7
6
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
log_info("")
|
|
2
|
-
log_info("
|
|
3
|
-
log_info("
|
|
4
|
-
log_info("#####################################")
|
|
2
|
+
log_info("# VJ Junction Circos Plots")
|
|
3
|
+
log_info("-----------------------------------")
|
|
5
4
|
|
|
6
5
|
# Already required by immunarch
|
|
7
6
|
library(circlize)
|
biopipen/scripts/tcr/Immunarch.R
CHANGED
|
@@ -34,7 +34,7 @@ log_info("Expanding immdata ...")
|
|
|
34
34
|
exdata = expand_immdata(immdata)
|
|
35
35
|
|
|
36
36
|
log_info("Loading metadata if provided ...")
|
|
37
|
-
if (endsWith(metafile, ".rds") || endsWith(metafile, ".RDS")) {
|
|
37
|
+
if (!is.null(metafile) && (endsWith(metafile, ".rds") || endsWith(metafile, ".RDS"))) {
|
|
38
38
|
meta = readRDS(metafile)@meta.data
|
|
39
39
|
} else if (!is.null(metafile) && nchar(metafile) > 0) {
|
|
40
40
|
meta = read.table(metafile, sep = "\t", header = TRUE, row.names = 1)
|
|
@@ -144,6 +144,7 @@ for (i in seq_len(nrow(metadata))) {
|
|
|
144
144
|
# file.symlink(normalizePath(annofile), file.path(datadir, paste0(sample, ext)))
|
|
145
145
|
}
|
|
146
146
|
|
|
147
|
+
log_info("Loading TCR data ...")
|
|
147
148
|
immdata = repLoad(datadir, .mode=mode)
|
|
148
149
|
if (mode == "single") {
|
|
149
150
|
data = immdata$data
|
|
@@ -178,6 +179,7 @@ immdata$prefix = prefix
|
|
|
178
179
|
|
|
179
180
|
saveRDS(immdata, file=rdsfile)
|
|
180
181
|
|
|
182
|
+
log_info("Saving cell-level data ...")
|
|
181
183
|
exdata <- expand_immdata(immdata, cell_id = "Barcode") %>%
|
|
182
184
|
distinct(Sample, Barcode, .keep_all = TRUE) %>%
|
|
183
185
|
mutate(Barcode = glue(paste0(prefix, "{Barcode}"))) %>%
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
# python = Sys.which({{envs.python | r}})
|
|
4
4
|
# Sys.setenv(RETICULATE_PYTHON = python)
|
|
5
5
|
# library(reticulate)
|
|
6
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
6
7
|
source("{{biopipen_dir}}/utils/single_cell.R")
|
|
7
8
|
|
|
8
9
|
library(immunarch)
|
|
@@ -97,7 +98,7 @@ clean_clustcr_output = function(clustcr_outfile, clustcr_input) {
|
|
|
97
98
|
}
|
|
98
99
|
|
|
99
100
|
run_clustcr = function() {
|
|
100
|
-
|
|
101
|
+
log_info("Running ClusTCR ...")
|
|
101
102
|
clustcr_dir = file.path(outdir, "ClusTCR_Output")
|
|
102
103
|
dir.create(clustcr_dir, showWarnings = FALSE)
|
|
103
104
|
clustcr_file = prepare_clustcr(clustcr_dir)
|
|
@@ -110,6 +111,7 @@ run_clustcr = function() {
|
|
|
110
111
|
)
|
|
111
112
|
print("Running:")
|
|
112
113
|
print(clustcr_cmd)
|
|
114
|
+
log_debug("- Running command: {clustcr_cmd}")
|
|
113
115
|
rc = system(clustcr_cmd)
|
|
114
116
|
if (rc != 0) {
|
|
115
117
|
quit(status=rc)
|
|
@@ -196,7 +198,7 @@ clean_giana_output = function(giana_outfile, giana_infile) {
|
|
|
196
198
|
}
|
|
197
199
|
|
|
198
200
|
run_giana = function() {
|
|
199
|
-
|
|
201
|
+
log_info("Running GIANA ...")
|
|
200
202
|
giana_srcdir = prepare_giana()
|
|
201
203
|
giana_input = prepare_input()
|
|
202
204
|
giana_outdir = file.path(outdir, "GIANA_Output")
|
|
@@ -228,6 +230,7 @@ run_giana = function() {
|
|
|
228
230
|
)
|
|
229
231
|
print("Running:")
|
|
230
232
|
print(giana_cmd)
|
|
233
|
+
log_debug("- Running command: {giana_cmd}")
|
|
231
234
|
rc = system(giana_cmd)
|
|
232
235
|
if (rc != 0) {
|
|
233
236
|
quit(status=rc)
|
|
@@ -276,4 +279,5 @@ if (tolower(tool) == "clustcr") {
|
|
|
276
279
|
stop(paste("Unknown tool:", tool))
|
|
277
280
|
}
|
|
278
281
|
|
|
282
|
+
log_info("Saving results ...")
|
|
279
283
|
attach_to_immdata(out)
|