biopipen 0.23.7__py3-none-any.whl → 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

@@ -1,4 +1,5 @@
1
1
  source("{{biopipen_dir}}/utils/misc.R")
2
+ source("{{biopipen_dir}}/utils/caching.R")
2
3
 
3
4
  library(Seurat)
4
5
  library(future)
@@ -33,40 +34,10 @@ envs$FindNeighbors <- .expand_dims(envs$FindNeighbors)
33
34
  log_info("Reading Seurat object ...")
34
35
  srtobj <- readRDS(srtfile)
35
36
 
36
- if (isTRUE(envs$cache)) {
37
- envs$cache <- joboutdir
38
- }
39
-
40
- if (is.character(envs$cache) && nchar(envs$cache) > 0) {
41
- log_info("Obtainning the signature ...")
42
- envs2 <- envs
43
- envs2$ncores <- NULL
44
- sig <- c(
45
- capture.output(str(srtobj)),
46
- "\n\n-------------------\n\n",
47
- capture.output(str(envs2)),
48
- "\n"
49
- )
50
- digested_sig <- digest::digest(sig, algo = "md5")
51
- cached_file <- file.path(envs$cache, paste0(digested_sig, ".cached.RDS"))
52
- if (file.exists(cached_file)) {
53
- log_info("Using cached results {cached_file}")
54
- # copy cached file to rdsfile
55
- file.copy(cached_file, rdsfile, copy.date = TRUE)
56
- quit()
57
- } else {
58
- log_info("Cached results not found.")
59
- log_info("- Current signature: {digested_sig}")
60
- # print(sig)
61
- # sigfiles <- Sys.glob(file.path(envs$cache, "*.signature.txt"))
62
- # for (sigfile in sigfiles) {
63
- # log_info("- Found cached signature file: {sigfile}")
64
- # cached_sig <- readLines(sigfile)
65
- # log_info("- Cached signature:")
66
- # print(cached_sig)
67
- # }
68
- writeLines(sig, file.path(envs$cache, paste0(digested_sig, ".signature.txt")))
69
- }
37
+ if (isTRUE(envs$cache)) { envs$cache <- joboutdir }
38
+ if (length(envs$cache) > 1) {
39
+ log_warn("Multiple cache directories (envs.cache) detected, using the first one.")
40
+ envs$cache <- envs$cache[1]
70
41
  }
71
42
 
72
43
  if (!is.null(envs$mutaters) && length(envs$mutaters) > 0) {
@@ -102,30 +73,66 @@ for (key in names(envs$cases)) {
102
73
  }
103
74
 
104
75
  log_info("- Subsetting ...")
105
- sobj <- srtobj %>% filter(!!parse_expr(case$subset))
106
-
107
- log_info("- Running RunUMAP ...")
108
- umap_args <- list_setdefault(
109
- case$RunUMAP,
110
- object = sobj,
111
- dims = 1:30,
112
- reduction = sobj@misc$integrated_new_reduction %||% "pca"
113
- )
114
- umap_args$dims <- 1:min(max(umap_args$dims), ncol(sobj) - 1)
115
- sobj <- do_call(RunUMAP, umap_args)
116
-
117
- log_info("- Running FindNeighbors ...")
118
- case$FindNeighbors$object <- sobj
119
- if (is.null(case$FindNeighbors$reduction)) {
120
- case$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
76
+ sobj <- tryCatch({
77
+ srtobj %>% filter(!!parse_expr(case$subset))
78
+ }, error = function(e) {
79
+ stop(paste0(" Error in subset: ", e$message))
80
+ })
81
+ sobj_sig <- capture.output(str(sobj))
82
+ dig_sig <- digest::digest(sobj_sig, algo = "md5")
83
+ dig_sig <- substr(dig_sig, 1, 8)
84
+ cache_dir <- NULL
85
+ if (is.character(envs$cache)) {
86
+ cache_dir <- file.path(envs$cache, paste0(dig_sig, ".seurat_cache"))
87
+ dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
88
+ writeLines(sobj_sig, file.path(cache_dir, "signature.txt"))
121
89
  }
122
- sobj <- do_call(FindNeighbors, case$FindNeighbors)
123
90
 
124
- log_info("- Running FindClusters ...")
125
- if (is.null(case$FindClusters$random.seed)) {
126
- case$FindClusters$random.seed <- 8525
91
+ cached <- get_cached(case$RunUMAP, "RunUMAP", cache_dir)
92
+ reduc_name <- case$RunUMAP$reduction.name %||% "umap"
93
+ if (is.null(cached$data)) {
94
+ log_info("- Running RunUMAP ...")
95
+ umap_args <- list_setdefault(
96
+ case$RunUMAP,
97
+ object = sobj,
98
+ dims = 1:30,
99
+ reduction = sobj@misc$integrated_new_reduction %||% "pca"
100
+ )
101
+ ncells <- ncol(sobj)
102
+ umap_args$dims <- 1:min(max(umap_args$dims), ncells - 1)
103
+ umap_method <- case$RunUMAP$umap.method %||% "uwot"
104
+ if (umap_method == "uwot" && is.null(case$RunUMAP$n.neighbors)) {
105
+ # https://github.com/satijalab/seurat/issues/4312
106
+ umap_args$n.neighbors <- min(ncells - 1, 30)
107
+ }
108
+ sobj <- do_call(RunUMAP, umap_args)
109
+ cached$data <- list(reduc = sobj@reductions[[reduc_name]], commands = sobj@commands)
110
+ save_to_cache(cached, "RunUMAP", cache_dir)
111
+ } else {
112
+ log_info("- Loading cached RunUMAP ...")
113
+ sobj@reductions[[reduc_name]] <- cached$data$reduc
114
+ sobj@commands <- cached$data$commands
115
+ }
116
+ reduc <- cached$data$reduc
117
+
118
+ cached <- get_cached(case$FindNeighbors, "FindNeighbors", cache_dir)
119
+ if (is.null(cached$data)) {
120
+ log_info("- Running FindNeighbors ...")
121
+ case$FindNeighbors$object <- sobj
122
+ if (is.null(case$FindNeighbors$reduction)) {
123
+ case$FindNeighbors$reduction <- sobj@misc$integrated_new_reduction %||% "pca"
124
+ }
125
+ sobj <- do_call(FindNeighbors, case$FindNeighbors)
126
+ cached$data <- list(graphs = sobj@graphs, commands = sobj@commands)
127
+ save_to_cache(cached, "FindNeighbors", cache_dir)
128
+ } else {
129
+ log_info("- Loading cached FindNeighbors ...")
130
+ sobj@graphs <- cached$data$graphs
131
+ sobj@commands <- cached$data$commands
127
132
  }
128
- resolution <- case$FindClusters$resolution
133
+
134
+ case$FindClusters$random.seed <- case$FindClusters$random.seed %||% 8525
135
+ resolution <- case$FindClusters$resolution %||% 0.8
129
136
  if (is.character(resolution)) {
130
137
  if (grepl(",", resolution)) {
131
138
  resolution <- as.numeric(trimws(unlist(strsplit(resolution, ","))))
@@ -133,53 +140,30 @@ for (key in names(envs$cases)) {
133
140
  resolution <- as.numeric(resolution)
134
141
  }
135
142
  }
136
- if (is.null(resolution) || length(resolution) == 1) {
137
- case$FindClusters$resolution <- resolution
138
- case$FindClusters$object <- sobj
139
- sobj <- do_call(FindClusters, case$FindClusters)
140
- levels(sobj$seurat_clusters) <- paste0("s", as.numeric(levels(sobj$seurat_clusters)) + 1)
141
- Idents(sobj) <- "seurat_clusters"
142
- sobj[[key]] <- sobj$seurat_clusters
143
- ident_table <- table(sobj[[key]])
144
- log_info("- Found {length(ident_table)} clusters:")
145
- print(ident_table)
146
- cat("\n")
147
-
148
- log_info("- Updating meta.data with subclusters...")
149
- srtobj <- AddMetaData(srtobj, metadata = sobj@meta.data[, key, drop = FALSE])
150
- srtobj[[paste0("sub_umap_", key)]] <- sobj@reductions$umap
151
- } else {
152
- log_info("- Multiple resolutions detected ...")
153
- log_info("")
154
- metadata <- NULL
155
- for (res in resolution) {
156
- findclusters_args <- case$FindClusters
157
- findclusters_args$resolution <- res
158
- findclusters_args$object <- sobj
159
- sobj1 <- do_call(FindClusters, findclusters_args)
160
- res_key <- paste0(key, "_", res)
143
+ for (res in resolution) {
144
+ case$FindClusters$resolution <- res
145
+ cached <- get_cached(case$FindClusters, paste0("FindClusters_", res), cache_dir)
146
+ res_key <- paste0("seurat_clusters_", res)
147
+ if (is.null(cached$data)) {
148
+ log_info("- Running FindClusters at resolution: {res} ...")
149
+ case$FindClusters$object <- sobj
150
+ sobj1 <- do_call(FindClusters, case$FindClusters)
161
151
  levels(sobj1$seurat_clusters) <- paste0("s", as.numeric(levels(sobj1$seurat_clusters)) + 1)
162
- Idents(sobj1) <- "seurat_clusters"
163
152
  sobj1[[res_key]] <- sobj1$seurat_clusters
164
- ident_table <- table(sobj1[[res_key]])
165
- log_info("- Found {length(ident_table)} at resolution: {res}:")
166
- print(ident_table)
167
- cat("\n")
168
-
169
- log_info("- Updating meta.data with subclusters...")
170
- metadata <- sobj1@meta.data[, res_key, drop = FALSE]
171
- srtobj <- AddMetaData(srtobj, metadata = metadata)
172
- srtobj[[paste0("sub_umap_", res_key)]] <- sobj1@reductions$umap
153
+ cached$data <- sobj1@meta.data[, res_key, drop = FALSE]
154
+ save_to_cache(cached, paste0("FindClusters_", res), cache_dir)
155
+ } else {
156
+ log_info("- Using cached FindClusters at resolution: {res} ...")
173
157
  }
174
- srtobj <- AddMetaData(srtobj, metadata = metadata, col.name = key)
175
- srtobj[[paste0("sub_umap_", key)]] <- sobj1@reductions$umap
158
+ ident_table <- table(cached$data[[res_key]])
159
+ log_info(" Found {length(ident_table)} clusters")
160
+ print(ident_table)
161
+ cat("\n")
176
162
  }
163
+ log_info("- Updating meta.data with subclusters...")
164
+ srtobj <- AddMetaData(srtobj, metadata = cached$data, col.name = key)
165
+ srtobj[[paste0("sub_umap_", key)]] <- reduc
177
166
  }
178
167
 
179
168
  log_info("Saving results ...")
180
169
  saveRDS(srtobj, file = rdsfile)
181
-
182
- if (is.character(envs$cache) && nchar(envs$cache) > 0) {
183
- log_info("Caching results to {cached_file} ...")
184
- invisible(file.copy(rdsfile, cached_file, overwrite = TRUE))
185
- }
@@ -54,7 +54,7 @@ do_one_comparison <- function(
54
54
  subset_prefix,
55
55
  groupname
56
56
  ) {
57
- print(paste(" Design:", compname, "(", case, ",", control, ")"))
57
+ log_info(paste(" Design: {compname} ({case}, {control})"))
58
58
  case_code = paste0("subset(obj, subset = ", subset_col, " == '", case, "')")
59
59
  case_obj = tryCatch({
60
60
  eval(parse(text = case_code))
@@ -62,7 +62,7 @@ do_one_comparison <- function(
62
62
  NULL
63
63
  })
64
64
  if (is.null(case_obj)) {
65
- print(" Skip (not enough cells in case)")
65
+ log_warn(" Skip (not enough cells in case)")
66
66
  return (NULL)
67
67
  }
68
68
  control_code = paste0("subset(obj, subset = ", subset_col, " == '", control, "')")
@@ -72,7 +72,7 @@ do_one_comparison <- function(
72
72
  NULL
73
73
  })
74
74
  if (is.null(control_obj)) {
75
- print(" Skip (not enough cells in control)")
75
+ log_warn(" Skip (not enough cells in control)")
76
76
  add_report(
77
77
  list(kind = "error", content = "Not enough cells in control"),
78
78
  h1 = groupname,
@@ -86,7 +86,7 @@ do_one_comparison <- function(
86
86
  odir = file.path(groupdir, paste0(subset_prefix, compname))
87
87
  dir.create(odir, showWarnings = FALSE)
88
88
  if (ncol(exprs_case) < 3 || ncol(exprs_control) < 3) {
89
- print(" Skip (not enough cells)")
89
+ log_warn(" Skip (not enough cells)")
90
90
  add_report(
91
91
  list(kind = "error", content = "Not enough cells"),
92
92
  h1 = groupname,
@@ -131,7 +131,7 @@ do_one_comparison <- function(
131
131
  }
132
132
 
133
133
  do_one_group <- function(group) {
134
- print(paste("- Group:", group, "..."))
134
+ log_info("- Group: {group} ...")
135
135
 
136
136
  genes = intersect(metabolics, rownames(sobj))
137
137
  group_code = paste0(
@@ -71,7 +71,7 @@ num_of_pathways <- function(gmtfile, overlapgenes) {
71
71
  }
72
72
 
73
73
  do_one_subset <- function(s, subset_col, subset_prefix) {
74
- print(paste0(" Processing subset: ", s, "..."))
74
+ log_info(" Processing subset: {s} ...")
75
75
  if (is.null(s)) {
76
76
  subset_dir <- file.path(outdir, "ALL")
77
77
  dir.create(subset_dir, showWarnings = FALSE)
@@ -118,7 +118,7 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
118
118
 
119
119
  for (pi in seq_along(pathway_names)) {
120
120
  p <- pathway_names[pi]
121
- print(paste0(" * Pathway (", pi, "): ", p, "..."))
121
+ log_info(" * Pathway ({pi}): {p} ...")
122
122
  genes <- pathways[[p]]
123
123
  genes_comm <- intersect(genes, rownames(subset_obj))
124
124
  genes_expressed <- names(rowSums(subset_obj)[rowSums(subset_obj) > 0])
@@ -312,7 +312,7 @@ do_one_subset <- function(s, subset_col, subset_prefix) {
312
312
  }
313
313
 
314
314
  do_one_subset_col <- function(subset_col, subset_prefix) {
315
- print(paste0("- Handling subset column: ", subset_col, " ..."))
315
+ log_info("- Handling subset column: {subset_col} ...")
316
316
  if (is.null(subset_col)) {
317
317
  do_one_subset(NULL, subset_col = NULL, subset_prefix = NULL)
318
318
  } else {
@@ -2,9 +2,8 @@
2
2
  # immfile, outdir, mutaters, immdata, n_samples
3
3
 
4
4
  log_info("")
5
- log_info("#####################################")
6
- log_info("# Basic analysis #")
7
- log_info("#####################################")
5
+ log_info("# Basic analysis")
6
+ log_info("-----------------------------------")
8
7
 
9
8
  volumes = {{envs.volumes | r}}
10
9
  lens = {{envs.lens | r}}
@@ -2,9 +2,8 @@
2
2
  # immfile, outdir, mutaters, immdata, n_samples
3
3
 
4
4
  log_info("")
5
- log_info("#####################################")
6
- log_info("# Clonality analysis #")
7
- log_info("#####################################")
5
+ log_info("# Clonality analysis")
6
+ log_info("-----------------------------------")
8
7
 
9
8
  top_clones = {{envs.top_clones | r}}
10
9
  rare_clones = {{envs.rare_clones | r}}
@@ -1,30 +1,34 @@
1
1
  # Diversity estimation
2
+ source("{{biopipen_dir}}/scripts/tcr/immunarch-patched.R")
2
3
  # https://immunarch.com/articles/web_only/v6_diversity.html
3
4
 
4
5
  log_info("")
5
- log_info("#####################################")
6
- log_info("# Diversity estimation #")
7
- log_info("#####################################")
6
+ log_info("# Diversity estimation")
7
+ log_info("-----------------------------------")
8
8
 
9
9
  # Other variables are loaded in the parent template
10
10
  # immdata is already loaded, meta is mutated
11
- div_method = {{envs.divs.method | r}}
12
- div_by = {{envs.divs.by | r}}
13
- div_order = {{envs.divs.order | r}}
14
- div_args = {{envs.divs.args | r: todot="-"}}
15
- div_test = {{envs.divs.test | r}}
16
- div_cases = {{envs.divs.cases | r: todot="-"}}
17
- div_devpars = {{envs.divs.devpars | r}}
18
- div_separate_by = {{envs.divs.separate_by | r}}
19
- div_split_by = {{envs.divs.split_by | r}}
20
- div_split_order = {{envs.divs.split_order | r}}
21
- div_align_x = {{envs.divs.align_x | r}}
22
- div_align_y = {{envs.divs.align_y | r}}
23
- div_subset = {{envs.divs.subset | r}}
24
- div_log = {{envs.divs.log | r}}
25
- div_ncol = {{envs.divs.ncol | r}}
26
- div_ymin = {{envs.divs.ymin | r}}
27
- div_ymax = {{envs.divs.ymax | r}}
11
+ div_method = {{envs.divs.method | default: "gini" | r}}
12
+ div_by = {{envs.divs.by | default: None | r}}
13
+ div_plot_type = {{envs.divs.plot_type | default: "bar" | r}}
14
+ div_order = {{envs.divs.order | default: [] | r}}
15
+ div_args = {{envs.divs.args | default: {} | r: todot="-"}}
16
+ div_test = {{envs.divs.test | default: None | r}}
17
+ div_cases = {{envs.divs.cases | default: {} | r: todot="-"}}
18
+ div_devpars = {{envs.divs.devpars | default: None | r}}
19
+ div_separate_by = {{envs.divs.separate_by | default: None | r}}
20
+ div_split_by = {{envs.divs.split_by | default: None | r}}
21
+ div_split_order = {{envs.divs.split_order | default: None | r}}
22
+ div_align_x = {{envs.divs.align_x | default: False | r}}
23
+ div_align_y = {{envs.divs.align_y | default: False | r}}
24
+ div_subset = {{envs.divs.subset | default: None | r}}
25
+ div_log = {{envs.divs.log | default: False | r}}
26
+ div_ncol = {{envs.divs.ncol | default: 2 | r}}
27
+ div_ymin = {{envs.divs.ymin | default: None | r}}
28
+ div_ymax = {{envs.divs.ymax | default: None | r}}
29
+
30
+ div_test = div_test %||% list(method = "none", padjust = "none")
31
+ div_devpars = div_devpars %||% list(res = 100, width = 800, height = 800)
28
32
 
29
33
  div_dir = file.path(outdir, "diversity")
30
34
  dir.create(div_dir, showWarnings = FALSE)
@@ -38,6 +42,7 @@ update_case = function(case, name) {
38
42
  if (!is.null(case$by) && nchar(case$by) > 0) {
39
43
  case$by = unlist(strsplit(case$by, ",")) %>% trimws()
40
44
  }
45
+ case$plot_type <- case$plot_type %||% div_plot_type
41
46
  case$order <- case$order %||% div_order
42
47
  case$args <- case$args %||% div_args
43
48
  for (name in names(case$args)) {
@@ -85,23 +90,6 @@ update_case = function(case, name) {
85
90
  return (case)
86
91
  }
87
92
 
88
- # See https://github.com/immunomind/immunarch/pull/341
89
- vis.immunr_gini <- function(.data, .by = NA, .meta = NA,
90
- .errorbars = c(0.025, 0.975), .errorbars.off = FALSE,
91
- .points = TRUE, .test = TRUE, .signif.label.size = 3.5, ...) {
92
- # repDiversity(..., .method = "gini") generates a matrix
93
- .data = data.frame(Sample = rownames(.data), Value = .data[, 1])
94
- vis_bar(
95
- .data = .data, .by = .by, .meta = .meta,
96
- .errorbars = .errorbars, .errorbars.off = .errorbars.off, .stack = FALSE,
97
- .points = .points, .test = .test, .signif.label.size = .signif.label.size,
98
- .defgroupby = "Sample", .grouping.var = "Group",
99
- .labs = c(NA, "Gini coefficient"),
100
- .title = "Gini coefficient", .subtitle = "Sample diversity estimation using the Gini coefficient",
101
- .legend = NA, .leg.title = NA
102
- )
103
- }
104
-
105
93
  if (is.null(div_cases) || length(div_cases) == 0) {
106
94
  if (is.null(div_method) || length(div_method) == 0 || nchar(div_method) == 0) {
107
95
  stop("No method is specified for diversity estimation")
@@ -176,6 +164,15 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
176
164
  col.names = TRUE
177
165
  )
178
166
 
167
+ .meta_vals <- function(meta, cols) {
168
+ if (length(cols) == 1) {
169
+ return (meta[[cols]])
170
+ }
171
+
172
+ vlist = lapply(cols, function(.x) meta[[.x]])
173
+ do.call(function(...) paste(..., sep = "; "), vlist)
174
+ }
175
+
179
176
  # plot
180
177
  # by, order, separate_by, align_y
181
178
  n_seps = 1
@@ -189,11 +186,19 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
189
186
  metas = metas[intersect(case$split_order, names(metas))]
190
187
  }
191
188
  ps = lapply(metas, function(meta) {
192
- .test = length(unique(meta[[case$by]])) > 1
193
- p = vis(filter_div(div, meta$Sample), .by = case$by, .meta = meta, .test = .test)
189
+ .test = length(unique(.meta_vals(meta, case$by))) > 1
190
+ p = vis(
191
+ filter_div(div, meta$Sample),
192
+ .by = case$by,
193
+ .meta = meta,
194
+ .test = .test,
195
+ .plot.type = case$plot_type
196
+ )
194
197
  p = p + xlab(paste0(case$separate_by, ": ", meta[[case$separate_by]][1], ")"))
195
198
  if (!is.null(case$order) && length(case$order) > 0) {
196
- p = p + scale_x_discrete(limits = intersect(case$order, unique(meta[[case$by]])))
199
+ p = p + scale_x_discrete(
200
+ limits = intersect(case$order, unique(.meta_vals(meta, case$by)))
201
+ )
197
202
  }
198
203
  if (!is.null(case$ymin) && !is.null(case$ymax)) {
199
204
  p = p + ylim(c(case$ymin, case$ymax))
@@ -217,10 +222,18 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
217
222
  }
218
223
  .i = 0
219
224
  ps = lapply(metas, function(meta) {
220
- nby = length(unique(meta[[case$by]]))
221
- p = vis(filter_div(div, meta$Sample), .by = case$by, .meta = meta, .test = nby > 1)
225
+ nby = length(unique(.meta_vals(meta, case$by)))
226
+ p = vis(
227
+ filter_div(div, meta$Sample),
228
+ .by = case$by,
229
+ .meta = meta,
230
+ .test = nby > 1,
231
+ .plot.type = case$plot_type
232
+ )
222
233
  if (!is.null(case$order) && length(case$order) > 0) {
223
- p = p + scale_x_discrete(limits = intersect(case$order, unique(meta[[case$by]])))
234
+ p = p + scale_x_discrete(
235
+ limits = intersect(case$order, unique(.meta_vals(meta, case$by)))
236
+ )
224
237
  }
225
238
  p = p + xlab(meta[[case$split_by]][1]) + theme(
226
239
  axis.text.x = element_blank(),
@@ -253,10 +266,10 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
253
266
  plots = lapply(ps, function(x) x$p + ylim(c(ymin, ymax)))
254
267
  p = wrap_plots(plots, widths = widths, guides = "collect")
255
268
  } else {
256
- .test = length(unique(d$meta[[case$by]])) > 1
257
- p = vis(div, .by = case$by, .meta = d$meta, .test = .test)
269
+ .test = length(unique(.meta_vals(d$meta, case$by))) > 1
270
+ p = vis(div, .by = case$by, .meta = d$meta, .test = .test, .plot.type = case$plot_type)
258
271
  if (!is.null(case$order) && length(case$order) > 0) {
259
- p = p + scale_x_discrete(limits = intersect(case$order, unique(d$meta[[case$by]])))
272
+ p = p + scale_x_discrete(limits = intersect(case$order, unique(.meta_vals(d$meta, case$by))))
260
273
  }
261
274
  }
262
275
  } else if (!is.null(case$separate_by)) {
@@ -333,7 +346,9 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
333
346
  } else {
334
347
  p = vis(div)
335
348
  if (!is.null(case$order) && length(case$order) > 0) {
336
- p = p + scale_x_discrete(limits = intersect(case$order, unique(d$meta[[case$by]])))
349
+ p = p + scale_x_discrete(
350
+ limits = intersect(case$order, unique(.meta_vals(d$meta, case$by)))
351
+ )
337
352
  }
338
353
  }
339
354
 
@@ -351,7 +366,7 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
351
366
  }
352
367
  if (is.null(width)) {
353
368
  if (!is.null(case$by) && length(case$by) > 0) {
354
- width = 200 * length(unique(d$meta[[case$by]])) + 120
369
+ width = 200 * length(unique(.meta_vals(d$meta, case$by))) + 120
355
370
  } else {
356
371
  width = 100 * length(unique(d$meta$Sample)) + 120
357
372
  }
@@ -400,7 +415,11 @@ run_general = function(casename, d, case, ddir, value_col = "Value") {
400
415
  "where all values are the same (for example, where everyone has ",
401
416
  "the same income). A Gini coefficient of one (or 100 percents ) ",
402
417
  "expresses maximal inequality among values (for example where only ",
403
- "one person has all the income).")
418
+ "one person has all the income)."),
419
+ d50 = paste0(
420
+ "the D50 index. ",
421
+ "It is the number of types that are needed to cover 50% of the total
422
+ abundance.")
404
423
  )
405
424
  )
406
425
  ),
@@ -705,6 +724,8 @@ run_div_case = function(casename) {
705
724
  run_general(casename, d, case, ddir)
706
725
  } else if (case$method == "gini") {
707
726
  run_general(casename, d, case, ddir, "V1")
727
+ } else if (case$method == "d50") {
728
+ run_general(casename, d, case, ddir, "Clones")
708
729
  } else {
709
730
  stop(paste0("Unknown diversity method: ", case$method))
710
731
  }
@@ -2,9 +2,8 @@
2
2
  # immfile, outdir, mutaters, immdata, n_samples
3
3
 
4
4
  log_info("")
5
- log_info("#####################################")
6
- log_info("# Gene usage analysis #")
7
- log_info("#####################################")
5
+ log_info("# Gene usage analysis")
6
+ log_info("-----------------------------------")
8
7
 
9
8
  gene_usages = {{ envs.gene_usages | r: todot="-" }}
10
9
 
@@ -2,9 +2,8 @@
2
2
  # immfile, outdir, mutaters, immdata, n_samples
3
3
 
4
4
  log_info("")
5
- log_info("#####################################")
6
- log_info("# K-mer analysis #")
7
- log_info("#####################################")
5
+ log_info("# K-mer analysis")
6
+ log_info("-----------------------------------")
8
7
 
9
8
  kmers = {{ envs.kmers | r: todot="-" }}
10
9
 
@@ -2,9 +2,8 @@
2
2
  # immfile, outdir, mutaters, immdata, n_samples
3
3
 
4
4
  log_info("")
5
- log_info("#####################################")
6
- log_info("# Overlap analysis #")
7
- log_info("#####################################")
5
+ log_info("# Overlap analysis")
6
+ log_info("-----------------------------------")
8
7
 
9
8
  overlaps = {{ envs.overlaps | r: todot="-" }}
10
9
 
@@ -2,9 +2,8 @@
2
2
  # immfile, outdir, mutaters, immdata, n_samples
3
3
 
4
4
  log_info("")
5
- log_info("#####################################")
6
- log_info("# Spectratyping analysis #")
7
- log_info("#####################################")
5
+ log_info("# Spectratyping analysis")
6
+ log_info("-----------------------------------")
8
7
 
9
8
  spects = {{ envs.spects | r }}
10
9
 
@@ -1,7 +1,6 @@
1
1
  log_info("")
2
- log_info("#####################################")
3
- log_info("# Clonotype tracking #")
4
- log_info("#####################################")
2
+ log_info("# Clonotype tracking")
3
+ log_info("-----------------------------------")
5
4
 
6
5
  trackings = {{ envs.trackings | r }}
7
6
 
@@ -1,7 +1,6 @@
1
1
  log_info("")
2
- log_info("#####################################")
3
- log_info("# VJ Junction Circos Plots #")
4
- log_info("#####################################")
2
+ log_info("# VJ Junction Circos Plots")
3
+ log_info("-----------------------------------")
5
4
 
6
5
  # Already required by immunarch
7
6
  library(circlize)
@@ -34,7 +34,7 @@ log_info("Expanding immdata ...")
34
34
  exdata = expand_immdata(immdata)
35
35
 
36
36
  log_info("Loading metadata if provided ...")
37
- if (endsWith(metafile, ".rds") || endsWith(metafile, ".RDS")) {
37
+ if (!is.null(metafile) && (endsWith(metafile, ".rds") || endsWith(metafile, ".RDS"))) {
38
38
  meta = readRDS(metafile)@meta.data
39
39
  } else if (!is.null(metafile) && nchar(metafile) > 0) {
40
40
  meta = read.table(metafile, sep = "\t", header = TRUE, row.names = 1)
@@ -144,6 +144,7 @@ for (i in seq_len(nrow(metadata))) {
144
144
  # file.symlink(normalizePath(annofile), file.path(datadir, paste0(sample, ext)))
145
145
  }
146
146
 
147
+ log_info("Loading TCR data ...")
147
148
  immdata = repLoad(datadir, .mode=mode)
148
149
  if (mode == "single") {
149
150
  data = immdata$data
@@ -178,6 +179,7 @@ immdata$prefix = prefix
178
179
 
179
180
  saveRDS(immdata, file=rdsfile)
180
181
 
182
+ log_info("Saving cell-level data ...")
181
183
  exdata <- expand_immdata(immdata, cell_id = "Barcode") %>%
182
184
  distinct(Sample, Barcode, .keep_all = TRUE) %>%
183
185
  mutate(Barcode = glue(paste0(prefix, "{Barcode}"))) %>%
@@ -3,6 +3,7 @@
3
3
  # python = Sys.which({{envs.python | r}})
4
4
  # Sys.setenv(RETICULATE_PYTHON = python)
5
5
  # library(reticulate)
6
+ source("{{biopipen_dir}}/utils/misc.R")
6
7
  source("{{biopipen_dir}}/utils/single_cell.R")
7
8
 
8
9
  library(immunarch)
@@ -97,7 +98,7 @@ clean_clustcr_output = function(clustcr_outfile, clustcr_input) {
97
98
  }
98
99
 
99
100
  run_clustcr = function() {
100
- print(paste("Using tool:", "ClusTCR"))
101
+ log_info("Running ClusTCR ...")
101
102
  clustcr_dir = file.path(outdir, "ClusTCR_Output")
102
103
  dir.create(clustcr_dir, showWarnings = FALSE)
103
104
  clustcr_file = prepare_clustcr(clustcr_dir)
@@ -110,6 +111,7 @@ run_clustcr = function() {
110
111
  )
111
112
  print("Running:")
112
113
  print(clustcr_cmd)
114
+ log_debug("- Running command: {clustcr_cmd}")
113
115
  rc = system(clustcr_cmd)
114
116
  if (rc != 0) {
115
117
  quit(status=rc)
@@ -196,7 +198,7 @@ clean_giana_output = function(giana_outfile, giana_infile) {
196
198
  }
197
199
 
198
200
  run_giana = function() {
199
- print(paste("Using tool:", "GIANA"))
201
+ log_info("Running GIANA ...")
200
202
  giana_srcdir = prepare_giana()
201
203
  giana_input = prepare_input()
202
204
  giana_outdir = file.path(outdir, "GIANA_Output")
@@ -228,6 +230,7 @@ run_giana = function() {
228
230
  )
229
231
  print("Running:")
230
232
  print(giana_cmd)
233
+ log_debug("- Running command: {giana_cmd}")
231
234
  rc = system(giana_cmd)
232
235
  if (rc != 0) {
233
236
  quit(status=rc)
@@ -276,4 +279,5 @@ if (tolower(tool) == "clustcr") {
276
279
  stop(paste("Unknown tool:", tool))
277
280
  }
278
281
 
282
+ log_info("Saving results ...")
279
283
  attach_to_immdata(out)