biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +4 -0
  3. biopipen/core/filters.py +1 -1
  4. biopipen/core/testing.py +2 -1
  5. biopipen/ns/cellranger.py +33 -3
  6. biopipen/ns/regulatory.py +4 -0
  7. biopipen/ns/scrna.py +548 -98
  8. biopipen/ns/scrna_metabolic_landscape.py +4 -0
  9. biopipen/ns/tcr.py +256 -16
  10. biopipen/ns/web.py +5 -0
  11. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +9 -9
  12. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +9 -8
  13. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +9 -9
  14. biopipen/reports/tcr/ClonalStats.svelte +1 -0
  15. biopipen/scripts/cellranger/CellRangerCount.py +55 -11
  16. biopipen/scripts/cellranger/CellRangerVdj.py +54 -8
  17. biopipen/scripts/regulatory/MotifAffinityTest.R +21 -5
  18. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +9 -2
  19. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +15 -6
  20. biopipen/scripts/regulatory/VariantMotifPlot.R +1 -1
  21. biopipen/scripts/regulatory/motifs-common.R +3 -2
  22. biopipen/scripts/scrna/AnnData2Seurat.R +2 -1
  23. biopipen/scripts/scrna/CellCellCommunication.py +26 -14
  24. biopipen/scripts/scrna/CellCellCommunicationPlots.R +23 -4
  25. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  26. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +27 -36
  27. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +42 -26
  28. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +11 -13
  29. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +5 -8
  30. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +5 -8
  31. biopipen/scripts/scrna/CellTypeAnnotation.R +26 -3
  32. biopipen/scripts/scrna/MQuad.py +25 -0
  33. biopipen/scripts/scrna/MarkersFinder.R +128 -30
  34. biopipen/scripts/scrna/ModuleScoreCalculator.R +9 -1
  35. biopipen/scripts/scrna/PseudoBulkDEG.R +113 -27
  36. biopipen/scripts/scrna/ScFGSEA.R +23 -26
  37. biopipen/scripts/scrna/ScVelo.py +20 -8
  38. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  39. biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -1
  40. biopipen/scripts/scrna/SeuratClustering.R +5 -1
  41. biopipen/scripts/scrna/SeuratMap2Ref.R +1 -2
  42. biopipen/scripts/scrna/SeuratPreparing.R +19 -11
  43. biopipen/scripts/scrna/SeuratSubClustering.R +1 -1
  44. biopipen/scripts/scrna/Slingshot.R +2 -4
  45. biopipen/scripts/scrna/TopExpressingGenes.R +1 -4
  46. biopipen/scripts/scrna/celltypist-wrapper.py +140 -4
  47. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  48. biopipen/scripts/scrna/seurat_anndata_conversion.py +18 -1
  49. biopipen/scripts/tcr/{TCRClustering.R → CDR3Clustering.R} +63 -23
  50. biopipen/scripts/tcr/ClonalStats.R +76 -35
  51. biopipen/utils/misc.py +104 -9
  52. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/METADATA +5 -2
  53. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/RECORD +55 -53
  54. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  55. biopipen/utils/common_docstrs.py +0 -103
  56. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +0 -0
@@ -1,15 +1,23 @@
1
1
  library(Seurat)
2
+ library(rlang)
3
+ library(dplyr)
4
+ library(tidyseurat)
2
5
 
3
6
  sobjfile <- {{in.sobjfile | r}}
4
7
  outfile <- {{out.outfile | r}}
5
8
  celltypes <- {{envs.cell_types | r}}
6
9
  newcol <- {{envs.newcol | r}}
10
+ ident <- {{envs.ident | r }}
7
11
  merge_same_labels <- {{envs.merge | r}}
12
+ more_cell_types <- {{envs.more_cell_types | r}}
8
13
 
9
14
  log <- biopipen.utils::get_logger()
10
15
 
11
16
  if (is.null(celltypes) || length(celltypes) == 0) {
12
17
  log$warn("No cell types are given!")
18
+ if (!is.null(more_cell_types) && length(more_cell_types) > 0) {
19
+ log$warn("`envs.celltypes` is not given, won't process `envs.more_cell_types`!")
20
+ }
13
21
 
14
22
  if (merge_same_labels) {
15
23
  log$warn("Ignoring 'envs.merge' because no cell types are given!")
@@ -19,45 +27,52 @@ if (is.null(celltypes) || length(celltypes) == 0) {
19
27
  } else {
20
28
  log$info("Loading Seurat object ...")
21
29
  sobj <- biopipen.utils::read_obj(sobjfile)
30
+ ident <- ident %||% biopipen.utils::GetIdentityColumn(sobj)
31
+ Idents(sobj) <- ident
22
32
  idents <- Idents(sobj)
23
33
  if (is.factor(idents)) {
24
34
  idents <- levels(idents)
25
35
  } else {
26
36
  idents <- as.character(unique(idents))
27
37
  }
28
-
29
- if (length(celltypes) < length(idents)) {
30
- celltypes <- c(celltypes, idents[(length(celltypes) + 1):length(idents)])
31
- } else if (length(celltypes) > length(idents)) {
32
- celltypes <- celltypes[1:length(idents)]
33
- log$warn("The length of cell types is longer than the number of clusters!")
38
+ process_celltypes <- function(ct, key = NULL) {
39
+ if (length(ct) < length(idents)) {
40
+ ct <- c(ct, idents[(length(ct) + 1):length(idents)])
41
+ } else if (length(ct) > length(idents)) {
42
+ ct <- ct[1:length(idents)]
43
+ if (is.null(key)) {
44
+ log$warn("The length of cell types is longer than the number of clusters!")
45
+ } else {
46
+ log$warn(paste0("The length of cell types for '", key, "' is longer than the number of clusters!"))
47
+ }
48
+ }
49
+ for (i in seq_along(ct)) {
50
+ if (ct[i] == "-" || ct[i] == "") {
51
+ ct[i] <- idents[i]
52
+ }
53
+ }
54
+ names(ct) <- idents
55
+ return(ct)
34
56
  }
35
- for (i in seq_along(celltypes)) {
36
- if (celltypes[i] == "-" || celltypes[i] == "") {
37
- celltypes[i] <- idents[i]
57
+
58
+ if (!is.null(more_cell_types) && length(more_cell_types) > 0) {
59
+ for (key in names(more_cell_types)) {
60
+ ct <- more_cell_types[[key]]
61
+ ct <- process_celltypes(ct, key)
62
+ log$info(paste0("Adding additional cell type annotation: '", key, "' ..."))
63
+ sobj@meta.data[[key]] <- ct[as.character(Idents(sobj))]
38
64
  }
39
65
  }
40
- names(celltypes) <- idents
66
+
67
+ celltypes <- process_celltypes(celltypes)
41
68
 
42
69
  log$info("Renaming cell types ...")
43
70
  if (is.null(newcol)) {
44
- has_na <- "NA" %in% unlist(celltypes) || anyNA(unlist(celltypes))
45
- sobj$seurat_clusters_id <- Idents(sobj)
46
- celltypes$object <- sobj
47
- sobj <- do_call(RenameIdents, celltypes)
48
- sobj$seurat_clusters <- Idents(sobj)
49
- if (has_na) {
50
- log$info("Filtering clusters if NA ...")
51
- sobj <- subset(
52
- sobj,
53
- subset = seurat_clusters != "NA" & !is.na(seurat_clusters)
54
- )
55
- }
71
+ sobj <- rename_idents(sobj, ident, celltypes)
72
+ log$info("Filtering clusters if NA ...")
73
+ sobj <- filter(sobj, !!sym(ident) != "NA" & !is.na(!!sym(ident)))
56
74
  } else {
57
- celltypes$object <- sobj
58
- sobj <- do_call(RenameIdents, celltypes)
59
- sobj[[newcol]] <- Idents(sobj)
60
- Idents(sobj) <- "seurat_clusters"
75
+ sobj[[newcol]] <- celltypes[as.character(Idents(sobj))]
61
76
  }
62
77
 
63
78
  if (merge_same_labels) {
@@ -65,5 +80,6 @@ if (is.null(celltypes) || length(celltypes) == 0) {
65
80
  sobj <- merge_clusters_with_same_labels(sobj, newcol)
66
81
  }
67
82
 
83
+ log$info("Saving Seurat object ...")
68
84
  biopipen.utils::save_obj(sobj, outfile)
69
85
  }
@@ -1,13 +1,13 @@
1
- library(Seurat)
1
+ library(rlang)
2
2
  library(dplyr)
3
3
  library(hitype)
4
- library(biopipen.utils)
5
4
 
6
5
  sobjfile = {{in.sobjfile | r}}
7
6
  outfile = {{out.outfile | r}}
8
7
  tissue = {{envs.hitype_tissue | r}}
9
8
  db = {{envs.hitype_db | r}}
10
9
  newcol = {{envs.newcol | r}}
10
+ ident = {{envs.ident | r }}
11
11
  merge_same_labels = {{envs.merge | r}}
12
12
 
13
13
  if (is.null(db)) { stop("`envs.hitype_db` is not set") }
@@ -16,6 +16,8 @@ log <- get_logger()
16
16
 
17
17
  log$info("Reading Seurat object...")
18
18
  sobj = biopipen.utils::read_obj(sobjfile)
19
+ ident <- ident %||% biopipen.utils::GetIdentityColumn(sobj)
20
+ Idents(sobj) <- ident
19
21
 
20
22
  # prepare gene sets
21
23
  log$info("Preparing gene sets...")
@@ -30,18 +32,14 @@ log$info("Running RunHitype...")
30
32
  sobj = RunHitype(sobj, gs_list, threshold = 0.0, make_unique = TRUE)
31
33
 
32
34
  log$info("Renaming cell types...")
33
- hitype_levels = sobj@meta.data %>%
34
- select(seurat_clusters, hitype) %>%
35
- distinct(seurat_clusters, .keep_all = TRUE) %>%
36
- arrange(as.numeric(seurat_clusters)) %>%
37
- pull("hitype")
35
+ hitype_labels <- sobj@meta.data %>%
36
+ distinct(!!sym(ident), hitype)
37
+ hitype_labels <- split(hitype_labels$hitype, hitype_labels[[ident]])
38
38
 
39
39
  if (is.null(newcol)) {
40
- sobj$seurat_clusters_id = sobj$seurat_clusters
41
- sobj$seurat_clusters = factor(sobj$hitype, levels = hitype_levels)
42
- Idents(sobj) = "seurat_clusters"
40
+ sobj <- rename_idents(sobj, ident, hitype_labels)
43
41
  } else {
44
- sobj[[newcol]] = factor(sobj$hitype, levels = hitype_levels)
42
+ sobj[[newcol]] = sobj$hitype
45
43
  }
46
44
 
47
45
  if (merge_same_labels) {
@@ -55,9 +53,9 @@ biopipen.utils::save_obj(sobj, outfile)
55
53
  log$info("Saving the mappings ...")
56
54
  if (is.null(newcol)) {
57
55
  celltypes = sobj@meta.data %>%
58
- group_by(seurat_clusters_id) %>%
56
+ group_by(!!sym(backup_col)) %>%
59
57
  summarize(CellType = hitype[1]) %>%
60
- select(Cluster = seurat_clusters_id, CellType) %>%
58
+ select(Cluster = !!sym(backup_col), CellType) %>%
61
59
  ungroup()
62
60
  } else {
63
61
  celltypes = sobj@meta.data %>%
@@ -6,6 +6,7 @@ sobjfile = {{in.sobjfile | r}}
6
6
  outfile = {{out.outfile | r}}
7
7
  sccatch_args = {{envs.sccatch_args | r}}
8
8
  newcol = {{envs.newcol | r}}
9
+ ident = {{envs.ident | r }}
9
10
  merge_same_labels = {{envs.merge | r}}
10
11
 
11
12
  log <- get_logger()
@@ -22,6 +23,8 @@ if (is.integer(sccatch_args$use_method)) {
22
23
 
23
24
  log$info("Reading Seurat object...")
24
25
  sobj = read_obj(sobjfile)
26
+ ident <- ident %||% GetIdentityColumn(sobj)
27
+ Idents(sobj) <- ident
25
28
 
26
29
  log$info("Running createscCATCH ...")
27
30
  obj = createscCATCH(data = GetAssayData(sobj), cluster = as.character(Idents(sobj)))
@@ -48,15 +51,9 @@ if (length(celltypes) == 0) {
48
51
  log$warn("- No cell types annotated from the database!")
49
52
  } else {
50
53
  if (is.null(newcol)) {
51
- sobj$seurat_clusters_id = Idents(sobj)
52
- celltypes$object = sobj
53
- sobj = do_call(RenameIdents, celltypes)
54
- sobj$seurat_clusters = Idents(sobj)
54
+ sobj <- rename_idents(sobj, ident, celltypes)
55
55
  } else {
56
- celltypes$object = sobj
57
- sobj = do_call(RenameIdents, celltypes)
58
- sobj[[newcol]] = Idents(sobj)
59
- Idents(sobj) = "seurat_clusters"
56
+ sobj@meta.data[[newcol]] = celltypes[as.character(Idents(sobj))]
60
57
  }
61
58
 
62
59
  if (merge_same_labels) {
@@ -11,6 +11,7 @@ outfile = {{out.outfile | r}}
11
11
  tissue = {{envs.sctype_tissue | r}}
12
12
  db = {{envs.sctype_db | r}}
13
13
  newcol = {{envs.newcol | r}}
14
+ ident = {{envs.ident | r }}
14
15
  merge_same_labels = {{envs.merge | r}}
15
16
 
16
17
  if (is.null(db)) { stop("`envs.sctype_args.db` is not set") }
@@ -19,6 +20,8 @@ log <- get_logger()
19
20
 
20
21
  log$info("Reading Seurat object...")
21
22
  sobj = biopipen.utils::read_obj(sobjfile)
23
+ ident <- ident %||% biopipen.utils::GetIdentityColumn(sobj)
24
+ Idents(sobj) <- ident
22
25
 
23
26
  # prepare gene sets
24
27
  log$info("Preparing gene sets...")
@@ -116,15 +119,9 @@ for (key in names(celltypes)) {
116
119
 
117
120
  celltypes = as.list(celltypes)
118
121
  if (is.null(newcol)) {
119
- sobj$seurat_clusters_id = sobj$seurat_clusters
120
- celltypes$object = sobj
121
- sobj = do_call(RenameIdents, celltypes)
122
- sobj$seurat_clusters = Idents(sobj)
122
+ sobj <- rename_idents(sobj, ident, celltypes)
123
123
  } else {
124
- celltypes$object = sobj
125
- sobj = do_call(RenameIdents, celltypes)
126
- sobj[[newcol]] = Idents(sobj)
127
- Idents(sobj) = "seurat_clusters"
124
+ sobj@meta.data[[newcol]] = celltypes[as.character(Idents(sobj))]
128
125
  }
129
126
  celltypes$object = NULL
130
127
  gc()
@@ -1,9 +1,15 @@
1
+ library(Seurat)
2
+ library(biopipen.utils)
1
3
  set.seed(8525)
2
4
 
3
- merge_clusters_with_same_labels <- function(sobj, newcol) {
5
+ backup_col <- {{envs.backup_col | r}}
6
+
7
+
8
+ merge_clusters_with_same_labels <- function(sobj, newcol = NULL) {
4
9
  if (is.null(newcol)) {
5
- sobj@meta.data$seurat_clusters <- sub("\\.\\d+$", "", sobj@meta.data$seurat_clusters)
6
- Idents(sobj) <- "seurat_clusters"
10
+ newcol <- biopipen.utils::GetIdentityColumn(sobj)
11
+ sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
12
+ Idents(sobj) <- newcol
7
13
  } else {
8
14
  sobj@meta.data[[newcol]] <- sub("\\.\\d+$", "", sobj@meta.data[[newcol]])
9
15
  }
@@ -11,6 +17,23 @@ merge_clusters_with_same_labels <- function(sobj, newcol) {
11
17
  sobj
12
18
  }
13
19
 
20
+ rename_idents <- function(sobj, ident_col, mapping) {
21
+ orig_ident_col <- biopipen.utils::GetIdentityColumn(sobj)
22
+ if (!identical(ident_col, orig_ident_col)) {
23
+ Idents(sobj) <- ident_col
24
+ mapping$object <- sobj
25
+ sobj <- do_call(RenameIdents, mapping)
26
+ } else {
27
+ if (!is.null(backup_col)) {
28
+ sobj@meta.data[[backup_col]] <- Idents(sobj)
29
+ }
30
+ mapping$object <- sobj
31
+ sobj <- do_call(RenameIdents, mapping)
32
+ }
33
+ sobj@meta.data[[ident_col]] <- Idents(sobj)
34
+ sobj
35
+ }
36
+
14
37
  {% if envs.tool == "hitype" %}
15
38
  {% include biopipen_dir + "/scripts/scrna/CellTypeAnnotation-hitype.R" %}
16
39
  {% elif envs.tool == "sctype" %}
@@ -0,0 +1,25 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+ from contextlib import suppress
5
+ from biopipen.core.filters import dict_to_cli_args
6
+ from biopipen.utils.misc import run_command
7
+
8
+ cellsnpout = {{in.cellsnpout | quote}} # noqa: E999 # pyright: ignore
9
+ outdir = {{out.outdir | quote}} # pyright: ignore
10
+ envs: dict = {{envs | repr}} # pyright: ignore
11
+ mquad = envs.pop("mquad")
12
+ ncores = envs.pop("ncores")
13
+ seed = envs.pop("seed", 8525)
14
+
15
+ with suppress(RuntimeError):
16
+ run_command([mquad], fg=True)
17
+ print("")
18
+
19
+ envs["cellData"] = cellsnpout
20
+ envs["outDir"] = outdir
21
+ envs["randSeed"] = seed
22
+ envs["nproc"] = ncores
23
+
24
+ cmd = [mquad, *dict_to_cli_args(envs, sep="=")]
25
+ run_command(cmd, fg=True, bufsize=1)
@@ -42,15 +42,12 @@ if (isTRUE(cache)) { cache <- joboutdir }
42
42
 
43
43
  set.seed(8525)
44
44
  if (ncores > 1) {
45
- options(future.globals.maxSize = 80000 * 1024^2)
45
+ options(future.globals.maxSize = Inf)
46
46
  plan(strategy = "multicore", workers = ncores)
47
47
  }
48
48
 
49
49
  log$info("Reading Seurat object ...")
50
50
  srtobj <- read_obj(srtfile)
51
- if (!"Identity" %in% colnames(srtobj@meta.data)) {
52
- srtobj@meta.data$Identity <- Idents(srtobj)
53
- }
54
51
 
55
52
 
56
53
  if (!is.null(mutaters) && length(mutaters) > 0) {
@@ -105,7 +102,7 @@ log$info("Expanding cases ...")
105
102
  post_casing <- function(name, case) {
106
103
  outcases <- list()
107
104
 
108
- case$group_by <- case$group_by %||% "Identity"
105
+ case$group_by <- case$group_by %||% GetIdentityColumn(srtobj)
109
106
 
110
107
  if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
111
108
  # single cases, no need to expand
@@ -165,10 +162,12 @@ post_casing <- function(name, case) {
165
162
 
166
163
  if (length(cases) == 0 && name == "Marker Discovery") {
167
164
  name <- case$each
165
+ } else {
166
+ name <- paste0(name, " (", case$each, ")")
168
167
  }
169
168
 
170
169
  for (each in eachs) {
171
- newname <- paste0(name, " - ", each)
170
+ newname <- paste0(name, "::", each)
172
171
  newcase <- case
173
172
 
174
173
  newcase$original_case <- name
@@ -266,20 +265,23 @@ process_markers <- function(markers, info, case) {
266
265
  ui = "tabs"
267
266
  )
268
267
 
269
- for (plotname in names(case$marker_plots)) {
270
- plotargs <- case$marker_plots[[plotname]]
271
- plotargs$degs <- markers
272
- rownames(plotargs$degs) <- make.unique(markers$gene)
273
- plotargs$outprefix <- file.path(info$prefix, paste0("markers.", slugify(plotname)))
274
- do_call(VizDEGs, plotargs)
275
- reporter$add2(
276
- list(
277
- name = plotname,
278
- contents = list(reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code))),
279
- hs = c(info$section, info$name),
280
- hs2 = ifelse(is.null(case$ident), "Markers", paste0("Markers (", case$ident, ")")),
281
- ui = "tabs"
282
- )
268
+ if (nrow(markers) > 0) {
269
+ for (plotname in names(case$marker_plots)) {
270
+ plotargs <- case$marker_plots[[plotname]]
271
+ plotargs$markers <- markers
272
+ plotargs$object <- case$object
273
+ plotargs$comparison_by <- case$group_by
274
+ plotargs$outprefix <- file.path(info$prefix, paste0("markers.", slugify(plotname)))
275
+ do_call(VizDEGs, plotargs)
276
+ reporter$add2(
277
+ list(
278
+ name = plotname,
279
+ contents = list(reporter$image(plotargs$outprefix, plotargs$more_formats, plotargs$save_code))),
280
+ hs = c(info$section, info$name),
281
+ hs2 = ifelse(is.null(case$ident), "Markers", paste0("Markers (", case$ident, ")")),
282
+ ui = "tabs"
283
+ )
284
+ }
283
285
  }
284
286
 
285
287
  # Do enrichment analysis
@@ -349,16 +351,29 @@ process_markers <- function(markers, info, case) {
349
351
  for (db in case$dbs) {
350
352
  plots <- list()
351
353
  for (plotname in names(case$enrich_plots)) {
352
- plotargs <- case$enrich_plots[[plotname]]
354
+ plotargs <- extract_vars(case$enrich_plots[[plotname]], "descr", allow_nonexisting = TRUE)
353
355
  plotargs$data <- enrich[enrich$Database == db, , drop = FALSE]
354
356
 
355
- p <- do_call(VizEnrichment, plotargs)
357
+ p <- tryCatch(
358
+ do_call(VizEnrichment, plotargs),
359
+ error = function(e) {
360
+ stop("Failed to plot enrichment for database '", db, "' with plot '", plotname, "': ", e$message)
361
+ }
362
+ )
356
363
 
357
364
  if (plotargs$plot_type == "bar") {
358
365
  attr(p, "height") <- attr(p, "height") / 1.5
366
+ descr <- descr %||% glue::glue(
367
+ "The bar plot shows the top enriched terms in database '{db}', ",
368
+ "the x-axis shows the -log10 of the adjusted p-values, ",
369
+ "and the y-axis shows the term names. The number next to each bar indicates the overlap gene count."
370
+ )
359
371
  }
360
372
  outprefix <- file.path(info$prefix, paste0("enrich.", slugify(db), ".", slugify(plotname)))
361
373
  save_plot(p, outprefix, plotargs$devpars, formats = "png")
374
+ if (!is.null(descr)) {
375
+ plots[[length(plots) + 1]] <- list(kind = "descr", content = glue::glue(descr))
376
+ }
362
377
  plots[[length(plots) + 1]] <- reporter$image(outprefix, c(), FALSE)
363
378
  }
364
379
  reporter$add2(
@@ -389,13 +404,18 @@ process_markers <- function(markers, info, case) {
389
404
  }
390
405
  }
391
406
 
392
- process_allmarkers <- function(markers, plotcases, casename, groupname) {
407
+ process_allmarkers <- function(markers, object, comparison_by, plotcases, casename, groupname, subset_by_group = TRUE) {
393
408
  name <- paste0(casename, "::", paste0(groupname, " (All Markers)"))
394
409
  info <- case_info(name, outdir, create = TRUE)
395
410
 
396
411
  for (plotname in names(plotcases)) {
412
+ log$info(" {plotname} ...")
397
413
  plotargs <- plotcases[[plotname]]
398
- plotargs$degs <- markers
414
+ plotargs$markers <- markers
415
+ plotargs$object <- object
416
+ plotargs$comparison_by <- comparison_by
417
+ if (subset_by_group)
418
+ plotargs$subset_by <- groupname
399
419
  plotargs$outprefix <- file.path(info$prefix, slugify(plotname))
400
420
  do_call(VizDEGs, plotargs)
401
421
  reporter$add2(
@@ -417,6 +437,7 @@ process_allenriches <- function(enriches, plotcases, casename, groupname) {
417
437
  for (db in dbs) {
418
438
  plots <- list()
419
439
  for (plotname in names(plotcases)) {
440
+ log$info(" {plotname} ({db}) ...")
420
441
  plotargs <- plotcases[[plotname]]
421
442
  plotargs <- extract_vars(plotargs, "devpars")
422
443
  plotargs$data <- enriches[enriches$Database == db, , drop = FALSE]
@@ -449,6 +470,7 @@ process_overlaps <- function(markers, ovcases, casename, groupname) {
449
470
  info <- case_info(name, outdir, create = TRUE)
450
471
 
451
472
  for (plotname in names(ovcases)) {
473
+ log$info(" {plotname} ...")
452
474
  args <- extract_vars(
453
475
  ovcases[[plotname]],
454
476
  sigm = "sigmarkers", "more_formats", "save_code", "devpars", "plot_type",
@@ -513,6 +535,7 @@ run_case <- function(name) {
513
535
  case,
514
536
  "dbs", "sigmarkers", "allmarker_plots", "allenrich_plots", "marker_plots", "enrich_plots",
515
537
  "overlaps", "original_case", "markers", "enriches", "each_name", "each", "enrich_style", "original_subset",
538
+ subset_ = "subset",
516
539
  allow_nonexisting = TRUE
517
540
  )
518
541
 
@@ -545,7 +568,16 @@ run_case <- function(name) {
545
568
  attr(markers, "group_by") <- each
546
569
  attr(markers, "ident_1") <- NULL
547
570
  attr(markers, "ident_2") <- NULL
548
- process_allmarkers(markers, allmarker_plots, name, each)
571
+ if (!is.null(markers) && nrow(markers) > 0) {
572
+ process_allmarkers(
573
+ markers,
574
+ object = if (is.null(original_subset)) srtobj else filter(srtobj, !!parse_expr(original_subset)),
575
+ comparison_by = group_by,
576
+ allmarker_plots,
577
+ name,
578
+ each
579
+ )
580
+ }
549
581
  }
550
582
 
551
583
  if (length(overlaps) > 0) {
@@ -555,7 +587,7 @@ run_case <- function(name) {
555
587
 
556
588
  }
557
589
 
558
- if (!is.null(enriches)) {
590
+ if (!is.null(enriches) && length(enriches) > 0) {
559
591
  log$info("- Summarizing enrichments in subcases (by each: {each}) ...")
560
592
  if (!is.data.frame(enriches)) {
561
593
  each_levels <- names(enriches)
@@ -571,8 +603,30 @@ run_case <- function(name) {
571
603
  enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
572
604
  }
573
605
 
574
- if (length(allenrich_plots) > 0) {
606
+ if (length(allenrich_plots) > 0 && !is.null(enriches) && nrow(enriches) > 0) {
575
607
  log$info("- Visualizing all enrichments together ...")
608
+ # add other metadata columns if any by mapping groupname
609
+ # only add the metadata columns from object if there is a single value mapped
610
+ metacols <- srtobj@meta.data %>% group_by(!!sym(each)) %>%
611
+ summarize(across(everything(), ~ n_distinct(.) == 1), .groups = "keep") %>%
612
+ select(where(~ all(. == TRUE))) %>%
613
+ colnames()
614
+
615
+ if (length(metacols) > 1) {
616
+ metadf <- srtobj@meta.data[, metacols, drop = FALSE] %>%
617
+ distinct(!!sym(each), .keep_all = TRUE)
618
+
619
+ for (col in setdiff(metacols, each)) {
620
+ if (col %in% colnames(enriches)) {
621
+ warning("Column name conflict: {col}, adding with suffix '_meta'", immediate. = TRUE)
622
+ metadf[[paste0(col, "_meta")]] <- metadf[[col]]
623
+ metadf[[col]] <- NULL
624
+ }
625
+ }
626
+
627
+ enriches <- left_join(enriches, metadf, by = each)
628
+ }
629
+
576
630
  process_allenriches(enriches, allenrich_plots, name, each)
577
631
  }
578
632
  }
@@ -580,11 +634,15 @@ run_case <- function(name) {
580
634
  return(invisible())
581
635
  }
582
636
 
637
+ # Let RunSeuratDEAnalysis handle the subset
638
+ case$subset <- subset_
583
639
  case$object <- srtobj
584
640
  markers <- do_call(RunSeuratDEAnalysis, case)
585
- case$object <- NULL
641
+ case$object <- NULL # Release memory
586
642
  gc()
587
643
 
644
+ subobj <- if (is.null(subset_)) srtobj else filter(srtobj, !!parse_expr(subset_))
645
+
588
646
  if (is.null(case$ident_1)) {
589
647
  all_idents <- unique(as.character(markers[[case$group_by]]))
590
648
  enriches <- list()
@@ -596,7 +654,9 @@ run_case <- function(name) {
596
654
 
597
655
  attr(ident_markers, "ident_1") <- ident
598
656
  enrich <- process_markers(ident_markers, info = info, case = list(
657
+ object = subobj,
599
658
  dbs = dbs,
659
+ group_by = case$group_by,
600
660
  sigmarkers = sigmarkers,
601
661
  enrich_style = enrich_style,
602
662
  marker_plots = marker_plots,
@@ -609,7 +669,14 @@ run_case <- function(name) {
609
669
 
610
670
  if (length(allmarker_plots) > 0) {
611
671
  log$info("- Visualizing all markers together ...")
612
- process_allmarkers(markers, allmarker_plots, name, case$group_by)
672
+ process_allmarkers(
673
+ markers,
674
+ object = subobj,
675
+ comparison_by = case$group_by,
676
+ plotcases = allmarker_plots,
677
+ casename = name,
678
+ groupname = case$group_by,
679
+ subset_by_group = FALSE)
613
680
  }
614
681
 
615
682
  if (length(overlaps) > 0) {
@@ -619,12 +686,41 @@ run_case <- function(name) {
619
686
 
620
687
  if (length(allenrich_plots) > 0) {
621
688
  log$info("- Visualizing all enrichments together ...")
689
+ # add other metadata columns if any by mapping groupname
690
+ # only add the metadata columns from object if there is a single value mapped
691
+ metacols <- subobj@meta.data %>% group_by(!!sym(case$group_by)) %>%
692
+ summarize(across(everything(), ~ n_distinct(.) == 1), .groups = "keep") %>%
693
+ select(where(~ all(. == TRUE))) %>%
694
+ colnames()
695
+
696
+ if (length(metacols) > 1) {
697
+ metadf <- subobj@meta.data[, metacols, drop = FALSE] %>%
698
+ distinct(!!sym(case$group_by), .keep_all = TRUE)
699
+
700
+ for (col in setdiff(metacols, case$group_by)) {
701
+ if (col %in% colnames(enriches[[1]])) {
702
+ warning("Column name conflict: {col}, adding with suffix '_meta'", immediate. = TRUE)
703
+ metadf[[paste0(col, "_meta")]] <- metadf[[col]]
704
+ metadf[[col]] <- NULL
705
+ }
706
+ }
707
+
708
+ for (ne in names(enriches)) {
709
+ if (!case$group_by %in% colnames(enriches[[ne]])) {
710
+ enriches[[ne]][[case$group_by]] <- ne
711
+ }
712
+ enriches[[ne]] <- left_join(enriches[[ne]], metadf, by = case$group_by)
713
+ }
714
+ }
715
+ enriches <- do_call(rbind, enriches)
622
716
  process_allenriches(enriches, allenrich_plots, name, case$group_by)
623
717
  }
624
718
  } else {
625
719
  info <- case_info(name, outdir, create = TRUE)
626
720
  enrich <- process_markers(markers, info = info, case = list(
721
+ object = subobj,
627
722
  dbs = dbs,
723
+ group_by = case$group_by,
628
724
  sigmarkers = sigmarkers,
629
725
  enrich_style = enrich_style,
630
726
  marker_plots = marker_plots,
@@ -634,7 +730,9 @@ run_case <- function(name) {
634
730
  ))
635
731
 
636
732
  if (!is.null(original_case) && !is.null(cases[[original_case]])) {
637
- markers[[each_name]] <- each
733
+ if (nrow(markers) > 0) {
734
+ markers[[each_name]] <- each
735
+ }
638
736
  cases[[original_case]]$markers[[each]] <<- markers
639
737
  cases[[original_case]]$enriches[[each]] <<- enrich
640
738
  }
@@ -1,11 +1,13 @@
1
- library(Seurat)
1
+ library(rlang)
2
2
  library(dplyr)
3
+ library(Seurat)
3
4
  library(biopipen.utils)
4
5
 
5
6
  sobjfile <- {{in.srtobj | r}}
6
7
  outfile <- {{out.rdsfile | r}}
7
8
  defaults <- {{envs.defaults | r}}
8
9
  modules <- {{envs.modules | r}}
10
+ post_mutaters <- {{envs.post_mutaters | r}}
9
11
 
10
12
  log <- get_logger()
11
13
 
@@ -134,6 +136,12 @@ for (key in names(modules)) {
134
136
  }
135
137
  }
136
138
 
139
+ if (!is.null(post_mutaters) && length(post_mutaters) > 0) {
140
+ log$info("Applying post mutaters ...")
141
+ sobj@meta.data <- sobj@meta.data %>%
142
+ mutate(!!!lapply(post_mutaters, parse_expr))
143
+ }
144
+
137
145
  # save seurat object
138
146
  log$info("Saving Seurat object ...")
139
147
  save_obj(sobj, outfile)