biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +4 -0
  3. biopipen/core/filters.py +1 -1
  4. biopipen/core/testing.py +2 -1
  5. biopipen/ns/cellranger.py +33 -3
  6. biopipen/ns/regulatory.py +4 -0
  7. biopipen/ns/scrna.py +548 -98
  8. biopipen/ns/scrna_metabolic_landscape.py +4 -0
  9. biopipen/ns/tcr.py +256 -16
  10. biopipen/ns/web.py +5 -0
  11. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +9 -9
  12. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +9 -8
  13. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +9 -9
  14. biopipen/reports/tcr/ClonalStats.svelte +1 -0
  15. biopipen/scripts/cellranger/CellRangerCount.py +55 -11
  16. biopipen/scripts/cellranger/CellRangerVdj.py +54 -8
  17. biopipen/scripts/regulatory/MotifAffinityTest.R +21 -5
  18. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +9 -2
  19. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +15 -6
  20. biopipen/scripts/regulatory/VariantMotifPlot.R +1 -1
  21. biopipen/scripts/regulatory/motifs-common.R +3 -2
  22. biopipen/scripts/scrna/AnnData2Seurat.R +2 -1
  23. biopipen/scripts/scrna/CellCellCommunication.py +26 -14
  24. biopipen/scripts/scrna/CellCellCommunicationPlots.R +23 -4
  25. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  26. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +27 -36
  27. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +42 -26
  28. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +11 -13
  29. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +5 -8
  30. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +5 -8
  31. biopipen/scripts/scrna/CellTypeAnnotation.R +26 -3
  32. biopipen/scripts/scrna/MQuad.py +25 -0
  33. biopipen/scripts/scrna/MarkersFinder.R +128 -30
  34. biopipen/scripts/scrna/ModuleScoreCalculator.R +9 -1
  35. biopipen/scripts/scrna/PseudoBulkDEG.R +113 -27
  36. biopipen/scripts/scrna/ScFGSEA.R +23 -26
  37. biopipen/scripts/scrna/ScVelo.py +20 -8
  38. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  39. biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -1
  40. biopipen/scripts/scrna/SeuratClustering.R +5 -1
  41. biopipen/scripts/scrna/SeuratMap2Ref.R +1 -2
  42. biopipen/scripts/scrna/SeuratPreparing.R +19 -11
  43. biopipen/scripts/scrna/SeuratSubClustering.R +1 -1
  44. biopipen/scripts/scrna/Slingshot.R +2 -4
  45. biopipen/scripts/scrna/TopExpressingGenes.R +1 -4
  46. biopipen/scripts/scrna/celltypist-wrapper.py +140 -4
  47. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  48. biopipen/scripts/scrna/seurat_anndata_conversion.py +18 -1
  49. biopipen/scripts/tcr/{TCRClustering.R → CDR3Clustering.R} +63 -23
  50. biopipen/scripts/tcr/ClonalStats.R +76 -35
  51. biopipen/utils/misc.py +104 -9
  52. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/METADATA +5 -2
  53. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/RECORD +55 -53
  54. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  55. biopipen/utils/common_docstrs.py +0 -103
  56. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +0 -0
@@ -1,6 +1,7 @@
1
1
  library(rlang)
2
2
  library(dplyr)
3
3
  library(plotthis)
4
+ library(Seurat)
4
5
  library(biopipen.utils)
5
6
 
6
7
  sobjfile <- {{in.sobjfile | r}}
@@ -8,7 +9,9 @@ outdir <- {{out.outdir | r}}
8
9
  joboutdir <- {{job.outdir | r}}
9
10
  each <- {{envs.each | r}}
10
11
  subset <- {{envs.subset | r}}
12
+ ncores <- {{envs.ncores | r}}
11
13
  mutaters <- {{envs.mutaters | r}}
14
+ cache <- {{ envs.cache | r }}
12
15
  aggregate_by <- {{envs.aggregate_by | r}}
13
16
  layer <- {{envs.layer | r}}
14
17
  assay <- {{envs.assay | r}}
@@ -35,6 +38,7 @@ overlaps <- {{ envs.overlaps | r }}
35
38
  cases <- {{envs.cases | r}}
36
39
 
37
40
  aggregate_by <- unique(c(aggregate_by, group_by, paired_by, each))
41
+ if (isTRUE(cache)) { cache <- joboutdir }
38
42
 
39
43
  log <- get_logger()
40
44
  reporter <- get_reporter()
@@ -74,10 +78,12 @@ defaults <- list(
74
78
  ident_1 = ident_1,
75
79
  ident_2 = ident_2,
76
80
  dbs = dbs,
81
+ ncores = ncores,
77
82
  sigmarkers = sigmarkers,
78
83
  enrich_style = enrich_style,
79
84
  paired_by = paired_by,
80
85
  tool = tool,
86
+ cache = cache,
81
87
  allmarker_plots_defaults = allmarker_plots_defaults,
82
88
  allmarker_plots = allmarker_plots,
83
89
  allenrich_plots_defaults = allenrich_plots_defaults,
@@ -131,12 +137,14 @@ expand_each <- function(name, case) {
131
137
 
132
138
  if (length(cases) == 0 && name == "DEG Analysis") {
133
139
  name <- case$each
140
+ } else {
141
+ name <- paste0(name, " (", case$each, ")")
134
142
  }
135
143
 
136
144
  case$aggregate_by <- unique(c(case$aggregate_by, case$group_by, case$paired_by, case$each))
137
145
 
138
146
  for (each in eachs) {
139
- newname <- paste0(case$each, "::", each)
147
+ newname <- paste0(name, "::", each)
140
148
  newcase <- case
141
149
 
142
150
  newcase$original_case <- name
@@ -179,6 +187,7 @@ expand_each <- function(name, case) {
179
187
  if (length(case$overlaps) > 0 || length(case$allmarker_plots) > 0 || length(case$allenrich_plots) > 0) {
180
188
  ovcase <- case
181
189
 
190
+ ovcase$allexprs <- list()
182
191
  ovcase$markers <- list()
183
192
  ovcase$allmarker_plots <- lapply(
184
193
  ovcase$allmarker_plots,
@@ -212,7 +221,52 @@ process_markers <- function(markers, info, case) {
212
221
  # markers <- markers %>%
213
222
  # mutate(gene = as.character(gene)) %>%
214
223
  # arrange(p_val_adj, desc(abs(avg_log2FC)))
224
+
225
+ empty <- if (case$enrich_style == "enrichr") {
226
+ data.frame(
227
+ Database = character(0),
228
+ Term = character(0),
229
+ Overlap = character(0),
230
+ P.value = numeric(0),
231
+ Adjusted.P.value = numeric(0),
232
+ Odds.Ratio = numeric(0),
233
+ Combined.Score = numeric(0),
234
+ Genes = character(0),
235
+ Rank = numeric(0)
236
+ )
237
+ } else { # clusterProfiler
238
+ data.frame(
239
+ ID = character(0),
240
+ Description = character(0),
241
+ GeneRatio = character(0),
242
+ BgRatio = character(0),
243
+ Count = integer(0),
244
+ pvalue = numeric(0),
245
+ p.adjust = numeric(0),
246
+ qvalue = numeric(0),
247
+ geneID = character(0),
248
+ Database = character(0)
249
+ )
250
+ }
251
+ if (is.null(markers) || nrow(markers) == 0) {
252
+ if (case$error) {
253
+ stop("Error: No markers found in case '", info$name, "'.")
254
+ } else {
255
+ log$warn("! Warning: No markers found in case '", info$name, "'.")
256
+ reporter$add2(
257
+ list(
258
+ name = "Warning",
259
+ contents = list(list(kind = "error", content = "No markers found.", kind_ = "warning"))),
260
+ hs = c(info$section, info$name),
261
+ hs2 = "DEG Analysis",
262
+ ui = "tabs"
263
+ )
264
+ return(empty)
265
+ }
266
+ }
215
267
  markers$gene <- as.character(markers$gene)
268
+ markers$p_val_adj <- as.numeric(markers$p_val_adj)
269
+ markers$log2FC <- as.numeric(markers$log2FC)
216
270
  markers <- markers[order(markers$p_val_adj, -abs(markers$log2FC)), ]
217
271
 
218
272
  # Save markers
@@ -287,7 +341,7 @@ process_markers <- function(markers, info, case) {
287
341
  stop("Error: Not enough significant DEGs with '", case$sigmarkers, "' in case '", info$name, "' found (< 5) for enrichment analysis.")
288
342
  } else {
289
343
  message <- paste0("Not enough significant DEGs with '", case$sigmarkers, "' found (< 5) for enrichment analysis.")
290
- log$warn(" ! Error: {message}")
344
+ log$warn("! Error: {message}")
291
345
  reporter$add2(
292
346
  list(
293
347
  name = "Warning",
@@ -345,7 +399,7 @@ process_markers <- function(markers, info, case) {
345
399
  if (case$error) {
346
400
  stop("Error: ", e$message)
347
401
  } else {
348
- log$warn(" ! Error: {e$message}")
402
+ log$warn("! Error: {e$message}")
349
403
  reporter$add2(
350
404
  list(
351
405
  name = "Warning",
@@ -478,6 +532,7 @@ process_overlaps <- function(markers, ovcases, casename, groupname) {
478
532
 
479
533
  run_case <- function(name) {
480
534
  case <- cases[[name]]
535
+ log$info("----------------------------------------")
481
536
  log$info("Case: {name} ...")
482
537
 
483
538
  case <- extract_vars(
@@ -485,18 +540,21 @@ run_case <- function(name) {
485
540
  "dbs", "sigmarkers", "allmarker_plots", "allenrich_plots", "marker_plots", "enrich_plots",
486
541
  "overlaps", "original_case", "markers", "enriches", "each_name", "each", "enrich_style",
487
542
  "aggregate_by", "subset", "layer", "assay", "group_by", "ident_1", "ident_2", "original_subset",
488
- "paired_by", "tool", "error",
543
+ "paired_by", "tool", "error", "ncores", "cache", "allexprs",
489
544
  allow_nonexisting = TRUE
490
545
  )
491
546
 
492
547
  if (!is.null(markers) || !is.null(enriches)) {
493
- if (!is.null(markers)) { # It is the overlap/allmarker case
494
- log$info("- Summarizing DEGs in subcases (by each: {each}) ...")
548
+ if (!is.null(markers) && length(markers) > 0) {
549
+ log$info("Summarizing DEGs in subcases (by each: {each}) ...")
495
550
  # handle the overlaps / allmarkers analysis here
496
551
  if (!is.data.frame(markers)) {
497
552
  each_levels <- names(markers)
498
553
  markers <- do_call(rbind, lapply(each_levels, function(x) {
499
554
  markers_df <- markers[[x]]
555
+ if (is.null(markers_df) || nrow(markers_df) == 0) {
556
+ return(NULL)
557
+ }
500
558
  if (nrow(markers_df) > 0) {
501
559
  markers_df[[each]] <- x
502
560
  } else {
@@ -508,17 +566,17 @@ run_case <- function(name) {
508
566
  }
509
567
  # gene, p_val, avg_log2FC, pct.1, pct.2, p_val_adj, diff_pct, <each>
510
568
 
569
+ if (!is.data.frame(allexprs)) {
570
+ meta <- do_call(rbind, lapply(allexprs, attr, "meta"))
571
+ allexprs <- do_call(cbind, allexprs)
572
+ } else {
573
+ meta <- attr(allexprs, "meta")
574
+ }
575
+
511
576
  if (length(allmarker_plots) > 0) {
512
- log$info("- Visualizing all DEGs together ...")
513
- exprs <- AggregateExpressionPseudobulk(
514
- srtobj, aggregate_by = aggregate_by, layer = layer, assay = assay,
515
- subset = original_subset, log = log
516
- )
517
- attr(markers, "object") <- AggregateExpressionPseudobulk(
518
- srtobj, aggregate_by = aggregate_by, layer = layer, assay = assay,
519
- subset = original_subset, log = log
520
- )
521
- attr(markers, "meta") <- attr(exprs, "meta")
577
+ log$info("Visualizing all DEGs together ...")
578
+ attr(markers, "object") <- allexprs
579
+ attr(markers, "meta") <- meta
522
580
  attr(markers, "group_by") <- each
523
581
  attr(markers, "paired_by") <- paired_by
524
582
  attr(markers, "ident_1") <- NULL
@@ -527,18 +585,21 @@ run_case <- function(name) {
527
585
  }
528
586
 
529
587
  if (length(overlaps) > 0) {
530
- log$info("- Visualizing overlaps between subcases ...")
588
+ log$info("Visualizing overlaps between subcases ...")
531
589
  process_overlaps(markers, overlaps, name, each)
532
590
  }
533
591
 
534
592
  }
535
593
 
536
- if (!is.null(enriches)) {
537
- log$info("- Summarizing enrichments in subcases (by each: {each}) ...")
594
+ if (!is.null(enriches) && length(enriches) > 0) {
595
+ log$info("Summarizing enrichments in subcases (by each: {each}) ...")
538
596
  if (!is.data.frame(enriches)) {
539
597
  each_levels <- names(enriches)
540
598
  enriches <- do_call(rbind, lapply(each_levels, function(x) {
541
599
  enrich_df <- enriches[[x]]
600
+ if (is.null(enrich_df) || nrow(enrich_df) == 0) {
601
+ return(NULL)
602
+ }
542
603
  if (nrow(enrich_df) > 0) {
543
604
  enrich_df[[each]] <- x
544
605
  } else {
@@ -546,11 +607,13 @@ run_case <- function(name) {
546
607
  }
547
608
  enrich_df
548
609
  }))
549
- enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
610
+ if (!is.null(enriches) && nrow(enriches) > 0) {
611
+ enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
612
+ }
550
613
  }
551
614
 
552
- if (length(allenrich_plots) > 0) {
553
- log$info("- Visualizing all enrichments together ...")
615
+ if (length(allenrich_plots) > 0 && !is.null(enriches) && nrow(enriches) > 0) {
616
+ log$info("Visualizing all enrichments together ...")
554
617
  process_allenriches(enriches, allenrich_plots, name, each)
555
618
  }
556
619
  }
@@ -558,16 +621,36 @@ run_case <- function(name) {
558
621
  return(invisible())
559
622
  }
560
623
 
624
+ info <- case_info(name, outdir, create = TRUE)
561
625
  exprs <- AggregateExpressionPseudobulk(
562
626
  srtobj, aggregate_by = aggregate_by, layer = layer, assay = assay,
563
627
  subset = subset, log = log
564
628
  )
565
- markers <- RunDEGAnalysis(
566
- exprs, group_by = group_by, ident_1 = ident_1, ident_2 = ident_2,
567
- paired_by = paired_by, tool = tool, log = log
629
+ markers <- tryCatch(
630
+ {
631
+ RunDEGAnalysis(
632
+ exprs, group_by = group_by, ident_1 = ident_1, ident_2 = ident_2,
633
+ paired_by = paired_by, tool = tool, log = log, ncores = ncores,
634
+ cache = cache
635
+ )
636
+ }, error = function(e) {
637
+ if (error) {
638
+ stop("Error: ", e$message)
639
+ } else {
640
+ log$warn("! Error: {e$message}")
641
+ reporter$add2(
642
+ list(
643
+ name = "Warning",
644
+ contents = list(list(kind = "error", content = e$message, kind_ = "warning"))),
645
+ hs = c(info$section, info$name),
646
+ hs2 = "DEG Analysis",
647
+ ui = "tabs"
648
+ )
649
+ return(invisible())
650
+ }
651
+ }
568
652
  )
569
653
 
570
- info <- case_info(name, outdir, create = TRUE)
571
654
  enrich <- process_markers(markers, info = info, case = list(
572
655
  dbs = dbs,
573
656
  sigmarkers = sigmarkers,
@@ -579,9 +662,12 @@ run_case <- function(name) {
579
662
  ))
580
663
 
581
664
  if (!is.null(original_case) && !is.null(cases[[original_case]])) {
582
- markers[[each_name]] <- each
665
+ if (!is.null(markers)) {
666
+ markers[[each_name]] <- each
667
+ }
583
668
  cases[[original_case]]$markers[[each]] <<- markers
584
669
  cases[[original_case]]$enriches[[each]] <<- enrich
670
+ cases[[original_case]]$allexprs[[each]] <<- exprs
585
671
  }
586
672
 
587
673
  invisible()
@@ -10,6 +10,7 @@ mutaters <- {{envs.mutaters | r}} # nolint
10
10
  group_by <- {{envs.group_by | default: envs["group-by"] | default: None | r}} # nolint
11
11
  ident_1 <- {{envs.ident_1 | default: envs["ident-1"] | default: None | r}} # nolint
12
12
  ident_2 <- {{envs.ident_2 | default: envs["ident-2"] | default: None | r}} # nolint
13
+ assay <- {{envs.assay | r}} # nolint
13
14
  each <- {{envs.each | r}} # nolint
14
15
  subset <- {{envs.subset | r}} # nolint
15
16
  gmtfile <- {{envs.gmtfile | r}} # nolint
@@ -33,9 +34,6 @@ alleach_plots <- lapply(alleach_plots, function(x) {
33
34
 
34
35
  log$info("Reading Seurat object ...")
35
36
  srtobj <- read_obj(srtfile)
36
- if (!"Identity" %in% colnames(srtobj@meta.data)) {
37
- srtobj@meta.data$Identity <- Idents(srtobj)
38
- }
39
37
 
40
38
  if (!is.null(mutaters) && length(mutaters) > 0) {
41
39
  log$info("Mutating metadata columns ...")
@@ -46,6 +44,7 @@ defaults <- list(
46
44
  group_by = group_by,
47
45
  ident_1 = ident_1,
48
46
  ident_2 = ident_2,
47
+ assay = assay,
49
48
  each = each,
50
49
  subset = subset,
51
50
  gmtfile = gmtfile,
@@ -63,7 +62,7 @@ defaults <- list(
63
62
  expand_each <- function(name, case) {
64
63
  outcases <- list()
65
64
 
66
- case$group_by <- case$group_by %||% "Identity"
65
+ case$group_by <- case$group_by %||% GetIdentityColumn(srtobj)
67
66
 
68
67
  if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
69
68
  if (length(case$alleach_plots) > 0) {
@@ -82,11 +81,13 @@ expand_each <- function(name, case) {
82
81
  }
83
82
 
84
83
  if (length(cases) == 0 && name == "GSEA") {
85
- name <- case$each
84
+ prefix <- case$each
85
+ } else {
86
+ prefix <- paste0(name, " (", case$each, ")")
86
87
  }
87
88
 
88
89
  for (each in eachs) {
89
- newname <- paste0(case$each, "::", each)
90
+ newname <- paste0(prefix, "::", each)
90
91
  newcase <- case
91
92
 
92
93
  newcase$original_case <- paste0(name, " (all ", case$each,")")
@@ -142,6 +143,11 @@ do_case <- function(name) {
142
143
 
143
144
  if (!is.null(case$gseas)) {
144
145
 
146
+ if (length(case$gseas) == 0) {
147
+ log$warn(" No GSEA results found for case {name}. Skipping.")
148
+ return(invisible(NULL))
149
+ }
150
+
145
151
  each_levels <- names(case$gseas)
146
152
  gseas <- do_call(rbind, lapply(each_levels, function(x) {
147
153
  gsea_df <- case$gseas[[x]]
@@ -226,7 +232,7 @@ do_case <- function(name) {
226
232
  case$ident_2 <- "Other"
227
233
  allclasses[allclasses != case$ident_1] <- "Other"
228
234
  }
229
- exprs <- GetAssayData(sobj, layer = "data")
235
+ exprs <- GetAssayData(sobj, layer = "data", assay = case$assay)
230
236
 
231
237
  # get preranks
232
238
  log$info(" Getting preranks...")
@@ -240,25 +246,16 @@ do_case <- function(name) {
240
246
  quote = FALSE
241
247
  )
242
248
  if (all(is.na(ranks))) {
243
- if (length(allclasses) < 100) {
244
- log$warn(" Ignoring this case because all gene ranks are NA and there are <100 cells.")
245
- reporter$add2(
246
- list(
247
- kind = "error",
248
- content = paste0("Not enough cells (n = ", length(allclasses), ") to run fgsea.")
249
- ),
250
- hs = c(info$section, info$name)
251
- )
252
- return(NULL)
253
- } else {
254
- stop(paste0(
255
- "All gene ranks are NA (# cells = ",
256
- length(allclasses),
257
- "). ",
258
- "It's probably due to high missing rate in the data. ",
259
- "You may want to try a different `envs$method` for pre-ranking."
260
- ))
261
- }
249
+ log$warn(" All gene ranks are NA. It's probably due to high missing rate in the data.")
250
+ log$warn(" Case ignored, you may also try a different ranking method.")
251
+ reporter$add2(
252
+ list(
253
+ kind = "error",
254
+ content = "All gene ranks are NA. It's probably due to high missing rate in the data."
255
+ ),
256
+ hs = c(info$section, info$name)
257
+ )
258
+ return(invisible(NULL))
262
259
  }
263
260
 
264
261
  # run fgsea
@@ -7,13 +7,21 @@ from diot import Diot # type: ignore[import]
7
7
  import scanpy as sc
8
8
  import scvelo as scv
9
9
  import numpy as np
10
+ import matplotlib
11
+ matplotlib.use('Agg')
10
12
  import matplotlib.pyplot as plt
11
- from biopipen.utils.misc import logger
13
+ from biopipen.utils.misc import logger, require_package
12
14
  from biopipen.scripts.scrna.seurat_anndata_conversion import (
13
15
  convert_seurat_to_anndata,
14
16
  convert_anndata_to_seurat,
15
17
  )
16
18
 
19
+ require_package("scvelo", ">=0.3.3")
20
+ from biopipen.scripts.scrna import scvelo_paga # noqa: F401
21
+
22
+ warnings.simplefilter("ignore", category=UserWarning)
23
+ warnings.simplefilter("ignore", category=FutureWarning)
24
+ warnings.simplefilter("ignore", category=DeprecationWarning)
17
25
 
18
26
 
19
27
  def SCVELO(
@@ -45,10 +53,6 @@ def SCVELO(
45
53
  dpi=100,
46
54
  fileprefix="",
47
55
  ):
48
- warnings.simplefilter("ignore", category=UserWarning)
49
- warnings.simplefilter("ignore", category=FutureWarning)
50
- warnings.simplefilter("ignore", category=DeprecationWarning)
51
-
52
56
  os.chdir(os.path.expanduser(dirpath))
53
57
  if linear_reduction is None:
54
58
  sc.pp.pca(adata, n_comps=n_pcs)
@@ -526,18 +530,26 @@ calculate_velocity_genes: bool = {{envs.calculate_velocity_genes | repr}} # pyr
526
530
  top_n: int = {{envs.top_n | repr}} # pyright: ignore # noqa: E999
527
531
  rscript: str = {{envs.rscript | repr}} # pyright: ignore # noqa: E999
528
532
 
529
- if group_by is None:
530
- raise ValueError("The 'envs.group_by' parameter must be specified.")
531
533
 
532
534
  if sobjfile.endswith(".h5ad"):
533
535
  h5ad_file = Path(sobjfile)
534
536
  else:
535
537
  h5ad_file = Path(outfile).with_suffix(".input.h5ad")
536
538
  logger.info("Converting Seurat object to AnnData (h5ad) format...")
537
- convert_seurat_to_anndata(
539
+ seurat_ident_col = convert_seurat_to_anndata(
538
540
  input_file=sobjfile,
539
541
  output_file=h5ad_file,
540
542
  rscript=rscript,
543
+ return_ident_col=not group_by,
544
+ )
545
+ group_by = group_by or seurat_ident_col
546
+
547
+ if group_by is None:
548
+ group_by = "seurat_clusters"
549
+ logger.warning(
550
+ "`envs.group_by` is not provided. "
551
+ "Using 'seurat_clusters' as the default groupby column. "
552
+ "It is recommended to provide the `envs.group_by` parameter."
541
553
  )
542
554
 
543
555
  logger.info(f"Reading AnnData (h5ad) file ...")
@@ -16,7 +16,7 @@ if (
16
16
  if (startsWith(key, "FindClusters") && length(srtobj@commands[[key]]$resolution) > 1) {
17
17
  pref <- substring(key, 14)
18
18
  if (pref == "") {
19
- pref <- "seurat_clusters"
19
+ pref <- biopipen.utils::GetIdentityColumn(srtobj)
20
20
  }
21
21
 
22
22
  clustrees[[pref]] <- list(prefix = pref)
@@ -107,7 +107,12 @@ do_one_features <- function(name) {
107
107
  caching$restore()
108
108
  } else {
109
109
  case$features <- .get_features(features, case$object)
110
- p <- do_call(gglogger::register(FeatureStatPlot), case)
110
+ p <- tryCatch({
111
+ do_call(gglogger::register(FeatureStatPlot), case)
112
+ }, error = function(e) {
113
+ if (save_code) { stop(e) }
114
+ do_call(FeatureStatPlot, case)
115
+ })
111
116
  save_plot(p, info$prefix, devpars, formats = c("png", more_formats))
112
117
  if (save_code) {
113
118
  save_plotcode(p, info$prefix,
@@ -1,4 +1,5 @@
1
1
 
2
+ library(rlang)
2
3
  library(Seurat)
3
4
  library(biopipen.utils)
4
5
 
@@ -11,13 +12,16 @@ RunPCAArgs <- {{envs.RunPCA | r: todot="-"}}
11
12
  FindNeighborsArgs <- {{envs.FindNeighbors | r: todot="-"}}
12
13
  FindClustersArgs <- {{envs.FindClusters | r: todot="-"}}
13
14
  RunUMAPArgs <- {{envs.RunUMAP | r: todot="-"}}
15
+ ident <- {{envs.ident | r }}
14
16
  cache <- {{envs.cache | r}}
15
17
  ncores <- {{envs.ncores | r}}
16
18
 
19
+ FindClustersArgs$cluster.name <- FindClustersArgs$cluster.name %||% ident %||% "seurat_clusters"
20
+
17
21
  log <- get_logger()
18
22
 
19
23
  # options(str = strOptions(vec.len = 5, digits.d = 5))
20
- options(future.globals.maxSize = 80000 * 1024^2)
24
+ options(future.globals.maxSize = Inf)
21
25
  plan(strategy = "multicore", workers = ncores)
22
26
 
23
27
  log$info("Reading Seurat object ...")
@@ -25,7 +25,7 @@ plots = {{envs.plots | r}}
25
25
  log <- get_logger()
26
26
  reporter <- get_reporter()
27
27
 
28
- options(future.globals.maxSize = 8 * 1024 ^ 4)
28
+ options(future.globals.maxSize = Inf)
29
29
  options(future.rng.onMisuse="ignore")
30
30
  options(Seurat.object.assay.version = "v5")
31
31
 
@@ -43,7 +43,6 @@ if (isTRUE(cache)) {
43
43
  cache = joboutdir
44
44
  }
45
45
  if (is.null(split_by)) {
46
- options(future.globals.maxSize = 8 * 1024 ^ 4)
47
46
  future::plan(strategy = "multicore", workers = ncores)
48
47
  }
49
48
 
@@ -17,7 +17,7 @@ reporter <- get_reporter()
17
17
 
18
18
  set.seed(8525)
19
19
  # 8TB
20
- options(future.globals.maxSize = 8 * 1024 ^ 4)
20
+ options(future.globals.maxSize = Inf)
21
21
  options(future.rng.onMisuse="ignore")
22
22
  options(Seurat.object.assay.version = "v5")
23
23
  plan(strategy = "multicore", workers = envs$ncores)
@@ -38,19 +38,27 @@ reporter$add(
38
38
  h1 = "Filters and QC"
39
39
  )
40
40
 
41
- metadata <- read.table(
42
- metafile,
43
- header = TRUE,
44
- row.names = NULL,
45
- sep = "\t",
46
- check.names = FALSE
47
- )
41
+ metadata <- tryCatch({
42
+ log$debug("Trying to read Seurat object from metafile ...")
43
+ read_obj(metafile)
44
+ }, error = function(e) {
45
+ log$debug("Failed to read Seurat object from metafile: {e$message}")
46
+ log$debug("Reading metafile as a table (sample info) ...")
47
+ read.table(
48
+ metafile,
49
+ header = TRUE,
50
+ row.names = NULL,
51
+ sep = "\t",
52
+ check.names = FALSE
53
+ )
54
+ })
55
+ is_seurat <- inherits(metadata, "Seurat")
48
56
 
49
- meta_cols = colnames(metadata)
57
+ meta_cols <- if (is_seurat) colnames(metadata@meta.data) else colnames(metadata)
50
58
  if (!"Sample" %in% meta_cols) {
51
- stop("Error: Column `Sample` is not found in metafile.")
59
+ stop("Error: Column `Sample` is not found in ", ifelse(is_seurat, "Seurat object's meta.data.", "metafile."))
52
60
  }
53
- if (!"RNAData" %in% meta_cols) {
61
+ if (!"RNAData" %in% meta_cols && !is_seurat) {
54
62
  stop("Error: Column `RNAData` is not found in metafile.")
55
63
  }
56
64
 
@@ -17,7 +17,7 @@ FindNeighborsArgs <- {{envs.FindNeighbors | r: todot = "-"}}
17
17
  FindClustersArgs <- {{envs.FindClusters | r: todot = "-"}}
18
18
  cases <- {{envs.cases | r}}
19
19
 
20
- options(future.globals.maxSize = 80000 * 1024^2)
20
+ options(future.globals.maxSize = Inf)
21
21
  plan(strategy = "multicore", workers = ncores)
22
22
 
23
23
  log <- get_logger()
@@ -16,16 +16,14 @@ align_start <- {{envs.align_start | r}}
16
16
  seed <- {{envs.seed | r}}
17
17
 
18
18
  set.seed(seed)
19
- if (is.null(group_by)) {
20
- stop("envs.group_by is required")
21
- }
22
19
 
23
20
  log <- get_logger()
24
21
 
25
22
  log$info("Reading Seurat object ...")
26
23
  srt <- read_obj(sobjfile)
24
+ group_by <- group_by %||% biopipen.utils::GetIdentityColumn(srt)
27
25
 
28
- if (!group_by %in% colnames(srt@meta.data)) {
26
+ if (is.null(group_by) || !group_by %in% colnames(srt@meta.data)) {
29
27
  stop(paste("Grouping column", group_by, "not found in the Seurat object"))
30
28
  }
31
29
 
@@ -25,9 +25,6 @@ reporter <- get_reporter()
25
25
 
26
26
  log$info("Reading Seurat object ...")
27
27
  srtobj <- read_obj(srtfile)
28
- if (!"Identity" %in% colnames(srtobj@meta.data)) {
29
- srtobj@meta.data$Identity <- Idents(srtobj)
30
- }
31
28
  assay <- DefaultAssay(srtobj)
32
29
 
33
30
  if (!is.null(mutaters) && length(mutaters) > 0) {
@@ -171,7 +168,7 @@ run_case <- function(name) {
171
168
  } else {
172
169
  subobj <- srtobj
173
170
  }
174
- case$group_by <- case$group_by %||% "Identity"
171
+ case$group_by <- case$group_by %||% GetIdentityColumn(srtobj)
175
172
  if (is.null(case$ident)) {
176
173
  case$ident <- as.character(unique(subobj@meta.data[[case$group_by]]))
177
174
  }