biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +4 -0
- biopipen/core/filters.py +1 -1
- biopipen/core/testing.py +2 -1
- biopipen/ns/cellranger.py +33 -3
- biopipen/ns/regulatory.py +4 -0
- biopipen/ns/scrna.py +548 -98
- biopipen/ns/scrna_metabolic_landscape.py +4 -0
- biopipen/ns/tcr.py +256 -16
- biopipen/ns/web.py +5 -0
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +9 -9
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +9 -8
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +9 -9
- biopipen/reports/tcr/ClonalStats.svelte +1 -0
- biopipen/scripts/cellranger/CellRangerCount.py +55 -11
- biopipen/scripts/cellranger/CellRangerVdj.py +54 -8
- biopipen/scripts/regulatory/MotifAffinityTest.R +21 -5
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +9 -2
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +15 -6
- biopipen/scripts/regulatory/VariantMotifPlot.R +1 -1
- biopipen/scripts/regulatory/motifs-common.R +3 -2
- biopipen/scripts/scrna/AnnData2Seurat.R +2 -1
- biopipen/scripts/scrna/CellCellCommunication.py +26 -14
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +23 -4
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +27 -36
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +42 -26
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +11 -13
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +5 -8
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +5 -8
- biopipen/scripts/scrna/CellTypeAnnotation.R +26 -3
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +128 -30
- biopipen/scripts/scrna/ModuleScoreCalculator.R +9 -1
- biopipen/scripts/scrna/PseudoBulkDEG.R +113 -27
- biopipen/scripts/scrna/ScFGSEA.R +23 -26
- biopipen/scripts/scrna/ScVelo.py +20 -8
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -1
- biopipen/scripts/scrna/SeuratClustering.R +5 -1
- biopipen/scripts/scrna/SeuratMap2Ref.R +1 -2
- biopipen/scripts/scrna/SeuratPreparing.R +19 -11
- biopipen/scripts/scrna/SeuratSubClustering.R +1 -1
- biopipen/scripts/scrna/Slingshot.R +2 -4
- biopipen/scripts/scrna/TopExpressingGenes.R +1 -4
- biopipen/scripts/scrna/celltypist-wrapper.py +140 -4
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +18 -1
- biopipen/scripts/tcr/{TCRClustering.R → CDR3Clustering.R} +63 -23
- biopipen/scripts/tcr/ClonalStats.R +76 -35
- biopipen/utils/misc.py +104 -9
- {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/METADATA +5 -2
- {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/RECORD +55 -53
- {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- biopipen/utils/common_docstrs.py +0 -103
- {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
library(rlang)
|
|
2
2
|
library(dplyr)
|
|
3
3
|
library(plotthis)
|
|
4
|
+
library(Seurat)
|
|
4
5
|
library(biopipen.utils)
|
|
5
6
|
|
|
6
7
|
sobjfile <- {{in.sobjfile | r}}
|
|
@@ -8,7 +9,9 @@ outdir <- {{out.outdir | r}}
|
|
|
8
9
|
joboutdir <- {{job.outdir | r}}
|
|
9
10
|
each <- {{envs.each | r}}
|
|
10
11
|
subset <- {{envs.subset | r}}
|
|
12
|
+
ncores <- {{envs.ncores | r}}
|
|
11
13
|
mutaters <- {{envs.mutaters | r}}
|
|
14
|
+
cache <- {{ envs.cache | r }}
|
|
12
15
|
aggregate_by <- {{envs.aggregate_by | r}}
|
|
13
16
|
layer <- {{envs.layer | r}}
|
|
14
17
|
assay <- {{envs.assay | r}}
|
|
@@ -35,6 +38,7 @@ overlaps <- {{ envs.overlaps | r }}
|
|
|
35
38
|
cases <- {{envs.cases | r}}
|
|
36
39
|
|
|
37
40
|
aggregate_by <- unique(c(aggregate_by, group_by, paired_by, each))
|
|
41
|
+
if (isTRUE(cache)) { cache <- joboutdir }
|
|
38
42
|
|
|
39
43
|
log <- get_logger()
|
|
40
44
|
reporter <- get_reporter()
|
|
@@ -74,10 +78,12 @@ defaults <- list(
|
|
|
74
78
|
ident_1 = ident_1,
|
|
75
79
|
ident_2 = ident_2,
|
|
76
80
|
dbs = dbs,
|
|
81
|
+
ncores = ncores,
|
|
77
82
|
sigmarkers = sigmarkers,
|
|
78
83
|
enrich_style = enrich_style,
|
|
79
84
|
paired_by = paired_by,
|
|
80
85
|
tool = tool,
|
|
86
|
+
cache = cache,
|
|
81
87
|
allmarker_plots_defaults = allmarker_plots_defaults,
|
|
82
88
|
allmarker_plots = allmarker_plots,
|
|
83
89
|
allenrich_plots_defaults = allenrich_plots_defaults,
|
|
@@ -131,12 +137,14 @@ expand_each <- function(name, case) {
|
|
|
131
137
|
|
|
132
138
|
if (length(cases) == 0 && name == "DEG Analysis") {
|
|
133
139
|
name <- case$each
|
|
140
|
+
} else {
|
|
141
|
+
name <- paste0(name, " (", case$each, ")")
|
|
134
142
|
}
|
|
135
143
|
|
|
136
144
|
case$aggregate_by <- unique(c(case$aggregate_by, case$group_by, case$paired_by, case$each))
|
|
137
145
|
|
|
138
146
|
for (each in eachs) {
|
|
139
|
-
newname <- paste0(
|
|
147
|
+
newname <- paste0(name, "::", each)
|
|
140
148
|
newcase <- case
|
|
141
149
|
|
|
142
150
|
newcase$original_case <- name
|
|
@@ -179,6 +187,7 @@ expand_each <- function(name, case) {
|
|
|
179
187
|
if (length(case$overlaps) > 0 || length(case$allmarker_plots) > 0 || length(case$allenrich_plots) > 0) {
|
|
180
188
|
ovcase <- case
|
|
181
189
|
|
|
190
|
+
ovcase$allexprs <- list()
|
|
182
191
|
ovcase$markers <- list()
|
|
183
192
|
ovcase$allmarker_plots <- lapply(
|
|
184
193
|
ovcase$allmarker_plots,
|
|
@@ -212,7 +221,52 @@ process_markers <- function(markers, info, case) {
|
|
|
212
221
|
# markers <- markers %>%
|
|
213
222
|
# mutate(gene = as.character(gene)) %>%
|
|
214
223
|
# arrange(p_val_adj, desc(abs(avg_log2FC)))
|
|
224
|
+
|
|
225
|
+
empty <- if (case$enrich_style == "enrichr") {
|
|
226
|
+
data.frame(
|
|
227
|
+
Database = character(0),
|
|
228
|
+
Term = character(0),
|
|
229
|
+
Overlap = character(0),
|
|
230
|
+
P.value = numeric(0),
|
|
231
|
+
Adjusted.P.value = numeric(0),
|
|
232
|
+
Odds.Ratio = numeric(0),
|
|
233
|
+
Combined.Score = numeric(0),
|
|
234
|
+
Genes = character(0),
|
|
235
|
+
Rank = numeric(0)
|
|
236
|
+
)
|
|
237
|
+
} else { # clusterProfiler
|
|
238
|
+
data.frame(
|
|
239
|
+
ID = character(0),
|
|
240
|
+
Description = character(0),
|
|
241
|
+
GeneRatio = character(0),
|
|
242
|
+
BgRatio = character(0),
|
|
243
|
+
Count = integer(0),
|
|
244
|
+
pvalue = numeric(0),
|
|
245
|
+
p.adjust = numeric(0),
|
|
246
|
+
qvalue = numeric(0),
|
|
247
|
+
geneID = character(0),
|
|
248
|
+
Database = character(0)
|
|
249
|
+
)
|
|
250
|
+
}
|
|
251
|
+
if (is.null(markers) || nrow(markers) == 0) {
|
|
252
|
+
if (case$error) {
|
|
253
|
+
stop("Error: No markers found in case '", info$name, "'.")
|
|
254
|
+
} else {
|
|
255
|
+
log$warn("! Warning: No markers found in case '", info$name, "'.")
|
|
256
|
+
reporter$add2(
|
|
257
|
+
list(
|
|
258
|
+
name = "Warning",
|
|
259
|
+
contents = list(list(kind = "error", content = "No markers found.", kind_ = "warning"))),
|
|
260
|
+
hs = c(info$section, info$name),
|
|
261
|
+
hs2 = "DEG Analysis",
|
|
262
|
+
ui = "tabs"
|
|
263
|
+
)
|
|
264
|
+
return(empty)
|
|
265
|
+
}
|
|
266
|
+
}
|
|
215
267
|
markers$gene <- as.character(markers$gene)
|
|
268
|
+
markers$p_val_adj <- as.numeric(markers$p_val_adj)
|
|
269
|
+
markers$log2FC <- as.numeric(markers$log2FC)
|
|
216
270
|
markers <- markers[order(markers$p_val_adj, -abs(markers$log2FC)), ]
|
|
217
271
|
|
|
218
272
|
# Save markers
|
|
@@ -287,7 +341,7 @@ process_markers <- function(markers, info, case) {
|
|
|
287
341
|
stop("Error: Not enough significant DEGs with '", case$sigmarkers, "' in case '", info$name, "' found (< 5) for enrichment analysis.")
|
|
288
342
|
} else {
|
|
289
343
|
message <- paste0("Not enough significant DEGs with '", case$sigmarkers, "' found (< 5) for enrichment analysis.")
|
|
290
|
-
log$warn("
|
|
344
|
+
log$warn("! Error: {message}")
|
|
291
345
|
reporter$add2(
|
|
292
346
|
list(
|
|
293
347
|
name = "Warning",
|
|
@@ -345,7 +399,7 @@ process_markers <- function(markers, info, case) {
|
|
|
345
399
|
if (case$error) {
|
|
346
400
|
stop("Error: ", e$message)
|
|
347
401
|
} else {
|
|
348
|
-
log$warn("
|
|
402
|
+
log$warn("! Error: {e$message}")
|
|
349
403
|
reporter$add2(
|
|
350
404
|
list(
|
|
351
405
|
name = "Warning",
|
|
@@ -478,6 +532,7 @@ process_overlaps <- function(markers, ovcases, casename, groupname) {
|
|
|
478
532
|
|
|
479
533
|
run_case <- function(name) {
|
|
480
534
|
case <- cases[[name]]
|
|
535
|
+
log$info("----------------------------------------")
|
|
481
536
|
log$info("Case: {name} ...")
|
|
482
537
|
|
|
483
538
|
case <- extract_vars(
|
|
@@ -485,18 +540,21 @@ run_case <- function(name) {
|
|
|
485
540
|
"dbs", "sigmarkers", "allmarker_plots", "allenrich_plots", "marker_plots", "enrich_plots",
|
|
486
541
|
"overlaps", "original_case", "markers", "enriches", "each_name", "each", "enrich_style",
|
|
487
542
|
"aggregate_by", "subset", "layer", "assay", "group_by", "ident_1", "ident_2", "original_subset",
|
|
488
|
-
"paired_by", "tool", "error",
|
|
543
|
+
"paired_by", "tool", "error", "ncores", "cache", "allexprs",
|
|
489
544
|
allow_nonexisting = TRUE
|
|
490
545
|
)
|
|
491
546
|
|
|
492
547
|
if (!is.null(markers) || !is.null(enriches)) {
|
|
493
|
-
if (!is.null(markers)
|
|
494
|
-
log$info("
|
|
548
|
+
if (!is.null(markers) && length(markers) > 0) {
|
|
549
|
+
log$info("Summarizing DEGs in subcases (by each: {each}) ...")
|
|
495
550
|
# handle the overlaps / allmarkers analysis here
|
|
496
551
|
if (!is.data.frame(markers)) {
|
|
497
552
|
each_levels <- names(markers)
|
|
498
553
|
markers <- do_call(rbind, lapply(each_levels, function(x) {
|
|
499
554
|
markers_df <- markers[[x]]
|
|
555
|
+
if (is.null(markers_df) || nrow(markers_df) == 0) {
|
|
556
|
+
return(NULL)
|
|
557
|
+
}
|
|
500
558
|
if (nrow(markers_df) > 0) {
|
|
501
559
|
markers_df[[each]] <- x
|
|
502
560
|
} else {
|
|
@@ -508,17 +566,17 @@ run_case <- function(name) {
|
|
|
508
566
|
}
|
|
509
567
|
# gene, p_val, avg_log2FC, pct.1, pct.2, p_val_adj, diff_pct, <each>
|
|
510
568
|
|
|
569
|
+
if (!is.data.frame(allexprs)) {
|
|
570
|
+
meta <- do_call(rbind, lapply(allexprs, attr, "meta"))
|
|
571
|
+
allexprs <- do_call(cbind, allexprs)
|
|
572
|
+
} else {
|
|
573
|
+
meta <- attr(allexprs, "meta")
|
|
574
|
+
}
|
|
575
|
+
|
|
511
576
|
if (length(allmarker_plots) > 0) {
|
|
512
|
-
log$info("
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
subset = original_subset, log = log
|
|
516
|
-
)
|
|
517
|
-
attr(markers, "object") <- AggregateExpressionPseudobulk(
|
|
518
|
-
srtobj, aggregate_by = aggregate_by, layer = layer, assay = assay,
|
|
519
|
-
subset = original_subset, log = log
|
|
520
|
-
)
|
|
521
|
-
attr(markers, "meta") <- attr(exprs, "meta")
|
|
577
|
+
log$info("Visualizing all DEGs together ...")
|
|
578
|
+
attr(markers, "object") <- allexprs
|
|
579
|
+
attr(markers, "meta") <- meta
|
|
522
580
|
attr(markers, "group_by") <- each
|
|
523
581
|
attr(markers, "paired_by") <- paired_by
|
|
524
582
|
attr(markers, "ident_1") <- NULL
|
|
@@ -527,18 +585,21 @@ run_case <- function(name) {
|
|
|
527
585
|
}
|
|
528
586
|
|
|
529
587
|
if (length(overlaps) > 0) {
|
|
530
|
-
log$info("
|
|
588
|
+
log$info("Visualizing overlaps between subcases ...")
|
|
531
589
|
process_overlaps(markers, overlaps, name, each)
|
|
532
590
|
}
|
|
533
591
|
|
|
534
592
|
}
|
|
535
593
|
|
|
536
|
-
if (!is.null(enriches)) {
|
|
537
|
-
log$info("
|
|
594
|
+
if (!is.null(enriches) && length(enriches) > 0) {
|
|
595
|
+
log$info("Summarizing enrichments in subcases (by each: {each}) ...")
|
|
538
596
|
if (!is.data.frame(enriches)) {
|
|
539
597
|
each_levels <- names(enriches)
|
|
540
598
|
enriches <- do_call(rbind, lapply(each_levels, function(x) {
|
|
541
599
|
enrich_df <- enriches[[x]]
|
|
600
|
+
if (is.null(enrich_df) || nrow(enrich_df) == 0) {
|
|
601
|
+
return(NULL)
|
|
602
|
+
}
|
|
542
603
|
if (nrow(enrich_df) > 0) {
|
|
543
604
|
enrich_df[[each]] <- x
|
|
544
605
|
} else {
|
|
@@ -546,11 +607,13 @@ run_case <- function(name) {
|
|
|
546
607
|
}
|
|
547
608
|
enrich_df
|
|
548
609
|
}))
|
|
549
|
-
enriches
|
|
610
|
+
if (!is.null(enriches) && nrow(enriches) > 0) {
|
|
611
|
+
enriches[[each]] <- factor(enriches[[each]], levels = each_levels)
|
|
612
|
+
}
|
|
550
613
|
}
|
|
551
614
|
|
|
552
|
-
if (length(allenrich_plots) > 0) {
|
|
553
|
-
log$info("
|
|
615
|
+
if (length(allenrich_plots) > 0 && !is.null(enriches) && nrow(enriches) > 0) {
|
|
616
|
+
log$info("Visualizing all enrichments together ...")
|
|
554
617
|
process_allenriches(enriches, allenrich_plots, name, each)
|
|
555
618
|
}
|
|
556
619
|
}
|
|
@@ -558,16 +621,36 @@ run_case <- function(name) {
|
|
|
558
621
|
return(invisible())
|
|
559
622
|
}
|
|
560
623
|
|
|
624
|
+
info <- case_info(name, outdir, create = TRUE)
|
|
561
625
|
exprs <- AggregateExpressionPseudobulk(
|
|
562
626
|
srtobj, aggregate_by = aggregate_by, layer = layer, assay = assay,
|
|
563
627
|
subset = subset, log = log
|
|
564
628
|
)
|
|
565
|
-
markers <-
|
|
566
|
-
|
|
567
|
-
|
|
629
|
+
markers <- tryCatch(
|
|
630
|
+
{
|
|
631
|
+
RunDEGAnalysis(
|
|
632
|
+
exprs, group_by = group_by, ident_1 = ident_1, ident_2 = ident_2,
|
|
633
|
+
paired_by = paired_by, tool = tool, log = log, ncores = ncores,
|
|
634
|
+
cache = cache
|
|
635
|
+
)
|
|
636
|
+
}, error = function(e) {
|
|
637
|
+
if (error) {
|
|
638
|
+
stop("Error: ", e$message)
|
|
639
|
+
} else {
|
|
640
|
+
log$warn("! Error: {e$message}")
|
|
641
|
+
reporter$add2(
|
|
642
|
+
list(
|
|
643
|
+
name = "Warning",
|
|
644
|
+
contents = list(list(kind = "error", content = e$message, kind_ = "warning"))),
|
|
645
|
+
hs = c(info$section, info$name),
|
|
646
|
+
hs2 = "DEG Analysis",
|
|
647
|
+
ui = "tabs"
|
|
648
|
+
)
|
|
649
|
+
return(invisible())
|
|
650
|
+
}
|
|
651
|
+
}
|
|
568
652
|
)
|
|
569
653
|
|
|
570
|
-
info <- case_info(name, outdir, create = TRUE)
|
|
571
654
|
enrich <- process_markers(markers, info = info, case = list(
|
|
572
655
|
dbs = dbs,
|
|
573
656
|
sigmarkers = sigmarkers,
|
|
@@ -579,9 +662,12 @@ run_case <- function(name) {
|
|
|
579
662
|
))
|
|
580
663
|
|
|
581
664
|
if (!is.null(original_case) && !is.null(cases[[original_case]])) {
|
|
582
|
-
markers
|
|
665
|
+
if (!is.null(markers)) {
|
|
666
|
+
markers[[each_name]] <- each
|
|
667
|
+
}
|
|
583
668
|
cases[[original_case]]$markers[[each]] <<- markers
|
|
584
669
|
cases[[original_case]]$enriches[[each]] <<- enrich
|
|
670
|
+
cases[[original_case]]$allexprs[[each]] <<- exprs
|
|
585
671
|
}
|
|
586
672
|
|
|
587
673
|
invisible()
|
biopipen/scripts/scrna/ScFGSEA.R
CHANGED
|
@@ -10,6 +10,7 @@ mutaters <- {{envs.mutaters | r}} # nolint
|
|
|
10
10
|
group_by <- {{envs.group_by | default: envs["group-by"] | default: None | r}} # nolint
|
|
11
11
|
ident_1 <- {{envs.ident_1 | default: envs["ident-1"] | default: None | r}} # nolint
|
|
12
12
|
ident_2 <- {{envs.ident_2 | default: envs["ident-2"] | default: None | r}} # nolint
|
|
13
|
+
assay <- {{envs.assay | r}} # nolint
|
|
13
14
|
each <- {{envs.each | r}} # nolint
|
|
14
15
|
subset <- {{envs.subset | r}} # nolint
|
|
15
16
|
gmtfile <- {{envs.gmtfile | r}} # nolint
|
|
@@ -33,9 +34,6 @@ alleach_plots <- lapply(alleach_plots, function(x) {
|
|
|
33
34
|
|
|
34
35
|
log$info("Reading Seurat object ...")
|
|
35
36
|
srtobj <- read_obj(srtfile)
|
|
36
|
-
if (!"Identity" %in% colnames(srtobj@meta.data)) {
|
|
37
|
-
srtobj@meta.data$Identity <- Idents(srtobj)
|
|
38
|
-
}
|
|
39
37
|
|
|
40
38
|
if (!is.null(mutaters) && length(mutaters) > 0) {
|
|
41
39
|
log$info("Mutating metadata columns ...")
|
|
@@ -46,6 +44,7 @@ defaults <- list(
|
|
|
46
44
|
group_by = group_by,
|
|
47
45
|
ident_1 = ident_1,
|
|
48
46
|
ident_2 = ident_2,
|
|
47
|
+
assay = assay,
|
|
49
48
|
each = each,
|
|
50
49
|
subset = subset,
|
|
51
50
|
gmtfile = gmtfile,
|
|
@@ -63,7 +62,7 @@ defaults <- list(
|
|
|
63
62
|
expand_each <- function(name, case) {
|
|
64
63
|
outcases <- list()
|
|
65
64
|
|
|
66
|
-
case$group_by <- case$group_by %||%
|
|
65
|
+
case$group_by <- case$group_by %||% GetIdentityColumn(srtobj)
|
|
67
66
|
|
|
68
67
|
if (is.null(case$each) || is.na(case$each) || nchar(case$each) == 0 || isFALSE(each)) {
|
|
69
68
|
if (length(case$alleach_plots) > 0) {
|
|
@@ -82,11 +81,13 @@ expand_each <- function(name, case) {
|
|
|
82
81
|
}
|
|
83
82
|
|
|
84
83
|
if (length(cases) == 0 && name == "GSEA") {
|
|
85
|
-
|
|
84
|
+
prefix <- case$each
|
|
85
|
+
} else {
|
|
86
|
+
prefix <- paste0(name, " (", case$each, ")")
|
|
86
87
|
}
|
|
87
88
|
|
|
88
89
|
for (each in eachs) {
|
|
89
|
-
newname <- paste0(
|
|
90
|
+
newname <- paste0(prefix, "::", each)
|
|
90
91
|
newcase <- case
|
|
91
92
|
|
|
92
93
|
newcase$original_case <- paste0(name, " (all ", case$each,")")
|
|
@@ -142,6 +143,11 @@ do_case <- function(name) {
|
|
|
142
143
|
|
|
143
144
|
if (!is.null(case$gseas)) {
|
|
144
145
|
|
|
146
|
+
if (length(case$gseas) == 0) {
|
|
147
|
+
log$warn(" No GSEA results found for case {name}. Skipping.")
|
|
148
|
+
return(invisible(NULL))
|
|
149
|
+
}
|
|
150
|
+
|
|
145
151
|
each_levels <- names(case$gseas)
|
|
146
152
|
gseas <- do_call(rbind, lapply(each_levels, function(x) {
|
|
147
153
|
gsea_df <- case$gseas[[x]]
|
|
@@ -226,7 +232,7 @@ do_case <- function(name) {
|
|
|
226
232
|
case$ident_2 <- "Other"
|
|
227
233
|
allclasses[allclasses != case$ident_1] <- "Other"
|
|
228
234
|
}
|
|
229
|
-
exprs <- GetAssayData(sobj, layer = "data")
|
|
235
|
+
exprs <- GetAssayData(sobj, layer = "data", assay = case$assay)
|
|
230
236
|
|
|
231
237
|
# get preranks
|
|
232
238
|
log$info(" Getting preranks...")
|
|
@@ -240,25 +246,16 @@ do_case <- function(name) {
|
|
|
240
246
|
quote = FALSE
|
|
241
247
|
)
|
|
242
248
|
if (all(is.na(ranks))) {
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
} else {
|
|
254
|
-
stop(paste0(
|
|
255
|
-
"All gene ranks are NA (# cells = ",
|
|
256
|
-
length(allclasses),
|
|
257
|
-
"). ",
|
|
258
|
-
"It's probably due to high missing rate in the data. ",
|
|
259
|
-
"You may want to try a different `envs$method` for pre-ranking."
|
|
260
|
-
))
|
|
261
|
-
}
|
|
249
|
+
log$warn(" All gene ranks are NA. It's probably due to high missing rate in the data.")
|
|
250
|
+
log$warn(" Case ignored, you may also try a different ranking method.")
|
|
251
|
+
reporter$add2(
|
|
252
|
+
list(
|
|
253
|
+
kind = "error",
|
|
254
|
+
content = "All gene ranks are NA. It's probably due to high missing rate in the data."
|
|
255
|
+
),
|
|
256
|
+
hs = c(info$section, info$name)
|
|
257
|
+
)
|
|
258
|
+
return(invisible(NULL))
|
|
262
259
|
}
|
|
263
260
|
|
|
264
261
|
# run fgsea
|
biopipen/scripts/scrna/ScVelo.py
CHANGED
|
@@ -7,13 +7,21 @@ from diot import Diot # type: ignore[import]
|
|
|
7
7
|
import scanpy as sc
|
|
8
8
|
import scvelo as scv
|
|
9
9
|
import numpy as np
|
|
10
|
+
import matplotlib
|
|
11
|
+
matplotlib.use('Agg')
|
|
10
12
|
import matplotlib.pyplot as plt
|
|
11
|
-
from biopipen.utils.misc import logger
|
|
13
|
+
from biopipen.utils.misc import logger, require_package
|
|
12
14
|
from biopipen.scripts.scrna.seurat_anndata_conversion import (
|
|
13
15
|
convert_seurat_to_anndata,
|
|
14
16
|
convert_anndata_to_seurat,
|
|
15
17
|
)
|
|
16
18
|
|
|
19
|
+
require_package("scvelo", ">=0.3.3")
|
|
20
|
+
from biopipen.scripts.scrna import scvelo_paga # noqa: F401
|
|
21
|
+
|
|
22
|
+
warnings.simplefilter("ignore", category=UserWarning)
|
|
23
|
+
warnings.simplefilter("ignore", category=FutureWarning)
|
|
24
|
+
warnings.simplefilter("ignore", category=DeprecationWarning)
|
|
17
25
|
|
|
18
26
|
|
|
19
27
|
def SCVELO(
|
|
@@ -45,10 +53,6 @@ def SCVELO(
|
|
|
45
53
|
dpi=100,
|
|
46
54
|
fileprefix="",
|
|
47
55
|
):
|
|
48
|
-
warnings.simplefilter("ignore", category=UserWarning)
|
|
49
|
-
warnings.simplefilter("ignore", category=FutureWarning)
|
|
50
|
-
warnings.simplefilter("ignore", category=DeprecationWarning)
|
|
51
|
-
|
|
52
56
|
os.chdir(os.path.expanduser(dirpath))
|
|
53
57
|
if linear_reduction is None:
|
|
54
58
|
sc.pp.pca(adata, n_comps=n_pcs)
|
|
@@ -526,18 +530,26 @@ calculate_velocity_genes: bool = {{envs.calculate_velocity_genes | repr}} # pyr
|
|
|
526
530
|
top_n: int = {{envs.top_n | repr}} # pyright: ignore # noqa: E999
|
|
527
531
|
rscript: str = {{envs.rscript | repr}} # pyright: ignore # noqa: E999
|
|
528
532
|
|
|
529
|
-
if group_by is None:
|
|
530
|
-
raise ValueError("The 'envs.group_by' parameter must be specified.")
|
|
531
533
|
|
|
532
534
|
if sobjfile.endswith(".h5ad"):
|
|
533
535
|
h5ad_file = Path(sobjfile)
|
|
534
536
|
else:
|
|
535
537
|
h5ad_file = Path(outfile).with_suffix(".input.h5ad")
|
|
536
538
|
logger.info("Converting Seurat object to AnnData (h5ad) format...")
|
|
537
|
-
convert_seurat_to_anndata(
|
|
539
|
+
seurat_ident_col = convert_seurat_to_anndata(
|
|
538
540
|
input_file=sobjfile,
|
|
539
541
|
output_file=h5ad_file,
|
|
540
542
|
rscript=rscript,
|
|
543
|
+
return_ident_col=not group_by,
|
|
544
|
+
)
|
|
545
|
+
group_by = group_by or seurat_ident_col
|
|
546
|
+
|
|
547
|
+
if group_by is None:
|
|
548
|
+
group_by = "seurat_clusters"
|
|
549
|
+
logger.warning(
|
|
550
|
+
"`envs.group_by` is not provided. "
|
|
551
|
+
"Using 'seurat_clusters' as the default groupby column. "
|
|
552
|
+
"It is recommended to provide the `envs.group_by` parameter."
|
|
541
553
|
)
|
|
542
554
|
|
|
543
555
|
logger.info(f"Reading AnnData (h5ad) file ...")
|
|
@@ -16,7 +16,7 @@ if (
|
|
|
16
16
|
if (startsWith(key, "FindClusters") && length(srtobj@commands[[key]]$resolution) > 1) {
|
|
17
17
|
pref <- substring(key, 14)
|
|
18
18
|
if (pref == "") {
|
|
19
|
-
pref <-
|
|
19
|
+
pref <- biopipen.utils::GetIdentityColumn(srtobj)
|
|
20
20
|
}
|
|
21
21
|
|
|
22
22
|
clustrees[[pref]] <- list(prefix = pref)
|
|
@@ -107,7 +107,12 @@ do_one_features <- function(name) {
|
|
|
107
107
|
caching$restore()
|
|
108
108
|
} else {
|
|
109
109
|
case$features <- .get_features(features, case$object)
|
|
110
|
-
p <-
|
|
110
|
+
p <- tryCatch({
|
|
111
|
+
do_call(gglogger::register(FeatureStatPlot), case)
|
|
112
|
+
}, error = function(e) {
|
|
113
|
+
if (save_code) { stop(e) }
|
|
114
|
+
do_call(FeatureStatPlot, case)
|
|
115
|
+
})
|
|
111
116
|
save_plot(p, info$prefix, devpars, formats = c("png", more_formats))
|
|
112
117
|
if (save_code) {
|
|
113
118
|
save_plotcode(p, info$prefix,
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
|
|
2
|
+
library(rlang)
|
|
2
3
|
library(Seurat)
|
|
3
4
|
library(biopipen.utils)
|
|
4
5
|
|
|
@@ -11,13 +12,16 @@ RunPCAArgs <- {{envs.RunPCA | r: todot="-"}}
|
|
|
11
12
|
FindNeighborsArgs <- {{envs.FindNeighbors | r: todot="-"}}
|
|
12
13
|
FindClustersArgs <- {{envs.FindClusters | r: todot="-"}}
|
|
13
14
|
RunUMAPArgs <- {{envs.RunUMAP | r: todot="-"}}
|
|
15
|
+
ident <- {{envs.ident | r }}
|
|
14
16
|
cache <- {{envs.cache | r}}
|
|
15
17
|
ncores <- {{envs.ncores | r}}
|
|
16
18
|
|
|
19
|
+
FindClustersArgs$cluster.name <- FindClustersArgs$cluster.name %||% ident %||% "seurat_clusters"
|
|
20
|
+
|
|
17
21
|
log <- get_logger()
|
|
18
22
|
|
|
19
23
|
# options(str = strOptions(vec.len = 5, digits.d = 5))
|
|
20
|
-
options(future.globals.maxSize =
|
|
24
|
+
options(future.globals.maxSize = Inf)
|
|
21
25
|
plan(strategy = "multicore", workers = ncores)
|
|
22
26
|
|
|
23
27
|
log$info("Reading Seurat object ...")
|
|
@@ -25,7 +25,7 @@ plots = {{envs.plots | r}}
|
|
|
25
25
|
log <- get_logger()
|
|
26
26
|
reporter <- get_reporter()
|
|
27
27
|
|
|
28
|
-
options(future.globals.maxSize =
|
|
28
|
+
options(future.globals.maxSize = Inf)
|
|
29
29
|
options(future.rng.onMisuse="ignore")
|
|
30
30
|
options(Seurat.object.assay.version = "v5")
|
|
31
31
|
|
|
@@ -43,7 +43,6 @@ if (isTRUE(cache)) {
|
|
|
43
43
|
cache = joboutdir
|
|
44
44
|
}
|
|
45
45
|
if (is.null(split_by)) {
|
|
46
|
-
options(future.globals.maxSize = 8 * 1024 ^ 4)
|
|
47
46
|
future::plan(strategy = "multicore", workers = ncores)
|
|
48
47
|
}
|
|
49
48
|
|
|
@@ -17,7 +17,7 @@ reporter <- get_reporter()
|
|
|
17
17
|
|
|
18
18
|
set.seed(8525)
|
|
19
19
|
# 8TB
|
|
20
|
-
options(future.globals.maxSize =
|
|
20
|
+
options(future.globals.maxSize = Inf)
|
|
21
21
|
options(future.rng.onMisuse="ignore")
|
|
22
22
|
options(Seurat.object.assay.version = "v5")
|
|
23
23
|
plan(strategy = "multicore", workers = envs$ncores)
|
|
@@ -38,19 +38,27 @@ reporter$add(
|
|
|
38
38
|
h1 = "Filters and QC"
|
|
39
39
|
)
|
|
40
40
|
|
|
41
|
-
metadata <-
|
|
42
|
-
metafile
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
41
|
+
metadata <- tryCatch({
|
|
42
|
+
log$debug("Trying to read Seurat object from metafile ...")
|
|
43
|
+
read_obj(metafile)
|
|
44
|
+
}, error = function(e) {
|
|
45
|
+
log$debug("Failed to read Seurat object from metafile: {e$message}")
|
|
46
|
+
log$debug("Reading metafile as a table (sample info) ...")
|
|
47
|
+
read.table(
|
|
48
|
+
metafile,
|
|
49
|
+
header = TRUE,
|
|
50
|
+
row.names = NULL,
|
|
51
|
+
sep = "\t",
|
|
52
|
+
check.names = FALSE
|
|
53
|
+
)
|
|
54
|
+
})
|
|
55
|
+
is_seurat <- inherits(metadata, "Seurat")
|
|
48
56
|
|
|
49
|
-
meta_cols
|
|
57
|
+
meta_cols <- if (is_seurat) colnames(metadata@meta.data) else colnames(metadata)
|
|
50
58
|
if (!"Sample" %in% meta_cols) {
|
|
51
|
-
stop("Error: Column `Sample` is not found in metafile.")
|
|
59
|
+
stop("Error: Column `Sample` is not found in ", ifelse(is_seurat, "Seurat object's meta.data.", "metafile."))
|
|
52
60
|
}
|
|
53
|
-
if (!"RNAData" %in% meta_cols) {
|
|
61
|
+
if (!"RNAData" %in% meta_cols && !is_seurat) {
|
|
54
62
|
stop("Error: Column `RNAData` is not found in metafile.")
|
|
55
63
|
}
|
|
56
64
|
|
|
@@ -17,7 +17,7 @@ FindNeighborsArgs <- {{envs.FindNeighbors | r: todot = "-"}}
|
|
|
17
17
|
FindClustersArgs <- {{envs.FindClusters | r: todot = "-"}}
|
|
18
18
|
cases <- {{envs.cases | r}}
|
|
19
19
|
|
|
20
|
-
options(future.globals.maxSize =
|
|
20
|
+
options(future.globals.maxSize = Inf)
|
|
21
21
|
plan(strategy = "multicore", workers = ncores)
|
|
22
22
|
|
|
23
23
|
log <- get_logger()
|
|
@@ -16,16 +16,14 @@ align_start <- {{envs.align_start | r}}
|
|
|
16
16
|
seed <- {{envs.seed | r}}
|
|
17
17
|
|
|
18
18
|
set.seed(seed)
|
|
19
|
-
if (is.null(group_by)) {
|
|
20
|
-
stop("envs.group_by is required")
|
|
21
|
-
}
|
|
22
19
|
|
|
23
20
|
log <- get_logger()
|
|
24
21
|
|
|
25
22
|
log$info("Reading Seurat object ...")
|
|
26
23
|
srt <- read_obj(sobjfile)
|
|
24
|
+
group_by <- group_by %||% biopipen.utils::GetIdentityColumn(srt)
|
|
27
25
|
|
|
28
|
-
if (!group_by %in% colnames(srt@meta.data)) {
|
|
26
|
+
if (is.null(group_by) || !group_by %in% colnames(srt@meta.data)) {
|
|
29
27
|
stop(paste("Grouping column", group_by, "not found in the Seurat object"))
|
|
30
28
|
}
|
|
31
29
|
|
|
@@ -25,9 +25,6 @@ reporter <- get_reporter()
|
|
|
25
25
|
|
|
26
26
|
log$info("Reading Seurat object ...")
|
|
27
27
|
srtobj <- read_obj(srtfile)
|
|
28
|
-
if (!"Identity" %in% colnames(srtobj@meta.data)) {
|
|
29
|
-
srtobj@meta.data$Identity <- Idents(srtobj)
|
|
30
|
-
}
|
|
31
28
|
assay <- DefaultAssay(srtobj)
|
|
32
29
|
|
|
33
30
|
if (!is.null(mutaters) && length(mutaters) > 0) {
|
|
@@ -171,7 +168,7 @@ run_case <- function(name) {
|
|
|
171
168
|
} else {
|
|
172
169
|
subobj <- srtobj
|
|
173
170
|
}
|
|
174
|
-
case$group_by <- case$group_by %||%
|
|
171
|
+
case$group_by <- case$group_by %||% GetIdentityColumn(srtobj)
|
|
175
172
|
if (is.null(case$ident)) {
|
|
176
173
|
case$ident <- as.character(unique(subobj@meta.data[[case$group_by]]))
|
|
177
174
|
}
|