biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +4 -0
- biopipen/core/filters.py +1 -1
- biopipen/core/testing.py +2 -1
- biopipen/ns/cellranger.py +33 -3
- biopipen/ns/regulatory.py +4 -0
- biopipen/ns/scrna.py +548 -98
- biopipen/ns/scrna_metabolic_landscape.py +4 -0
- biopipen/ns/tcr.py +256 -16
- biopipen/ns/web.py +5 -0
- biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +9 -9
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +9 -8
- biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +9 -9
- biopipen/reports/tcr/ClonalStats.svelte +1 -0
- biopipen/scripts/cellranger/CellRangerCount.py +55 -11
- biopipen/scripts/cellranger/CellRangerVdj.py +54 -8
- biopipen/scripts/regulatory/MotifAffinityTest.R +21 -5
- biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +9 -2
- biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +15 -6
- biopipen/scripts/regulatory/VariantMotifPlot.R +1 -1
- biopipen/scripts/regulatory/motifs-common.R +3 -2
- biopipen/scripts/scrna/AnnData2Seurat.R +2 -1
- biopipen/scripts/scrna/CellCellCommunication.py +26 -14
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +23 -4
- biopipen/scripts/scrna/CellSNPLite.py +30 -0
- biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +27 -36
- biopipen/scripts/scrna/CellTypeAnnotation-direct.R +42 -26
- biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +11 -13
- biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +5 -8
- biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +5 -8
- biopipen/scripts/scrna/CellTypeAnnotation.R +26 -3
- biopipen/scripts/scrna/MQuad.py +25 -0
- biopipen/scripts/scrna/MarkersFinder.R +128 -30
- biopipen/scripts/scrna/ModuleScoreCalculator.R +9 -1
- biopipen/scripts/scrna/PseudoBulkDEG.R +113 -27
- biopipen/scripts/scrna/ScFGSEA.R +23 -26
- biopipen/scripts/scrna/ScVelo.py +20 -8
- biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
- biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -1
- biopipen/scripts/scrna/SeuratClustering.R +5 -1
- biopipen/scripts/scrna/SeuratMap2Ref.R +1 -2
- biopipen/scripts/scrna/SeuratPreparing.R +19 -11
- biopipen/scripts/scrna/SeuratSubClustering.R +1 -1
- biopipen/scripts/scrna/Slingshot.R +2 -4
- biopipen/scripts/scrna/TopExpressingGenes.R +1 -4
- biopipen/scripts/scrna/celltypist-wrapper.py +140 -4
- biopipen/scripts/scrna/scvelo_paga.py +313 -0
- biopipen/scripts/scrna/seurat_anndata_conversion.py +18 -1
- biopipen/scripts/tcr/{TCRClustering.R → CDR3Clustering.R} +63 -23
- biopipen/scripts/tcr/ClonalStats.R +76 -35
- biopipen/utils/misc.py +104 -9
- {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/METADATA +5 -2
- {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/RECORD +55 -53
- {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
- biopipen/utils/common_docstrs.py +0 -103
- {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +0 -0
|
@@ -1,19 +1,24 @@
|
|
|
1
|
-
import
|
|
1
|
+
import hashlib
|
|
2
|
+
import shutil
|
|
2
3
|
import re
|
|
4
|
+
from contextlib import suppress
|
|
3
5
|
from pathlib import Path, PosixPath # noqa: F401
|
|
4
6
|
from biopipen.utils.misc import run_command
|
|
5
7
|
|
|
6
8
|
fastqs: list[Path] = {{in.fastqs | each: as_path}} # pyright: ignore # noqa
|
|
7
|
-
outdir:
|
|
9
|
+
outdir: Path = Path({{out.outdir | quote}}) # pyright: ignore
|
|
8
10
|
id: str = {{out.outdir | basename | quote}} # pyright: ignore
|
|
9
11
|
|
|
10
12
|
cellranger: str = {{envs.cellranger | quote}} # pyright: ignore
|
|
11
13
|
tmpdir = Path({{envs.tmpdir | quote}}) # pyright: ignore
|
|
12
14
|
ref: str = {{envs.ref | quote}} # pyright: ignore
|
|
13
15
|
ncores: int = {{envs.ncores | int}} # pyright: ignore
|
|
16
|
+
outdir_is_mounted: bool = {{envs.outdir_is_mounted | repr}} # pyright: ignore
|
|
17
|
+
copy_outs_only: bool = {{envs.copy_outs_only | repr}} # pyright: ignore
|
|
14
18
|
|
|
15
19
|
# create a temporary unique directory to store the soft-linked fastq files
|
|
16
|
-
|
|
20
|
+
uid = hashlib.md5(str(fastqs).encode()).hexdigest()[:8]
|
|
21
|
+
fastqdir = tmpdir / f"cellranger_count_{uid}"
|
|
17
22
|
fastqdir.mkdir(parents=True, exist_ok=True)
|
|
18
23
|
if len(fastqs) == 1 and fastqs[0].is_dir():
|
|
19
24
|
fastqs = list(fastqs[0].glob("*.fastq.gz"))
|
|
@@ -23,7 +28,7 @@ for fastq in fastqs:
|
|
|
23
28
|
fastq = Path(fastq)
|
|
24
29
|
(fastqdir / fastq.name).symlink_to(fastq)
|
|
25
30
|
|
|
26
|
-
other_args = {{envs | dict_to_cli_args: dashify=True, exclude=['cellranger', 'reference', 'ref', 'tmpdir', 'id', 'ncores']}} # pyright: ignore
|
|
31
|
+
other_args = {{envs | dict_to_cli_args: dashify=True, exclude=['cellranger', 'reference', 'ref', 'tmpdir', 'id', 'ncores', 'outdir_is_mounted', 'copy_outs_only']}} # pyright: ignore
|
|
27
32
|
|
|
28
33
|
command = [
|
|
29
34
|
cellranger,
|
|
@@ -40,12 +45,26 @@ command = [
|
|
|
40
45
|
*other_args,
|
|
41
46
|
]
|
|
42
47
|
|
|
43
|
-
run_command(
|
|
48
|
+
version: str = run_command([cellranger, "--version"], stdout = "RETURN") # type: ignore
|
|
49
|
+
version = version.replace("cellranger", "").replace("-", "").strip() # type: ignore
|
|
50
|
+
print(f"# Detected cellranger version: {version}")
|
|
44
51
|
|
|
45
|
-
|
|
52
|
+
if outdir_is_mounted:
|
|
53
|
+
print("# Using mounted outdir, redirecting cellranger output to a local tmpdir")
|
|
54
|
+
local_outdir = tmpdir / f"{outdir.name}-{uid}" / id
|
|
55
|
+
if local_outdir.parent.exists():
|
|
56
|
+
shutil.rmtree(local_outdir.parent)
|
|
57
|
+
local_outdir.parent.mkdir(parents=True, exist_ok=True)
|
|
58
|
+
odir = local_outdir
|
|
59
|
+
else:
|
|
60
|
+
odir = outdir
|
|
61
|
+
|
|
62
|
+
run_command(command, fg=True, cwd=str(odir.parent))
|
|
63
|
+
|
|
64
|
+
web_summary_html = odir / "outs" / "web_summary.html"
|
|
46
65
|
if not web_summary_html.exists():
|
|
47
66
|
raise RuntimeError(
|
|
48
|
-
f"web_summary.html does not exist in {
|
|
67
|
+
f"web_summary.html does not exist in {odir}/outs. "
|
|
49
68
|
"cellranger vdj failed."
|
|
50
69
|
)
|
|
51
70
|
|
|
@@ -53,7 +72,7 @@ if not web_summary_html.exists():
|
|
|
53
72
|
# to avoid vscode live server breaking the page by injecting some code
|
|
54
73
|
print("# Modify web_summary.html to move javascript to a separate file")
|
|
55
74
|
try:
|
|
56
|
-
web_summary_js =
|
|
75
|
+
web_summary_js = odir / "outs" / "web_summary.js"
|
|
57
76
|
web_summary_content = web_summary_html.read_text()
|
|
58
77
|
regex = re.compile(r"<script>(.+)</script>", re.DOTALL)
|
|
59
78
|
web_summary_html.write_text(regex.sub(
|
|
@@ -64,3 +83,30 @@ try:
|
|
|
64
83
|
except Exception as e:
|
|
65
84
|
print(f"Error modifying web_summary.html: {e}")
|
|
66
85
|
raise e
|
|
86
|
+
|
|
87
|
+
# If using local tmpdir for output, copy results to the final outdir
|
|
88
|
+
if outdir_is_mounted:
|
|
89
|
+
print("# Copy results back to outdir")
|
|
90
|
+
if outdir.exists():
|
|
91
|
+
shutil.rmtree(outdir)
|
|
92
|
+
|
|
93
|
+
if copy_outs_only:
|
|
94
|
+
outdir.mkdir(parents=True, exist_ok=True)
|
|
95
|
+
with suppress(Exception):
|
|
96
|
+
# Some files may fail to copy due to permission issues
|
|
97
|
+
# But the contents are actually copied
|
|
98
|
+
shutil.copytree(odir / "outs", outdir / "outs")
|
|
99
|
+
else:
|
|
100
|
+
with suppress(Exception):
|
|
101
|
+
shutil.copytree(local_outdir, outdir) # type: ignore
|
|
102
|
+
|
|
103
|
+
# Make sure essential files exist
|
|
104
|
+
web_summary_html = outdir / "outs" / "web_summary.html"
|
|
105
|
+
web_summary_js = outdir / "outs" / "web_summary.js"
|
|
106
|
+
filtered_annotations_csv = outdir / "outs" / "filtered_contig_annotations.csv"
|
|
107
|
+
for f in [web_summary_html, web_summary_js, filtered_annotations_csv]:
|
|
108
|
+
if not f.exists():
|
|
109
|
+
raise RuntimeError(
|
|
110
|
+
f"{f} does not exist in {outdir}/outs. "
|
|
111
|
+
"Copying results back from tmpdir failed."
|
|
112
|
+
)
|
|
@@ -14,6 +14,7 @@ bcftools <- {{envs.bcftools | r}}
|
|
|
14
14
|
genome <- {{envs.genome | r}}
|
|
15
15
|
motif_col <- {{envs.motif_col | r}}
|
|
16
16
|
regulator_col <- {{envs.regulator_col | r}}
|
|
17
|
+
var_col <- {{envs.var_col | r}}
|
|
17
18
|
notfound <- {{envs.notfound | r}}
|
|
18
19
|
motifdb <- {{envs.motifdb | r}}
|
|
19
20
|
regmotifs <- {{envs.regmotifs | r}}
|
|
@@ -21,6 +22,7 @@ devpars <- {{envs.devpars | r}}
|
|
|
21
22
|
plot_nvars <- {{envs.plot_nvars | r}}
|
|
22
23
|
plots <- {{envs.plots | r}}
|
|
23
24
|
cutoff <- {{envs.cutoff | r}}
|
|
25
|
+
set.seed(8525)
|
|
24
26
|
|
|
25
27
|
if (is.null(motifdb) || !file.exists(motifdb)) {
|
|
26
28
|
stop("Motif database (envs.motifdb) is required and must exist")
|
|
@@ -47,10 +49,21 @@ log <- get_logger()
|
|
|
47
49
|
log$info("Reading input regulator/motif file ...")
|
|
48
50
|
in_motifs <- read.table(motiffile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
|
|
49
51
|
|
|
52
|
+
|
|
50
53
|
log$info("Ensuring motifs and regulators in the input data ...")
|
|
51
|
-
in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
|
|
54
|
+
in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, var_col, regmotifs, notfound = notfound)
|
|
52
55
|
genome_pkg <- get_genome_pkg(genome)
|
|
53
56
|
|
|
57
|
+
motif_var_pairs <- NULL
|
|
58
|
+
if (!is.null(var_col)) {
|
|
59
|
+
log$info("Obtaining motif-variant pairs to test ...")
|
|
60
|
+
if (!var_col %in% colnames(in_motifs)) {
|
|
61
|
+
stop("Variant column (envs.var_col) not found in the input motif file")
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
motif_var_pairs <- unique(paste0(in_motifs[[motif_col]], " // ", in_motifs[[var_col]]))
|
|
65
|
+
}
|
|
66
|
+
|
|
54
67
|
log$info("Reading variant file ...")
|
|
55
68
|
if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
|
|
56
69
|
log$info("Converting VCF file to BED file ...")
|
|
@@ -77,10 +90,13 @@ mdb <- read_meme_to_motifdb(motifdb, in_motifs, motif_col, regulator_col, notfou
|
|
|
77
90
|
tool <- tolower(tool)
|
|
78
91
|
tool <- match.arg(tool, c("motifbreakr", "atsnp"))
|
|
79
92
|
|
|
80
|
-
if
|
|
93
|
+
{% if envs.tool == "motifbreakr" %}
|
|
81
94
|
motifbreakr_args <- {{envs.motifbreakr_args | r}}
|
|
82
95
|
{% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_MotifBreakR.R" %}
|
|
83
|
-
|
|
84
|
-
atsnp_args <-
|
|
96
|
+
{% else %}
|
|
97
|
+
atsnp_args <- list_update(
|
|
98
|
+
list(padj_cutoff = TRUE, padj = "BH", p = "Pval_diff"),
|
|
99
|
+
{{envs.atsnp_args | r}}
|
|
100
|
+
)
|
|
85
101
|
{% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_AtSNP.R" %}
|
|
86
|
-
}
|
|
102
|
+
{% endif %}
|
|
@@ -46,6 +46,13 @@ atsnp_result <- ComputePValues(
|
|
|
46
46
|
testing.mc = TRUE
|
|
47
47
|
)
|
|
48
48
|
|
|
49
|
+
if (!is.null(motif_var_pairs)) {
|
|
50
|
+
log$info("Filtering motif-variant pairs ...")
|
|
51
|
+
atsnp_result$motifs_vars <- paste0(atsnp_result$motif, " // ", atsnp_result$snpid)
|
|
52
|
+
atsnp_result <- atsnp_result[atsnp_result$motifs_vars %in% motif_var_pairs, , drop = FALSE]
|
|
53
|
+
atsnp_result$motifs_vars <- NULL
|
|
54
|
+
}
|
|
55
|
+
|
|
49
56
|
padj_col <- paste0(atsnp_args$p, "_adj")
|
|
50
57
|
atsnp_result[[padj_col]] <- p.adjust(atsnp_result[[atsnp_args$p]], method = atsnp_args$padj)
|
|
51
58
|
cutoff_col <- if (atsnp_args$padj_cutoff) padj_col else atsnp_args$p
|
|
@@ -87,7 +94,8 @@ write.table(
|
|
|
87
94
|
|
|
88
95
|
log$info("Plotting variants ...")
|
|
89
96
|
# Convert result to GRanges object
|
|
90
|
-
atsnp_result$alleleDiff <- -atsnp_result[[cutoff_col]]
|
|
97
|
+
atsnp_result$alleleDiff <- -log10(atsnp_result[[cutoff_col]])
|
|
98
|
+
atsnp_result <- atsnp_result[order(-atsnp_result$alleleDiff), , drop = FALSE]
|
|
91
99
|
atsnp_result$effect <- "strong"
|
|
92
100
|
atsnp_result$motifPos <- lapply(atsnp_result$motifPos, function(x) as.integer(unlist(strsplit(x, ","))))
|
|
93
101
|
atsnp_result <- makeGRangesFromDataFrame(atsnp_result, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)
|
|
@@ -96,7 +104,6 @@ attributes(atsnp_result)$genome.package <- genome_pkg
|
|
|
96
104
|
attributes(atsnp_result)$motifs <- mdb
|
|
97
105
|
|
|
98
106
|
if (is.null(plots) || length(plots) == 0) {
|
|
99
|
-
atsnp_result <- atsnp_result[order(-abs(atsnp_result$alleleDiff)), , drop = FALSE]
|
|
100
107
|
atsnp_result <- atsnp_result[1:min(plot_nvars, length(atsnp_result)), , drop = FALSE]
|
|
101
108
|
variants <- unique(atsnp_result$SNP_id)
|
|
102
109
|
} else {
|
|
@@ -50,6 +50,7 @@ results <- motifbreakR(
|
|
|
50
50
|
|
|
51
51
|
log$info("Calculating p values ...")
|
|
52
52
|
results <- calculatePvalue(results)
|
|
53
|
+
results$.id <- 1:length(results)
|
|
53
54
|
results_to_save <- as.data.frame(unname(results))
|
|
54
55
|
results_to_save$motifPos <- lapply(results_to_save$motifPos, function(x) paste(x, collapse = ","))
|
|
55
56
|
results_to_save$altPos <- lapply(results_to_save$altPos, function(x) paste(x, collapse = ","))
|
|
@@ -60,20 +61,28 @@ if (!is.null(regulator_col)) {
|
|
|
60
61
|
drop = TRUE
|
|
61
62
|
]
|
|
62
63
|
}
|
|
63
|
-
results_to_save <- apply(results_to_save, 2, as.character)
|
|
64
|
+
results_to_save <- as.data.frame(apply(results_to_save, 2, as.character))
|
|
65
|
+
|
|
66
|
+
if (!is.null(motif_var_pairs)) {
|
|
67
|
+
log$info("Filtering motif-variant pairs ...")
|
|
68
|
+
results_to_save$motifs_vars <- paste0(results_to_save$providerId, " // ", results_to_save$SNP_id)
|
|
69
|
+
results_to_save <- results_to_save[results_to_save$motifs_vars %in% motif_var_pairs, , drop = FALSE]
|
|
70
|
+
results_to_save$motifs_vars <- NULL
|
|
71
|
+
}
|
|
64
72
|
|
|
65
73
|
write.table(
|
|
66
74
|
results_to_save,
|
|
67
75
|
file = file.path(outdir, "motifbreakr.txt"),
|
|
68
76
|
sep = "\t", quote = FALSE, row.names = FALSE
|
|
69
77
|
)
|
|
70
|
-
rm(results_to_save)
|
|
78
|
+
# rm(results_to_save)
|
|
71
79
|
|
|
72
80
|
log$info("Plotting variants ...")
|
|
73
81
|
if (is.null(plots) || length(plots) == 0) {
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
82
|
+
results_to_save$alleleDiff <- as.numeric(results_to_save$alleleDiff)
|
|
83
|
+
results_to_save <- results_to_save[order(-abs(results_to_save$alleleDiff)), , drop = FALSE]
|
|
84
|
+
results_to_save <- results_to_save[1:min(plot_nvars, nrow(results_to_save)), , drop = FALSE]
|
|
85
|
+
variants <- unique(results_to_save$SNP_id)
|
|
77
86
|
} else {
|
|
78
87
|
variants <- names(plots)
|
|
79
88
|
}
|
|
@@ -88,7 +97,7 @@ for (variant in variants) {
|
|
|
88
97
|
if (is.null(plots[[variant]]$devpars)) {
|
|
89
98
|
plots[[variant]]$devpars <- devpars
|
|
90
99
|
}
|
|
91
|
-
res <- results[results$SNP_id == variant, , drop = FALSE]
|
|
100
|
+
res <- results[results$SNP_id == variant & results$.id %in% results_to_save$.id, , drop = FALSE]
|
|
92
101
|
res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
|
|
93
102
|
|
|
94
103
|
plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)
|
|
@@ -33,7 +33,7 @@ log$info("Reading input data ...")
|
|
|
33
33
|
indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
|
|
34
34
|
|
|
35
35
|
log$info("Ensuring regulators in the input data ...")
|
|
36
|
-
indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
|
|
36
|
+
indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, "SNP_id", regmotifs, notfound = notfound)
|
|
37
37
|
genome_pkg <- get_genome_pkg(genome)
|
|
38
38
|
|
|
39
39
|
log$info("Reading motif database ...")
|
|
@@ -138,12 +138,13 @@ motifdb_to_motiflib <- function(motifdb) {
|
|
|
138
138
|
#' @param outdir Output directory, used to save un-matched regulators
|
|
139
139
|
#' @param motif_col Column name for the motif
|
|
140
140
|
#' @param regulator_col Column name for the regulator
|
|
141
|
+
#' @param var_col Column name for the variant
|
|
141
142
|
#' @param regmotifs Regulator-motif mapping file
|
|
142
143
|
#' @param log_indent Indentation for log messages
|
|
143
144
|
#' @param notfound Action to take if regulators are not found in the mapping file
|
|
144
145
|
#' @return Data frame with regulators and motifs
|
|
145
146
|
#' @export
|
|
146
|
-
ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, regmotifs, log_indent = "", notfound = "error", log = NULL) {
|
|
147
|
+
ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, var_col, regmotifs, log_indent = "", notfound = "error", log = NULL) {
|
|
147
148
|
if (is.null(motif_col)) {
|
|
148
149
|
if (is.null(regmotifs)) {
|
|
149
150
|
stop("Regulator-motif mapping file (envs.regmotifs) is required when no motif column (envs.motif_col) is provided")
|
|
@@ -198,7 +199,7 @@ ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, r
|
|
|
198
199
|
regulator_col <<- rm_reg_col
|
|
199
200
|
}
|
|
200
201
|
} else {
|
|
201
|
-
indata <- indata[!duplicated(indata[, c(regulator_col, motif_col), drop = FALSE]), , drop = FALSE]
|
|
202
|
+
indata <- indata[!duplicated(indata[, c(regulator_col, motif_col, var_col), drop = FALSE]), , drop = FALSE]
|
|
202
203
|
}
|
|
203
204
|
|
|
204
205
|
return(indata)
|
|
@@ -8,10 +8,11 @@ outfile <- {{out.outfile | r}}
|
|
|
8
8
|
dotplot_check <- {{envs.dotplot_check | r}}
|
|
9
9
|
outdir <- dirname(outfile)
|
|
10
10
|
assay <- {{envs.assay | r}}
|
|
11
|
+
ident <- {{envs.ident | r}}
|
|
11
12
|
|
|
12
13
|
log <- get_logger()
|
|
13
14
|
|
|
14
|
-
ConvertAnnDataToSeurat(adfile, outfile = outfile, assay = assay, log = log)
|
|
15
|
+
ConvertAnnDataToSeurat(adfile, outfile = outfile, assay = assay, ident = ident, log = log)
|
|
15
16
|
|
|
16
17
|
if (!isFALSE(dotplot_check)) {
|
|
17
18
|
log$info("Reading Seurat object ...")
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
from biopipen.utils.misc import run_command, logger
|
|
3
|
+
from biopipen.scripts.scrna.seurat_anndata_conversion import convert_seurat_to_anndata
|
|
3
4
|
import os
|
|
4
5
|
import numpy as np
|
|
5
6
|
import pandas as pd
|
|
@@ -7,6 +8,10 @@ import scanpy
|
|
|
7
8
|
import liana
|
|
8
9
|
import liana.method.sc._liana_pipe as _liana_pipe
|
|
9
10
|
|
|
11
|
+
# np.product was removed in numpy 2.0; alias it to np.prod to avoid "AttributeError: module 'numpy' has no attribute 'product'"
|
|
12
|
+
if not hasattr(np, "product"):
|
|
13
|
+
np.product = np.prod
|
|
14
|
+
|
|
10
15
|
# monkey-patch liana.method.sc._liana_pipe._trimean due to the updates by scipy 1.14
|
|
11
16
|
# https://github.com/scipy/scipy/commit/a660202652deead0f3b4b688eb9fdcdf9f74066c
|
|
12
17
|
def _trimean(a, axis=0):
|
|
@@ -35,27 +40,24 @@ ncores = envs.pop("ncores")
|
|
|
35
40
|
species = envs.pop("species")
|
|
36
41
|
rscript = envs.pop("rscript")
|
|
37
42
|
subset = envs.pop("subset")
|
|
43
|
+
group_by = envs.pop("group_by", None)
|
|
44
|
+
groupby = envs.pop("groupby", None) or group_by
|
|
38
45
|
subset_using = envs.pop("subset_using", "auto")
|
|
39
46
|
if subset_using == "auto":
|
|
40
47
|
subset_using = "python" if subset and "[" in subset else "r"
|
|
41
48
|
split_by = envs.pop("split_by")
|
|
42
49
|
|
|
43
50
|
if sobjfile.suffix.lower() in (".rds", ".qs", "qs2"):
|
|
44
|
-
logger.info("Converting the Seurat object to h5ad ...")
|
|
45
|
-
|
|
46
51
|
annfile = outfile.parent / f"{sobjfile.stem}.h5ad"
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
f"({str(sobjfile)!r}, {str(annfile)!r}, assay = {{envs['assay'] | r}})"
|
|
57
|
-
)
|
|
58
|
-
run_command([rscript, "-e", r_script_convert_to_anndata], fg=True)
|
|
52
|
+
seurat_ident_col = convert_seurat_to_anndata(
|
|
53
|
+
input_file=str(sobjfile),
|
|
54
|
+
output_file=str(annfile),
|
|
55
|
+
assay=assay,
|
|
56
|
+
subset=subset if subset_using == "r" else None,
|
|
57
|
+
rscript=rscript,
|
|
58
|
+
return_ident_col=not groupby,
|
|
59
|
+
)
|
|
60
|
+
groupby = groupby or seurat_ident_col
|
|
59
61
|
sobjfile = annfile
|
|
60
62
|
elif subset and subset == "r":
|
|
61
63
|
raise ValueError(
|
|
@@ -63,6 +65,16 @@ elif subset and subset == "r":
|
|
|
63
65
|
"'subset' can only be a 'python' expression (`envs.subset_using = 'python'`)."
|
|
64
66
|
)
|
|
65
67
|
|
|
68
|
+
if not groupby:
|
|
69
|
+
logger.warning(
|
|
70
|
+
"`groupby` is not provided. "
|
|
71
|
+
"Using 'seurat_clusters' as the default groupby column. "
|
|
72
|
+
"It is recommended to provide the `groupby` parameter."
|
|
73
|
+
)
|
|
74
|
+
groupby = "seurat_clusters"
|
|
75
|
+
|
|
76
|
+
envs["groupby"] = groupby
|
|
77
|
+
|
|
66
78
|
logger.info("Reading the h5ad file ...")
|
|
67
79
|
adata = scanpy.read_h5ad(sobjfile)
|
|
68
80
|
|
|
@@ -27,7 +27,7 @@ defaults <- list(
|
|
|
27
27
|
devpars = list(res = 100)
|
|
28
28
|
)
|
|
29
29
|
|
|
30
|
-
cases <- expand_cases(cases, defaults)
|
|
30
|
+
cases <- expand_cases(cases, defaults, default_case = "Cell-Cell Communication")
|
|
31
31
|
log <- get_logger()
|
|
32
32
|
reporter <- get_reporter()
|
|
33
33
|
|
|
@@ -35,12 +35,31 @@ do_case <- function(name) {
|
|
|
35
35
|
log$info("- Case: {name}")
|
|
36
36
|
case <- cases[[name]]
|
|
37
37
|
info <- case_info(name, outdir, is_dir = FALSE)
|
|
38
|
-
case <- extract_vars(case, "subset", "devpars", "more_formats", "descr")
|
|
38
|
+
case <- extract_vars(case, subset_ = "subset", "devpars", "more_formats", "descr")
|
|
39
39
|
|
|
40
40
|
case$data <- ccc
|
|
41
|
-
if (!is.null(
|
|
42
|
-
case$data <- ccc %>% dplyr::filter(!!parse_expr(
|
|
41
|
+
if (!is.null(subset_)) {
|
|
42
|
+
case$data <- ccc %>% dplyr::filter(!!parse_expr(subset_))
|
|
43
43
|
}
|
|
44
|
+
|
|
45
|
+
if (identical(case$plot_type, "table")) {
|
|
46
|
+
write.table(
|
|
47
|
+
case$data,
|
|
48
|
+
file = paste0(info$prefix, ".txt"),
|
|
49
|
+
sep = "\t",
|
|
50
|
+
row.names = FALSE,
|
|
51
|
+
col.names = TRUE,
|
|
52
|
+
quote = FALSE
|
|
53
|
+
)
|
|
54
|
+
report <- list(
|
|
55
|
+
kind = "table",
|
|
56
|
+
data = list(nrows = 100),
|
|
57
|
+
src = paste0(info$prefix, ".txt")
|
|
58
|
+
)
|
|
59
|
+
reporter$add2(report, hs = c(info$section, info$name))
|
|
60
|
+
return()
|
|
61
|
+
}
|
|
62
|
+
|
|
44
63
|
if (is.null(case$magnitude)) {
|
|
45
64
|
case$magnitude <- NULL
|
|
46
65
|
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from contextlib import suppress
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from biopipen.core.filters import dict_to_cli_args
|
|
6
|
+
from biopipen.utils.misc import run_command
|
|
7
|
+
|
|
8
|
+
crdir = Path({{in.crdir | quote}}) # noqa: E999 # pyright: ignore
|
|
9
|
+
outdir = {{out.outdir | quote}} # pyright: ignore
|
|
10
|
+
envs: dict = {{envs | repr}} # pyright: ignore
|
|
11
|
+
cellsnp_lite = envs.pop("cellsnp_lite")
|
|
12
|
+
ncores = envs.pop("ncores")
|
|
13
|
+
|
|
14
|
+
with suppress(RuntimeError):
|
|
15
|
+
run_command([cellsnp_lite, "--version"], fg=True)
|
|
16
|
+
print("")
|
|
17
|
+
|
|
18
|
+
if crdir.name != "outs":
|
|
19
|
+
crdir = crdir / "outs"
|
|
20
|
+
|
|
21
|
+
bamfile = str(crdir / "possorted_genome_bam.bam")
|
|
22
|
+
barcodefile = str(crdir / "filtered_feature_bc_matrix" / "barcodes.tsv.gz")
|
|
23
|
+
|
|
24
|
+
envs["nproc"] = ncores
|
|
25
|
+
envs["samFile"] = bamfile
|
|
26
|
+
envs["barcodeFile"] = barcodefile
|
|
27
|
+
envs["outDir"] = outdir
|
|
28
|
+
|
|
29
|
+
cmd = [cellsnp_lite, *dict_to_cli_args(envs)]
|
|
30
|
+
run_command(cmd, fg=True, bufsize=1)
|
|
@@ -7,6 +7,7 @@ library(biopipen.utils)
|
|
|
7
7
|
sobjfile <- {{in.sobjfile | r}}
|
|
8
8
|
outfile <- {{out.outfile | r}}
|
|
9
9
|
newcol <- {{envs.newcol | r}}
|
|
10
|
+
cluster_ident <- {{envs.ident | r }}
|
|
10
11
|
merge_same_labels <- {{envs.merge | r}}
|
|
11
12
|
celltypist_args <- {{envs.celltypist_args | r}}
|
|
12
13
|
outtype <- {{envs.outtype | r }}
|
|
@@ -17,6 +18,10 @@ if (identical(outtype, "input")) {
|
|
|
17
18
|
outdir <- dirname(outfile)
|
|
18
19
|
outprefix <- file.path(outdir, tools::file_path_sans_ext(basename(outfile)))
|
|
19
20
|
|
|
21
|
+
over_clustering <- celltypist_args$over_clustering %||% cluster_ident
|
|
22
|
+
|
|
23
|
+
require_package("celltypist", version = ">=1.7.1", python = celltypist_args$python)
|
|
24
|
+
|
|
20
25
|
log <- get_logger()
|
|
21
26
|
|
|
22
27
|
if (is.null(celltypist_args$model)) {
|
|
@@ -30,23 +35,14 @@ suppressWarnings(file.remove(modelfile))
|
|
|
30
35
|
file.symlink(normalizePath(celltypist_args$model), modelfile)
|
|
31
36
|
|
|
32
37
|
sobj <- NULL
|
|
38
|
+
ident <- NULL
|
|
33
39
|
if (!endsWith(sobjfile, ".h5ad")) {
|
|
34
40
|
sobj <- read_obj(sobjfile)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
celltypist_args$over_clustering <- col
|
|
41
|
-
break
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
if (is.null(celltypist_args$over_clustering)) {
|
|
46
|
-
celltypist_args$over_clustering <- FALSE
|
|
47
|
-
}
|
|
48
|
-
if (!isFALSE(celltypist_args$over_clustering)) {
|
|
49
|
-
destfile <- paste0(outprefix, ".", celltypist_args$over_clustering, ".h5ad")
|
|
41
|
+
ident <- GetIdentityColumn(sobj)
|
|
42
|
+
over_clustering <- over_clustering %||% ident
|
|
43
|
+
|
|
44
|
+
if (!isFALSE(over_clustering)) {
|
|
45
|
+
destfile <- paste0(outprefix, ".", over_clustering, ".h5ad")
|
|
50
46
|
} else {
|
|
51
47
|
destfile <- paste0(outprefix, ".h5ad")
|
|
52
48
|
}
|
|
@@ -61,7 +57,7 @@ if (!endsWith(sobjfile, ".h5ad")) {
|
|
|
61
57
|
ConvertSeuratToAnnData(
|
|
62
58
|
sobj,
|
|
63
59
|
outfile = destfile,
|
|
64
|
-
assay = celltypist_args$assay
|
|
60
|
+
assay = celltypist_args$assay,
|
|
65
61
|
log = log
|
|
66
62
|
)
|
|
67
63
|
}
|
|
@@ -103,15 +99,15 @@ if (file.exists(celltypist_outfile) &&
|
|
|
103
99
|
"-m", celltypist_args$model,
|
|
104
100
|
"-o", celltypist_outfile
|
|
105
101
|
)
|
|
106
|
-
if (!isFALSE(
|
|
107
|
-
|
|
108
|
-
command <- paste(command, "-c", celltypist_args$over_clustering)
|
|
102
|
+
if (!isFALSE(over_clustering) && !is.null(over_clustering)) {
|
|
103
|
+
command <- paste(command, "-c", over_clustering)
|
|
109
104
|
}
|
|
110
105
|
if (isTRUE(celltypist_args$majority_voting)) {
|
|
111
106
|
command <- paste(command, "-v")
|
|
112
107
|
}
|
|
113
108
|
log$info("Running celltypist:")
|
|
114
|
-
print("- {command}")
|
|
109
|
+
# print("- {command}")
|
|
110
|
+
log$debug(" {command}")
|
|
115
111
|
rc <- system(command)
|
|
116
112
|
if (rc != 0) {
|
|
117
113
|
stop("Failed to run celltypist. Check the job.stderr file to see the error message.")
|
|
@@ -129,6 +125,7 @@ if (outtype == "h5ad") {
|
|
|
129
125
|
infile = celltypist_outfile,
|
|
130
126
|
outfile = NULL,
|
|
131
127
|
assay = celltypist_args$assay %||% "RNA",
|
|
128
|
+
ident = ident,
|
|
132
129
|
log = log
|
|
133
130
|
)
|
|
134
131
|
} else {
|
|
@@ -152,31 +149,20 @@ if (outtype == "h5ad") {
|
|
|
152
149
|
|
|
153
150
|
if (!is.null(newcol)) {
|
|
154
151
|
sobj@meta.data[[newcol]] <- sobj@meta.data[[prediction]]
|
|
155
|
-
} else {
|
|
156
|
-
over_clustering
|
|
157
|
-
|
|
158
|
-
sobj@meta.data$seurat_clusters_id <- sobj@meta.data[[over_clustering]]
|
|
159
|
-
} else {
|
|
160
|
-
over_clustering <- "over_clustering"
|
|
161
|
-
}
|
|
152
|
+
} else if (!isFALSE(over_clustering) && !is.null(over_clustering)) {
|
|
153
|
+
# save the original over_clustering column as seurat_clusters_id
|
|
154
|
+
sobj@meta.data$seurat_clusters_id <- sobj@meta.data[[over_clustering]]
|
|
162
155
|
|
|
163
156
|
# make a map of original cluster id to new cluster id
|
|
164
157
|
cluster_map <- data.frame(
|
|
165
|
-
seurat_clusters_id = sobj@meta.data
|
|
158
|
+
seurat_clusters_id = sobj@meta.data$seurat_clusters_id,
|
|
166
159
|
seurat_clusters = sobj@meta.data[[prediction]]
|
|
167
160
|
) %>%
|
|
168
161
|
group_by(seurat_clusters_id) %>%
|
|
169
162
|
summarise(seurat_clusters = first(seurat_clusters), .groups = "drop") %>%
|
|
170
163
|
mutate(seurat_clusters = make.unique(seurat_clusters))
|
|
171
164
|
cluster_map <- split(cluster_map$seurat_clusters, cluster_map$seurat_clusters_id)
|
|
172
|
-
|
|
173
|
-
sobj@meta.data$seurat_clusters <- sobj@meta.data[[over_clustering]]
|
|
174
|
-
}
|
|
175
|
-
Idents(sobj) <- "seurat_clusters"
|
|
176
|
-
cluster_map$object <- sobj
|
|
177
|
-
log$info("Renaming clusters ...")
|
|
178
|
-
sobj <- do_call(RenameIdents, cluster_map)
|
|
179
|
-
sobj@meta.data$seurat_clusters <- Idents(sobj)
|
|
165
|
+
sobj <- rename_idents(sobj, over_clustering, cluster_map)
|
|
180
166
|
}
|
|
181
167
|
} else if (!is.null(newcol)) {
|
|
182
168
|
sobj@meta.data[[newcol]] <- sobj@meta.data[["predicted_labels"]]
|
|
@@ -187,6 +173,11 @@ if (outtype == "h5ad") {
|
|
|
187
173
|
sobj <- merge_clusters_with_same_labels(sobj, newcol)
|
|
188
174
|
}
|
|
189
175
|
|
|
176
|
+
if (!is.null(ident)) {
|
|
177
|
+
# restore the original identity
|
|
178
|
+
Idents(sobj) <- ident
|
|
179
|
+
}
|
|
180
|
+
|
|
190
181
|
log$info("Saving the object ...")
|
|
191
182
|
save_obj(sobj, outfile)
|
|
192
183
|
} else {
|