biopipen 0.34.6__py3-none-any.whl → 0.34.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +4 -0
  3. biopipen/core/filters.py +1 -1
  4. biopipen/core/testing.py +2 -1
  5. biopipen/ns/cellranger.py +33 -3
  6. biopipen/ns/regulatory.py +4 -0
  7. biopipen/ns/scrna.py +548 -98
  8. biopipen/ns/scrna_metabolic_landscape.py +4 -0
  9. biopipen/ns/tcr.py +256 -16
  10. biopipen/ns/web.py +5 -0
  11. biopipen/reports/scrna_metabolic_landscape/MetabolicFeatures.svelte +9 -9
  12. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayActivity.svelte +9 -8
  13. biopipen/reports/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.svelte +9 -9
  14. biopipen/reports/tcr/ClonalStats.svelte +1 -0
  15. biopipen/scripts/cellranger/CellRangerCount.py +55 -11
  16. biopipen/scripts/cellranger/CellRangerVdj.py +54 -8
  17. biopipen/scripts/regulatory/MotifAffinityTest.R +21 -5
  18. biopipen/scripts/regulatory/MotifAffinityTest_AtSNP.R +9 -2
  19. biopipen/scripts/regulatory/MotifAffinityTest_MotifBreakR.R +15 -6
  20. biopipen/scripts/regulatory/VariantMotifPlot.R +1 -1
  21. biopipen/scripts/regulatory/motifs-common.R +3 -2
  22. biopipen/scripts/scrna/AnnData2Seurat.R +2 -1
  23. biopipen/scripts/scrna/CellCellCommunication.py +26 -14
  24. biopipen/scripts/scrna/CellCellCommunicationPlots.R +23 -4
  25. biopipen/scripts/scrna/CellSNPLite.py +30 -0
  26. biopipen/scripts/scrna/CellTypeAnnotation-celltypist.R +27 -36
  27. biopipen/scripts/scrna/CellTypeAnnotation-direct.R +42 -26
  28. biopipen/scripts/scrna/CellTypeAnnotation-hitype.R +11 -13
  29. biopipen/scripts/scrna/CellTypeAnnotation-sccatch.R +5 -8
  30. biopipen/scripts/scrna/CellTypeAnnotation-sctype.R +5 -8
  31. biopipen/scripts/scrna/CellTypeAnnotation.R +26 -3
  32. biopipen/scripts/scrna/MQuad.py +25 -0
  33. biopipen/scripts/scrna/MarkersFinder.R +128 -30
  34. biopipen/scripts/scrna/ModuleScoreCalculator.R +9 -1
  35. biopipen/scripts/scrna/PseudoBulkDEG.R +113 -27
  36. biopipen/scripts/scrna/ScFGSEA.R +23 -26
  37. biopipen/scripts/scrna/ScVelo.py +20 -8
  38. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +1 -1
  39. biopipen/scripts/scrna/SeuratClusterStats-features.R +6 -1
  40. biopipen/scripts/scrna/SeuratClustering.R +5 -1
  41. biopipen/scripts/scrna/SeuratMap2Ref.R +1 -2
  42. biopipen/scripts/scrna/SeuratPreparing.R +19 -11
  43. biopipen/scripts/scrna/SeuratSubClustering.R +1 -1
  44. biopipen/scripts/scrna/Slingshot.R +2 -4
  45. biopipen/scripts/scrna/TopExpressingGenes.R +1 -4
  46. biopipen/scripts/scrna/celltypist-wrapper.py +140 -4
  47. biopipen/scripts/scrna/scvelo_paga.py +313 -0
  48. biopipen/scripts/scrna/seurat_anndata_conversion.py +18 -1
  49. biopipen/scripts/tcr/{TCRClustering.R → CDR3Clustering.R} +63 -23
  50. biopipen/scripts/tcr/ClonalStats.R +76 -35
  51. biopipen/utils/misc.py +104 -9
  52. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/METADATA +5 -2
  53. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/RECORD +55 -53
  54. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/WHEEL +1 -1
  55. biopipen/utils/common_docstrs.py +0 -103
  56. {biopipen-0.34.6.dist-info → biopipen-0.34.26.dist-info}/entry_points.txt +0 -0
@@ -1,19 +1,24 @@
1
- import uuid
1
+ import hashlib
2
+ import shutil
2
3
  import re
4
+ from contextlib import suppress
3
5
  from pathlib import Path, PosixPath # noqa: F401
4
6
  from biopipen.utils.misc import run_command
5
7
 
6
8
  fastqs: list[Path] = {{in.fastqs | each: as_path}} # pyright: ignore # noqa
7
- outdir: str = {{out.outdir | quote}} # pyright: ignore
9
+ outdir: Path = Path({{out.outdir | quote}}) # pyright: ignore
8
10
  id: str = {{out.outdir | basename | quote}} # pyright: ignore
9
11
 
10
12
  cellranger: str = {{envs.cellranger | quote}} # pyright: ignore
11
13
  tmpdir = Path({{envs.tmpdir | quote}}) # pyright: ignore
12
14
  ref: str = {{envs.ref | quote}} # pyright: ignore
13
15
  ncores: int = {{envs.ncores | int}} # pyright: ignore
16
+ outdir_is_mounted: bool = {{envs.outdir_is_mounted | repr}} # pyright: ignore
17
+ copy_outs_only: bool = {{envs.copy_outs_only | repr}} # pyright: ignore
14
18
 
15
19
  # create a temporary unique directory to store the soft-linked fastq files
16
- fastqdir = tmpdir / f"cellranger_count_{uuid.uuid4()}"
20
+ uid = hashlib.md5(str(fastqs).encode()).hexdigest()[:8]
21
+ fastqdir = tmpdir / f"cellranger_count_{uid}"
17
22
  fastqdir.mkdir(parents=True, exist_ok=True)
18
23
  if len(fastqs) == 1 and fastqs[0].is_dir():
19
24
  fastqs = list(fastqs[0].glob("*.fastq.gz"))
@@ -23,7 +28,7 @@ for fastq in fastqs:
23
28
  fastq = Path(fastq)
24
29
  (fastqdir / fastq.name).symlink_to(fastq)
25
30
 
26
- other_args = {{envs | dict_to_cli_args: dashify=True, exclude=['cellranger', 'reference', 'ref', 'tmpdir', 'id', 'ncores']}} # pyright: ignore
31
+ other_args = {{envs | dict_to_cli_args: dashify=True, exclude=['cellranger', 'reference', 'ref', 'tmpdir', 'id', 'ncores', 'outdir_is_mounted', 'copy_outs_only']}} # pyright: ignore
27
32
 
28
33
  command = [
29
34
  cellranger,
@@ -40,12 +45,26 @@ command = [
40
45
  *other_args,
41
46
  ]
42
47
 
43
- run_command(command, fg=True, cwd=str(Path(outdir).parent))
48
+ version: str = run_command([cellranger, "--version"], stdout = "RETURN") # type: ignore
49
+ version = version.replace("cellranger", "").replace("-", "").strip() # type: ignore
50
+ print(f"# Detected cellranger version: {version}")
44
51
 
45
- web_summary_html = Path(outdir) / "outs" / "web_summary.html"
52
+ if outdir_is_mounted:
53
+ print("# Using mounted outdir, redirecting cellranger output to a local tmpdir")
54
+ local_outdir = tmpdir / f"{outdir.name}-{uid}" / id
55
+ if local_outdir.parent.exists():
56
+ shutil.rmtree(local_outdir.parent)
57
+ local_outdir.parent.mkdir(parents=True, exist_ok=True)
58
+ odir = local_outdir
59
+ else:
60
+ odir = outdir
61
+
62
+ run_command(command, fg=True, cwd=str(odir.parent))
63
+
64
+ web_summary_html = odir / "outs" / "web_summary.html"
46
65
  if not web_summary_html.exists():
47
66
  raise RuntimeError(
48
- f"web_summary.html does not exist in {outdir}/outs. "
67
+ f"web_summary.html does not exist in {odir}/outs. "
49
68
  "cellranger vdj failed."
50
69
  )
51
70
 
@@ -53,7 +72,7 @@ if not web_summary_html.exists():
53
72
  # to void vscode live server breaking the page by injecting some code
54
73
  print("# Modify web_summary.html to move javascript to a separate file")
55
74
  try:
56
- web_summary_js = Path(outdir) / "outs" / "web_summary.js"
75
+ web_summary_js = odir / "outs" / "web_summary.js"
57
76
  web_summary_content = web_summary_html.read_text()
58
77
  regex = re.compile(r"<script>(.+)</script>", re.DOTALL)
59
78
  web_summary_html.write_text(regex.sub(
@@ -64,3 +83,30 @@ try:
64
83
  except Exception as e:
65
84
  print(f"Error modifying web_summary.html: {e}")
66
85
  raise e
86
+
87
+ # If using local tmpdir for output, move results to the final outdir
88
+ if outdir_is_mounted:
89
+ print("# Copy results back to outdir")
90
+ if outdir.exists():
91
+ shutil.rmtree(outdir)
92
+
93
+ if copy_outs_only:
94
+ outdir.mkdir(parents=True, exist_ok=True)
95
+ with suppress(Exception):
96
+ # Some files may be failed to copy due to permission issues
97
+ # But the contents are actually copied
98
+ shutil.copytree(odir / "outs", outdir / "outs")
99
+ else:
100
+ with suppress(Exception):
101
+ shutil.copytree(local_outdir, outdir) # type: ignore
102
+
103
+ # Make sure essential files exist
104
+ web_summary_html = outdir / "outs" / "web_summary.html"
105
+ web_summary_js = outdir / "outs" / "web_summary.js"
106
+ filtered_annotations_csv = outdir / "outs" / "filtered_contig_annotations.csv"
107
+ for f in [web_summary_html, web_summary_js, filtered_annotations_csv]:
108
+ if not f.exists():
109
+ raise RuntimeError(
110
+ f"{f} does not exist in {outdir}/outs. "
111
+ "Copying results back from tmpdir failed."
112
+ )
@@ -14,6 +14,7 @@ bcftools <- {{envs.bcftools | r}}
14
14
  genome <- {{envs.genome | r}}
15
15
  motif_col <- {{envs.motif_col | r}}
16
16
  regulator_col <- {{envs.regulator_col | r}}
17
+ var_col <- {{envs.var_col | r}}
17
18
  notfound <- {{envs.notfound | r}}
18
19
  motifdb <- {{envs.motifdb | r}}
19
20
  regmotifs <- {{envs.regmotifs | r}}
@@ -21,6 +22,7 @@ devpars <- {{envs.devpars | r}}
21
22
  plot_nvars <- {{envs.plot_nvars | r}}
22
23
  plots <- {{envs.plots | r}}
23
24
  cutoff <- {{envs.cutoff | r}}
25
+ set.seed(8525)
24
26
 
25
27
  if (is.null(motifdb) || !file.exists(motifdb)) {
26
28
  stop("Motif database (envs.motifdb) is required and must exist")
@@ -47,10 +49,21 @@ log <- get_logger()
47
49
  log$info("Reading input regulator/motif file ...")
48
50
  in_motifs <- read.table(motiffile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
49
51
 
52
+
50
53
  log$info("Ensuring motifs and regulators in the input data ...")
51
- in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
54
+ in_motifs <- ensure_regulator_motifs(in_motifs, outdir, motif_col, regulator_col, var_col, regmotifs, notfound = notfound)
52
55
  genome_pkg <- get_genome_pkg(genome)
53
56
 
57
+ motif_var_pairs <- NULL
58
+ if (!is.null(var_col)) {
59
+ log$info("Obtaining motif-variant pairs to test ...")
60
+ if (!var_col %in% colnames(in_motifs)) {
61
+ stop("Variant column (envs.var_col) not found in the input motif file")
62
+ }
63
+
64
+ motif_var_pairs <- unique(paste0(in_motifs[[motif_col]], " // ", in_motifs[[var_col]]))
65
+ }
66
+
54
67
  log$info("Reading variant file ...")
55
68
  if (grepl("\\.vcf$", varfile) || grepl("\\.vcf\\.gz$", varfile)) {
56
69
  log$info("Converting VCF file to BED file ...")
@@ -77,10 +90,13 @@ mdb <- read_meme_to_motifdb(motifdb, in_motifs, motif_col, regulator_col, notfou
77
90
  tool <- tolower(tool)
78
91
  tool <- match.arg(tool, c("motifbreakr", "atsnp"))
79
92
 
80
- if (tool == "motifbreakr") {
93
+ {% if envs.tool == "motifbreakr" %}
81
94
  motifbreakr_args <- {{envs.motifbreakr_args | r}}
82
95
  {% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_MotifBreakR.R" %}
83
- } else { # atsnp
84
- atsnp_args <- {{envs.atsnp_args | r}}
96
+ {% else %}
97
+ atsnp_args <- list_update(
98
+ list(padj_cutoff = TRUE, padj = "BH", p = "Pval_diff"),
99
+ {{envs.atsnp_args | r}}
100
+ )
85
101
  {% include biopipen_dir + "/scripts/regulatory/MotifAffinityTest_AtSNP.R" %}
86
- }
102
+ {% endif %}
@@ -46,6 +46,13 @@ atsnp_result <- ComputePValues(
46
46
  testing.mc = TRUE
47
47
  )
48
48
 
49
+ if (!is.null(motif_var_pairs)) {
50
+ log$info("Filtering motif-variant pairs ...")
51
+ atsnp_result$motifs_vars <- paste0(atsnp_result$motif, " // ", atsnp_result$snpid)
52
+ atsnp_result <- atsnp_result[atsnp_result$motifs_vars %in% motif_var_pairs, , drop = FALSE]
53
+ atsnp_result$motifs_vars <- NULL
54
+ }
55
+
49
56
  padj_col <- paste0(atsnp_args$p, "_adj")
50
57
  atsnp_result[[padj_col]] <- p.adjust(atsnp_result[[atsnp_args$p]], method = atsnp_args$padj)
51
58
  cutoff_col <- if (atsnp_args$padj_cutoff) padj_col else atsnp_args$p
@@ -87,7 +94,8 @@ write.table(
87
94
 
88
95
  log$info("Plotting variants ...")
89
96
  # Convert result to GRanges object
90
- atsnp_result$alleleDiff <- -atsnp_result[[cutoff_col]]
97
+ atsnp_result$alleleDiff <- -log10(atsnp_result[[cutoff_col]])
98
+ atsnp_result <- atsnp_result[order(-atsnp_result$alleleDiff), , drop = FALSE]
91
99
  atsnp_result$effect <- "strong"
92
100
  atsnp_result$motifPos <- lapply(atsnp_result$motifPos, function(x) as.integer(unlist(strsplit(x, ","))))
93
101
  atsnp_result <- makeGRangesFromDataFrame(atsnp_result, keep.extra.columns = TRUE, starts.in.df.are.0based = TRUE)
@@ -96,7 +104,6 @@ attributes(atsnp_result)$genome.package <- genome_pkg
96
104
  attributes(atsnp_result)$motifs <- mdb
97
105
 
98
106
  if (is.null(plots) || length(plots) == 0) {
99
- atsnp_result <- atsnp_result[order(-abs(atsnp_result$alleleDiff)), , drop = FALSE]
100
107
  atsnp_result <- atsnp_result[1:min(plot_nvars, length(atsnp_result)), , drop = FALSE]
101
108
  variants <- unique(atsnp_result$SNP_id)
102
109
  } else {
@@ -50,6 +50,7 @@ results <- motifbreakR(
50
50
 
51
51
  log$info("Calculating p values ...")
52
52
  results <- calculatePvalue(results)
53
+ results$.id <- 1:length(results)
53
54
  results_to_save <- as.data.frame(unname(results))
54
55
  results_to_save$motifPos <- lapply(results_to_save$motifPos, function(x) paste(x, collapse = ","))
55
56
  results_to_save$altPos <- lapply(results_to_save$altPos, function(x) paste(x, collapse = ","))
@@ -60,20 +61,28 @@ if (!is.null(regulator_col)) {
60
61
  drop = TRUE
61
62
  ]
62
63
  }
63
- results_to_save <- apply(results_to_save, 2, as.character)
64
+ results_to_save <- as.data.frame(apply(results_to_save, 2, as.character))
65
+
66
+ if (!is.null(motif_var_pairs)) {
67
+ log$info("Filtering motif-variant pairs ...")
68
+ results_to_save$motifs_vars <- paste0(results_to_save$providerId, " // ", results_to_save$SNP_id)
69
+ results_to_save <- results_to_save[results_to_save$motifs_vars %in% motif_var_pairs, , drop = FALSE]
70
+ results_to_save$motifs_vars <- NULL
71
+ }
64
72
 
65
73
  write.table(
66
74
  results_to_save,
67
75
  file = file.path(outdir, "motifbreakr.txt"),
68
76
  sep = "\t", quote = FALSE, row.names = FALSE
69
77
  )
70
- rm(results_to_save)
78
+ # rm(results_to_save)
71
79
 
72
80
  log$info("Plotting variants ...")
73
81
  if (is.null(plots) || length(plots) == 0) {
74
- results <- results[order(-abs(results$alleleDiff)), , drop = FALSE]
75
- results <- results[1:min(plot_nvars, length(results)), , drop = FALSE]
76
- variants <- unique(results$SNP_id)
82
+ results_to_save$alleleDiff <- as.numeric(results_to_save$alleleDiff)
83
+ results_to_save <- results_to_save[order(-abs(results_to_save$alleleDiff)), , drop = FALSE]
84
+ results_to_save <- results_to_save[1:min(plot_nvars, nrow(results_to_save)), , drop = FALSE]
85
+ variants <- unique(results_to_save$SNP_id)
77
86
  } else {
78
87
  variants <- names(plots)
79
88
  }
@@ -88,7 +97,7 @@ for (variant in variants) {
88
97
  if (is.null(plots[[variant]]$devpars)) {
89
98
  plots[[variant]]$devpars <- devpars
90
99
  }
91
- res <- results[results$SNP_id == variant, , drop = FALSE]
100
+ res <- results[results$SNP_id == variant & results$.id %in% results_to_save$.id, , drop = FALSE]
92
101
  res <- subset(res, subset = eval(parse(text = plots[[variant]]$which)))
93
102
 
94
103
  plot_variant_motifs(res, variant, plots[[variant]]$devpars, outdir)
@@ -33,7 +33,7 @@ log$info("Reading input data ...")
33
33
  indata <- read.table(infile, header=TRUE, sep="\t", stringsAsFactors=FALSE, check.names = FALSE)
34
34
 
35
35
  log$info("Ensuring regulators in the input data ...")
36
- indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, regmotifs, notfound = notfound)
36
+ indata <- ensure_regulator_motifs(indata, outdir, motif_col, regulator_col, "SNP_id", regmotifs, notfound = notfound)
37
37
  genome_pkg <- get_genome_pkg(genome)
38
38
 
39
39
  log$info("Reading motif database ...")
@@ -138,12 +138,13 @@ motifdb_to_motiflib <- function(motifdb) {
138
138
  #' @param outdir Output directory, used to save un-matched regulators
139
139
  #' @param motif_col Column name for the motif
140
140
  #' @param regulator_col Column name for the regulator
141
+ #' @param var_col Column name for the variant
141
142
  #' @param regmotifs Regulator-motif mapping file
142
143
  #' @param log_indent Indentation for log messages
143
144
  #' @param notfound Action to take if regulators are not found in the mapping file
144
145
  #' @return Data frame with regulators and motifs
145
146
  #' @export
146
- ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, regmotifs, log_indent = "", notfound = "error", log = NULL) {
147
+ ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, var_col, regmotifs, log_indent = "", notfound = "error", log = NULL) {
147
148
  if (is.null(motif_col)) {
148
149
  if (is.null(regmotifs)) {
149
150
  stop("Regulator-motif mapping file (envs.regmotifs) is required when no motif column (envs.motif_col) is provided")
@@ -198,7 +199,7 @@ ensure_regulator_motifs <- function (indata, outdir, motif_col, regulator_col, r
198
199
  regulator_col <<- rm_reg_col
199
200
  }
200
201
  } else {
201
- indata <- indata[!duplicated(indata[, c(regulator_col, motif_col), drop = FALSE]), , drop = FALSE]
202
+ indata <- indata[!duplicated(indata[, c(regulator_col, motif_col, var_col), drop = FALSE]), , drop = FALSE]
202
203
  }
203
204
 
204
205
  return(indata)
@@ -8,10 +8,11 @@ outfile <- {{out.outfile | r}}
8
8
  dotplot_check <- {{envs.dotplot_check | r}}
9
9
  outdir <- dirname(outfile)
10
10
  assay <- {{envs.assay | r}}
11
+ ident <- {{envs.ident | r}}
11
12
 
12
13
  log <- get_logger()
13
14
 
14
- ConvertAnnDataToSeurat(adfile, outfile = outfile, assay = assay, log = log)
15
+ ConvertAnnDataToSeurat(adfile, outfile = outfile, assay = assay, ident = ident, log = log)
15
16
 
16
17
  if (!isFALSE(dotplot_check)) {
17
18
  log$info("Reading Seurat object ...")
@@ -1,5 +1,6 @@
1
1
  from pathlib import Path
2
2
  from biopipen.utils.misc import run_command, logger
3
+ from biopipen.scripts.scrna.seurat_anndata_conversion import convert_seurat_to_anndata
3
4
  import os
4
5
  import numpy as np
5
6
  import pandas as pd
@@ -7,6 +8,10 @@ import scanpy
7
8
  import liana
8
9
  import liana.method.sc._liana_pipe as _liana_pipe
9
10
 
11
+ # AttributeError: module 'numpy' has no attribute 'product'
12
+ if not hasattr(np, "product"):
13
+ np.product = np.prod
14
+
10
15
  # monkey-patch liana.method.sc._liana_pipe._trimean due to the updates by scipy 1.14
11
16
  # https://github.com/scipy/scipy/commit/a660202652deead0f3b4b688eb9fdcdf9f74066c
12
17
  def _trimean(a, axis=0):
@@ -35,27 +40,24 @@ ncores = envs.pop("ncores")
35
40
  species = envs.pop("species")
36
41
  rscript = envs.pop("rscript")
37
42
  subset = envs.pop("subset")
43
+ group_by = envs.pop("group_by", None)
44
+ groupby = envs.pop("groupby", None) or group_by
38
45
  subset_using = envs.pop("subset_using", "auto")
39
46
  if subset_using == "auto":
40
47
  subset_using = "python" if subset and "[" in subset else "r"
41
48
  split_by = envs.pop("split_by")
42
49
 
43
50
  if sobjfile.suffix.lower() in (".rds", ".qs", "qs2"):
44
- logger.info("Converting the Seurat object to h5ad ...")
45
-
46
51
  annfile = outfile.parent / f"{sobjfile.stem}.h5ad"
47
- if subset and subset_using == "r":
48
- r_script_convert_to_anndata = (
49
- "biopipen.utils::ConvertSeuratToAnnData"
50
- f"({str(sobjfile)!r}, {str(annfile)!r}, "
51
- f"assay = {{envs['assay'] | r}}, subset = {{envs['subset'] | r}})"
52
- )
53
- else:
54
- r_script_convert_to_anndata = (
55
- "biopipen.utils::ConvertSeuratToAnnData"
56
- f"({str(sobjfile)!r}, {str(annfile)!r}, assay = {{envs['assay'] | r}})"
57
- )
58
- run_command([rscript, "-e", r_script_convert_to_anndata], fg=True)
52
+ seurat_ident_col = convert_seurat_to_anndata(
53
+ input_file=str(sobjfile),
54
+ output_file=str(annfile),
55
+ assay=assay,
56
+ subset=subset if subset_using == "r" else None,
57
+ rscript=rscript,
58
+ return_ident_col=not groupby,
59
+ )
60
+ groupby = groupby or seurat_ident_col
59
61
  sobjfile = annfile
60
62
  elif subset and subset == "r":
61
63
  raise ValueError(
@@ -63,6 +65,16 @@ elif subset and subset == "r":
63
65
  "'subset' can only be a 'python' expression (`envs.subset_using = 'python'`)."
64
66
  )
65
67
 
68
+ if not groupby:
69
+ logger.warning(
70
+ "`groupby` is not provided. "
71
+ "Using 'seurat_clusters' as the default groupby column. "
72
+ "It is recommended to provide the `groupby` parameter."
73
+ )
74
+ groupby = "seurat_clusters"
75
+
76
+ envs["groupby"] = groupby
77
+
66
78
  logger.info("Reading the h5ad file ...")
67
79
  adata = scanpy.read_h5ad(sobjfile)
68
80
 
@@ -27,7 +27,7 @@ defaults <- list(
27
27
  devpars = list(res = 100)
28
28
  )
29
29
 
30
- cases <- expand_cases(cases, defaults)
30
+ cases <- expand_cases(cases, defaults, default_case = "Cell-Cell Communication")
31
31
  log <- get_logger()
32
32
  reporter <- get_reporter()
33
33
 
@@ -35,12 +35,31 @@ do_case <- function(name) {
35
35
  log$info("- Case: {name}")
36
36
  case <- cases[[name]]
37
37
  info <- case_info(name, outdir, is_dir = FALSE)
38
- case <- extract_vars(case, "subset", "devpars", "more_formats", "descr")
38
+ case <- extract_vars(case, subset_ = "subset", "devpars", "more_formats", "descr")
39
39
 
40
40
  case$data <- ccc
41
- if (!is.null(case$subset)) {
42
- case$data <- ccc %>% dplyr::filter(!!parse_expr(case$subset))
41
+ if (!is.null(subset_)) {
42
+ case$data <- ccc %>% dplyr::filter(!!parse_expr(subset_))
43
43
  }
44
+
45
+ if (identical(case$plot_type, "table")) {
46
+ write.table(
47
+ case$data,
48
+ file = paste0(info$prefix, ".txt"),
49
+ sep = "\t",
50
+ row.names = FALSE,
51
+ col.names = TRUE,
52
+ quote = FALSE
53
+ )
54
+ report <- list(
55
+ kind = "table",
56
+ data = list(nrows = 100),
57
+ src = paste0(info$prefix, ".txt")
58
+ )
59
+ reporter$add2(report, hs = c(info$section, info$name))
60
+ return()
61
+ }
62
+
44
63
  if (is.null(case$magnitude)) {
45
64
  case$magnitude <- NULL
46
65
  }
@@ -0,0 +1,30 @@
1
+ from __future__ import annotations
2
+
3
+ from contextlib import suppress
4
+ from pathlib import Path
5
+ from biopipen.core.filters import dict_to_cli_args
6
+ from biopipen.utils.misc import run_command
7
+
8
+ crdir = Path({{in.crdir | quote}}) # noqa: E999 # pyright: ignore
9
+ outdir = {{out.outdir | quote}} # pyright: ignore
10
+ envs: dict = {{envs | repr}} # pyright: ignore
11
+ cellsnp_lite = envs.pop("cellsnp_lite")
12
+ ncores = envs.pop("ncores")
13
+
14
+ with suppress(RuntimeError):
15
+ run_command([cellsnp_lite, "--version"], fg=True)
16
+ print("")
17
+
18
+ if crdir.name != "outs":
19
+ crdir = crdir / "outs"
20
+
21
+ bamfile = str(crdir / "possorted_genome_bam.bam")
22
+ barcodefile = str(crdir / "filtered_feature_bc_matrix" / "barcodes.tsv.gz")
23
+
24
+ envs["nproc"] = ncores
25
+ envs["samFile"] = bamfile
26
+ envs["barcodeFile"] = barcodefile
27
+ envs["outDir"] = outdir
28
+
29
+ cmd = [cellsnp_lite, *dict_to_cli_args(envs)]
30
+ run_command(cmd, fg=True, bufsize=1)
@@ -7,6 +7,7 @@ library(biopipen.utils)
7
7
  sobjfile <- {{in.sobjfile | r}}
8
8
  outfile <- {{out.outfile | r}}
9
9
  newcol <- {{envs.newcol | r}}
10
+ cluster_ident <- {{envs.ident | r }}
10
11
  merge_same_labels <- {{envs.merge | r}}
11
12
  celltypist_args <- {{envs.celltypist_args | r}}
12
13
  outtype <- {{envs.outtype | r }}
@@ -17,6 +18,10 @@ if (identical(outtype, "input")) {
17
18
  outdir <- dirname(outfile)
18
19
  outprefix <- file.path(outdir, tools::file_path_sans_ext(basename(outfile)))
19
20
 
21
+ over_clustering <- celltypist_args$over_clustering %||% cluster_ident
22
+
23
+ require_package("celltypist", version = ">=1.7.1", python = celltypist_args$python)
24
+
20
25
  log <- get_logger()
21
26
 
22
27
  if (is.null(celltypist_args$model)) {
@@ -30,23 +35,14 @@ suppressWarnings(file.remove(modelfile))
30
35
  file.symlink(normalizePath(celltypist_args$model), modelfile)
31
36
 
32
37
  sobj <- NULL
38
+ ident <- NULL
33
39
  if (!endsWith(sobjfile, ".h5ad")) {
34
40
  sobj <- read_obj(sobjfile)
35
- if (is.null(celltypist_args$over_clustering)) {
36
- # find the default ident name in meta.data
37
- for (col in colnames(sobj@meta.data)) {
38
- if (!is.factor(sobj@meta.data[[col]])) { next }
39
- if (isTRUE(all.equal(unname(Idents(sobj)), sobj@meta.data[[col]]))) {
40
- celltypist_args$over_clustering <- col
41
- break
42
- }
43
- }
44
- }
45
- if (is.null(celltypist_args$over_clustering)) {
46
- celltypist_args$over_clustering <- FALSE
47
- }
48
- if (!isFALSE(celltypist_args$over_clustering)) {
49
- destfile <- paste0(outprefix, ".", celltypist_args$over_clustering, ".h5ad")
41
+ ident <- GetIdentityColumn(sobj)
42
+ over_clustering <- over_clustering %||% ident
43
+
44
+ if (!isFALSE(over_clustering)) {
45
+ destfile <- paste0(outprefix, ".", over_clustering, ".h5ad")
50
46
  } else {
51
47
  destfile <- paste0(outprefix, ".h5ad")
52
48
  }
@@ -61,7 +57,7 @@ if (!endsWith(sobjfile, ".h5ad")) {
61
57
  ConvertSeuratToAnnData(
62
58
  sobj,
63
59
  outfile = destfile,
64
- assay = celltypist_args$assay %||% "RNA",
60
+ assay = celltypist_args$assay,
65
61
  log = log
66
62
  )
67
63
  }
@@ -103,15 +99,15 @@ if (file.exists(celltypist_outfile) &&
103
99
  "-m", celltypist_args$model,
104
100
  "-o", celltypist_outfile
105
101
  )
106
- if (!isFALSE(celltypist_args$over_clustering) &&
107
- !is.null(celltypist_args$over_clustering)) {
108
- command <- paste(command, "-c", celltypist_args$over_clustering)
102
+ if (!isFALSE(over_clustering) && !is.null(over_clustering)) {
103
+ command <- paste(command, "-c", over_clustering)
109
104
  }
110
105
  if (isTRUE(celltypist_args$majority_voting)) {
111
106
  command <- paste(command, "-v")
112
107
  }
113
108
  log$info("Running celltypist:")
114
- print("- {command}")
109
+ # print("- {command}")
110
+ log$debug(" {command}")
115
111
  rc <- system(command)
116
112
  if (rc != 0) {
117
113
  stop("Failed to run celltypist. Check the job.stderr file to see the error message.")
@@ -129,6 +125,7 @@ if (outtype == "h5ad") {
129
125
  infile = celltypist_outfile,
130
126
  outfile = NULL,
131
127
  assay = celltypist_args$assay %||% "RNA",
128
+ ident = ident,
132
129
  log = log
133
130
  )
134
131
  } else {
@@ -152,31 +149,20 @@ if (outtype == "h5ad") {
152
149
 
153
150
  if (!is.null(newcol)) {
154
151
  sobj@meta.data[[newcol]] <- sobj@meta.data[[prediction]]
155
- } else {
156
- over_clustering <- celltypist_args$over_clustering
157
- if (over_clustering %in% colnames(sobj@meta.data)) {
158
- sobj@meta.data$seurat_clusters_id <- sobj@meta.data[[over_clustering]]
159
- } else {
160
- over_clustering <- "over_clustering"
161
- }
152
+ } else if (!isFALSE(over_clustering) && !is.null(over_clustering)) {
153
+ # save the original over_clustering column as seurat_clusters_id
154
+ sobj@meta.data$seurat_clusters_id <- sobj@meta.data[[over_clustering]]
162
155
 
163
156
  # make a map of original cluster id to new cluster id
164
157
  cluster_map <- data.frame(
165
- seurat_clusters_id = sobj@meta.data[[over_clustering]],
158
+ seurat_clusters_id = sobj@meta.data$seurat_clusters_id,
166
159
  seurat_clusters = sobj@meta.data[[prediction]]
167
160
  ) %>%
168
161
  group_by(seurat_clusters_id) %>%
169
162
  summarise(seurat_clusters = first(seurat_clusters), .groups = "drop") %>%
170
163
  mutate(seurat_clusters = make.unique(seurat_clusters))
171
164
  cluster_map <- split(cluster_map$seurat_clusters, cluster_map$seurat_clusters_id)
172
- if (over_clustering != "seurat_clusters") {
173
- sobj@meta.data$seurat_clusters <- sobj@meta.data[[over_clustering]]
174
- }
175
- Idents(sobj) <- "seurat_clusters"
176
- cluster_map$object <- sobj
177
- log$info("Renaming clusters ...")
178
- sobj <- do_call(RenameIdents, cluster_map)
179
- sobj@meta.data$seurat_clusters <- Idents(sobj)
165
+ sobj <- rename_idents(sobj, over_clustering, cluster_map)
180
166
  }
181
167
  } else if (!is.null(newcol)) {
182
168
  sobj@meta.data[[newcol]] <- sobj@meta.data[["predicted_labels"]]
@@ -187,6 +173,11 @@ if (outtype == "h5ad") {
187
173
  sobj <- merge_clusters_with_same_labels(sobj, newcol)
188
174
  }
189
175
 
176
+ if (!is.null(ident)) {
177
+ # restore the original identity
178
+ Idents(sobj) <- ident
179
+ }
180
+
190
181
  log$info("Saving the object ...")
191
182
  save_obj(sobj, outfile)
192
183
  } else {