biopipen 0.31.6__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

@@ -0,0 +1,101 @@
1
+ from pathlib import Path
2
+ from biopipen.utils.misc import run_command, logger
3
+ import numpy as np
4
+ import scanpy
5
+ import liana
6
+ import liana.method.sc._liana_pipe as _liana_pipe
7
+
8
+ # monkey-patch liana.method.sc._liana_pipe._trimean due to the updates by scipy 1.14
9
+ # https://github.com/scipy/scipy/commit/a660202652deead0f3b4b688eb9fdcdf9f74066c
10
+ def _trimean(a, axis=0):
11
+ try:
12
+ arr = a.A
13
+ except AttributeError:
14
+ arr = a.toarray()
15
+
16
+ quantiles = np.quantile(arr, q=[0.25, 0.75], axis=axis)
17
+ median = np.median(arr, axis=axis)
18
+ return (quantiles[0] + 2 * median + quantiles[1]) / 4
19
+
20
+
21
+ _liana_pipe._trimean = _trimean
22
+
23
+
24
+ sobjfile = Path({{in.sobjfile | repr}}) # pyright: ignore # noqa: E999
25
+ outfile = Path({{out.outfile | repr}}) # pyright: ignore
26
+ envs = {{envs | repr}} # pyright: ignore
27
+
28
+ method = envs.pop("method")
29
+ assay = envs.pop("assay")
30
+ ncores = envs.pop("ncores")
31
+ species = envs.pop("species")
32
+ rscript = envs.pop("rscript")
33
+
34
# Seurat inputs (.rds / .h5seurat) are first converted to an .h5ad file placed
# next to the output, using the R helper `seurat_to_anndata`.
if sobjfile.suffix.lower() in (".rds", ".h5seurat"):
    annfile = outfile.parent / f"{sobjfile.stem}.h5ad"
    # The `{{ ... }}` parts are rendered by the template engine; the
    # single-brace fields are interpolated by the f-string at run time.
    convert_script = f"""
        {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
        {{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}

        seurat_to_anndata(
            "{sobjfile}",
            "{annfile}",
            assay = {{ envs.assay | r }},
            log_info = log_info
        )
    """
    run_command([rscript, "-e", convert_script], fg=True)

    # From here on the converted file is the input.
    sobjfile = annfile
50
+
51
logger.info("Reading the h5ad file ...")
adata = scanpy.read_h5ad(sobjfile)

# "log2fc" is accepted as an alias of `liana.mt.logfc`; any other name is
# looked up on `liana.mt` unchanged.
method = method.lower()
method_fun = liana.mt.logfc if method == "log2fc" else getattr(liana.mt, method)

logger.info(f"Running {method} ...")
# These settings always win over user-supplied values of the same name.
envs.update(
    adata=adata,
    resource_name="consensus" if species == "human" else "mouseconsensus",
    n_jobs=ncores,
    inplace=True,
    verbose=True,
    key_added="liana_ccc",
)
method_fun(**envs)
68
+
69
# The method was run in place with key_added="liana_ccc", so the result table
# is read back from `adata.uns`.
res = adata.uns['liana_ccc']

# Per-method name of the column holding the magnitude score (None: no such
# score). Keys must match the lower-cased method name that was resolved on
# `liana.mt`, so the real function names ("logfc", "rank_aggregate") are
# listed next to the aliases used by this pipeline.
mag_score_names = {
    "cellphonedb": "lr_means",
    "connectome": "expr_prod",
    "log2fc": None,
    "logfc": None,
    "natmi": "expr_prod",
    "singlecellsignalr": "lrscore",
    "rank_aggregation": "magnitude_rank",
    "rank_aggregate": "magnitude_rank",
    "geometric_mean": "lr_gmeans",
    "scseqcomm": "inter_score",
    "cellchat": "lr_probs",
}

# Per-method name of the column holding the specificity score.
spec_score_names = {
    "cellphonedb": "cellphone_pvals",
    "connectome": "scaled_weight",
    "log2fc": "lr_logfc",
    "logfc": "lr_logfc",
    "natmi": "spec_weight",
    "singlecellsignalr": None,
    "rank_aggregation": "specificity_rank",
    "rank_aggregate": "specificity_rank",
    "geometric_mean": "gmean_pvals",
    "scseqcomm": None,
    "cellchat": "cellchat_pvals",
}

# Expose the scores under uniform column names. `.get()` keeps a method that
# is missing from the tables from raising KeyError after the analysis has
# already finished; such a method is saved without the uniform columns.
for uniform_name, score_names in (
    ("mag_score", mag_score_names),
    ("spec_score", spec_score_names),
):
    score_column = score_names.get(method)
    if score_column is not None:
        res[uniform_name] = res[score_column]

logger.info("Saving the result ...")
res.to_csv(outfile, sep="\t", index=False)
@@ -0,0 +1,191 @@
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ library(rlang)
3
+ library(dplyr)
4
+ library(ggplot2)
5
+ library(CCPlotR)
6
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "CCPlotR-patch.R" | source_r }}
7
+
8
# Job inputs, outputs and options; the `{{ ... }}` placeholders are rendered
# by the template engine before R runs this script.
cccfile <- {{ in.cccfile | r }}
expfile <- {{ in.expfile | r }}
outdir <- {{ out.outdir | r }}
joboutdir <- {{ job.outdir | r }}
score_col <- {{ envs.score_col | r }}
subset <- {{ envs.subset | r }}
cases <- {{ envs.cases | r }}

ccc <- read.table(cccfile, header = TRUE, sep = "\t", check.names = FALSE)
if (!is.null(subset)) {
    # `subset` is an R expression given as a string, evaluated by filter()
    ccc <- ccc %>% dplyr::filter(!!parse_expr(subset))
}

# Start from NULL: without this, when no expression table is available the
# name `exp` would resolve to the base function exp() wherever it is used
# as the expression data frame.
exp <- NULL
if (ncol(ccc) > 10) {
    # from CellCellCommunication
    if (!is.null(expfile)) {
        log_warn("in.cccfile is from CellCellCommunication, in.expfile will be ignored")
    }
    if (is.null(score_col)) {
        stop("'envs.score_col' is required for CellCellCommunication output")
    }
    if (!score_col %in% colnames(ccc)) {
        stop(paste("Score column", score_col, "not found in the in.cccfile"))
    }
    # compose the expression data frame from both ends of each interaction
    exp <- data.frame(
        cell_type = c(ccc$source, ccc$target),
        gene = c(ccc$ligand, ccc$receptor),
        mean_exp = c(ccc$ligand_trimean, ccc$receptor_trimean)
    ) %>% distinct()
    # reduce the table to the five columns used for plotting
    ccc <- ccc %>% select(
        source, target,
        ligand, receptor,
        !!sym(score_col)
    ) %>% rename(score = !!sym(score_col))
} else if (!is.null(expfile)) {
    exp <- read.table(expfile, header = TRUE, sep = "\t", check.names = FALSE)
}

if (length(cases) == 0) {
    stop("No cases provided.")
}
51
+
52
# Default PNG device parameters for a plot kind.
#
# kind:  one of "arrow", "circos", "dotplot", "heatmap", "network", "sigmoid"
# nrows: row count used to scale the plot (ignored by "circos" and "network")
# ncols: column count used to scale the plot (ignored by "arrow", "circos"
#        and "network")
#
# Returns list(res, width, height), or NULL invisibly for any other kind.
.get_default_devpars <- function(kind, nrows, ncols = NULL) {
    # every kind uses the same resolution; only the size differs
    devpars <- function(width, height) {
        list(res = 100, width = width, height = height)
    }

    if (kind == "arrow") {
        return(devpars(600, 50 + nrows * 20))
    }
    if (kind == "circos") {
        return(devpars(800, 800))
    }
    if (kind == "dotplot") {
        return(devpars(120 + ncols * 60, 300 + nrows * 40))
    }
    if (kind == "heatmap") {
        # NOTE(review): height scales with ncols here, whereas dotplot uses
        # nrows; confirm this is intended
        return(devpars(120 + ncols * 60, 300 + ncols * 40))
    }
    if (kind == "network") {
        return(devpars(1200, 1200))
    }
    if (kind == "sigmoid") {
        return(devpars(max(800, ncols * 200), 100 + nrows * 60))
    }
    invisible(NULL)
}
91
+
92
# Render one PNG per case and return, for each, the entry later added to the
# report: list(section, kind = "table_image", src, name).
#
# Each case is a list of arguments for the matching CCPlotR `cc_<kind>()`
# function plus these keys, which are consumed here and not forwarded:
#   kind:    plot kind (partial matching allowed; defaults to "arrow")
#   devpars: list(res, width, height); missing entries use per-kind defaults
#   section: report section the image is listed under
images <- lapply(names(cases), function(name) {
    log_info("- Case: ", name, " ...")
    case <- cases[[name]]

    kind <- match.arg(case$kind, c("arrow", "circos", "dotplot", "heatmap", "network", "sigmoid"))
    fun <- get(paste0("cc_", kind))
    case$kind <- NULL

    # optional ggplot2 theme tweak added to the plot after it is built
    gg <- NULL
    if (kind == "arrow") {
        cell_types <- case$cell_types
        if (is.null(cell_types) || length(cell_types) != 2) {
            stop("'case.cell_types' is required and must be a vector of length 2")
        }
        n_ligand <- length(unique(ccc[ccc$source == cell_types[1], "ligand"]))
        n_receptor <- length(unique(ccc[ccc$target == cell_types[2], "receptor"]))
        default_devpars <- .get_default_devpars(kind, nrows = max(n_ligand, n_receptor))
    } else if (kind == "circos") {
        nrows <- length(unique(c(ccc$source, ccc$target)))
        default_devpars <- .get_default_devpars(kind, nrows = nrows)
    } else if (kind == "dotplot" || kind == "heatmap") {
        nrows <- length(unique(ccc$source))
        ncols <- length(unique(ccc$target))
        default_devpars <- .get_default_devpars(kind, nrows = nrows, ncols = ncols)
        # both kinds get vertical x labels unless option "B" is requested
        if (is.null(case$option) || case$option != "B") {
            gg <- theme(axis.text.x = element_text(angle = 90, hjust = 1))
        }
    } else if (kind == "network") {
        nrows <- length(unique(c(ccc$source, ccc$target)))
        ncols <- length(unique(c(ccc$ligand, ccc$receptor)))
        default_devpars <- .get_default_devpars(kind, nrows = nrows, ncols = ncols)
        # margin() takes t, r, b, l and then the unit; passing one vector
        # followed by "pt" would put "pt" in the `r` slot
        gg <- theme(plot.margin = margin(50, 50, 50, 50, "pt"))
    } else if (kind == "sigmoid") {
        nrows <- (case$n_top_ints %||% 20) / 2  # approx
        ncols <- length(unique(c(ccc$source, ccc$target))) / 2
        default_devpars <- .get_default_devpars(kind, nrows = nrows, ncols = ncols)
    }
    # user-supplied device parameters win; gaps are filled from the defaults
    devpars <- case$devpars %||% default_devpars
    devpars$res <- devpars$res %||% default_devpars$res
    devpars$width <- devpars$width %||% default_devpars$width
    devpars$height <- devpars$height %||% default_devpars$height
    case$devpars <- NULL

    section <- case$section
    case$section <- NULL

    case$cc_df <- ccc
    # only pass the expression table to functions that accept it
    if ("exp_df" %in% names(formals(fun))) {
        case$exp_df <- exp
    }
    outpath <- file.path(outdir, paste0(slugify(name), ".png"))
    png(outpath, width = devpars$width, height = devpars$height, res = devpars$res)
    p <- do_call(fun, case)
    if (!is.null(gg)) { p <- p + gg }
    print(p)
    dev.off()

    list(
        section = section,
        kind = "table_image",
        src = outpath,
        name = name
    )
})
158
+
159
# Group the image entries by report section, keeping first-seen section order
# and the original image order; entries without a section go to "DEFAULT".
section_images <- list()
for (img in images) {
    section <- img$section %||% "DEFAULT"
    img$section <- NULL
    # c(NULL, list(img)) starts a new section; otherwise the entry is appended
    section_images[[section]] <- c(section_images[[section]], list(img))
}
171
+
172
# With no user-defined section, everything is reported under one fixed
# heading; otherwise each section becomes its own heading.
only_default <- length(section_images) == 1 &&
    names(section_images)[1] == "DEFAULT"

if (only_default) {
    # NOTE(review): the whole `section_images` list (keyed by "DEFAULT") is
    # passed here, not the images themselves; confirm add_report() expects it
    add_report(
        section_images,
        h1 = "Cell-Cell Communication Plots",
        ui = "table_of_images"
    )
} else {
    for (section in names(section_images)) {
        add_report(
            list(
                ui = "table_of_images",
                contents = section_images[[section]]
            ),
            h1 = section
        )
    }
}

save_report(joboutdir)
@@ -1,48 +1,8 @@
1
1
  {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
- library(rlang)
4
- library(Seurat)
5
- library(SeuratDisk)
2
+ {{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
6
3
 
7
4
  sobjfile <- {{in.sobjfile | r}}
8
5
  outfile <- {{out.outfile | r}}
9
- outdir <- dirname(outfile)
10
6
  assay <- {{envs.assay | r}}
11
7
 
12
- if (endsWith(sobjfile, ".rds") || endsWith(sobjfile, ".RDS")) {
13
- assay_name <- ifelse(is.null(assay), "", paste0("_", assay))
14
- h5seurat_file <- file.path(
15
- outdir,
16
- paste0(tools::file_path_sans_ext(basename(outfile)), assay_name, ".h5seurat")
17
- )
18
- if (file.exists(h5seurat_file) &&
19
- (file.mtime(h5seurat_file) < file.mtime(sobjfile))) {
20
- file.remove(h5seurat_file)
21
- }
22
- if (!file.exists(h5seurat_file)) {
23
- log_info("Reading RDS file ...")
24
- sobj <- readRDS(sobjfile)
25
- assay <- assay %||% DefaultAssay(sobj)
26
- # In order to convert to h5ad
27
- # https://github.com/satijalab/seurat/issues/8220#issuecomment-1871874649
28
- sobj$RNAv3 <- as(object = sobj[[assay]], Class = "Assay")
29
- DefaultAssay(sobj) <- "RNAv3"
30
- sobj$RNA <- NULL
31
- sobj <- RenameAssays(sobj, RNAv3 = "RNA")
32
-
33
- log_info("Saving to H5Seurat file ...")
34
- SaveH5Seurat(sobj, h5seurat_file)
35
- rm(sobj)
36
- sobjfile <- h5seurat_file
37
- } else {
38
- log_info("Using existing H5Seurat file ...")
39
- }
40
- }
41
-
42
- if (!endsWith(sobjfile, ".h5seurat")) {
43
- stop(paste0("Unknown input file format: ",
44
- tools::file_ext(sobjfile),
45
- ". Supported formats: .rds, .RDS, .h5seurat"))
46
- }
47
-
48
- Convert(sobjfile, dest = outfile, assay = assay %||% "RNA", overwrite = TRUE)
8
+ seurat_to_anndata(sobjfile, outfile, assay, log_info)
@@ -5,6 +5,7 @@ library(Seurat)
5
5
  library(SeuratDisk)
6
6
  library(rlang)
7
7
  library(dplyr)
8
+ library(tidyr)
8
9
 
9
10
  set.seed(8525)
10
11
 
@@ -377,7 +378,25 @@ for (qname in names(mapquery_args$refdata)) {
377
378
  repel = TRUE,
378
379
  ) + NoLegend()
379
380
 
380
- png(file.path(outdir, paste0("UMAPs.png")), width = 1400, height = 700, res = 100)
381
+ png(file.path(outdir, paste0("UMAPs-", slugify(qname), ".png")), width = 1500, height = 700, res = 100)
381
382
  print(ref_p | query_p)
382
383
  dev.off()
384
+
385
+ # summarize the stats
386
+ log_info(" Summarizing stats: {qname} -> {rname}")
387
+ ref_stats <- as.data.frame(table(reference@meta.data[[rname]]))
388
+ colnames(ref_stats) <- c("CellType", "Count_Ref")
389
+ query_stats <- as.data.frame(table(sobj@meta.data[[paste0("predicted.", qname)]]))
390
+ colnames(query_stats) <- c("CellType", "Count_Query")
391
+ stats <- left_join(ref_stats, query_stats, by = "CellType") %>%
392
+ replace_na(list(Count_Query = 0)) %>%
393
+ arrange(desc(Count_Query), desc(Count_Ref))
394
+
395
+ write.table(
396
+ stats,
397
+ file = file.path(outdir, paste0("stats-", slugify(qname), ".txt")),
398
+ row.names = FALSE,
399
+ quote = FALSE,
400
+ sep = "\t"
401
+ )
383
402
  }