biopipen 0.31.7__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

biopipen/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.31.7"
1
+ __version__ = "0.32.0"
biopipen/ns/scrna.py CHANGED
@@ -2314,3 +2314,156 @@ class ScSimulation(Proc):
2314
2314
  "params": {},
2315
2315
  }
2316
2316
  script = "file://../scripts/scrna/ScSimulation.R"
2317
+
2318
+
2319
class CellCellCommunication(Proc):
    """Cell-cell communication inference

    This is implemented based on [LIANA](https://liana-py.readthedocs.io/en/latest/index.html),
    which is a Python package for cell-cell communication inference and provides a list of existing
    methods including [CellPhoneDB](https://github.com/ventolab/CellphoneDB),
    [Connectome](https://github.com/msraredon/Connectome/), log2FC,
    [NATMI](https://github.com/forrest-lab/NATMI),
    [SingleCellSignalR](https://github.com/SCA-IRCM/SingleCellSignalR), Rank_Aggregate, Geometric Mean,
    [scSeqComm](https://gitlab.com/sysbiobig/scseqcomm), and [CellChat](https://github.com/jinworks/CellChat).

    You can also try `python -c 'import liana; liana.mt.show_methods()'` to see the methods available.

    Note that this process does not do any visualization. You can use `CellCellCommunicationPlots`
    to visualize the results.

    Reference:
        - [Review](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9184522/).
        - [LIANA](https://www.biorxiv.org/content/10.1101/2023.08.19.553863v1).

    Input:
        sobjfile: The seurat object file in RDS or h5seurat format or AnnData file.

    Output:
        outfile: The output file with the 'liana_res' data frame.
            Stats are provided for both ligand and receptor entities, more specifically: ligand and receptor are
            the two entities that potentially interact. As a reminder, CCC events are not limited to secreted signalling,
            but we refer to them as ligand and receptor for simplicity.
            Also, in the case of heteromeric complexes, the ligand and receptor columns represent the subunit with minimum
            expression, while *_complex corresponds to the actual complex, with subunits being separated by _.
            source and target columns represent the source/sender and target/receiver cell identity for each interaction, respectively
            * `*_props`: represents the proportion of cells that express the entity.
                By default, any interactions in which either entity is not expressed in above 10% of cells per cell type
                is considered as a false positive, under the assumption that since CCC occurs between cell types, a sufficient
                proportion of cells within should express the genes.
            * `*_means`: entity expression mean per cell type.
            * `lr_means`: mean ligand-receptor expression, as a measure of ligand-receptor interaction magnitude.
            * `cellphone_pvals`: permutation-based p-values, as a measure of interaction specificity.

    Envs:
        method (choice): The method to use for cell-cell communication inference.
            - CellPhoneDB: Use CellPhoneDB method.
                Magnitude Score: lr_means; Specificity Score: cellphone_pvals.
            - Connectome: Use Connectome method.
            - log2FC: Use log2FC method.
            - NATMI: Use NATMI method.
            - SingleCellSignalR: Use SingleCellSignalR method.
            - Rank_Aggregate: Use Rank_Aggregate method.
            - Geometric_Mean: Use Geometric Mean method.
            - scSeqComm: Use scSeqComm method.
            - CellChat: Use CellChat method.
            - cellphonedb: alias for `CellPhoneDB`
            - connectome: alias for `Connectome`
            - log2fc: alias for `log2FC`
            - natmi: alias for `NATMI`
            - singlecellsignalr: alias for `SingleCellSignalR`
            - rank_aggregate: alias for `Rank_Aggregate`
            - geometric_mean: alias for `Geometric_Mean`
            - scseqcomm: alias for `scSeqComm`
            - cellchat: alias for `CellChat`
        assay: The assay to use for the analysis.
            Only works for Seurat object.
        seed (type=int): The seed for the random number generator.
        ncores (type=int): The number of cores to use.
        groupby: The column name in metadata to group the cells.
            Typically, this column should be the cluster id.
        species (choice): The species of the cells.
            - human: Human cells, the 'consensus' resource will be used.
            - mouse: Mouse cells, the 'mouseconsensus' resource will be used.
        expr_prop (type=float): Minimum expression proportion for the ligands and
            receptors (+ their subunits) in the corresponding cell identities. Set to 0
            to return unfiltered results.
        min_cells (type=int): Minimum cells (per cell identity if grouped by `groupby`)
            to be considered for downstream analysis.
        n_perms (type=int): Number of permutations for the permutation test.
            Relevant only for permutation-based methods (e.g., `CellPhoneDB`).
            If `0` is passed, no permutation testing is performed.
        rscript: The path to the Rscript executable used to convert RDS file to AnnData.
            if `in.sobjfile` is an RDS file, it will be converted to AnnData file (h5ad).
            You need `Seurat`, `SeuratDisk` and `digest` installed.
        <more>: Other arguments for the method.
            The arguments are passed to the method directly.
            See the method documentation for more details and also
            `help(liana.mt.<method>.__call__)` in Python.
    """  # noqa: E501
    input = "sobjfile:file"
    output = "outfile:file:{{in.sobjfile | stem}}-ccc.txt"
    lang = config.lang.python
    # Defaults for the documented `Envs`; any extra keys are forwarded to the
    # selected LIANA method (see `<more>` above).
    envs = {
        "method": "cellchat",
        "assay": None,
        "seed": 1337,
        "ncores": config.misc.ncores,
        "groupby": "seurat_clusters",
        "species": "human",
        "expr_prop": 0.1,
        "min_cells": 5,
        "n_perms": 1000,
        "rscript": config.lang.rscript,
    }
    script = "file://../scripts/scrna/CellCellCommunication.py"
2420
+
2421
+
2422
class CellCellCommunicationPlots(Proc):
    """Plot the results of cell-cell communication inference.

    The plots are drawn with the R package
    [`CCPlotR`](https://github.com/Sarah145/CCPlotR).

    Input:
        cccfile: The output file from `CellCellCommunication`
            or a tab-separated file with the following columns: `source`, `target`,
            `ligand`, `receptor`, and `score`.
            If so, `in.expfile` can be provided where `exp_df` is needed.
        expfile: The expression file with the expression of ligands and receptors.
            Columns include: `cell_type`, `gene` and `mean_exp`.

    Output:
        outdir: The output directory for the plots.

    Envs:
        score_col: The column name in the input file that contains the score, if
            the input file is from `CellCellCommunication`.
            Two alias columns are added in the result file of `CellCellCommunication`,
            `mag_score` and `spec_score`, which are the magnitude and specificity
            scores.
        subset: An expression to pass to `dplyr::filter()` to subset the ccc data.
        cases (type=json): The cases for the plots.
            The keys are the names of the cases and the values are the arguments for
            the plots. The arguments include:
            * kind: one of `arrow`, `circos`, `dotplot`, `heatmap`, `network`,
                and `sigmoid`.
            * devpars: The parameters for `png()` for the plot, including `res`,
                `width`, and `height`.
            * section: The section name for the report to group the plots.
            * <other>: Other arguments for `cc_<kind>` function in `CCPlotR`.
                See the documentation for more details.
                Or you can use `?CCPlotR::cc_<kind>` in R.
    """
    input = "cccfile:file, expfile:file"
    output = "outdir:dir:{{in.cccfile | stem}}-ccc_plots"
    lang = config.lang.rscript
    # Defaults for the documented `Envs`.
    envs = dict(
        score_col="mag_score",
        subset=None,
        cases={},
    )
    script = "file://../scripts/scrna/CellCellCommunicationPlots.R"
    plugin_opts = dict(
        report="file://../reports/scrna/CellCellCommunicationPlots.svelte",
    )
@@ -0,0 +1,14 @@
1
{#- Report template: `head_job` emits an <h1> built from the stem of the job's
    `cccfile` input, `report_job` delegates the job body to the `render_job`
    filter, and both macros are handed to the imported `report_jobs` macro.
    NOTE(review): presumably the report of CellCellCommunicationPlots - confirm. -#}
{% from "utils/misc.liq" import report_jobs, table_of_images -%}
<script>
import { Image } from "$libs";
</script>

{%- macro report_job(job, h=1) -%}
{{ job | render_job: h=h }}
{%- endmacro -%}

{%- macro head_job(job) -%}
<h1>{{job.in.cccfile | stem0 | escape}}</h1>
{%- endmacro -%}

{{ report_jobs(jobs, head_job, report_job) }}
@@ -1,16 +1,20 @@
1
1
  {% from "utils/misc.liq" import report_jobs, table_of_images -%}
2
2
  <script>
3
- import { Image } from "$libs";
3
+ import { Image, DataTable } from "$libs";
4
4
  </script>
5
5
 
6
6
  {%- macro report_job(job, h=1) -%}
7
- <h{{h}}>Reference UMAP</h{{h}}>
8
- {% set imgs = job.outdir | glob: "Reference_UMAP_*.png" %}
9
- {{ table_of_images(imgs) }}
10
7
 
11
- <h{{h}}>Query UMAP</h{{h}}>
12
- {% set imgs = job.outdir | glob: "Query_UMAP_*.png" %}
8
+ <h{{h}}>UMAPs</h{{h}}>
9
+ {% set imgs = job.outdir | glob: "UMAPs-*.png" %}
13
10
  {{ table_of_images(imgs) }}
11
+
12
+ <h{{h}}>Stats</h{{h}}>
13
+ {% for stfile in job.outdir | glob: "stats-*.txt" %}
14
+ <h{{h+1}}>{{stfile | stem | replace: "stats-", ""}}</h{{h+1}}>
15
+ <DataTable src="{{stfile}}" data={ {{stfile | datatable: sep="\t"}} } />
16
+ {% endfor %}
17
+
14
18
  {%- endmacro -%}
15
19
 
16
20
  {%- macro head_job(job) -%}
@@ -41,25 +41,33 @@ if (outtype == "rds") {
41
41
  f <- H5File$new(h5seurat_file, "r+")
42
42
  groups <- f$ls(recursive = TRUE)
43
43
 
44
- for (name in groups$name[grepl("categories", groups$name)]) {
45
- names <- strsplit(name, "/")[[1]]
46
- names <- c(names[1:length(names) - 1], "levels")
47
- new_name <- paste(names, collapse = "/")
48
- f[[new_name]] <- f[[name]]
49
- }
44
+ for (name in groups$name[grepl("/categories$", groups$name)]) {
45
+ valuenames <- levelnames <- codenames <- strsplit(name, "/")[[1]]
46
+ valuenames[length(valuenames)] <- "values"
47
+ valuenames <- paste(valuenames, collapse = "/")
48
+ levelnames[length(levelnames)] <- "levels"
49
+ levelnames <- paste(levelnames, collapse = "/")
50
+ codenames[length(codenames)] <- "codes"
51
+ codenames <- paste(codenames, collapse = "/")
52
+ if (!f$exists(codenames)) {
53
+ # No codes, skip
54
+ next
55
+ }
50
56
 
51
- for (name in groups$name[grepl("codes", groups$name)]) {
52
- names <- strsplit(name, "/")[[1]]
53
- names <- c(names[1:length(names) - 1], "values")
54
- new_name <- paste(names, collapse = "/")
55
- f[[new_name]] <- f[[name]]
56
- grp <- f[[new_name]]
57
- grp$write(args = list(1:grp$dims), value = grp$read() + 1)
57
+ if (!f$exists(levelnames)) {
58
+ f[[levelnames]] <- f[[name]]
59
+ }
60
+
61
+ if (!f$exists(valuenames)) {
62
+ f[[valuenames]] <- f[[codenames]]
63
+ grp <- f[[valuenames]]
64
+ grp$write(args = list(1:grp$dims), value = grp$read() + 1)
65
+ }
58
66
  }
59
67
  f$close_all()
60
68
  # end
61
69
 
62
- sobj <- LoadH5Seurat(h5seurat_file)
70
+ sobj <- LoadH5Seurat(h5seurat_file, assays = assay)
63
71
  if (!isFALSE(dotplot_check)) {
64
72
  log_info("Checking dotplot ...")
65
73
  dotfig <- file.path(outdir, "dotplot.png")
@@ -0,0 +1,161 @@
1
+ # patched version of cc_circos
2
+ # See https://github.com/Sarah145/CCPlotR/issues/4
3
+
4
# Draw a chord ("circos") diagram of cell-cell communication results.
#
# Args:
#   cc_df: data frame with columns `source`, `target`, `ligand`, `receptor`
#     and `score` (validated below).
#   option: "A" draws one link per source/target pair, sized by the number of
#     interactions; "B" draws the `n_top_ints` highest-scoring ligand-receptor
#     pairs as arrows coloured by the source cell type; "C" is like "B" with
#     black arrows plus an inner track coloured by the mean expression taken
#     from `exp_df`.
#   n_top_ints: number of top-scoring interactions kept for options "B"/"C".
#   exp_df: data frame with columns `cell_type`, `gene` and `mean_exp`;
#     required (and validated) for option "C" only.
#   cell_cols: named vector of colours per cell type; when NULL the colours
#     come from `paletteMartin()`.
#   palette: RColorBrewer palette name for the expression track (option "C").
#   cex: text scaling, applied through `par(cex = )` and the legend font sizes.
#   show_legend: whether to draw the legend(s) (options "B"/"C").
#   scale: forwarded to `chordDiagram(scale = )` for options "B"/"C"; option
#     "A" always passes `scale = FALSE`.
#   ...: further arguments forwarded to `chordDiagram()`.
#
# Called for its side effect of drawing on the active graphics device.
#
# NOTE: `%>%`, the dplyr verbs and `paletteMartin()` are not loaded here; they
# must already be available in the environment this function is sourced into.
cc_circos <- function(cc_df, option = "A", n_top_ints = 15, exp_df = NULL, cell_cols = NULL, palette = "BuPu", cex = 1, show_legend = TRUE, scale = FALSE, ...) {
    stopifnot("'cc_df' must be a dataframe" = is(cc_df, "data.frame"))
    stopifnot("cc_df should contain columns named source, target, ligand, receptor and score. See `toy_data` for an example." = all(c('source', 'target', 'ligand', 'receptor', 'score') %in% colnames(cc_df)))
    stopifnot("option must be either 'A', 'B', 'C'" = option %in% c('A', 'B', 'C'))
    library(stringr)
    library(ComplexHeatmap)
    library(circlize)
    circos.clear()

    # Column names used unquoted in the dplyr calls below, pre-bound to NULL
    # (presumably to silence "no visible binding" notes).
    target <- score <- ligand <- receptor <- source_lig <- target_rec <- cell_type <- gene <- cell_gene <- NULL

    # Rescale scores linearly to arrow widths in [1, 5].
    # When all scores are equal (or only one interaction is selected) the range
    # is zero and the rescaling would be 0 / 0 = NaN, so fall back to the
    # mid-range width instead of passing NaN line widths to chordDiagram().
    score_to_arrow_width <- function(scores) {
        span <- max(scores) - min(scores)
        if (!isTRUE(span > 0)) {
            return(rep(3, length(scores)))
        }
        (((scores - min(scores)) / span) * (4)) + 1
    }

    if (option == "A") {
        # One row per source/target pair with the number of interactions (`n`).
        input_df <- cc_df %>%
            mutate(source = factor(source), target = factor(target)) %>%
            group_by(source, target) %>%
            tally()
        if (is.null(cell_cols)) {
            cell_cols <- setNames(paletteMartin(n = length(unique(c(input_df$source, input_df$target)))), unique(c(input_df$source, input_df$target)))
        }
        circlize_plot <- function() {
            par(cex = cex)
            chordDiagram(input_df,
                scale = FALSE, grid.col = cell_cols,
                annotationTrack = c("grid", "name"), directional = 1, direction.type = c("arrows", "diffHeight"), link.arr.type = "big.arrow", link.arr.length = 0.1, diffHeight = -mm_h(0.5), preAllocateTracks = list(
                    track.height = mm_h(10),
                    track.margin = c(mm_h(2), -mm_h(4))
                ), ...
            )
        }
    } else if (option == "B") {
        # Top interactions; sectors are named "<cell type>|<gene>".
        input_df <- cc_df %>%
            slice_max(order_by = score, n = n_top_ints) %>%
            mutate(
                source_lig = paste0(source, "|", ligand),
                target_rec = paste0(target, "|", receptor)
            )
        arr_wd <- score_to_arrow_width(input_df$score)

        if (is.null(cell_cols)) {
            cell_cols <- setNames(paletteMartin(n = length(unique(c(input_df$source, input_df$target)))), unique(c(input_df$source, input_df$target)))
        }

        # Each arrow takes the colour of its source cell type (text before "|").
        link_cols <- c()
        for (i in input_df$source_lig) {
            link_cols <- c(link_cols, cell_cols[str_extract(i, "[^|]+")])
        }

        segments <- unique(c(paste0(input_df$source, "|", input_df$ligand), paste0(input_df$target, "|", input_df$receptor)))
        # Group sectors by cell type so that chordDiagram() keeps them together.
        grp <- str_extract(segments, "[^|]+")
        names(grp) <- segments
        lgd <- Legend(
            labels = unique(c(input_df$source, input_df$target)),
            title = "Cell type",
            type = "points",
            title_gp = gpar(fontsize = 14 * cex),
            labels_gp = gpar(fontsize = 12 * cex),
            legend_gp = gpar(col = "transparent"),
            background = cell_cols[unique(c(input_df$source, input_df$target))]
        )
        circlize_plot <- function() {
            par(cex = cex)
            chordDiagram(
                input_df %>%
                    select(source_lig, target_rec, score),
                directional = 1, group = grp, link.sort = FALSE, scale = scale, diffHeight = 0.005,
                direction.type = c("arrows"), link.arr.type = "triangle", annotationTrack = c(),
                preAllocateTracks = list(list(track.height = 0.175), list(track.height = 0.05)),
                big.gap = 3, transparency = 1, link.arr.lwd = arr_wd, link.arr.col = link_cols,
                link.arr.length = 0.4, link.arr.width = 0.35, ...
            )
            # Track 1: gene labels (text after the last "|").
            circos.track(track.index = 1, panel.fun = function(x, y) {
                circos.text(CELL_META$xcenter, CELL_META$ylim[1], str_extract(CELL_META$sector.index, "[^|]+$"),
                    facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1.3
                )
            }, bg.border = NA)
            # Track 2: one coloured band per cell type.
            for (l in unique(str_extract(segments, "[^|]+"))) {
                highlight.sector(segments[str_detect(segments, paste0("^", str_escape(l)))], track.index = 2, col = cell_cols[l])
            }
            if (isTRUE(show_legend)) {
                draw(lgd, just = c("left", "bottom"), x = unit(5, "mm"), y = unit(5, "mm"))
            }
            circos.clear()
        }
    } else if (option == "C") {
        stopifnot("'exp_df' must be a dataframe" = is(exp_df, "data.frame"))
        stopifnot("exp_df should contain columns named cell_type, gene and mean_exp. See `toy_exp` for an example." = all(c('cell_type', 'gene', 'mean_exp') %in% colnames(exp_df)))

        # Top interactions; sectors are named "<cell type>|<gene>".
        input_df <- cc_df %>%
            slice_max(order_by = score, n = n_top_ints) %>%
            mutate(
                source_lig = paste0(source, "|", ligand),
                target_rec = paste0(target, "|", receptor)
            )

        arr_wd <- score_to_arrow_width(input_df$score)

        if (is.null(cell_cols)) {
            cell_cols <- setNames(paletteMartin(n = length(unique(c(input_df$source, input_df$target)))), unique(c(input_df$source, input_df$target)))
        }

        segments <- unique(c(paste0(input_df$source, "|", input_df$ligand), paste0(input_df$target, "|", input_df$receptor)))
        # Group sectors by cell type so that chordDiagram() keeps them together.
        grp <- str_extract(segments, "[^|]+")
        names(grp) <- segments

        # Mean expression of every plotted "<cell type>|<gene>" sector.
        gene_df <- as.data.frame(exp_df %>% mutate(cell_gene = paste0(cell_type, "|", gene)) %>% filter(cell_gene %in% segments))
        rownames(gene_df) <- gene_df$cell_gene

        brks <- scales::pretty_breaks(n = 5)(c(floor(min(gene_df$mean_exp)), ceiling(max(gene_df$mean_exp))))
        gene_col_fun <- colorRamp2(brks, RColorBrewer::brewer.pal(length(brks), palette))

        inner.cols <- setNames(gene_col_fun(gene_df[segments, "mean_exp"]), segments)
        lgd1 <- Legend(
            labels = unique(c(input_df$source, input_df$target)),
            title = "Cell type",
            type = "points",
            title_gp = gpar(fontsize = 14 * cex),
            labels_gp = gpar(fontsize = 12 * cex),
            legend_gp = gpar(col = "transparent"),
            background = cell_cols[unique(c(input_df$source, input_df$target))],
            direction = "horizontal"
        )

        lgd2 <- Legend(
            title_gp = gpar(fontsize = 14 * cex),
            labels_gp = gpar(fontsize = 12 * cex),
            direction = "horizontal", at = brks,
            col_fun = gene_col_fun, title = "Mean exp."
        )
        circlize_plot <- function() {
            par(cex = cex)
            chordDiagram(
                input_df %>%
                    select(source_lig, target_rec, score),
                directional = 1, group = grp, link.sort = FALSE, diffHeight = 0.005, scale = scale,
                direction.type = c("arrows"), link.arr.type = "triangle", annotationTrack = c(),
                preAllocateTracks = list(list(track.height = 0.175), list(track.height = 0.05), list(track.height = 0.045)),
                big.gap = 3, transparency = 1, link.arr.lwd = arr_wd, link.arr.col = "black", link.arr.length = 0.4, link.arr.width = 0.35, ...
            )
            # Track 1: gene labels (text after the last "|").
            circos.track(track.index = 1, panel.fun = function(x, y) {
                circos.text(CELL_META$xcenter, CELL_META$ylim[1], str_extract(CELL_META$sector.index, "[^|]+$"),
                    facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.55), cex = 1.3
                )
            }, bg.border = NA)
            # Track 2: one coloured band per cell type.
            for (l in unique(str_extract(segments, "[^|]+"))) {
                highlight.sector(segments[str_detect(segments, paste0("^", str_escape(l)))], track.index = 2, col = cell_cols[l])
            }
            # Track 3: mean expression of each sector's gene.
            circos.track(track.index = 3, panel.fun = function(x, y) {
                circos.rect(CELL_META$xlim[1], CELL_META$ylim[1], CELL_META$xlim[2], CELL_META$ylim[2],
                    sector.index = CELL_META$sector.index, col = inner.cols[CELL_META$sector.index]
                )
            }, bg.border = NA)
            if (isTRUE(show_legend)) {
                draw(packLegend(lgd1, lgd2, direction = "vertical"), just = c("left", "bottom"), x = unit(4.75, "mm"), y = unit(4.75, "mm"))
            }
            circos.clear()
        }
    }
    circlize_plot()
}
@@ -0,0 +1,101 @@
1
+ from pathlib import Path
2
+ from biopipen.utils.misc import run_command, logger
3
+ import numpy as np
4
+ import scanpy
5
+ import liana
6
+ import liana.method.sc._liana_pipe as _liana_pipe
7
+
8
+ # monkey-patch liana.method.sc._liana_pipe._trimean due to the updates by scipy 1.14
9
+ # https://github.com/scipy/scipy/commit/a660202652deead0f3b4b688eb9fdcdf9f74066c
10
+ def _trimean(a, axis=0):
11
+ try:
12
+ arr = a.A
13
+ except AttributeError:
14
+ arr = a.toarray()
15
+
16
+ quantiles = np.quantile(arr, q=[0.25, 0.75], axis=axis)
17
+ median = np.median(arr, axis=axis)
18
+ return (quantiles[0] + 2 * median + quantiles[1]) / 4
19
+
20
+
21
+ _liana_pipe._trimean = _trimean
22
+
23
+
24
+ sobjfile = Path({{in.sobjfile | repr}}) # pyright: ignore # noqa: E999
25
+ outfile = Path({{out.outfile | repr}}) # pyright: ignore
26
+ envs = {{envs | repr}} # pyright: ignore
27
+
28
+ method = envs.pop("method")
29
+ assay = envs.pop("assay")
30
+ ncores = envs.pop("ncores")
31
+ species = envs.pop("species")
32
+ rscript = envs.pop("rscript")
33
+
34
+ if sobjfile.suffix.lower() == ".rds" or sobjfile.suffix.lower() == ".h5seurat":
35
+ annfile = outfile.parent / f"{sobjfile.stem}.h5ad"
36
+ r_script_convert_to_anndata = f"""
37
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
38
+ {{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
39
+
40
+ seurat_to_anndata(
41
+ "{sobjfile}",
42
+ "{annfile}",
43
+ assay = {{ envs.assay | r }},
44
+ log_info = log_info
45
+ )
46
+ """
47
+ run_command([rscript, "-e", r_script_convert_to_anndata], fg=True)
48
+
49
+ sobjfile = annfile
50
+
51
+ logger.info("Reading the h5ad file ...")
52
+ adata = scanpy.read_h5ad(sobjfile)
53
+
54
+ method = method.lower()
55
+ if method == "log2fc":
56
+ method_fun = liana.mt.logfc
57
+ else:
58
+ method_fun = getattr(liana.mt, method)
59
+
60
+ logger.info(f"Running {method} ...")
61
+ envs["adata"] = adata
62
+ envs["resource_name"] = "consensus" if species == "human" else "mouseconsensus"
63
+ envs["n_jobs"] = ncores
64
+ envs["inplace"] = True
65
+ envs["verbose"] = True
66
+ envs["key_added"] = "liana_ccc"
67
+ method_fun(**envs)
68
+
69
+ res = adata.uns['liana_ccc']
70
+
71
# Result column holding each method's magnitude score, keyed by the lower-cased
# method name (the same name used to look the method up in `liana.mt`).
# None means no magnitude column is aliased for that method.
mag_score_names = {
    "cellphonedb": "lr_means",
    "connectome": "expr_prod",
    "log2fc": None,
    "natmi": "expr_prod",
    "singlecellsignalr": "lrscore",
    # Must be "rank_aggregate" (not "rank_aggregation") to match the method name.
    "rank_aggregate": "magnitude_rank",
    "geometric_mean": "lr_gmeans",
    "scseqcomm": "inter_score",
    "cellchat": "lr_probs",
}

# Same as above for the specificity score.
spec_score_names = {
    "cellphonedb": "cellphone_pvals",
    "connectome": "scaled_weight",
    "log2fc": "lr_logfc",
    "natmi": "spec_weight",
    "singlecellsignalr": None,
    "rank_aggregate": "specificity_rank",
    "geometric_mean": "gmean_pvals",
    "scseqcomm": None,
    "cellchat": "cellchat_pvals",
}

# Add the method-independent alias columns `mag_score` / `spec_score`.
# A method missing from the tables simply gets no alias column, so results
# that have already been computed are still written out.
for alias, score_names in (
    ("mag_score", mag_score_names),
    ("spec_score", spec_score_names),
):
    score_col = score_names.get(method)
    if score_col is not None:
        res[alias] = res[score_col]

logger.info("Saving the result ...")
res.to_csv(outfile, sep="\t", index=False)