biopipen 0.31.7__py3-none-any.whl → 0.32.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

@@ -0,0 +1,191 @@
1
+ {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
+ library(rlang)
3
+ library(dplyr)
4
+ library(ggplot2)
5
+ library(CCPlotR)
6
+ {{ biopipen_dir | joinpaths: "scripts", "scrna", "CCPlotR-patch.R" | source_r }}
7
+
8
+ cccfile <- {{ in.cccfile | r }}
9
+ expfile <- {{ in.expfile | r }}
10
+ outdir <- {{ out.outdir | r }}
11
+ joboutdir <- {{ job.outdir | r }}
12
+ score_col <- {{ envs.score_col | r }}
13
+ subset <- {{ envs.subset | r }}
14
+ cases <- {{ envs.cases | r }}
15
+
16
+ ccc <- read.table(cccfile, header=TRUE, sep="\t", check.names = FALSE)
17
+ if (!is.null(subset)) {
18
+ ccc <- ccc %>% dplyr::filter(!!parse_expr(subset))
19
+ }
20
+ if (ncol(ccc) > 10) {
21
+ # from CellCellCommunication
22
+ if (!is.null(expfile)) {
23
+ log_warn("in.cccfile is from CellCellCommunication, in.expfile will be ignored")
24
+ }
25
+ if (is.null(score_col)) {
26
+ stop("'envs.score_col' is required for CellCellCommunication output")
27
+ }
28
+ if (!score_col %in% colnames(ccc)) {
29
+ stop(paste("Score column", score_col, "not found in the in.cccfile"))
30
+ }
31
+ # compose the expression data frame
32
+ exp <- data.frame(
33
+ cell_type = c(ccc$source, ccc$target),
34
+ gene = c(ccc$ligand, ccc$receptor),
35
+ mean_exp = c(ccc$ligand_trimean, ccc$receptor_trimean)
36
+ ) %>% distinct()
37
+ ccc <- ccc %>% select(
38
+ source, target,
39
+ ligand, receptor,
40
+ !!sym(score_col)
41
+ ) %>% rename(score = !!sym(score_col))
42
+ } else {
43
+ if (!is.null(expfile)) {
44
+ exp <- read.table(expfile, header=TRUE, sep="\t", check.names = FALSE)
45
+ }
46
+ }
47
+
48
+ if (length(cases) == 0) {
49
+ stop("No cases provided.")
50
+ }
51
+
52
+ .get_default_devpars <- function(kind, nrows, ncols = NULL) {
53
+ if (kind == "arrow") {
54
+ list(
55
+ res = 100,
56
+ width = 600,
57
+ height = 50 + nrows * 20
58
+ )
59
+ } else if (kind == "circos") {
60
+ list(
61
+ res = 100,
62
+ width = 800,
63
+ height = 800
64
+ )
65
+ } else if (kind == "dotplot") {
66
+ list(
67
+ res = 100,
68
+ width = 120 + ncols * 60,
69
+ height = 300 + nrows * 40
70
+ )
71
+ } else if (kind == "heatmap") {
72
+ list(
73
+ res = 100,
74
+ width = 120 + ncols * 60,
75
+ height = 300 + ncols * 40
76
+ )
77
+ } else if (kind == "network") {
78
+ list(
79
+ res = 100,
80
+ width = 1200,
81
+ height = 1200
82
+ )
83
+ } else if (kind == "sigmoid") {
84
+ list(
85
+ res = 100,
86
+ width = max(800, ncols * 200),
87
+ height = 100 + nrows * 60
88
+ )
89
+ }
90
+ }
91
+
92
+ images <- lapply(names(cases), function(name) {
93
+ log_info("- Case: ", name, " ...")
94
+ case <- cases[[name]]
95
+
96
+ kind <- match.arg(case$kind, c("arrow", "circos", "dotplot", "heatmap", "network", "sigmoid"))
97
+ fun <- get(paste0("cc_", kind))
98
+ case$kind <- NULL
99
+
100
+ gg <- NULL
101
+ if (kind == "arrow") {
102
+ cell_types <- case$cell_types
103
+ if (is.null(cell_types) || length(cell_types) != 2) {
104
+ stop("'case.cell_types' is required and must be a vector of length 2")
105
+ }
106
+ n_ligand <- length(unique(ccc[ccc$source == cell_types[1], "ligand"]))
107
+ n_receptor <- length(unique(ccc[ccc$target == cell_types[2], "receptor"]))
108
+ default_devpars <- .get_default_devpars(kind, nrows = max(n_ligand, n_receptor))
109
+ } else if (kind == "circos") {
110
+ nrows <- length(unique(c(ccc$source, ccc$target)))
111
+ default_devpars <- .get_default_devpars(kind, nrows = nrows)
112
+ } else if (kind == "dotplot" || kind == "heatmap") {
113
+ nrows <- length(unique(ccc$source))
114
+ ncols <- length(unique(ccc$target))
115
+ default_devpars <- .get_default_devpars(kind, nrows = nrows, ncols = ncols)
116
+ if (
117
+ (kind == "heatmap" && (is.null(case$option) || case$option != "B")) ||
118
+ (kind == "dotplot" && (is.null(case$option) || case$option != "B"))) {
119
+ gg <- theme(axis.text.x = element_text(angle = 90, hjust = 1))
120
+ }
121
+ } else if (kind == "network") {
122
+ nrows <- length(unique(c(ccc$source, ccc$target)))
123
+ ncols <- length(unique(c(ccc$ligand, ccc$receptor)))
124
+ default_devpars <- .get_default_devpars(kind, nrows = nrows, ncols = ncols)
125
+ gg <- theme(plot.margin = margin(c(50, 50, 50, 50), "pt"))
126
+ } else if (kind == "sigmoid") {
127
+ nrows <- (case$n_top_ints %||% 20) / 2 # approx
128
+ ncols <- length(unique(c(ccc$source, ccc$target))) / 2
129
+ default_devpars <- .get_default_devpars(kind, nrows = nrows, ncols = ncols)
130
+ }
131
+ devpars <- case$devpars %||% default_devpars
132
+ devpars$res <- devpars$res %||% default_devpars$res
133
+ devpars$width <- devpars$width %||% default_devpars$width
134
+ devpars$height <- devpars$height %||% default_devpars$height
135
+ case$devpars <- NULL
136
+
137
+ section <- case$section
138
+ case$section <- NULL
139
+
140
+ case$cc_df <- ccc
141
+ if ("exp_df" %in% names(formals(fun))) {
142
+ case$exp_df <- exp
143
+ }
144
+ outpath <- file.path(outdir, paste0(slugify(name), ".png"))
145
+ png(outpath, width=devpars$width, height=devpars$height, res=devpars$res)
146
+ p <- do_call(fun, case)
147
+ if (!is.null(gg)) { p <- p + gg }
148
+ print(p)
149
+ dev.off()
150
+
151
+ list(
152
+ section = section,
153
+ kind = "table_image",
154
+ src = outpath,
155
+ name = name
156
+ )
157
+ })
158
+
159
+ section_images = list()
160
+ for (image in images) {
161
+ section <- image$section
162
+ image$section <- NULL
163
+ if (is.null(section)) {
164
+ section = "DEFAULT"
165
+ }
166
+ if (!section %in% names(section_images)) {
167
+ section_images[[section]] = list()
168
+ }
169
+ section_images[[section]][[length(section_images[[section]]) + 1]] = image
170
+ }
171
+
172
+ if (length(section_images) == 1 && names(section_images)[1] == "DEFAULT") {
173
+ add_report(
174
+ section_images,
175
+ h1 = "Cell-Cell Communication Plots",
176
+ ui = "table_of_images"
177
+ )
178
+ } else {
179
+ for (section in names(section_images)) {
180
+ imgplots = section_images[[section]]
181
+ add_report(
182
+ list(
183
+ ui = "table_of_images",
184
+ contents = imgplots
185
+ ),
186
+ h1 = section
187
+ )
188
+ }
189
+ }
190
+
191
+ save_report(joboutdir)
@@ -180,7 +180,7 @@ do_case <- function(name, case) {
180
180
  case$rest$maxSize <- case$maxsize
181
181
  case$rest$eps <- case$eps
182
182
  case$rest$nproc <- case$ncores
183
- runFGSEA(ranks, gmtfile, case$top, info$casedir, case$rest)
183
+ runFGSEA(ranks, case$gmtfile, case$top, info$casedir, case$rest)
184
184
 
185
185
  add_report(
186
186
  list(kind = "fgsea", dir = info$casedir),
@@ -1,48 +1,8 @@
1
1
  {{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
2
-
3
- library(rlang)
4
- library(Seurat)
5
- library(SeuratDisk)
2
+ {{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
6
3
 
7
4
  sobjfile <- {{in.sobjfile | r}}
8
5
  outfile <- {{out.outfile | r}}
9
- outdir <- dirname(outfile)
10
6
  assay <- {{envs.assay | r}}
11
7
 
12
- if (endsWith(sobjfile, ".rds") || endsWith(sobjfile, ".RDS")) {
13
- assay_name <- ifelse(is.null(assay), "", paste0("_", assay))
14
- h5seurat_file <- file.path(
15
- outdir,
16
- paste0(tools::file_path_sans_ext(basename(outfile)), assay_name, ".h5seurat")
17
- )
18
- if (file.exists(h5seurat_file) &&
19
- (file.mtime(h5seurat_file) < file.mtime(sobjfile))) {
20
- file.remove(h5seurat_file)
21
- }
22
- if (!file.exists(h5seurat_file)) {
23
- log_info("Reading RDS file ...")
24
- sobj <- readRDS(sobjfile)
25
- assay <- assay %||% DefaultAssay(sobj)
26
- # In order to convert to h5ad
27
- # https://github.com/satijalab/seurat/issues/8220#issuecomment-1871874649
28
- sobj$RNAv3 <- as(object = sobj[[assay]], Class = "Assay")
29
- DefaultAssay(sobj) <- "RNAv3"
30
- sobj$RNA <- NULL
31
- sobj <- RenameAssays(sobj, RNAv3 = "RNA")
32
-
33
- log_info("Saving to H5Seurat file ...")
34
- SaveH5Seurat(sobj, h5seurat_file)
35
- rm(sobj)
36
- sobjfile <- h5seurat_file
37
- } else {
38
- log_info("Using existing H5Seurat file ...")
39
- }
40
- }
41
-
42
- if (!endsWith(sobjfile, ".h5seurat")) {
43
- stop(paste0("Unknown input file format: ",
44
- tools::file_ext(sobjfile),
45
- ". Supported formats: .rds, .RDS, .h5seurat"))
46
- }
47
-
48
- Convert(sobjfile, dest = outfile, assay = assay %||% "RNA", overwrite = TRUE)
8
+ seurat_to_anndata(sobjfile, outfile, assay, log_info)
@@ -414,7 +414,7 @@ do_one_features = function(name) {
414
414
  p = p + eval(parse(text = pls))
415
415
  }
416
416
  }
417
- figfile = file.path(odir, paste0(slugify(name), ".", slugify(case$kind), ".png"))
417
+ figfile = file.path(odir, paste0(slugify(name), ".", slugify(kind), ".png"))
418
418
  png(figfile, width=devpars$width, height=devpars$height, res=devpars$res)
419
419
  tryCatch({
420
420
  print(p)
@@ -5,6 +5,7 @@ library(Seurat)
5
5
  library(SeuratDisk)
6
6
  library(rlang)
7
7
  library(dplyr)
8
+ library(tidyr)
8
9
 
9
10
  set.seed(8525)
10
11
 
@@ -377,7 +378,25 @@ for (qname in names(mapquery_args$refdata)) {
377
378
  repel = TRUE,
378
379
  ) + NoLegend()
379
380
 
380
- png(file.path(outdir, paste0("UMAPs.png")), width = 1400, height = 700, res = 100)
381
+ png(file.path(outdir, paste0("UMAPs-", slugify(qname), ".png")), width = 1500, height = 700, res = 100)
381
382
  print(ref_p | query_p)
382
383
  dev.off()
384
+
385
+ # summarize the stats
386
+ log_info(" Summarizing stats: {qname} -> {rname}")
387
+ ref_stats <- as.data.frame(table(reference@meta.data[[rname]]))
388
+ colnames(ref_stats) <- c("CellType", "Count_Ref")
389
+ query_stats <- as.data.frame(table(sobj@meta.data[[paste0("predicted.", qname)]]))
390
+ colnames(query_stats) <- c("CellType", "Count_Query")
391
+ stats <- left_join(ref_stats, query_stats, by = "CellType") %>%
392
+ replace_na(list(Count_Query = 0)) %>%
393
+ arrange(desc(Count_Query), desc(Count_Ref))
394
+
395
+ write.table(
396
+ stats,
397
+ file = file.path(outdir, paste0("stats-", slugify(qname), ".txt")),
398
+ row.names = FALSE,
399
+ quote = FALSE,
400
+ sep = "\t"
401
+ )
383
402
  }
@@ -35,10 +35,10 @@ rename_files = function(e, sample, path) {
35
35
  perform_cell_qc <- function(sobj, per_sample = FALSE) {
36
36
  log_prefix <- ifelse(per_sample, " ", "- ")
37
37
  log_info("{log_prefix}Adding metadata for QC ...")
38
- sobj$percent.mt <- PercentageFeatureSet(sobj, pattern = "^MT-")
39
- sobj$percent.ribo <- PercentageFeatureSet(sobj, pattern = "^RP[SL]")
40
- sobj$percent.hb <- PercentageFeatureSet(sobj, pattern = "^HB[^(P)]")
41
- sobj$percent.plat <- PercentageFeatureSet(sobj, pattern = "PECAM1|PF4")
38
+ sobj$percent.mt <- PercentageFeatureSet(sobj, pattern = "^MT-|^Mt-|^mt-")
39
+ sobj$percent.ribo <- PercentageFeatureSet(sobj, pattern = "^RP[SL]|^Rp[sl]")
40
+ sobj$percent.hb <- PercentageFeatureSet(sobj, pattern = "^HB[^P]|^Hb[^p]")
41
+ sobj$percent.plat <- PercentageFeatureSet(sobj, pattern = "PECAM1|PF4|Pecam1|Pf4")
42
42
 
43
43
  if (is.null(envs$cell_qc) || length(envs$cell_qc) == 0) {
44
44
  log_warn("{log_prefix}No cell QC criteria is provided. All cells will be kept.")
@@ -210,7 +210,7 @@ load_sample = function(sample) {
210
210
 
211
211
  if (isTRUE(envs$cell_qc_per_sample)) {
212
212
  log_info("- Perform cell QC for sample: {sample} ...")
213
- obj = perform_cell_qc(obj, TRUE)
213
+ obj = perform_cell_qc(obj, per_sample = TRUE)
214
214
  }
215
215
 
216
216
  if (isTRUE(envs$use_sct)) {
@@ -287,7 +287,7 @@ run_cell_qc <- function(sobj) {
287
287
 
288
288
  if (!envs$cell_qc_per_sample) {
289
289
  log_info("Performing cell QC ...")
290
- sobj = perform_cell_qc(sobj)
290
+ sobj = perform_cell_qc(sobj, per_sample = FALSE)
291
291
  }
292
292
 
293
293
  cached$data <- list(sobj = sobj, cell_qc_df = cell_qc_df)