biopipen 0.31.7__py3-none-any.whl → 0.32.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/ns/scrna.py +153 -0
- biopipen/reports/scrna/CellCellCommunicationPlots.svelte +14 -0
- biopipen/reports/scrna/SeuratMap2Ref.svelte +10 -6
- biopipen/reports/scrna/TopExpressingGenes.svelte +1 -1
- biopipen/scripts/scrna/AnnData2Seurat.R +22 -14
- biopipen/scripts/scrna/CCPlotR-patch.R +161 -0
- biopipen/scripts/scrna/CellCellCommunication.py +101 -0
- biopipen/scripts/scrna/CellCellCommunicationPlots.R +191 -0
- biopipen/scripts/scrna/ScFGSEA.R +1 -1
- biopipen/scripts/scrna/Seurat2AnnData.R +2 -42
- biopipen/scripts/scrna/SeuratClusterStats-features.R +1 -1
- biopipen/scripts/scrna/SeuratMap2Ref.R +20 -1
- biopipen/scripts/scrna/SeuratPreparing-common.R +6 -6
- biopipen/scripts/tcr/GIANA/GIANA.py +1356 -797
- biopipen/scripts/tcr/GIANA/GIANA4.py +1364 -789
- biopipen/scripts/tcr/GIANA/query.py +164 -162
- biopipen/scripts/tcr/TCRClustering.R +25 -4
- biopipen/utils/single_cell.R +92 -1
- {biopipen-0.31.7.dist-info → biopipen-0.32.1.dist-info}/METADATA +2 -1
- {biopipen-0.31.7.dist-info → biopipen-0.32.1.dist-info}/RECORD +23 -19
- {biopipen-0.31.7.dist-info → biopipen-0.32.1.dist-info}/WHEEL +1 -1
- {biopipen-0.31.7.dist-info → biopipen-0.32.1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
+
library(rlang)
|
|
3
|
+
library(dplyr)
|
|
4
|
+
library(ggplot2)
|
|
5
|
+
library(CCPlotR)
|
|
6
|
+
{{ biopipen_dir | joinpaths: "scripts", "scrna", "CCPlotR-patch.R" | source_r }}
|
|
7
|
+
|
|
8
|
+
cccfile <- {{ in.cccfile | r }}
|
|
9
|
+
expfile <- {{ in.expfile | r }}
|
|
10
|
+
outdir <- {{ out.outdir | r }}
|
|
11
|
+
joboutdir <- {{ job.outdir | r }}
|
|
12
|
+
score_col <- {{ envs.score_col | r }}
|
|
13
|
+
subset <- {{ envs.subset | r }}
|
|
14
|
+
cases <- {{ envs.cases | r }}
|
|
15
|
+
|
|
16
|
+
ccc <- read.table(cccfile, header=TRUE, sep="\t", check.names = FALSE)
|
|
17
|
+
if (!is.null(subset)) {
|
|
18
|
+
ccc <- ccc %>% dplyr::filter(!!parse_expr(subset))
|
|
19
|
+
}
|
|
20
|
+
if (ncol(ccc) > 10) {
|
|
21
|
+
# from CellCellCommunication
|
|
22
|
+
if (!is.null(expfile)) {
|
|
23
|
+
log_warn("in.cccfile is from CellCellCommunication, in.expfile will be ignored")
|
|
24
|
+
}
|
|
25
|
+
if (is.null(score_col)) {
|
|
26
|
+
stop("'envs.score_col' is required for CellCellCommunication output")
|
|
27
|
+
}
|
|
28
|
+
if (!score_col %in% colnames(ccc)) {
|
|
29
|
+
stop(paste("Score column", score_col, "not found in the in.cccfile"))
|
|
30
|
+
}
|
|
31
|
+
# compose the expression data frame
|
|
32
|
+
exp <- data.frame(
|
|
33
|
+
cell_type = c(ccc$source, ccc$target),
|
|
34
|
+
gene = c(ccc$ligand, ccc$receptor),
|
|
35
|
+
mean_exp = c(ccc$ligand_trimean, ccc$receptor_trimean)
|
|
36
|
+
) %>% distinct()
|
|
37
|
+
ccc <- ccc %>% select(
|
|
38
|
+
source, target,
|
|
39
|
+
ligand, receptor,
|
|
40
|
+
!!sym(score_col)
|
|
41
|
+
) %>% rename(score = !!sym(score_col))
|
|
42
|
+
} else {
|
|
43
|
+
if (!is.null(expfile)) {
|
|
44
|
+
exp <- read.table(expfile, header=TRUE, sep="\t", check.names = FALSE)
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
if (length(cases) == 0) {
|
|
49
|
+
stop("No cases provided.")
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
.get_default_devpars <- function(kind, nrows, ncols = NULL) {
|
|
53
|
+
if (kind == "arrow") {
|
|
54
|
+
list(
|
|
55
|
+
res = 100,
|
|
56
|
+
width = 600,
|
|
57
|
+
height = 50 + nrows * 20
|
|
58
|
+
)
|
|
59
|
+
} else if (kind == "circos") {
|
|
60
|
+
list(
|
|
61
|
+
res = 100,
|
|
62
|
+
width = 800,
|
|
63
|
+
height = 800
|
|
64
|
+
)
|
|
65
|
+
} else if (kind == "dotplot") {
|
|
66
|
+
list(
|
|
67
|
+
res = 100,
|
|
68
|
+
width = 120 + ncols * 60,
|
|
69
|
+
height = 300 + nrows * 40
|
|
70
|
+
)
|
|
71
|
+
} else if (kind == "heatmap") {
|
|
72
|
+
list(
|
|
73
|
+
res = 100,
|
|
74
|
+
width = 120 + ncols * 60,
|
|
75
|
+
height = 300 + ncols * 40
|
|
76
|
+
)
|
|
77
|
+
} else if (kind == "network") {
|
|
78
|
+
list(
|
|
79
|
+
res = 100,
|
|
80
|
+
width = 1200,
|
|
81
|
+
height = 1200
|
|
82
|
+
)
|
|
83
|
+
} else if (kind == "sigmoid") {
|
|
84
|
+
list(
|
|
85
|
+
res = 100,
|
|
86
|
+
width = max(800, ncols * 200),
|
|
87
|
+
height = 100 + nrows * 60
|
|
88
|
+
)
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
images <- lapply(names(cases), function(name) {
|
|
93
|
+
log_info("- Case: ", name, " ...")
|
|
94
|
+
case <- cases[[name]]
|
|
95
|
+
|
|
96
|
+
kind <- match.arg(case$kind, c("arrow", "circos", "dotplot", "heatmap", "network", "sigmoid"))
|
|
97
|
+
fun <- get(paste0("cc_", kind))
|
|
98
|
+
case$kind <- NULL
|
|
99
|
+
|
|
100
|
+
gg <- NULL
|
|
101
|
+
if (kind == "arrow") {
|
|
102
|
+
cell_types <- case$cell_types
|
|
103
|
+
if (is.null(cell_types) || length(cell_types) != 2) {
|
|
104
|
+
stop("'case.cell_types' is required and must be a vector of length 2")
|
|
105
|
+
}
|
|
106
|
+
n_ligand <- length(unique(ccc[ccc$source == cell_types[1], "ligand"]))
|
|
107
|
+
n_receptor <- length(unique(ccc[ccc$target == cell_types[2], "receptor"]))
|
|
108
|
+
default_devpars <- .get_default_devpars(kind, nrows = max(n_ligand, n_receptor))
|
|
109
|
+
} else if (kind == "circos") {
|
|
110
|
+
nrows <- length(unique(c(ccc$source, ccc$target)))
|
|
111
|
+
default_devpars <- .get_default_devpars(kind, nrows = nrows)
|
|
112
|
+
} else if (kind == "dotplot" || kind == "heatmap") {
|
|
113
|
+
nrows <- length(unique(ccc$source))
|
|
114
|
+
ncols <- length(unique(ccc$target))
|
|
115
|
+
default_devpars <- .get_default_devpars(kind, nrows = nrows, ncols = ncols)
|
|
116
|
+
if (
|
|
117
|
+
(kind == "heatmap" && (is.null(case$option) || case$option != "B")) ||
|
|
118
|
+
(kind == "dotplot" && (is.null(case$option) || case$option != "B"))) {
|
|
119
|
+
gg <- theme(axis.text.x = element_text(angle = 90, hjust = 1))
|
|
120
|
+
}
|
|
121
|
+
} else if (kind == "network") {
|
|
122
|
+
nrows <- length(unique(c(ccc$source, ccc$target)))
|
|
123
|
+
ncols <- length(unique(c(ccc$ligand, ccc$receptor)))
|
|
124
|
+
default_devpars <- .get_default_devpars(kind, nrows = nrows, ncols = ncols)
|
|
125
|
+
gg <- theme(plot.margin = margin(c(50, 50, 50, 50), "pt"))
|
|
126
|
+
} else if (kind == "sigmoid") {
|
|
127
|
+
nrows <- (case$n_top_ints %||% 20) / 2 # approx
|
|
128
|
+
ncols <- length(unique(c(ccc$source, ccc$target))) / 2
|
|
129
|
+
default_devpars <- .get_default_devpars(kind, nrows = nrows, ncols = ncols)
|
|
130
|
+
}
|
|
131
|
+
devpars <- case$devpars %||% default_devpars
|
|
132
|
+
devpars$res <- devpars$res %||% default_devpars$res
|
|
133
|
+
devpars$width <- devpars$width %||% default_devpars$width
|
|
134
|
+
devpars$height <- devpars$height %||% default_devpars$height
|
|
135
|
+
case$devpars <- NULL
|
|
136
|
+
|
|
137
|
+
section <- case$section
|
|
138
|
+
case$section <- NULL
|
|
139
|
+
|
|
140
|
+
case$cc_df <- ccc
|
|
141
|
+
if ("exp_df" %in% names(formals(fun))) {
|
|
142
|
+
case$exp_df <- exp
|
|
143
|
+
}
|
|
144
|
+
outpath <- file.path(outdir, paste0(slugify(name), ".png"))
|
|
145
|
+
png(outpath, width=devpars$width, height=devpars$height, res=devpars$res)
|
|
146
|
+
p <- do_call(fun, case)
|
|
147
|
+
if (!is.null(gg)) { p <- p + gg }
|
|
148
|
+
print(p)
|
|
149
|
+
dev.off()
|
|
150
|
+
|
|
151
|
+
list(
|
|
152
|
+
section = section,
|
|
153
|
+
kind = "table_image",
|
|
154
|
+
src = outpath,
|
|
155
|
+
name = name
|
|
156
|
+
)
|
|
157
|
+
})
|
|
158
|
+
|
|
159
|
+
section_images = list()
|
|
160
|
+
for (image in images) {
|
|
161
|
+
section <- image$section
|
|
162
|
+
image$section <- NULL
|
|
163
|
+
if (is.null(section)) {
|
|
164
|
+
section = "DEFAULT"
|
|
165
|
+
}
|
|
166
|
+
if (!section %in% names(section_images)) {
|
|
167
|
+
section_images[[section]] = list()
|
|
168
|
+
}
|
|
169
|
+
section_images[[section]][[length(section_images[[section]]) + 1]] = image
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
if (length(section_images) == 1 && names(section_images)[1] == "DEFAULT") {
|
|
173
|
+
add_report(
|
|
174
|
+
section_images,
|
|
175
|
+
h1 = "Cell-Cell Communication Plots",
|
|
176
|
+
ui = "table_of_images"
|
|
177
|
+
)
|
|
178
|
+
} else {
|
|
179
|
+
for (section in names(section_images)) {
|
|
180
|
+
imgplots = section_images[[section]]
|
|
181
|
+
add_report(
|
|
182
|
+
list(
|
|
183
|
+
ui = "table_of_images",
|
|
184
|
+
contents = imgplots
|
|
185
|
+
),
|
|
186
|
+
h1 = section
|
|
187
|
+
)
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
save_report(joboutdir)
|
biopipen/scripts/scrna/ScFGSEA.R
CHANGED
|
@@ -180,7 +180,7 @@ do_case <- function(name, case) {
|
|
|
180
180
|
case$rest$maxSize <- case$maxsize
|
|
181
181
|
case$rest$eps <- case$eps
|
|
182
182
|
case$rest$nproc <- case$ncores
|
|
183
|
-
runFGSEA(ranks, gmtfile, case$top, info$casedir, case$rest)
|
|
183
|
+
runFGSEA(ranks, case$gmtfile, case$top, info$casedir, case$rest)
|
|
184
184
|
|
|
185
185
|
add_report(
|
|
186
186
|
list(kind = "fgsea", dir = info$casedir),
|
|
@@ -1,48 +1,8 @@
|
|
|
1
1
|
{{ biopipen_dir | joinpaths: "utils", "misc.R" | source_r }}
|
|
2
|
-
|
|
3
|
-
library(rlang)
|
|
4
|
-
library(Seurat)
|
|
5
|
-
library(SeuratDisk)
|
|
2
|
+
{{ biopipen_dir | joinpaths: "utils", "single_cell.R" | source_r }}
|
|
6
3
|
|
|
7
4
|
sobjfile <- {{in.sobjfile | r}}
|
|
8
5
|
outfile <- {{out.outfile | r}}
|
|
9
|
-
outdir <- dirname(outfile)
|
|
10
6
|
assay <- {{envs.assay | r}}
|
|
11
7
|
|
|
12
|
-
|
|
13
|
-
assay_name <- ifelse(is.null(assay), "", paste0("_", assay))
|
|
14
|
-
h5seurat_file <- file.path(
|
|
15
|
-
outdir,
|
|
16
|
-
paste0(tools::file_path_sans_ext(basename(outfile)), assay_name, ".h5seurat")
|
|
17
|
-
)
|
|
18
|
-
if (file.exists(h5seurat_file) &&
|
|
19
|
-
(file.mtime(h5seurat_file) < file.mtime(sobjfile))) {
|
|
20
|
-
file.remove(h5seurat_file)
|
|
21
|
-
}
|
|
22
|
-
if (!file.exists(h5seurat_file)) {
|
|
23
|
-
log_info("Reading RDS file ...")
|
|
24
|
-
sobj <- readRDS(sobjfile)
|
|
25
|
-
assay <- assay %||% DefaultAssay(sobj)
|
|
26
|
-
# In order to convert to h5ad
|
|
27
|
-
# https://github.com/satijalab/seurat/issues/8220#issuecomment-1871874649
|
|
28
|
-
sobj$RNAv3 <- as(object = sobj[[assay]], Class = "Assay")
|
|
29
|
-
DefaultAssay(sobj) <- "RNAv3"
|
|
30
|
-
sobj$RNA <- NULL
|
|
31
|
-
sobj <- RenameAssays(sobj, RNAv3 = "RNA")
|
|
32
|
-
|
|
33
|
-
log_info("Saving to H5Seurat file ...")
|
|
34
|
-
SaveH5Seurat(sobj, h5seurat_file)
|
|
35
|
-
rm(sobj)
|
|
36
|
-
sobjfile <- h5seurat_file
|
|
37
|
-
} else {
|
|
38
|
-
log_info("Using existing H5Seurat file ...")
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
if (!endsWith(sobjfile, ".h5seurat")) {
|
|
43
|
-
stop(paste0("Unknown input file format: ",
|
|
44
|
-
tools::file_ext(sobjfile),
|
|
45
|
-
". Supported formats: .rds, .RDS, .h5seurat"))
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
Convert(sobjfile, dest = outfile, assay = assay %||% "RNA", overwrite = TRUE)
|
|
8
|
+
seurat_to_anndata(sobjfile, outfile, assay, log_info)
|
|
@@ -414,7 +414,7 @@ do_one_features = function(name) {
|
|
|
414
414
|
p = p + eval(parse(text = pls))
|
|
415
415
|
}
|
|
416
416
|
}
|
|
417
|
-
figfile = file.path(odir, paste0(slugify(name), ".", slugify(
|
|
417
|
+
figfile = file.path(odir, paste0(slugify(name), ".", slugify(kind), ".png"))
|
|
418
418
|
png(figfile, width=devpars$width, height=devpars$height, res=devpars$res)
|
|
419
419
|
tryCatch({
|
|
420
420
|
print(p)
|
|
@@ -5,6 +5,7 @@ library(Seurat)
|
|
|
5
5
|
library(SeuratDisk)
|
|
6
6
|
library(rlang)
|
|
7
7
|
library(dplyr)
|
|
8
|
+
library(tidyr)
|
|
8
9
|
|
|
9
10
|
set.seed(8525)
|
|
10
11
|
|
|
@@ -377,7 +378,25 @@ for (qname in names(mapquery_args$refdata)) {
|
|
|
377
378
|
repel = TRUE,
|
|
378
379
|
) + NoLegend()
|
|
379
380
|
|
|
380
|
-
png(file.path(outdir, paste0("UMAPs.png")), width =
|
|
381
|
+
png(file.path(outdir, paste0("UMAPs-", slugify(qname), ".png")), width = 1500, height = 700, res = 100)
|
|
381
382
|
print(ref_p | query_p)
|
|
382
383
|
dev.off()
|
|
384
|
+
|
|
385
|
+
# summarize the stats
|
|
386
|
+
log_info(" Summarizing stats: {qname} -> {rname}")
|
|
387
|
+
ref_stats <- as.data.frame(table(reference@meta.data[[rname]]))
|
|
388
|
+
colnames(ref_stats) <- c("CellType", "Count_Ref")
|
|
389
|
+
query_stats <- as.data.frame(table(sobj@meta.data[[paste0("predicted.", qname)]]))
|
|
390
|
+
colnames(query_stats) <- c("CellType", "Count_Query")
|
|
391
|
+
stats <- left_join(ref_stats, query_stats, by = "CellType") %>%
|
|
392
|
+
replace_na(list(Count_Query = 0)) %>%
|
|
393
|
+
arrange(desc(Count_Query), desc(Count_Ref))
|
|
394
|
+
|
|
395
|
+
write.table(
|
|
396
|
+
stats,
|
|
397
|
+
file = file.path(outdir, paste0("stats-", slugify(qname), ".txt")),
|
|
398
|
+
row.names = FALSE,
|
|
399
|
+
quote = FALSE,
|
|
400
|
+
sep = "\t"
|
|
401
|
+
)
|
|
383
402
|
}
|
|
@@ -35,10 +35,10 @@ rename_files = function(e, sample, path) {
|
|
|
35
35
|
perform_cell_qc <- function(sobj, per_sample = FALSE) {
|
|
36
36
|
log_prefix <- ifelse(per_sample, " ", "- ")
|
|
37
37
|
log_info("{log_prefix}Adding metadata for QC ...")
|
|
38
|
-
sobj$percent.mt <- PercentageFeatureSet(sobj, pattern = "^MT-")
|
|
39
|
-
sobj$percent.ribo <- PercentageFeatureSet(sobj, pattern = "^RP[SL]")
|
|
40
|
-
sobj$percent.hb <- PercentageFeatureSet(sobj, pattern = "^HB[^
|
|
41
|
-
sobj$percent.plat <- PercentageFeatureSet(sobj, pattern = "PECAM1|PF4")
|
|
38
|
+
sobj$percent.mt <- PercentageFeatureSet(sobj, pattern = "^MT-|^Mt-|^mt-")
|
|
39
|
+
sobj$percent.ribo <- PercentageFeatureSet(sobj, pattern = "^RP[SL]|^Rp[sl]")
|
|
40
|
+
sobj$percent.hb <- PercentageFeatureSet(sobj, pattern = "^HB[^P]|^Hb[^p]")
|
|
41
|
+
sobj$percent.plat <- PercentageFeatureSet(sobj, pattern = "PECAM1|PF4|Pecam1|Pf4")
|
|
42
42
|
|
|
43
43
|
if (is.null(envs$cell_qc) || length(envs$cell_qc) == 0) {
|
|
44
44
|
log_warn("{log_prefix}No cell QC criteria is provided. All cells will be kept.")
|
|
@@ -210,7 +210,7 @@ load_sample = function(sample) {
|
|
|
210
210
|
|
|
211
211
|
if (isTRUE(envs$cell_qc_per_sample)) {
|
|
212
212
|
log_info("- Perform cell QC for sample: {sample} ...")
|
|
213
|
-
obj = perform_cell_qc(obj, TRUE)
|
|
213
|
+
obj = perform_cell_qc(obj, per_sample = TRUE)
|
|
214
214
|
}
|
|
215
215
|
|
|
216
216
|
if (isTRUE(envs$use_sct)) {
|
|
@@ -287,7 +287,7 @@ run_cell_qc <- function(sobj) {
|
|
|
287
287
|
|
|
288
288
|
if (!envs$cell_qc_per_sample) {
|
|
289
289
|
log_info("Performing cell QC ...")
|
|
290
|
-
sobj = perform_cell_qc(sobj)
|
|
290
|
+
sobj = perform_cell_qc(sobj, per_sample = FALSE)
|
|
291
291
|
}
|
|
292
292
|
|
|
293
293
|
cached$data <- list(sobj = sobj, cell_qc_df = cell_qc_df)
|