biopipen 0.25.4__py3-none-any.whl → 0.26.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +2 -0
- biopipen/ns/rnaseq.py +142 -5
- biopipen/ns/scrna.py +17 -3
- biopipen/ns/snp.py +70 -0
- biopipen/ns/stats.py +320 -0
- biopipen/scripts/rnaseq/Simulation-ESCO.R +177 -0
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +42 -0
- biopipen/scripts/rnaseq/Simulation.R +23 -0
- biopipen/scripts/rnaseq/UnitConversion.R +323 -54
- biopipen/scripts/scrna/CellsDistribution.R +225 -147
- biopipen/scripts/scrna/MarkersFinder.R +53 -47
- biopipen/scripts/scrna/RadarPlots.R +6 -3
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +37 -0
- biopipen/scripts/scrna/TopExpressingGenes.R +58 -33
- biopipen/scripts/snp/PlinkSimulation.py +88 -0
- biopipen/scripts/stats/ChowTest.R +119 -0
- biopipen/scripts/stats/DiffCoexpr.R +150 -0
- biopipen/scripts/stats/LiquidAssoc.R +136 -0
- biopipen/scripts/stats/MetaPvalue.R +128 -0
- biopipen/scripts/tcr/CloneResidency.R +37 -72
- biopipen/utils/misc.R +19 -0
- biopipen/utils/misc.py +15 -0
- {biopipen-0.25.4.dist-info → biopipen-0.26.1.dist-info}/METADATA +9 -10
- {biopipen-0.25.4.dist-info → biopipen-0.26.1.dist-info}/RECORD +27 -17
- {biopipen-0.25.4.dist-info → biopipen-0.26.1.dist-info}/WHEEL +1 -1
- {biopipen-0.25.4.dist-info → biopipen-0.26.1.dist-info}/entry_points.txt +2 -0
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
|
|
3
|
+
library(rlang)
|
|
4
|
+
library(dplyr)
|
|
5
|
+
library(tidyr)
|
|
6
|
+
library(fastLiquidAssociation)
|
|
7
|
+
|
|
8
|
+
infile <- {{in.infile | r}}
|
|
9
|
+
covfile <- {{in.covfile | r}}
|
|
10
|
+
groupfile <- {{in.groupfile | r}}
|
|
11
|
+
fmlfile <- {{in.fmlfile | r}}
|
|
12
|
+
outfile <- {{out.outfile | r}}
|
|
13
|
+
x <- {{envs.x | r}}
|
|
14
|
+
nvec <- {{envs.nvec | r}}
|
|
15
|
+
topn <- {{envs.topn | r}}
|
|
16
|
+
rvalue <- {{envs.rvalue | r}}
|
|
17
|
+
cut <- {{envs.cut | r}}
|
|
18
|
+
ncores <- {{envs.ncores | r}}
|
|
19
|
+
padj <- {{envs.padj | r}}
|
|
20
|
+
transpose_input <- {{envs.transpose_input | r}}
|
|
21
|
+
transpose_group <- {{envs.transpose_group | r}}
|
|
22
|
+
transpose_cov <- {{envs.transpose_cov | r}}
|
|
23
|
+
xyz_names <- {{envs.xyz_names | r}}
|
|
24
|
+
if (!is.null(xyz_names) && length(xyz_names) == 1) {
|
|
25
|
+
xyz_names <- trimws(strsplit(xyz_names, ",")[[1]])
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
if (is.null(groupfile) && is.null(nvec)) {
|
|
29
|
+
stop("Must provide either in.groupfile or envs.nvec")
|
|
30
|
+
}
|
|
31
|
+
if (!is.null(groupfile) && !is.null(nvec)) {
|
|
32
|
+
stop("Must provide either in.groupfile or envs.nvec, not both")
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
log_info("Reading and preparing data ...")
|
|
36
|
+
indata <- read.table(infile, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE)
|
|
37
|
+
if (transpose_input) {
|
|
38
|
+
indata <- t(indata)
|
|
39
|
+
}
|
|
40
|
+
# Regress the covariates out of every variable when a covariate file is given.
# After this step each column of `indata` holds the residuals of
# `column ~ covariates`.
if (!is.null(covfile)) {
    covdata <- read.table(covfile, header = TRUE, sep = "\t", row.names = 1)
    if (transpose_cov) {
        covdata <- t(covdata)
    }
    # Observations must be in the same order in both tables
    if (!isTRUE(all.equal(rownames(indata), rownames(covdata)))) {
        stop("Row names of indata and covdata must be identical")
    }
    # When transpose_input is TRUE, `indata` comes out of t() as a matrix,
    # which dplyr::mutate() cannot work on; coerce to a data frame first
    # (a no-op when it already is one, column names are kept as they are).
    indata <- as.data.frame(indata) %>% mutate(across(everything(), function(xx) {
        lm(xx ~ as.matrix(covdata))$residuals
    }))
}
|
|
52
|
+
|
|
53
|
+
# Expand a column specification into a numeric vector of column indices.
#
# `range` is a comma-separated specification whose items are either single
# columns or `start-end` ranges, e.g. "1,3,5-7" gives c(1, 3, 5, 6, 7).
# Items that are not numbers are treated as column names of the global
# `indata` and looked up with match(), e.g. "c1,c5-c7"; such names therefore
# must not contain `,` or `-`.
# A numeric `range` (e.g. a single index) is accepted as well.
#
# Returns the indices in the order given; stops if a name is not a column
# of `indata`.
expand_range <- function(range) {
    # as.character() so that a plain number does not break strsplit()
    items <- trimws(unlist(strsplit(as.character(range), ",", fixed = TRUE)))
    items <- items[nzchar(items)]

    # Resolve one item (an index or a column name) to a column index
    resolve <- function(item) {
        idx <- suppressWarnings(as.numeric(item))
        if (is.na(idx)) {
            # not a number, so it must be a column name
            idx <- match(item, colnames(indata))
        }
        if (is.na(idx)) {
            stop(paste0("Unknown column in range specification: ", item))
        }
        idx
    }

    expanded <- lapply(items, function(item) {
        ends <- trimws(strsplit(item, "-", fixed = TRUE)[[1]])
        if (length(ends) == 2 && all(nzchar(ends))) {
            # `start-end`: include every column between the two endpoints
            seq(resolve(ends[1]), resolve(ends[2]))
        } else {
            resolve(item)
        }
    })
    # as.numeric() keeps an empty specification as numeric(0) instead of NULL
    as.numeric(unlist(expanded))
}
|
|
62
|
+
|
|
63
|
+
cut <- cut %||% max(ceiling(nrow(indata)/22), 4)
|
|
64
|
+
if (!is.null(x)) { x <- expand_range(x) }
|
|
65
|
+
if (!is.null(groupfile)) {
|
|
66
|
+
groupdata <- read.table(groupfile, header = TRUE, sep = "\t", row.names = 1)
|
|
67
|
+
if (transpose_group) {
|
|
68
|
+
groupdata <- t(groupdata)
|
|
69
|
+
}
|
|
70
|
+
if (!isTRUE(all.equal(rownames(indata), rownames(groupdata)))) {
|
|
71
|
+
stop("Row names of indata and groupdata must be identical")
|
|
72
|
+
}
|
|
73
|
+
nvec <- (ncol(indata) + 1) : (ncol(indata) + ncol(groupdata))
|
|
74
|
+
indata <- cbind(indata, groupdata)
|
|
75
|
+
} else {
|
|
76
|
+
nvec <- expand_range(nvec)
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
log_info("Running fastLiquidAssociation ...")
|
|
80
|
+
indata <- as.matrix(indata)
|
|
81
|
+
mla <- fastMLA(
|
|
82
|
+
data = indata,
|
|
83
|
+
topn = topn,
|
|
84
|
+
rvalue = rvalue,
|
|
85
|
+
cut = cut,
|
|
86
|
+
threads = ncores,
|
|
87
|
+
nvec = nvec
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
if (nrow(mla) == 0) {
|
|
91
|
+
log_warn("No significant associations found")
|
|
92
|
+
out <- data.frame(
|
|
93
|
+
X12 = character(),
|
|
94
|
+
X21 = character(),
|
|
95
|
+
X3 = character(),
|
|
96
|
+
rhodiff = numeric(),
|
|
97
|
+
`MLA.value` = numeric(),
|
|
98
|
+
estimates = numeric(),
|
|
99
|
+
`san.se` = numeric(),
|
|
100
|
+
wald = numeric(),
|
|
101
|
+
Pval = numeric(),
|
|
102
|
+
model = character()
|
|
103
|
+
)
|
|
104
|
+
} else {
|
|
105
|
+
cnm <- mass.CNM(data = indata, GLA.mat = mla, nback = topn)
|
|
106
|
+
out <- cnm$`top p-values` %>%
|
|
107
|
+
dplyr::select(X12 = "X1 or X2", X21 = "X2 or X1", everything(), Pval = "p value")
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
if (!is.null(fmlfile)) {
|
|
111
|
+
fmldata <- read.table(fmlfile, header = FALSE, sep = "\t", row.names = NULL)
|
|
112
|
+
colnames(fmldata) <- c("Z", "X", "Y")
|
|
113
|
+
all_combns <- fmldata %>% unite("XYZ", X, Y, Z, sep = " // ") %>% pull(XYZ)
|
|
114
|
+
out <- out %>%
|
|
115
|
+
unite("XYZ", X12, X21, X3, sep = " // ", remove = FALSE) %>%
|
|
116
|
+
dplyr::filter(XYZ %in% all_combns) %>%
|
|
117
|
+
dplyr::select(-XYZ)
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
if (!is.null(xyz_names)) {
|
|
121
|
+
out <- out %>%
|
|
122
|
+
dplyr::select(
|
|
123
|
+
!!sym(xyz_names[1]) := "X12",
|
|
124
|
+
!!sym(xyz_names[2]) := "X21",
|
|
125
|
+
!!sym(xyz_names[3]) := "X3",
|
|
126
|
+
everything()
|
|
127
|
+
)
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
if (padj != "none") {
|
|
131
|
+
log_info("Calculating adjusted p-values ...")
|
|
132
|
+
out$Padj <- p.adjust(out$Pval, method = padj)
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
log_info("Writing output ...")
|
|
136
|
+
write.table(out, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
|
|
3
|
+
library(metap)
|
|
4
|
+
library(rlang)
|
|
5
|
+
library(dplyr)
|
|
6
|
+
|
|
7
|
+
infiles <- {{in.infiles | r}}
|
|
8
|
+
outfile <- {{out.outfile | r}}
|
|
9
|
+
id_cols <- {{envs.id_cols | r}}
|
|
10
|
+
id_exprs <- {{envs.id_exprs | r}}
|
|
11
|
+
pval_cols <- {{envs.pval_cols | r}}
|
|
12
|
+
method <- {{envs.method | r}}
|
|
13
|
+
na <- {{envs.na | r}}
|
|
14
|
+
padj <- {{envs.padj | r}}
|
|
15
|
+
|
|
16
|
+
if (method == "fisher") { method = "sumlog" }
|
|
17
|
+
|
|
18
|
+
if (length(infiles) == 1 && padj == "none") {
|
|
19
|
+
log_info("Only one input file, copying to output ...")
|
|
20
|
+
file.copy(infiles, outfile)
|
|
21
|
+
} else if (length(infiles) == 1) {
|
|
22
|
+
log_info("Only one input file, performing p-value adjustment ...")
|
|
23
|
+
if (is.null(pval_cols)) {
|
|
24
|
+
stop("Must provide envs.pval_cols")
|
|
25
|
+
}
|
|
26
|
+
indata <- read.table(infiles, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE)
|
|
27
|
+
if (!pval_cols %in% colnames(indata)) {
|
|
28
|
+
stop("envs.pval_cols does not exist in input file")
|
|
29
|
+
}
|
|
30
|
+
indata$Padj <- p.adjust(indata[, pval_cols], method = padj)
|
|
31
|
+
|
|
32
|
+
log_info("Writing output ...")
|
|
33
|
+
write.table(indata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
|
|
34
|
+
} else {
|
|
35
|
+
# Check pval_cols
|
|
36
|
+
if (is.null(pval_cols)) {
|
|
37
|
+
stop("Must provide envs.pval_cols")
|
|
38
|
+
}
|
|
39
|
+
if (length(pval_cols) == 1) {
|
|
40
|
+
pval_cols <- trimws(strsplit(pval_cols, ",")[[1]])
|
|
41
|
+
}
|
|
42
|
+
if (length(pval_cols) == 1) {
|
|
43
|
+
pval_cols <- rep(pval_cols, length(infiles))
|
|
44
|
+
}
|
|
45
|
+
if (length(pval_cols) != length(infiles)) {
|
|
46
|
+
stop("envs.pval_cols must be a single name or have the same length as in.infiles")
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
# Check id_cols
|
|
50
|
+
if (is.null(id_cols)) {
|
|
51
|
+
stop("Must provide envs.id_cols")
|
|
52
|
+
}
|
|
53
|
+
if (length(id_cols) == 1) {
|
|
54
|
+
id_cols <- trimws(strsplit(id_cols, ",")[[1]])
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
# Check id_exprs
|
|
58
|
+
if (!is.null(id_exprs)) {
|
|
59
|
+
if (length(id_exprs) == 1) {
|
|
60
|
+
id_exprs <- rep(id_exprs, length(infiles))
|
|
61
|
+
}
|
|
62
|
+
if (length(id_exprs) != length(infiles)) {
|
|
63
|
+
stop("envs.id_exprs must be a single expression or have the same length as in.infiles")
|
|
64
|
+
}
|
|
65
|
+
if (length(id_cols) != 1) {
|
|
66
|
+
stop("envs.id_cols must be a single name if envs.id_exprs is provided")
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
log_info("Reading and preparing data ...")
|
|
71
|
+
outdata <- NULL
|
|
72
|
+
for (i in seq_along(infiles)) {
|
|
73
|
+
infile <- infiles[i]
|
|
74
|
+
name <- tools::file_path_sans_ext(basename(infile))
|
|
75
|
+
pval_col <- paste0("Pval_", name)
|
|
76
|
+
dat <- read.table(
|
|
77
|
+
infile, header = TRUE, sep = "\t", row.names = NULL, check.names = FALSE
|
|
78
|
+
)
|
|
79
|
+
if (!is.null(id_exprs)) {
|
|
80
|
+
dat <- dat %>% mutate(!!sym(id_cols) := !!parse_expr(id_exprs[i]))
|
|
81
|
+
}
|
|
82
|
+
dat <- dat %>% dplyr::select(all_of(id_cols), !!sym(pval_col) := !!sym(pval_cols[i]))
|
|
83
|
+
|
|
84
|
+
if (is.null(outdata)) {
|
|
85
|
+
outdata <- dat
|
|
86
|
+
} else {
|
|
87
|
+
outdata <- full_join(outdata, dat, by = id_cols)
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
log_info("Running metap on each row ...")
|
|
92
|
+
# Combine the p-values of each row (one per input file) into a meta p-value.
# MetaPval: the combined p-value, N: the number of p-values that went into it.
pval_columns <- setdiff(colnames(outdata), id_cols)
# Preallocate the results instead of growing them with c(): this avoids the
# repeated copying and, more importantly, keeps them as zero-length vectors
# (not NULL) when there are no rows, so the MetaPval/N columns always exist.
metaps <- rep(NA_real_, nrow(outdata))
ns <- rep(NA_real_, nrow(outdata))
for (i in seq_len(nrow(outdata))) {
    ps <- unlist(outdata[i, pval_columns, drop = TRUE])
    if (na == -1) {
        # -1 means missing p-values are dropped
        ps <- ps[!is.na(ps)]
    } else {
        # otherwise missing p-values are replaced by the given value
        ps[is.na(ps)] <- na
    }
    if (length(ps) == 1) {
        # nothing to combine, keep the single p-value as it is
        metaps[i] <- ps
        ns[i] <- 1
    } else if (length(ps) > 1) {
        # `method` is the name of the combining function; its result is
        # expected to carry the combined p-value in `$p`
        metaps[i] <- do.call(method, list(ps))$p
        ns[i] <- length(ps)
    }
    # with no p-value left, both MetaPval and N stay NA
}
outdata$MetaPval <- metaps
outdata$N <- ns
|
|
115
|
+
outdata <- outdata %>% arrange(MetaPval)
|
|
116
|
+
|
|
117
|
+
if (padj != "none") {
|
|
118
|
+
log_info("Calculating adjusted p-values ...")
|
|
119
|
+
outdata$MetaPadj <- p.adjust(outdata$MetaPval, method = padj)
|
|
120
|
+
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
log_info("Writing output ...")
|
|
124
|
+
write.table(outdata, outfile, quote = FALSE, sep = "\t", row.names = FALSE)
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
|
|
@@ -116,13 +116,13 @@ get_groups <- function(order) {
|
|
|
116
116
|
}
|
|
117
117
|
|
|
118
118
|
perpare_case <- function(casename, case) {
|
|
119
|
-
log_info("
|
|
119
|
+
log_info("- Processing case: {casename} ...")
|
|
120
120
|
# Check if required keys are provided
|
|
121
121
|
if (is.null(case$subject) || length(case$subject) == 0) {
|
|
122
|
-
stop(paste("`subject` is required for case:", casename))
|
|
122
|
+
stop(paste(" `subject` is required for case:", casename))
|
|
123
123
|
}
|
|
124
124
|
if (is.null(case$group) || length(case$group) == 0) {
|
|
125
|
-
stop(paste("`group` is required for case:", casename))
|
|
125
|
+
stop(paste(" `group` is required for case:", casename))
|
|
126
126
|
}
|
|
127
127
|
if (!is.null(case$order) && length(case$order) > 0) {
|
|
128
128
|
has_comma <- grepl(",", case$order)
|
|
@@ -134,13 +134,8 @@ perpare_case <- function(casename, case) {
|
|
|
134
134
|
))
|
|
135
135
|
} else if (!any(has_comma)) {
|
|
136
136
|
if (length(case$order) > 2) {
|
|
137
|
-
log_warn(
|
|
138
|
-
|
|
139
|
-
"- Order of groups in case:", casename,
|
|
140
|
-
" is not recommended, please use comma to separate groups. \n",
|
|
141
|
-
"Instead of `['A', 'B', 'C']`, use `['A,B', 'A,C', 'B,C']`."
|
|
142
|
-
)
|
|
143
|
-
)
|
|
137
|
+
log_warn(" Order of groups is not recommended, please use comma to separate groups.")
|
|
138
|
+
log_warn(" Instead of `['A', 'B', 'C']`, use `['A,B', 'A,C', 'B,C']`.")
|
|
144
139
|
case$order <- sapply(
|
|
145
140
|
combn(case$order, 2, simplify = FALSE),
|
|
146
141
|
function(x) paste(x, collapse = ",")
|
|
@@ -151,8 +146,8 @@ perpare_case <- function(casename, case) {
|
|
|
151
146
|
} else {
|
|
152
147
|
stop(
|
|
153
148
|
paste0(
|
|
154
|
-
"
|
|
155
|
-
" is not consistent, please use comma to separate groups.
|
|
149
|
+
" Order of groups in case:", casename,
|
|
150
|
+
" is not consistent, please use comma to separate groups. ",
|
|
156
151
|
"Instead of `['A', 'B', 'C']`, use `['A,B', 'A,C', 'B,C']`, ",
|
|
157
152
|
"however, this is inconsistent: `['A,B', 'C']`"
|
|
158
153
|
)
|
|
@@ -255,14 +250,16 @@ plot_scatter <- function(counts, subject, suf1, suf2) {
|
|
|
255
250
|
}
|
|
256
251
|
ggplot(plotdata) +
|
|
257
252
|
geom_point(
|
|
258
|
-
|
|
259
|
-
x =
|
|
253
|
+
aes(
|
|
254
|
+
x = !!sym(suf1),
|
|
255
|
+
y = !!sym(suf2),
|
|
256
|
+
color = Type,
|
|
257
|
+
size = Size,
|
|
258
|
+
fill = Type
|
|
260
259
|
),
|
|
261
260
|
alpha = .6,
|
|
262
261
|
shape = 21
|
|
263
262
|
) +
|
|
264
|
-
# geom_point(aes_string(x=x, y=y, color='color'), shape=1) +
|
|
265
|
-
# scale_color_manual(values=color) +
|
|
266
263
|
scale_x_continuous(
|
|
267
264
|
trans = "log2",
|
|
268
265
|
limits = c(minx, maxx),
|
|
@@ -277,7 +274,6 @@ plot_scatter <- function(counts, subject, suf1, suf2) {
|
|
|
277
274
|
) +
|
|
278
275
|
theme_prism(base_size = 16) +
|
|
279
276
|
scale_size(guide = "none") +
|
|
280
|
-
# theme(legend.position = "none") +
|
|
281
277
|
labs(
|
|
282
278
|
title = bquote(.(subject) ~ (italic(n) == .(n_formatted))),
|
|
283
279
|
subtitle = subtitle
|
|
@@ -302,61 +298,38 @@ plot_venndg <- function(counts, groups, singletons) {
|
|
|
302
298
|
venn <- Venn(venn_data)
|
|
303
299
|
vdata <- process_data(venn)
|
|
304
300
|
vregion <- venn_region(vdata)
|
|
305
|
-
|
|
306
|
-
sregion$count = singletons[sregion$name, "count"]
|
|
307
|
-
sregion <- sregion %>% mutate(name = paste0(name, " singletons"))
|
|
301
|
+
vregion$singleton_count = singletons[vregion$name, "count"]
|
|
308
302
|
vregion <- vregion %>% mutate(
|
|
309
303
|
count_perc = round(count / sum(count) * 100, 1),
|
|
310
|
-
count_str = paste0(count, " (", count_perc, "%)")
|
|
304
|
+
count_str = paste0(count, " (", count_perc, "%)"),
|
|
305
|
+
count_str = if_else(is.na(singleton_count), count_str, paste0(count_str, "\nsingletons = ", singleton_count))
|
|
311
306
|
)
|
|
312
307
|
|
|
313
|
-
# Align the catagory labels
|
|
314
|
-
cat_nudge_y <- 0
|
|
315
|
-
if (length(groups) == 3) { cat_nudge_y <- c(-400, 0, -400) }
|
|
316
|
-
# Shift Count labels
|
|
317
|
-
count_nudge_y <- -10
|
|
318
|
-
if (length(groups) == 3) { count_nudge_y <- c(20, -20, 20, rep(0, nrow(vregion) - 3)) }
|
|
319
|
-
# Shift the singletons stat labels
|
|
320
|
-
label_nudge_y <- 60
|
|
321
|
-
if (length(groups) == 3) { label_nudge_y <- c(60, -60, -60) }
|
|
322
|
-
|
|
323
308
|
venn_p <- ggplot() +
|
|
324
309
|
# 1. region count layer
|
|
325
310
|
geom_sf(aes(fill = count), data = venn_region(vdata)) +
|
|
326
311
|
# 2. set edge layer
|
|
327
312
|
# geom_sf(aes(color = factor(id)), data = venn_setedge(data), show.legend = FALSE) +
|
|
328
313
|
# 3. set label layer
|
|
329
|
-
geom_sf_text(aes(label = name), data = venn_setlabel(vdata)
|
|
314
|
+
geom_sf_text(aes(label = name), data = venn_setlabel(vdata)) +
|
|
330
315
|
# 4. region label layer
|
|
331
316
|
geom_sf_label(
|
|
332
317
|
aes(label = count_str),
|
|
333
318
|
alpha = .8,
|
|
334
319
|
label.padding = unit(.2, "lines"),
|
|
335
|
-
data = vregion
|
|
336
|
-
nudge_y = count_nudge_y
|
|
320
|
+
data = vregion
|
|
337
321
|
) +
|
|
338
322
|
# 5. singletons label layer
|
|
339
323
|
scale_fill_distiller(palette = "Oranges", direction = 1) +
|
|
340
|
-
new_scale_fill() +
|
|
341
|
-
geom_sf_label(
|
|
342
|
-
aes(label = count, fill = name),
|
|
343
|
-
alpha = .6,
|
|
344
|
-
data = sregion,
|
|
345
|
-
nudge_y = label_nudge_y,
|
|
346
|
-
label.padding = unit(1, "lines"),
|
|
347
|
-
label.r = unit(1.2, "lines"),
|
|
348
|
-
label.size = 0.05,
|
|
349
|
-
show.legend = TRUE
|
|
350
|
-
) +
|
|
351
324
|
theme_void() +
|
|
352
|
-
theme(plot.margin = margin(1,1,1,1, "cm"))
|
|
353
|
-
scale_fill_brewer(palette = "Reds", name = "Singletons")
|
|
325
|
+
theme(plot.margin = margin(1,1,1,1, "cm"))
|
|
354
326
|
|
|
355
327
|
venn_p
|
|
356
328
|
}
|
|
357
329
|
|
|
358
330
|
plot_upset <- function(counts, singletons) {
|
|
359
331
|
query_singleton <- function(row) { row["Singletons"] == "true" }
|
|
332
|
+
query_multiplet <- function(row) { rep(TRUE, length(row)) }
|
|
360
333
|
|
|
361
334
|
cnts <- column_to_rownames(counts, "CDR3.aa") %>%
|
|
362
335
|
mutate(across(everything(), ~ as.integer(as.logical(.x))))
|
|
@@ -365,7 +338,19 @@ plot_upset <- function(counts, singletons) {
|
|
|
365
338
|
cnts[sgltns, "Singletons"] <- "true"
|
|
366
339
|
sets <- setdiff(colnames(cnts), "Singletons")
|
|
367
340
|
|
|
341
|
+
# Fix: Error in fix.by(by.x, x) : 'by' must specify uniquely valid columns
|
|
342
|
+
colnames(cnts) <- make.names(colnames(cnts))
|
|
343
|
+
sets <- make.names(sets)
|
|
344
|
+
|
|
368
345
|
upset(cnts, sets = sets, query.legend = "top", sets.x.label = "# clones", queries = list(
|
|
346
|
+
list(
|
|
347
|
+
# in order to add legend
|
|
348
|
+
# actually mark all, but singleton will override
|
|
349
|
+
query = query_multiplet,
|
|
350
|
+
color = "#3b3b3b",
|
|
351
|
+
active = TRUE,
|
|
352
|
+
query.name = "Multiplets"
|
|
353
|
+
),
|
|
369
354
|
list(
|
|
370
355
|
query = query_singleton,
|
|
371
356
|
color = "orange",
|
|
@@ -407,7 +392,7 @@ handle_subject <- function(i, subjects, casename, case) {
|
|
|
407
392
|
mutate(across(everything(), as.character)) %>%
|
|
408
393
|
paste(collapse = "-")
|
|
409
394
|
|
|
410
|
-
log_info("Handling {
|
|
395
|
+
log_info(" Handling {subject} ({i}/{nrow(subjects)}) ...")
|
|
411
396
|
|
|
412
397
|
if (!is.null(case$subset)) {
|
|
413
398
|
counts <- cldata %>% filter(!!parse_expr(case$subset))
|
|
@@ -432,7 +417,7 @@ handle_subject <- function(i, subjects, casename, case) {
|
|
|
432
417
|
case$order <- sapply(combn(groups, 2, simplify = FALSE), function(x) paste(x, collapse = ","))
|
|
433
418
|
}
|
|
434
419
|
if (length(unique(counts[[case$group]])) < 2) {
|
|
435
|
-
log_warn("
|
|
420
|
+
log_warn(" - Subject doesn't have enough groups: {subject}")
|
|
436
421
|
return()
|
|
437
422
|
}
|
|
438
423
|
singletons = counts %>%
|
|
@@ -452,20 +437,6 @@ handle_subject <- function(i, subjects, casename, case) {
|
|
|
452
437
|
select(CDR3.aa, !!!syms(groups))
|
|
453
438
|
counts[is.na(counts)] <- 0
|
|
454
439
|
|
|
455
|
-
# # Save samples to group_by so they can be aligned accordingly in the report
|
|
456
|
-
# if (!is.null(section)) {
|
|
457
|
-
# group_dir <- file.path(casedir, "section")
|
|
458
|
-
# dir.create(group_dir, showWarnings = FALSE)
|
|
459
|
-
|
|
460
|
-
# sgroups <- subject_row %>%
|
|
461
|
-
# left_join(cldata) %>%
|
|
462
|
-
# pull(section) %>%
|
|
463
|
-
# unique() %>%
|
|
464
|
-
# paste(collapse = "-")
|
|
465
|
-
# group_file <- file.path(group_dir, paste0(slugify(sgroups), ".txt"))
|
|
466
|
-
# cat(subject, file = group_file, sep = "\n", append = TRUE)
|
|
467
|
-
# }
|
|
468
|
-
|
|
469
440
|
# Save counts
|
|
470
441
|
counts_dir <- file.path(casedir, "counts")
|
|
471
442
|
countfile <- file.path(counts_dir, paste0(slugify(subject), ".txt"))
|
|
@@ -495,13 +466,7 @@ handle_subject <- function(i, subjects, casename, case) {
|
|
|
495
466
|
for (j in seq_along(case$order)) {
|
|
496
467
|
pair <- strsplit(case$order[j], ",")[[1]]
|
|
497
468
|
if (length(setdiff(pair, groups)) > 0) {
|
|
498
|
-
log_warn(
|
|
499
|
-
paste0(
|
|
500
|
-
"- One of the comparisons doesn't exist in case (", casename,
|
|
501
|
-
") for subject (", subject, "): ",
|
|
502
|
-
case$order[j]
|
|
503
|
-
)
|
|
504
|
-
)
|
|
469
|
+
log_warn(" - Comparison {case$order[j]} doesn't exist.")
|
|
505
470
|
next
|
|
506
471
|
}
|
|
507
472
|
scatter_p <- plot_scatter(counts, subject, pair[1], pair[2])
|
|
@@ -534,7 +499,7 @@ handle_subject <- function(i, subjects, casename, case) {
|
|
|
534
499
|
|
|
535
500
|
h <- headings(case$section, casename, "Overlapping Clones (Venn Diagram)")
|
|
536
501
|
add_report(
|
|
537
|
-
list(src = venn_png),
|
|
502
|
+
list(src = venn_png, name = subject),
|
|
538
503
|
h1 = h$h1,
|
|
539
504
|
h2 = h$h2,
|
|
540
505
|
h3 = h$h3,
|
|
@@ -549,7 +514,7 @@ handle_subject <- function(i, subjects, casename, case) {
|
|
|
549
514
|
|
|
550
515
|
h <- headings(case$section, casename, "Overlapping Clones (UpSet Plots)")
|
|
551
516
|
add_report(
|
|
552
|
-
list(src = upset_png),
|
|
517
|
+
list(src = upset_png, name = subject),
|
|
553
518
|
h1 = h$h1,
|
|
554
519
|
h2 = h$h2,
|
|
555
520
|
h3 = h$h3,
|
biopipen/utils/misc.R
CHANGED
|
@@ -29,6 +29,25 @@ bQuote <- function(x) {
|
|
|
29
29
|
#' @param tolower Convert to lowercase
|
|
30
30
|
#' @return A slugified string
|
|
31
31
|
slugify <- function(x, non_alphanum_replace="-", collapse_replace=TRUE, tolower=FALSE) {
|
|
32
|
+
subs <- list(
|
|
33
|
+
"š"="s", "œ"="oe", "ž"="z", "ß"="ss", "þ"="y", "à"="a", "á"="a", "â"="a",
|
|
34
|
+
"ã"="a", "ä"="a", "å"="a", "æ"="ae", "ç"="c", "è"="e", "é"="e", "ê"="e",
|
|
35
|
+
"ë"="e", "ì"="i", "í"="i", "î"="i", "ï"="i", "ð"="d", "ñ"="n", "ò"="o",
|
|
36
|
+
"ó"="o", "ô"="o", "õ"="o", "ö"="o", "ø"="oe", "ù"="u", "ú"="u", "û"="u",
|
|
37
|
+
"ü"="u", "ý"="y", "ÿ"="y", "ğ"="g", "ı"="i", "ij"="ij", "ľ"="l", "ň"="n",
|
|
38
|
+
"ř"="r", "ş"="s", "ť"="t", "ų"="u", "ů"="u", "ý"="y", "ź"="z", "ż"="z",
|
|
39
|
+
"ſ"="s", "α"="a", "β"="b", "γ"="g", "δ"="d", "ε"="e", "ζ"="z", "η"="h",
|
|
40
|
+
"θ"="th", "ι"="i", "κ"="k", "λ"="l", "μ"="m", "ν"="n", "ξ"="x", "ο"="o",
|
|
41
|
+
"π"="p", "ρ"="r", "σ"="s", "τ"="t", "υ"="u", "φ"="ph", "χ"="ch", "ψ"="ps",
|
|
42
|
+
"ω"="o", "ά"="a", "έ"="e", "ή"="h", "ί"="i", "ό"="o", "ύ"="u", "ώ"="o",
|
|
43
|
+
"ϐ"="b", "ϑ"="th", "ϒ"="y", "ϕ"="ph", "ϖ"="p", "Ϛ"="st", "ϛ"="st", "Ϝ"="f",
|
|
44
|
+
"ϝ"="f", "Ϟ"="k", "ϟ"="k", "Ϡ"="k", "ϡ"="k", "ϰ"="k", "ϱ"="r", "ϲ"="s",
|
|
45
|
+
"ϳ"="j", "ϴ"="th", "ϵ"="e", "϶"="p"
|
|
46
|
+
)
|
|
47
|
+
# replace latin and greek characters to the closest english character
|
|
48
|
+
for (k in names(subs)) {
|
|
49
|
+
x <- gsub(k, subs[[k]], x)
|
|
50
|
+
}
|
|
32
51
|
x <- gsub("[^[:alnum:]_]", non_alphanum_replace, x)
|
|
33
52
|
if(collapse_replace) x <- gsub(paste0(non_alphanum_replace, "+"), non_alphanum_replace, x)
|
|
34
53
|
if(tolower) x <- tolower(x)
|
biopipen/utils/misc.py
CHANGED
|
@@ -2,9 +2,24 @@ from __future__ import annotations
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
4
|
import sys
|
|
5
|
+
import logging
|
|
5
6
|
from typing import List
|
|
6
7
|
from biopipen.core.filters import dict_to_cli_args # noqa: F401
|
|
7
8
|
|
|
9
|
+
logger = logging.getLogger("biopipen_job")
|
|
10
|
+
logger.setLevel(logging.INFO)
|
|
11
|
+
_handler = logging.StreamHandler(sys.stdout)
|
|
12
|
+
# Use same log format as in R
|
|
13
|
+
# {sprintf("%-7s", level)} [{format(time, "%Y-%m-%d %H:%M:%S")}] {msg}
|
|
14
|
+
# so the logs can be populated by pipen-poplog
|
|
15
|
+
_handler.setFormatter(
|
|
16
|
+
logging.Formatter(
|
|
17
|
+
"%(levelname)-7s [%(asctime)s] %(message)s",
|
|
18
|
+
datefmt="%Y-%m-%d %H:%M:%S",
|
|
19
|
+
)
|
|
20
|
+
)
|
|
21
|
+
logger.addHandler(_handler)
|
|
22
|
+
|
|
8
23
|
|
|
9
24
|
def exec_code(code, global_vars=None, local_vars=None, return_var=None):
|
|
10
25
|
global_vars = global_vars or {}
|
|
@@ -1,23 +1,22 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: biopipen
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.26.1
|
|
4
4
|
Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: pwwang
|
|
7
7
|
Author-email: pwwang@pwwang.com
|
|
8
|
-
Requires-Python: >=3.
|
|
8
|
+
Requires-Python: >=3.9,<4.0
|
|
9
9
|
Classifier: License :: OSI Approved :: MIT License
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
12
11
|
Classifier: Programming Language :: Python :: 3.9
|
|
13
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
16
15
|
Provides-Extra: runinfo
|
|
17
|
-
Requires-Dist: datar[pandas] (>=0.15.
|
|
18
|
-
Requires-Dist: pipen-board[report] (>=0.
|
|
19
|
-
Requires-Dist: pipen-cli-run (>=0.
|
|
20
|
-
Requires-Dist: pipen-filters (>=0.
|
|
21
|
-
Requires-Dist: pipen-poplog (>=0.0.2
|
|
22
|
-
Requires-Dist: pipen-runinfo (>=0.
|
|
23
|
-
Requires-Dist: pipen-verbose (>=0.
|
|
16
|
+
Requires-Dist: datar[pandas] (>=0.15.5,<0.16.0)
|
|
17
|
+
Requires-Dist: pipen-board[report] (>=0.15,<0.16)
|
|
18
|
+
Requires-Dist: pipen-cli-run (>=0.13,<0.14)
|
|
19
|
+
Requires-Dist: pipen-filters (>=0.12,<0.13)
|
|
20
|
+
Requires-Dist: pipen-poplog (>=0.1,<0.2)
|
|
21
|
+
Requires-Dist: pipen-runinfo (>=0.6,<0.7) ; extra == "runinfo"
|
|
22
|
+
Requires-Dist: pipen-verbose (>=0.11,<0.12)
|