biopipen 0.25.4__py3-none-any.whl → 0.26.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/core/config.toml +2 -0
- biopipen/ns/rnaseq.py +142 -5
- biopipen/ns/scrna.py +17 -3
- biopipen/ns/snp.py +70 -0
- biopipen/ns/stats.py +320 -0
- biopipen/scripts/rnaseq/Simulation-ESCO.R +177 -0
- biopipen/scripts/rnaseq/Simulation-RUVcorr.R +42 -0
- biopipen/scripts/rnaseq/Simulation.R +23 -0
- biopipen/scripts/rnaseq/UnitConversion.R +323 -54
- biopipen/scripts/scrna/CellsDistribution.R +225 -147
- biopipen/scripts/scrna/MarkersFinder.R +53 -47
- biopipen/scripts/scrna/RadarPlots.R +6 -3
- biopipen/scripts/scrna/SeuratClusterStats-stats.R +37 -0
- biopipen/scripts/scrna/TopExpressingGenes.R +58 -33
- biopipen/scripts/snp/PlinkSimulation.py +88 -0
- biopipen/scripts/stats/ChowTest.R +119 -0
- biopipen/scripts/stats/DiffCoexpr.R +150 -0
- biopipen/scripts/stats/LiquidAssoc.R +136 -0
- biopipen/scripts/stats/MetaPvalue.R +128 -0
- biopipen/scripts/tcr/CloneResidency.R +37 -72
- biopipen/utils/misc.R +19 -0
- biopipen/utils/misc.py +15 -0
- {biopipen-0.25.4.dist-info → biopipen-0.26.1.dist-info}/METADATA +9 -10
- {biopipen-0.25.4.dist-info → biopipen-0.26.1.dist-info}/RECORD +27 -17
- {biopipen-0.25.4.dist-info → biopipen-0.26.1.dist-info}/WHEEL +1 -1
- {biopipen-0.25.4.dist-info → biopipen-0.26.1.dist-info}/entry_points.txt +2 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# Loaded variables: srtfile, outdir, srtobj
|
|
2
|
+
library(circlize)
|
|
2
3
|
|
|
3
4
|
stats_defaults = {{envs.stats_defaults | r: todot="-"}}
|
|
4
5
|
stats = {{envs.stats | r: todot="-", skip=1}}
|
|
@@ -12,6 +13,7 @@ do_one_stats = function(name) {
|
|
|
12
13
|
case = list_update(stats_defaults, stats[[name]])
|
|
13
14
|
case$devpars = list_update(stats_defaults$devpars, case$devpars)
|
|
14
15
|
case$pie_devpars = list_update(stats_defaults$pie_devpars, case$pie_devpars)
|
|
16
|
+
case$circos_devpars = list_update(stats_defaults$circos_devpars, case$circos_devpars)
|
|
15
17
|
if (isTRUE(case$pie) && !is.null(case$group.by)) {
|
|
16
18
|
stop(paste0(name, ": pie charts are not supported for group-by"))
|
|
17
19
|
}
|
|
@@ -27,6 +29,7 @@ do_one_stats = function(name) {
|
|
|
27
29
|
|
|
28
30
|
figfile = file.path(odir, paste0(slugify(name), ".bar.png"))
|
|
29
31
|
piefile = file.path(odir, paste0(slugify(name), ".pie.png"))
|
|
32
|
+
circosfile = file.path(odir, paste0(slugify(name), ".circos.png"))
|
|
30
33
|
samtablefile = file.path(odir, paste0(slugify(name), ".bysample.txt"))
|
|
31
34
|
tablefile = file.path(odir, paste0(slugify(name), ".txt"))
|
|
32
35
|
|
|
@@ -172,6 +175,40 @@ do_one_stats = function(name) {
|
|
|
172
175
|
ui = "tabs"
|
|
173
176
|
)
|
|
174
177
|
}
|
|
178
|
+
|
|
179
|
+
if (isTRUE(case$circos)) {
|
|
180
|
+
if (isTRUE(case$transpose)) {
|
|
181
|
+
circos_df <- plot_df %>%
|
|
182
|
+
select(from=!!sym(case$ident), to=!!sym(case$group.by), value=.n)
|
|
183
|
+
} else {
|
|
184
|
+
circos_df <- plot_df %>%
|
|
185
|
+
select(from=!!sym(case$group.by), to=!!sym(case$ident), value=.n)
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
png(
|
|
189
|
+
circosfile,
|
|
190
|
+
width=case$circos_devpars$width,
|
|
191
|
+
height=case$circos_devpars$height,
|
|
192
|
+
res=case$circos_devpars$res
|
|
193
|
+
)
|
|
194
|
+
circos.clear()
|
|
195
|
+
chordDiagram(
|
|
196
|
+
circos_df,
|
|
197
|
+
direction = 1,
|
|
198
|
+
direction.type = c("diffHeight", "arrows"),
|
|
199
|
+
link.arr.type = "big.arrow"
|
|
200
|
+
)
|
|
201
|
+
dev.off()
|
|
202
|
+
|
|
203
|
+
add_report(
|
|
204
|
+
list(
|
|
205
|
+
name = "Circos plot",
|
|
206
|
+
contents = list(list(kind = "image", src = circosfile))
|
|
207
|
+
),
|
|
208
|
+
h1 = name,
|
|
209
|
+
ui = "tabs"
|
|
210
|
+
)
|
|
211
|
+
}
|
|
175
212
|
}
|
|
176
213
|
|
|
177
214
|
sapply(names(stats), do_one_stats)
|
|
@@ -20,6 +20,7 @@ prefix_each <- {{ envs.prefix_each | r }}
|
|
|
20
20
|
section <- {{ envs.section | r }}
|
|
21
21
|
dbs <- {{ envs.dbs | r }}
|
|
22
22
|
n <- {{ envs.n | r }}
|
|
23
|
+
sset <- {{ envs.subset | r }}
|
|
23
24
|
cases <- {{ envs.cases | r: todot = "-" }} # nolint
|
|
24
25
|
|
|
25
26
|
set.seed(8525)
|
|
@@ -43,7 +44,8 @@ if (is.null(cases) || length(cases) == 0) {
|
|
|
43
44
|
prefix_each = prefix_each,
|
|
44
45
|
section = section,
|
|
45
46
|
dbs = dbs,
|
|
46
|
-
n = n
|
|
47
|
+
n = n,
|
|
48
|
+
subset = sset
|
|
47
49
|
)
|
|
48
50
|
)
|
|
49
51
|
} else {
|
|
@@ -56,7 +58,8 @@ if (is.null(cases) || length(cases) == 0) {
|
|
|
56
58
|
prefix_each = prefix_each,
|
|
57
59
|
section = section,
|
|
58
60
|
dbs = dbs,
|
|
59
|
-
n = n
|
|
61
|
+
n = n,
|
|
62
|
+
subset = sset
|
|
60
63
|
)
|
|
61
64
|
})
|
|
62
65
|
}
|
|
@@ -144,7 +147,7 @@ casename_info <- function(casename, create = FALSE) {
|
|
|
144
147
|
}
|
|
145
148
|
|
|
146
149
|
do_enrich <- function(expr, odir) {
|
|
147
|
-
|
|
150
|
+
log_debug(" Saving expressions ...")
|
|
148
151
|
expr <- expr %>% as.data.frame()
|
|
149
152
|
colnames(expr) <- c("Expression")
|
|
150
153
|
expr <- expr %>% rownames_to_column("Gene") %>% select(Gene, Expression)
|
|
@@ -165,7 +168,7 @@ do_enrich <- function(expr, odir) {
|
|
|
165
168
|
quote = FALSE
|
|
166
169
|
)
|
|
167
170
|
|
|
168
|
-
|
|
171
|
+
log_debug(" Running enrichment ...")
|
|
169
172
|
enriched <- enrichr(head(expr$Gene, n), dbs) # nolint
|
|
170
173
|
for (db in dbs) {
|
|
171
174
|
write.table(
|
|
@@ -178,7 +181,7 @@ do_enrich <- function(expr, odir) {
|
|
|
178
181
|
)
|
|
179
182
|
|
|
180
183
|
if (nrow(enriched[[db]]) == 0) {
|
|
181
|
-
|
|
184
|
+
log_warn(paste0(" No enriched terms for ", db))
|
|
182
185
|
next
|
|
183
186
|
}
|
|
184
187
|
|
|
@@ -199,15 +202,24 @@ do_case <- function(casename) {
|
|
|
199
202
|
case <- cases[[casename]]
|
|
200
203
|
info <- casename_info(casename, create = TRUE)
|
|
201
204
|
|
|
202
|
-
|
|
203
|
-
|
|
205
|
+
log_debug(" Calculating average expression ...")
|
|
206
|
+
if (!is.null(case$subset)) {
|
|
207
|
+
tryCatch({
|
|
208
|
+
sobj <- subset(srtobj, !!parse_expr(case$subset))
|
|
209
|
+
}, error = function(e) {
|
|
210
|
+
log_warn(" No cells found for the subset, skipping ...")
|
|
211
|
+
})
|
|
212
|
+
} else {
|
|
213
|
+
sobj <- srtobj
|
|
214
|
+
}
|
|
215
|
+
assay <- DefaultAssay(sobj)
|
|
204
216
|
avgexpr <- AverageExpression(
|
|
205
|
-
|
|
217
|
+
sobj,
|
|
206
218
|
group.by = case$group.by,
|
|
207
219
|
assays = assay
|
|
208
220
|
)[[assay]]
|
|
209
221
|
# https://github.com/satijalab/seurat/issues/7893
|
|
210
|
-
colnames(avgexpr) <- as.character(unique(
|
|
222
|
+
colnames(avgexpr) <- as.character(unique(sobj@meta.data[[case$group.by]]))
|
|
211
223
|
avgexpr <- avgexpr[, case$ident, drop = FALSE]
|
|
212
224
|
avgexpr <- avgexpr[order(-avgexpr), , drop = FALSE]
|
|
213
225
|
|
|
@@ -217,7 +229,7 @@ do_case <- function(casename) {
|
|
|
217
229
|
}
|
|
218
230
|
|
|
219
231
|
add_case_report <- function(info) {
|
|
220
|
-
|
|
232
|
+
log_debug(" Adding case report ...")
|
|
221
233
|
h1 = ifelse(
|
|
222
234
|
info$section == "DEFAULT",
|
|
223
235
|
info$case,
|
|
@@ -237,30 +249,43 @@ add_case_report <- function(info) {
|
|
|
237
249
|
ifelse(single_section, "#", info$case)
|
|
238
250
|
)
|
|
239
251
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
kind = "
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
252
|
+
if (!is.null(info$error)) {
|
|
253
|
+
add_report(
|
|
254
|
+
list(
|
|
255
|
+
kind = "descr",
|
|
256
|
+
content = paste0("Top ", n, " expressing genes")
|
|
257
|
+
),
|
|
258
|
+
list(kind = "error", content = info$error),
|
|
259
|
+
h1 = h1,
|
|
260
|
+
h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
|
|
261
|
+
h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
|
|
262
|
+
)
|
|
263
|
+
} else {
|
|
264
|
+
add_report(
|
|
265
|
+
list(
|
|
266
|
+
kind = "descr",
|
|
267
|
+
content = paste0("Top ", n, " expressing genes")
|
|
268
|
+
),
|
|
269
|
+
list(
|
|
270
|
+
kind = "table",
|
|
271
|
+
src = file.path(info$casedir, "exprn.txt")
|
|
272
|
+
),
|
|
273
|
+
h1 = h1,
|
|
274
|
+
h2 = ifelse(h2 == "#", "Top Expressing Genes", h2),
|
|
275
|
+
h3 = ifelse(h2 == "#", "#", "Top Expressing Genes")
|
|
276
|
+
)
|
|
253
277
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
278
|
+
add_report(
|
|
279
|
+
list(
|
|
280
|
+
kind = "descr",
|
|
281
|
+
content = paste0("Enrichment analysis for the top ", n, " expressing genes")
|
|
282
|
+
),
|
|
283
|
+
list(kind = "enrichr", dir = info$casedir),
|
|
284
|
+
h1 = h1,
|
|
285
|
+
h2 = ifelse(h2 == "#", "Enrichment Analysis", h2),
|
|
286
|
+
h3 = ifelse(h2 == "#", "#", "Enrichment Analysis")
|
|
287
|
+
)
|
|
288
|
+
}
|
|
264
289
|
}
|
|
265
290
|
|
|
266
291
|
sapply(sort(names(cases)), do_case)
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from biopipen.utils.misc import logger, run_command, dict_to_cli_args
|
|
3
|
+
|
|
4
|
+
nsnps = {{in.nsnps | repr}} # pyright: ignore
|
|
5
|
+
ncases = {{in.ncases | repr}} # pyright: ignore
|
|
6
|
+
nctrls = {{in.nctrls | repr}} # pyright: ignore
|
|
7
|
+
outdir = {{out.outdir | repr}} # pyright: ignore
|
|
8
|
+
gtmatfile = {{out.gtmat | repr}} # pyright: ignore
|
|
9
|
+
plink = {{envs.plink | repr}} # pyright: ignore
|
|
10
|
+
seed = {{envs.seed | repr}} # pyright: ignore
|
|
11
|
+
label = {{envs.label | repr}} # pyright: ignore
|
|
12
|
+
prevalence = {{envs.prevalence | repr}} # pyright: ignore
|
|
13
|
+
minfreq = {{envs.minfreq | repr}} # pyright: ignore
|
|
14
|
+
maxfreq = {{envs.maxfreq | repr}} # pyright: ignore
|
|
15
|
+
hetodds = {{envs.hetodds | repr}} # pyright: ignore
|
|
16
|
+
homodds = {{envs.homodds | repr}} # pyright: ignore
|
|
17
|
+
missing = {{envs.missing | repr}} # pyright: ignore
|
|
18
|
+
args = {{envs.args | repr}} # pyright: ignore
|
|
19
|
+
transpose_gtmat = {{envs.transpose_gtmat | repr}} # pyright: ignore
|
|
20
|
+
sample_prefix = {{envs.sample_prefix | repr}} # pyright: ignore
|
|
21
|
+
|
|
22
|
+
logger.info("Generating parameters file")
|
|
23
|
+
params_file = Path(outdir) / "params.txt"
|
|
24
|
+
params_file.write_text(
|
|
25
|
+
f"{nsnps}\t{label}\t{minfreq}\t{maxfreq}\t{hetodds}\t{homodds}\n"
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
if seed is not None:
|
|
29
|
+
args["seed"] = seed
|
|
30
|
+
|
|
31
|
+
args["simulate"] = params_file
|
|
32
|
+
args["out"] = Path(outdir) / "sim_snps"
|
|
33
|
+
args["simulate-ncases"] = ncases
|
|
34
|
+
args["simulate-ncontrols"] = nctrls
|
|
35
|
+
args["simulate-prevalence"] = prevalence
|
|
36
|
+
args["simulate-missing"] = missing
|
|
37
|
+
|
|
38
|
+
cmd = [plink] + dict_to_cli_args(args)
|
|
39
|
+
|
|
40
|
+
logger.info("Running PLINK simulation ...")
|
|
41
|
+
run_command(cmd, fg=True)
|
|
42
|
+
|
|
43
|
+
# Transpose the genotype matrix
|
|
44
|
+
# CHR SNP (C)M POS COUNTED ALT per0_per0 per1_per1 per2_per2
|
|
45
|
+
# 1 SNP_0 0 1 D d 1 0 1
|
|
46
|
+
# 1 SNP_1 0 2 d D 0 1 0
|
|
47
|
+
# 1 SNP_2 0 3 d D 0 0 0
|
|
48
|
+
# 1 SNP_3 0 4 d D 0 0 0
|
|
49
|
+
# 1 SNP_4 0 5 D d 1 2 1
|
|
50
|
+
cmd = [
|
|
51
|
+
plink,
|
|
52
|
+
"--recode",
|
|
53
|
+
"A" if transpose_gtmat else "A-transpose",
|
|
54
|
+
"tab",
|
|
55
|
+
"--bfile",
|
|
56
|
+
args["out"],
|
|
57
|
+
"--out",
|
|
58
|
+
gtmatfile + ".plink.recoded",
|
|
59
|
+
]
|
|
60
|
+
logger.info("Recoding into genotype matrix ...")
|
|
61
|
+
run_command(cmd, fg=True)
|
|
62
|
+
|
|
63
|
+
logger.info("Saving genotype matrix ...")
|
|
64
|
+
## transpose_gtmat = False
|
|
65
|
+
# SNP_COUNTED per0_per0 per1_per1 per2_per2
|
|
66
|
+
# SNP_0_D 1 0 1
|
|
67
|
+
# SNP_1_d 0 1 0
|
|
68
|
+
# SNP_2_d 0 0 0
|
|
69
|
+
# SNP_3_d 0 0 0
|
|
70
|
+
# SNP_4_D 1 2 1
|
|
71
|
+
## transpose_gtmat = True
|
|
72
|
+
# FID_IID SNP_0_D SNP_1_D SNP_2_D
|
|
73
|
+
# per0_per0 0 1 1
|
|
74
|
+
# per1_per1 0 2 0
|
|
75
|
+
# per2_per2 0 0 0
|
|
76
|
+
# per3_per3 1 1 0
|
|
77
|
+
# per4_per4 0 0 0
|
|
78
|
+
if transpose_gtmat:
|
|
79
|
+
cmd = f"cut -f1,2,7- {gtmatfile}.plink.recoded.raw | sed 's/\\t/_/'"
|
|
80
|
+
else:
|
|
81
|
+
cmd = f"cut -f2,5,7- {gtmatfile}.plink.recoded.traw | sed 's/\\t/_/'"
|
|
82
|
+
|
|
83
|
+
if sample_prefix:
|
|
84
|
+
cmd = f"{cmd} | sed 's/per[0-9]\\+_per/{sample_prefix}/g'"
|
|
85
|
+
|
|
86
|
+
cmd = f"{cmd} > {gtmatfile}"
|
|
87
|
+
|
|
88
|
+
run_command(cmd, fg=True)
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
|
|
3
|
+
library(rlang)
|
|
4
|
+
library(dplyr)
|
|
5
|
+
|
|
6
|
+
infile <- {{in.infile | r}}
|
|
7
|
+
groupfile <- {{in.groupfile | r}}
|
|
8
|
+
fmlfile <- {{in.fmlfile | r}}
|
|
9
|
+
outfile <- {{out.outfile | r}}
|
|
10
|
+
padj <- {{envs.padj | r}}
|
|
11
|
+
transpose_input <- {{envs.transpose_input | r}}
|
|
12
|
+
transpose_group <- {{envs.transpose_group | r}}
|
|
13
|
+
|
|
14
|
+
log_info("Reading input files ...")
|
|
15
|
+
indata <- read.table(infile, header = TRUE, sep = "\t", row.names = 1)
|
|
16
|
+
if (transpose_input) {
|
|
17
|
+
indata <- t(indata)
|
|
18
|
+
}
|
|
19
|
+
groupdata <- read.table(groupfile, header = TRUE, sep = "\t", row.names = 1)
|
|
20
|
+
if (transpose_group) {
|
|
21
|
+
groupdata <- t(groupdata)
|
|
22
|
+
}
|
|
23
|
+
fmldata <- read.table(fmlfile, header = TRUE, sep = "\t", row.names = NULL)
|
|
24
|
+
colnames(fmldata)[1:2] <- c("Group", "Formula")
|
|
25
|
+
|
|
26
|
+
chow.test <- function(fml, grouping) {
|
|
27
|
+
formula <- as.formula(fml)
|
|
28
|
+
pooled_lm <- tryCatch(lm(formula, data = indata), error = function(e) NULL)
|
|
29
|
+
if (is.null(pooled_lm)) {
|
|
30
|
+
return(list(
|
|
31
|
+
pooled.lm = NA,
|
|
32
|
+
group.lms = NULL,
|
|
33
|
+
Fstat = NA,
|
|
34
|
+
group = grouping,
|
|
35
|
+
pooled.ssr = NA,
|
|
36
|
+
group.ssr = NA,
|
|
37
|
+
Pval = NA
|
|
38
|
+
))
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
splitdata <- split(indata, groupdata[rownames(indata), grouping])
|
|
42
|
+
group_lms <- lapply(names(splitdata), function(g) {
|
|
43
|
+
tryCatch(lm(formula, data = splitdata[[g]]), error = function(e) NULL)
|
|
44
|
+
})
|
|
45
|
+
names(group_lms) <- names(splitdata)
|
|
46
|
+
|
|
47
|
+
fmvars <- all.vars(formula)
|
|
48
|
+
pooled.ssr <- sum(pooled_lm$residuals ^ 2)
|
|
49
|
+
subssr <- ifelse(any(is.null(group_lms)), NA, sum(sapply(group_lms, function(x) sum(x$residuals ^ 2))))
|
|
50
|
+
ngroups <- length(splitdata)
|
|
51
|
+
K <- ifelse(fmvars[2] == ".", ncol(indata), length(fmvars))
|
|
52
|
+
J <- (ngroups - 1) * K
|
|
53
|
+
DF <- nrow(indata) - ngroups * K
|
|
54
|
+
FS <- (pooled.ssr - subssr) * DF / subssr / J
|
|
55
|
+
list(
|
|
56
|
+
pooled.lm = pooled_lm,
|
|
57
|
+
group.lms = group_lms,
|
|
58
|
+
Fstat = FS,
|
|
59
|
+
group = grouping,
|
|
60
|
+
pooled.ssr = pooled.ssr,
|
|
61
|
+
group.ssr = subssr,
|
|
62
|
+
Pval = pf(FS, J, DF, lower.tail = FALSE)
|
|
63
|
+
)
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
formatlm <- function(m) {
|
|
67
|
+
if (class(m) == 'lm') {
|
|
68
|
+
coeff <- as.list(m$coefficients)
|
|
69
|
+
vars <- all.vars(m$terms)
|
|
70
|
+
terms <- unlist(sapply(na.omit(c(vars[2:length(vars)], '(Intercept)', 'N')), function(x) {
|
|
71
|
+
ce <- coeff[[x]] %||% coeff[[bQuote(x)]]
|
|
72
|
+
if (x == 'N') {
|
|
73
|
+
paste0('N=', nrow(m$model))
|
|
74
|
+
} else if (is.null(ce)) {
|
|
75
|
+
NULL
|
|
76
|
+
} else {
|
|
77
|
+
l <- ifelse(x == '(Intercept)', '_', x)
|
|
78
|
+
paste0(l, '=', round(ce, 3))
|
|
79
|
+
}
|
|
80
|
+
}))
|
|
81
|
+
paste(terms[!is.null(terms)], collapse = ', ')
|
|
82
|
+
} else {
|
|
83
|
+
paste(sapply(names(m), function(x) {
|
|
84
|
+
paste0(x, ': ', formatlm(m[[x]]))
|
|
85
|
+
}), collapse = ' // ')
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
log_info("Running Chow tests ...")
|
|
90
|
+
ncases <- nrow(fmldata)
|
|
91
|
+
results <- do_call(rbind, lapply(
|
|
92
|
+
seq_len(ncases),
|
|
93
|
+
function(i) {
|
|
94
|
+
fmlrow <- fmldata[i, , drop=TRUE]
|
|
95
|
+
if (i %% 100 == 0) {
|
|
96
|
+
log_info("- {i} / {ncases} ...")
|
|
97
|
+
}
|
|
98
|
+
log_debug(" Running Chow test for formula: {fmlrow$Formula} (grouping = {fmlrow$Group})")
|
|
99
|
+
|
|
100
|
+
res <- chow.test(fmlrow$Formula, fmlrow$Group)
|
|
101
|
+
fmlrow$Pooled <- formatlm(res$pooled.lm)
|
|
102
|
+
fmlrow$Groups <- formatlm(res$group.lms)
|
|
103
|
+
fmlrow$SSR <- res$group.ssr
|
|
104
|
+
fmlrow$SumSSR <- res$pooled.ssr
|
|
105
|
+
fmlrow$Fstat <- res$Fstat
|
|
106
|
+
fmlrow$Pval <- res$Pval
|
|
107
|
+
fmlrow
|
|
108
|
+
}
|
|
109
|
+
)) %>% as.data.frame()
|
|
110
|
+
|
|
111
|
+
if (padj != "none") {
|
|
112
|
+
log_info("Adjusting p-values ...")
|
|
113
|
+
results$Padj <- p.adjust(results$Pval, method = padj)
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
log_info("Writing output ...")
|
|
117
|
+
# unimplemented type 'list' in 'EncodeElement'
|
|
118
|
+
results <- apply(results, 2, as.character)
|
|
119
|
+
write.table(results, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
source("{{biopipen_dir}}/utils/misc.R")
|
|
2
|
+
library(dcanr)
|
|
3
|
+
library(scuttle)
|
|
4
|
+
library(doRNG)
|
|
5
|
+
library(doParallel)
|
|
6
|
+
library(snpStats)
|
|
7
|
+
library(rlang)
|
|
8
|
+
library(dplyr)
|
|
9
|
+
|
|
10
|
+
infile <- {{in.infile | r}}
|
|
11
|
+
groupfile <- {{in.groupfile | r}}
|
|
12
|
+
outfile <- {{out.outfile | r}}
|
|
13
|
+
method <- {{envs.method | r}}
|
|
14
|
+
beta <- {{envs.beta | r}}
|
|
15
|
+
padj <- {{envs.padj | r}}
|
|
16
|
+
perm_batch <- {{envs.perm_batch | r}}
|
|
17
|
+
seed <- {{envs.seed | r}}
|
|
18
|
+
ncores <- {{envs.ncores | r}}
|
|
19
|
+
transpose_input <- {{envs.transpose_input | r}}
|
|
20
|
+
transpose_group <- {{envs.transpose_group | r}}
|
|
21
|
+
|
|
22
|
+
log_info("Setting seed and parallel backend ...")
|
|
23
|
+
set.seed(seed)
|
|
24
|
+
registerDoParallel(cores = ncores)
|
|
25
|
+
registerDoRNG(seed)
|
|
26
|
+
|
|
27
|
+
log_info("Reading input files ...")
|
|
28
|
+
indata <- read.table(infile, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE)
|
|
29
|
+
if (transpose_input) {
|
|
30
|
+
indata <- t(indata)
|
|
31
|
+
}
|
|
32
|
+
gdata <- read.table(groupfile, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE)
|
|
33
|
+
if (transpose_group) {
|
|
34
|
+
gdata <- t(gdata)
|
|
35
|
+
}
|
|
36
|
+
ngroups <- ncol(gdata)
|
|
37
|
+
|
|
38
|
+
sign2 <- function(x) sign(x) * x^2
|
|
39
|
+
mat2vec <- dcanr:::mat2vec
|
|
40
|
+
|
|
41
|
+
diffcoex_score <- function(group) {
|
|
42
|
+
|
|
43
|
+
gvals <- unique(gdata[, group, drop = TRUE])
|
|
44
|
+
if (length(gvals) < 2) {
|
|
45
|
+
log_warn(" Less than 2 groups in the input. Skipping ...")
|
|
46
|
+
return(NULL)
|
|
47
|
+
}
|
|
48
|
+
rs <- lapply(gvals, function(gval) {
|
|
49
|
+
samples <- rownames(gdata[gdata[[group]] == gval, , drop = FALSE])
|
|
50
|
+
expr <- indata[samples, , drop = FALSE]
|
|
51
|
+
if (length(samples) < 3) {
|
|
52
|
+
log_warn(" Less than 3 samples in one of the groups. Skipping ...")
|
|
53
|
+
return(NULL)
|
|
54
|
+
}
|
|
55
|
+
cor.pairs(as.matrix(expr), cor.method = method)
|
|
56
|
+
})
|
|
57
|
+
rs[sapply(rs, is.null)] <- NULL
|
|
58
|
+
if (length(rs) < 2) {
|
|
59
|
+
log_warn(" Less than 2 groups with at least 3 samples. Skipping ...")
|
|
60
|
+
return(NULL)
|
|
61
|
+
}
|
|
62
|
+
N <- length(rs)
|
|
63
|
+
C0 <- lapply(rs, sign2)
|
|
64
|
+
C0 <- Reduce(`+`, C0) / N
|
|
65
|
+
D <- lapply(rs, function(r) abs(sign2(r) - C0))
|
|
66
|
+
D <- Reduce(`+`, D) / 2 / (N - 1)
|
|
67
|
+
D <- sqrt(D)
|
|
68
|
+
D <- D^beta
|
|
69
|
+
T_ovlap <- D %*% D + ncol(D) * D #calc topological ovlap
|
|
70
|
+
|
|
71
|
+
mins = matrix(rep(rowSums(D), ncol(D)), nrow = ncol(D))
|
|
72
|
+
mins = pmin(mins, matrix(rep(colSums(D), each = ncol(D)), nrow = ncol(D)))
|
|
73
|
+
T_ovlap = 1 - (T_ovlap/(mins + 1 - D))
|
|
74
|
+
|
|
75
|
+
diag(T_ovlap) = 1
|
|
76
|
+
|
|
77
|
+
#add run parameters as attributes
|
|
78
|
+
attributes(T_ovlap) = c(
|
|
79
|
+
attributes(T_ovlap),
|
|
80
|
+
'method' = method,
|
|
81
|
+
'beta' = beta,
|
|
82
|
+
'call' = match.call()
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
return(1 - T_ovlap)
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
perm_test <- function(dcscores, group, B = perm_batch) {
|
|
90
|
+
obs = mat2vec(dcscores)
|
|
91
|
+
|
|
92
|
+
#package requirements
|
|
93
|
+
pckgs = c('dcanr')
|
|
94
|
+
|
|
95
|
+
#perform permutation
|
|
96
|
+
pvals = foreach(
|
|
97
|
+
b = seq_len(B),
|
|
98
|
+
.combine = function(...) {mapply(sum, ...)},
|
|
99
|
+
.multicombine = TRUE,
|
|
100
|
+
.inorder = FALSE,
|
|
101
|
+
.packages = pckgs
|
|
102
|
+
) %dorng% {
|
|
103
|
+
#shuffle condition and recalculate scores
|
|
104
|
+
env = new.env()
|
|
105
|
+
assign('group', group, envir = env)
|
|
106
|
+
permsc = eval(attr(dcscores, 'call'), envir = env)
|
|
107
|
+
permsc = mat2vec(permsc)
|
|
108
|
+
|
|
109
|
+
#count elements greater than obs
|
|
110
|
+
permsc = abs(permsc)
|
|
111
|
+
permsc = permsc[!(is.na(permsc) || is.infinite(permsc))]
|
|
112
|
+
permcounts = vapply(abs(obs), function(x) sum(permsc > x), 0)
|
|
113
|
+
return(c(permcounts, length(permsc)))
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
#p-values
|
|
117
|
+
N <- pvals[length(pvals)]
|
|
118
|
+
pvals <- pvals[-(length(pvals))] / N
|
|
119
|
+
# attributes(pvals) = attributes(obs)
|
|
120
|
+
# pvals = dcanr:::vec2mat(pvals)
|
|
121
|
+
# attr(pvals, 'dc.test') = 'permutation'
|
|
122
|
+
# return(pvals)
|
|
123
|
+
# Format into Group,Feature1,Feature2,Pval
|
|
124
|
+
feature_pairs <- as.data.frame(t(combn(attr(obs, 'feature.names'), 2)))
|
|
125
|
+
colnames(feature_pairs) <- c('Feature1', 'Feature2')
|
|
126
|
+
feature_pairs$Group <- group
|
|
127
|
+
feature_pairs$Pval <- pvals
|
|
128
|
+
feature_pairs[, c('Group', 'Feature1', 'Feature2', 'Pval'), drop = FALSE]
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
do_one_group <- function(i) {
|
|
132
|
+
group <- colnames(gdata)[i]
|
|
133
|
+
log_info("- Processing group {i}/{ngroups}: {group} ...")
|
|
134
|
+
log_info(" Calculating differential co-expression scores ...")
|
|
135
|
+
dcscores <- diffcoex_score(group)
|
|
136
|
+
|
|
137
|
+
if (!is.null(dcscores)) {
|
|
138
|
+
log_info(" Calculating p-values ...")
|
|
139
|
+
perm_test(dcscores, group)
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
trios <- do_call(rbind, lapply(seq_len(ngroups), do_one_group))
|
|
144
|
+
if (padj != "none") {
|
|
145
|
+
log_info("Correcting p-values ...")
|
|
146
|
+
trios$Padj <- p.adjust(trios$Pval, method = padj)
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
log_info("Writing output ...")
|
|
150
|
+
write.table(trios, file = outfile, sep = "\t", quote = FALSE, row.names = FALSE)
|