biopipen 0.27.1__py3-none-any.whl → 0.27.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/ns/scrna.py +8 -0
- biopipen/ns/snp.py +18 -15
- biopipen/ns/tcr.py +73 -0
- biopipen/scripts/scrna/MarkersFinder.R +30 -5
- biopipen/scripts/scrna/MetaMarkers.R +17 -3
- biopipen/scripts/scrna/RadarPlots.R +2 -2
- biopipen/scripts/scrna/ScFGSEA.R +21 -4
- biopipen/scripts/scrna/SeuratPreparing.R +113 -1
- biopipen/scripts/snp/PlinkSimulation.py +119 -83
- biopipen/scripts/stats/DiffCoexpr.R +3 -3
- biopipen/scripts/tcr/CloneResidency.R +16 -4
- biopipen/scripts/tcr/TCRDock.py +106 -0
- biopipen/utils/misc.py +5 -1
- {biopipen-0.27.1.dist-info → biopipen-0.27.3.dist-info}/METADATA +3 -2
- {biopipen-0.27.1.dist-info → biopipen-0.27.3.dist-info}/RECORD +18 -17
- {biopipen-0.27.1.dist-info → biopipen-0.27.3.dist-info}/WHEEL +0 -0
- {biopipen-0.27.1.dist-info → biopipen-0.27.3.dist-info}/entry_points.txt +0 -0
biopipen/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.27.
|
|
1
|
+
__version__ = "0.27.3"
|
biopipen/ns/scrna.py
CHANGED
|
@@ -201,6 +201,13 @@ class SeuratPreparing(Proc):
|
|
|
201
201
|
- scvi: Same as `scVIIntegration`.
|
|
202
202
|
- <more>: See <https://satijalab.org/seurat/reference/integratelayers>
|
|
203
203
|
|
|
204
|
+
DoubletFinder (ns): Arguments to run [`DoubletFinder`](https://github.com/chris-mcginnis-ucsf/DoubletFinder).
|
|
205
|
+
See also <https://demultiplexing-doublet-detecting-docs.readthedocs.io/en/latest/DoubletFinder.html>.
|
|
206
|
+
To disable `DoubletFinder`, set `envs.DoubletFinder` to `None` or `False`; or set `PCs` to `0`.
|
|
207
|
+
- PCs (type=int): Number of PCs to use for 'doubletFinder' function.
|
|
208
|
+
- doublets (type=float): Number of expected doublets as a proportion of the pool size.
|
|
209
|
+
- pN (type=float): Number of doublets to simulate as a proportion of the pool size.
|
|
210
|
+
|
|
204
211
|
Requires:
|
|
205
212
|
r-seurat:
|
|
206
213
|
- check: {{proc.lang}} <(echo "library(Seurat)")
|
|
@@ -227,6 +234,7 @@ class SeuratPreparing(Proc):
|
|
|
227
234
|
"min_cells": 5,
|
|
228
235
|
},
|
|
229
236
|
"IntegrateLayers": {"method": "harmony"},
|
|
237
|
+
"DoubletFinder": {"PCs": 0, "pN": 0.25, "doublets": 0.075},
|
|
230
238
|
}
|
|
231
239
|
script = "file://../scripts/scrna/SeuratPreparing.R"
|
|
232
240
|
plugin_opts = {
|
biopipen/ns/snp.py
CHANGED
|
@@ -7,12 +7,15 @@ from ..core.config import config
|
|
|
7
7
|
class PlinkSimulation(Proc):
|
|
8
8
|
"""Simulate SNPs using PLINK v1.9
|
|
9
9
|
|
|
10
|
-
See also <https://www.cog-genomics.org/plink/1.9/input#simulate
|
|
10
|
+
See also <https://www.cog-genomics.org/plink/1.9/input#simulate> and
|
|
11
|
+
<https://pwwang.github.io/biopipen/api/biopipen.ns.snp/#biopipen.ns.snp.PlinkSimulation>
|
|
11
12
|
|
|
12
13
|
Input:
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
14
|
+
configfile: Configuration file containing the parameters for the simulation.
|
|
15
|
+
The configuration file (in toml, yaml or json format) should contain a
|
|
16
|
+
dictionary of parameters. The parameters are listed in `envs` except
|
|
17
|
+
`ncores`, which is used for parallelization. You can set parameters
|
|
18
|
+
in `envs` and override them in the configuration file.
|
|
16
19
|
|
|
17
20
|
Output:
|
|
18
21
|
outdir: Output directory containing the simulated data
|
|
@@ -21,9 +24,11 @@ class PlinkSimulation(Proc):
|
|
|
21
24
|
SNPs and columns representing samples.
|
|
22
25
|
|
|
23
26
|
Envs:
|
|
27
|
+
nsnps (type=int): Number of SNPs to simulate
|
|
28
|
+
ncases (type=int): Number of cases to simulate
|
|
29
|
+
nctrls (type=int): Number of controls to simulate
|
|
24
30
|
plink: Path to PLINK v1.9
|
|
25
|
-
seed (type=int): Random seed.
|
|
26
|
-
If not set, seed will not be set.
|
|
31
|
+
seed (type=int): Random seed. If not set, seed will not be set.
|
|
27
32
|
label: Prefix label for the SNPs.
|
|
28
33
|
prevalence (type=float): Disease prevalence.
|
|
29
34
|
minfreq (type=float): Minimum allele frequency.
|
|
@@ -41,19 +46,17 @@ class PlinkSimulation(Proc):
|
|
|
41
46
|
This only affects the sample names in the genotype matrix file
|
|
42
47
|
(`out.gtmat`).
|
|
43
48
|
"""
|
|
44
|
-
input = "
|
|
49
|
+
input = "configfile:file"
|
|
45
50
|
output = [
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
),
|
|
50
|
-
(
|
|
51
|
-
"gtmat:file:{{in.nsnps | int}}_"
|
|
52
|
-
"{{in.ncases | int}}xcases_{{in.nctrls | int}}xctrls.plink_sim/gtmat.txt"
|
|
53
|
-
),
|
|
51
|
+
"outdir:dir:{{in.configfile | stem}}.plink_sim",
|
|
52
|
+
"gtmat:file:{{in.configfile | stem}}.plink_sim/"
|
|
53
|
+
"{{in.configfile | stem}}-gtmat.txt",
|
|
54
54
|
]
|
|
55
55
|
lang = config.lang.python
|
|
56
56
|
envs = {
|
|
57
|
+
"nsnps": None,
|
|
58
|
+
"ncases": None,
|
|
59
|
+
"nctrls": None,
|
|
57
60
|
"plink": config.exe.plink,
|
|
58
61
|
"seed": None,
|
|
59
62
|
"label": "SNP",
|
biopipen/ns/tcr.py
CHANGED
|
@@ -983,6 +983,7 @@ class CloneResidency(Proc):
|
|
|
983
983
|
before calculating the clone residency. For example, `Clones > 1` to filter
|
|
984
984
|
out singletons.
|
|
985
985
|
prefix: The prefix of the cell barcodes in the `Seurat` object.
|
|
986
|
+
upset_ymax: The maximum value of the y-axis in the upset bar plots.
|
|
986
987
|
upset_trans: The transformation to apply to the y axis of upset bar plots.
|
|
987
988
|
For example, `log10` or `sqrt`. If not specified, the y axis will be
|
|
988
989
|
plotted as is. Note that the position of the bar plots will be dodged
|
|
@@ -1007,6 +1008,7 @@ class CloneResidency(Proc):
|
|
|
1007
1008
|
"mutaters": {},
|
|
1008
1009
|
"subset": None,
|
|
1009
1010
|
"prefix": "{Sample}_",
|
|
1011
|
+
"upset_ymax": None,
|
|
1010
1012
|
"upset_trans": None,
|
|
1011
1013
|
"cases": {},
|
|
1012
1014
|
}
|
|
@@ -1595,3 +1597,74 @@ class TESSA(Proc):
|
|
|
1595
1597
|
}
|
|
1596
1598
|
script = "file://../scripts/tcr/TESSA.R"
|
|
1597
1599
|
plugin_opts = {"report": "file://../reports/tcr/TESSA.svelte"}
|
|
1600
|
+
|
|
1601
|
+
|
|
1602
|
+
class TCRDock(Proc):
|
|
1603
|
+
"""Using TCRDock to predict the structure of MHC-peptide-TCR complexes
|
|
1604
|
+
|
|
1605
|
+
See <https://github.com/phbradley/TCRdock>.
|
|
1606
|
+
|
|
1607
|
+
Input:
|
|
1608
|
+
configfile: The config file for TCRDock
|
|
1609
|
+
It should be a toml file with the keys listed in `envs`, including
|
|
1610
|
+
`organism`, `mhc_class`, `mhc`, `peptide`, `va`, `ja`, `vb`, `jb`,
|
|
1611
|
+
`cdr3a`, and `cdr3b`.
|
|
1612
|
+
The values will overwrite the values in `envs`.
|
|
1613
|
+
|
|
1614
|
+
Output:
|
|
1615
|
+
outdir: The output directory containing the results
|
|
1616
|
+
|
|
1617
|
+
Envs:
|
|
1618
|
+
organism: The organism of the TCR, peptide and MHC
|
|
1619
|
+
mhc_class (type=int): The MHC class, either `1` or `2`
|
|
1620
|
+
mhc: The MHC allele, e.g., `A*02:01`
|
|
1621
|
+
peptide: The peptide sequence
|
|
1622
|
+
va: The V alpha gene
|
|
1623
|
+
ja: The J alpha gene
|
|
1624
|
+
vb: The V beta gene
|
|
1625
|
+
jb: The J beta gene
|
|
1626
|
+
cdr3a: The CDR3 alpha sequence
|
|
1627
|
+
cdr3b: The CDR3 beta sequence
|
|
1628
|
+
python: The path of python with dependencies for `tcrdock` installed.
|
|
1629
|
+
If not provided, `TCRDock.lang` will be used (the same interpreter
|
|
1630
|
+
used for the wrapper script).
|
|
1631
|
+
It could also be a list to specify, for example, a python in a conda
|
|
1632
|
+
environment (e.g., `["conda", "run", "-n", "myenv", "python"]`).
|
|
1633
|
+
tmpdir: The temporary directory used to clone the `tcrdock` source code if
|
|
1634
|
+
`envs.tcrdock` is not provided.
|
|
1635
|
+
tcrdock: The path to the `tcrdock` source code repo.
|
|
1636
|
+
You need to clone the source code from the github repository.
|
|
1637
|
+
<https://github.com/phbradley/TCRdock> at
|
|
1638
|
+
revision c5a7af42eeb0c2a4492a4d4fe803f1f9aafb6193 at main branch.
|
|
1639
|
+
You also have to run `download_blast.py` after cloning to download the
|
|
1640
|
+
blast database in the directory.
|
|
1641
|
+
If not provided, we will clone the source code to the `envs.tmpdir`
|
|
1642
|
+
directory and run the `download_blast.py` script.
|
|
1643
|
+
model_name: The model name to use
|
|
1644
|
+
model_file: The model file to use.
|
|
1645
|
+
If provided as a relative path, it should be relative to the
|
|
1646
|
+
`<envs.data_dir>/params/`, otherwise, it should be the full path.
|
|
1647
|
+
data_dir: The data directory that contains the model files.
|
|
1648
|
+
The model files should be in the `params` subdirectory.
|
|
1649
|
+
"""
|
|
1650
|
+
input = "configfile:file"
|
|
1651
|
+
output = "outdir:dir:{{in.configfile | stem}}.tcrdock"
|
|
1652
|
+
lang = config.lang.python
|
|
1653
|
+
envs = {
|
|
1654
|
+
"tcrdock": None,
|
|
1655
|
+
"organism": "human",
|
|
1656
|
+
"mhc_class": 1,
|
|
1657
|
+
"mhc": "A*02:01",
|
|
1658
|
+
"peptide": None,
|
|
1659
|
+
"va": None,
|
|
1660
|
+
"ja": None,
|
|
1661
|
+
"vb": None,
|
|
1662
|
+
"jb": None,
|
|
1663
|
+
"cdr3a": None,
|
|
1664
|
+
"cdr3b": None,
|
|
1665
|
+
"python": None,
|
|
1666
|
+
"model_name": "model_2_ptm_ft4",
|
|
1667
|
+
"model_file": "tcrpmhc_run4_af_mhc_params_891.pkl",
|
|
1668
|
+
"data_dir": None,
|
|
1669
|
+
}
|
|
1670
|
+
script = "file://../scripts/tcr/TCRDock.py"
|
|
@@ -120,7 +120,7 @@ expand_each <- function(name, case) {
|
|
|
120
120
|
pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
|
|
121
121
|
}
|
|
122
122
|
for (each in eachs) {
|
|
123
|
-
by <- make.names(paste0("
|
|
123
|
+
by <- make.names(paste0("..", name, "_", case$each,"_", each))
|
|
124
124
|
srtobj@meta.data <<- srtobj@meta.data %>% mutate(
|
|
125
125
|
!!sym(by) := if_else(
|
|
126
126
|
!!sym(case$each) == each,
|
|
@@ -364,6 +364,16 @@ add_case_report <- function(info, sigmarkers, siggenes) {
|
|
|
364
364
|
}
|
|
365
365
|
}
|
|
366
366
|
|
|
367
|
+
ensure_sobj <- function(expr, allow_empty) {
|
|
368
|
+
tryCatch({ expr }, error = function(e) {
|
|
369
|
+
if (allow_empty) {
|
|
370
|
+
log_warn(" Ignoring this case: {e$message}")
|
|
371
|
+
return(NULL)
|
|
372
|
+
} else {
|
|
373
|
+
stop(e)
|
|
374
|
+
}
|
|
375
|
+
})
|
|
376
|
+
}
|
|
367
377
|
|
|
368
378
|
do_case_findall <- function(casename) {
|
|
369
379
|
# casename
|
|
@@ -382,10 +392,17 @@ do_case_findall <- function(casename) {
|
|
|
382
392
|
# args$min.cells.group <- args$min.cells.group %||% 1
|
|
383
393
|
# args$min.cells.feature <- args$min.cells.feature %||% 1
|
|
384
394
|
# args$min.pct <- args$min.pct %||% 0
|
|
395
|
+
allow_empty = startsWith(case$group.by, "..")
|
|
385
396
|
if (!is.null(case$subset)) {
|
|
386
|
-
args$object <-
|
|
397
|
+
args$object <- ensure_sobj({
|
|
398
|
+
srtobj %>% filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
|
|
399
|
+
}, allow_empty)
|
|
400
|
+
if (is.null(args$object)) { return() }
|
|
387
401
|
} else {
|
|
388
|
-
args$object <-
|
|
402
|
+
args$object <- ensure_sobj({
|
|
403
|
+
srtobj %>% filter(!is.na(!!sym(case$group.by)))
|
|
404
|
+
}, allow_empty)
|
|
405
|
+
if (is.null(args$object)) { return() }
|
|
389
406
|
}
|
|
390
407
|
Idents(args$object) <- case$group.by
|
|
391
408
|
|
|
@@ -486,11 +503,19 @@ do_case <- function(casename) {
|
|
|
486
503
|
# sigmarkers
|
|
487
504
|
# rest
|
|
488
505
|
args <- case$rest
|
|
506
|
+
allow_empty = startsWith(case$group.by, "..")
|
|
489
507
|
if (!is.null(case$subset)) {
|
|
490
|
-
args$object <-
|
|
508
|
+
args$object <- ensure_sobj({
|
|
509
|
+
srtobj %>% filter(!!parse_expr(case$subset) & !is.na(!!sym(case$group.by)))
|
|
510
|
+
}, allow_empty)
|
|
511
|
+
if (is.null(args$object)) { return() }
|
|
491
512
|
} else {
|
|
492
|
-
args$object <-
|
|
513
|
+
args$object <- ensure_sobj({
|
|
514
|
+
srtobj %>% filter(!is.na(!!sym(case$group.by)))
|
|
515
|
+
}, allow_empty)
|
|
516
|
+
if (is.null(args$object)) { return() }
|
|
493
517
|
}
|
|
518
|
+
|
|
494
519
|
args$assay <- case$assay
|
|
495
520
|
args$group.by <- case$group.by
|
|
496
521
|
args$ident.1 <- case$ident.1
|
|
@@ -76,7 +76,7 @@ expand_each <- function(name, case) {
|
|
|
76
76
|
pull(case$each) %>% unique() %>% na.omit()
|
|
77
77
|
}
|
|
78
78
|
for (each in eachs) {
|
|
79
|
-
by = make.names(paste0("
|
|
79
|
+
by = make.names(paste0("..", name, "_", case$each, "_", each))
|
|
80
80
|
idents <- case$idents
|
|
81
81
|
if (is.null(idents) || length(idents) == 0) {
|
|
82
82
|
srtobj@meta.data = srtobj@meta.data %>%
|
|
@@ -169,17 +169,31 @@ do_enrich <- function(info, markers, sig) {
|
|
|
169
169
|
}
|
|
170
170
|
}
|
|
171
171
|
|
|
172
|
+
ensure_sobj <- function(expr, allow_empty) {
|
|
173
|
+
tryCatch({ expr }, error = function(e) {
|
|
174
|
+
if (allow_empty) {
|
|
175
|
+
log_warn(" Ignoring this case: {e$message}")
|
|
176
|
+
return(NULL)
|
|
177
|
+
} else {
|
|
178
|
+
stop(e)
|
|
179
|
+
}
|
|
180
|
+
})
|
|
181
|
+
}
|
|
182
|
+
|
|
172
183
|
do_case <- function(casename) {
|
|
173
184
|
log_info("- Dealing with case: {casename} ...")
|
|
174
185
|
info <- casename_info(casename, cases, outdir, create = TRUE)
|
|
175
186
|
case <- cases[[casename]]
|
|
187
|
+
allow_empty = startsWith(case$group_by, "..")
|
|
176
188
|
|
|
177
189
|
if (sum(!is.na(srtobj@meta.data[[case$group_by]])) == 0) {
|
|
178
190
|
msg = "Not enough cells to run tests."
|
|
179
191
|
} else {
|
|
180
|
-
sobj <- srtobj %>% filter(!is.na(!!sym(case$group_by)))
|
|
192
|
+
sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group_by))) }, allow_empty)
|
|
193
|
+
if (is.null(sobj)) { return() }
|
|
181
194
|
if (!is.null(case$subset)) {
|
|
182
|
-
sobj <-
|
|
195
|
+
sobj <- ensure_sobj({ sobj %>% filter(!!parse_expr(case$subset)) }, allow_empty)
|
|
196
|
+
if (is.null(sobj)) { return() }
|
|
183
197
|
}
|
|
184
198
|
df <- tryCatch({
|
|
185
199
|
GetAssayData(sobj, layer = "data")
|
|
@@ -74,10 +74,10 @@ expand_each <- function(name, case) {
|
|
|
74
74
|
}
|
|
75
75
|
} else {
|
|
76
76
|
if (is.null(case$subset)) {
|
|
77
|
-
eachs <-
|
|
77
|
+
eachs <- meta %>%
|
|
78
78
|
pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
|
|
79
79
|
} else {
|
|
80
|
-
eachs <-
|
|
80
|
+
eachs <- meta %>% filter(!!parse_expr(case$subset)) %>%
|
|
81
81
|
pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
|
|
82
82
|
}
|
|
83
83
|
for (each in eachs) {
|
biopipen/scripts/scrna/ScFGSEA.R
CHANGED
|
@@ -72,7 +72,7 @@ expand_each <- function(name, case) {
|
|
|
72
72
|
pull(case$each) %>% na.omit() %>% unique() %>% as.vector()
|
|
73
73
|
}
|
|
74
74
|
for (each in eachs) {
|
|
75
|
-
by <- make.names(paste0("
|
|
75
|
+
by <- make.names(paste0("..", name, "_", case$each,"_", each))
|
|
76
76
|
srtobj@meta.data <<- srtobj@meta.data %>%
|
|
77
77
|
mutate(!!sym(by) := if_else(
|
|
78
78
|
!!sym(case$each) == each,
|
|
@@ -97,18 +97,35 @@ log_info("- Expanding cases...")
|
|
|
97
97
|
cases <- expand_cases(cases, defaults, expand_each)
|
|
98
98
|
|
|
99
99
|
|
|
100
|
+
ensure_sobj <- function(expr, allow_empty) {
|
|
101
|
+
tryCatch({ expr }, error = function(e) {
|
|
102
|
+
if (allow_empty) {
|
|
103
|
+
log_warn(" Ignoring this case: {e$message}")
|
|
104
|
+
return(NULL)
|
|
105
|
+
} else {
|
|
106
|
+
stop(e)
|
|
107
|
+
}
|
|
108
|
+
})
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
|
|
100
112
|
do_case <- function(name, case) {
|
|
101
113
|
log_info("- Handling case: {name} ...")
|
|
102
114
|
info <- casename_info(name, cases, outdir, create = TRUE)
|
|
103
115
|
|
|
116
|
+
allow_empty = startsWith(case$group.by, "..")
|
|
104
117
|
# prepare expression matrix
|
|
105
118
|
log_info(" Preparing expression matrix...")
|
|
106
|
-
sobj <- srtobj %>% filter(!is.na(!!sym(case$group.by)))
|
|
119
|
+
sobj <- ensure_sobj({ srtobj %>% filter(!is.na(!!sym(case$group.by))) }, allow_empty)
|
|
120
|
+
if (is.null(sobj)) { return() }
|
|
121
|
+
|
|
107
122
|
if (!is.null(case$subset)) {
|
|
108
|
-
sobj <- sobj %>% filter(!!!parse_exprs(case$subset))
|
|
123
|
+
sobj <- ensure_sobj({ sobj %>% filter(!!!parse_exprs(case$subset)) }, allow_empty)
|
|
124
|
+
if (is.null(sobj)) { return() }
|
|
109
125
|
}
|
|
110
126
|
if (!is.null(case$ident.2)) {
|
|
111
|
-
sobj <- sobj %>% filter(!!sym(case$group.by) %in% c(case$ident.1, case$ident.2))
|
|
127
|
+
sobj <- ensure_sobj({ sobj %>% filter(!!sym(case$group.by) %in% c(case$ident.1, case$ident.2)) }, allow_empty)
|
|
128
|
+
if (is.null(sobj)) { return() }
|
|
112
129
|
}
|
|
113
130
|
|
|
114
131
|
allclasses <- sobj@meta.data[, case$group.by, drop = TRUE]
|
|
@@ -13,6 +13,7 @@ envs = {{envs | r: todot = "-", skip = 1}}
|
|
|
13
13
|
|
|
14
14
|
set.seed(8525)
|
|
15
15
|
options(future.globals.maxSize = 80000 * 1024^2)
|
|
16
|
+
options(future.rng.onMisuse="ignore")
|
|
16
17
|
options(Seurat.object.assay.version = "v5")
|
|
17
18
|
plan(strategy = "multicore", workers = envs$ncores)
|
|
18
19
|
|
|
@@ -342,7 +343,7 @@ RunPCAArgs$object <- sobj
|
|
|
342
343
|
sobj <- do_call(RunPCA, RunPCAArgs)
|
|
343
344
|
|
|
344
345
|
if (!envs$no_integration) {
|
|
345
|
-
log_info("- Running IntegrateLayers ...")
|
|
346
|
+
log_info("- Running IntegrateLayers (method = {envs$IntegrateLayers$method}) ...")
|
|
346
347
|
IntegrateLayersArgs <- envs$IntegrateLayers
|
|
347
348
|
method <- IntegrateLayersArgs$method
|
|
348
349
|
if (!is.null(IntegrateLayersArgs$reference) && is.character(IntegrateLayersArgs$reference)) {
|
|
@@ -383,6 +384,117 @@ if (!envs$use_sct) {
|
|
|
383
384
|
sobj <- JoinLayers(sobj)
|
|
384
385
|
}
|
|
385
386
|
|
|
387
|
+
if (!is.null(envs$DoubletFinder) && is.list(envs$DoubletFinder) && envs$DoubletFinder$PCs > 0) {
|
|
388
|
+
library(DoubletFinder)
|
|
389
|
+
|
|
390
|
+
log_info("Running DoubletFinder ...")
|
|
391
|
+
log_info("- Preparing Seurat object ...")
|
|
392
|
+
# More controls from envs?
|
|
393
|
+
sobj <- FindNeighbors(sobj, dims = 1:envs$DoubletFinder$PCs)
|
|
394
|
+
sobj <- FindClusters(sobj)
|
|
395
|
+
|
|
396
|
+
log_info("- pK Indentification ...")
|
|
397
|
+
sweep.res.list <- paramSweep(
|
|
398
|
+
sobj,
|
|
399
|
+
PCs = 1:envs$DoubletFinder$PCs,
|
|
400
|
+
sct = envs$use_sct,
|
|
401
|
+
num.cores = envs$ncores
|
|
402
|
+
)
|
|
403
|
+
sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
|
|
404
|
+
bcmvn <- find.pK(sweep.stats)
|
|
405
|
+
|
|
406
|
+
bcmvn$Selected <- bcmvn$pK == bcmvn$pK[which.max(bcmvn$BCmetric)[1]]
|
|
407
|
+
plot <- ggplot(bcmvn, aes(x = pK, y = BCmetric, color = Selected)) +
|
|
408
|
+
geom_point() +
|
|
409
|
+
# rotate x axis labels
|
|
410
|
+
theme(axis.text.x = element_text(angle = 90, hjust = 1))
|
|
411
|
+
ggsave(plot, filename = file.path(plotsdir, "pK_BCmetric.png"))
|
|
412
|
+
|
|
413
|
+
pK <- bcmvn$pK[which.max(bcmvn$BCmetric)[1]]
|
|
414
|
+
pK <- as.numeric(as.character(pK))
|
|
415
|
+
pN <- envs$DoubletFinder$pN
|
|
416
|
+
log_info("- Homotypic Doublet Proportion Estimate ...")
|
|
417
|
+
homotypic.prop <- modelHomotypic(Idents(sobj))
|
|
418
|
+
nExp_poi <- round(nrow(sobj@meta.data) * envs$DoubletFinder$doublets)
|
|
419
|
+
nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop))
|
|
420
|
+
|
|
421
|
+
log_info("- Running DoubletFinder ...")
|
|
422
|
+
sobj <- doubletFinder(
|
|
423
|
+
sobj,
|
|
424
|
+
PCs = 1:envs$DoubletFinder$PCs,
|
|
425
|
+
pN = pN,
|
|
426
|
+
pK = pK,
|
|
427
|
+
nExp = nExp_poi.adj,
|
|
428
|
+
reuse.pANN = FALSE,
|
|
429
|
+
sct = envs$use_sct
|
|
430
|
+
)
|
|
431
|
+
pANN_col <- paste0("pANN_", pN, "_", pK)
|
|
432
|
+
pANN_col <- colnames(sobj@meta.data)[grepl(pANN_col, colnames(sobj@meta.data))]
|
|
433
|
+
DF_col <- paste0("DF.classifications_", pN, "_", pK)
|
|
434
|
+
DF_col <- colnames(sobj@meta.data)[grepl(DF_col, colnames(sobj@meta.data))]
|
|
435
|
+
doublets <- as.data.frame(
|
|
436
|
+
cbind(
|
|
437
|
+
colnames(sobj),
|
|
438
|
+
sobj@meta.data[, pANN_col],
|
|
439
|
+
sobj@meta.data[, DF_col]
|
|
440
|
+
)
|
|
441
|
+
)
|
|
442
|
+
colnames(doublets) <- c("Barcode","DoubletFinder_score","DoubletFinder_DropletType")
|
|
443
|
+
write.table(
|
|
444
|
+
doublets,
|
|
445
|
+
file.path(joboutdir, "DoubletFinder_doublets_singlets.txt"),
|
|
446
|
+
row.names = FALSE,
|
|
447
|
+
quote = FALSE,
|
|
448
|
+
sep = "\t"
|
|
449
|
+
)
|
|
450
|
+
|
|
451
|
+
summary <- as.data.frame(table(doublets$DoubletFinder_DropletType))
|
|
452
|
+
colnames(summary) <- c("Classification", "Droplet_N")
|
|
453
|
+
write.table(
|
|
454
|
+
summary,
|
|
455
|
+
file.path(joboutdir, "DoubletFinder_summary.txt"),
|
|
456
|
+
row.names = FALSE,
|
|
457
|
+
quote = FALSE,
|
|
458
|
+
sep = "\t"
|
|
459
|
+
)
|
|
460
|
+
|
|
461
|
+
# Do a dimplot
|
|
462
|
+
log_info("- Plotting dimension reduction ...")
|
|
463
|
+
dimp <- DimPlot(
|
|
464
|
+
sobj, group.by = DF_col, order = "Doublet",
|
|
465
|
+
cols = c("#333333", "#FF3333"), pt.size = 0.8, alpha = 0.5)
|
|
466
|
+
ggsave(dimp, filename = file.path(plotsdir, "DoubletFinder_dimplot.png"))
|
|
467
|
+
|
|
468
|
+
log_info("- Filtering doublets ...")
|
|
469
|
+
sobj <- subset(sobj, cells = doublets$Barcode[doublets$DoubletFinder_DropletType == "Singlet"])
|
|
470
|
+
|
|
471
|
+
add_report(
|
|
472
|
+
list(
|
|
473
|
+
kind = "descr",
|
|
474
|
+
content = "The table contains the number of cells classified as singlets and doublets."
|
|
475
|
+
),
|
|
476
|
+
list(
|
|
477
|
+
kind = "table",
|
|
478
|
+
data = list(path = file.path(joboutdir, "DoubletFinder_summary.txt"))
|
|
479
|
+
),
|
|
480
|
+
h1 = "DoubletFinder Results",
|
|
481
|
+
h2 = "The DoubletFinder Summary"
|
|
482
|
+
)
|
|
483
|
+
add_report(
|
|
484
|
+
list(
|
|
485
|
+
name = "pK vs BCmetric",
|
|
486
|
+
src = file.path(plotsdir, "pK_BCmetric.png")
|
|
487
|
+
),
|
|
488
|
+
list(
|
|
489
|
+
name = "Dimension Reduction Plot",
|
|
490
|
+
src = file.path(plotsdir, "DoubletFinder_dimplot.png")
|
|
491
|
+
),
|
|
492
|
+
ui = "table_of_images",
|
|
493
|
+
h1 = "DoubletFinder Results",
|
|
494
|
+
h2 = "Plots"
|
|
495
|
+
)
|
|
496
|
+
}
|
|
497
|
+
|
|
386
498
|
log_info("Saving filtered seurat object ...")
|
|
387
499
|
saveRDS(sobj, rdsfile)
|
|
388
500
|
|
|
@@ -1,88 +1,124 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
+
from multiprocessing import Pool
|
|
3
|
+
from slugify import slugify
|
|
4
|
+
from simpleconf import Config
|
|
2
5
|
from biopipen.utils.misc import logger, run_command, dict_to_cli_args
|
|
3
6
|
|
|
4
|
-
|
|
5
|
-
ncases = {{in.ncases | repr}} # pyright: ignore
|
|
6
|
-
nctrls = {{in.nctrls | repr}} # pyright: ignore
|
|
7
|
+
configfile = {{in.configfile | repr}} # pyright: ignore # noqa: E999
|
|
7
8
|
outdir = {{out.outdir | repr}} # pyright: ignore
|
|
8
9
|
gtmatfile = {{out.gtmat | repr}} # pyright: ignore
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
"
|
|
55
|
-
"
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
#
|
|
77
|
-
#
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
10
|
+
config = Config.load(configfile)
|
|
11
|
+
|
|
12
|
+
default_nsnps = {{envs.nsnps | repr}} # pyright: ignore
|
|
13
|
+
default_ncases = {{envs.ncases | repr}} # pyright: ignore
|
|
14
|
+
default_nctrls = {{envs.nctrls | repr}} # pyright: ignore
|
|
15
|
+
default_plink = {{envs.plink | repr}} # pyright: ignore
|
|
16
|
+
default_seed = {{envs.seed | repr}} # pyright: ignore
|
|
17
|
+
default_label = {{envs.label | repr}} # pyright: ignore
|
|
18
|
+
default_prevalence = {{envs.prevalence | repr}} # pyright: ignore
|
|
19
|
+
default_minfreq = {{envs.minfreq | repr}} # pyright: ignore
|
|
20
|
+
default_maxfreq = {{envs.maxfreq | repr}} # pyright: ignore
|
|
21
|
+
default_hetodds = {{envs.hetodds | repr}} # pyright: ignore
|
|
22
|
+
default_homodds = {{envs.homodds | repr}} # pyright: ignore
|
|
23
|
+
default_missing = {{envs.missing | repr}} # pyright: ignore
|
|
24
|
+
default_args = {{envs.args | repr}} # pyright: ignore
|
|
25
|
+
default_transpose_gtmat = {{envs.transpose_gtmat | repr}} # pyright: ignore
|
|
26
|
+
default_sample_prefix = {{envs.sample_prefix | repr}} # pyright: ignore
|
|
27
|
+
|
|
28
|
+
defaults = {
|
|
29
|
+
"nsnps": default_nsnps,
|
|
30
|
+
"ncases": default_ncases,
|
|
31
|
+
"nctrls": default_nctrls,
|
|
32
|
+
"plink": default_plink,
|
|
33
|
+
"seed": default_seed,
|
|
34
|
+
"label": default_label,
|
|
35
|
+
"prevalence": default_prevalence,
|
|
36
|
+
"minfreq": default_minfreq,
|
|
37
|
+
"maxfreq": default_maxfreq,
|
|
38
|
+
"hetodds": default_hetodds,
|
|
39
|
+
"homodds": default_homodds,
|
|
40
|
+
"missing": default_missing,
|
|
41
|
+
# "args": default_args,
|
|
42
|
+
"transpose_gtmat": default_transpose_gtmat,
|
|
43
|
+
"sample_prefix": default_sample_prefix,
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
def do_one_simulation(confitems):
|
|
47
|
+
args = default_args.copy()
|
|
48
|
+
args.update(confitems.pop("args", {}))
|
|
49
|
+
confs = defaults.copy()
|
|
50
|
+
confs.update(confitems)
|
|
51
|
+
transpose_gtmat = confs.pop("transpose_gtmat")
|
|
52
|
+
sample_prefix = confs.pop("sample_prefix")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
logger.debug(" Generating parameters file")
|
|
56
|
+
params_file = Path(outdir) / "params.txt"
|
|
57
|
+
params_file.write_text(
|
|
58
|
+
f"{confs['nsnps']}\t{confs['label']}\t{confs['minfreq']}\t"
|
|
59
|
+
f"{confs['maxfreq']}\t{confs['hetodds']}\t{confs['homodds']}\n"
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
if confs.get('seed') is not None:
|
|
63
|
+
args["seed"] = confs['seed']
|
|
64
|
+
|
|
65
|
+
args["simulate"] = params_file
|
|
66
|
+
args["out"] = Path(outdir) / "sim_snps"
|
|
67
|
+
args["simulate-ncases"] = confs['ncases']
|
|
68
|
+
args["simulate-ncontrols"] = confs['nctrls']
|
|
69
|
+
args["simulate-prevalence"] = confs['prevalence']
|
|
70
|
+
args["simulate-missing"] = confs['missing']
|
|
71
|
+
|
|
72
|
+
cmd = [confs['plink']] + dict_to_cli_args(args)
|
|
73
|
+
|
|
74
|
+
logger.debug(" Running PLINK simulation ...")
|
|
75
|
+
run_command(cmd, fg=True)
|
|
76
|
+
|
|
77
|
+
# Transpose the genotype matrix
|
|
78
|
+
# CHR SNP (C)M POS COUNTED ALT per0_per0 per1_per1 per2_per2
|
|
79
|
+
# 1 SNP_0 0 1 D d 1 0 1
|
|
80
|
+
# 1 SNP_1 0 2 d D 0 1 0
|
|
81
|
+
# 1 SNP_2 0 3 d D 0 0 0
|
|
82
|
+
# 1 SNP_3 0 4 d D 0 0 0
|
|
83
|
+
# 1 SNP_4 0 5 D d 1 2 1
|
|
84
|
+
cmd = [
|
|
85
|
+
confs['plink'],
|
|
86
|
+
"--recode",
|
|
87
|
+
"A" if transpose_gtmat else "A-transpose",
|
|
88
|
+
"tab",
|
|
89
|
+
"--bfile",
|
|
90
|
+
args["out"],
|
|
91
|
+
"--out",
|
|
92
|
+
gtmatfile + ".plink.recoded",
|
|
93
|
+
]
|
|
94
|
+
logger.debug("- Recoding into genotype matrix ...")
|
|
95
|
+
run_command(cmd, fg=True)
|
|
96
|
+
|
|
97
|
+
logger.debug(" Saving genotype matrix ...")
|
|
98
|
+
## transpose_gtmat = False
|
|
99
|
+
# SNP_COUNTED per0_per0 per1_per1 per2_per2
|
|
100
|
+
# SNP_0_D 1 0 1
|
|
101
|
+
# SNP_1_d 0 1 0
|
|
102
|
+
# SNP_2_d 0 0 0
|
|
103
|
+
# SNP_3_d 0 0 0
|
|
104
|
+
# SNP_4_D 1 2 1
|
|
105
|
+
## transpose_gtmat = True
|
|
106
|
+
# FID_IID SNP_0_D SNP_1_D SNP_2_D
|
|
107
|
+
# per0_per0 0 1 1
|
|
108
|
+
# per1_per1 0 2 0
|
|
109
|
+
# per2_per2 0 0 0
|
|
110
|
+
# per3_per3 1 1 0
|
|
111
|
+
# per4_per4 0 0 0
|
|
112
|
+
if transpose_gtmat:
|
|
113
|
+
cmd = f"cut -f1,2,7- {gtmatfile}.plink.recoded.raw | sed 's/\\t/_/'"
|
|
114
|
+
else:
|
|
115
|
+
cmd = f"cut -f2,5,7- {gtmatfile}.plink.recoded.traw | sed 's/\\t/_/'"
|
|
116
|
+
|
|
117
|
+
if sample_prefix:
|
|
118
|
+
cmd = f"{cmd} | sed 's/per[0-9]\\+_per/{sample_prefix}/g'"
|
|
119
|
+
|
|
120
|
+
cmd = f"{cmd} > {gtmatfile}"
|
|
121
|
+
run_command(cmd, fg=True)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
do_one_simulation(config)
|
|
@@ -42,21 +42,21 @@ diffcoex_score <- function(group) {
|
|
|
42
42
|
|
|
43
43
|
gvals <- unique(gdata[, group, drop = TRUE])
|
|
44
44
|
if (length(gvals) < 2) {
|
|
45
|
-
|
|
45
|
+
log_debug(" Less than 2 groups in the input. Skipping ...")
|
|
46
46
|
return(NULL)
|
|
47
47
|
}
|
|
48
48
|
rs <- lapply(gvals, function(gval) {
|
|
49
49
|
samples <- rownames(gdata[gdata[[group]] == gval, , drop = FALSE])
|
|
50
50
|
expr <- indata[samples, , drop = FALSE]
|
|
51
51
|
if (length(samples) < 3) {
|
|
52
|
-
|
|
52
|
+
log_debug(" Less than 3 samples in one of the groups. Skipping ...")
|
|
53
53
|
return(NULL)
|
|
54
54
|
}
|
|
55
55
|
cor.pairs(as.matrix(expr), cor.method = method)
|
|
56
56
|
})
|
|
57
57
|
rs[sapply(rs, is.null)] <- NULL
|
|
58
58
|
if (length(rs) < 2) {
|
|
59
|
-
|
|
59
|
+
log_debug(" Less than 2 groups with at least 3 samples. Skipping ...")
|
|
60
60
|
return(NULL)
|
|
61
61
|
}
|
|
62
62
|
N <- length(rs)
|
|
@@ -26,6 +26,7 @@ section <- {{ envs.section | r }}
|
|
|
26
26
|
mutaters <- {{ envs.mutaters | r }}
|
|
27
27
|
subset <- {{ envs.subset | r }}
|
|
28
28
|
prefix <- {{ envs.prefix | r }}
|
|
29
|
+
upset_ymax <- {{ envs.upset_ymax | r }}
|
|
29
30
|
upset_trans <- {{ envs.upset_trans | r }}
|
|
30
31
|
cases <- {{ envs.cases | r }}
|
|
31
32
|
|
|
@@ -40,6 +41,7 @@ if (is.null(cases) || length(cases) == 0) {
|
|
|
40
41
|
order = sample_order,
|
|
41
42
|
subset = subset,
|
|
42
43
|
section = section,
|
|
44
|
+
upset_ymax = upset_ymax,
|
|
43
45
|
upset_trans = upset_trans
|
|
44
46
|
)
|
|
45
47
|
)
|
|
@@ -50,6 +52,7 @@ if (is.null(cases) || length(cases) == 0) {
|
|
|
50
52
|
cases[[key]]$order <- cases[[key]]$order %||% sample_order
|
|
51
53
|
cases[[key]]$section <- cases[[key]]$section %||% section
|
|
52
54
|
cases[[key]]$subset <- cases[[key]]$subset %||% subset
|
|
55
|
+
cases[[key]]$upset_ymax <- cases[[key]]$upset_ymax %||% upset_ymax
|
|
53
56
|
cases[[key]]$upset_trans <- cases[[key]]$upset_trans %||% upset_trans
|
|
54
57
|
}
|
|
55
58
|
}
|
|
@@ -320,7 +323,7 @@ plot_venndg <- function(counts, groups, singletons) {
|
|
|
320
323
|
venn_p
|
|
321
324
|
}
|
|
322
325
|
|
|
323
|
-
plot_upset <- function(counts, singletons, upset_trans) {
|
|
326
|
+
plot_upset <- function(counts, singletons, upset_ymax, upset_trans) {
|
|
324
327
|
|
|
325
328
|
cnts <- column_to_rownames(counts, "CDR3.aa") %>%
|
|
326
329
|
mutate(across(everything(), ~ as.integer(as.logical(.x))))
|
|
@@ -345,12 +348,21 @@ plot_upset <- function(counts, singletons, upset_trans) {
|
|
|
345
348
|
geom_text(
|
|
346
349
|
aes(label = ..count.., vjust = ifelse(..type == "Multiplets", -0.25, +1.25)),
|
|
347
350
|
stat = "count", position = "stack", size = 2.8)
|
|
351
|
+
if (!is.null(upset_ymax)) {
|
|
352
|
+
p <- p + ylim(0, upset_ymax)
|
|
353
|
+
}
|
|
348
354
|
} else {
|
|
349
355
|
p <- p + geom_bar(stat = "count", position = "dodge2") +
|
|
350
356
|
geom_text(
|
|
351
357
|
aes(label = ..count..),
|
|
352
|
-
stat = "count", position = position_dodge(width = 0.9), vjust = -0.25, size = 2.5)
|
|
353
|
-
|
|
358
|
+
stat = "count", position = position_dodge(width = 0.9), vjust = -0.25, size = 2.5)
|
|
359
|
+
|
|
360
|
+
# limit the y and do log10 transformation
|
|
361
|
+
if (!is.null(upset_ymax)) {
|
|
362
|
+
p <- p + scale_y_continuous(trans = "log10", limits = c(1, upset_ymax))
|
|
363
|
+
} else {
|
|
364
|
+
p <- p + scale_y_continuous(trans = "log10")
|
|
365
|
+
}
|
|
354
366
|
}
|
|
355
367
|
|
|
356
368
|
upset(
|
|
@@ -519,7 +531,7 @@ handle_subject <- function(i, subjects, casename, case) {
|
|
|
519
531
|
upset_dir <- file.path(casedir, "upset")
|
|
520
532
|
upset_png <- file.path(upset_dir, paste0("upset_", slugify(subject), ".png"))
|
|
521
533
|
png(upset_png, res = 100, height = 600, width = 800)
|
|
522
|
-
print(plot_upset(counts, singletons, case$upset_trans))
|
|
534
|
+
print(plot_upset(counts, singletons, case$upset_ymax, case$upset_trans))
|
|
523
535
|
dev.off()
|
|
524
536
|
|
|
525
537
|
h <- headings(case$section, casename, "Overlapping Clones (UpSet Plots)")
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import rtoml
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from tempfile import gettempdir
|
|
8
|
+
from biopipen.utils.misc import logger, run_command
|
|
9
|
+
|
|
10
|
+
configfile = {{in.configfile | repr}} # pyright: ignore
|
|
11
|
+
outdir = Path({{out.outdir | repr}}) # pyright: ignore
|
|
12
|
+
envs = {{envs | dict | repr}} # pyright: ignore
|
|
13
|
+
python = sys.executable
|
|
14
|
+
|
|
15
|
+
args = envs.copy()
|
|
16
|
+
config = rtoml.load(Path(configfile))
|
|
17
|
+
args.update(config)
|
|
18
|
+
model_name = args.pop("model_name")
|
|
19
|
+
model_file = Path(args.pop("model_file"))
|
|
20
|
+
data_dir = args.pop("data_dir", None)
|
|
21
|
+
tcrdock = args.pop("tcrdock", None)
|
|
22
|
+
tmpdir = args.pop("tmpdir", gettempdir())
|
|
23
|
+
python = args.pop("python", python)
|
|
24
|
+
|
|
25
|
+
if not isinstance(python, (list, tuple)):
|
|
26
|
+
python = [python]
|
|
27
|
+
|
|
28
|
+
if not data_dir:
|
|
29
|
+
raise ValueError("`envs.data_dir` is required")
|
|
30
|
+
|
|
31
|
+
if not tcrdock:
|
|
32
|
+
logger.info("- `envs.tcrdock` is not provided, cloning the repository ... ")
|
|
33
|
+
repo_url = "https://github.com/phbradley/TCRdock"
|
|
34
|
+
commit_id = "c5a7af42eeb0c2a4492a4d4fe803f1f9aafb6193"
|
|
35
|
+
branch = "main"
|
|
36
|
+
|
|
37
|
+
from git import Repo
|
|
38
|
+
repo = Repo.clone_from(repo_url, tmpdir, branch=branch, no_checkout=True)
|
|
39
|
+
repo.git.checkout(commit_id)
|
|
40
|
+
tcrdock = Path(tmpdir) / "TCRdock"
|
|
41
|
+
|
|
42
|
+
logger.info("- Running download_blast.py ...")
|
|
43
|
+
cmd = [
|
|
44
|
+
*python,
|
|
45
|
+
tcrdock / "download_blast.py",
|
|
46
|
+
]
|
|
47
|
+
run_command(cmd, fg=True, cwd=str(tcrdock))
|
|
48
|
+
|
|
49
|
+
if not model_file.is_absolute():
|
|
50
|
+
model_file = Path(data_dir) / "params" / model_file
|
|
51
|
+
|
|
52
|
+
os.environ['TF_FORCE_UNIFIED_MEMORY'] = '1'
|
|
53
|
+
os.environ['XLA_PYTHON_CLIENT_MEM_FRACTION'] = '4.0'
|
|
54
|
+
|
|
55
|
+
logger.info("- Composing targets file ... ")
|
|
56
|
+
targets_file = outdir / "user_targets.tsv"
|
|
57
|
+
targets = pd.DataFrame(
|
|
58
|
+
[
|
|
59
|
+
dict(
|
|
60
|
+
organism=args['organism'],
|
|
61
|
+
mhc_class=args['mhc_class'],
|
|
62
|
+
mhc=args['mhc'],
|
|
63
|
+
peptide=args['peptide'],
|
|
64
|
+
va=args['va'],
|
|
65
|
+
ja=args['ja'],
|
|
66
|
+
cdr3a=args['cdr3a'],
|
|
67
|
+
vb=args['vb'],
|
|
68
|
+
jb=args['jb'],
|
|
69
|
+
cdr3b=args['cdr3b'],
|
|
70
|
+
)
|
|
71
|
+
]
|
|
72
|
+
)
|
|
73
|
+
targets.to_csv(targets_file, sep="\t", index=False)
|
|
74
|
+
|
|
75
|
+
logger.info("- Generating inputs for AlphaFold modeling ... ")
|
|
76
|
+
cmd = [
|
|
77
|
+
*python,
|
|
78
|
+
tcrdock + "/setup_for_alphafold.py",
|
|
79
|
+
"--targets_tsvfile", targets_file,
|
|
80
|
+
"--output_dir", outdir / "user_output",
|
|
81
|
+
"--new_docking",
|
|
82
|
+
]
|
|
83
|
+
run_command(cmd, fg=True)
|
|
84
|
+
|
|
85
|
+
logger.info("- Running AlphaFold modeling ... ")
|
|
86
|
+
cmd = [
|
|
87
|
+
*python,
|
|
88
|
+
tcrdock + "/run_prediction.py",
|
|
89
|
+
"--verbose",
|
|
90
|
+
"--targets", outdir / "user_output/targets.tsv",
|
|
91
|
+
"--outfile_prefix", f"{outdir}/{args['peptide']}",
|
|
92
|
+
"--model_names", model_name,
|
|
93
|
+
"--data_dir", data_dir,
|
|
94
|
+
"--model_params_files", model_file,
|
|
95
|
+
]
|
|
96
|
+
run_command(cmd, fg=True, env={"XLA_FLAGS": "--xla_gpu_force_compilation_parallelism=1"})
|
|
97
|
+
|
|
98
|
+
logger.info("- Calculating the PAE ... ")
|
|
99
|
+
cmd = [
|
|
100
|
+
*python,
|
|
101
|
+
tcrdock + "/add_pmhc_tcr_pae_to_tsvfile.py",
|
|
102
|
+
"--infile", f"{outdir}/{args['peptide']}_final.tsv",
|
|
103
|
+
"--outfile", f"{outdir}/{args['peptide']}_w_pae.tsv",
|
|
104
|
+
]
|
|
105
|
+
|
|
106
|
+
run_command(cmd, fg=True)
|
biopipen/utils/misc.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
+
import os
|
|
4
5
|
import sys
|
|
5
6
|
import logging
|
|
6
7
|
from typing import List
|
|
7
8
|
from biopipen.core.filters import dict_to_cli_args # noqa: F401
|
|
8
9
|
|
|
9
10
|
logger = logging.getLogger("biopipen_job")
|
|
10
|
-
logger.setLevel(logging.
|
|
11
|
+
logger.setLevel(logging.DEBUG)
|
|
11
12
|
_handler = logging.StreamHandler(sys.stdout)
|
|
12
13
|
# Use same log format as in R
|
|
13
14
|
# {sprintf("%-7s", level)} [{format(time, "%Y-%m-%d %H:%M:%S")}] {msg}
|
|
@@ -100,6 +101,9 @@ def run_command(
|
|
|
100
101
|
kwargs["stderr"] = sys.stderr
|
|
101
102
|
kwargs["universal_newlines"] = True
|
|
102
103
|
|
|
104
|
+
if "env" in kwargs:
|
|
105
|
+
kwargs["env"] = {**os.environ, **kwargs["env"]}
|
|
106
|
+
|
|
103
107
|
try:
|
|
104
108
|
p = Popen(cmd, **kwargs)
|
|
105
109
|
except Exception as e:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: biopipen
|
|
3
|
-
Version: 0.27.
|
|
3
|
+
Version: 0.27.3
|
|
4
4
|
Summary: Bioinformatics processes/pipelines that can be run from `pipen run`
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: pwwang
|
|
@@ -17,6 +17,7 @@ Requires-Dist: datar[pandas] (>=0.15.6,<0.16.0)
|
|
|
17
17
|
Requires-Dist: pipen-board[report] (>=0.15,<0.16)
|
|
18
18
|
Requires-Dist: pipen-cli-run (>=0.13,<0.14)
|
|
19
19
|
Requires-Dist: pipen-filters (>=0.12,<0.13)
|
|
20
|
-
Requires-Dist: pipen-poplog (>=0.1,<0.2)
|
|
20
|
+
Requires-Dist: pipen-poplog (>=0.1.2,<0.2.0)
|
|
21
21
|
Requires-Dist: pipen-runinfo (>=0.6,<0.7) ; extra == "runinfo"
|
|
22
22
|
Requires-Dist: pipen-verbose (>=0.11,<0.12)
|
|
23
|
+
Requires-Dist: pyyaml-include (==1.*)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
biopipen/__init__.py,sha256=
|
|
1
|
+
biopipen/__init__.py,sha256=lxhjPOOCzhlHB02EzaqTtDdBFZSOLV3WLWw2HC0DYvo,23
|
|
2
2
|
biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
|
|
4
4
|
biopipen/core/config.toml,sha256=20RCI30Peee1EQdfb_UbV3Hf74XUPndJnYZlUThytsw,1781
|
|
@@ -21,12 +21,12 @@ biopipen/ns/gsea.py,sha256=EsNRAPYsagaV2KYgr4Jv0KCnZGqayM209v4yOGGTIOI,7423
|
|
|
21
21
|
biopipen/ns/misc.py,sha256=fzn0pXvdghMkQhu-e3MMapPNMyO6IAJbtTzVU3GbFa0,3246
|
|
22
22
|
biopipen/ns/plot.py,sha256=yguxmErUOH-hOM10JfuI_sXw2p49XF8yGR_gXfbd5yQ,4066
|
|
23
23
|
biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
|
|
24
|
-
biopipen/ns/scrna.py,sha256=
|
|
24
|
+
biopipen/ns/scrna.py,sha256=i9h0xNOII3SqJ_cJOZ5epn8breAsc-yXH_Us04DoZvg,103401
|
|
25
25
|
biopipen/ns/scrna_metabolic_landscape.py,sha256=9s1NvH3aMaNDXyfwy9TdzGcSP_lIW4JqhLgknNZcIKE,28313
|
|
26
|
-
biopipen/ns/snp.py,sha256=
|
|
26
|
+
biopipen/ns/snp.py,sha256=Nq20NJzQ9YiqE9mhtCUH6dfs7528o1e4N-j9PewjAsQ,3016
|
|
27
27
|
biopipen/ns/stats.py,sha256=yJ6C1CXF84T7DDs9mgufqUOr89Rl6kybE5ji8Vnx6cw,13693
|
|
28
28
|
biopipen/ns/tcgamaf.py,sha256=AFbUJIxiMSvsVY3RcHgjRFuMnNh2DG3Mr5slLNEyz6o,1455
|
|
29
|
-
biopipen/ns/tcr.py,sha256=
|
|
29
|
+
biopipen/ns/tcr.py,sha256=5bMnxhbeB08UrAw8YSh2BkA3AUFeoOajhE6DhHt74K4,87863
|
|
30
30
|
biopipen/ns/vcf.py,sha256=cdkKroii0_nl_bSP2cnO09qESUAhHqu6btOiTSKS79Y,15314
|
|
31
31
|
biopipen/ns/web.py,sha256=3zucrDo-IVsSnIvlw-deoScuxqWa6OMTm8Vo-R4E44Q,2224
|
|
32
32
|
biopipen/reports/bam/CNAClinic.svelte,sha256=D4IxQcgDCPQZMbXog-aZP5iJEQTK2N4i0C60e_iXyfs,213
|
|
@@ -129,12 +129,12 @@ biopipen/scripts/scrna/ExprImpution-alra.R,sha256=w3W1txJcdWg52-SETY2Z0lO7maDNfi
|
|
|
129
129
|
biopipen/scripts/scrna/ExprImpution-rmagic.R,sha256=jYIfqZpnvjKJkvItLnemPVtUApHBYQi1_L8rHVbEe1M,735
|
|
130
130
|
biopipen/scripts/scrna/ExprImpution-scimpute.R,sha256=mg40qCUW7-nP5oHPvARq7dmtoahM0GRFWXQpum0BXVk,1082
|
|
131
131
|
biopipen/scripts/scrna/ExprImpution.R,sha256=7768ezrr59xUZDXq8lO9jj2XhnkSsx-xxBmOD9_DO7c,313
|
|
132
|
-
biopipen/scripts/scrna/MarkersFinder.R,sha256=
|
|
133
|
-
biopipen/scripts/scrna/MetaMarkers.R,sha256=
|
|
132
|
+
biopipen/scripts/scrna/MarkersFinder.R,sha256=M7fHTbHHErZ9JbLmjDkx-6yVIay0_h0MkvgFegnqL44,22918
|
|
133
|
+
biopipen/scripts/scrna/MetaMarkers.R,sha256=9ve1X0TrDzS_ZEW6HtU3n8R-uPx7q-hYMMNFVDSE8wQ,11272
|
|
134
134
|
biopipen/scripts/scrna/ModuleScoreCalculator.R,sha256=JSHd-_-KiFqW8avCGxgU4T-C5BtDr2u0kwIvEu2lFIg,4188
|
|
135
|
-
biopipen/scripts/scrna/RadarPlots.R,sha256=
|
|
135
|
+
biopipen/scripts/scrna/RadarPlots.R,sha256=TGPUTUcHOHgd9rsNtLYT-N6WHiFNDBZsiIoqkyAJh0A,13020
|
|
136
136
|
biopipen/scripts/scrna/SCImpute.R,sha256=dSJOHhmJ3x_72LBRXT72dbCti5oiB85CJ-OjWtqONbk,2958
|
|
137
|
-
biopipen/scripts/scrna/ScFGSEA.R,sha256=
|
|
137
|
+
biopipen/scripts/scrna/ScFGSEA.R,sha256=2UCTCIydVkPGvn7WP-_fcE7857iKKDxY56-j-ruyO8o,6254
|
|
138
138
|
biopipen/scripts/scrna/Seurat2AnnData.R,sha256=qz4u-B5J3GMwttubnNnByJXreziFbrP5Mak0L0q7eG0,1557
|
|
139
139
|
biopipen/scripts/scrna/SeuratClusterStats-dimplots.R,sha256=gViDgQ8NorYD64iK0FgcODOrDOw0tExZmhuPRuLNp4g,2354
|
|
140
140
|
biopipen/scripts/scrna/SeuratClusterStats-features.R,sha256=SaKTJloP1fttRXZQeb2ApX0ej7al13wOoEYkthSk13k,15489
|
|
@@ -147,7 +147,7 @@ biopipen/scripts/scrna/SeuratFilter.R,sha256=BrYK0MLdaTtQvInMaQsmOt7oH_hlks0M1zy
|
|
|
147
147
|
biopipen/scripts/scrna/SeuratLoading.R,sha256=ekWKnHIqtQb3kHVQiVymAHXXqiUxs6KKefjZKjaykmk,900
|
|
148
148
|
biopipen/scripts/scrna/SeuratMap2Ref.R,sha256=Xn3VnvKqShuC0Ju05380wjuLVSdW0uWVzntdxjme244,4359
|
|
149
149
|
biopipen/scripts/scrna/SeuratMetadataMutater.R,sha256=Pp4GsF3hZ6ZC2vroC3LSBmVa4B1p2L3hbh981yaAIeQ,1093
|
|
150
|
-
biopipen/scripts/scrna/SeuratPreparing.R,sha256=
|
|
150
|
+
biopipen/scripts/scrna/SeuratPreparing.R,sha256=c_aBM0mugBNyYJ5OjNVDR_Cj0sGqkiJZXCOk3pesFDk,16990
|
|
151
151
|
biopipen/scripts/scrna/SeuratSplit.R,sha256=vdK11V39_Uo_NaOh76QWCtxObGaEr5Ynxqq0hTiSvsU,754
|
|
152
152
|
biopipen/scripts/scrna/SeuratSubClustering.R,sha256=L1SwKhNNKvsQGrcj0ZjScW9BLuvdO2pg7U48Ospsot8,6096
|
|
153
153
|
biopipen/scripts/scrna/SeuratSubset.R,sha256=yVA11NVE2FSSw-DhxQcJRapns0tNNHdyDYi5epO6SKM,1776
|
|
@@ -160,9 +160,9 @@ biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R,sha256=b77yG5FeRs
|
|
|
160
160
|
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R,sha256=ic8Fy8QqYDGh_izmvZVJ3KL66podg_CSF5ITL3FZsvo,5196
|
|
161
161
|
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R,sha256=95DLX1Rz0tobOuDZ8V9YdGgO0KiNthhccoeeOK21tno,16216
|
|
162
162
|
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R,sha256=rQ9iwGh9FNRZlJJzM4QItdyXmebfzLAq05ZAjb1kGUw,9831
|
|
163
|
-
biopipen/scripts/snp/PlinkSimulation.py,sha256=
|
|
163
|
+
biopipen/scripts/snp/PlinkSimulation.py,sha256=mSSoGGG6sbEPBcUGdHgbebUrg4DiHeyNyc7jLPjV5pY,4169
|
|
164
164
|
biopipen/scripts/stats/ChowTest.R,sha256=4p7NULmfOZSfeBSQ04els0h3cXOK5yeCJJ4-gEBPOGk,3617
|
|
165
|
-
biopipen/scripts/stats/DiffCoexpr.R,sha256=
|
|
165
|
+
biopipen/scripts/stats/DiffCoexpr.R,sha256=5hQDV2_7bKdKUsOGMZUa0GS5rc7kFspxonNyFEPmtbc,4516
|
|
166
166
|
biopipen/scripts/stats/LiquidAssoc.R,sha256=s-XJbFoOfH4eWSkxbbOSHZ1x16lY0Sdod_V1KvSkM8k,3727
|
|
167
167
|
biopipen/scripts/stats/MetaPvalue.R,sha256=c26lYC4rxQ3D7vRvsXJ4_M-QIYTDTV8AEjXrag2_srU,3957
|
|
168
168
|
biopipen/scripts/tcgamaf/Maf2Vcf.py,sha256=Cxh7fiSNCxWDTfIJqZDOOnaSrw-85S_fH2U-PWY03hc,704
|
|
@@ -170,7 +170,7 @@ biopipen/scripts/tcgamaf/MafAddChr.py,sha256=V10HMisl12O3ZfXuRmFNdy5p-3mr43WCvy0
|
|
|
170
170
|
biopipen/scripts/tcgamaf/maf2vcf.pl,sha256=hJKcH-NbgWK6fmK7f3qex7ozJJl-PqCNPXqpwfcHwJg,22707
|
|
171
171
|
biopipen/scripts/tcr/Attach2Seurat.R,sha256=C91TAh1cLSxWkdFPf84pbxlpTYMuWq_rduG4eiIkXZI,1345
|
|
172
172
|
biopipen/scripts/tcr/CDR3AAPhyschem.R,sha256=-0BS6cdt5GfQJphA3HlDgGjWr4XFF-7INLJyMBHQNAc,16628
|
|
173
|
-
biopipen/scripts/tcr/CloneResidency.R,sha256=
|
|
173
|
+
biopipen/scripts/tcr/CloneResidency.R,sha256=nFPPPknJPEX-RU16uqQZzYMmJqmWqUAun_FI8GpJ7iw,21520
|
|
174
174
|
biopipen/scripts/tcr/CloneSizeQQPlot.R,sha256=5FPfWQjxTsv59KSDQaDWj3C95zPQMngKG7qOf95NEzI,4527
|
|
175
175
|
biopipen/scripts/tcr/GIANA/GIANA.py,sha256=0qLhgCWxT8K-4JvORA03CzBPTT5pd4Di5B_DgrHXbFA,47198
|
|
176
176
|
biopipen/scripts/tcr/GIANA/GIANA4.py,sha256=Z7Q3cUr1Pvmy4CFADN0P7i9g1-HbzWROMqk5HvL_F1Q,45762
|
|
@@ -193,6 +193,7 @@ biopipen/scripts/tcr/ImmunarchSplitIdents.R,sha256=FGCeGV0uSmFU91lKkldUAeV4A2m3h
|
|
|
193
193
|
biopipen/scripts/tcr/SampleDiversity.R,sha256=jQ1OU3b8vswD8tZhLt3fkcqJKrl2bhQX0giHM2rXz3Y,2643
|
|
194
194
|
biopipen/scripts/tcr/TCRClusterStats.R,sha256=D7q1svXQxl1uOya8bePvR9e6NJXjCjXbPsXnEPTWdlE,12004
|
|
195
195
|
biopipen/scripts/tcr/TCRClustering.R,sha256=eflUsYfq4aEaX9BVL0MiB7lNlot_L-8VaReK516go84,9236
|
|
196
|
+
biopipen/scripts/tcr/TCRDock.py,sha256=jjzxMWp-hs0LDtA1mVbiWDvUieSO7X-F9yeKGy1LSTM,3026
|
|
196
197
|
biopipen/scripts/tcr/TESSA.R,sha256=bfOixWLZy8yi0MzXncP67KjtCukwXEzsK5fCdMzB5VM,6822
|
|
197
198
|
biopipen/scripts/tcr/TESSA_source/Atchley_factors.csv,sha256=SumqDOqP67P54uM7Cuc5_O_rySTWcGo7eX3psMSPX9s,763
|
|
198
199
|
biopipen/scripts/tcr/TESSA_source/BriseisEncoder.py,sha256=z4_Q_6StymffuUGGjHP1-B3aTsXtamKao5Q1-Kg9has,6831
|
|
@@ -230,14 +231,14 @@ biopipen/utils/gene.py,sha256=qE_BqTayrJWxRdniffhcz6OhZcw9GUoOrj2EtFWH9Gw,2246
|
|
|
230
231
|
biopipen/utils/gsea.R,sha256=UMQOlWGstQTOBScvy1wIzrB7I3CE28Xo2v1sy4lmJ-M,7549
|
|
231
232
|
biopipen/utils/io.R,sha256=jIYdqdn0iRWfQYAZa5CjXi3fikqmYvPPLIXhobRe8sw,537
|
|
232
233
|
biopipen/utils/misc.R,sha256=jXusPDCxSIaYRq_qm4khUsu9nyMhbpBVcj8BVn4j8Ic,10629
|
|
233
|
-
biopipen/utils/misc.py,sha256=
|
|
234
|
+
biopipen/utils/misc.py,sha256=KJziAFY4Kl-0ZsO93vteY9gRLZg9BSYig-TDocHY36k,3601
|
|
234
235
|
biopipen/utils/mutate_helpers.R,sha256=Bqy6Oi4rrPEPJw0Jq32bVAwwBfZv7JJL9jFcK5x-cek,17649
|
|
235
236
|
biopipen/utils/plot.R,sha256=pzl37PomNeUZPxohHZ2w93j3Fc4T0Qrc62FF-9MTKdw,4417
|
|
236
237
|
biopipen/utils/reference.py,sha256=6bPSwQa-GiDfr7xLR9a5T64Ey40y24yn3QfQ5wDFZkU,4420
|
|
237
238
|
biopipen/utils/rnaseq.R,sha256=Ro2B2dG-Z2oVaT5tkwp9RHBz4dp_RF-JcizlM5GYXFs,1298
|
|
238
239
|
biopipen/utils/single_cell.R,sha256=pJjYP8bIZpNAtTQ32rOXhZxaM1Y-6D-xUcK3pql9tbk,4316
|
|
239
240
|
biopipen/utils/vcf.py,sha256=ajXs0M_QghEctlvUlSRjWQIABVF02wPdYd-0LP4mIsU,9377
|
|
240
|
-
biopipen-0.27.
|
|
241
|
-
biopipen-0.27.
|
|
242
|
-
biopipen-0.27.
|
|
243
|
-
biopipen-0.27.
|
|
241
|
+
biopipen-0.27.3.dist-info/METADATA,sha256=4DeAjhBZHdg7pZXoTNPiQkzGsx6hSm7VwgWgyYKMY18,920
|
|
242
|
+
biopipen-0.27.3.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
243
|
+
biopipen-0.27.3.dist-info/entry_points.txt,sha256=wu70aoBcv1UahVbB_5237MY-9M9_mzqmWjDD-oi3yz0,621
|
|
244
|
+
biopipen-0.27.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|