biopipen 0.27.1__py3-none-any.whl → 0.27.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of biopipen might be problematic. Click here for more details.
- biopipen/__init__.py +1 -1
- biopipen/ns/snp.py +18 -15
- biopipen/ns/tcr.py +73 -0
- biopipen/scripts/scrna/RadarPlots.R +2 -2
- biopipen/scripts/snp/PlinkSimulation.py +119 -83
- biopipen/scripts/stats/DiffCoexpr.R +3 -3
- biopipen/scripts/tcr/CloneResidency.R +16 -4
- biopipen/scripts/tcr/TCRDock.py +106 -0
- biopipen/utils/misc.py +5 -1
- {biopipen-0.27.1.dist-info → biopipen-0.27.2.dist-info}/METADATA +1 -1
- {biopipen-0.27.1.dist-info → biopipen-0.27.2.dist-info}/RECORD +13 -12
- {biopipen-0.27.1.dist-info → biopipen-0.27.2.dist-info}/WHEEL +0 -0
- {biopipen-0.27.1.dist-info → biopipen-0.27.2.dist-info}/entry_points.txt +0 -0
biopipen/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.27.1"
|
|
1
|
+
__version__ = "0.27.2"
|
biopipen/ns/snp.py
CHANGED
|
@@ -7,12 +7,15 @@ from ..core.config import config
|
|
|
7
7
|
class PlinkSimulation(Proc):
|
|
8
8
|
"""Simulate SNPs using PLINK v1.9
|
|
9
9
|
|
|
10
|
-
See also <https://www.cog-genomics.org/plink/1.9/input#simulate
|
|
10
|
+
See also <https://www.cog-genomics.org/plink/1.9/input#simulate> and
|
|
11
|
+
<https://pwwang.github.io/biopipen/api/biopipen.ns.snp/#biopipen.ns.snp.PlinkSimulation>
|
|
11
12
|
|
|
12
13
|
Input:
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
14
|
+
configfile: Configuration file containing the parameters for the simulation.
|
|
15
|
+
The configuration file (in toml, yaml or json format) should contain a
|
|
16
|
+
dictionary of parameters. The parameters are listed in `envs` except
|
|
17
|
+
`ncores`, which is used for parallelization. You can set parameters
|
|
18
|
+
in `envs` and override them in the configuration file.
|
|
16
19
|
|
|
17
20
|
Output:
|
|
18
21
|
outdir: Output directory containing the simulated data
|
|
@@ -21,9 +24,11 @@ class PlinkSimulation(Proc):
|
|
|
21
24
|
SNPs and columns representing samples.
|
|
22
25
|
|
|
23
26
|
Envs:
|
|
27
|
+
nsnps (type=int): Number of SNPs to simulate
|
|
28
|
+
ncases (type=int): Number of cases to simulate
|
|
29
|
+
nctrls (type=int): Number of controls to simulate
|
|
24
30
|
plink: Path to PLINK v1.9
|
|
25
|
-
seed (type=int): Random seed.
|
|
26
|
-
If not set, seed will not be set.
|
|
31
|
+
seed (type=int): Random seed. If not set, seed will not be set.
|
|
27
32
|
label: Prefix label for the SNPs.
|
|
28
33
|
prevalence (type=float): Disease prevalence.
|
|
29
34
|
minfreq (type=float): Minimum allele frequency.
|
|
@@ -41,19 +46,17 @@ class PlinkSimulation(Proc):
|
|
|
41
46
|
This only affects the sample names in the genotype matrix file
|
|
42
47
|
(`out.gtmat`).
|
|
43
48
|
"""
|
|
44
|
-
input = "
|
|
49
|
+
input = "configfile:file"
|
|
45
50
|
output = [
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
),
|
|
50
|
-
(
|
|
51
|
-
"gtmat:file:{{in.nsnps | int}}_"
|
|
52
|
-
"{{in.ncases | int}}xcases_{{in.nctrls | int}}xctrls.plink_sim/gtmat.txt"
|
|
53
|
-
),
|
|
51
|
+
"outdir:dir:{{in.configfile | stem}}.plink_sim",
|
|
52
|
+
"gtmat:file:{{in.configfile | stem}}.plink_sim/"
|
|
53
|
+
"{{in.configfile | stem}}-gtmat.txt",
|
|
54
54
|
]
|
|
55
55
|
lang = config.lang.python
|
|
56
56
|
envs = {
|
|
57
|
+
"nsnps": None,
|
|
58
|
+
"ncases": None,
|
|
59
|
+
"nctrls": None,
|
|
57
60
|
"plink": config.exe.plink,
|
|
58
61
|
"seed": None,
|
|
59
62
|
"label": "SNP",
|
biopipen/ns/tcr.py
CHANGED
|
@@ -983,6 +983,7 @@ class CloneResidency(Proc):
|
|
|
983
983
|
before calculating the clone residency. For example, `Clones > 1` to filter
|
|
984
984
|
out singletons.
|
|
985
985
|
prefix: The prefix of the cell barcodes in the `Seurat` object.
|
|
986
|
+
upset_ymax: The maximum value of the y-axis in the upset bar plots.
|
|
986
987
|
upset_trans: The transformation to apply to the y axis of upset bar plots.
|
|
987
988
|
For example, `log10` or `sqrt`. If not specified, the y axis will be
|
|
988
989
|
plotted as is. Note that the position of the bar plots will be dodged
|
|
@@ -1007,6 +1008,7 @@ class CloneResidency(Proc):
|
|
|
1007
1008
|
"mutaters": {},
|
|
1008
1009
|
"subset": None,
|
|
1009
1010
|
"prefix": "{Sample}_",
|
|
1011
|
+
"upset_ymax": None,
|
|
1010
1012
|
"upset_trans": None,
|
|
1011
1013
|
"cases": {},
|
|
1012
1014
|
}
|
|
@@ -1595,3 +1597,74 @@ class TESSA(Proc):
|
|
|
1595
1597
|
}
|
|
1596
1598
|
script = "file://../scripts/tcr/TESSA.R"
|
|
1597
1599
|
plugin_opts = {"report": "file://../reports/tcr/TESSA.svelte"}
|
|
1600
|
+
|
|
1601
|
+
|
|
1602
|
+
class TCRDock(Proc):
|
|
1603
|
+
"""Using TCRDock to predict the structure of MHC-peptide-TCR complexes
|
|
1604
|
+
|
|
1605
|
+
See <https://github.com/phbradley/TCRdock>.
|
|
1606
|
+
|
|
1607
|
+
Input:
|
|
1608
|
+
configfile: The config file for TCRDock
|
|
1609
|
+
It should be a toml file with the keys listed in `envs`, including
|
|
1610
|
+
`organism`, `mhc_class`, `mhc`, `peptide`, `va`, `ja`, `vb`, `jb`,
|
|
1611
|
+
`cdr3a`, and `cdr3b`.
|
|
1612
|
+
The values will overwrite the values in `envs`.
|
|
1613
|
+
|
|
1614
|
+
Output:
|
|
1615
|
+
outdir: The output directory containing the results
|
|
1616
|
+
|
|
1617
|
+
Envs:
|
|
1618
|
+
organism: The organism of the TCR, peptide and MHC
|
|
1619
|
+
mhc_class (type=int): The MHC class, either `1` or `2`
|
|
1620
|
+
mhc: The MHC allele, e.g., `A*02:01`
|
|
1621
|
+
peptide: The peptide sequence
|
|
1622
|
+
va: The V alpha gene
|
|
1623
|
+
ja: The J alpha gene
|
|
1624
|
+
vb: The V beta gene
|
|
1625
|
+
jb: The J beta gene
|
|
1626
|
+
cdr3a: The CDR3 alpha sequence
|
|
1627
|
+
cdr3b: The CDR3 beta sequence
|
|
1628
|
+
python: The path of python with dependencies for `tcrdock` installed.
|
|
1629
|
+
If not provided, `TCRDock.lang` will be used (the same interpreter
|
|
1630
|
+
used for the wrapper script).
|
|
1631
|
+
It could also be a list to specify, for example, a python in a conda
|
|
1632
|
+
environment (e.g., `["conda", "run", "-n", "myenv", "python"]`).
|
|
1633
|
+
tmpdir: The temporary directory used to clone the `tcrdock` source code if
|
|
1634
|
+
`envs.tcrdock` is not provided.
|
|
1635
|
+
tcrdock: The path to the `tcrdock` source code repo.
|
|
1636
|
+
You need to clone the source code from the github repository.
|
|
1637
|
+
<https://github.com/phbradley/TCRdock> at
|
|
1638
|
+
revision c5a7af42eeb0c2a4492a4d4fe803f1f9aafb6193 at main branch.
|
|
1639
|
+
You also have to run `download_blast.py` after cloning to download the
|
|
1640
|
+
blast database in the directory.
|
|
1641
|
+
If not provided, we will clone the source code to the `envs.tmpdir`
|
|
1642
|
+
directory and run the `download_blast.py` script.
|
|
1643
|
+
model_name: The model name to use
|
|
1644
|
+
model_file: The model file to use.
|
|
1645
|
+
If provided as a relative path, it should be relative to the
|
|
1646
|
+
`<envs.data_dir>/params/`, otherwise, it should be the full path.
|
|
1647
|
+
data_dir: The data directory that contains the model files.
|
|
1648
|
+
The model files should be in the `params` subdirectory.
|
|
1649
|
+
"""
|
|
1650
|
+
input = "configfile:file"
|
|
1651
|
+
output = "outdir:dir:{{in.configfile | stem}}.tcrdock"
|
|
1652
|
+
lang = config.lang.python
|
|
1653
|
+
envs = {
|
|
1654
|
+
"tcrdock": None,
|
|
1655
|
+
"organism": "human",
|
|
1656
|
+
"mhc_class": 1,
|
|
1657
|
+
"mhc": "A*02:01",
|
|
1658
|
+
"peptide": None,
|
|
1659
|
+
"va": None,
|
|
1660
|
+
"ja": None,
|
|
1661
|
+
"vb": None,
|
|
1662
|
+
"jb": None,
|
|
1663
|
+
"cdr3a": None,
|
|
1664
|
+
"cdr3b": None,
|
|
1665
|
+
"python": None,
|
|
1666
|
+
"model_name": "model_2_ptm_ft4",
|
|
1667
|
+
"model_file": "tcrpmhc_run4_af_mhc_params_891.pkl",
|
|
1668
|
+
"data_dir": None,
|
|
1669
|
+
}
|
|
1670
|
+
script = "file://../scripts/tcr/TCRDock.py"
|
|
@@ -74,10 +74,10 @@ expand_each <- function(name, case) {
|
|
|
74
74
|
}
|
|
75
75
|
} else {
|
|
76
76
|
if (is.null(case$subset)) {
|
|
77
|
-
eachs <-
|
|
77
|
+
eachs <- meta %>%
|
|
78
78
|
pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
|
|
79
79
|
} else {
|
|
80
|
-
eachs <-
|
|
80
|
+
eachs <- meta %>% filter(!!parse_expr(case$subset)) %>%
|
|
81
81
|
pull(case$each) %>% unique() %>% na.omit() %>% as.vector()
|
|
82
82
|
}
|
|
83
83
|
for (each in eachs) {
|
|
@@ -1,88 +1,124 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
+
from multiprocessing import Pool
|
|
3
|
+
from slugify import slugify
|
|
4
|
+
from simpleconf import Config
|
|
2
5
|
from biopipen.utils.misc import logger, run_command, dict_to_cli_args
|
|
3
6
|
|
|
4
|
-
|
|
5
|
-
ncases = {{in.ncases | repr}} # pyright: ignore
|
|
6
|
-
nctrls = {{in.nctrls | repr}} # pyright: ignore
|
|
7
|
+
configfile = {{in.configfile | repr}} # pyright: ignore # noqa: E999
|
|
7
8
|
outdir = {{out.outdir | repr}} # pyright: ignore
|
|
8
9
|
gtmatfile = {{out.gtmat | repr}} # pyright: ignore
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
"
|
|
55
|
-
"
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
#
|
|
77
|
-
#
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
10
|
+
config = Config.load(configfile)
|
|
11
|
+
|
|
12
|
+
default_nsnps = {{envs.nsnps | repr}} # pyright: ignore
|
|
13
|
+
default_ncases = {{envs.ncases | repr}} # pyright: ignore
|
|
14
|
+
default_nctrls = {{envs.nctrls | repr}} # pyright: ignore
|
|
15
|
+
default_plink = {{envs.plink | repr}} # pyright: ignore
|
|
16
|
+
default_seed = {{envs.seed | repr}} # pyright: ignore
|
|
17
|
+
default_label = {{envs.label | repr}} # pyright: ignore
|
|
18
|
+
default_prevalence = {{envs.prevalence | repr}} # pyright: ignore
|
|
19
|
+
default_minfreq = {{envs.minfreq | repr}} # pyright: ignore
|
|
20
|
+
default_maxfreq = {{envs.maxfreq | repr}} # pyright: ignore
|
|
21
|
+
default_hetodds = {{envs.hetodds | repr}} # pyright: ignore
|
|
22
|
+
default_homodds = {{envs.homodds | repr}} # pyright: ignore
|
|
23
|
+
default_missing = {{envs.missing | repr}} # pyright: ignore
|
|
24
|
+
default_args = {{envs.args | repr}} # pyright: ignore
|
|
25
|
+
default_transpose_gtmat = {{envs.transpose_gtmat | repr}} # pyright: ignore
|
|
26
|
+
default_sample_prefix = {{envs.sample_prefix | repr}} # pyright: ignore
|
|
27
|
+
|
|
28
|
+
defaults = {
|
|
29
|
+
"nsnps": default_nsnps,
|
|
30
|
+
"ncases": default_ncases,
|
|
31
|
+
"nctrls": default_nctrls,
|
|
32
|
+
"plink": default_plink,
|
|
33
|
+
"seed": default_seed,
|
|
34
|
+
"label": default_label,
|
|
35
|
+
"prevalence": default_prevalence,
|
|
36
|
+
"minfreq": default_minfreq,
|
|
37
|
+
"maxfreq": default_maxfreq,
|
|
38
|
+
"hetodds": default_hetodds,
|
|
39
|
+
"homodds": default_homodds,
|
|
40
|
+
"missing": default_missing,
|
|
41
|
+
# "args": default_args,
|
|
42
|
+
"transpose_gtmat": default_transpose_gtmat,
|
|
43
|
+
"sample_prefix": default_sample_prefix,
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
def do_one_simulation(confitems):
|
|
47
|
+
args = default_args.copy()
|
|
48
|
+
args.update(confitems.pop("args", {}))
|
|
49
|
+
confs = defaults.copy()
|
|
50
|
+
confs.update(confitems)
|
|
51
|
+
transpose_gtmat = confs.pop("transpose_gtmat")
|
|
52
|
+
sample_prefix = confs.pop("sample_prefix")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
logger.debug(" Generating parameters file")
|
|
56
|
+
params_file = Path(outdir) / "params.txt"
|
|
57
|
+
params_file.write_text(
|
|
58
|
+
f"{confs['nsnps']}\t{confs['label']}\t{confs['minfreq']}\t"
|
|
59
|
+
f"{confs['maxfreq']}\t{confs['hetodds']}\t{confs['homodds']}\n"
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
if confs.get('seed') is not None:
|
|
63
|
+
args["seed"] = confs['seed']
|
|
64
|
+
|
|
65
|
+
args["simulate"] = params_file
|
|
66
|
+
args["out"] = Path(outdir) / "sim_snps"
|
|
67
|
+
args["simulate-ncases"] = confs['ncases']
|
|
68
|
+
args["simulate-ncontrols"] = confs['nctrls']
|
|
69
|
+
args["simulate-prevalence"] = confs['prevalence']
|
|
70
|
+
args["simulate-missing"] = confs['missing']
|
|
71
|
+
|
|
72
|
+
cmd = [confs['plink']] + dict_to_cli_args(args)
|
|
73
|
+
|
|
74
|
+
logger.debug(" Running PLINK simulation ...")
|
|
75
|
+
run_command(cmd, fg=True)
|
|
76
|
+
|
|
77
|
+
# Transpose the genotype matrix
|
|
78
|
+
# CHR SNP (C)M POS COUNTED ALT per0_per0 per1_per1 per2_per2
|
|
79
|
+
# 1 SNP_0 0 1 D d 1 0 1
|
|
80
|
+
# 1 SNP_1 0 2 d D 0 1 0
|
|
81
|
+
# 1 SNP_2 0 3 d D 0 0 0
|
|
82
|
+
# 1 SNP_3 0 4 d D 0 0 0
|
|
83
|
+
# 1 SNP_4 0 5 D d 1 2 1
|
|
84
|
+
cmd = [
|
|
85
|
+
confs['plink'],
|
|
86
|
+
"--recode",
|
|
87
|
+
"A" if transpose_gtmat else "A-transpose",
|
|
88
|
+
"tab",
|
|
89
|
+
"--bfile",
|
|
90
|
+
args["out"],
|
|
91
|
+
"--out",
|
|
92
|
+
gtmatfile + ".plink.recoded",
|
|
93
|
+
]
|
|
94
|
+
logger.debug("- Recoding into genotype matrix ...")
|
|
95
|
+
run_command(cmd, fg=True)
|
|
96
|
+
|
|
97
|
+
logger.debug(" Saving genotype matrix ...")
|
|
98
|
+
## transpose_gtmat = False
|
|
99
|
+
# SNP_COUNTED per0_per0 per1_per1 per2_per2
|
|
100
|
+
# SNP_0_D 1 0 1
|
|
101
|
+
# SNP_1_d 0 1 0
|
|
102
|
+
# SNP_2_d 0 0 0
|
|
103
|
+
# SNP_3_d 0 0 0
|
|
104
|
+
# SNP_4_D 1 2 1
|
|
105
|
+
## transpose_gtmat = True
|
|
106
|
+
# FID_IID SNP_0_D SNP_1_D SNP_2_D
|
|
107
|
+
# per0_per0 0 1 1
|
|
108
|
+
# per1_per1 0 2 0
|
|
109
|
+
# per2_per2 0 0 0
|
|
110
|
+
# per3_per3 1 1 0
|
|
111
|
+
# per4_per4 0 0 0
|
|
112
|
+
if transpose_gtmat:
|
|
113
|
+
cmd = f"cut -f1,2,7- {gtmatfile}.plink.recoded.raw | sed 's/\\t/_/'"
|
|
114
|
+
else:
|
|
115
|
+
cmd = f"cut -f2,5,7- {gtmatfile}.plink.recoded.traw | sed 's/\\t/_/'"
|
|
116
|
+
|
|
117
|
+
if sample_prefix:
|
|
118
|
+
cmd = f"{cmd} | sed 's/per[0-9]\\+_per/{sample_prefix}/g'"
|
|
119
|
+
|
|
120
|
+
cmd = f"{cmd} > {gtmatfile}"
|
|
121
|
+
run_command(cmd, fg=True)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
do_one_simulation(config)
|
|
@@ -42,21 +42,21 @@ diffcoex_score <- function(group) {
|
|
|
42
42
|
|
|
43
43
|
gvals <- unique(gdata[, group, drop = TRUE])
|
|
44
44
|
if (length(gvals) < 2) {
|
|
45
|
-
|
|
45
|
+
log_debug(" Less than 2 groups in the input. Skipping ...")
|
|
46
46
|
return(NULL)
|
|
47
47
|
}
|
|
48
48
|
rs <- lapply(gvals, function(gval) {
|
|
49
49
|
samples <- rownames(gdata[gdata[[group]] == gval, , drop = FALSE])
|
|
50
50
|
expr <- indata[samples, , drop = FALSE]
|
|
51
51
|
if (length(samples) < 3) {
|
|
52
|
-
|
|
52
|
+
log_debug(" Less than 3 samples in one of the groups. Skipping ...")
|
|
53
53
|
return(NULL)
|
|
54
54
|
}
|
|
55
55
|
cor.pairs(as.matrix(expr), cor.method = method)
|
|
56
56
|
})
|
|
57
57
|
rs[sapply(rs, is.null)] <- NULL
|
|
58
58
|
if (length(rs) < 2) {
|
|
59
|
-
|
|
59
|
+
log_debug(" Less than 2 groups with at least 3 samples. Skipping ...")
|
|
60
60
|
return(NULL)
|
|
61
61
|
}
|
|
62
62
|
N <- length(rs)
|
|
@@ -26,6 +26,7 @@ section <- {{ envs.section | r }}
|
|
|
26
26
|
mutaters <- {{ envs.mutaters | r }}
|
|
27
27
|
subset <- {{ envs.subset | r }}
|
|
28
28
|
prefix <- {{ envs.prefix | r }}
|
|
29
|
+
upset_ymax <- {{ envs.upset_ymax | r }}
|
|
29
30
|
upset_trans <- {{ envs.upset_trans | r }}
|
|
30
31
|
cases <- {{ envs.cases | r }}
|
|
31
32
|
|
|
@@ -40,6 +41,7 @@ if (is.null(cases) || length(cases) == 0) {
|
|
|
40
41
|
order = sample_order,
|
|
41
42
|
subset = subset,
|
|
42
43
|
section = section,
|
|
44
|
+
upset_ymax = upset_ymax,
|
|
43
45
|
upset_trans = upset_trans
|
|
44
46
|
)
|
|
45
47
|
)
|
|
@@ -50,6 +52,7 @@ if (is.null(cases) || length(cases) == 0) {
|
|
|
50
52
|
cases[[key]]$order <- cases[[key]]$order %||% sample_order
|
|
51
53
|
cases[[key]]$section <- cases[[key]]$section %||% section
|
|
52
54
|
cases[[key]]$subset <- cases[[key]]$subset %||% subset
|
|
55
|
+
cases[[key]]$upset_ymax <- cases[[key]]$upset_ymax %||% upset_ymax
|
|
53
56
|
cases[[key]]$upset_trans <- cases[[key]]$upset_trans %||% upset_trans
|
|
54
57
|
}
|
|
55
58
|
}
|
|
@@ -320,7 +323,7 @@ plot_venndg <- function(counts, groups, singletons) {
|
|
|
320
323
|
venn_p
|
|
321
324
|
}
|
|
322
325
|
|
|
323
|
-
plot_upset <- function(counts, singletons, upset_trans) {
|
|
326
|
+
plot_upset <- function(counts, singletons, upset_ymax, upset_trans) {
|
|
324
327
|
|
|
325
328
|
cnts <- column_to_rownames(counts, "CDR3.aa") %>%
|
|
326
329
|
mutate(across(everything(), ~ as.integer(as.logical(.x))))
|
|
@@ -345,12 +348,21 @@ plot_upset <- function(counts, singletons, upset_trans) {
|
|
|
345
348
|
geom_text(
|
|
346
349
|
aes(label = ..count.., vjust = ifelse(..type == "Multiplets", -0.25, +1.25)),
|
|
347
350
|
stat = "count", position = "stack", size = 2.8)
|
|
351
|
+
if (!is.null(upset_ymax)) {
|
|
352
|
+
p <- p + ylim(0, upset_ymax)
|
|
353
|
+
}
|
|
348
354
|
} else {
|
|
349
355
|
p <- p + geom_bar(stat = "count", position = "dodge2") +
|
|
350
356
|
geom_text(
|
|
351
357
|
aes(label = ..count..),
|
|
352
|
-
stat = "count", position = position_dodge(width = 0.9), vjust = -0.25, size = 2.5)
|
|
353
|
-
|
|
358
|
+
stat = "count", position = position_dodge(width = 0.9), vjust = -0.25, size = 2.5)
|
|
359
|
+
|
|
360
|
+
# limit the y and do log10 transformation
|
|
361
|
+
if (!is.null(upset_ymax)) {
|
|
362
|
+
p <- p + scale_y_continuous(trans = "log10", limits = c(1, upset_ymax))
|
|
363
|
+
} else {
|
|
364
|
+
p <- p + scale_y_continuous(trans = "log10")
|
|
365
|
+
}
|
|
354
366
|
}
|
|
355
367
|
|
|
356
368
|
upset(
|
|
@@ -519,7 +531,7 @@ handle_subject <- function(i, subjects, casename, case) {
|
|
|
519
531
|
upset_dir <- file.path(casedir, "upset")
|
|
520
532
|
upset_png <- file.path(upset_dir, paste0("upset_", slugify(subject), ".png"))
|
|
521
533
|
png(upset_png, res = 100, height = 600, width = 800)
|
|
522
|
-
print(plot_upset(counts, singletons, case$upset_trans))
|
|
534
|
+
print(plot_upset(counts, singletons, case$upset_ymax, case$upset_trans))
|
|
523
535
|
dev.off()
|
|
524
536
|
|
|
525
537
|
h <- headings(case$section, casename, "Overlapping Clones (UpSet Plots)")
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import rtoml
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from tempfile import gettempdir
|
|
8
|
+
from biopipen.utils.misc import logger, run_command
|
|
9
|
+
|
|
10
|
+
configfile = {{in.configfile | repr}} # pyright: ignore
|
|
11
|
+
outdir = Path({{out.outdir | repr}}) # pyright: ignore
|
|
12
|
+
envs = {{envs | dict | repr}} # pyright: ignore
|
|
13
|
+
python = sys.executable
|
|
14
|
+
|
|
15
|
+
args = envs.copy()
|
|
16
|
+
config = rtoml.load(Path(configfile))
|
|
17
|
+
args.update(config)
|
|
18
|
+
model_name = args.pop("model_name")
|
|
19
|
+
model_file = Path(args.pop("model_file"))
|
|
20
|
+
data_dir = args.pop("data_dir", None)
|
|
21
|
+
tcrdock = args.pop("tcrdock", None)
|
|
22
|
+
tmpdir = args.pop("tmpdir", gettempdir())
|
|
23
|
+
python = args.pop("python", python)
|
|
24
|
+
|
|
25
|
+
if not isinstance(python, (list, tuple)):
|
|
26
|
+
python = [python]
|
|
27
|
+
|
|
28
|
+
if not data_dir:
|
|
29
|
+
raise ValueError("`envs.data_dir` is required")
|
|
30
|
+
|
|
31
|
+
if not tcrdock:
|
|
32
|
+
logger.info("- `envs.tcrdock` is not provided, cloning the repository ... ")
|
|
33
|
+
repo_url = "https://github.com/phbradley/TCRdock"
|
|
34
|
+
commit_id = "c5a7af42eeb0c2a4492a4d4fe803f1f9aafb6193"
|
|
35
|
+
branch = "main"
|
|
36
|
+
|
|
37
|
+
from git import Repo
|
|
38
|
+
repo = Repo.clone_from(repo_url, tmpdir, branch=branch, no_checkout=True)
|
|
39
|
+
repo.git.checkout(commit_id)
|
|
40
|
+
tcrdock = Path(tmpdir) / "TCRdock"
|
|
41
|
+
|
|
42
|
+
logger.info("- Running download_blast.py ...")
|
|
43
|
+
cmd = [
|
|
44
|
+
*python,
|
|
45
|
+
tcrdock / "download_blast.py",
|
|
46
|
+
]
|
|
47
|
+
run_command(cmd, fg=True, cwd=str(tcrdock))
|
|
48
|
+
|
|
49
|
+
if not model_file.is_absolute():
|
|
50
|
+
model_file = Path(data_dir) / "params" / model_file
|
|
51
|
+
|
|
52
|
+
os.environ['TF_FORCE_UNIFIED_MEMORY'] = '1'
|
|
53
|
+
os.environ['XLA_PYTHON_CLIENT_MEM_FRACTION'] = '4.0'
|
|
54
|
+
|
|
55
|
+
logger.info("- Composing targets file ... ")
|
|
56
|
+
targets_file = outdir / "user_targets.tsv"
|
|
57
|
+
targets = pd.DataFrame(
|
|
58
|
+
[
|
|
59
|
+
dict(
|
|
60
|
+
organism=args['organism'],
|
|
61
|
+
mhc_class=args['mhc_class'],
|
|
62
|
+
mhc=args['mhc'],
|
|
63
|
+
peptide=args['peptide'],
|
|
64
|
+
va=args['va'],
|
|
65
|
+
ja=args['ja'],
|
|
66
|
+
cdr3a=args['cdr3a'],
|
|
67
|
+
vb=args['vb'],
|
|
68
|
+
jb=args['jb'],
|
|
69
|
+
cdr3b=args['cdr3b'],
|
|
70
|
+
)
|
|
71
|
+
]
|
|
72
|
+
)
|
|
73
|
+
targets.to_csv(targets_file, sep="\t", index=False)
|
|
74
|
+
|
|
75
|
+
logger.info("- Generating inputs for AlphaFold modeling ... ")
|
|
76
|
+
cmd = [
|
|
77
|
+
*python,
|
|
78
|
+
tcrdock + "/setup_for_alphafold.py",
|
|
79
|
+
"--targets_tsvfile", targets_file,
|
|
80
|
+
"--output_dir", outdir / "user_output",
|
|
81
|
+
"--new_docking",
|
|
82
|
+
]
|
|
83
|
+
run_command(cmd, fg=True)
|
|
84
|
+
|
|
85
|
+
logger.info("- Running AlphaFold modeling ... ")
|
|
86
|
+
cmd = [
|
|
87
|
+
*python,
|
|
88
|
+
tcrdock + "/run_prediction.py",
|
|
89
|
+
"--verbose",
|
|
90
|
+
"--targets", outdir / "user_output/targets.tsv",
|
|
91
|
+
"--outfile_prefix", f"{outdir}/{args['peptide']}",
|
|
92
|
+
"--model_names", model_name,
|
|
93
|
+
"--data_dir", data_dir,
|
|
94
|
+
"--model_params_files", model_file,
|
|
95
|
+
]
|
|
96
|
+
run_command(cmd, fg=True, env={"XLA_FLAGS": "--xla_gpu_force_compilation_parallelism=1"})
|
|
97
|
+
|
|
98
|
+
logger.info("- Calculating the PAE ... ")
|
|
99
|
+
cmd = [
|
|
100
|
+
*python,
|
|
101
|
+
tcrdock + "/add_pmhc_tcr_pae_to_tsvfile.py",
|
|
102
|
+
"--infile", f"{outdir}/{args['peptide']}_final.tsv",
|
|
103
|
+
"--outfile", f"{outdir}/{args['peptide']}_w_pae.tsv",
|
|
104
|
+
]
|
|
105
|
+
|
|
106
|
+
run_command(cmd, fg=True)
|
biopipen/utils/misc.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
|
+
import os
|
|
4
5
|
import sys
|
|
5
6
|
import logging
|
|
6
7
|
from typing import List
|
|
7
8
|
from biopipen.core.filters import dict_to_cli_args # noqa: F401
|
|
8
9
|
|
|
9
10
|
logger = logging.getLogger("biopipen_job")
|
|
10
|
-
logger.setLevel(logging.
|
|
11
|
+
logger.setLevel(logging.DEBUG)
|
|
11
12
|
_handler = logging.StreamHandler(sys.stdout)
|
|
12
13
|
# Use same log format as in R
|
|
13
14
|
# {sprintf("%-7s", level)} [{format(time, "%Y-%m-%d %H:%M:%S")}] {msg}
|
|
@@ -100,6 +101,9 @@ def run_command(
|
|
|
100
101
|
kwargs["stderr"] = sys.stderr
|
|
101
102
|
kwargs["universal_newlines"] = True
|
|
102
103
|
|
|
104
|
+
if "env" in kwargs:
|
|
105
|
+
kwargs["env"] = {**os.environ, **kwargs["env"]}
|
|
106
|
+
|
|
103
107
|
try:
|
|
104
108
|
p = Popen(cmd, **kwargs)
|
|
105
109
|
except Exception as e:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
biopipen/__init__.py,sha256=
|
|
1
|
+
biopipen/__init__.py,sha256=_CerHyxnUJ2hU0sB7noT3JBjjI0ohEM5boAPBoFxOg4,23
|
|
2
2
|
biopipen/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
biopipen/core/config.py,sha256=edK5xnDhM8j27srDzsxubi934NMrglLoKrdcC8qsEPk,1069
|
|
4
4
|
biopipen/core/config.toml,sha256=20RCI30Peee1EQdfb_UbV3Hf74XUPndJnYZlUThytsw,1781
|
|
@@ -23,10 +23,10 @@ biopipen/ns/plot.py,sha256=yguxmErUOH-hOM10JfuI_sXw2p49XF8yGR_gXfbd5yQ,4066
|
|
|
23
23
|
biopipen/ns/rnaseq.py,sha256=bKAa6friFWof4yDTWZQahm1MS-lrdetO1GqDKdfxXYc,7708
|
|
24
24
|
biopipen/ns/scrna.py,sha256=eLCXGyVcgq3vQ-br5SFHHmfIVPaJN4kSFxtCiJiYamg,102716
|
|
25
25
|
biopipen/ns/scrna_metabolic_landscape.py,sha256=9s1NvH3aMaNDXyfwy9TdzGcSP_lIW4JqhLgknNZcIKE,28313
|
|
26
|
-
biopipen/ns/snp.py,sha256=
|
|
26
|
+
biopipen/ns/snp.py,sha256=Nq20NJzQ9YiqE9mhtCUH6dfs7528o1e4N-j9PewjAsQ,3016
|
|
27
27
|
biopipen/ns/stats.py,sha256=yJ6C1CXF84T7DDs9mgufqUOr89Rl6kybE5ji8Vnx6cw,13693
|
|
28
28
|
biopipen/ns/tcgamaf.py,sha256=AFbUJIxiMSvsVY3RcHgjRFuMnNh2DG3Mr5slLNEyz6o,1455
|
|
29
|
-
biopipen/ns/tcr.py,sha256=
|
|
29
|
+
biopipen/ns/tcr.py,sha256=5bMnxhbeB08UrAw8YSh2BkA3AUFeoOajhE6DhHt74K4,87863
|
|
30
30
|
biopipen/ns/vcf.py,sha256=cdkKroii0_nl_bSP2cnO09qESUAhHqu6btOiTSKS79Y,15314
|
|
31
31
|
biopipen/ns/web.py,sha256=3zucrDo-IVsSnIvlw-deoScuxqWa6OMTm8Vo-R4E44Q,2224
|
|
32
32
|
biopipen/reports/bam/CNAClinic.svelte,sha256=D4IxQcgDCPQZMbXog-aZP5iJEQTK2N4i0C60e_iXyfs,213
|
|
@@ -132,7 +132,7 @@ biopipen/scripts/scrna/ExprImpution.R,sha256=7768ezrr59xUZDXq8lO9jj2XhnkSsx-xxBm
|
|
|
132
132
|
biopipen/scripts/scrna/MarkersFinder.R,sha256=TvLVozCsgL_R-EMW7SbkCRdpchxt9k7Ewwz5nb3TOYo,22172
|
|
133
133
|
biopipen/scripts/scrna/MetaMarkers.R,sha256=J__ZZ4K4P-Jdty1lZhRldu4rAErLxMtDZkRUlOqZea4,10852
|
|
134
134
|
biopipen/scripts/scrna/ModuleScoreCalculator.R,sha256=JSHd-_-KiFqW8avCGxgU4T-C5BtDr2u0kwIvEu2lFIg,4188
|
|
135
|
-
biopipen/scripts/scrna/RadarPlots.R,sha256=
|
|
135
|
+
biopipen/scripts/scrna/RadarPlots.R,sha256=TGPUTUcHOHgd9rsNtLYT-N6WHiFNDBZsiIoqkyAJh0A,13020
|
|
136
136
|
biopipen/scripts/scrna/SCImpute.R,sha256=dSJOHhmJ3x_72LBRXT72dbCti5oiB85CJ-OjWtqONbk,2958
|
|
137
137
|
biopipen/scripts/scrna/ScFGSEA.R,sha256=M6YeqUNa_0bq1qmL8dutQR3o5v2jy_gICCLaWw5c3A4,5738
|
|
138
138
|
biopipen/scripts/scrna/Seurat2AnnData.R,sha256=qz4u-B5J3GMwttubnNnByJXreziFbrP5Mak0L0q7eG0,1557
|
|
@@ -160,9 +160,9 @@ biopipen/scripts/scrna_metabolic_landscape/MetabolicFeatures.R,sha256=b77yG5FeRs
|
|
|
160
160
|
biopipen/scripts/scrna_metabolic_landscape/MetabolicFeaturesIntraSubset.R,sha256=ic8Fy8QqYDGh_izmvZVJ3KL66podg_CSF5ITL3FZsvo,5196
|
|
161
161
|
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayActivity.R,sha256=95DLX1Rz0tobOuDZ8V9YdGgO0KiNthhccoeeOK21tno,16216
|
|
162
162
|
biopipen/scripts/scrna_metabolic_landscape/MetabolicPathwayHeterogeneity.R,sha256=rQ9iwGh9FNRZlJJzM4QItdyXmebfzLAq05ZAjb1kGUw,9831
|
|
163
|
-
biopipen/scripts/snp/PlinkSimulation.py,sha256=
|
|
163
|
+
biopipen/scripts/snp/PlinkSimulation.py,sha256=mSSoGGG6sbEPBcUGdHgbebUrg4DiHeyNyc7jLPjV5pY,4169
|
|
164
164
|
biopipen/scripts/stats/ChowTest.R,sha256=4p7NULmfOZSfeBSQ04els0h3cXOK5yeCJJ4-gEBPOGk,3617
|
|
165
|
-
biopipen/scripts/stats/DiffCoexpr.R,sha256=
|
|
165
|
+
biopipen/scripts/stats/DiffCoexpr.R,sha256=5hQDV2_7bKdKUsOGMZUa0GS5rc7kFspxonNyFEPmtbc,4516
|
|
166
166
|
biopipen/scripts/stats/LiquidAssoc.R,sha256=s-XJbFoOfH4eWSkxbbOSHZ1x16lY0Sdod_V1KvSkM8k,3727
|
|
167
167
|
biopipen/scripts/stats/MetaPvalue.R,sha256=c26lYC4rxQ3D7vRvsXJ4_M-QIYTDTV8AEjXrag2_srU,3957
|
|
168
168
|
biopipen/scripts/tcgamaf/Maf2Vcf.py,sha256=Cxh7fiSNCxWDTfIJqZDOOnaSrw-85S_fH2U-PWY03hc,704
|
|
@@ -170,7 +170,7 @@ biopipen/scripts/tcgamaf/MafAddChr.py,sha256=V10HMisl12O3ZfXuRmFNdy5p-3mr43WCvy0
|
|
|
170
170
|
biopipen/scripts/tcgamaf/maf2vcf.pl,sha256=hJKcH-NbgWK6fmK7f3qex7ozJJl-PqCNPXqpwfcHwJg,22707
|
|
171
171
|
biopipen/scripts/tcr/Attach2Seurat.R,sha256=C91TAh1cLSxWkdFPf84pbxlpTYMuWq_rduG4eiIkXZI,1345
|
|
172
172
|
biopipen/scripts/tcr/CDR3AAPhyschem.R,sha256=-0BS6cdt5GfQJphA3HlDgGjWr4XFF-7INLJyMBHQNAc,16628
|
|
173
|
-
biopipen/scripts/tcr/CloneResidency.R,sha256=
|
|
173
|
+
biopipen/scripts/tcr/CloneResidency.R,sha256=nFPPPknJPEX-RU16uqQZzYMmJqmWqUAun_FI8GpJ7iw,21520
|
|
174
174
|
biopipen/scripts/tcr/CloneSizeQQPlot.R,sha256=5FPfWQjxTsv59KSDQaDWj3C95zPQMngKG7qOf95NEzI,4527
|
|
175
175
|
biopipen/scripts/tcr/GIANA/GIANA.py,sha256=0qLhgCWxT8K-4JvORA03CzBPTT5pd4Di5B_DgrHXbFA,47198
|
|
176
176
|
biopipen/scripts/tcr/GIANA/GIANA4.py,sha256=Z7Q3cUr1Pvmy4CFADN0P7i9g1-HbzWROMqk5HvL_F1Q,45762
|
|
@@ -193,6 +193,7 @@ biopipen/scripts/tcr/ImmunarchSplitIdents.R,sha256=FGCeGV0uSmFU91lKkldUAeV4A2m3h
|
|
|
193
193
|
biopipen/scripts/tcr/SampleDiversity.R,sha256=jQ1OU3b8vswD8tZhLt3fkcqJKrl2bhQX0giHM2rXz3Y,2643
|
|
194
194
|
biopipen/scripts/tcr/TCRClusterStats.R,sha256=D7q1svXQxl1uOya8bePvR9e6NJXjCjXbPsXnEPTWdlE,12004
|
|
195
195
|
biopipen/scripts/tcr/TCRClustering.R,sha256=eflUsYfq4aEaX9BVL0MiB7lNlot_L-8VaReK516go84,9236
|
|
196
|
+
biopipen/scripts/tcr/TCRDock.py,sha256=jjzxMWp-hs0LDtA1mVbiWDvUieSO7X-F9yeKGy1LSTM,3026
|
|
196
197
|
biopipen/scripts/tcr/TESSA.R,sha256=bfOixWLZy8yi0MzXncP67KjtCukwXEzsK5fCdMzB5VM,6822
|
|
197
198
|
biopipen/scripts/tcr/TESSA_source/Atchley_factors.csv,sha256=SumqDOqP67P54uM7Cuc5_O_rySTWcGo7eX3psMSPX9s,763
|
|
198
199
|
biopipen/scripts/tcr/TESSA_source/BriseisEncoder.py,sha256=z4_Q_6StymffuUGGjHP1-B3aTsXtamKao5Q1-Kg9has,6831
|
|
@@ -230,14 +231,14 @@ biopipen/utils/gene.py,sha256=qE_BqTayrJWxRdniffhcz6OhZcw9GUoOrj2EtFWH9Gw,2246
|
|
|
230
231
|
biopipen/utils/gsea.R,sha256=UMQOlWGstQTOBScvy1wIzrB7I3CE28Xo2v1sy4lmJ-M,7549
|
|
231
232
|
biopipen/utils/io.R,sha256=jIYdqdn0iRWfQYAZa5CjXi3fikqmYvPPLIXhobRe8sw,537
|
|
232
233
|
biopipen/utils/misc.R,sha256=jXusPDCxSIaYRq_qm4khUsu9nyMhbpBVcj8BVn4j8Ic,10629
|
|
233
|
-
biopipen/utils/misc.py,sha256=
|
|
234
|
+
biopipen/utils/misc.py,sha256=KJziAFY4Kl-0ZsO93vteY9gRLZg9BSYig-TDocHY36k,3601
|
|
234
235
|
biopipen/utils/mutate_helpers.R,sha256=Bqy6Oi4rrPEPJw0Jq32bVAwwBfZv7JJL9jFcK5x-cek,17649
|
|
235
236
|
biopipen/utils/plot.R,sha256=pzl37PomNeUZPxohHZ2w93j3Fc4T0Qrc62FF-9MTKdw,4417
|
|
236
237
|
biopipen/utils/reference.py,sha256=6bPSwQa-GiDfr7xLR9a5T64Ey40y24yn3QfQ5wDFZkU,4420
|
|
237
238
|
biopipen/utils/rnaseq.R,sha256=Ro2B2dG-Z2oVaT5tkwp9RHBz4dp_RF-JcizlM5GYXFs,1298
|
|
238
239
|
biopipen/utils/single_cell.R,sha256=pJjYP8bIZpNAtTQ32rOXhZxaM1Y-6D-xUcK3pql9tbk,4316
|
|
239
240
|
biopipen/utils/vcf.py,sha256=ajXs0M_QghEctlvUlSRjWQIABVF02wPdYd-0LP4mIsU,9377
|
|
240
|
-
biopipen-0.27.
|
|
241
|
-
biopipen-0.27.
|
|
242
|
-
biopipen-0.27.
|
|
243
|
-
biopipen-0.27.
|
|
241
|
+
biopipen-0.27.2.dist-info/METADATA,sha256=G3Cij0e6iDpAZ87kUeVZabL8MHa689YLGzIFIjaLmG4,878
|
|
242
|
+
biopipen-0.27.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
243
|
+
biopipen-0.27.2.dist-info/entry_points.txt,sha256=wu70aoBcv1UahVbB_5237MY-9M9_mzqmWjDD-oi3yz0,621
|
|
244
|
+
biopipen-0.27.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|