gemmi-protools 0.1.17__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gemmi-protools might be problematic. Click here for more details.
- gemmi_protools/__init__.py +1 -4
- gemmi_protools/io/convert.py +0 -3
- gemmi_protools/io/reader.py +749 -310
- gemmi_protools/{utils → tools}/align.py +38 -55
- gemmi_protools/tools/dockq.py +127 -0
- gemmi_protools/tools/mesh.py +95 -0
- gemmi_protools/{utils → tools}/pdb_annot.py +21 -106
- {gemmi_protools-0.1.17.dist-info → gemmi_protools-1.0.0.dist-info}/METADATA +14 -11
- gemmi_protools-1.0.0.dist-info/RECORD +19 -0
- gemmi_protools/io/cif_opts.py +0 -173
- gemmi_protools/io/parse_pdb_header.py +0 -387
- gemmi_protools/io/parser.py +0 -292
- gemmi_protools/io/pdb_opts.py +0 -179
- gemmi_protools/io/peptide.py +0 -32
- gemmi_protools/io/struct_info.py +0 -91
- gemmi_protools/utils/dockq.py +0 -139
- gemmi_protools/utils/fixer.py +0 -274
- gemmi_protools/utils/immune_complex.py +0 -787
- gemmi_protools/utils/ppi.py +0 -74
- gemmi_protools-0.1.17.dist-info/RECORD +0 -27
- /gemmi_protools/{utils → tools}/__init__.py +0 -0
- {gemmi_protools-0.1.17.dist-info → gemmi_protools-1.0.0.dist-info}/WHEEL +0 -0
- {gemmi_protools-0.1.17.dist-info → gemmi_protools-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {gemmi_protools-0.1.17.dist-info → gemmi_protools-1.0.0.dist-info}/top_level.txt +0 -0
gemmi_protools/utils/dockq.py
DELETED
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
@Author: Luo Jiejian
|
|
3
|
-
"""
|
|
4
|
-
import json
|
|
5
|
-
import os
|
|
6
|
-
import pathlib
|
|
7
|
-
import shutil
|
|
8
|
-
import subprocess
|
|
9
|
-
import tempfile
|
|
10
|
-
import uuid
|
|
11
|
-
from typing import Optional, Union
|
|
12
|
-
|
|
13
|
-
import pandas as pd
|
|
14
|
-
from typeguard import typechecked
|
|
15
|
-
|
|
16
|
-
from gemmi_protools.io.reader import StructureParser
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
@typechecked
def _read_model(model_file: Union[str, pathlib.Path]):
    """
    Load a structure file into a StructureParser.

    :param model_file: path of the structure file to read
    :return: the StructureParser instance after ``load_from_file`` and
             ``set_default_model`` have been called on it
    """
    parser = StructureParser()
    parser.load_from_file(model_file)
    parser.set_default_model()
    return parser
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
@typechecked
def dockq_score(query_model: Union[str, pathlib.Path],
                native_model: Union[str, pathlib.Path],
                mapping: Optional[str] = None):
    """
    Run the external ``DockQ`` program on a query/native structure pair.

    Both structures are re-written as minimal PDB files in a private temporary
    directory, DockQ is executed on them with ``--json`` output, and the
    ``best_result`` section of that JSON is returned as a table.

    :param query_model: path of the query (model) structure file
    :param native_model: path of the native (reference) structure file
    :param mapping: value passed to DockQ ``--mapping``; when None it is built
                    as ``<ids>:<ids>`` from the concatenated chain IDs of the
                    native structure
    :return: dict with keys
             value: pandas.DataFrame with columns DockQ, F1, chain1, chain2
                    (empty when DockQ failed)
             msg: "Finished" on success, otherwise the error text
             mapping: the mapping actually used
             model: query_model as given
             native: native_model as given
    :raises RuntimeError: if the DockQ executable is not found in PATH
    """
    dockq_program = shutil.which("DockQ")
    if dockq_program is None:
        raise RuntimeError("DockQ is need")

    q_st = _read_model(query_model)
    n_st = _read_model(native_model)

    if mapping is None:
        cid = "".join(n_st.chain_ids)
        mapping = cid + ":" + cid

    metrics = ['DockQ', 'F1', 'chain1', 'chain2']

    # mkdtemp creates a fresh directory atomically; everything that touches it
    # lives inside try/finally so it is removed even when writing the PDB files
    # or parsing the DockQ output raises.
    tmp_dir = tempfile.mkdtemp()
    try:
        result_file = os.path.join(tmp_dir, "result.json")
        q_file = os.path.join(tmp_dir, "q.pdb")
        n_file = os.path.join(tmp_dir, "n.pdb")
        q_st.to_pdb(q_file, write_minimal_pdb=True)
        n_st.to_pdb(n_file, write_minimal_pdb=True)

        # Argument list + no shell: paths containing spaces and mappings
        # containing shell metacharacters (e.g. '*') are passed verbatim.
        command = [dockq_program,
                   "--mapping", mapping,
                   "--json", result_file,
                   q_file, n_file]

        try:
            subprocess.run(command, check=True,
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                           timeout=300.0)
        except subprocess.CalledProcessError as e:
            # DockQ itself exited with a non-zero status
            msg = e.stderr.decode()
            outputs = pd.DataFrame(columns=metrics)
        except Exception as e:
            # Anything else: timeout, executable vanished, permissions, ...
            msg = str(e)
            outputs = pd.DataFrame(columns=metrics)
        else:
            with open(result_file, "r") as fin:
                vals = json.load(fin)
            msg = "Finished"
            outputs = pd.DataFrame(list(vals["best_result"].values()))[metrics]
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)

    return dict(value=outputs,
                msg=msg,
                mapping=mapping,
                model=query_model,
                native=native_model
                )
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def dockq_score_interface(query_model: Union[str, pathlib.Path],
                          native_model: Union[str, pathlib.Path],
                          chains_a: str,
                          chains_b: str):
    """
    Compute DockQ and F1 for one interface between two groups of chains.

    Each character of ``chains_a`` / ``chains_b`` is treated as one chain ID.
    When a group holds more than one chain, ``merge_chains`` is called on both
    structures for that group before scoring. The structures are then written
    to temporary PDB files and scored with ``dockq_score`` using the first
    chain ID of each group as the mapping.

    :param query_model: path of the query (model) structure file
    :param native_model: path of the native (reference) structure file
    :param chains_a: chain IDs of the first partner, one character per chain
    :param chains_b: chain IDs of the second partner, one character per chain
    :return: dict with keys
             DockQ: DockQ score rounded to 4 digits, or "" if DockQ gave no result
             F1: F1 score rounded to 4 digits, or "" if DockQ gave no result
             interface: "<chains_a>@<chains_b>"
             model: query_model as given
             native: native_model as given
    :raises ValueError: if chains_a or chains_b is empty
    :raises RuntimeError: if a requested chain is missing from either structure
    """
    # Fail early: an empty group would otherwise only surface as an IndexError
    # after temporary files had already been written.
    if not chains_a or not chains_b:
        raise ValueError("chains_a and chains_b must each contain at least one chain ID")

    ppi_if = chains_a + "@" + chains_b
    chs_a = list(chains_a)
    chs_b = list(chains_b)

    q_st = _read_model(query_model)
    n_st = _read_model(native_model)

    # Query is validated completely before native, as before.
    for label, st, source in (("query", q_st, query_model),
                              ("native", n_st, native_model)):
        for c in chs_a + chs_b:
            if c not in st.chain_ids:
                raise RuntimeError("Chain %s is not in the %s model: %s" % (c, label, source))

    # if multiple chains, merge to one
    for group in (chs_a, chs_b):
        if len(group) > 1:
            q_st.merge_chains(group)
            n_st.merge_chains(group)

    tmp_dir = tempfile.mkdtemp()
    try:
        q_file = os.path.join(tmp_dir, "qm.pdb")
        n_file = os.path.join(tmp_dir, "nm.pdb")
        q_st.to_pdb(q_file, write_minimal_pdb=True)
        n_st.to_pdb(n_file, write_minimal_pdb=True)

        chs = chs_a[0] + chs_b[0]
        result = dockq_score(q_file, n_file, mapping="%s:%s" % (chs, chs))
    finally:
        # dockq_score raises when DockQ is not installed; clean up regardless.
        shutil.rmtree(tmp_dir, ignore_errors=True)

    if len(result["value"]) > 0:
        best = result["value"].iloc[0]
        q_score = round(best["DockQ"], 4)
        f1 = round(best["F1"], 4)
    else:
        q_score = ""
        f1 = ""

    return dict(DockQ=q_score,
                F1=f1,
                interface=ppi_if,
                model=query_model,
                native=native_model
                )
|
gemmi_protools/utils/fixer.py
DELETED
|
@@ -1,274 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
@Author: Luo Jiejian
|
|
3
|
-
@Date: 2025/1/21
|
|
4
|
-
"""
|
|
5
|
-
import gzip
|
|
6
|
-
import io
|
|
7
|
-
import os
|
|
8
|
-
import pathlib
|
|
9
|
-
import re
|
|
10
|
-
import shutil
|
|
11
|
-
import subprocess
|
|
12
|
-
import time
|
|
13
|
-
import uuid
|
|
14
|
-
from typing import Union
|
|
15
|
-
|
|
16
|
-
import openmm
|
|
17
|
-
import pdbfixer
|
|
18
|
-
from openmm import app
|
|
19
|
-
from typeguard import typechecked
|
|
20
|
-
|
|
21
|
-
from gemmi_protools import StructureParser
|
|
22
|
-
from gemmi_protools.io.cif_opts import _is_cif
|
|
23
|
-
from gemmi_protools.io.pdb_opts import _is_pdb
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
@typechecked
def _load_by_pbdfixer(path: Union[str, pathlib.Path], cpu_platform=True) -> pdbfixer.PDBFixer:
    """
    Create a PDBFixer object from a structure file.

    :param path: structure file; must be accepted by ``_is_pdb`` or ``_is_cif``
                 (.pdb, .cif, .pdb.gz or .cif.gz)
    :param cpu_platform: default True to request the OpenMM 'CPU' platform,
                         if False, platform=None is passed to PDBFixer
    :return: the PDBFixer instance built from the file
    :raises ValueError: if the file is not a supported structure file
    """
    if cpu_platform:
        platform = openmm.Platform.getPlatformByName('CPU')
    else:
        platform = None

    if not (_is_pdb(path) or _is_cif(path)):
        raise ValueError("Only support .cif, .cif.gz, .pdb or .pdb.gz file, but got %s" % path)

    suffixes = pathlib.Path(path).suffixes
    if suffixes[-1] in [".pdb", ".cif"]:
        # Plain text file. PDBFixer inspects the file name with string
        # methods, so a pathlib.Path is converted to str first.
        return pdbfixer.PDBFixer(filename=str(path), platform=platform)

    # Compressed file: hand PDBFixer a decoded text stream and pick the
    # parser from the double suffix.
    with gzip.open(path, "rt", encoding="utf-8") as text_io:
        if "".join(suffixes[-2:]) == ".pdb.gz":
            fixer = pdbfixer.PDBFixer(pdbfile=text_io, platform=platform)
        else:
            fixer = pdbfixer.PDBFixer(pdbxfile=text_io, platform=platform)
    return fixer
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
@typechecked
def clean_structure(input_file: Union[str, pathlib.Path],
                    output_file: Union[str, pathlib.Path],
                    add_missing_residue: bool = False,
                    add_missing_atoms: str = "heavy",
                    keep_heterogens: str = "all",
                    replace_nonstandard: bool = True,
                    ph: Union[float, int] = 7.0,
                    cpu_platform=True,
                    clean_connect=True
                    ):
    """
    Clean a structure with PDBFixer and write the result to a file.

    :param input_file: str, Input structure file, support file format .cif, .cif.gz, .pdb or .pdb.gz
    :param output_file: str, Output structure file, support file format .cif, .pdb
    :param add_missing_residue: default False
    :param add_missing_atoms: default heavy, accepted values 'all', 'heavy', 'hydrogen', 'none'
            all: add missing heavy and hydrogen atoms
            heavy: add missing heavy atoms only
            hydrogen: add missing hydrogen atoms only
            none: not add missing atoms

    :param keep_heterogens: default all, accepted values 'all', 'water', 'none'
            all: keep all heterogens
            water: only keep water
            none: remove all heterogens
    :param replace_nonstandard: default True, replace all non-standard residues to standard ones
    :param ph: default 7.0, ph values to add missing hydrogen atoms
    :param cpu_platform: default True to use CPU platform, if False, auto select platform
    :param clean_connect: default True to remove lines starting with CONECT from the output file

    :return:
        dict with keys
            input: input_file as given
            msg: "Finished" if successful, otherwise the error message
            res_ratio_with_missing_atoms: number of entries in fixer.missingAtoms
                divided by the number of residues, formatted with two decimals,
                or "*" when processing failed
    """
    assert add_missing_atoms in ['all', 'heavy', 'hydrogen', 'none']
    assert keep_heterogens in ['all', 'water', 'none']

    try:
        ######################################################
        # load structure
        ######################################################
        fixer = _load_by_pbdfixer(input_file, cpu_platform)

        ######################################################
        # check
        ######################################################
        fixer.findMissingResidues()
        fixer.findMissingAtoms()
        ratio = "%.2f" % (len(fixer.missingAtoms) / fixer.topology.getNumResidues(),)

        ######################################################
        # replace non-standard residues
        ######################################################
        if replace_nonstandard:
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()

        ######################################################
        # remove heterogens
        ######################################################
        if keep_heterogens == 'none':
            fixer.removeHeterogens(keepWater=False)
        elif keep_heterogens == 'water':
            fixer.removeHeterogens(keepWater=True)

        ######################################################
        # missing residue
        ######################################################
        if add_missing_residue:
            fixer.findMissingResidues()
        else:
            fixer.missingResidues = {}

        ######################################################
        # missing atoms
        ######################################################
        fixer.findMissingAtoms()
        if add_missing_atoms not in ['all', 'heavy']:
            # heavy atoms are not wanted: clear what was found so that
            # addMissingAtoms() below adds nothing
            fixer.missingAtoms = {}
            fixer.missingTerminals = {}
        fixer.addMissingAtoms()
        if add_missing_atoms in ['all', 'hydrogen']:
            fixer.addMissingHydrogens(ph)

        ######################################################
        # output
        ######################################################
        suffix = pathlib.Path(output_file).suffix
        # Explicit raise instead of assert so the check survives `python -O`;
        # the message still ends up in the returned dict via the except below.
        if suffix not in [".pdb", ".cif"]:
            raise ValueError("output file must be .cif or .pdb")

        # dirname is "" for a bare file name; os.makedirs("") would fail, so
        # only create a directory when there is one to create.
        out_dir = os.path.dirname(output_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        with open(output_file, 'w') as out_handle:
            if suffix == ".pdb":
                app.PDBFile.writeFile(fixer.topology, fixer.positions, out_handle, keepIds=True)
            else:
                app.PDBxFile.writeFile(fixer.topology, fixer.positions, out_handle, keepIds=True)

        msg_str = "Finished"
    except Exception as e:
        msg_str = str(e)
        ratio = "*"
    else:
        if clean_connect:
            with open(output_file, "r") as in_handle:
                kept_lines = [line for line in in_handle if not line.startswith("CONECT")]
            # writelines keeps the file byte-exact apart from the removed
            # records (no extra trailing newline is appended).
            with open(output_file, "w") as out_handle:
                out_handle.writelines(kept_lines)

    return dict(input=input_file, msg=msg_str, res_ratio_with_missing_atoms=ratio)
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
@typechecked
def repair_structure(input_file: str,
                     output_file: str,
                     complex_with_dna: bool = False,
                     complex_with_rna: bool = False,
                     timeout: Union[int, float] = 3600):
    """
    Run the FoldX ``RepairPDB`` command on a structure.

    The input is loaded with StructureParser, its chain names are converted
    with ``make_chain_names_to_one_letter`` and it is written as ``in.pdb``
    into a temporary directory next to the output file. FoldX is executed in
    that directory and the resulting ``in_Repair.pdb`` is moved to
    ``output_file``. The temporary directory is always removed.

    :param input_file: .pdb or .cif or .pdb.gz or .cif.gz
    :param output_file: .pdb file
    :param complex_with_dna: bool, default False, not debug yet
    :param complex_with_rna: bool, default False, not debug yet
    :param timeout: float or int, seconds allowed for the FoldX run
    :return: dict with keys
             input: resolved absolute input path
             output: resolved absolute output path
             msg: "Finished" on success, otherwise the error text
             use_time: seconds spent on the FoldX run and cleanup, 1 decimal
    :raises RuntimeError: if the foldx executable is not found in PATH
    """
    ############################################################
    # Check and convert input_file to .pdb if not
    ############################################################
    input_file = str(pathlib.Path(input_file).expanduser().resolve())
    output_file = str(pathlib.Path(output_file).expanduser().resolve())
    # input_file and output_file can't be the same path
    assert input_file != output_file, "input_file and output_file can't be the same path"

    assert os.path.splitext(output_file)[1] == ".pdb", "output_file Not .pdb: %s" % output_file
    assert _is_cif(input_file) or _is_pdb(input_file), "Not .pdb or .cif or .pdb.gz or .cif.gz: %s" % input_file

    # Look for FoldX before creating anything on disk, so a missing
    # executable does not leave a temporary directory behind.
    foldx_path = shutil.which("foldx")
    if foldx_path is None:
        raise RuntimeError("path of foldx is not set or found in PATH")

    ############################################################
    # Config Path
    ############################################################
    out_dir = os.path.dirname(output_file)
    os.makedirs(out_dir, exist_ok=True)

    temp_dir = os.path.join(out_dir, "_RepairTemp_%s" % str(uuid.uuid4()))
    os.makedirs(temp_dir)

    try:
        # for fix the filename bug of foldx
        # rename the input always and save to .pdb
        st = StructureParser()
        st.load_from_file(input_file)
        # if exist non-1-letter chain ID, rename
        st.make_chain_names_to_one_letter()

        file_name_r = "in.pdb"
        st.to_pdb(os.path.join(temp_dir, file_name_r))

        # Argument list executed with cwd=temp_dir instead of a
        # "cd ... && foldx ... && cd ..." shell string: directories with
        # spaces or shell metacharacters are passed through unchanged.
        repair_cmd = [foldx_path,
                      "-c", "RepairPDB",
                      "--pdb", file_name_r,
                      "--pdb-dir", temp_dir,
                      "--output-dir", temp_dir
                      ]
        if complex_with_dna:
            repair_cmd += ["--complexWithDNA", "true"]

        if complex_with_rna:
            repair_cmd += ["--complexWithRNA", "true"]

        start = time.time()
        try:
            # check=True turns a non-zero exit status into CalledProcessError
            subprocess.run(repair_cmd, cwd=temp_dir, check=True,
                           stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                           timeout=timeout)
        except Exception as e:
            msg_str = str(e)
        else:
            # just keep .pdb, ignore .fxout
            result_file = os.path.join(temp_dir, "in_Repair.pdb")
            if os.path.exists(result_file):
                shutil.move(result_file, output_file)
                msg_str = "Finished"
            else:
                msg_str = "result file not found"
    finally:
        if os.path.isdir(temp_dir):
            shutil.rmtree(temp_dir)
    end = time.time()
    return dict(input=input_file, output=output_file, msg=msg_str, use_time=round(end - start, 1))
|