gemmi-protools 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gemmi-protools might be problematic. Click here for more details.
- gemmi_protools/utils/fixer.py +116 -83
- {gemmi_protools-0.1.7.dist-info → gemmi_protools-0.1.9.dist-info}/METADATA +1 -1
- {gemmi_protools-0.1.7.dist-info → gemmi_protools-0.1.9.dist-info}/RECORD +6 -6
- {gemmi_protools-0.1.7.dist-info → gemmi_protools-0.1.9.dist-info}/WHEEL +1 -1
- {gemmi_protools-0.1.7.dist-info → gemmi_protools-0.1.9.dist-info}/licenses/LICENSE +0 -0
- {gemmi_protools-0.1.7.dist-info → gemmi_protools-0.1.9.dist-info}/top_level.txt +0 -0
gemmi_protools/utils/fixer.py
CHANGED
|
@@ -6,22 +6,39 @@ import gzip
|
|
|
6
6
|
import io
|
|
7
7
|
import os
|
|
8
8
|
import pathlib
|
|
9
|
+
import re
|
|
9
10
|
import shutil
|
|
10
11
|
import subprocess
|
|
11
12
|
import time
|
|
12
13
|
import uuid
|
|
13
|
-
from typing import Union
|
|
14
|
+
from typing import Union
|
|
14
15
|
|
|
16
|
+
import openmm
|
|
15
17
|
import pdbfixer
|
|
16
18
|
from openmm import app
|
|
17
19
|
from typeguard import typechecked
|
|
18
20
|
|
|
21
|
+
from gemmi_protools import StructureParser
|
|
19
22
|
from gemmi_protools.io.cif_opts import _is_cif
|
|
20
23
|
from gemmi_protools.io.pdb_opts import _is_pdb
|
|
21
24
|
|
|
22
25
|
|
|
23
26
|
@typechecked
|
|
24
|
-
def _load_by_pbdfixer(path: Union[str, pathlib.Path]) -> pdbfixer.PDBFixer:
|
|
27
|
+
def _load_by_pbdfixer(path: Union[str, pathlib.Path], cpu_platform=True) -> pdbfixer.PDBFixer:
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
path:
|
|
32
|
+
cpu_platform: default True, if False, auto select platform
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
|
|
36
|
+
"""
|
|
37
|
+
if cpu_platform:
|
|
38
|
+
platform = openmm.Platform.getPlatformByName('CPU')
|
|
39
|
+
else:
|
|
40
|
+
platform = None
|
|
41
|
+
|
|
25
42
|
cur_path = pathlib.Path(path)
|
|
26
43
|
if _is_pdb(path) or _is_cif(path):
|
|
27
44
|
s1 = cur_path.suffixes[-1]
|
|
@@ -29,15 +46,15 @@ def _load_by_pbdfixer(path: Union[str, pathlib.Path]) -> pdbfixer.PDBFixer:
|
|
|
29
46
|
|
|
30
47
|
if s1 in [".pdb", ".cif"]:
|
|
31
48
|
# s1 suffix
|
|
32
|
-
fixer = pdbfixer.PDBFixer(filename=path)
|
|
49
|
+
fixer = pdbfixer.PDBFixer(filename=path, platform=platform)
|
|
33
50
|
else:
|
|
34
51
|
# s2 suffix
|
|
35
52
|
with gzip.open(path, "rb") as gz_handle:
|
|
36
53
|
with io.TextIOWrapper(gz_handle, encoding="utf-8") as text_io:
|
|
37
54
|
if s2 == ".pdb.gz":
|
|
38
|
-
fixer = pdbfixer.PDBFixer(pdbfile=text_io)
|
|
55
|
+
fixer = pdbfixer.PDBFixer(pdbfile=text_io, platform=platform)
|
|
39
56
|
else:
|
|
40
|
-
fixer = pdbfixer.PDBFixer(pdbxfile=text_io)
|
|
57
|
+
fixer = pdbfixer.PDBFixer(pdbxfile=text_io, platform=platform)
|
|
41
58
|
else:
|
|
42
59
|
raise ValueError("Only support .cif, .cif.gz, .pdb or .pdb.gz file, but got %s" % path)
|
|
43
60
|
return fixer
|
|
@@ -51,8 +68,8 @@ def clean_structure(input_file: Union[str, pathlib.Path],
|
|
|
51
68
|
keep_heterogens: str = "all",
|
|
52
69
|
replace_nonstandard: bool = True,
|
|
53
70
|
ph: Union[float, int] = 7.0,
|
|
54
|
-
|
|
55
|
-
|
|
71
|
+
cpu_platform=True,
|
|
72
|
+
clean_connect=True
|
|
56
73
|
):
|
|
57
74
|
"""
|
|
58
75
|
|
|
@@ -71,8 +88,8 @@ def clean_structure(input_file: Union[str, pathlib.Path],
|
|
|
71
88
|
none: remove all heterogens
|
|
72
89
|
:param replace_nonstandard: default True, replace all non-standard residues to standard ones
|
|
73
90
|
:param ph: default 7.0, ph values to add missing hydrogen atoms
|
|
74
|
-
:param
|
|
75
|
-
:param
|
|
91
|
+
:param cpu_platform: default True to use CPU platform, if False, auto select platform
|
|
92
|
+
:param clean_connect: default True to clean CONECT lines in output pdb
|
|
76
93
|
|
|
77
94
|
:return:
|
|
78
95
|
str, status message of fixing
|
|
@@ -85,16 +102,14 @@ def clean_structure(input_file: Union[str, pathlib.Path],
|
|
|
85
102
|
######################################################
|
|
86
103
|
# load structure
|
|
87
104
|
######################################################
|
|
88
|
-
fixer = _load_by_pbdfixer(input_file)
|
|
105
|
+
fixer = _load_by_pbdfixer(input_file, cpu_platform)
|
|
89
106
|
|
|
90
107
|
######################################################
|
|
91
108
|
# check
|
|
92
109
|
######################################################
|
|
93
110
|
fixer.findMissingResidues()
|
|
94
111
|
fixer.findMissingAtoms()
|
|
95
|
-
ratio = len(fixer.missingAtoms) / fixer.topology.getNumResidues()
|
|
96
|
-
if ratio > threshold:
|
|
97
|
-
return dict(input=input_file, msg="Too many residues with missing atoms: %.2f" % ratio)
|
|
112
|
+
ratio = "%.2f" % (len(fixer.missingAtoms) / fixer.topology.getNumResidues(),)
|
|
98
113
|
|
|
99
114
|
######################################################
|
|
100
115
|
# replace non-standard residues
|
|
@@ -149,73 +164,94 @@ def clean_structure(input_file: Union[str, pathlib.Path],
|
|
|
149
164
|
msg_str = "Finished"
|
|
150
165
|
except Exception as e:
|
|
151
166
|
msg_str = str(e)
|
|
167
|
+
ratio = "*"
|
|
168
|
+
else:
|
|
169
|
+
if clean_connect:
|
|
170
|
+
output_lines = []
|
|
171
|
+
with open(output_file, "r") as in_handle:
|
|
172
|
+
for line in in_handle:
|
|
173
|
+
if not re.match("CONECT", line):
|
|
174
|
+
output_lines.append(line)
|
|
175
|
+
with open(output_file, "w") as out_handle:
|
|
176
|
+
print("".join(output_lines), file=out_handle)
|
|
152
177
|
|
|
153
|
-
return dict(input=input_file, msg=msg_str)
|
|
178
|
+
return dict(input=input_file, msg=msg_str, res_ratio_with_missing_atoms=ratio)
|
|
154
179
|
|
|
155
180
|
|
|
156
181
|
@typechecked
|
|
157
|
-
def
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
if os.path.exists(dst_path):
|
|
165
|
-
os.remove(dst_path)
|
|
166
|
-
shutil.move(src_path, dst_folder)
|
|
167
|
-
|
|
182
|
+
def repair_structure(input_file: str,
|
|
183
|
+
output_file: str,
|
|
184
|
+
complex_with_dna: bool = False,
|
|
185
|
+
complex_with_rna: bool = False,
|
|
186
|
+
timeout: Union[int, float] = 3600):
|
|
187
|
+
"""
|
|
168
188
|
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
189
|
+
:param input_file: .pdb or .cif or .pdb.gz or .cif.gz
|
|
190
|
+
:param output_file: .pdb file
|
|
191
|
+
:param complex_with_dna: bool, default False, not debug yet
|
|
192
|
+
:param complex_with_rna: bool, default False, not debug yet
|
|
193
|
+
:param timeout: float or int
|
|
194
|
+
:return:
|
|
195
|
+
"""
|
|
196
|
+
############################################################
|
|
197
|
+
# Check and convert input_file to .pdb if not
|
|
198
|
+
############################################################
|
|
199
|
+
assert os.path.splitext(output_file)[1] == ".pdb", "output_file Not .pdb: %s" % output_file
|
|
200
|
+
assert _is_cif(input_file) or _is_pdb(input_file), "Not .pdb or .cif or .pdb.gz or .cif.gz: %s" % input_file
|
|
201
|
+
|
|
202
|
+
# input_file and output_file can't be the same path
|
|
203
|
+
p_in = pathlib.Path(input_file).expanduser().resolve()
|
|
204
|
+
p_out = pathlib.Path(output_file).expanduser().resolve()
|
|
205
|
+
|
|
206
|
+
assert str(p_in) != str(p_out), "input_file and output_file can't be the same path"
|
|
207
|
+
############################################################
|
|
208
|
+
# Config Path
|
|
209
|
+
############################################################
|
|
210
|
+
file_name = os.path.basename(input_file)
|
|
211
|
+
stem_name = os.path.splitext(file_name)[0]
|
|
212
|
+
|
|
213
|
+
in_dir = os.path.dirname(input_file)
|
|
214
|
+
out_dir = os.path.dirname(output_file)
|
|
175
215
|
if not os.path.isdir(out_dir):
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
216
|
+
os.makedirs(out_dir)
|
|
217
|
+
|
|
218
|
+
temp_dir = os.path.join(out_dir, "_RepairTemp_%s" % str(uuid.uuid4()))
|
|
219
|
+
if os.path.isdir(temp_dir):
|
|
220
|
+
shutil.rmtree(temp_dir)
|
|
221
|
+
os.makedirs(temp_dir)
|
|
222
|
+
|
|
223
|
+
if os.path.splitext(input_file)[1] != ".pdb":
|
|
224
|
+
# convert to .pdb
|
|
225
|
+
st = StructureParser()
|
|
226
|
+
st.load_from_file(input_file)
|
|
227
|
+
file_name_r = "%s.pdb" % stem_name
|
|
228
|
+
in_dir_r = temp_dir
|
|
229
|
+
st.to_pdb(os.path.join(in_dir_r, file_name_r))
|
|
230
|
+
else:
|
|
231
|
+
file_name_r = file_name
|
|
232
|
+
in_dir_r = in_dir
|
|
188
233
|
|
|
189
|
-
|
|
234
|
+
foldx_path = shutil.which("foldx")
|
|
235
|
+
if foldx_path is None:
|
|
236
|
+
raise RuntimeError("path of foldx is not set or found in PATH")
|
|
190
237
|
|
|
191
|
-
|
|
192
|
-
sub_temp_dir = os.path.join(temp_dir, "%s_%s" % (stem_name, str(uuid.uuid4())))
|
|
238
|
+
cwd_dir = os.getcwd()
|
|
193
239
|
|
|
194
|
-
|
|
195
|
-
|
|
240
|
+
repair_cmd = [foldx_path,
|
|
241
|
+
"-c RepairPDB",
|
|
242
|
+
"--pdb %s" % file_name_r,
|
|
243
|
+
"--pdb-dir %s" % in_dir_r,
|
|
244
|
+
"--output-dir %s" % temp_dir
|
|
245
|
+
]
|
|
246
|
+
if complex_with_dna:
|
|
247
|
+
repair_cmd.append("--complexWithDNA true")
|
|
196
248
|
|
|
197
|
-
|
|
249
|
+
if complex_with_rna:
|
|
250
|
+
repair_cmd.append("--complexWithRNA true")
|
|
198
251
|
|
|
199
|
-
|
|
200
|
-
foldx_path = shutil.which("foldx")
|
|
201
|
-
|
|
202
|
-
if foldx_path is None:
|
|
203
|
-
raise RuntimeError("path of foldx is not set or found in PATH")
|
|
204
|
-
|
|
205
|
-
old_dir = os.getcwd()
|
|
206
|
-
command_settings = ["cd %s" % sub_temp_dir,
|
|
207
|
-
"&&",
|
|
208
|
-
foldx_path,
|
|
209
|
-
"-c RepairPDB",
|
|
210
|
-
"--pdb %s" % pdb_file,
|
|
211
|
-
"--pdb-dir %s" % pdb_dir,
|
|
212
|
-
"--output-dir %s" % sub_temp_dir,
|
|
213
|
-
"&&",
|
|
214
|
-
"cd %s" % old_dir
|
|
215
|
-
]
|
|
252
|
+
command_settings = ["cd %s &&" % temp_dir] + repair_cmd + ["&& cd %s" % cwd_dir]
|
|
216
253
|
|
|
217
254
|
start = time.time()
|
|
218
|
-
|
|
219
255
|
try:
|
|
220
256
|
result = subprocess.run(" ".join(command_settings), shell=True, check=True,
|
|
221
257
|
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
|
@@ -224,22 +260,19 @@ def repair_structure(input_file: Union[str, pathlib.Path],
|
|
|
224
260
|
if result.returncode == 0:
|
|
225
261
|
msg_str = "Finished"
|
|
226
262
|
else:
|
|
227
|
-
msg_str = result.stderr
|
|
228
|
-
|
|
229
|
-
result_file = os.path.join(sub_temp_dir, "%s_Repair.pdb" % stem_name)
|
|
230
|
-
fxout_file = os.path.join(sub_temp_dir, "%s_Repair.fxout" % stem_name)
|
|
231
|
-
if os.path.exists(result_file) and os.path.exists(fxout_file):
|
|
232
|
-
move_with_overwrite(sub_temp_dir, out_dir, "%s_Repair.pdb" % stem_name)
|
|
233
|
-
move_with_overwrite(sub_temp_dir, out_dir, "%s_Repair.fxout" % stem_name)
|
|
234
|
-
except subprocess.CalledProcessError as e:
|
|
235
|
-
# Handle errors in the called executable
|
|
236
|
-
msg_str = e.stderr
|
|
263
|
+
msg_str = str(result.stderr)
|
|
237
264
|
except Exception as e:
|
|
238
|
-
|
|
239
|
-
|
|
265
|
+
msg_str = str(e)
|
|
266
|
+
else:
|
|
267
|
+
if msg_str == "Finished":
|
|
268
|
+
# just keep .pdb, ignore .fxout
|
|
269
|
+
result_file = os.path.join(temp_dir, "%s_Repair.pdb" % stem_name)
|
|
270
|
+
if os.path.exists(result_file):
|
|
271
|
+
shutil.move(result_file, output_file)
|
|
272
|
+
else:
|
|
273
|
+
msg_str = "result file not found"
|
|
240
274
|
finally:
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
shutil.rmtree(sub_temp_dir)
|
|
275
|
+
if os.path.isdir(temp_dir):
|
|
276
|
+
shutil.rmtree(temp_dir)
|
|
244
277
|
end = time.time()
|
|
245
|
-
return dict(input=input_file, msg=msg_str, use_time=round(end - start, 1))
|
|
278
|
+
return dict(input=input_file, output=output_file, msg=msg_str, use_time=round(end - start, 1))
|
|
@@ -11,10 +11,10 @@ gemmi_protools/io/struct_info.py,sha256=9nBj1Zer03S8_Wks7L7uRlc9PlbfCKzoaT32pKR5
|
|
|
11
11
|
gemmi_protools/utils/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
|
|
12
12
|
gemmi_protools/utils/align.py,sha256=CZcrvjy-ZbX2u7OAn-YGblbxaj9YFUDX4CFZcpbpnB8,6959
|
|
13
13
|
gemmi_protools/utils/dockq.py,sha256=XmMwVEy-H4p6sH_HPcDWA3TP77OWdih0fE_BQJDr4pU,4189
|
|
14
|
-
gemmi_protools/utils/fixer.py,sha256=
|
|
14
|
+
gemmi_protools/utils/fixer.py,sha256=RDmpoZpTrGdwuJQTTK1eif132MHV1I-T6Nt47ezgxTM,10236
|
|
15
15
|
gemmi_protools/utils/ppi.py,sha256=VWYsdxWwQoS1xwEYj5KB96Zz3F8r5Eyuw6NT3ReD-wc,2330
|
|
16
|
-
gemmi_protools-0.1.
|
|
17
|
-
gemmi_protools-0.1.
|
|
18
|
-
gemmi_protools-0.1.
|
|
19
|
-
gemmi_protools-0.1.
|
|
20
|
-
gemmi_protools-0.1.
|
|
16
|
+
gemmi_protools-0.1.9.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
|
|
17
|
+
gemmi_protools-0.1.9.dist-info/METADATA,sha256=OFqx1MONnQTZ7b65AspnNX0FIvtrE2gQVWT7yCTGhqE,567
|
|
18
|
+
gemmi_protools-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
19
|
+
gemmi_protools-0.1.9.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
|
|
20
|
+
gemmi_protools-0.1.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|