gemmi-protools 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gemmi-protools might be problematic. Click here for more details.
- gemmi_protools/utils/fixer.py +101 -85
- {gemmi_protools-0.1.8.dist-info → gemmi_protools-0.1.9.dist-info}/METADATA +1 -1
- {gemmi_protools-0.1.8.dist-info → gemmi_protools-0.1.9.dist-info}/RECORD +6 -6
- {gemmi_protools-0.1.8.dist-info → gemmi_protools-0.1.9.dist-info}/WHEEL +1 -1
- {gemmi_protools-0.1.8.dist-info → gemmi_protools-0.1.9.dist-info}/licenses/LICENSE +0 -0
- {gemmi_protools-0.1.8.dist-info → gemmi_protools-0.1.9.dist-info}/top_level.txt +0 -0
gemmi_protools/utils/fixer.py
CHANGED
|
@@ -6,33 +6,35 @@ import gzip
|
|
|
6
6
|
import io
|
|
7
7
|
import os
|
|
8
8
|
import pathlib
|
|
9
|
+
import re
|
|
9
10
|
import shutil
|
|
10
11
|
import subprocess
|
|
11
12
|
import time
|
|
12
13
|
import uuid
|
|
13
|
-
from typing import Union
|
|
14
|
+
from typing import Union
|
|
14
15
|
|
|
16
|
+
import openmm
|
|
15
17
|
import pdbfixer
|
|
16
18
|
from openmm import app
|
|
17
|
-
import openmm
|
|
18
19
|
from typeguard import typechecked
|
|
19
20
|
|
|
21
|
+
from gemmi_protools import StructureParser
|
|
20
22
|
from gemmi_protools.io.cif_opts import _is_cif
|
|
21
23
|
from gemmi_protools.io.pdb_opts import _is_pdb
|
|
22
24
|
|
|
23
25
|
|
|
24
26
|
@typechecked
|
|
25
|
-
def _load_by_pbdfixer(path: Union[str, pathlib.Path],
|
|
27
|
+
def _load_by_pbdfixer(path: Union[str, pathlib.Path], cpu_platform=True) -> pdbfixer.PDBFixer:
|
|
26
28
|
"""
|
|
27
29
|
|
|
28
30
|
Args:
|
|
29
31
|
path:
|
|
30
|
-
|
|
32
|
+
cpu_platform: default True, if False, auto select platform
|
|
31
33
|
|
|
32
34
|
Returns:
|
|
33
35
|
|
|
34
36
|
"""
|
|
35
|
-
if
|
|
37
|
+
if cpu_platform:
|
|
36
38
|
platform = openmm.Platform.getPlatformByName('CPU')
|
|
37
39
|
else:
|
|
38
40
|
platform = None
|
|
@@ -66,9 +68,8 @@ def clean_structure(input_file: Union[str, pathlib.Path],
|
|
|
66
68
|
keep_heterogens: str = "all",
|
|
67
69
|
replace_nonstandard: bool = True,
|
|
68
70
|
ph: Union[float, int] = 7.0,
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
use_platform=True
|
|
71
|
+
cpu_platform=True,
|
|
72
|
+
clean_connect=True
|
|
72
73
|
):
|
|
73
74
|
"""
|
|
74
75
|
|
|
@@ -87,9 +88,8 @@ def clean_structure(input_file: Union[str, pathlib.Path],
|
|
|
87
88
|
none: remove all heterogens
|
|
88
89
|
:param replace_nonstandard: default True, replace all non-standard residues to standard ones
|
|
89
90
|
:param ph: default 7.0, ph values to add missing hydrogen atoms
|
|
90
|
-
:param
|
|
91
|
-
:param
|
|
92
|
-
:param use_platform: default True to use CPU platform, if False, auto select platform
|
|
91
|
+
:param cpu_platform: default True to use CPU platform, if False, auto select platform
|
|
92
|
+
:param clean_connect: default True to clean CONECT lines in output pdb
|
|
93
93
|
|
|
94
94
|
:return:
|
|
95
95
|
str, status message of fixing
|
|
@@ -102,16 +102,14 @@ def clean_structure(input_file: Union[str, pathlib.Path],
|
|
|
102
102
|
######################################################
|
|
103
103
|
# load structure
|
|
104
104
|
######################################################
|
|
105
|
-
fixer = _load_by_pbdfixer(input_file,
|
|
105
|
+
fixer = _load_by_pbdfixer(input_file, cpu_platform)
|
|
106
106
|
|
|
107
107
|
######################################################
|
|
108
108
|
# check
|
|
109
109
|
######################################################
|
|
110
110
|
fixer.findMissingResidues()
|
|
111
111
|
fixer.findMissingAtoms()
|
|
112
|
-
ratio = len(fixer.missingAtoms) / fixer.topology.getNumResidues()
|
|
113
|
-
if ratio > threshold:
|
|
114
|
-
return dict(input=input_file, msg="Too many residues with missing atoms: %.2f" % ratio)
|
|
112
|
+
ratio = "%.2f" % (len(fixer.missingAtoms) / fixer.topology.getNumResidues(),)
|
|
115
113
|
|
|
116
114
|
######################################################
|
|
117
115
|
# replace non-standard residues
|
|
@@ -166,73 +164,94 @@ def clean_structure(input_file: Union[str, pathlib.Path],
|
|
|
166
164
|
msg_str = "Finished"
|
|
167
165
|
except Exception as e:
|
|
168
166
|
msg_str = str(e)
|
|
167
|
+
ratio = "*"
|
|
168
|
+
else:
|
|
169
|
+
if clean_connect:
|
|
170
|
+
output_lines = []
|
|
171
|
+
with open(output_file, "r") as in_handle:
|
|
172
|
+
for line in in_handle:
|
|
173
|
+
if not re.match("CONECT", line):
|
|
174
|
+
output_lines.append(line)
|
|
175
|
+
with open(output_file, "w") as out_handle:
|
|
176
|
+
print("".join(output_lines), file=out_handle)
|
|
169
177
|
|
|
170
|
-
return dict(input=input_file, msg=msg_str)
|
|
178
|
+
return dict(input=input_file, msg=msg_str, res_ratio_with_missing_atoms=ratio)
|
|
171
179
|
|
|
172
180
|
|
|
173
181
|
@typechecked
|
|
174
|
-
def
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
if os.path.exists(dst_path):
|
|
182
|
-
os.remove(dst_path)
|
|
183
|
-
shutil.move(src_path, dst_folder)
|
|
184
|
-
|
|
182
|
+
def repair_structure(input_file: str,
|
|
183
|
+
output_file: str,
|
|
184
|
+
complex_with_dna: bool = False,
|
|
185
|
+
complex_with_rna: bool = False,
|
|
186
|
+
timeout: Union[int, float] = 3600):
|
|
187
|
+
"""
|
|
185
188
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
189
|
+
:param input_file: .pdb or .cif or .pdb.gz or .cif.gz
|
|
190
|
+
:param output_file: .pdb file
|
|
191
|
+
:param complex_with_dna: bool, default False, not debug yet
|
|
192
|
+
:param complex_with_rna: bool, default False, not debug yet
|
|
193
|
+
:param timeout: float or int
|
|
194
|
+
:return:
|
|
195
|
+
"""
|
|
196
|
+
############################################################
|
|
197
|
+
# Check and convert input_file to .pdb if not
|
|
198
|
+
############################################################
|
|
199
|
+
assert os.path.splitext(output_file)[1] == ".pdb", "output_file Not .pdb: %s" % output_file
|
|
200
|
+
assert _is_cif(input_file) or _is_pdb(input_file), "Not .pdb or .cif or .pdb.gz or .cif.gz: %s" % input_file
|
|
201
|
+
|
|
202
|
+
# input_file and output_file can't be the same path
|
|
203
|
+
p_in = pathlib.Path(input_file).expanduser().resolve()
|
|
204
|
+
p_out = pathlib.Path(output_file).expanduser().resolve()
|
|
205
|
+
|
|
206
|
+
assert str(p_in) != str(p_out), "input_file and output_file can't be the same path"
|
|
207
|
+
############################################################
|
|
208
|
+
# Config Path
|
|
209
|
+
############################################################
|
|
210
|
+
file_name = os.path.basename(input_file)
|
|
211
|
+
stem_name = os.path.splitext(file_name)[0]
|
|
212
|
+
|
|
213
|
+
in_dir = os.path.dirname(input_file)
|
|
214
|
+
out_dir = os.path.dirname(output_file)
|
|
192
215
|
if not os.path.isdir(out_dir):
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
216
|
+
os.makedirs(out_dir)
|
|
217
|
+
|
|
218
|
+
temp_dir = os.path.join(out_dir, "_RepairTemp_%s" % str(uuid.uuid4()))
|
|
219
|
+
if os.path.isdir(temp_dir):
|
|
220
|
+
shutil.rmtree(temp_dir)
|
|
221
|
+
os.makedirs(temp_dir)
|
|
222
|
+
|
|
223
|
+
if os.path.splitext(input_file)[1] != ".pdb":
|
|
224
|
+
# convert to .pdb
|
|
225
|
+
st = StructureParser()
|
|
226
|
+
st.load_from_file(input_file)
|
|
227
|
+
file_name_r = "%s.pdb" % stem_name
|
|
228
|
+
in_dir_r = temp_dir
|
|
229
|
+
st.to_pdb(os.path.join(in_dir_r, file_name_r))
|
|
230
|
+
else:
|
|
231
|
+
file_name_r = file_name
|
|
232
|
+
in_dir_r = in_dir
|
|
210
233
|
|
|
211
|
-
|
|
212
|
-
|
|
234
|
+
foldx_path = shutil.which("foldx")
|
|
235
|
+
if foldx_path is None:
|
|
236
|
+
raise RuntimeError("path of foldx is not set or found in PATH")
|
|
213
237
|
|
|
214
|
-
os.
|
|
238
|
+
cwd_dir = os.getcwd()
|
|
215
239
|
|
|
216
|
-
|
|
217
|
-
|
|
240
|
+
repair_cmd = [foldx_path,
|
|
241
|
+
"-c RepairPDB",
|
|
242
|
+
"--pdb %s" % file_name_r,
|
|
243
|
+
"--pdb-dir %s" % in_dir_r,
|
|
244
|
+
"--output-dir %s" % temp_dir
|
|
245
|
+
]
|
|
246
|
+
if complex_with_dna:
|
|
247
|
+
repair_cmd.append("--complexWithDNA true")
|
|
218
248
|
|
|
219
|
-
if
|
|
220
|
-
|
|
249
|
+
if complex_with_rna:
|
|
250
|
+
repair_cmd.append("--complexWithRNA true")
|
|
221
251
|
|
|
222
|
-
|
|
223
|
-
command_settings = ["cd %s" % sub_temp_dir,
|
|
224
|
-
"&&",
|
|
225
|
-
foldx_path,
|
|
226
|
-
"-c RepairPDB",
|
|
227
|
-
"--pdb %s" % pdb_file,
|
|
228
|
-
"--pdb-dir %s" % pdb_dir,
|
|
229
|
-
"--output-dir %s" % sub_temp_dir,
|
|
230
|
-
"&&",
|
|
231
|
-
"cd %s" % old_dir
|
|
232
|
-
]
|
|
252
|
+
command_settings = ["cd %s &&" % temp_dir] + repair_cmd + ["&& cd %s" % cwd_dir]
|
|
233
253
|
|
|
234
254
|
start = time.time()
|
|
235
|
-
|
|
236
255
|
try:
|
|
237
256
|
result = subprocess.run(" ".join(command_settings), shell=True, check=True,
|
|
238
257
|
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
|
|
@@ -241,22 +260,19 @@ def repair_structure(input_file: Union[str, pathlib.Path],
|
|
|
241
260
|
if result.returncode == 0:
|
|
242
261
|
msg_str = "Finished"
|
|
243
262
|
else:
|
|
244
|
-
msg_str = result.stderr
|
|
245
|
-
|
|
246
|
-
result_file = os.path.join(sub_temp_dir, "%s_Repair.pdb" % stem_name)
|
|
247
|
-
fxout_file = os.path.join(sub_temp_dir, "%s_Repair.fxout" % stem_name)
|
|
248
|
-
if os.path.exists(result_file) and os.path.exists(fxout_file):
|
|
249
|
-
move_with_overwrite(sub_temp_dir, out_dir, "%s_Repair.pdb" % stem_name)
|
|
250
|
-
move_with_overwrite(sub_temp_dir, out_dir, "%s_Repair.fxout" % stem_name)
|
|
251
|
-
except subprocess.CalledProcessError as e:
|
|
252
|
-
# Handle errors in the called executable
|
|
253
|
-
msg_str = e.stderr
|
|
263
|
+
msg_str = str(result.stderr)
|
|
254
264
|
except Exception as e:
|
|
255
|
-
|
|
256
|
-
|
|
265
|
+
msg_str = str(e)
|
|
266
|
+
else:
|
|
267
|
+
if msg_str == "Finished":
|
|
268
|
+
# just keep .pdb, ignore .fxout
|
|
269
|
+
result_file = os.path.join(temp_dir, "%s_Repair.pdb" % stem_name)
|
|
270
|
+
if os.path.exists(result_file):
|
|
271
|
+
shutil.move(result_file, output_file)
|
|
272
|
+
else:
|
|
273
|
+
msg_str = "result file not found"
|
|
257
274
|
finally:
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
shutil.rmtree(sub_temp_dir)
|
|
275
|
+
if os.path.isdir(temp_dir):
|
|
276
|
+
shutil.rmtree(temp_dir)
|
|
261
277
|
end = time.time()
|
|
262
|
-
return dict(input=input_file, msg=msg_str, use_time=round(end - start, 1))
|
|
278
|
+
return dict(input=input_file, output=output_file, msg=msg_str, use_time=round(end - start, 1))
|
|
@@ -11,10 +11,10 @@ gemmi_protools/io/struct_info.py,sha256=9nBj1Zer03S8_Wks7L7uRlc9PlbfCKzoaT32pKR5
|
|
|
11
11
|
gemmi_protools/utils/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
|
|
12
12
|
gemmi_protools/utils/align.py,sha256=CZcrvjy-ZbX2u7OAn-YGblbxaj9YFUDX4CFZcpbpnB8,6959
|
|
13
13
|
gemmi_protools/utils/dockq.py,sha256=XmMwVEy-H4p6sH_HPcDWA3TP77OWdih0fE_BQJDr4pU,4189
|
|
14
|
-
gemmi_protools/utils/fixer.py,sha256=
|
|
14
|
+
gemmi_protools/utils/fixer.py,sha256=RDmpoZpTrGdwuJQTTK1eif132MHV1I-T6Nt47ezgxTM,10236
|
|
15
15
|
gemmi_protools/utils/ppi.py,sha256=VWYsdxWwQoS1xwEYj5KB96Zz3F8r5Eyuw6NT3ReD-wc,2330
|
|
16
|
-
gemmi_protools-0.1.
|
|
17
|
-
gemmi_protools-0.1.
|
|
18
|
-
gemmi_protools-0.1.
|
|
19
|
-
gemmi_protools-0.1.
|
|
20
|
-
gemmi_protools-0.1.
|
|
16
|
+
gemmi_protools-0.1.9.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
|
|
17
|
+
gemmi_protools-0.1.9.dist-info/METADATA,sha256=OFqx1MONnQTZ7b65AspnNX0FIvtrE2gQVWT7yCTGhqE,567
|
|
18
|
+
gemmi_protools-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
19
|
+
gemmi_protools-0.1.9.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
|
|
20
|
+
gemmi_protools-0.1.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|