gemmi-protools 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gemmi-protools might be problematic. Click here for more details.

@@ -6,22 +6,39 @@ import gzip
6
6
  import io
7
7
  import os
8
8
  import pathlib
9
+ import re
9
10
  import shutil
10
11
  import subprocess
11
12
  import time
12
13
  import uuid
13
- from typing import Union, Optional
14
+ from typing import Union
14
15
 
16
+ import openmm
15
17
  import pdbfixer
16
18
  from openmm import app
17
19
  from typeguard import typechecked
18
20
 
21
+ from gemmi_protools import StructureParser
19
22
  from gemmi_protools.io.cif_opts import _is_cif
20
23
  from gemmi_protools.io.pdb_opts import _is_pdb
21
24
 
22
25
 
23
26
  @typechecked
24
- def _load_by_pbdfixer(path: Union[str, pathlib.Path]) -> pdbfixer.PDBFixer:
27
+ def _load_by_pbdfixer(path: Union[str, pathlib.Path], cpu_platform=True) -> pdbfixer.PDBFixer:
28
+ """
29
+
30
+ Args:
31
+ path:
32
+ cpu_platform: default True, if False, auto select platform
33
+
34
+ Returns:
35
+
36
+ """
37
+ if cpu_platform:
38
+ platform = openmm.Platform.getPlatformByName('CPU')
39
+ else:
40
+ platform = None
41
+
25
42
  cur_path = pathlib.Path(path)
26
43
  if _is_pdb(path) or _is_cif(path):
27
44
  s1 = cur_path.suffixes[-1]
@@ -29,15 +46,15 @@ def _load_by_pbdfixer(path: Union[str, pathlib.Path]) -> pdbfixer.PDBFixer:
29
46
 
30
47
  if s1 in [".pdb", ".cif"]:
31
48
  # s1 suffix
32
- fixer = pdbfixer.PDBFixer(filename=path)
49
+ fixer = pdbfixer.PDBFixer(filename=path, platform=platform)
33
50
  else:
34
51
  # s2 suffix
35
52
  with gzip.open(path, "rb") as gz_handle:
36
53
  with io.TextIOWrapper(gz_handle, encoding="utf-8") as text_io:
37
54
  if s2 == ".pdb.gz":
38
- fixer = pdbfixer.PDBFixer(pdbfile=text_io)
55
+ fixer = pdbfixer.PDBFixer(pdbfile=text_io, platform=platform)
39
56
  else:
40
- fixer = pdbfixer.PDBFixer(pdbxfile=text_io)
57
+ fixer = pdbfixer.PDBFixer(pdbxfile=text_io, platform=platform)
41
58
  else:
42
59
  raise ValueError("Only support .cif, .cif.gz, .pdb or .pdb.gz file, but got %s" % path)
43
60
  return fixer
@@ -51,8 +68,8 @@ def clean_structure(input_file: Union[str, pathlib.Path],
51
68
  keep_heterogens: str = "all",
52
69
  replace_nonstandard: bool = True,
53
70
  ph: Union[float, int] = 7.0,
54
- check_mode: bool = True,
55
- threshold: float = 0.3,
71
+ cpu_platform=True,
72
+ clean_connect=True
56
73
  ):
57
74
  """
58
75
 
@@ -71,8 +88,8 @@ def clean_structure(input_file: Union[str, pathlib.Path],
71
88
  none: remove all heterogens
72
89
  :param replace_nonstandard: default True, replace all non-standard residues to standard ones
73
90
  :param ph: default 7.0, ph values to add missing hydrogen atoms
74
- :param check_mode: default True to check the ratio of residues with missing atoms
75
- :param threshold: float, default 0.3, only use when check_mode=True
91
+ :param cpu_platform: default True to use CPU platform, if False, auto select platform
92
+ :param clean_connect: default True to clean CONECT lines in output pdb
76
93
 
77
94
  :return:
78
95
  str, status message of fixing
@@ -85,16 +102,14 @@ def clean_structure(input_file: Union[str, pathlib.Path],
85
102
  ######################################################
86
103
  # load structure
87
104
  ######################################################
88
- fixer = _load_by_pbdfixer(input_file)
105
+ fixer = _load_by_pbdfixer(input_file, cpu_platform)
89
106
 
90
107
  ######################################################
91
108
  # check
92
109
  ######################################################
93
110
  fixer.findMissingResidues()
94
111
  fixer.findMissingAtoms()
95
- ratio = len(fixer.missingAtoms) / fixer.topology.getNumResidues()
96
- if ratio > threshold:
97
- return dict(input=input_file, msg="Too many residues with missing atoms: %.2f" % ratio)
112
+ ratio = "%.2f" % (len(fixer.missingAtoms) / fixer.topology.getNumResidues(),)
98
113
 
99
114
  ######################################################
100
115
  # replace non-standard residues
@@ -149,73 +164,94 @@ def clean_structure(input_file: Union[str, pathlib.Path],
149
164
  msg_str = "Finished"
150
165
  except Exception as e:
151
166
  msg_str = str(e)
167
+ ratio = "*"
168
+ else:
169
+ if clean_connect:
170
+ output_lines = []
171
+ with open(output_file, "r") as in_handle:
172
+ for line in in_handle:
173
+ if not re.match("CONECT", line):
174
+ output_lines.append(line)
175
+ with open(output_file, "w") as out_handle:
176
+ print("".join(output_lines), file=out_handle)
152
177
 
153
- return dict(input=input_file, msg=msg_str)
178
+ return dict(input=input_file, msg=msg_str, res_ratio_with_missing_atoms=ratio)
154
179
 
155
180
 
156
181
  @typechecked
157
- def move_with_overwrite(src_folder: str, dst_folder: str, filename: str):
158
- assert os.path.isdir(src_folder)
159
- assert os.path.isdir(dst_folder)
160
-
161
- src_path = os.path.join(src_folder, filename)
162
- dst_path = os.path.join(dst_folder, filename)
163
-
164
- if os.path.exists(dst_path):
165
- os.remove(dst_path)
166
- shutil.move(src_path, dst_folder)
167
-
182
+ def repair_structure(input_file: str,
183
+ output_file: str,
184
+ complex_with_dna: bool = False,
185
+ complex_with_rna: bool = False,
186
+ timeout: Union[int, float] = 3600):
187
+ """
168
188
 
169
- @typechecked
170
- def repair_structure(input_file: Union[str, pathlib.Path],
171
- out_dir: Union[str, pathlib.Path],
172
- temp_dir: Union[str, pathlib.Path],
173
- foldx_path: Optional[str] = None,
174
- timeout=3600):
189
+ :param input_file: .pdb or .cif or .pdb.gz or .cif.gz
190
+ :param output_file: .pdb file
191
+ :param complex_with_dna: bool, default False, not debug yet
192
+ :param complex_with_rna: bool, default False, not debug yet
193
+ :param timeout: float or int
194
+ :return:
195
+ """
196
+ ############################################################
197
+ # Check and convert input_file to .pdb if not
198
+ ############################################################
199
+ assert os.path.splitext(output_file)[1] == ".pdb", "output_file Not .pdb: %s" % output_file
200
+ assert _is_cif(input_file) or _is_pdb(input_file), "Not .pdb or .cif or .pdb.gz or .cif.gz: %s" % input_file
201
+
202
+ # input_file and output_file can't be the same path
203
+ p_in = pathlib.Path(input_file).expanduser().resolve()
204
+ p_out = pathlib.Path(output_file).expanduser().resolve()
205
+
206
+ assert str(p_in) != str(p_out), "input_file and output_file can't be the same path"
207
+ ############################################################
208
+ # Config Path
209
+ ############################################################
210
+ file_name = os.path.basename(input_file)
211
+ stem_name = os.path.splitext(file_name)[0]
212
+
213
+ in_dir = os.path.dirname(input_file)
214
+ out_dir = os.path.dirname(output_file)
175
215
  if not os.path.isdir(out_dir):
176
- raise NotADirectoryError(out_dir)
177
-
178
- if not os.path.isdir(temp_dir):
179
- raise NotADirectoryError(temp_dir)
180
-
181
- in_path = pathlib.Path(input_file).expanduser().resolve()
182
- pdb_dir = str(in_path.parent)
183
- pdb_file = str(in_path.name)
184
- if not os.path.isfile(input_file):
185
- raise FileNotFoundError(input_file)
186
-
187
- assert pdb_dir != str(out_dir), "output directory can't be the directory of input_file"
216
+ os.makedirs(out_dir)
217
+
218
+ temp_dir = os.path.join(out_dir, "_RepairTemp_%s" % str(uuid.uuid4()))
219
+ if os.path.isdir(temp_dir):
220
+ shutil.rmtree(temp_dir)
221
+ os.makedirs(temp_dir)
222
+
223
+ if os.path.splitext(input_file)[1] != ".pdb":
224
+ # convert to .pdb
225
+ st = StructureParser()
226
+ st.load_from_file(input_file)
227
+ file_name_r = "%s.pdb" % stem_name
228
+ in_dir_r = temp_dir
229
+ st.to_pdb(os.path.join(in_dir_r, file_name_r))
230
+ else:
231
+ file_name_r = file_name
232
+ in_dir_r = in_dir
188
233
 
189
- stem_name = in_path.stem
234
+ foldx_path = shutil.which("foldx")
235
+ if foldx_path is None:
236
+ raise RuntimeError("path of foldx is not set or found in PATH")
190
237
 
191
- # create temp dir
192
- sub_temp_dir = os.path.join(temp_dir, "%s_%s" % (stem_name, str(uuid.uuid4())))
238
+ cwd_dir = os.getcwd()
193
239
 
194
- if os.path.isdir(sub_temp_dir):
195
- shutil.rmtree(sub_temp_dir)
240
+ repair_cmd = [foldx_path,
241
+ "-c RepairPDB",
242
+ "--pdb %s" % file_name_r,
243
+ "--pdb-dir %s" % in_dir_r,
244
+ "--output-dir %s" % temp_dir
245
+ ]
246
+ if complex_with_dna:
247
+ repair_cmd.append("--complexWithDNA true")
196
248
 
197
- os.makedirs(sub_temp_dir)
249
+ if complex_with_rna:
250
+ repair_cmd.append("--complexWithRNA true")
198
251
 
199
- if foldx_path is None:
200
- foldx_path = shutil.which("foldx")
201
-
202
- if foldx_path is None:
203
- raise RuntimeError("path of foldx is not set or found in PATH")
204
-
205
- old_dir = os.getcwd()
206
- command_settings = ["cd %s" % sub_temp_dir,
207
- "&&",
208
- foldx_path,
209
- "-c RepairPDB",
210
- "--pdb %s" % pdb_file,
211
- "--pdb-dir %s" % pdb_dir,
212
- "--output-dir %s" % sub_temp_dir,
213
- "&&",
214
- "cd %s" % old_dir
215
- ]
252
+ command_settings = ["cd %s &&" % temp_dir] + repair_cmd + ["&& cd %s" % cwd_dir]
216
253
 
217
254
  start = time.time()
218
-
219
255
  try:
220
256
  result = subprocess.run(" ".join(command_settings), shell=True, check=True,
221
257
  stdout=subprocess.PIPE, stderr=subprocess.PIPE,
@@ -224,22 +260,19 @@ def repair_structure(input_file: Union[str, pathlib.Path],
224
260
  if result.returncode == 0:
225
261
  msg_str = "Finished"
226
262
  else:
227
- msg_str = result.stderr
228
-
229
- result_file = os.path.join(sub_temp_dir, "%s_Repair.pdb" % stem_name)
230
- fxout_file = os.path.join(sub_temp_dir, "%s_Repair.fxout" % stem_name)
231
- if os.path.exists(result_file) and os.path.exists(fxout_file):
232
- move_with_overwrite(sub_temp_dir, out_dir, "%s_Repair.pdb" % stem_name)
233
- move_with_overwrite(sub_temp_dir, out_dir, "%s_Repair.fxout" % stem_name)
234
- except subprocess.CalledProcessError as e:
235
- # Handle errors in the called executable
236
- msg_str = e.stderr
263
+ msg_str = str(result.stderr)
237
264
  except Exception as e:
238
- # Handle other exceptions such as file not found or permissions issues
239
- msg_str = str(e).encode()
265
+ msg_str = str(e)
266
+ else:
267
+ if msg_str == "Finished":
268
+ # just keep .pdb, ignore .fxout
269
+ result_file = os.path.join(temp_dir, "%s_Repair.pdb" % stem_name)
270
+ if os.path.exists(result_file):
271
+ shutil.move(result_file, output_file)
272
+ else:
273
+ msg_str = "result file not found"
240
274
  finally:
241
- # clean sub temp
242
- if os.path.isdir(sub_temp_dir):
243
- shutil.rmtree(sub_temp_dir)
275
+ if os.path.isdir(temp_dir):
276
+ shutil.rmtree(temp_dir)
244
277
  end = time.time()
245
- return dict(input=input_file, msg=msg_str, use_time=round(end - start, 1))
278
+ return dict(input=input_file, output=output_file, msg=msg_str, use_time=round(end - start, 1))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gemmi_protools
3
- Version: 0.1.7
3
+ Version: 0.1.9
4
4
  Summary: An Enhanced tool to process PDB structures based on Gemmi
5
5
  Author-email: Luo Jiejian <luojiejian12@mails.ucas.ac.cn>
6
6
  License-Expression: MIT
@@ -11,10 +11,10 @@ gemmi_protools/io/struct_info.py,sha256=9nBj1Zer03S8_Wks7L7uRlc9PlbfCKzoaT32pKR5
11
11
  gemmi_protools/utils/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
12
12
  gemmi_protools/utils/align.py,sha256=CZcrvjy-ZbX2u7OAn-YGblbxaj9YFUDX4CFZcpbpnB8,6959
13
13
  gemmi_protools/utils/dockq.py,sha256=XmMwVEy-H4p6sH_HPcDWA3TP77OWdih0fE_BQJDr4pU,4189
14
- gemmi_protools/utils/fixer.py,sha256=WUiIoK8dFPGUkXlK-wiiWyorYD8T71rN7WDE2psGSiE,9061
14
+ gemmi_protools/utils/fixer.py,sha256=RDmpoZpTrGdwuJQTTK1eif132MHV1I-T6Nt47ezgxTM,10236
15
15
  gemmi_protools/utils/ppi.py,sha256=VWYsdxWwQoS1xwEYj5KB96Zz3F8r5Eyuw6NT3ReD-wc,2330
16
- gemmi_protools-0.1.7.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
17
- gemmi_protools-0.1.7.dist-info/METADATA,sha256=tsp4fzE0T7lTsmPaRYK5UzgqS4OjrvOSfESFMqbkk0Y,567
18
- gemmi_protools-0.1.7.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
19
- gemmi_protools-0.1.7.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
20
- gemmi_protools-0.1.7.dist-info/RECORD,,
16
+ gemmi_protools-0.1.9.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
17
+ gemmi_protools-0.1.9.dist-info/METADATA,sha256=OFqx1MONnQTZ7b65AspnNX0FIvtrE2gQVWT7yCTGhqE,567
18
+ gemmi_protools-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
+ gemmi_protools-0.1.9.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
20
+ gemmi_protools-0.1.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5