gemmi-protools 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gemmi-protools might be problematic. Click here for more details.

@@ -6,33 +6,35 @@ import gzip
6
6
  import io
7
7
  import os
8
8
  import pathlib
9
+ import re
9
10
  import shutil
10
11
  import subprocess
11
12
  import time
12
13
  import uuid
13
- from typing import Union, Optional
14
+ from typing import Union
14
15
 
16
+ import openmm
15
17
  import pdbfixer
16
18
  from openmm import app
17
- import openmm
18
19
  from typeguard import typechecked
19
20
 
21
+ from gemmi_protools import StructureParser
20
22
  from gemmi_protools.io.cif_opts import _is_cif
21
23
  from gemmi_protools.io.pdb_opts import _is_pdb
22
24
 
23
25
 
24
26
  @typechecked
25
- def _load_by_pbdfixer(path: Union[str, pathlib.Path], use_platform=True) -> pdbfixer.PDBFixer:
27
+ def _load_by_pbdfixer(path: Union[str, pathlib.Path], cpu_platform=True) -> pdbfixer.PDBFixer:
26
28
  """
27
29
 
28
30
  Args:
29
31
  path:
30
- use_platform: default True, if False, auto select platform
32
+ cpu_platform: default True, if False, auto select platform
31
33
 
32
34
  Returns:
33
35
 
34
36
  """
35
- if use_platform:
37
+ if cpu_platform:
36
38
  platform = openmm.Platform.getPlatformByName('CPU')
37
39
  else:
38
40
  platform = None
@@ -66,9 +68,8 @@ def clean_structure(input_file: Union[str, pathlib.Path],
66
68
  keep_heterogens: str = "all",
67
69
  replace_nonstandard: bool = True,
68
70
  ph: Union[float, int] = 7.0,
69
- check_mode: bool = True,
70
- threshold: float = 0.3,
71
- use_platform=True
71
+ cpu_platform=True,
72
+ clean_connect=True
72
73
  ):
73
74
  """
74
75
 
@@ -87,9 +88,8 @@ def clean_structure(input_file: Union[str, pathlib.Path],
87
88
  none: remove all heterogens
88
89
  :param replace_nonstandard: default True, replace all non-standard residues to standard ones
89
90
  :param ph: default 7.0, ph values to add missing hydrogen atoms
90
- :param check_mode: default True to check the ratio of residues with missing atoms
91
- :param threshold: float, default 0.3, only use when check_mode=True
92
- :param use_platform: default True to use CPU platform, if False, auto select platform
91
+ :param cpu_platform: default True to use CPU platform, if False, auto select platform
92
+ :param clean_connect: default True to clean CONECT lines in output pdb
93
93
 
94
94
  :return:
95
95
  str, status message of fixing
@@ -102,16 +102,14 @@ def clean_structure(input_file: Union[str, pathlib.Path],
102
102
  ######################################################
103
103
  # load structure
104
104
  ######################################################
105
- fixer = _load_by_pbdfixer(input_file, use_platform=use_platform)
105
+ fixer = _load_by_pbdfixer(input_file, cpu_platform)
106
106
 
107
107
  ######################################################
108
108
  # check
109
109
  ######################################################
110
110
  fixer.findMissingResidues()
111
111
  fixer.findMissingAtoms()
112
- ratio = len(fixer.missingAtoms) / fixer.topology.getNumResidues()
113
- if ratio > threshold:
114
- return dict(input=input_file, msg="Too many residues with missing atoms: %.2f" % ratio)
112
+ ratio = "%.2f" % (len(fixer.missingAtoms) / fixer.topology.getNumResidues(),)
115
113
 
116
114
  ######################################################
117
115
  # replace non-standard residues
@@ -166,73 +164,94 @@ def clean_structure(input_file: Union[str, pathlib.Path],
166
164
  msg_str = "Finished"
167
165
  except Exception as e:
168
166
  msg_str = str(e)
167
+ ratio = "*"
168
+ else:
169
+ if clean_connect:
170
+ output_lines = []
171
+ with open(output_file, "r") as in_handle:
172
+ for line in in_handle:
173
+ if not re.match("CONECT", line):
174
+ output_lines.append(line)
175
+ with open(output_file, "w") as out_handle:
176
+ print("".join(output_lines), file=out_handle)
169
177
 
170
- return dict(input=input_file, msg=msg_str)
178
+ return dict(input=input_file, msg=msg_str, res_ratio_with_missing_atoms=ratio)
171
179
 
172
180
 
173
181
  @typechecked
174
- def move_with_overwrite(src_folder: str, dst_folder: str, filename: str):
175
- assert os.path.isdir(src_folder)
176
- assert os.path.isdir(dst_folder)
177
-
178
- src_path = os.path.join(src_folder, filename)
179
- dst_path = os.path.join(dst_folder, filename)
180
-
181
- if os.path.exists(dst_path):
182
- os.remove(dst_path)
183
- shutil.move(src_path, dst_folder)
184
-
182
+ def repair_structure(input_file: str,
183
+ output_file: str,
184
+ complex_with_dna: bool = False,
185
+ complex_with_rna: bool = False,
186
+ timeout: Union[int, float] = 3600):
187
+ """
185
188
 
186
- @typechecked
187
- def repair_structure(input_file: Union[str, pathlib.Path],
188
- out_dir: Union[str, pathlib.Path],
189
- temp_dir: Union[str, pathlib.Path],
190
- foldx_path: Optional[str] = None,
191
- timeout=3600):
189
+ :param input_file: .pdb or .cif or .pdb.gz or .cif.gz
190
+ :param output_file: .pdb file
191
+ :param complex_with_dna: bool, default False, not debug yet
192
+ :param complex_with_rna: bool, default False, not debug yet
193
+ :param timeout: float or int
194
+ :return:
195
+ """
196
+ ############################################################
197
+ # Check and convert input_file to .pdb if not
198
+ ############################################################
199
+ assert os.path.splitext(output_file)[1] == ".pdb", "output_file Not .pdb: %s" % output_file
200
+ assert _is_cif(input_file) or _is_pdb(input_file), "Not .pdb or .cif or .pdb.gz or .cif.gz: %s" % input_file
201
+
202
+ # input_file and output_file can't be the same path
203
+ p_in = pathlib.Path(input_file).expanduser().resolve()
204
+ p_out = pathlib.Path(output_file).expanduser().resolve()
205
+
206
+ assert str(p_in) != str(p_out), "input_file and output_file can't be the same path"
207
+ ############################################################
208
+ # Config Path
209
+ ############################################################
210
+ file_name = os.path.basename(input_file)
211
+ stem_name = os.path.splitext(file_name)[0]
212
+
213
+ in_dir = os.path.dirname(input_file)
214
+ out_dir = os.path.dirname(output_file)
192
215
  if not os.path.isdir(out_dir):
193
- raise NotADirectoryError(out_dir)
194
-
195
- if not os.path.isdir(temp_dir):
196
- raise NotADirectoryError(temp_dir)
197
-
198
- in_path = pathlib.Path(input_file).expanduser().resolve()
199
- pdb_dir = str(in_path.parent)
200
- pdb_file = str(in_path.name)
201
- if not os.path.isfile(input_file):
202
- raise FileNotFoundError(input_file)
203
-
204
- assert pdb_dir != str(out_dir), "output directory can't be the directory of input_file"
205
-
206
- stem_name = in_path.stem
207
-
208
- # create temp dir
209
- sub_temp_dir = os.path.join(temp_dir, "%s_%s" % (stem_name, str(uuid.uuid4())))
216
+ os.makedirs(out_dir)
217
+
218
+ temp_dir = os.path.join(out_dir, "_RepairTemp_%s" % str(uuid.uuid4()))
219
+ if os.path.isdir(temp_dir):
220
+ shutil.rmtree(temp_dir)
221
+ os.makedirs(temp_dir)
222
+
223
+ if os.path.splitext(input_file)[1] != ".pdb":
224
+ # convert to .pdb
225
+ st = StructureParser()
226
+ st.load_from_file(input_file)
227
+ file_name_r = "%s.pdb" % stem_name
228
+ in_dir_r = temp_dir
229
+ st.to_pdb(os.path.join(in_dir_r, file_name_r))
230
+ else:
231
+ file_name_r = file_name
232
+ in_dir_r = in_dir
210
233
 
211
- if os.path.isdir(sub_temp_dir):
212
- shutil.rmtree(sub_temp_dir)
234
+ foldx_path = shutil.which("foldx")
235
+ if foldx_path is None:
236
+ raise RuntimeError("path of foldx is not set or found in PATH")
213
237
 
214
- os.makedirs(sub_temp_dir)
238
+ cwd_dir = os.getcwd()
215
239
 
216
- if foldx_path is None:
217
- foldx_path = shutil.which("foldx")
240
+ repair_cmd = [foldx_path,
241
+ "-c RepairPDB",
242
+ "--pdb %s" % file_name_r,
243
+ "--pdb-dir %s" % in_dir_r,
244
+ "--output-dir %s" % temp_dir
245
+ ]
246
+ if complex_with_dna:
247
+ repair_cmd.append("--complexWithDNA true")
218
248
 
219
- if foldx_path is None:
220
- raise RuntimeError("path of foldx is not set or found in PATH")
249
+ if complex_with_rna:
250
+ repair_cmd.append("--complexWithRNA true")
221
251
 
222
- old_dir = os.getcwd()
223
- command_settings = ["cd %s" % sub_temp_dir,
224
- "&&",
225
- foldx_path,
226
- "-c RepairPDB",
227
- "--pdb %s" % pdb_file,
228
- "--pdb-dir %s" % pdb_dir,
229
- "--output-dir %s" % sub_temp_dir,
230
- "&&",
231
- "cd %s" % old_dir
232
- ]
252
+ command_settings = ["cd %s &&" % temp_dir] + repair_cmd + ["&& cd %s" % cwd_dir]
233
253
 
234
254
  start = time.time()
235
-
236
255
  try:
237
256
  result = subprocess.run(" ".join(command_settings), shell=True, check=True,
238
257
  stdout=subprocess.PIPE, stderr=subprocess.PIPE,
@@ -241,22 +260,19 @@ def repair_structure(input_file: Union[str, pathlib.Path],
241
260
  if result.returncode == 0:
242
261
  msg_str = "Finished"
243
262
  else:
244
- msg_str = result.stderr
245
-
246
- result_file = os.path.join(sub_temp_dir, "%s_Repair.pdb" % stem_name)
247
- fxout_file = os.path.join(sub_temp_dir, "%s_Repair.fxout" % stem_name)
248
- if os.path.exists(result_file) and os.path.exists(fxout_file):
249
- move_with_overwrite(sub_temp_dir, out_dir, "%s_Repair.pdb" % stem_name)
250
- move_with_overwrite(sub_temp_dir, out_dir, "%s_Repair.fxout" % stem_name)
251
- except subprocess.CalledProcessError as e:
252
- # Handle errors in the called executable
253
- msg_str = e.stderr
263
+ msg_str = str(result.stderr)
254
264
  except Exception as e:
255
- # Handle other exceptions such as file not found or permissions issues
256
- msg_str = str(e).encode()
265
+ msg_str = str(e)
266
+ else:
267
+ if msg_str == "Finished":
268
+ # just keep .pdb, ignore .fxout
269
+ result_file = os.path.join(temp_dir, "%s_Repair.pdb" % stem_name)
270
+ if os.path.exists(result_file):
271
+ shutil.move(result_file, output_file)
272
+ else:
273
+ msg_str = "result file not found"
257
274
  finally:
258
- # clean sub temp
259
- if os.path.isdir(sub_temp_dir):
260
- shutil.rmtree(sub_temp_dir)
275
+ if os.path.isdir(temp_dir):
276
+ shutil.rmtree(temp_dir)
261
277
  end = time.time()
262
- return dict(input=input_file, msg=msg_str, use_time=round(end - start, 1))
278
+ return dict(input=input_file, output=output_file, msg=msg_str, use_time=round(end - start, 1))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gemmi_protools
3
- Version: 0.1.8
3
+ Version: 0.1.9
4
4
  Summary: An Enhanced tool to process PDB structures based on Gemmi
5
5
  Author-email: Luo Jiejian <luojiejian12@mails.ucas.ac.cn>
6
6
  License-Expression: MIT
@@ -11,10 +11,10 @@ gemmi_protools/io/struct_info.py,sha256=9nBj1Zer03S8_Wks7L7uRlc9PlbfCKzoaT32pKR5
11
11
  gemmi_protools/utils/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
12
12
  gemmi_protools/utils/align.py,sha256=CZcrvjy-ZbX2u7OAn-YGblbxaj9YFUDX4CFZcpbpnB8,6959
13
13
  gemmi_protools/utils/dockq.py,sha256=XmMwVEy-H4p6sH_HPcDWA3TP77OWdih0fE_BQJDr4pU,4189
14
- gemmi_protools/utils/fixer.py,sha256=oiNtLCjQLFk4JHeO4exEkMH8QoEiKsJZIJRebkGGfBM,9545
14
+ gemmi_protools/utils/fixer.py,sha256=RDmpoZpTrGdwuJQTTK1eif132MHV1I-T6Nt47ezgxTM,10236
15
15
  gemmi_protools/utils/ppi.py,sha256=VWYsdxWwQoS1xwEYj5KB96Zz3F8r5Eyuw6NT3ReD-wc,2330
16
- gemmi_protools-0.1.8.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
17
- gemmi_protools-0.1.8.dist-info/METADATA,sha256=j0xy5aqbEvdlqoNrEdCSiBpmLVOpe3URit-85PBWa1s,567
18
- gemmi_protools-0.1.8.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
19
- gemmi_protools-0.1.8.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
20
- gemmi_protools-0.1.8.dist-info/RECORD,,
16
+ gemmi_protools-0.1.9.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
17
+ gemmi_protools-0.1.9.dist-info/METADATA,sha256=OFqx1MONnQTZ7b65AspnNX0FIvtrE2gQVWT7yCTGhqE,567
18
+ gemmi_protools-0.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
+ gemmi_protools-0.1.9.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
20
+ gemmi_protools-0.1.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5