gemmi-protools 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gemmi-protools might be problematic. Click here for more details.

@@ -1,8 +1,8 @@
1
1
  """
2
2
  @Author: Luo Jiejian
3
3
  """
4
- from .reader import StructureParser
5
- from .convert import gemmi2bio, bio2gemmi
6
- from .align import StructureAligner
7
- from .ppi import ppi_interface_residues
8
- from .dockq import dockq_score, dockq_score_interface
4
+ from gemmi_protools.io.convert import gemmi2bio, bio2gemmi
5
+ from gemmi_protools.io.reader import StructureParser
6
+ from gemmi_protools.utils.align import StructureAligner
7
+ from gemmi_protools.utils.ppi import ppi_interface_residues
8
+ from gemmi_protools.utils.dockq import dockq_score, dockq_score_interface
@@ -0,0 +1,3 @@
1
+ """
2
+ @Author: Luo Jiejian
3
+ """
@@ -9,7 +9,7 @@ import gemmi
9
9
  import pandas as pd
10
10
  from typeguard import typechecked
11
11
 
12
- from .struct_info import Entity
12
+ from gemmi_protools.io.struct_info import Entity
13
13
 
14
14
 
15
15
  @typechecked
@@ -8,9 +8,9 @@ from typing import Union, Optional, Dict, List
8
8
  import gemmi
9
9
  from typeguard import typechecked
10
10
 
11
- from .cif_opts import _cif_entity_info, _is_cif, _get_cif_resolution
12
- from .pdb_opts import _pdb_entity_info, _is_pdb, _get_pdb_resolution
13
- from .struct_info import Entity
11
+ from gemmi_protools.io.cif_opts import _cif_entity_info, _is_cif, _get_cif_resolution
12
+ from gemmi_protools.io.pdb_opts import _pdb_entity_info, _is_pdb, _get_pdb_resolution
13
+ from gemmi_protools.io.struct_info import Entity
14
14
 
15
15
 
16
16
  @typechecked
@@ -134,14 +134,19 @@ def pdb_parser(path: Union[str, pathlib.Path]):
134
134
  block = struct.make_mmcif_block()
135
135
  ent_t = _cif_entity_info(block)
136
136
  rec = defaultdict(list)
137
+
137
138
  for cn, middle_eid in ent_t.polymer2eid.items():
138
139
  rec[middle_eid].append(cn)
139
140
 
140
141
  _mapper = _assign_digital_entity_names(struct)
141
142
  _mapper_n = dict()
143
+
142
144
  for middle_eid, new_eid in _mapper.items():
143
- old_eid = str(",".join(rec[middle_eid]))
144
- _mapper_n[old_eid] = new_eid
145
+ if middle_eid in rec:
146
+ mid = rec[middle_eid]
147
+ mid.sort()
148
+ old_eid = str(",".join(mid))
149
+ _mapper_n[old_eid] = new_eid
145
150
 
146
151
  if _mapper_n:
147
152
  _update_entity_names(ent_0, _mapper_n)
@@ -10,8 +10,8 @@ from typing import Dict, Union, List
10
10
 
11
11
  from typeguard import typechecked
12
12
 
13
- from .parse_pdb_header import _parse_pdb_header_list
14
- from .struct_info import Entity
13
+ from gemmi_protools.io.parse_pdb_header import _parse_pdb_header_list
14
+ from gemmi_protools.io.struct_info import Entity
15
15
 
16
16
 
17
17
  @typechecked
@@ -119,6 +119,8 @@ def _compound_source_string(entity: Entity) -> List[str]:
119
119
  entity2polymer[v].append(k)
120
120
  entity_labels = list(entity2polymer.keys())
121
121
  entity_labels.sort()
122
+ for v in entity2polymer.values():
123
+ v.sort()
122
124
 
123
125
  values = []
124
126
  for i, el in enumerate(entity_labels):
@@ -136,9 +138,9 @@ def _compound_source_string(entity: Entity) -> List[str]:
136
138
  compound_molecule = "COMPND {n_line:>3} MOLECULE: {molecule};"
137
139
  compound_chain = "COMPND {n_line:>3} CHAIN: {chain};"
138
140
 
139
- i = 0
141
+ i = 1
140
142
  for val in values:
141
- if i == 0:
143
+ if i == 1:
142
144
  outputs.append(compound_mol0.format(**val))
143
145
  i += 1
144
146
  for c_str in [compound_molecule, compound_chain]:
@@ -11,13 +11,13 @@ import gemmi
11
11
  import numpy as np
12
12
  from typeguard import typechecked
13
13
 
14
- from .cif_opts import _cif_block_for_output, _is_cif
15
- from .parser import (_assign_digital_entity_names, _ent_from_structure,
16
- pdb_parser, cif_parser, _chain_type, _chain_names2one_letter,
17
- _assert_unique_chain_names_in_models, get_assembly)
18
- from .pdb_opts import _compound_source_string, _is_pdb
19
- from .peptide import nucleic_3to1_mapper, protein_3to1_mapper
20
- from .struct_info import Info
14
+ from gemmi_protools.io.cif_opts import _cif_block_for_output, _is_cif
15
+ from gemmi_protools.io.parser import (_assign_digital_entity_names, _ent_from_structure,
16
+ pdb_parser, cif_parser, _chain_type, _chain_names2one_letter,
17
+ _assert_unique_chain_names_in_models, get_assembly)
18
+ from gemmi_protools.io.pdb_opts import _compound_source_string, _is_pdb
19
+ from gemmi_protools.io.peptide import nucleic_3to1_mapper, protein_3to1_mapper
20
+ from gemmi_protools.io.struct_info import Info
21
21
 
22
22
 
23
23
  class StructureParser(object):
@@ -419,3 +419,27 @@ class StructureParser(object):
419
419
  else:
420
420
  one_letter_code = ""
421
421
  return one_letter_code
422
+
423
def clean_structure(self, keep_ligand=True):
    """
    Strip the structure down in place and refresh the derived information.

    Steps, in order:
    (1) select the default model
    (2) remove alternative conformations
    (3) remove hydrogens
    (4) remove waters (and ligands too, when keep_ligand is False)
    (5) remove empty chains
    (6) update entity information and full sequences

    :param keep_ligand: default True, only waters are removed;
                        if False, ligands are removed together with waters
    :return:
    """
    self.set_default_model()

    # Bind after set_default_model() so the calls below hit the current STRUCT.
    structure = self.STRUCT
    structure.remove_alternative_conformations()
    structure.remove_hydrogens()

    # Only the selected remover is looked up and called.
    strip_solvent = (structure.remove_waters if keep_ligand
                     else structure.remove_ligands_and_waters)
    strip_solvent()

    structure.remove_empty_chains()

    # update information
    self.update_entity()
    self.update_full_sequences()
@@ -0,0 +1,3 @@
1
+ """
2
+ @Author: Luo Jiejian
3
+ """
@@ -15,8 +15,8 @@ import numpy as np
15
15
  from Bio.PDB import Superimposer
16
16
  from typeguard import typechecked
17
17
 
18
- from .convert import gemmi2bio, bio2gemmi
19
- from .reader import StructureParser
18
+ from gemmi_protools.io.convert import gemmi2bio, bio2gemmi
19
+ from gemmi_protools.io.reader import StructureParser
20
20
 
21
21
 
22
22
  class StructureAligner(object):
@@ -13,7 +13,7 @@ from typing import Optional, Union
13
13
  import pandas as pd
14
14
  from typeguard import typechecked
15
15
 
16
- from .reader import StructureParser
16
+ from gemmi_protools.io.reader import StructureParser
17
17
 
18
18
 
19
19
  @typechecked
@@ -0,0 +1,231 @@
1
+ """
2
+ @Author: Luo Jiejian
3
+ @Date: 2025/1/21
4
+ """
5
+ import gzip
6
+ import io
7
+ import os
8
+ import pathlib
9
+ import shutil
10
+ import subprocess
11
+ import time
12
+ import uuid
13
+ from typing import Union, Optional
14
+
15
+ import pdbfixer
16
+ from openmm import app
17
+ from typeguard import typechecked
18
+
19
+ from gemmi_protools.io.cif_opts import _is_cif
20
+ from gemmi_protools.io.pdb_opts import _is_pdb
21
+
22
+
23
+ @typechecked
24
def _load_by_pbdfixer(path: Union[str, pathlib.Path]) -> pdbfixer.PDBFixer:
    """
    Load a structure file into a PDBFixer object.

    Plain ``.pdb`` / ``.cif`` files are handed to PDBFixer by file name;
    anything else that passes the format check is treated as gzip-compressed
    and handed over as a decoded text stream.

    :param path: str or pathlib.Path, structure file (.cif, .cif.gz, .pdb or .pdb.gz)
    :return: pdbfixer.PDBFixer
    :raises ValueError: if the path is accepted by neither _is_pdb nor _is_cif
    """
    if not (_is_pdb(path) or _is_cif(path)):
        raise ValueError("Only support .cif, .cif.gz, .pdb or .pdb.gz file, but got %s" % path)

    cur_path = pathlib.Path(path)

    if cur_path.suffix in (".pdb", ".cif"):
        # PDBFixer guesses the format from the file name with string methods,
        # so a pathlib.Path must be converted to a plain str first.
        return pdbfixer.PDBFixer(filename=os.fspath(path))

    # Compressed input: open in text mode so PDBFixer reads str, not bytes.
    # The fixer is fully built inside the constructor, so it is safe to
    # return from within the ``with`` block.
    with gzip.open(cur_path, "rt", encoding="utf-8") as text_io:
        if "".join(cur_path.suffixes[-2:]) == ".pdb.gz":
            return pdbfixer.PDBFixer(pdbfile=text_io)
        return pdbfixer.PDBFixer(pdbxfile=text_io)
44
+
45
+
46
+ @typechecked
47
def clean_structure(input_file: Union[str, pathlib.Path],
                    output_file: Union[str, pathlib.Path],
                    add_missing_residue: bool = False,
                    add_missing_atoms: str = "heavy",
                    keep_heterogens: str = "all",
                    replace_nonstandard: bool = True,
                    ph: Union[float, int] = 7.0
                    ):
    """
    Fix a structure with PDBFixer and write the result to output_file.

    :param input_file: str, Input structure file, support file format .cif, .cif.gz, .pdb or .pdb.gz
    :param output_file: str, Output structure file, support file format .cif, .pdb
    :param add_missing_residue: default False
    :param add_missing_atoms: default heavy, accepted values 'all', 'heavy', 'hydrogen', 'none'
            all: add missing heavy and hydrogen atoms
            heavy: add missing heavy atoms only
            hydrogen: add missing hydrogen atoms only
            none: not add missing atoms

    :param keep_heterogens: default all, accepted values 'all', 'water', 'none'
            all: keep all heterogens
            water: only keep water
            none: remove all heterogens
    :param replace_nonstandard: default True, replace all non-standard residues to standard ones
    :param ph: default 7.0, ph values to add missing hydrogen atoms
    :return:
        dict with keys
            input: the input_file argument as given
            msg: "Finished" if successful, otherwise the message of the error
    :raises AssertionError: if add_missing_atoms or keep_heterogens is not an accepted value
    """
    assert add_missing_atoms in ['all', 'heavy', 'hydrogen', 'none']
    assert keep_heterogens in ['all', 'water', 'none']

    try:
        ######################################################
        # validate the output target before doing any work
        ######################################################
        suffix = pathlib.Path(output_file).suffix
        if suffix not in [".pdb", ".cif"]:
            # Raised inside the try block on purpose: it is reported through
            # the returned message like every other failure.
            raise ValueError("output file must be .cif or .pdb")

        ######################################################
        # load structure
        ######################################################
        fixer = _load_by_pbdfixer(input_file)

        ######################################################
        # replace non-standard residues
        ######################################################
        if replace_nonstandard:
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()

        ######################################################
        # remove heterogens
        ######################################################
        if keep_heterogens == 'none':
            fixer.removeHeterogens(keepWater=False)
        elif keep_heterogens == 'water':
            fixer.removeHeterogens(keepWater=True)

        ######################################################
        # missing residue
        ######################################################
        if add_missing_residue:
            fixer.findMissingResidues()
        else:
            fixer.missingResidues = {}

        ######################################################
        # missing atoms
        ######################################################
        fixer.findMissingAtoms()
        if add_missing_atoms not in ['all', 'heavy']:
            # Discard what was found so addMissingAtoms() adds no heavy atoms.
            fixer.missingAtoms = {}
            fixer.missingTerminals = {}
        fixer.addMissingAtoms()
        if add_missing_atoms in ['all', 'hydrogen']:
            fixer.addMissingHydrogens(ph)

        ######################################################
        # output
        ######################################################
        out_dir = os.path.dirname(output_file)
        # dirname is "" for a bare file name; os.makedirs("") would fail.
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        writer = app.PDBFile if suffix == ".pdb" else app.PDBxFile
        with open(output_file, 'w') as out_handle:
            writer.writeFile(fixer.topology, fixer.positions, out_handle, keepIds=True)

        msg_str = "Finished"
    except Exception as e:
        # Best-effort API: failures are reported in the result, not raised.
        msg_str = str(e)

    return dict(input=input_file, msg=msg_str)
140
+
141
+
142
+ @typechecked
143
def move_with_overwrite(src_folder: str, dst_folder: str, filename: str):
    """
    Move ``filename`` from src_folder into dst_folder, replacing an existing file.

    The source is checked before the destination is touched, so a missing
    source never deletes the file already present in dst_folder.

    :param src_folder: str, existing directory that holds the file
    :param dst_folder: str, existing directory to move the file into
    :param filename: str, name of the file inside src_folder
    :return:
    :raises AssertionError: if src_folder or dst_folder is not a directory
    :raises FileNotFoundError: if the file does not exist in src_folder
    """
    assert os.path.isdir(src_folder)
    assert os.path.isdir(dst_folder)

    src_path = os.path.join(src_folder, filename)
    dst_path = os.path.join(dst_folder, filename)

    if not os.path.exists(src_path):
        raise FileNotFoundError(src_path)

    if os.path.exists(dst_path):
        if os.path.samefile(src_path, dst_path):
            # Source and destination are the same file: removing the
            # destination would destroy the only copy.
            return
        os.remove(dst_path)

    shutil.move(src_path, dst_path)
153
+
154
+
155
+ @typechecked
156
def repair_structure(input_file: Union[str, pathlib.Path],
                     out_dir: Union[str, pathlib.Path],
                     temp_dir: Union[str, pathlib.Path],
                     foldx_path: Optional[str] = None,
                     timeout=3600):
    """
    Run FoldX RepairPDB on input_file and move the results into out_dir.

    FoldX is run inside a unique sub directory of temp_dir, which is removed
    afterwards. ``<stem>_Repair.pdb`` and ``<stem>_Repair.fxout`` are moved to
    out_dir only when both were produced.

    :param input_file: str or pathlib.Path, structure file to repair
    :param out_dir: existing directory for the results, must differ from the directory of input_file
    :param temp_dir: existing directory in which the working sub directory is created
    :param foldx_path: path of the foldx executable, default None: search "foldx" in PATH
    :param timeout: seconds to wait for foldx, default 3600
    :return:
        dict with keys
            input: the input_file argument as given
            msg: "Finished" (str) if foldx exited with code 0, otherwise bytes:
                 the stderr of foldx, or the encoded message of any other error
            use_time: seconds spent, rounded to one decimal
    :raises NotADirectoryError: if out_dir or temp_dir is not a directory
    :raises FileNotFoundError: if input_file does not exist
    :raises AssertionError: if out_dir is the directory of input_file
    :raises RuntimeError: if foldx_path is not given and foldx is not found in PATH
    """
    if not os.path.isdir(out_dir):
        raise NotADirectoryError(out_dir)

    if not os.path.isdir(temp_dir):
        raise NotADirectoryError(temp_dir)

    in_path = pathlib.Path(input_file).expanduser().resolve()
    if not in_path.is_file():
        raise FileNotFoundError(input_file)

    # Compare resolved paths so a relative out_dir cannot bypass the check.
    out_path = pathlib.Path(out_dir).resolve()
    assert in_path.parent != out_path, "output directory can't be the directory of input_file"

    # Locate foldx before creating anything on disk, so a failed lookup
    # does not leave a temporary directory behind.
    if foldx_path is None:
        foldx_path = shutil.which("foldx")

    if foldx_path is None:
        raise RuntimeError("path of foldx is not set or found in PATH")

    stem_name = in_path.stem
    result_names = ["%s_Repair.pdb" % stem_name, "%s_Repair.fxout" % stem_name]

    # create temp dir; absolute, because foldx runs with it as working directory
    sub_temp_dir = os.path.join(os.path.abspath(temp_dir),
                                "%s_%s" % (stem_name, str(uuid.uuid4())))
    os.makedirs(sub_temp_dir)

    # Argument list with shell=False: paths with spaces or shell
    # metacharacters are passed through verbatim.
    command = [os.path.expanduser(foldx_path),
               "-c", "RepairPDB",
               "--pdb", in_path.name,
               "--pdb-dir", str(in_path.parent),
               "--output-dir", sub_temp_dir,
               ]

    start = time.time()

    try:
        # check=True raises CalledProcessError on a non-zero exit code
        subprocess.run(command, cwd=sub_temp_dir, check=True,
                       stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       timeout=timeout)
        msg_str = "Finished"

        if all(os.path.exists(os.path.join(sub_temp_dir, name)) for name in result_names):
            for name in result_names:
                # move_with_overwrite only accepts str folders
                move_with_overwrite(sub_temp_dir, str(out_path), name)
    except subprocess.CalledProcessError as e:
        # Handle errors in the called executable
        msg_str = e.stderr
    except Exception as e:
        # Handle other exceptions such as file not found or permissions issues
        msg_str = str(e).encode()
    finally:
        # clean sub temp
        if os.path.isdir(sub_temp_dir):
            shutil.rmtree(sub_temp_dir)

    end = time.time()
    return dict(input=input_file, msg=msg_str, use_time=round(end - start, 1))
@@ -7,7 +7,7 @@ from typing import Union, List
7
7
  import numpy as np
8
8
  from scipy.spatial import cKDTree
9
9
 
10
- from .reader import StructureParser
10
+ from gemmi_protools.io.reader import StructureParser
11
11
 
12
12
 
13
13
  def _ppi_atoms(struct, chains):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gemmi_protools
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: An Enhanced tool to process PDB structures based on Gemmi
5
5
  Author-email: Luo Jiejian <luojiejian12@mails.ucas.ac.cn>
6
6
  License-Expression: MIT
@@ -0,0 +1,20 @@
1
+ gemmi_protools/__init__.py,sha256=hwUw-EieCG0kwzHjTjzHF9Bc3D-J5R_l6G8PCcFegkw,331
2
+ gemmi_protools/io/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
3
+ gemmi_protools/io/cif_opts.py,sha256=TKND91aRGB5hYNdTFElCKMGabCg4klLk_c1evC3WZuA,6368
4
+ gemmi_protools/io/convert.py,sha256=780sQcwhslUD4Hj5UZMVlQdbicniJ6jNjncTl_7jaMk,3841
5
+ gemmi_protools/io/parse_pdb_header.py,sha256=UOGMsE3-d3APhO7zaAEE0NT31n-iqt55VpDh_RPOicI,14223
6
+ gemmi_protools/io/parser.py,sha256=NCLc9IHH-tb2hIm0jZeKu2nVw2Isr-n-pH4l7JbKA5w,9130
7
+ gemmi_protools/io/pdb_opts.py,sha256=laUqxlecOe6goax12q8EJGZuZbHyIGsXVucMV3gVrgg,5741
8
+ gemmi_protools/io/peptide.py,sha256=a2wiEutJmvhl6gDCIzzqRCbmyknk2mwgy2FZ53lXclU,750
9
+ gemmi_protools/io/reader.py,sha256=oaJ5TTWLFCZ3tDq3R5dAyBqm4Q2sxmfl4D8cGvSOdl0,16060
10
+ gemmi_protools/io/struct_info.py,sha256=9nBj1Zer03S8_Wks7L7uRlc9PlbfCKzoaT32pKR58X8,2769
11
+ gemmi_protools/utils/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
12
+ gemmi_protools/utils/align.py,sha256=CZcrvjy-ZbX2u7OAn-YGblbxaj9YFUDX4CFZcpbpnB8,6959
13
+ gemmi_protools/utils/dockq.py,sha256=XmMwVEy-H4p6sH_HPcDWA3TP77OWdih0fE_BQJDr4pU,4189
14
+ gemmi_protools/utils/fixer.py,sha256=f1MAeYkFukijaBwTdINO0733Qbe5B9xSgRCE69nHkN4,8396
15
+ gemmi_protools/utils/ppi.py,sha256=VWYsdxWwQoS1xwEYj5KB96Zz3F8r5Eyuw6NT3ReD-wc,2330
16
+ gemmi_protools-0.1.6.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
17
+ gemmi_protools-0.1.6.dist-info/METADATA,sha256=N8Os-XHbUKuRDyQw0Z3MjtYy5Z5fqLFpP1sxEZnoEbU,567
18
+ gemmi_protools-0.1.6.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
19
+ gemmi_protools-0.1.6.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
20
+ gemmi_protools-0.1.6.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (80.7.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,17 +0,0 @@
1
- gemmi_protools/__init__.py,sha256=SvBS-OBVhYsoHCWw4Rwp-6p8vaFhJC9FQMlx0vYpITQ,237
2
- gemmi_protools/align.py,sha256=LoN2xlZbvEwceQiz6F_VQBxlsNTKfGnJQ1LM937t1qw,6925
3
- gemmi_protools/cif_opts.py,sha256=bfJuUQSYzz_703cIgxgvEVxXalfVYEZwpVjuYEB9O2U,6351
4
- gemmi_protools/convert.py,sha256=780sQcwhslUD4Hj5UZMVlQdbicniJ6jNjncTl_7jaMk,3841
5
- gemmi_protools/dockq.py,sha256=JGPQ7Xs7gz9wubVVT9WSP5lZsLnfgcUH-_nLJ3c8I3U,4172
6
- gemmi_protools/parse_pdb_header.py,sha256=UOGMsE3-d3APhO7zaAEE0NT31n-iqt55VpDh_RPOicI,14223
7
- gemmi_protools/parser.py,sha256=QIJCOfK8FaFbLMvBG82zTOAjIvQJcf2WRwuFSxj4zvc,8982
8
- gemmi_protools/pdb_opts.py,sha256=NbXLDNNVF7tuG_bUM0Infylf5aYnOCP2Pd-ndqm5bK4,5652
9
- gemmi_protools/peptide.py,sha256=a2wiEutJmvhl6gDCIzzqRCbmyknk2mwgy2FZ53lXclU,750
10
- gemmi_protools/ppi.py,sha256=nRzRWv28SDjVt6hMShRL_QYKFsBO1xA5jSGIQrN0JBg,2313
11
- gemmi_protools/reader.py,sha256=u7872K-XeUW_sr0E1SaQWrPVWm88jPBUN8LLzf0flM0,15321
12
- gemmi_protools/struct_info.py,sha256=9nBj1Zer03S8_Wks7L7uRlc9PlbfCKzoaT32pKR58X8,2769
13
- gemmi_protools-0.1.4.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
14
- gemmi_protools-0.1.4.dist-info/METADATA,sha256=aKz2slj7fXafD3SnNaI0apr7PIxK87FFXnnwHcCVfvw,567
15
- gemmi_protools-0.1.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
16
- gemmi_protools-0.1.4.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
17
- gemmi_protools-0.1.4.dist-info/RECORD,,
File without changes
File without changes
File without changes