gemmi-protools 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gemmi-protools might be problematic. Click here for more details.

@@ -1,8 +1,8 @@
1
1
  """
2
2
  @Author: Luo Jiejian
3
3
  """
4
- from .reader import StructureParser
5
- from .convert import gemmi2bio, bio2gemmi
6
- from .align import StructureAligner
7
- from .ppi import ppi_interface_residues
8
- from .dockq import dockq_score, dockq_score_interface
4
+ from gemmi_protools.io.convert import gemmi2bio, bio2gemmi
5
+ from gemmi_protools.io.reader import StructureParser
6
+ from gemmi_protools.utils.align import StructureAligner
7
+ from gemmi_protools.utils.ppi import ppi_interface_residues
8
+ from gemmi_protools.utils.dockq import dockq_score, dockq_score_interface
@@ -0,0 +1,3 @@
1
+ """
2
+ @Author: Luo Jiejian
3
+ """
@@ -9,7 +9,7 @@ import gemmi
9
9
  import pandas as pd
10
10
  from typeguard import typechecked
11
11
 
12
- from .struct_info import Entity
12
+ from gemmi_protools.io.struct_info import Entity
13
13
 
14
14
 
15
15
  @typechecked
@@ -8,9 +8,9 @@ from typing import Union, Optional, Dict, List
8
8
  import gemmi
9
9
  from typeguard import typechecked
10
10
 
11
- from .cif_opts import _cif_entity_info, _is_cif, _get_cif_resolution
12
- from .pdb_opts import _pdb_entity_info, _is_pdb, _get_pdb_resolution
13
- from .struct_info import Entity
11
+ from gemmi_protools.io.cif_opts import _cif_entity_info, _is_cif, _get_cif_resolution
12
+ from gemmi_protools.io.pdb_opts import _pdb_entity_info, _is_pdb, _get_pdb_resolution
13
+ from gemmi_protools.io.struct_info import Entity
14
14
 
15
15
 
16
16
  @typechecked
@@ -10,8 +10,8 @@ from typing import Dict, Union, List
10
10
 
11
11
  from typeguard import typechecked
12
12
 
13
- from .parse_pdb_header import _parse_pdb_header_list
14
- from .struct_info import Entity
13
+ from gemmi_protools.io.parse_pdb_header import _parse_pdb_header_list
14
+ from gemmi_protools.io.struct_info import Entity
15
15
 
16
16
 
17
17
  @typechecked
@@ -0,0 +1,32 @@
1
+ """
2
+ @Author: Luo Jiejian
3
+ """
4
+ from copy import deepcopy
5
+
6
+ from Bio.PDB.Polypeptide import nucleic_letters_3to1_extended, protein_letters_3to1_extended
7
+
8
+
9
def strip_key_val(inputs):
    """
    Build a new dict from ``inputs`` with ``.strip()`` applied to every key and every value.

    The input mapping is not modified. When two keys become equal after
    stripping, the value of the one iterated last is kept.

    :param inputs: mapping whose keys and values both provide ``.strip()``
    :return: dict
    """
    return {key.strip(): val.strip() for key, val in inputs.items()}
14
+
15
+
16
def __nucleic_3to1_mapper():
    """
    Build the nucleic-acid residue-name -> one-letter table.

    Starts from a private copy of Bio's ``nucleic_letters_3to1_extended`` (the
    shared Bio table itself is left untouched), adds the "DN" and "N" entries,
    and strips whitespace from all keys and values.
    """
    table = deepcopy(nucleic_letters_3to1_extended)
    table.update({"DN": "N", "N": "N"})
    return strip_key_val(table)


def __protein_3to1_mapper():
    """
    Build the protein residue-name -> one-letter table.

    Starts from a private copy of Bio's ``protein_letters_3to1_extended``, maps
    "UNK" to "X", and strips whitespace from all keys and values.
    """
    table = deepcopy(protein_letters_3to1_extended)
    table.update({"UNK": "X"})
    return strip_key_val(table)


# Lookup tables shared by importers of this module; built once at import time.
nucleic_3to1_mapper = __nucleic_3to1_mapper()
protein_3to1_mapper = __protein_3to1_mapper()
@@ -9,19 +9,15 @@ from typing import Union, Optional, List
9
9
 
10
10
  import gemmi
11
11
  import numpy as np
12
- from Bio.PDB.Polypeptide import nucleic_letters_3to1_extended, protein_letters_3to1_extended
13
12
  from typeguard import typechecked
14
13
 
15
- from .cif_opts import _cif_block_for_output, _is_cif
16
- from .parser import (_assign_digital_entity_names, _ent_from_structure,
17
- pdb_parser, cif_parser, _chain_type, _chain_names2one_letter,
18
- _assert_unique_chain_names_in_models, get_assembly)
19
- from .pdb_opts import _compound_source_string, _is_pdb
20
- from .struct_info import Info
21
-
22
- nucleic_letters_3to1_extended["DN"] = "N"
23
- nucleic_letters_3to1_extended["N"] = "N"
24
- protein_letters_3to1_extended["UNK"] = "X"
14
+ from gemmi_protools.io.cif_opts import _cif_block_for_output, _is_cif
15
+ from gemmi_protools.io.parser import (_assign_digital_entity_names, _ent_from_structure,
16
+ pdb_parser, cif_parser, _chain_type, _chain_names2one_letter,
17
+ _assert_unique_chain_names_in_models, get_assembly)
18
+ from gemmi_protools.io.pdb_opts import _compound_source_string, _is_pdb
19
+ from gemmi_protools.io.peptide import nucleic_3to1_mapper, protein_3to1_mapper
20
+ from gemmi_protools.io.struct_info import Info
25
21
 
26
22
 
27
23
  class StructureParser(object):
@@ -417,9 +413,33 @@ class StructureParser(object):
417
413
  residues = self.chain_residues(polymer_only=True, with_water=False)[chain_id]
418
414
 
419
415
  if c_type == "protein":
420
- one_letter_code = "".join([protein_letters_3to1_extended.get(r, "X") for r in residues])
416
+ one_letter_code = "".join([protein_3to1_mapper.get(r, "X") for r in residues])
421
417
  elif c_type in ["dna", "rna"]:
422
- one_letter_code = "".join([nucleic_letters_3to1_extended.get(r, "N") for r in residues])
418
+ one_letter_code = "".join([nucleic_3to1_mapper.get(r, "N") for r in residues])
423
419
  else:
424
420
  one_letter_code = ""
425
421
  return one_letter_code
422
+
423
def clean_structure(self, keep_ligand=True):
    """
    Clean ``self.STRUCT`` and refresh the information derived from it.

    Calls, in this order:
    (1) set_default_model
    (2) remove_alternative_conformations
    (3) remove_hydrogens
    (4) remove_waters, or remove_ligands_and_waters when keep_ligand is False
    (5) remove_empty_chains
    (6) update_entity and update_full_sequences

    :param keep_ligand: default True; when False, ligands are removed together with waters
    :return: None
    """
    self.set_default_model()

    struct = self.STRUCT
    struct.remove_alternative_conformations()
    struct.remove_hydrogens()

    # waters are always removed; ligands only on request
    if not keep_ligand:
        struct.remove_ligands_and_waters()
    else:
        struct.remove_waters()

    struct.remove_empty_chains()

    # entity and sequence information must follow the edited structure
    self.update_entity()
    self.update_full_sequences()
@@ -0,0 +1,3 @@
1
+ """
2
+ @Author: Luo Jiejian
3
+ """
@@ -15,8 +15,8 @@ import numpy as np
15
15
  from Bio.PDB import Superimposer
16
16
  from typeguard import typechecked
17
17
 
18
- from .convert import gemmi2bio, bio2gemmi
19
- from .reader import StructureParser
18
+ from gemmi_protools.io.convert import gemmi2bio, bio2gemmi
19
+ from gemmi_protools.io.reader import StructureParser
20
20
 
21
21
 
22
22
  class StructureAligner(object):
@@ -13,7 +13,7 @@ from typing import Optional, Union
13
13
  import pandas as pd
14
14
  from typeguard import typechecked
15
15
 
16
- from .reader import StructureParser
16
+ from gemmi_protools.io.reader import StructureParser
17
17
 
18
18
 
19
19
  @typechecked
@@ -0,0 +1,231 @@
1
+ """
2
+ @Author: Luo Jiejian
3
+ @Date: 2025/1/21
4
+ """
5
+ import gzip
6
+ import io
7
+ import os
8
+ import pathlib
9
+ import shutil
10
+ import subprocess
11
+ import time
12
+ import uuid
13
+ from typing import Union, Optional
14
+
15
+ import pdbfixer
16
+ from openmm import app
17
+ from typeguard import typechecked
18
+
19
+ from gemmi_protools.io.cif_opts import _is_cif
20
+ from gemmi_protools.io.pdb_opts import _is_pdb
21
+
22
+
23
@typechecked
def _load_by_pbdfixer(path: Union[str, pathlib.Path]) -> pdbfixer.PDBFixer:
    """
    Load a structure file into a PDBFixer object.

    :param path: str or pathlib.Path, file accepted by _is_pdb or _is_cif
        (.cif, .cif.gz, .pdb or .pdb.gz)
    :return: pdbfixer.PDBFixer
    :raises ValueError: if neither _is_pdb nor _is_cif accepts the path
    """
    if not (_is_pdb(path) or _is_cif(path)):
        raise ValueError("Only support .cif, .cif.gz, .pdb or .pdb.gz file, but got %s" % path)

    cur_path = pathlib.Path(path)
    s1 = cur_path.suffixes[-1]
    s2 = "".join(cur_path.suffixes[-2:])

    if s1 in [".pdb", ".cif"]:
        # Plain file: PDBFixer opens it itself. Pass a str, because PDBFixer
        # applies string methods to the file name and a pathlib.Path would fail.
        return pdbfixer.PDBFixer(filename=str(path))

    # Compressed file: decompress as text and hand the open handle to PDBFixer,
    # which reads it completely inside the constructor.
    with gzip.open(path, "rt", encoding="utf-8") as text_io:
        if s2 == ".pdb.gz":
            return pdbfixer.PDBFixer(pdbfile=text_io)
        return pdbfixer.PDBFixer(pdbxfile=text_io)
44
+
45
+
46
@typechecked
def clean_structure(input_file: Union[str, pathlib.Path],
                    output_file: Union[str, pathlib.Path],
                    add_missing_residue: bool = False,
                    add_missing_atoms: str = "heavy",
                    keep_heterogens: str = "all",
                    replace_nonstandard: bool = True,
                    ph: Union[float, int] = 7.0
                    ):
    """
    Clean a structure with PDBFixer and write the result to output_file.

    :param input_file: str, Input structure file, support file format .cif, .cif.gz, .pdb or .pdb.gz
    :param output_file: str, Output structure file, support file format .cif, .pdb
        Missing parent directories are created.
    :param add_missing_residue: default False
    :param add_missing_atoms: default heavy, accepted values 'all', 'heavy', 'hydrogen', 'none'
            all: add missing heavy and hydrogen atoms
            heavy: add missing heavy atoms only
            hydrogen: add missing hydrogen atoms only
            none: not add missing atoms

    :param keep_heterogens: default all, accepted values 'all', 'water', 'none'
            all: keep all heterogens
            water: only keep water
            none: remove all heterogens
    :param replace_nonstandard: default True, replace all non-standard residues to standard ones
    :param ph: default 7.0, pH value used when adding missing hydrogen atoms
    :return:
        dict with keys
            input: the input_file argument, unchanged
            msg: "Finished" if successful, otherwise the message of the error
                 raised while loading, fixing or writing
    :raises AssertionError: if add_missing_atoms or keep_heterogens is not an accepted value
    """
    assert add_missing_atoms in ['all', 'heavy', 'hydrogen', 'none']
    assert keep_heterogens in ['all', 'water', 'none']

    # Everything below is best-effort: any failure is reported through the
    # returned message instead of being raised.
    try:
        ######################################################
        # load structure
        ######################################################
        fixer = _load_by_pbdfixer(input_file)

        ######################################################
        # replace non-standard residues
        ######################################################
        if replace_nonstandard:
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()

        ######################################################
        # remove heterogens
        ######################################################
        if keep_heterogens == 'none':
            fixer.removeHeterogens(keepWater=False)
        elif keep_heterogens == 'water':
            fixer.removeHeterogens(keepWater=True)

        ######################################################
        # missing residue
        ######################################################
        if add_missing_residue:
            fixer.findMissingResidues()
        else:
            fixer.missingResidues = {}

        ######################################################
        # missing atoms
        ######################################################
        fixer.findMissingAtoms()
        if add_missing_atoms not in ['all', 'heavy']:
            # heavy atoms not requested: discard what was found so that
            # addMissingAtoms() has nothing to add for existing residues
            fixer.missingAtoms = {}
            fixer.missingTerminals = {}
        fixer.addMissingAtoms()
        if add_missing_atoms in ['all', 'hydrogen']:
            fixer.addMissingHydrogens(ph)

        ######################################################
        # output
        ######################################################
        # validate the format before touching the file system
        suffix = pathlib.Path(output_file).suffix
        assert suffix in [".pdb", ".cif"], "output file must be .cif or .pdb"

        # dirname is "" for a bare file name, which means the current
        # directory; os.makedirs("") would fail, so only create real paths
        out_dir = os.path.dirname(output_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        with open(output_file, 'w') as out_handle:
            if suffix == ".pdb":
                app.PDBFile.writeFile(fixer.topology, fixer.positions, out_handle, keepIds=True)
            else:
                app.PDBxFile.writeFile(fixer.topology, fixer.positions, out_handle, keepIds=True)

        msg_str = "Finished"
    except Exception as e:
        msg_str = str(e)

    return dict(input=input_file, msg=msg_str)
140
+
141
+
142
@typechecked
def move_with_overwrite(src_folder: str, dst_folder: str, filename: str):
    """
    Move ``filename`` from ``src_folder`` into ``dst_folder``, replacing a same-named entry there.

    :param src_folder: str, existing directory that holds the file
    :param dst_folder: str, existing directory that receives the file
    :param filename: str, name of the file inside src_folder
    :raises AssertionError: if src_folder or dst_folder is not a directory
    """
    for folder in (src_folder, dst_folder):
        assert os.path.isdir(folder)

    target = os.path.join(dst_folder, filename)
    # remove a previous copy first so the move below cannot collide with it
    if os.path.exists(target):
        os.remove(target)

    shutil.move(os.path.join(src_folder, filename), dst_folder)
153
+
154
+
155
@typechecked
def repair_structure(input_file: Union[str, pathlib.Path],
                     out_dir: Union[str, pathlib.Path],
                     temp_dir: Union[str, pathlib.Path],
                     foldx_path: Optional[str] = None,
                     timeout=3600):
    """
    Run the FoldX ``RepairPDB`` command on one structure file.

    FoldX is executed inside a fresh, uniquely named sub directory of temp_dir.
    When both ``<stem>_Repair.pdb`` and ``<stem>_Repair.fxout`` are produced
    they are moved into out_dir, replacing existing files of the same name.
    The sub directory is always removed afterwards.

    :param input_file: str or pathlib.Path, structure file to repair
    :param out_dir: existing directory for the results, must differ from the
        directory of input_file
    :param temp_dir: existing directory in which the working directory is created
    :param foldx_path: path of the foldx executable, default None means search PATH
    :param timeout: seconds to wait for foldx, default 3600
    :return:
        dict with keys
            input: the input_file argument, unchanged
            msg: "Finished" (str) when foldx exits with status 0, otherwise
                 bytes: the stderr of foldx, or the encoded message of any
                 other error
            use_time: seconds spent, rounded to one decimal
    :raises NotADirectoryError: if out_dir or temp_dir is not a directory
    :raises FileNotFoundError: if input_file is not a file
    :raises AssertionError: if out_dir is the directory of input_file
    :raises RuntimeError: if foldx_path is None and foldx is not found in PATH
    """
    if not os.path.isdir(out_dir):
        raise NotADirectoryError(out_dir)

    if not os.path.isdir(temp_dir):
        raise NotADirectoryError(temp_dir)

    # test the expanded path, which is the one foldx will actually be given
    in_path = pathlib.Path(input_file).expanduser().resolve()
    if not in_path.is_file():
        raise FileNotFoundError(input_file)

    pdb_dir = str(in_path.parent)
    pdb_file = in_path.name
    stem_name = in_path.stem

    # compare resolved paths, so a relative or symlinked out_dir is caught too
    assert pathlib.Path(out_dir).resolve() != in_path.parent, \
        "output directory can't be the directory of input_file"

    # locate the executable before creating anything on disk, so a missing
    # foldx does not leave an orphan working directory behind
    if foldx_path is None:
        foldx_path = shutil.which("foldx")

    if foldx_path is None:
        raise RuntimeError("path of foldx is not set or found in PATH")

    # absolute path: foldx runs with this directory as its working directory,
    # so a relative --output-dir would point to the wrong place
    sub_temp_dir = os.path.abspath(os.path.join(temp_dir, "%s_%s" % (stem_name, uuid.uuid4())))
    os.makedirs(sub_temp_dir)

    # argument list without a shell: paths containing spaces or shell
    # metacharacters reach foldx unchanged and cannot inject commands
    command = [foldx_path,
               "-c", "RepairPDB",
               "--pdb", pdb_file,
               "--pdb-dir", pdb_dir,
               "--output-dir", sub_temp_dir]

    result_name = "%s_Repair.pdb" % stem_name
    fxout_name = "%s_Repair.fxout" % stem_name

    start = time.time()

    try:
        # check=True raises CalledProcessError on a non-zero exit status, so
        # reaching the next line means foldx succeeded
        subprocess.run(command, cwd=sub_temp_dir, check=True,
                       stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       timeout=timeout)
        msg_str = "Finished"

        result_file = os.path.join(sub_temp_dir, result_name)
        fxout_file = os.path.join(sub_temp_dir, fxout_name)
        if os.path.exists(result_file) and os.path.exists(fxout_file):
            # move_with_overwrite only accepts str folders
            move_with_overwrite(sub_temp_dir, str(out_dir), result_name)
            move_with_overwrite(sub_temp_dir, str(out_dir), fxout_name)
    except subprocess.CalledProcessError as e:
        # Handle errors in the called executable
        msg_str = e.stderr
    except Exception as e:
        # Handle other exceptions such as timeout or permissions issues
        msg_str = str(e).encode()
    finally:
        # clean sub temp
        if os.path.isdir(sub_temp_dir):
            shutil.rmtree(sub_temp_dir)
    end = time.time()
    return dict(input=input_file, msg=msg_str, use_time=round(end - start, 1))
@@ -7,7 +7,7 @@ from typing import Union, List
7
7
  import numpy as np
8
8
  from scipy.spatial import cKDTree
9
9
 
10
- from .reader import StructureParser
10
+ from gemmi_protools.io.reader import StructureParser
11
11
 
12
12
 
13
13
  def _ppi_atoms(struct, chains):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gemmi_protools
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: An Enhanced tool to process PDB structures based on Gemmi
5
5
  Author-email: Luo Jiejian <luojiejian12@mails.ucas.ac.cn>
6
6
  License-Expression: MIT
@@ -0,0 +1,20 @@
1
+ gemmi_protools/__init__.py,sha256=hwUw-EieCG0kwzHjTjzHF9Bc3D-J5R_l6G8PCcFegkw,331
2
+ gemmi_protools/io/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
3
+ gemmi_protools/io/cif_opts.py,sha256=TKND91aRGB5hYNdTFElCKMGabCg4klLk_c1evC3WZuA,6368
4
+ gemmi_protools/io/convert.py,sha256=780sQcwhslUD4Hj5UZMVlQdbicniJ6jNjncTl_7jaMk,3841
5
+ gemmi_protools/io/parse_pdb_header.py,sha256=UOGMsE3-d3APhO7zaAEE0NT31n-iqt55VpDh_RPOicI,14223
6
+ gemmi_protools/io/parser.py,sha256=lCeIzEnBhTl0AQPwa68PNrvj1d4vJdS99Mj-WypCvYo,9033
7
+ gemmi_protools/io/pdb_opts.py,sha256=VYIUsEGiFtbcK2eEUi4rsRKDCfMiV-WSXzz4zivxm7A,5686
8
+ gemmi_protools/io/peptide.py,sha256=a2wiEutJmvhl6gDCIzzqRCbmyknk2mwgy2FZ53lXclU,750
9
+ gemmi_protools/io/reader.py,sha256=oaJ5TTWLFCZ3tDq3R5dAyBqm4Q2sxmfl4D8cGvSOdl0,16060
10
+ gemmi_protools/io/struct_info.py,sha256=9nBj1Zer03S8_Wks7L7uRlc9PlbfCKzoaT32pKR58X8,2769
11
+ gemmi_protools/utils/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
12
+ gemmi_protools/utils/align.py,sha256=CZcrvjy-ZbX2u7OAn-YGblbxaj9YFUDX4CFZcpbpnB8,6959
13
+ gemmi_protools/utils/dockq.py,sha256=XmMwVEy-H4p6sH_HPcDWA3TP77OWdih0fE_BQJDr4pU,4189
14
+ gemmi_protools/utils/fixer.py,sha256=f1MAeYkFukijaBwTdINO0733Qbe5B9xSgRCE69nHkN4,8396
15
+ gemmi_protools/utils/ppi.py,sha256=VWYsdxWwQoS1xwEYj5KB96Zz3F8r5Eyuw6NT3ReD-wc,2330
16
+ gemmi_protools-0.1.5.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
17
+ gemmi_protools-0.1.5.dist-info/METADATA,sha256=0IypVyv4jYIvts3I3uoQC5R_6dG8dooo1B7XC9MgavQ,567
18
+ gemmi_protools-0.1.5.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
19
+ gemmi_protools-0.1.5.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
20
+ gemmi_protools-0.1.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (80.7.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,16 +0,0 @@
1
- gemmi_protools/__init__.py,sha256=SvBS-OBVhYsoHCWw4Rwp-6p8vaFhJC9FQMlx0vYpITQ,237
2
- gemmi_protools/align.py,sha256=LoN2xlZbvEwceQiz6F_VQBxlsNTKfGnJQ1LM937t1qw,6925
3
- gemmi_protools/cif_opts.py,sha256=bfJuUQSYzz_703cIgxgvEVxXalfVYEZwpVjuYEB9O2U,6351
4
- gemmi_protools/convert.py,sha256=780sQcwhslUD4Hj5UZMVlQdbicniJ6jNjncTl_7jaMk,3841
5
- gemmi_protools/dockq.py,sha256=JGPQ7Xs7gz9wubVVT9WSP5lZsLnfgcUH-_nLJ3c8I3U,4172
6
- gemmi_protools/parse_pdb_header.py,sha256=UOGMsE3-d3APhO7zaAEE0NT31n-iqt55VpDh_RPOicI,14223
7
- gemmi_protools/parser.py,sha256=QIJCOfK8FaFbLMvBG82zTOAjIvQJcf2WRwuFSxj4zvc,8982
8
- gemmi_protools/pdb_opts.py,sha256=NbXLDNNVF7tuG_bUM0Infylf5aYnOCP2Pd-ndqm5bK4,5652
9
- gemmi_protools/ppi.py,sha256=nRzRWv28SDjVt6hMShRL_QYKFsBO1xA5jSGIQrN0JBg,2313
10
- gemmi_protools/reader.py,sha256=4xuK7UmJVc6Nt9YhYS1FPsi5PL4QwZHilk8s3V-jf0Y,15499
11
- gemmi_protools/struct_info.py,sha256=9nBj1Zer03S8_Wks7L7uRlc9PlbfCKzoaT32pKR58X8,2769
12
- gemmi_protools-0.1.3.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
13
- gemmi_protools-0.1.3.dist-info/METADATA,sha256=ymS32ZVWiYY7LDzrQkEqiatqY2qZBqenf3X0BvX5DT4,567
14
- gemmi_protools-0.1.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
15
- gemmi_protools-0.1.3.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
16
- gemmi_protools-0.1.3.dist-info/RECORD,,
File without changes
File without changes