gemmi-protools 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gemmi-protools might be problematic. Click here for more details.
- gemmi_protools/io/reader.py +28 -24
- gemmi_protools/tools/align.py +1 -0
- gemmi_protools/tools/dockq.py +4 -3
- gemmi_protools/tools/mesh.py +116 -14
- gemmi_protools/tools/pdb_annot.py +1 -0
- {gemmi_protools-1.0.0.dist-info → gemmi_protools-1.0.1.dist-info}/METADATA +11 -6
- {gemmi_protools-1.0.0.dist-info → gemmi_protools-1.0.1.dist-info}/RECORD +10 -10
- {gemmi_protools-1.0.0.dist-info → gemmi_protools-1.0.1.dist-info}/WHEEL +0 -0
- {gemmi_protools-1.0.0.dist-info → gemmi_protools-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {gemmi_protools-1.0.0.dist-info → gemmi_protools-1.0.1.dist-info}/top_level.txt +0 -0
gemmi_protools/io/reader.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import gzip
|
|
2
|
+
import io
|
|
1
3
|
import itertools
|
|
2
4
|
import pathlib
|
|
3
5
|
import random
|
|
@@ -274,19 +276,7 @@ class StructureParser(object):
|
|
|
274
276
|
else:
|
|
275
277
|
raise ValueError("structure must be gemmi.Structure or None")
|
|
276
278
|
|
|
277
|
-
self.
|
|
278
|
-
self.STRUCT.assign_serial_numbers()
|
|
279
|
-
|
|
280
|
-
self.STRUCT.renumber_models()
|
|
281
|
-
if len(self.STRUCT) > 1:
|
|
282
|
-
for idx in range(1, len(self.STRUCT)):
|
|
283
|
-
del self.STRUCT[idx]
|
|
284
|
-
|
|
285
|
-
self.MODEL = self.STRUCT[0]
|
|
286
|
-
self.STRUCT.remove_alternative_conformations()
|
|
287
|
-
self.STRUCT.remove_hydrogens()
|
|
288
|
-
self.STRUCT.remove_empty_chains()
|
|
289
|
-
self._update_full_sequences()
|
|
279
|
+
self._init_struct()
|
|
290
280
|
|
|
291
281
|
info_map = dict(self.STRUCT.info)
|
|
292
282
|
pdb_code = info_map.get("_entry.id", "").lower()
|
|
@@ -300,6 +290,22 @@ class StructureParser(object):
|
|
|
300
290
|
)
|
|
301
291
|
self.update_entity()
|
|
302
292
|
|
|
293
|
+
def _init_struct(self):
|
|
294
|
+
self.STRUCT.setup_entities()
|
|
295
|
+
self.STRUCT.assign_serial_numbers()
|
|
296
|
+
self.STRUCT.renumber_models()
|
|
297
|
+
|
|
298
|
+
# keep the first model
|
|
299
|
+
if len(self.STRUCT) > 1:
|
|
300
|
+
for idx in reversed(list(range(1, len(self.STRUCT)))):
|
|
301
|
+
del self.STRUCT[idx]
|
|
302
|
+
|
|
303
|
+
self.MODEL = self.STRUCT[0]
|
|
304
|
+
self.STRUCT.remove_alternative_conformations()
|
|
305
|
+
self.STRUCT.remove_hydrogens()
|
|
306
|
+
self.STRUCT.remove_empty_chains()
|
|
307
|
+
self._update_full_sequences()
|
|
308
|
+
|
|
303
309
|
def load_from_file(self, path: str):
|
|
304
310
|
"""
|
|
305
311
|
Load model from file, default use the first model.
|
|
@@ -315,17 +321,7 @@ class StructureParser(object):
|
|
|
315
321
|
else:
|
|
316
322
|
raise ValueError("path must be files with suffixes [ .cif, .cif.gz, .pdb or .pdb.gz]")
|
|
317
323
|
|
|
318
|
-
|
|
319
|
-
self.STRUCT.renumber_models()
|
|
320
|
-
if len(self.STRUCT) > 1:
|
|
321
|
-
for idx in range(1, len(self.STRUCT)):
|
|
322
|
-
del self.STRUCT[idx]
|
|
323
|
-
|
|
324
|
-
self.MODEL = self.STRUCT[0]
|
|
325
|
-
self.STRUCT.remove_alternative_conformations()
|
|
326
|
-
self.STRUCT.remove_hydrogens()
|
|
327
|
-
self.STRUCT.remove_empty_chains()
|
|
328
|
-
self._update_full_sequences()
|
|
324
|
+
self._init_struct()
|
|
329
325
|
self.update_entity()
|
|
330
326
|
|
|
331
327
|
def _update_full_sequences(self):
|
|
@@ -390,6 +386,14 @@ class StructureParser(object):
|
|
|
390
386
|
|
|
391
387
|
return out
|
|
392
388
|
|
|
389
|
+
@property
|
|
390
|
+
def subchain_id_to_entity_id(self):
|
|
391
|
+
return {ch: ent.name for ent in self.STRUCT.entities for ch in ent.subchains}
|
|
392
|
+
|
|
393
|
+
@property
|
|
394
|
+
def subchain_id_to_chain_id(self):
|
|
395
|
+
return {sch.subchain_id(): chain.name for chain in self.MODEL for sch in chain.subchains()}
|
|
396
|
+
|
|
393
397
|
def get_chain(self, chain_id: str):
|
|
394
398
|
return self.MODEL[chain_id]
|
|
395
399
|
|
gemmi_protools/tools/align.py
CHANGED
gemmi_protools/tools/dockq.py
CHANGED
|
@@ -11,6 +11,7 @@ from typing import List, Tuple
|
|
|
11
11
|
|
|
12
12
|
import gemmi
|
|
13
13
|
import pandas as pd
|
|
14
|
+
|
|
14
15
|
from gemmi_protools.io.reader import StructureParser
|
|
15
16
|
|
|
16
17
|
|
|
@@ -22,8 +23,8 @@ def dockq_score_interface(query_model: str,
|
|
|
22
23
|
"""
|
|
23
24
|
Calculate Dockq Score for an interface (partner 1 vs partner 2)
|
|
24
25
|
|
|
25
|
-
:param query_model: str
|
|
26
|
-
|
|
26
|
+
:param query_model: str
|
|
27
|
+
path of query model, support .pdb, .pdb.gz, .cif, .cif.gz
|
|
27
28
|
:param native_model:
|
|
28
29
|
:param partner_1_mapping: a list of chain ID mapping between query and native for partner1 of the interface
|
|
29
30
|
e.g. [(q chain1, n chain1), (q chain2, n chain2)]
|
|
@@ -124,4 +125,4 @@ def dockq_score_interface(query_model: str,
|
|
|
124
125
|
else:
|
|
125
126
|
score = ""
|
|
126
127
|
|
|
127
|
-
return score
|
|
128
|
+
return dict(score=score, status=msg)
|
gemmi_protools/tools/mesh.py
CHANGED
|
@@ -4,12 +4,15 @@
|
|
|
4
4
|
import os
|
|
5
5
|
import subprocess
|
|
6
6
|
import tempfile
|
|
7
|
-
from
|
|
7
|
+
from collections import defaultdict
|
|
8
|
+
from typing import List, Optional, Union
|
|
8
9
|
|
|
10
|
+
import freesasa
|
|
9
11
|
import numpy as np
|
|
10
12
|
import trimesh
|
|
11
13
|
from Bio.PDB import Selection
|
|
12
14
|
from Bio.PDB.ResidueDepth import _get_atom_radius, _read_vertex_array
|
|
15
|
+
|
|
13
16
|
from gemmi_protools import StructureParser
|
|
14
17
|
from gemmi_protools import gemmi2bio
|
|
15
18
|
|
|
@@ -39,20 +42,32 @@ def get_mesh(struct_file: str, chains: Optional[List[str]] = None, MSMS: str = "
|
|
|
39
42
|
:return:
|
|
40
43
|
https://ccsb.scripps.edu/msms/downloads/
|
|
41
44
|
"""
|
|
45
|
+
xyz_tmp = tempfile.NamedTemporaryFile(delete=False).name
|
|
46
|
+
surface_tmp = tempfile.NamedTemporaryFile(delete=False).name
|
|
47
|
+
msms_tmp = tempfile.NamedTemporaryFile(delete=False).name
|
|
48
|
+
face_file = surface_tmp + ".face"
|
|
49
|
+
surface_file = surface_tmp + ".vert"
|
|
42
50
|
|
|
43
51
|
try:
|
|
44
52
|
st = StructureParser()
|
|
45
53
|
st.load_from_file(struct_file)
|
|
46
54
|
st.clean_structure(remove_ligand=True)
|
|
47
55
|
|
|
48
|
-
|
|
56
|
+
if chains is None:
|
|
57
|
+
st_p = st
|
|
58
|
+
else:
|
|
59
|
+
for ch in chains:
|
|
60
|
+
if ch not in st.chain_ids:
|
|
61
|
+
raise ValueError("Chain %s not found (only [%s])" % (ch, " ".join(st.chain_ids)))
|
|
62
|
+
st_p = st.pick_chains(chains)
|
|
63
|
+
|
|
64
|
+
bio_st = gemmi2bio(st_p.STRUCT)
|
|
49
65
|
model = bio_st[0]
|
|
50
66
|
|
|
51
67
|
# Replace pdb_to_xyzr
|
|
52
68
|
# Make x,y,z,radius file
|
|
53
69
|
atom_list = Selection.unfold_entities(model, "A")
|
|
54
70
|
|
|
55
|
-
xyz_tmp = tempfile.NamedTemporaryFile(delete=False).name
|
|
56
71
|
with open(xyz_tmp, "w") as pdb_to_xyzr:
|
|
57
72
|
for atom in atom_list:
|
|
58
73
|
x, y, z = atom.coord
|
|
@@ -60,13 +75,9 @@ def get_mesh(struct_file: str, chains: Optional[List[str]] = None, MSMS: str = "
|
|
|
60
75
|
pdb_to_xyzr.write(f"{x:6.3f}\t{y:6.3f}\t{z:6.3f}\t{radius:1.2f}\n")
|
|
61
76
|
|
|
62
77
|
# Make surface
|
|
63
|
-
surface_tmp = tempfile.NamedTemporaryFile(delete=False).name
|
|
64
|
-
msms_tmp = tempfile.NamedTemporaryFile(delete=False).name
|
|
65
78
|
MSMS = MSMS + " -no_header -probe_radius 1.5 -if %s -of %s > " + msms_tmp
|
|
66
79
|
make_surface = MSMS % (xyz_tmp, surface_tmp)
|
|
67
80
|
subprocess.call(make_surface, shell=True)
|
|
68
|
-
face_file = surface_tmp + ".face"
|
|
69
|
-
surface_file = surface_tmp + ".vert"
|
|
70
81
|
if not os.path.isfile(surface_file):
|
|
71
82
|
raise RuntimeError(
|
|
72
83
|
f"Failed to generate surface file using command:\n{make_surface}"
|
|
@@ -84,12 +95,103 @@ def get_mesh(struct_file: str, chains: Optional[List[str]] = None, MSMS: str = "
|
|
|
84
95
|
mesh.update_faces(mesh.unique_faces())
|
|
85
96
|
mesh.update_faces(mesh.nondegenerate_faces())
|
|
86
97
|
mesh.remove_unreferenced_vertices()
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
98
|
+
|
|
99
|
+
# Remove temporary files
|
|
100
|
+
for fn in [xyz_tmp, surface_tmp, msms_tmp, face_file, surface_file]:
|
|
101
|
+
try:
|
|
102
|
+
os.remove(fn)
|
|
103
|
+
except OSError:
|
|
104
|
+
pass
|
|
94
105
|
|
|
95
106
|
return mesh
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def get_surface_residues(struct_file: str,
|
|
110
|
+
chains: Optional[List[str]] = None,
|
|
111
|
+
relative_sasa_cutoff: Union[int, float] = 0.15):
|
|
112
|
+
####################
|
|
113
|
+
# check and pick
|
|
114
|
+
####################
|
|
115
|
+
st = StructureParser()
|
|
116
|
+
st.load_from_file(struct_file)
|
|
117
|
+
st.clean_structure()
|
|
118
|
+
|
|
119
|
+
if chains is None:
|
|
120
|
+
chains = st.chain_ids
|
|
121
|
+
|
|
122
|
+
if isinstance(chains, list):
|
|
123
|
+
if len(chains) == 0:
|
|
124
|
+
raise ValueError("chains is not set")
|
|
125
|
+
else:
|
|
126
|
+
# check if chains valid
|
|
127
|
+
for ch in chains:
|
|
128
|
+
if ch not in st.chain_ids:
|
|
129
|
+
raise ValueError("Chain %s not found" % ch)
|
|
130
|
+
|
|
131
|
+
st_p = st.pick_chains(chains)
|
|
132
|
+
# sequences = {k: s.replace("-", "").upper() for k, s in st_p.polymer_sequences().items()}
|
|
133
|
+
|
|
134
|
+
# start from 1
|
|
135
|
+
seq_num_mapper = dict()
|
|
136
|
+
for chain in st_p.MODEL:
|
|
137
|
+
for i, res in enumerate(chain):
|
|
138
|
+
key = (chain.name, str(res.seqid.num) + res.seqid.icode.strip(), res.name)
|
|
139
|
+
seq_num_mapper[key] = i + 1
|
|
140
|
+
|
|
141
|
+
# make one upper letter chain ID
|
|
142
|
+
mapper = st_p.make_one_letter_chain(only_uppercase=True)
|
|
143
|
+
mapper_r = {v: k for k, v in mapper.items()}
|
|
144
|
+
|
|
145
|
+
####################
|
|
146
|
+
# save to pdb
|
|
147
|
+
####################
|
|
148
|
+
with tempfile.NamedTemporaryFile(delete=True, suffix=".pdb", mode='w') as tmp_file:
|
|
149
|
+
st_p.to_pdb(tmp_file.name)
|
|
150
|
+
structure = freesasa.Structure(tmp_file.name)
|
|
151
|
+
|
|
152
|
+
result = freesasa.calc(structure)
|
|
153
|
+
|
|
154
|
+
residue_areas = result.residueAreas()
|
|
155
|
+
|
|
156
|
+
surface_residues_relative_sasa = dict()
|
|
157
|
+
surface_atoms = defaultdict(list)
|
|
158
|
+
for atom_index in range(structure.nAtoms()):
|
|
159
|
+
ch = structure.chainLabel(atom_index)
|
|
160
|
+
ch = mapper_r.get(ch, ch)
|
|
161
|
+
|
|
162
|
+
res_num = structure.residueNumber(atom_index).strip()
|
|
163
|
+
res_name = structure.residueName(atom_index)
|
|
164
|
+
atom_sasa = result.atomArea(atom_index)
|
|
165
|
+
|
|
166
|
+
res_id = (ch, res_num, res_name)
|
|
167
|
+
res_relative_total = residue_areas[ch][res_num].relativeTotal
|
|
168
|
+
if res_relative_total > relative_sasa_cutoff:
|
|
169
|
+
if res_id not in surface_residues_relative_sasa:
|
|
170
|
+
surface_residues_relative_sasa[res_id] = res_relative_total
|
|
171
|
+
if atom_sasa > 0:
|
|
172
|
+
atom_name = structure.atomName(atom_index).strip()
|
|
173
|
+
pos = structure.coord(atom_index)
|
|
174
|
+
surface_atoms[res_id].append((atom_sasa, atom_name, pos))
|
|
175
|
+
|
|
176
|
+
results = []
|
|
177
|
+
for res_id, query_atoms in surface_atoms.items():
|
|
178
|
+
seq_loc = seq_num_mapper[res_id]
|
|
179
|
+
|
|
180
|
+
query_atoms.sort(reverse=True)
|
|
181
|
+
centroid = tuple(np.array([a[2] for a in query_atoms[0:3]]).mean(axis=0).tolist())
|
|
182
|
+
results.append((res_id[0],
|
|
183
|
+
res_id[1],
|
|
184
|
+
res_id[2],
|
|
185
|
+
seq_loc,
|
|
186
|
+
centroid,
|
|
187
|
+
surface_residues_relative_sasa[res_id]
|
|
188
|
+
)
|
|
189
|
+
)
|
|
190
|
+
dtype = [("chain_name", "U5"),
|
|
191
|
+
("residue_numi", "U8"),
|
|
192
|
+
("residue_name", "U5"),
|
|
193
|
+
("sequential_residue_num", "i4"),
|
|
194
|
+
("centroid", ("f4", (3,))),
|
|
195
|
+
("relative_sasa", "f4"),
|
|
196
|
+
]
|
|
197
|
+
return np.array(results, dtype=dtype)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gemmi_protools
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: An Enhanced tool to process PDB structures based on Gemmi
|
|
5
5
|
Author: Luo Jiejian
|
|
6
6
|
Author-email: Luo Jiejian <luojiejian12@mails.ucas.ac.cn>
|
|
@@ -16,6 +16,8 @@ Requires-Dist: typeguard
|
|
|
16
16
|
Requires-Dist: numpy
|
|
17
17
|
Requires-Dist: scipy
|
|
18
18
|
Requires-Dist: trimesh
|
|
19
|
+
Requires-Dist: joblib
|
|
20
|
+
Requires-Dist: rtree
|
|
19
21
|
Requires-Dist: freesasa==2.2.1
|
|
20
22
|
Dynamic: author
|
|
21
23
|
Dynamic: license-file
|
|
@@ -24,10 +26,8 @@ Dynamic: license-file
|
|
|
24
26
|
|
|
25
27
|
# Install
|
|
26
28
|
```commandline
|
|
27
|
-
|
|
28
|
-
conda install
|
|
29
|
-
conda install -n gemmi_protools dockq trimesh -c conda-forge
|
|
30
|
-
conda activate gemmi_protools
|
|
29
|
+
|
|
30
|
+
conda install python=3.12.9 anarci hmmer dockq trimesh rtree -c bioconda -c conda-forge
|
|
31
31
|
pip install gemmi_protools
|
|
32
32
|
```
|
|
33
33
|
|
|
@@ -36,6 +36,11 @@ pip install gemmi_protools
|
|
|
36
36
|
## read structures
|
|
37
37
|
```commandline
|
|
38
38
|
from gemmi_protools import StructureParser
|
|
39
|
+
|
|
40
|
+
# load structure
|
|
39
41
|
st=StructureParser()
|
|
40
|
-
st.load_from_file("
|
|
42
|
+
st.load_from_file("7mmo.cif")
|
|
43
|
+
|
|
44
|
+
# get chain IDs
|
|
45
|
+
print(st.chain_ids)
|
|
41
46
|
```
|
|
@@ -6,14 +6,14 @@ gemmi_protools/data/MHC/MHC_combined.hmm.h3m,sha256=CvNMCsobQiX-wL7iB4CreNcbpnEl
|
|
|
6
6
|
gemmi_protools/data/MHC/MHC_combined.hmm.h3p,sha256=-mK278pRedG3-KL-DtuVAQy7La9DgXg5FcP89D6X3Ck,78325
|
|
7
7
|
gemmi_protools/io/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
|
|
8
8
|
gemmi_protools/io/convert.py,sha256=A1i1vPgxG1LqMSUvWtegLl9LipgUQbfmKeGJ_f00UYo,3781
|
|
9
|
-
gemmi_protools/io/reader.py,sha256=
|
|
9
|
+
gemmi_protools/io/reader.py,sha256=joQr_glerss3QcfIJGr0O6lw8Mc4N1-pVobMHqY1zi0,33255
|
|
10
10
|
gemmi_protools/tools/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
|
|
11
|
-
gemmi_protools/tools/align.py,sha256=
|
|
12
|
-
gemmi_protools/tools/dockq.py,sha256=
|
|
13
|
-
gemmi_protools/tools/mesh.py,sha256=
|
|
14
|
-
gemmi_protools/tools/pdb_annot.py,sha256=
|
|
15
|
-
gemmi_protools-1.0.0.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
|
|
16
|
-
gemmi_protools-1.0.
|
|
17
|
-
gemmi_protools-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
18
|
-
gemmi_protools-1.0.0.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
|
|
19
|
-
gemmi_protools-1.0.0.dist-info/RECORD,,
|
|
11
|
+
gemmi_protools/tools/align.py,sha256=oKHvpeDa62zEjLkPmuyBM6avYDl3HFeJVHeRX62I2f4,7085
|
|
12
|
+
gemmi_protools/tools/dockq.py,sha256=baCuO5-GZCwrlS59T5UIXogpM44OIFIfXqksqRBAb0A,4428
|
|
13
|
+
gemmi_protools/tools/mesh.py,sha256=73MuJYwS_ACJI15OsrooAAhB1Ti4fM8CJSBqFOBR7LU,6537
|
|
14
|
+
gemmi_protools/tools/pdb_annot.py,sha256=EzgcntlERR04TfN0dIhf_GM9UCXEvUaH60Xohmbx_do,8253
|
|
15
|
+
gemmi_protools-1.0.1.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
|
|
16
|
+
gemmi_protools-1.0.1.dist-info/METADATA,sha256=cdKO7zuEv4ZwwCrcBZcprXmDtLB4AbEFDKH887JRcTI,1034
|
|
17
|
+
gemmi_protools-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
18
|
+
gemmi_protools-1.0.1.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
|
|
19
|
+
gemmi_protools-1.0.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|