gemmi-protools 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gemmi-protools might be problematic. Click here for more details.
- gemmi_protools/reader.py +59 -5
- {gemmi_protools-0.1.2.dist-info → gemmi_protools-0.1.3.dist-info}/METADATA +1 -1
- {gemmi_protools-0.1.2.dist-info → gemmi_protools-0.1.3.dist-info}/RECORD +6 -6
- {gemmi_protools-0.1.2.dist-info → gemmi_protools-0.1.3.dist-info}/WHEEL +0 -0
- {gemmi_protools-0.1.2.dist-info → gemmi_protools-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {gemmi_protools-0.1.2.dist-info → gemmi_protools-0.1.3.dist-info}/top_level.txt +0 -0
gemmi_protools/reader.py
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
@Author: Luo Jiejian
|
|
3
3
|
"""
|
|
4
4
|
import pathlib
|
|
5
|
-
import re
|
|
6
5
|
import string
|
|
7
6
|
import warnings
|
|
8
7
|
from copy import deepcopy
|
|
@@ -10,6 +9,7 @@ from typing import Union, Optional, List
|
|
|
10
9
|
|
|
11
10
|
import gemmi
|
|
12
11
|
import numpy as np
|
|
12
|
+
from Bio.PDB.Polypeptide import nucleic_letters_3to1_extended, protein_letters_3to1_extended
|
|
13
13
|
from typeguard import typechecked
|
|
14
14
|
|
|
15
15
|
from .cif_opts import _cif_block_for_output, _is_cif
|
|
@@ -19,6 +19,10 @@ from .parser import (_assign_digital_entity_names, _ent_from_structure,
|
|
|
19
19
|
from .pdb_opts import _compound_source_string, _is_pdb
|
|
20
20
|
from .struct_info import Info
|
|
21
21
|
|
|
22
|
+
# Extend the Biopython three-letter -> one-letter tables (imported above) in
# place with the extra residue codes this module maps:
#   "DN" and "N" -> "N" in the nucleic-acid table
#   "UNK"        -> "X" in the protein table
nucleic_letters_3to1_extended.update({"DN": "N", "N": "N"})
protein_letters_3to1_extended.update({"UNK": "X"})
|
|
25
|
+
|
|
22
26
|
|
|
23
27
|
class StructureParser(object):
|
|
24
28
|
"""
|
|
@@ -42,6 +46,27 @@ class StructureParser(object):
|
|
|
42
46
|
self.INFO.from_gemmi_structure_infomap(self.STRUCT.info)
|
|
43
47
|
self.ENTITY = _ent_from_structure(self.STRUCT)
|
|
44
48
|
self.update_entity()
|
|
49
|
+
self.update_full_sequences()
|
|
50
|
+
|
|
51
|
+
def update_full_sequences(self):
    """
    Fill in missing entity full sequences from the modelled residues.

    For every entity in ``self.STRUCT.entities`` whose ``full_sequence`` is
    empty, select the chain mapped to that entity in
    ``self.ENTITY.polymer2eid`` that has the longest one-letter sequence
    (the first chain wins on ties) and store the residue names of its
    polymer part, water excluded, as the entity's ``full_sequence``.
    Entities that already have a full sequence, or that have no chain with
    a non-empty sequence, are left untouched.

    :return: None, ``self.STRUCT.entities`` is updated in place
    """
    # NOTE(review): ``polymer_sequences`` looks like a computed property that
    # rebuilds every chain sequence on each access - confirm. It is therefore
    # fetched lazily and at most once, instead of once per (entity, chain)
    # pair as before.
    chain_seqs = None

    for ent_idx, ent in enumerate(self.STRUCT.entities):
        # a sequence that is already present is never overwritten
        if ent.full_sequence:
            continue

        sel_ch_id = None
        sel_ch_len = 0
        for ch_id, ent_id in self.ENTITY.polymer2eid.items():
            if ent_id != ent.name:
                continue
            if chain_seqs is None:
                chain_seqs = self.polymer_sequences
            cur_len = len(chain_seqs[ch_id])
            # strict ">" keeps the first chain when lengths tie and
            # ignores chains whose sequence is empty
            if cur_len > sel_ch_len:
                sel_ch_id, sel_ch_len = ch_id, cur_len

        if sel_ch_id is None:
            continue

        # when missing, construct from the residues of the first model
        polymer = self.STRUCT[0][sel_ch_id].get_polymer()
        full_seq = [r.name for r in polymer if not r.is_water()]
        # assign through the index, exactly as the original code does
        self.STRUCT.entities[ent_idx].full_sequence = full_seq
|
|
45
70
|
|
|
46
71
|
@typechecked
|
|
47
72
|
def load_from_file(self, path: Union[str, pathlib.PosixPath]):
|
|
@@ -56,6 +81,7 @@ class StructureParser(object):
|
|
|
56
81
|
self.STRUCT, self.ENTITY = struct, entity
|
|
57
82
|
self.INFO.from_gemmi_structure_infomap(self.STRUCT.info)
|
|
58
83
|
self.update_entity()
|
|
84
|
+
self.update_full_sequences()
|
|
59
85
|
|
|
60
86
|
@typechecked
|
|
61
87
|
def to_pdb(self, outfile: str, write_minimal_pdb=False):
|
|
@@ -131,7 +157,7 @@ class StructureParser(object):
|
|
|
131
157
|
for chain in model:
|
|
132
158
|
ct = cts.get(chain.name, "other")
|
|
133
159
|
if ct != "other":
|
|
134
|
-
out[chain.name] =
|
|
160
|
+
out[chain.name] = self.make_one_letter_sequence(chain.name)
|
|
135
161
|
return out
|
|
136
162
|
|
|
137
163
|
@property
|
|
@@ -152,12 +178,28 @@ class StructureParser(object):
|
|
|
152
178
|
for r in chain.get_polymer()], dtype=id_type)
|
|
153
179
|
return out
|
|
154
180
|
|
|
155
|
-
|
|
156
|
-
|
|
181
|
+
def chain_residues(self, polymer_only=True, with_water=False):
    """
    Collect the three-letter residue codes of every chain in ``self.STRUCT``.

    :param polymer_only: bool, when True a non-water residue is kept only if
        its ``entity_type.name`` is "Polymer"; when False every non-water
        residue is kept
    :param with_water: bool, when True water residues are kept as well,
        independently of ``polymer_only``
    :return: dict mapping chain name to the list of residue names; a chain
        name that occurs in several models keeps the list of the last one
    """

    def _keep(residue):
        # water is controlled by with_water alone
        if residue.is_water():
            return with_water
        # entity_type is only inspected when polymer_only is requested
        return (not polymer_only) or residue.entity_type.name == "Polymer"

    codes_by_chain = {}
    for model in self.STRUCT:
        for chain in model:
            codes_by_chain[chain.name] = [res.name for res in chain if _keep(res)]
    return codes_by_chain
|
|
162
204
|
|
|
163
205
|
def update_entity(self):
|
|
@@ -369,3 +411,15 @@ class StructureParser(object):
|
|
|
369
411
|
return np.array(coord, dtype=np.float32), np.array(atom_id, dtype=id_type)
|
|
370
412
|
else:
|
|
371
413
|
return np.empty(shape=(0, 3), dtype=np.float32), np.array(atom_id, dtype=id_type)
|
|
414
|
+
|
|
415
|
+
def make_one_letter_sequence(self, chain_id):
    """
    Build the one-letter sequence of a single chain.

    The chain type is looked up in ``self.chain_types`` and the residue
    names come from ``self.chain_residues(polymer_only=True,
    with_water=False)``.

    :param chain_id: key of the chain in ``self.chain_types`` and in the
        dict returned by ``self.chain_residues``
    :return: str; for "protein" chains each residue is translated with
        ``protein_letters_3to1_extended`` ("X" when the code is not in the
        table), for "dna"/"rna" chains with
        ``nucleic_letters_3to1_extended`` ("N" when not in the table), and
        for any other chain type the empty string
    """
    kind = self.chain_types[chain_id]
    three_letter = self.chain_residues(polymer_only=True, with_water=False)[chain_id]

    # choose the translation table and the placeholder for unknown codes
    if kind == "protein":
        table, unknown = protein_letters_3to1_extended, "X"
    elif kind in ("dna", "rna"):
        table, unknown = nucleic_letters_3to1_extended, "N"
    else:
        return ""

    return "".join(table.get(code, unknown) for code in three_letter)
|
|
@@ -7,10 +7,10 @@ gemmi_protools/parse_pdb_header.py,sha256=UOGMsE3-d3APhO7zaAEE0NT31n-iqt55VpDh_R
|
|
|
7
7
|
gemmi_protools/parser.py,sha256=QIJCOfK8FaFbLMvBG82zTOAjIvQJcf2WRwuFSxj4zvc,8982
|
|
8
8
|
gemmi_protools/pdb_opts.py,sha256=NbXLDNNVF7tuG_bUM0Infylf5aYnOCP2Pd-ndqm5bK4,5652
|
|
9
9
|
gemmi_protools/ppi.py,sha256=nRzRWv28SDjVt6hMShRL_QYKFsBO1xA5jSGIQrN0JBg,2313
|
|
10
|
-
gemmi_protools/reader.py,sha256=
|
|
10
|
+
gemmi_protools/reader.py,sha256=4xuK7UmJVc6Nt9YhYS1FPsi5PL4QwZHilk8s3V-jf0Y,15499
|
|
11
11
|
gemmi_protools/struct_info.py,sha256=9nBj1Zer03S8_Wks7L7uRlc9PlbfCKzoaT32pKR58X8,2769
|
|
12
|
-
gemmi_protools-0.1.
|
|
13
|
-
gemmi_protools-0.1.
|
|
14
|
-
gemmi_protools-0.1.
|
|
15
|
-
gemmi_protools-0.1.
|
|
16
|
-
gemmi_protools-0.1.
|
|
12
|
+
gemmi_protools-0.1.3.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
|
|
13
|
+
gemmi_protools-0.1.3.dist-info/METADATA,sha256=ymS32ZVWiYY7LDzrQkEqiatqY2qZBqenf3X0BvX5DT4,567
|
|
14
|
+
gemmi_protools-0.1.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
15
|
+
gemmi_protools-0.1.3.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
|
|
16
|
+
gemmi_protools-0.1.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|