gemmi-protools 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gemmi-protools might be problematic. Click here for more details.
- gemmi_protools/peptide.py +32 -0
- gemmi_protools/reader.py +55 -5
- {gemmi_protools-0.1.2.dist-info → gemmi_protools-0.1.4.dist-info}/METADATA +1 -1
- {gemmi_protools-0.1.2.dist-info → gemmi_protools-0.1.4.dist-info}/RECORD +7 -6
- {gemmi_protools-0.1.2.dist-info → gemmi_protools-0.1.4.dist-info}/WHEEL +0 -0
- {gemmi_protools-0.1.2.dist-info → gemmi_protools-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {gemmi_protools-0.1.2.dist-info → gemmi_protools-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""
|
|
2
|
+
@Author: Luo Jiejian
|
|
3
|
+
"""
|
|
4
|
+
from copy import deepcopy
|
|
5
|
+
|
|
6
|
+
from Bio.PDB.Polypeptide import nucleic_letters_3to1_extended, protein_letters_3to1_extended
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def strip_key_val(inputs):
    """
    Return a whitespace-trimmed copy of a str -> str mapping.

    :param inputs: dict whose keys and values are both strings
    :return: new dict with ``str.strip`` applied to every key and value;
             the input dict is not modified. If two keys become equal
             after stripping, the value of the later entry is kept.
    """
    return {name.strip(): code.strip() for name, code in inputs.items()}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def __nucleic_3to1_mapper():
    """
    Build the residue-name -> one-letter lookup used for nucleic acids.

    Starts from a deep copy of ``nucleic_letters_3to1_extended`` (so the
    imported table itself is never modified), adds the names "DN" and "N",
    both mapped to "N", and finally strips surrounding whitespace from
    every key and value.

    :return: dict, residue name -> one-letter code
    """
    table = deepcopy(nucleic_letters_3to1_extended)
    for extra_name in ("DN", "N"):
        table[extra_name] = "N"
    return strip_key_val(table)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def __protein_3to1_mapper():
    """
    Build the residue-name -> one-letter lookup used for proteins.

    Starts from a deep copy of ``protein_letters_3to1_extended`` (so the
    imported table itself is never modified), adds "UNK" mapped to "X",
    and finally strips surrounding whitespace from every key and value.

    :return: dict, residue name -> one-letter code
    """
    table = deepcopy(protein_letters_3to1_extended)
    table["UNK"] = "X"
    return strip_key_val(table)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Lookup tables are built once, when the module is imported, so that
# callers can import and share them instead of rebuilding them per use.
nucleic_3to1_mapper = __nucleic_3to1_mapper()
protein_3to1_mapper = __protein_3to1_mapper()
|
gemmi_protools/reader.py
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
@Author: Luo Jiejian
|
|
3
3
|
"""
|
|
4
4
|
import pathlib
|
|
5
|
-
import re
|
|
6
5
|
import string
|
|
7
6
|
import warnings
|
|
8
7
|
from copy import deepcopy
|
|
@@ -17,6 +16,7 @@ from .parser import (_assign_digital_entity_names, _ent_from_structure,
|
|
|
17
16
|
pdb_parser, cif_parser, _chain_type, _chain_names2one_letter,
|
|
18
17
|
_assert_unique_chain_names_in_models, get_assembly)
|
|
19
18
|
from .pdb_opts import _compound_source_string, _is_pdb
|
|
19
|
+
from .peptide import nucleic_3to1_mapper, protein_3to1_mapper
|
|
20
20
|
from .struct_info import Info
|
|
21
21
|
|
|
22
22
|
|
|
@@ -42,6 +42,27 @@ class StructureParser(object):
|
|
|
42
42
|
self.INFO.from_gemmi_structure_infomap(self.STRUCT.info)
|
|
43
43
|
self.ENTITY = _ent_from_structure(self.STRUCT)
|
|
44
44
|
self.update_entity()
|
|
45
|
+
self.update_full_sequences()
|
|
46
|
+
|
|
47
|
+
def update_full_sequences(self):
    """
    Fill in missing entity full sequences from the residues of the model.

    For every entity in ``self.STRUCT.entities`` whose ``full_sequence`` is
    empty, the chains mapped to that entity in ``self.ENTITY.polymer2eid``
    are compared by the length of their entry in ``self.polymer_sequences``.
    The residue names (water excluded) of the longest such chain, taken
    from the first model, become the entity's ``full_sequence``. On a tie
    the chain seen first is kept. Entities that already have a sequence,
    or whose chains all have an empty sequence, are left untouched.

    :return: None, ``self.STRUCT`` is modified in place
    """
    # NOTE(review): polymer_sequences looks like a computed mapping that
    # rebuilds every chain sequence on each access - confirm. It is
    # therefore read lazily and at most once per call, instead of once per
    # chain per entity.
    chain_seqs = None

    for ent_idx, ent in enumerate(self.STRUCT.entities):
        # Nothing to do when the entity already carries a sequence.
        if ent.full_sequence:
            continue

        # Pick the longest chain that belongs to this entity.
        sel_ch_id = None
        sel_ch_len = 0
        for ch_id, ent_id in self.ENTITY.polymer2eid.items():
            if ent_id != ent.name:
                continue
            if chain_seqs is None:
                chain_seqs = self.polymer_sequences
            cur_len = len(chain_seqs[ch_id])
            if cur_len > sel_ch_len:
                sel_ch_id, sel_ch_len = ch_id, cur_len

        # sel_ch_id is only ever set together with a length > 0, so a
        # single None check covers "no chain" and "only empty chains".
        if sel_ch_id is None:
            continue

        polymer = self.STRUCT[0][sel_ch_id].get_polymer()
        # Assign through the container index, as the original code does.
        self.STRUCT.entities[ent_idx].full_sequence = [
            res.name for res in polymer if not res.is_water()
        ]
|
|
45
66
|
|
|
46
67
|
@typechecked
|
|
47
68
|
def load_from_file(self, path: Union[str, pathlib.PosixPath]):
|
|
@@ -56,6 +77,7 @@ class StructureParser(object):
|
|
|
56
77
|
self.STRUCT, self.ENTITY = struct, entity
|
|
57
78
|
self.INFO.from_gemmi_structure_infomap(self.STRUCT.info)
|
|
58
79
|
self.update_entity()
|
|
80
|
+
self.update_full_sequences()
|
|
59
81
|
|
|
60
82
|
@typechecked
|
|
61
83
|
def to_pdb(self, outfile: str, write_minimal_pdb=False):
|
|
@@ -131,7 +153,7 @@ class StructureParser(object):
|
|
|
131
153
|
for chain in model:
|
|
132
154
|
ct = cts.get(chain.name, "other")
|
|
133
155
|
if ct != "other":
|
|
134
|
-
out[chain.name] =
|
|
156
|
+
out[chain.name] = self.make_one_letter_sequence(chain.name)
|
|
135
157
|
return out
|
|
136
158
|
|
|
137
159
|
@property
|
|
@@ -152,12 +174,28 @@ class StructureParser(object):
|
|
|
152
174
|
for r in chain.get_polymer()], dtype=id_type)
|
|
153
175
|
return out
|
|
154
176
|
|
|
155
|
-
|
|
156
|
-
|
|
177
|
+
def chain_residues(self, polymer_only=True, with_water=False):
    """
    Collect the residue names of every chain in ``self.STRUCT``.

    :param polymer_only: bool, when True a non-water residue is kept only
                         if its ``entity_type.name`` equals "Polymer"
    :param with_water: bool, when True water residues are kept as well,
                       regardless of ``polymer_only``
    :return: dict, chain name -> list of residue names in chain order.
             All models are visited, so for a chain name that occurs in
             several models the entry of the last model is the one kept.
    """

    def wanted(res):
        # Water is governed by with_water alone; the entity type is only
        # inspected for non-water residues when polymer_only is set.
        if res.is_water():
            return with_water
        return (not polymer_only) or res.entity_type.name == "Polymer"

    out = dict()
    for model in self.STRUCT:
        for chain in model:
            out[chain.name] = [res.name for res in chain if wanted(res)]
    return out
|
|
162
200
|
|
|
163
201
|
def update_entity(self):
|
|
@@ -369,3 +407,15 @@ class StructureParser(object):
|
|
|
369
407
|
return np.array(coord, dtype=np.float32), np.array(atom_id, dtype=id_type)
|
|
370
408
|
else:
|
|
371
409
|
return np.empty(shape=(0, 3), dtype=np.float32), np.array(atom_id, dtype=id_type)
|
|
410
|
+
|
|
411
|
+
def make_one_letter_sequence(self, chain_id):
    """
    Translate the polymer residues of one chain into a one-letter string.

    The chain type is read from ``self.chain_types`` and the residue names
    from ``self.chain_residues(polymer_only=True, with_water=False)``.

    :param chain_id: key of the chain in both of the mappings above
    :return: str. For "protein" chains each residue name is looked up in
             ``protein_3to1_mapper`` (unknown names give "X"); for "dna"
             and "rna" chains in ``nucleic_3to1_mapper`` (unknown names
             give "N"); for any other chain type an empty string.
    """
    kind = self.chain_types[chain_id]
    names = self.chain_residues(polymer_only=True, with_water=False)[chain_id]

    # Choose the lookup table and the placeholder for unknown residues.
    if kind == "protein":
        table, fallback = protein_3to1_mapper, "X"
    elif kind in ("dna", "rna"):
        table, fallback = nucleic_3to1_mapper, "N"
    else:
        return ""

    return "".join(table.get(name, fallback) for name in names)
|
|
@@ -6,11 +6,12 @@ gemmi_protools/dockq.py,sha256=JGPQ7Xs7gz9wubVVT9WSP5lZsLnfgcUH-_nLJ3c8I3U,4172
|
|
|
6
6
|
gemmi_protools/parse_pdb_header.py,sha256=UOGMsE3-d3APhO7zaAEE0NT31n-iqt55VpDh_RPOicI,14223
|
|
7
7
|
gemmi_protools/parser.py,sha256=QIJCOfK8FaFbLMvBG82zTOAjIvQJcf2WRwuFSxj4zvc,8982
|
|
8
8
|
gemmi_protools/pdb_opts.py,sha256=NbXLDNNVF7tuG_bUM0Infylf5aYnOCP2Pd-ndqm5bK4,5652
|
|
9
|
+
gemmi_protools/peptide.py,sha256=a2wiEutJmvhl6gDCIzzqRCbmyknk2mwgy2FZ53lXclU,750
|
|
9
10
|
gemmi_protools/ppi.py,sha256=nRzRWv28SDjVt6hMShRL_QYKFsBO1xA5jSGIQrN0JBg,2313
|
|
10
|
-
gemmi_protools/reader.py,sha256=
|
|
11
|
+
gemmi_protools/reader.py,sha256=u7872K-XeUW_sr0E1SaQWrPVWm88jPBUN8LLzf0flM0,15321
|
|
11
12
|
gemmi_protools/struct_info.py,sha256=9nBj1Zer03S8_Wks7L7uRlc9PlbfCKzoaT32pKR58X8,2769
|
|
12
|
-
gemmi_protools-0.1.
|
|
13
|
-
gemmi_protools-0.1.
|
|
14
|
-
gemmi_protools-0.1.
|
|
15
|
-
gemmi_protools-0.1.
|
|
16
|
-
gemmi_protools-0.1.
|
|
13
|
+
gemmi_protools-0.1.4.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
|
|
14
|
+
gemmi_protools-0.1.4.dist-info/METADATA,sha256=aKz2slj7fXafD3SnNaI0apr7PIxK87FFXnnwHcCVfvw,567
|
|
15
|
+
gemmi_protools-0.1.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
16
|
+
gemmi_protools-0.1.4.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
|
|
17
|
+
gemmi_protools-0.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|