gemmi-protools 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gemmi-protools might be problematic. Click here for more details.

@@ -0,0 +1,32 @@
1
+ """
2
+ @Author: Luo Jiejian
3
+ """
4
+ from copy import deepcopy
5
+
6
+ from Bio.PDB.Polypeptide import nucleic_letters_3to1_extended, protein_letters_3to1_extended
7
+
8
+
9
def strip_key_val(inputs):
    """Return a new dict with whitespace stripped from every key and value.

    :param inputs: mapping whose keys and values both provide ``.strip()``
    :return: a fresh dict; when two keys become equal after stripping, the
        value of the one iterated last is kept
    """
    return {raw_key.strip(): raw_val.strip() for raw_key, raw_val in inputs.items()}
14
+
15
+
16
def __nucleic_3to1_mapper():
    """Build the nucleotide residue-name to one-letter lookup table.

    Works on a deep copy of ``nucleic_letters_3to1_extended`` so the imported
    table is left untouched, adds "DN" and "N" (both mapped to "N"), and
    strips whitespace from all keys and values.

    :return: dict of residue name -> one-letter code
    """
    table = deepcopy(nucleic_letters_3to1_extended)
    table.update({"DN": "N", "N": "N"})
    return strip_key_val(table)
22
+
23
+
24
def __protein_3to1_mapper():
    """Build the amino-acid residue-name to one-letter lookup table.

    Works on a deep copy of ``protein_letters_3to1_extended`` so the imported
    table is left untouched, maps "UNK" to "X", and strips whitespace from
    all keys and values.

    :return: dict of residue name -> one-letter code
    """
    table = deepcopy(protein_letters_3to1_extended)
    table.update({"UNK": "X"})
    return strip_key_val(table)
29
+
30
+
31
# Shared lookup tables, built once when this module is imported.
nucleic_3to1_mapper, protein_3to1_mapper = (
    __nucleic_3to1_mapper(),
    __protein_3to1_mapper(),
)
gemmi_protools/reader.py CHANGED
@@ -2,7 +2,6 @@
2
2
  @Author: Luo Jiejian
3
3
  """
4
4
  import pathlib
5
- import re
6
5
  import string
7
6
  import warnings
8
7
  from copy import deepcopy
@@ -17,6 +16,7 @@ from .parser import (_assign_digital_entity_names, _ent_from_structure,
17
16
  pdb_parser, cif_parser, _chain_type, _chain_names2one_letter,
18
17
  _assert_unique_chain_names_in_models, get_assembly)
19
18
  from .pdb_opts import _compound_source_string, _is_pdb
19
+ from .peptide import nucleic_3to1_mapper, protein_3to1_mapper
20
20
  from .struct_info import Info
21
21
 
22
22
 
@@ -42,6 +42,27 @@ class StructureParser(object):
42
42
  self.INFO.from_gemmi_structure_infomap(self.STRUCT.info)
43
43
  self.ENTITY = _ent_from_structure(self.STRUCT)
44
44
  self.update_entity()
45
+ self.update_full_sequences()
46
+
47
def update_full_sequences(self):
    """
    Fill in ``full_sequence`` for every entity of ``self.STRUCT`` that lacks one.

    For such an entity, the chains mapped to it in ``self.ENTITY.polymer2eid``
    are compared by the length of their entry in ``self.polymer_sequences``;
    the first chain with the greatest non-zero length is selected, and the
    names of its non-water polymer residues (taken from the first model)
    become the entity's ``full_sequence``. Entities that already have a
    sequence, or that have no chain with a non-empty sequence, are left as is.

    :return: None, the entities are updated in place
    :raises KeyError: if a chain mapped to the entity has no entry in
        ``self.polymer_sequences`` or is absent from the first model
    """
    # polymer_sequences is presumably a computed property that rebuilds all
    # chain sequences on each access, so read it at most once per call and
    # only when an entity actually needs it.
    chain_seqs = None

    for ent_idx, ent in enumerate(self.STRUCT.entities):
        if ent.full_sequence:
            continue

        # Pick the longest chain of this entity; ties keep the first one seen.
        sel_ch_id = None
        sel_ch_len = 0
        for ch_id, ent_id in self.ENTITY.polymer2eid.items():
            if ent_id != ent.name:
                continue
            if chain_seqs is None:
                chain_seqs = self.polymer_sequences
            cur_len = len(chain_seqs[ch_id])
            if cur_len > sel_ch_len:
                sel_ch_id, sel_ch_len = ch_id, cur_len

        # sel_ch_id is only ever set together with a positive length.
        if sel_ch_id is None:
            continue

        polymer = self.STRUCT[0][sel_ch_id].get_polymer()
        # Assign through the entity list by index, as the original code did.
        self.STRUCT.entities[ent_idx].full_sequence = [
            r.name for r in polymer if not r.is_water()
        ]
45
66
 
46
67
  @typechecked
47
68
  def load_from_file(self, path: Union[str, pathlib.PosixPath]):
@@ -56,6 +77,7 @@ class StructureParser(object):
56
77
  self.STRUCT, self.ENTITY = struct, entity
57
78
  self.INFO.from_gemmi_structure_infomap(self.STRUCT.info)
58
79
  self.update_entity()
80
+ self.update_full_sequences()
59
81
 
60
82
  @typechecked
61
83
  def to_pdb(self, outfile: str, write_minimal_pdb=False):
@@ -131,7 +153,7 @@ class StructureParser(object):
131
153
  for chain in model:
132
154
  ct = cts.get(chain.name, "other")
133
155
  if ct != "other":
134
- out[chain.name] = re.sub("-", "", chain.get_polymer().make_one_letter_sequence())
156
+ out[chain.name] = self.make_one_letter_sequence(chain.name)
135
157
  return out
136
158
 
137
159
  @property
@@ -152,12 +174,28 @@ class StructureParser(object):
152
174
  for r in chain.get_polymer()], dtype=id_type)
153
175
  return out
154
176
 
155
- @property
156
- def chain_residues(self):
177
def chain_residues(self, polymer_only=True, with_water=False):
    """
    Collect the residue names of every chain in every model of ``self.STRUCT``.

    :param polymer_only: when true, a non-water residue is kept only if its
        ``entity_type.name`` equals "Polymer"; when false, every non-water
        residue is kept
    :param with_water: when true, water residues are kept as well,
        independently of ``polymer_only``
    :return: dict mapping chain name to the list of kept residue names; if a
        chain name occurs more than once, the last occurrence wins
    """

    def _wanted(res):
        # Water is governed by with_water alone; entity_type is inspected
        # only for non-water residues, and only when polymer_only is set.
        if res.is_water():
            return with_water
        return not polymer_only or res.entity_type.name == "Polymer"

    return {
        chain.name: [res.name for res in chain if _wanted(res)]
        for model in self.STRUCT
        for chain in model
    }
162
200
 
163
201
  def update_entity(self):
@@ -369,3 +407,15 @@ class StructureParser(object):
369
407
  return np.array(coord, dtype=np.float32), np.array(atom_id, dtype=id_type)
370
408
  else:
371
409
  return np.empty(shape=(0, 3), dtype=np.float32), np.array(atom_id, dtype=id_type)
410
+
411
def make_one_letter_sequence(self, chain_id):
    """
    Translate the polymer residues of one chain into a one-letter sequence.

    The chain type is read from ``self.chain_types`` and the residue names
    from ``self.chain_residues(polymer_only=True, with_water=False)``.
    "protein" chains are translated with ``protein_3to1_mapper`` (unknown
    names become "X"), "dna" and "rna" chains with ``nucleic_3to1_mapper``
    (unknown names become "N").

    :param chain_id: key of the chain in ``self.chain_types`` and in the
        ``chain_residues`` result
    :return: the one-letter sequence, or "" for any other chain type
    """
    kind = self.chain_types[chain_id]
    codes = self.chain_residues(polymer_only=True, with_water=False)[chain_id]

    # Choose the lookup table and the placeholder for unmapped residue names.
    if kind == "protein":
        table, unknown = protein_3to1_mapper, "X"
    elif kind in ("dna", "rna"):
        table, unknown = nucleic_3to1_mapper, "N"
    else:
        return ""

    return "".join(table.get(code, unknown) for code in codes)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gemmi_protools
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: An Enhanced tool to process PDB structures based on Gemmi
5
5
  Author-email: Luo Jiejian <luojiejian12@mails.ucas.ac.cn>
6
6
  License-Expression: MIT
@@ -6,11 +6,12 @@ gemmi_protools/dockq.py,sha256=JGPQ7Xs7gz9wubVVT9WSP5lZsLnfgcUH-_nLJ3c8I3U,4172
6
6
  gemmi_protools/parse_pdb_header.py,sha256=UOGMsE3-d3APhO7zaAEE0NT31n-iqt55VpDh_RPOicI,14223
7
7
  gemmi_protools/parser.py,sha256=QIJCOfK8FaFbLMvBG82zTOAjIvQJcf2WRwuFSxj4zvc,8982
8
8
  gemmi_protools/pdb_opts.py,sha256=NbXLDNNVF7tuG_bUM0Infylf5aYnOCP2Pd-ndqm5bK4,5652
9
+ gemmi_protools/peptide.py,sha256=a2wiEutJmvhl6gDCIzzqRCbmyknk2mwgy2FZ53lXclU,750
9
10
  gemmi_protools/ppi.py,sha256=nRzRWv28SDjVt6hMShRL_QYKFsBO1xA5jSGIQrN0JBg,2313
10
- gemmi_protools/reader.py,sha256=0VjMxOogqB1dccucQU2I703W5Ro4JYBakE2eXftUTPU,13194
11
+ gemmi_protools/reader.py,sha256=u7872K-XeUW_sr0E1SaQWrPVWm88jPBUN8LLzf0flM0,15321
11
12
  gemmi_protools/struct_info.py,sha256=9nBj1Zer03S8_Wks7L7uRlc9PlbfCKzoaT32pKR58X8,2769
12
- gemmi_protools-0.1.2.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
13
- gemmi_protools-0.1.2.dist-info/METADATA,sha256=XTB_XuZpn611rYsLD2GMq6qd_bwm-TDBtGcNr-NROzE,567
14
- gemmi_protools-0.1.2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
15
- gemmi_protools-0.1.2.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
16
- gemmi_protools-0.1.2.dist-info/RECORD,,
13
+ gemmi_protools-0.1.4.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
14
+ gemmi_protools-0.1.4.dist-info/METADATA,sha256=aKz2slj7fXafD3SnNaI0apr7PIxK87FFXnnwHcCVfvw,567
15
+ gemmi_protools-0.1.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
16
+ gemmi_protools-0.1.4.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
17
+ gemmi_protools-0.1.4.dist-info/RECORD,,