gemmi-protools 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gemmi-protools might be problematic. Click here for more details.

gemmi_protools/reader.py CHANGED
@@ -2,7 +2,6 @@
2
2
  @Author: Luo Jiejian
3
3
  """
4
4
  import pathlib
5
- import re
6
5
  import string
7
6
  import warnings
8
7
  from copy import deepcopy
@@ -10,6 +9,7 @@ from typing import Union, Optional, List
10
9
 
11
10
  import gemmi
12
11
  import numpy as np
12
+ from Bio.PDB.Polypeptide import nucleic_letters_3to1_extended, protein_letters_3to1_extended
13
13
  from typeguard import typechecked
14
14
 
15
15
  from .cif_opts import _cif_block_for_output, _is_cif
@@ -19,6 +19,10 @@ from .parser import (_assign_digital_entity_names, _ent_from_structure,
19
19
  from .pdb_opts import _compound_source_string, _is_pdb
20
20
  from .struct_info import Info
21
21
 
22
# Extend the three-letter -> one-letter lookup tables with the placeholder
# residue names used by this module.
# NOTE: these are the dictionary objects imported from Bio.PDB.Polypeptide and
# they are modified in place, so the extra keys are visible to every other
# user of those tables in the same process.
nucleic_letters_3to1_extended.update({"DN": "N", "N": "N"})
protein_letters_3to1_extended.update({"UNK": "X"})
25
+
22
26
 
23
27
  class StructureParser(object):
24
28
  """
@@ -42,6 +46,27 @@ class StructureParser(object):
42
46
  self.INFO.from_gemmi_structure_infomap(self.STRUCT.info)
43
47
  self.ENTITY = _ent_from_structure(self.STRUCT)
44
48
  self.update_entity()
49
+ self.update_full_sequences()
50
+
51
def update_full_sequences(self):
    """
    Fill in the full sequence of every entity that does not have one.

    For each entity in ``self.STRUCT.entities`` whose ``full_sequence`` is
    empty, the chains mapped to that entity in ``self.ENTITY.polymer2eid``
    are compared by the length of their entry in ``self.polymer_sequences``.
    The longest one is selected (on equal length the chain seen first is
    kept) and the residue names of its polymer part in the first model,
    waters excluded, are stored as the entity's ``full_sequence``.

    Entities that already have a full sequence, and entities without any
    mapped chain of non-zero sequence length, are left unchanged.

    :return: None, the entities of ``self.STRUCT`` are updated in place
    """
    for ent_idx, ent in enumerate(self.STRUCT.entities):
        # A sequence is already present, nothing to reconstruct.
        if ent.full_sequence:
            continue

        # Fetched lazily and at most once per entity instead of once per
        # candidate chain; nothing is modified while the chains are scanned.
        # NOTE(review): polymer_sequences looks like a computed property, in
        # which case this avoids rebuilding it for every chain - confirm.
        sequences = None

        sel_ch_id = None
        sel_ch_len = 0
        for ch_id, ent_id in self.ENTITY.polymer2eid.items():
            if ent_id != ent.name:
                continue
            if sequences is None:
                sequences = self.polymer_sequences
            cur_len = len(sequences[ch_id])
            # strict comparison: the first chain of maximal length wins
            if cur_len > sel_ch_len:
                sel_ch_id, sel_ch_len = ch_id, cur_len

        # A chain is only selected when its length is > 0, so checking the
        # id alone is sufficient.
        if sel_ch_id is None:
            continue

        polymer = self.STRUCT[0][sel_ch_id].get_polymer()
        full_seq = [r.name for r in polymer if not r.is_water()]
        # Assign through the container index rather than the loop variable.
        self.STRUCT.entities[ent_idx].full_sequence = full_seq
45
70
 
46
71
  @typechecked
47
72
  def load_from_file(self, path: Union[str, pathlib.PosixPath]):
@@ -56,6 +81,7 @@ class StructureParser(object):
56
81
  self.STRUCT, self.ENTITY = struct, entity
57
82
  self.INFO.from_gemmi_structure_infomap(self.STRUCT.info)
58
83
  self.update_entity()
84
+ self.update_full_sequences()
59
85
 
60
86
  @typechecked
61
87
  def to_pdb(self, outfile: str, write_minimal_pdb=False):
@@ -131,7 +157,7 @@ class StructureParser(object):
131
157
  for chain in model:
132
158
  ct = cts.get(chain.name, "other")
133
159
  if ct != "other":
134
- out[chain.name] = re.sub("-", "", chain.get_polymer().make_one_letter_sequence())
160
+ out[chain.name] = self.make_one_letter_sequence(chain.name)
135
161
  return out
136
162
 
137
163
  @property
@@ -152,12 +178,28 @@ class StructureParser(object):
152
178
  for r in chain.get_polymer()], dtype=id_type)
153
179
  return out
154
180
 
155
- @property
156
- def chain_residues(self):
181
def chain_residues(self, polymer_only=True, with_water=False):
    """
    Collect the three-letter residue codes of every chain.

    :param polymer_only: bool, when True a non-water residue is kept only if
        the name of its entity type is "Polymer"; when False every non-water
        residue is kept
    :param with_water: bool, when True water residues are kept as well,
        independently of polymer_only
    :return: dict mapping chain name to the list of three-letter codes of the
        kept residues; all models are visited, so a chain name that occurs in
        several models ends up with the codes from the last of them
    """

    def _keep(residue):
        # Waters are governed by with_water alone.
        if residue.is_water():
            return with_water
        # The entity type is only inspected when polymer_only is requested.
        return (not polymer_only) or residue.entity_type.name == "Polymer"

    codes_by_chain = dict()
    for model in self.STRUCT:
        for chain in model:
            codes_by_chain[chain.name] = [res.name for res in chain if _keep(res)]
    return codes_by_chain
162
204
 
163
205
  def update_entity(self):
@@ -369,3 +411,15 @@ class StructureParser(object):
369
411
  return np.array(coord, dtype=np.float32), np.array(atom_id, dtype=id_type)
370
412
  else:
371
413
  return np.empty(shape=(0, 3), dtype=np.float32), np.array(atom_id, dtype=id_type)
414
+
415
def make_one_letter_sequence(self, chain_id):
    """
    Build the one-letter sequence of a single chain.

    The chain type is read from ``self.chain_types`` and the three-letter
    codes from ``self.chain_residues(polymer_only=True, with_water=False)``.
    Protein chains are translated with ``protein_letters_3to1_extended``
    (unknown codes become "X"), "dna" and "rna" chains with
    ``nucleic_letters_3to1_extended`` (unknown codes become "N").

    :param chain_id: name of the chain, used as key in both lookups
    :return: str, the one-letter sequence; an empty string for any other
        chain type
    """
    kind = self.chain_types[chain_id]
    three_letter = self.chain_residues(polymer_only=True, with_water=False)[chain_id]

    # Pick the translation table and the fallback letter for unknown codes.
    if kind == "protein":
        table, fallback = protein_letters_3to1_extended, "X"
    elif kind in ("dna", "rna"):
        table, fallback = nucleic_letters_3to1_extended, "N"
    else:
        return ""

    return "".join([table.get(code, fallback) for code in three_letter])
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gemmi_protools
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: An Enhanced tool to process PDB structures based on Gemmi
5
5
  Author-email: Luo Jiejian <luojiejian12@mails.ucas.ac.cn>
6
6
  License-Expression: MIT
7
7
  Requires-Python: >=3.10
8
8
  Description-Content-Type: text/markdown
9
9
  License-File: LICENSE
10
- Requires-Dist: gemmi>=0.6.7
10
+ Requires-Dist: gemmi>=0.7.0
11
11
  Requires-Dist: pandas>=2.2.3
12
12
  Requires-Dist: typeguard>=4.1.2
13
13
  Requires-Dist: numpy
@@ -7,10 +7,10 @@ gemmi_protools/parse_pdb_header.py,sha256=UOGMsE3-d3APhO7zaAEE0NT31n-iqt55VpDh_R
7
7
  gemmi_protools/parser.py,sha256=QIJCOfK8FaFbLMvBG82zTOAjIvQJcf2WRwuFSxj4zvc,8982
8
8
  gemmi_protools/pdb_opts.py,sha256=NbXLDNNVF7tuG_bUM0Infylf5aYnOCP2Pd-ndqm5bK4,5652
9
9
  gemmi_protools/ppi.py,sha256=nRzRWv28SDjVt6hMShRL_QYKFsBO1xA5jSGIQrN0JBg,2313
10
- gemmi_protools/reader.py,sha256=0VjMxOogqB1dccucQU2I703W5Ro4JYBakE2eXftUTPU,13194
10
+ gemmi_protools/reader.py,sha256=4xuK7UmJVc6Nt9YhYS1FPsi5PL4QwZHilk8s3V-jf0Y,15499
11
11
  gemmi_protools/struct_info.py,sha256=9nBj1Zer03S8_Wks7L7uRlc9PlbfCKzoaT32pKR58X8,2769
12
- gemmi_protools-0.1.1.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
13
- gemmi_protools-0.1.1.dist-info/METADATA,sha256=7-_OWwHZXdcX3SaFluIag3dOia5Xm7poRUQXmMWdjJs,567
14
- gemmi_protools-0.1.1.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
15
- gemmi_protools-0.1.1.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
16
- gemmi_protools-0.1.1.dist-info/RECORD,,
12
+ gemmi_protools-0.1.3.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
13
+ gemmi_protools-0.1.3.dist-info/METADATA,sha256=ymS32ZVWiYY7LDzrQkEqiatqY2qZBqenf3X0BvX5DT4,567
14
+ gemmi_protools-0.1.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
15
+ gemmi_protools-0.1.3.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
16
+ gemmi_protools-0.1.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (77.0.3)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5