gemmi-protools: gemmi_protools-1.0.0-py3-none-any.whl → gemmi_protools-1.0.2-py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Potentially problematic release.


This version of gemmi-protools might be problematic. More details are available on the package's registry page (the original "Click here" link is not preserved in this text).

@@ -1,3 +1,5 @@
1
+ import gzip
2
+ import io
1
3
  import itertools
2
4
  import pathlib
3
5
  import random
@@ -274,19 +276,7 @@ class StructureParser(object):
274
276
  else:
275
277
  raise ValueError("structure must be gemmi.Structure or None")
276
278
 
277
- self.STRUCT.setup_entities()
278
- self.STRUCT.assign_serial_numbers()
279
-
280
- self.STRUCT.renumber_models()
281
- if len(self.STRUCT) > 1:
282
- for idx in range(1, len(self.STRUCT)):
283
- del self.STRUCT[idx]
284
-
285
- self.MODEL = self.STRUCT[0]
286
- self.STRUCT.remove_alternative_conformations()
287
- self.STRUCT.remove_hydrogens()
288
- self.STRUCT.remove_empty_chains()
289
- self._update_full_sequences()
279
+ self._init_struct()
290
280
 
291
281
  info_map = dict(self.STRUCT.info)
292
282
  pdb_code = info_map.get("_entry.id", "").lower()
@@ -300,6 +290,22 @@ class StructureParser(object):
300
290
  )
301
291
  self.update_entity()
302
292
 
293
+ def _init_struct(self):
294
+ self.STRUCT.setup_entities()
295
+ self.STRUCT.assign_serial_numbers()
296
+ self.STRUCT.renumber_models()
297
+
298
+ # keep the first model
299
+ if len(self.STRUCT) > 1:
300
+ for idx in reversed(list(range(1, len(self.STRUCT)))):
301
+ del self.STRUCT[idx]
302
+
303
+ self.MODEL = self.STRUCT[0]
304
+ self.STRUCT.remove_alternative_conformations()
305
+ self.STRUCT.remove_hydrogens()
306
+ self.STRUCT.remove_empty_chains()
307
+ self._update_full_sequences()
308
+
303
309
  def load_from_file(self, path: str):
304
310
  """
305
311
  Load model from file, default use the first model.
@@ -315,17 +321,7 @@ class StructureParser(object):
315
321
  else:
316
322
  raise ValueError("path must be files with suffixes [ .cif, .cif.gz, .pdb or .pdb.gz]")
317
323
 
318
- # force to use first model when mulitple models exist
319
- self.STRUCT.renumber_models()
320
- if len(self.STRUCT) > 1:
321
- for idx in range(1, len(self.STRUCT)):
322
- del self.STRUCT[idx]
323
-
324
- self.MODEL = self.STRUCT[0]
325
- self.STRUCT.remove_alternative_conformations()
326
- self.STRUCT.remove_hydrogens()
327
- self.STRUCT.remove_empty_chains()
328
- self._update_full_sequences()
324
+ self._init_struct()
329
325
  self.update_entity()
330
326
 
331
327
  def _update_full_sequences(self):
@@ -368,13 +364,24 @@ class StructureParser(object):
368
364
  return out
369
365
 
370
366
  def polymer_sequences(self, pdbx: bool = False):
367
+ """
368
+ entity sequences for polymers
369
+ :param pdbx:
370
+ :return:
371
+ """
371
372
  out = dict()
373
+ subchain_id2entity_id = self.subchain_id_to_entity_id
374
+ entity_dict = {ent.name: ent for ent in self.STRUCT.entities}
375
+
372
376
  for ch, polymer_type in self.polymer_types.items():
373
377
  polymer = self.get_chain(ch).get_polymer()
378
+ entity_id = subchain_id2entity_id[polymer.subchain_id()]
379
+ ent = entity_dict[entity_id]
380
+
374
381
  if pdbx:
375
- s = gemmi.pdbx_one_letter_code(polymer.extract_sequence(), gemmi.sequence_kind(polymer_type))
382
+ s = gemmi.pdbx_one_letter_code(ent.full_sequence, gemmi.sequence_kind(polymer_type))
376
383
  else:
377
- s = polymer.make_one_letter_sequence().replace("-", "")
384
+ s = "".join([gemmi.find_tabulated_residue(r).one_letter_code for r in ent.full_sequence]).upper()
378
385
  out[ch] = s
379
386
  return out
380
387
 
@@ -390,6 +397,14 @@ class StructureParser(object):
390
397
 
391
398
  return out
392
399
 
400
+ @property
401
+ def subchain_id_to_entity_id(self):
402
+ return {ch: ent.name for ent in self.STRUCT.entities for ch in ent.subchains}
403
+
404
+ @property
405
+ def subchain_id_to_chain_id(self):
406
+ return {sch.subchain_id(): chain.name for chain in self.MODEL for sch in chain.subchains()}
407
+
393
408
  def get_chain(self, chain_id: str):
394
409
  return self.MODEL[chain_id]
395
410
 
@@ -656,15 +671,18 @@ class StructureParser(object):
656
671
  raise ValueError("Chain %s not in found" % chain_name_2)
657
672
 
658
673
  flag = True
674
+ sw_name = ""
675
+
659
676
  while flag:
660
677
  characters = string.ascii_letters + string.digits
661
678
  sw_name = ''.join(random.choices(characters, k=4))
662
679
  if sw_name not in self.chain_ids:
663
680
  flag = False
664
681
 
665
- self.rename_chain(chain_name_1, sw_name)
666
- self.rename_chain(chain_name_2, chain_name_1)
667
- self.rename_chain(sw_name, chain_name_2)
682
+ if sw_name != "":
683
+ self.rename_chain(chain_name_1, sw_name)
684
+ self.rename_chain(chain_name_2, chain_name_1)
685
+ self.rename_chain(sw_name, chain_name_2)
668
686
 
669
687
  def make_one_letter_chain(self, only_uppercase: bool = True):
670
688
  uppercase_letters = list(string.ascii_uppercase)
@@ -691,7 +709,6 @@ class StructureParser(object):
691
709
 
692
710
  # not use yet
693
711
  letters_valid = [l for l in letters if l not in self.chain_ids]
694
- chains2rename = [ch for ch in self.chain_ids if ch not in letters]
695
712
  mapper = {ch: letters_valid.pop() for ch in self.chain_ids if ch not in letters}
696
713
 
697
714
  for origin_name, target_name in mapper.items():
@@ -10,6 +10,7 @@ from typing import Dict, Any, List, Optional
10
10
 
11
11
  import numpy as np
12
12
  from Bio.PDB import Superimposer
13
+
13
14
  from gemmi_protools.io.convert import gemmi2bio, bio2gemmi
14
15
  from gemmi_protools.io.reader import StructureParser
15
16
 
@@ -11,6 +11,7 @@ from typing import List, Tuple
11
11
 
12
12
  import gemmi
13
13
  import pandas as pd
14
+
14
15
  from gemmi_protools.io.reader import StructureParser
15
16
 
16
17
 
@@ -22,8 +23,8 @@ def dockq_score_interface(query_model: str,
22
23
  """
23
24
  Calculate Dockq Score for an interface (partner 1 vs partner 2)
24
25
 
25
- :param query_model: str or pathlib.Path
26
- Path of query model, support .pdb, .pdb.gz, .cif, .cif.gz
26
+ :param query_model: str
27
+ path of query model, support .pdb, .pdb.gz, .cif, .cif.gz
27
28
  :param native_model:
28
29
  :param partner_1_mapping: a list of chain ID mapping between query and native for partner1 of the interface
29
30
  e.g. [(q chain1, n chain1), (q chain2, n chain2)]
@@ -124,4 +125,4 @@ def dockq_score_interface(query_model: str,
124
125
  else:
125
126
  score = ""
126
127
 
127
- return score
128
+ return dict(score=score, status=msg)
@@ -4,12 +4,15 @@
4
4
  import os
5
5
  import subprocess
6
6
  import tempfile
7
- from typing import Optional, List
7
+ from collections import defaultdict
8
+ from typing import List, Optional, Union
8
9
 
10
+ import freesasa
9
11
  import numpy as np
10
12
  import trimesh
11
13
  from Bio.PDB import Selection
12
14
  from Bio.PDB.ResidueDepth import _get_atom_radius, _read_vertex_array
15
+
13
16
  from gemmi_protools import StructureParser
14
17
  from gemmi_protools import gemmi2bio
15
18
 
@@ -39,20 +42,32 @@ def get_mesh(struct_file: str, chains: Optional[List[str]] = None, MSMS: str = "
39
42
  :return:
40
43
  https://ccsb.scripps.edu/msms/downloads/
41
44
  """
45
+ xyz_tmp = tempfile.NamedTemporaryFile(delete=False).name
46
+ surface_tmp = tempfile.NamedTemporaryFile(delete=False).name
47
+ msms_tmp = tempfile.NamedTemporaryFile(delete=False).name
48
+ face_file = surface_tmp + ".face"
49
+ surface_file = surface_tmp + ".vert"
42
50
 
43
51
  try:
44
52
  st = StructureParser()
45
53
  st.load_from_file(struct_file)
46
54
  st.clean_structure(remove_ligand=True)
47
55
 
48
- bio_st = gemmi2bio(st.STRUCT)
56
+ if chains is None:
57
+ st_p = st
58
+ else:
59
+ for ch in chains:
60
+ if ch not in st.chain_ids:
61
+ raise ValueError("Chain %s not found (only [%s])" % (ch, " ".join(st.chain_ids)))
62
+ st_p = st.pick_chains(chains)
63
+
64
+ bio_st = gemmi2bio(st_p.STRUCT)
49
65
  model = bio_st[0]
50
66
 
51
67
  # Replace pdb_to_xyzr
52
68
  # Make x,y,z,radius file
53
69
  atom_list = Selection.unfold_entities(model, "A")
54
70
 
55
- xyz_tmp = tempfile.NamedTemporaryFile(delete=False).name
56
71
  with open(xyz_tmp, "w") as pdb_to_xyzr:
57
72
  for atom in atom_list:
58
73
  x, y, z = atom.coord
@@ -60,13 +75,9 @@ def get_mesh(struct_file: str, chains: Optional[List[str]] = None, MSMS: str = "
60
75
  pdb_to_xyzr.write(f"{x:6.3f}\t{y:6.3f}\t{z:6.3f}\t{radius:1.2f}\n")
61
76
 
62
77
  # Make surface
63
- surface_tmp = tempfile.NamedTemporaryFile(delete=False).name
64
- msms_tmp = tempfile.NamedTemporaryFile(delete=False).name
65
78
  MSMS = MSMS + " -no_header -probe_radius 1.5 -if %s -of %s > " + msms_tmp
66
79
  make_surface = MSMS % (xyz_tmp, surface_tmp)
67
80
  subprocess.call(make_surface, shell=True)
68
- face_file = surface_tmp + ".face"
69
- surface_file = surface_tmp + ".vert"
70
81
  if not os.path.isfile(surface_file):
71
82
  raise RuntimeError(
72
83
  f"Failed to generate surface file using command:\n{make_surface}"
@@ -84,12 +95,103 @@ def get_mesh(struct_file: str, chains: Optional[List[str]] = None, MSMS: str = "
84
95
  mesh.update_faces(mesh.unique_faces())
85
96
  mesh.update_faces(mesh.nondegenerate_faces())
86
97
  mesh.remove_unreferenced_vertices()
87
- finally:
88
- # Remove temporary files
89
- for fn in [xyz_tmp, surface_tmp, msms_tmp, face_file, surface_file]:
90
- try:
91
- os.remove(fn)
92
- except OSError:
93
- pass
98
+
99
+ # Remove temporary files
100
+ for fn in [xyz_tmp, surface_tmp, msms_tmp, face_file, surface_file]:
101
+ try:
102
+ os.remove(fn)
103
+ except OSError:
104
+ pass
94
105
 
95
106
  return mesh
107
+
108
+
109
+ def get_surface_residues(struct_file: str,
110
+ chains: Optional[List[str]] = None,
111
+ relative_sasa_cutoff: Union[int, float] = 0.15):
112
+ ####################
113
+ # check and pick
114
+ ####################
115
+ st = StructureParser()
116
+ st.load_from_file(struct_file)
117
+ st.clean_structure()
118
+
119
+ if chains is None:
120
+ chains = st.chain_ids
121
+
122
+ if isinstance(chains, list):
123
+ if len(chains) == 0:
124
+ raise ValueError("chains is not set")
125
+ else:
126
+ # check if chains valid
127
+ for ch in chains:
128
+ if ch not in st.chain_ids:
129
+ raise ValueError("Chain %s not found" % ch)
130
+
131
+ st_p = st.pick_chains(chains)
132
+ # sequences = {k: s.replace("-", "").upper() for k, s in st_p.polymer_sequences().items()}
133
+
134
+ # start from 1
135
+ seq_num_mapper = dict()
136
+ for chain in st_p.MODEL:
137
+ for i, res in enumerate(chain):
138
+ key = (chain.name, str(res.seqid.num) + res.seqid.icode.strip(), res.name)
139
+ seq_num_mapper[key] = i + 1
140
+
141
+ # make one upper letter chain ID
142
+ mapper = st_p.make_one_letter_chain(only_uppercase=True)
143
+ mapper_r = {v: k for k, v in mapper.items()}
144
+
145
+ ####################
146
+ # save to pdb
147
+ ####################
148
+ with tempfile.NamedTemporaryFile(delete=True, suffix=".pdb", mode='w') as tmp_file:
149
+ st_p.to_pdb(tmp_file.name)
150
+ structure = freesasa.Structure(tmp_file.name)
151
+
152
+ result = freesasa.calc(structure)
153
+
154
+ residue_areas = result.residueAreas()
155
+
156
+ surface_residues_relative_sasa = dict()
157
+ surface_atoms = defaultdict(list)
158
+ for atom_index in range(structure.nAtoms()):
159
+ ch = structure.chainLabel(atom_index)
160
+ ch = mapper_r.get(ch, ch)
161
+
162
+ res_num = structure.residueNumber(atom_index).strip()
163
+ res_name = structure.residueName(atom_index)
164
+ atom_sasa = result.atomArea(atom_index)
165
+
166
+ res_id = (ch, res_num, res_name)
167
+ res_relative_total = residue_areas[ch][res_num].relativeTotal
168
+ if res_relative_total > relative_sasa_cutoff:
169
+ if res_id not in surface_residues_relative_sasa:
170
+ surface_residues_relative_sasa[res_id] = res_relative_total
171
+ if atom_sasa > 0:
172
+ atom_name = structure.atomName(atom_index).strip()
173
+ pos = structure.coord(atom_index)
174
+ surface_atoms[res_id].append((atom_sasa, atom_name, pos))
175
+
176
+ results = []
177
+ for res_id, query_atoms in surface_atoms.items():
178
+ seq_loc = seq_num_mapper[res_id]
179
+
180
+ query_atoms.sort(reverse=True)
181
+ centroid = tuple(np.array([a[2] for a in query_atoms[0:3]]).mean(axis=0).tolist())
182
+ results.append((res_id[0],
183
+ res_id[1],
184
+ res_id[2],
185
+ seq_loc,
186
+ centroid,
187
+ surface_residues_relative_sasa[res_id]
188
+ )
189
+ )
190
+ dtype = [("chain_name", "U5"),
191
+ ("residue_numi", "U8"),
192
+ ("residue_name", "U5"),
193
+ ("sequential_residue_num", "i4"),
194
+ ("centroid", ("f4", (3,))),
195
+ ("relative_sasa", "f4"),
196
+ ]
197
+ return np.array(results, dtype=dtype)
@@ -12,6 +12,7 @@ from importlib.resources import files
12
12
 
13
13
  from anarci import run_anarci
14
14
  from anarci.germlines import all_germlines
15
+
15
16
  from gemmi_protools import StructureParser
16
17
 
17
18
 
@@ -204,10 +205,7 @@ def annotate_pdb(struct_file: str):
204
205
  st.load_from_file(struct_file)
205
206
  st.clean_structure()
206
207
 
207
- subchain_id2entity_id = dict()
208
- for ent in st.STRUCT.entities:
209
- for ch in ent.subchains:
210
- subchain_id2entity_id[ch] = ent.name
208
+ subchain_id2entity_id = st.subchain_id_to_entity_id
211
209
 
212
210
  # Merge sequences
213
211
  polymers = dict()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gemmi_protools
3
- Version: 1.0.0
3
+ Version: 1.0.2
4
4
  Summary: An Enhanced tool to process PDB structures based on Gemmi
5
5
  Author: Luo Jiejian
6
6
  Author-email: Luo Jiejian <luojiejian12@mails.ucas.ac.cn>
@@ -16,6 +16,8 @@ Requires-Dist: typeguard
16
16
  Requires-Dist: numpy
17
17
  Requires-Dist: scipy
18
18
  Requires-Dist: trimesh
19
+ Requires-Dist: joblib
20
+ Requires-Dist: rtree
19
21
  Requires-Dist: freesasa==2.2.1
20
22
  Dynamic: author
21
23
  Dynamic: license-file
@@ -24,10 +26,8 @@ Dynamic: license-file
24
26
 
25
27
  # Install
26
28
  ```commandline
27
- conda create -n gemmi_protools python=3.12
28
- conda install -n gemmi_protools anarci hmmer -c bioconda
29
- conda install -n gemmi_protools dockq trimesh -c conda-forge
30
- conda activate gemmi_protools
29
+
30
+ conda install python=3.12.9 anarci hmmer dockq trimesh rtree -c bioconda -c conda-forge
31
31
  pip install gemmi_protools
32
32
  ```
33
33
 
@@ -36,6 +36,11 @@ pip install gemmi_protools
36
36
  ## read structures
37
37
  ```commandline
38
38
  from gemmi_protools import StructureParser
39
+
40
+ # load structure
39
41
  st=StructureParser()
40
- st.load_from_file("your.pdb")
42
+ st.load_from_file("7mmo.cif")
43
+
44
+ # get chain IDs
45
+ print(st.chain_ids)
41
46
  ```
@@ -6,14 +6,14 @@ gemmi_protools/data/MHC/MHC_combined.hmm.h3m,sha256=CvNMCsobQiX-wL7iB4CreNcbpnEl
6
6
  gemmi_protools/data/MHC/MHC_combined.hmm.h3p,sha256=-mK278pRedG3-KL-DtuVAQy7La9DgXg5FcP89D6X3Ck,78325
7
7
  gemmi_protools/io/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
8
8
  gemmi_protools/io/convert.py,sha256=A1i1vPgxG1LqMSUvWtegLl9LipgUQbfmKeGJ_f00UYo,3781
9
- gemmi_protools/io/reader.py,sha256=hUIY0YKBXDCyiWTNgfX7KsZRUxBOb-v6KYKxlWWtzEk,33238
9
+ gemmi_protools/io/reader.py,sha256=X4onV0IVl0Q7JVH0yg2Zy-8iPIZvRPM-aaxDapawiro,33617
10
10
  gemmi_protools/tools/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
11
- gemmi_protools/tools/align.py,sha256=tsn8Fp-Xc9CulVyVst4uFgL6gQKVOEvoUmdgcfF8dCg,7084
12
- gemmi_protools/tools/dockq.py,sha256=a6i4S0O7Z2jxqQMBQilbcxNEDm35i8hzm_anvJXB6uo,4419
13
- gemmi_protools/tools/mesh.py,sha256=YiHPZUS-Y0gtLTbwdUXO7jS07PS5PP452LxvPTfHFJE,2986
14
- gemmi_protools/tools/pdb_annot.py,sha256=MHl-2BAFr__eO1ohPPLfBR17G2wPZti7Lq9UlS7AEX4,8252
15
- gemmi_protools-1.0.0.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
16
- gemmi_protools-1.0.0.dist-info/METADATA,sha256=bRuphRjLJsZz-CmSRKau3cxi8yCPnF-E9NDavUTS1DA,1038
17
- gemmi_protools-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
- gemmi_protools-1.0.0.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
19
- gemmi_protools-1.0.0.dist-info/RECORD,,
11
+ gemmi_protools/tools/align.py,sha256=oKHvpeDa62zEjLkPmuyBM6avYDl3HFeJVHeRX62I2f4,7085
12
+ gemmi_protools/tools/dockq.py,sha256=baCuO5-GZCwrlS59T5UIXogpM44OIFIfXqksqRBAb0A,4428
13
+ gemmi_protools/tools/mesh.py,sha256=73MuJYwS_ACJI15OsrooAAhB1Ti4fM8CJSBqFOBR7LU,6537
14
+ gemmi_protools/tools/pdb_annot.py,sha256=enATyAHq0dE8TMsKQhsSbYj-baGrI33iviJdW2R7Hv8,8157
15
+ gemmi_protools-1.0.2.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
16
+ gemmi_protools-1.0.2.dist-info/METADATA,sha256=29ea2GIobnQjR6N0VQaI6MRvcH6UCG7cTpzETUUIrCE,1034
17
+ gemmi_protools-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
+ gemmi_protools-1.0.2.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
19
+ gemmi_protools-1.0.2.dist-info/RECORD,,