gemmi-protools 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of gemmi-protools might be problematic. Click here for more details.

@@ -1,3 +1,5 @@
1
+ import gzip
2
+ import io
1
3
  import itertools
2
4
  import pathlib
3
5
  import random
@@ -274,19 +276,7 @@ class StructureParser(object):
274
276
  else:
275
277
  raise ValueError("structure must be gemmi.Structure or None")
276
278
 
277
- self.STRUCT.setup_entities()
278
- self.STRUCT.assign_serial_numbers()
279
-
280
- self.STRUCT.renumber_models()
281
- if len(self.STRUCT) > 1:
282
- for idx in range(1, len(self.STRUCT)):
283
- del self.STRUCT[idx]
284
-
285
- self.MODEL = self.STRUCT[0]
286
- self.STRUCT.remove_alternative_conformations()
287
- self.STRUCT.remove_hydrogens()
288
- self.STRUCT.remove_empty_chains()
289
- self._update_full_sequences()
279
+ self._init_struct()
290
280
 
291
281
  info_map = dict(self.STRUCT.info)
292
282
  pdb_code = info_map.get("_entry.id", "").lower()
@@ -300,6 +290,22 @@ class StructureParser(object):
300
290
  )
301
291
  self.update_entity()
302
292
 
293
+ def _init_struct(self):
294
+ self.STRUCT.setup_entities()
295
+ self.STRUCT.assign_serial_numbers()
296
+ self.STRUCT.renumber_models()
297
+
298
+ # keep the first model
299
+ if len(self.STRUCT) > 1:
300
+ for idx in reversed(list(range(1, len(self.STRUCT)))):
301
+ del self.STRUCT[idx]
302
+
303
+ self.MODEL = self.STRUCT[0]
304
+ self.STRUCT.remove_alternative_conformations()
305
+ self.STRUCT.remove_hydrogens()
306
+ self.STRUCT.remove_empty_chains()
307
+ self._update_full_sequences()
308
+
303
309
  def load_from_file(self, path: str):
304
310
  """
305
311
  Load model from file, default use the first model.
@@ -315,17 +321,7 @@ class StructureParser(object):
315
321
  else:
316
322
  raise ValueError("path must be files with suffixes [ .cif, .cif.gz, .pdb or .pdb.gz]")
317
323
 
318
- # force to use first model when mulitple models exist
319
- self.STRUCT.renumber_models()
320
- if len(self.STRUCT) > 1:
321
- for idx in range(1, len(self.STRUCT)):
322
- del self.STRUCT[idx]
323
-
324
- self.MODEL = self.STRUCT[0]
325
- self.STRUCT.remove_alternative_conformations()
326
- self.STRUCT.remove_hydrogens()
327
- self.STRUCT.remove_empty_chains()
328
- self._update_full_sequences()
324
+ self._init_struct()
329
325
  self.update_entity()
330
326
 
331
327
  def _update_full_sequences(self):
@@ -390,6 +386,14 @@ class StructureParser(object):
390
386
 
391
387
  return out
392
388
 
389
+ @property
390
+ def subchain_id_to_entity_id(self):
391
+ return {ch: ent.name for ent in self.STRUCT.entities for ch in ent.subchains}
392
+
393
+ @property
394
+ def subchain_id_to_chain_id(self):
395
+ return {sch.subchain_id(): chain.name for chain in self.MODEL for sch in chain.subchains()}
396
+
393
397
  def get_chain(self, chain_id: str):
394
398
  return self.MODEL[chain_id]
395
399
 
@@ -10,6 +10,7 @@ from typing import Dict, Any, List, Optional
10
10
 
11
11
  import numpy as np
12
12
  from Bio.PDB import Superimposer
13
+
13
14
  from gemmi_protools.io.convert import gemmi2bio, bio2gemmi
14
15
  from gemmi_protools.io.reader import StructureParser
15
16
 
@@ -11,6 +11,7 @@ from typing import List, Tuple
11
11
 
12
12
  import gemmi
13
13
  import pandas as pd
14
+
14
15
  from gemmi_protools.io.reader import StructureParser
15
16
 
16
17
 
@@ -22,8 +23,8 @@ def dockq_score_interface(query_model: str,
22
23
  """
23
24
  Calculate Dockq Score for an interface (partner 1 vs partner 2)
24
25
 
25
- :param query_model: str or pathlib.Path
26
- Path of query model, support .pdb, .pdb.gz, .cif, .cif.gz
26
+ :param query_model: str
27
+ path of query model, support .pdb, .pdb.gz, .cif, .cif.gz
27
28
  :param native_model:
28
29
  :param partner_1_mapping: a list of chain ID mapping between query and native for partner1 of the interface
29
30
  e.g. [(q chain1, n chain1), (q chain2, n chain2)]
@@ -124,4 +125,4 @@ def dockq_score_interface(query_model: str,
124
125
  else:
125
126
  score = ""
126
127
 
127
- return score
128
+ return dict(score=score, status=msg)
@@ -4,12 +4,15 @@
4
4
  import os
5
5
  import subprocess
6
6
  import tempfile
7
- from typing import Optional, List
7
+ from collections import defaultdict
8
+ from typing import List, Optional, Union
8
9
 
10
+ import freesasa
9
11
  import numpy as np
10
12
  import trimesh
11
13
  from Bio.PDB import Selection
12
14
  from Bio.PDB.ResidueDepth import _get_atom_radius, _read_vertex_array
15
+
13
16
  from gemmi_protools import StructureParser
14
17
  from gemmi_protools import gemmi2bio
15
18
 
@@ -39,20 +42,32 @@ def get_mesh(struct_file: str, chains: Optional[List[str]] = None, MSMS: str = "
39
42
  :return:
40
43
  https://ccsb.scripps.edu/msms/downloads/
41
44
  """
45
+ xyz_tmp = tempfile.NamedTemporaryFile(delete=False).name
46
+ surface_tmp = tempfile.NamedTemporaryFile(delete=False).name
47
+ msms_tmp = tempfile.NamedTemporaryFile(delete=False).name
48
+ face_file = surface_tmp + ".face"
49
+ surface_file = surface_tmp + ".vert"
42
50
 
43
51
  try:
44
52
  st = StructureParser()
45
53
  st.load_from_file(struct_file)
46
54
  st.clean_structure(remove_ligand=True)
47
55
 
48
- bio_st = gemmi2bio(st.STRUCT)
56
+ if chains is None:
57
+ st_p = st
58
+ else:
59
+ for ch in chains:
60
+ if ch not in st.chain_ids:
61
+ raise ValueError("Chain %s not found (only [%s])" % (ch, " ".join(st.chain_ids)))
62
+ st_p = st.pick_chains(chains)
63
+
64
+ bio_st = gemmi2bio(st_p.STRUCT)
49
65
  model = bio_st[0]
50
66
 
51
67
  # Replace pdb_to_xyzr
52
68
  # Make x,y,z,radius file
53
69
  atom_list = Selection.unfold_entities(model, "A")
54
70
 
55
- xyz_tmp = tempfile.NamedTemporaryFile(delete=False).name
56
71
  with open(xyz_tmp, "w") as pdb_to_xyzr:
57
72
  for atom in atom_list:
58
73
  x, y, z = atom.coord
@@ -60,13 +75,9 @@ def get_mesh(struct_file: str, chains: Optional[List[str]] = None, MSMS: str = "
60
75
  pdb_to_xyzr.write(f"{x:6.3f}\t{y:6.3f}\t{z:6.3f}\t{radius:1.2f}\n")
61
76
 
62
77
  # Make surface
63
- surface_tmp = tempfile.NamedTemporaryFile(delete=False).name
64
- msms_tmp = tempfile.NamedTemporaryFile(delete=False).name
65
78
  MSMS = MSMS + " -no_header -probe_radius 1.5 -if %s -of %s > " + msms_tmp
66
79
  make_surface = MSMS % (xyz_tmp, surface_tmp)
67
80
  subprocess.call(make_surface, shell=True)
68
- face_file = surface_tmp + ".face"
69
- surface_file = surface_tmp + ".vert"
70
81
  if not os.path.isfile(surface_file):
71
82
  raise RuntimeError(
72
83
  f"Failed to generate surface file using command:\n{make_surface}"
@@ -84,12 +95,103 @@ def get_mesh(struct_file: str, chains: Optional[List[str]] = None, MSMS: str = "
84
95
  mesh.update_faces(mesh.unique_faces())
85
96
  mesh.update_faces(mesh.nondegenerate_faces())
86
97
  mesh.remove_unreferenced_vertices()
87
- finally:
88
- # Remove temporary files
89
- for fn in [xyz_tmp, surface_tmp, msms_tmp, face_file, surface_file]:
90
- try:
91
- os.remove(fn)
92
- except OSError:
93
- pass
98
+
99
+ # Remove temporary files
100
+ for fn in [xyz_tmp, surface_tmp, msms_tmp, face_file, surface_file]:
101
+ try:
102
+ os.remove(fn)
103
+ except OSError:
104
+ pass
94
105
 
95
106
  return mesh
107
+
108
+
109
+ def get_surface_residues(struct_file: str,
110
+ chains: Optional[List[str]] = None,
111
+ relative_sasa_cutoff: Union[int, float] = 0.15):
112
+ ####################
113
+ # check and pick
114
+ ####################
115
+ st = StructureParser()
116
+ st.load_from_file(struct_file)
117
+ st.clean_structure()
118
+
119
+ if chains is None:
120
+ chains = st.chain_ids
121
+
122
+ if isinstance(chains, list):
123
+ if len(chains) == 0:
124
+ raise ValueError("chains is not set")
125
+ else:
126
+ # check if chains valid
127
+ for ch in chains:
128
+ if ch not in st.chain_ids:
129
+ raise ValueError("Chain %s not found" % ch)
130
+
131
+ st_p = st.pick_chains(chains)
132
+ # sequences = {k: s.replace("-", "").upper() for k, s in st_p.polymer_sequences().items()}
133
+
134
+ # start from 1
135
+ seq_num_mapper = dict()
136
+ for chain in st_p.MODEL:
137
+ for i, res in enumerate(chain):
138
+ key = (chain.name, str(res.seqid.num) + res.seqid.icode.strip(), res.name)
139
+ seq_num_mapper[key] = i + 1
140
+
141
+ # make one upper letter chain ID
142
+ mapper = st_p.make_one_letter_chain(only_uppercase=True)
143
+ mapper_r = {v: k for k, v in mapper.items()}
144
+
145
+ ####################
146
+ # save to pdb
147
+ ####################
148
+ with tempfile.NamedTemporaryFile(delete=True, suffix=".pdb", mode='w') as tmp_file:
149
+ st_p.to_pdb(tmp_file.name)
150
+ structure = freesasa.Structure(tmp_file.name)
151
+
152
+ result = freesasa.calc(structure)
153
+
154
+ residue_areas = result.residueAreas()
155
+
156
+ surface_residues_relative_sasa = dict()
157
+ surface_atoms = defaultdict(list)
158
+ for atom_index in range(structure.nAtoms()):
159
+ ch = structure.chainLabel(atom_index)
160
+ ch = mapper_r.get(ch, ch)
161
+
162
+ res_num = structure.residueNumber(atom_index).strip()
163
+ res_name = structure.residueName(atom_index)
164
+ atom_sasa = result.atomArea(atom_index)
165
+
166
+ res_id = (ch, res_num, res_name)
167
+ res_relative_total = residue_areas[ch][res_num].relativeTotal
168
+ if res_relative_total > relative_sasa_cutoff:
169
+ if res_id not in surface_residues_relative_sasa:
170
+ surface_residues_relative_sasa[res_id] = res_relative_total
171
+ if atom_sasa > 0:
172
+ atom_name = structure.atomName(atom_index).strip()
173
+ pos = structure.coord(atom_index)
174
+ surface_atoms[res_id].append((atom_sasa, atom_name, pos))
175
+
176
+ results = []
177
+ for res_id, query_atoms in surface_atoms.items():
178
+ seq_loc = seq_num_mapper[res_id]
179
+
180
+ query_atoms.sort(reverse=True)
181
+ centroid = tuple(np.array([a[2] for a in query_atoms[0:3]]).mean(axis=0).tolist())
182
+ results.append((res_id[0],
183
+ res_id[1],
184
+ res_id[2],
185
+ seq_loc,
186
+ centroid,
187
+ surface_residues_relative_sasa[res_id]
188
+ )
189
+ )
190
+ dtype = [("chain_name", "U5"),
191
+ ("residue_numi", "U8"),
192
+ ("residue_name", "U5"),
193
+ ("sequential_residue_num", "i4"),
194
+ ("centroid", ("f4", (3,))),
195
+ ("relative_sasa", "f4"),
196
+ ]
197
+ return np.array(results, dtype=dtype)
@@ -12,6 +12,7 @@ from importlib.resources import files
12
12
 
13
13
  from anarci import run_anarci
14
14
  from anarci.germlines import all_germlines
15
+
15
16
  from gemmi_protools import StructureParser
16
17
 
17
18
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gemmi_protools
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: An Enhanced tool to process PDB structures based on Gemmi
5
5
  Author: Luo Jiejian
6
6
  Author-email: Luo Jiejian <luojiejian12@mails.ucas.ac.cn>
@@ -16,6 +16,8 @@ Requires-Dist: typeguard
16
16
  Requires-Dist: numpy
17
17
  Requires-Dist: scipy
18
18
  Requires-Dist: trimesh
19
+ Requires-Dist: joblib
20
+ Requires-Dist: rtree
19
21
  Requires-Dist: freesasa==2.2.1
20
22
  Dynamic: author
21
23
  Dynamic: license-file
@@ -24,10 +26,8 @@ Dynamic: license-file
24
26
 
25
27
  # Install
26
28
  ```commandline
27
- conda create -n gemmi_protools python=3.12
28
- conda install -n gemmi_protools anarci hmmer -c bioconda
29
- conda install -n gemmi_protools dockq trimesh -c conda-forge
30
- conda activate gemmi_protools
29
+
30
+ conda install python=3.12.9 anarci hmmer dockq trimesh rtree -c bioconda -c conda-forge
31
31
  pip install gemmi_protools
32
32
  ```
33
33
 
@@ -36,6 +36,11 @@ pip install gemmi_protools
36
36
  ## read structures
37
37
  ```commandline
38
38
  from gemmi_protools import StructureParser
39
+
40
+ # load structure
39
41
  st=StructureParser()
40
- st.load_from_file("your.pdb")
42
+ st.load_from_file("7mmo.cif")
43
+
44
+ # get chain IDs
45
+ print(st.chain_ids)
41
46
  ```
@@ -6,14 +6,14 @@ gemmi_protools/data/MHC/MHC_combined.hmm.h3m,sha256=CvNMCsobQiX-wL7iB4CreNcbpnEl
6
6
  gemmi_protools/data/MHC/MHC_combined.hmm.h3p,sha256=-mK278pRedG3-KL-DtuVAQy7La9DgXg5FcP89D6X3Ck,78325
7
7
  gemmi_protools/io/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
8
8
  gemmi_protools/io/convert.py,sha256=A1i1vPgxG1LqMSUvWtegLl9LipgUQbfmKeGJ_f00UYo,3781
9
- gemmi_protools/io/reader.py,sha256=hUIY0YKBXDCyiWTNgfX7KsZRUxBOb-v6KYKxlWWtzEk,33238
9
+ gemmi_protools/io/reader.py,sha256=joQr_glerss3QcfIJGr0O6lw8Mc4N1-pVobMHqY1zi0,33255
10
10
  gemmi_protools/tools/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
11
- gemmi_protools/tools/align.py,sha256=tsn8Fp-Xc9CulVyVst4uFgL6gQKVOEvoUmdgcfF8dCg,7084
12
- gemmi_protools/tools/dockq.py,sha256=a6i4S0O7Z2jxqQMBQilbcxNEDm35i8hzm_anvJXB6uo,4419
13
- gemmi_protools/tools/mesh.py,sha256=YiHPZUS-Y0gtLTbwdUXO7jS07PS5PP452LxvPTfHFJE,2986
14
- gemmi_protools/tools/pdb_annot.py,sha256=MHl-2BAFr__eO1ohPPLfBR17G2wPZti7Lq9UlS7AEX4,8252
15
- gemmi_protools-1.0.0.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
16
- gemmi_protools-1.0.0.dist-info/METADATA,sha256=bRuphRjLJsZz-CmSRKau3cxi8yCPnF-E9NDavUTS1DA,1038
17
- gemmi_protools-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
- gemmi_protools-1.0.0.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
19
- gemmi_protools-1.0.0.dist-info/RECORD,,
11
+ gemmi_protools/tools/align.py,sha256=oKHvpeDa62zEjLkPmuyBM6avYDl3HFeJVHeRX62I2f4,7085
12
+ gemmi_protools/tools/dockq.py,sha256=baCuO5-GZCwrlS59T5UIXogpM44OIFIfXqksqRBAb0A,4428
13
+ gemmi_protools/tools/mesh.py,sha256=73MuJYwS_ACJI15OsrooAAhB1Ti4fM8CJSBqFOBR7LU,6537
14
+ gemmi_protools/tools/pdb_annot.py,sha256=EzgcntlERR04TfN0dIhf_GM9UCXEvUaH60Xohmbx_do,8253
15
+ gemmi_protools-1.0.1.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
16
+ gemmi_protools-1.0.1.dist-info/METADATA,sha256=cdKO7zuEv4ZwwCrcBZcprXmDtLB4AbEFDKH887JRcTI,1034
17
+ gemmi_protools-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
18
+ gemmi_protools-1.0.1.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
19
+ gemmi_protools-1.0.1.dist-info/RECORD,,