gemmi-protools 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ """
2
+ @Author: Luo Jiejian
3
+ """
@@ -0,0 +1,336 @@
1
+ """
2
+ @Author: Luo Jiejian
3
+ """
4
+
5
+ import os
6
+ import re
7
+ import shutil
8
+ import string
9
+ import subprocess
10
+ import tempfile
11
+ from typing import Literal, Optional, Dict, Any, List
12
+
13
+ import numpy as np
14
+ from Bio.Align import PairwiseAligner, substitution_matrices
15
+ from Bio.PDB import Superimposer
16
+ from gemmi_protools import StructureParser
17
+ from gemmi_protools.io.convert import gemmi2bio, bio2gemmi
18
+
19
+
20
def check_sequence(seq: str):
    """
    Normalise a raw sequence string and check that it is valid.

    The input is upper-cased, then every whitespace character (spaces, tabs,
    line breaks), star and gap character ("-") is removed, wherever it occurs.
    Line-wrapped sequences (e.g. copied from a FASTA file) are therefore accepted.

    :param seq: str
        raw sequence
    :return: str
        cleaned, upper-case sequence containing only the letters A-Z
    :raises ValueError:
        if nothing is left after cleaning, or if a character other than
        A-Z remains
    """
    # r"\s" (instead of a literal space) also drops internal newlines and tabs
    seq_clean = re.sub(pattern=r"\s|\*|-", repl='', string=seq.upper())
    if len(seq_clean) == 0:
        raise ValueError("Sequence is empty")

    # whatever survives the removal of A-Z is an invalid character
    s = re.sub(pattern=r"[A-Z]", repl="", string=seq_clean)
    if len(s) > 0:
        raise ValueError("Sequence has Non-alphabetic characters: %s" % str(set(s)))

    return seq_clean
38
+
39
+
40
def align_sequences(seq1: str,
                    seq2: str,
                    seq_type: Literal["dna", "rna", "protein"] = "protein",
                    mode: Literal["global", "local"] = "local",
                    substitution_matrix: Optional[str] = None,
                    open_gap_score: Optional[float] = None,
                    extend_gap_score: Optional[float] = None,
                    ):
    """
    Pairwise-align two sequences and map every position of seq1 onto seq2.

    :param seq1: str
        first sequence, cleaned with check_sequence before use
    :param seq2: str
        second sequence, cleaned with check_sequence before use
    :param seq_type: "dna", "rna" or "protein"
        selects the default substitution matrix and gap scores
    :param mode: "global" or "local"
        alignment mode passed to the aligner
    :param substitution_matrix: str, None
        None for the default of seq_type (NUC.4.4 or BLOSUM62)
    :param open_gap_score: float, None
        None for the default of seq_type
    :param extend_gap_score: float, None
        None for the default of seq_type
    :return: dict with keys
        seq1, seq2: the cleaned input sequences
        aligned_seq1, aligned_seq2: the two rows of the best alignment
        alignment_length: length of the best alignment
        aligned_aa_mapper: dict, key is the position in seq1 (start from 1),
            value is a str:
              "<n>"         aligned with position n of seq2
              "<n><letter>" residue of seq1 opposite a gap in seq2; n is the
                            last seq2 position consumed so far and the letter
                            (A-Z, then a-z) counts the residues of that run
              "E<i>"        position i of seq1 is outside the aligned region
        identity: identities / alignment length, rounded to 3 digits
        coverage_1, coverage_2: (alignment length - gaps) / len(seq),
            rounded to 3 digits
    :raises ValueError:
        invalid sequence, or substitution matrix not supported for seq_type
    :raises KeyError:
        unknown seq_type
    :raises IndexError:
        a single insertion run of seq1 is longer than the 52 available
        insertion letters
    """
    # the alignment mode is always taken from the `mode` argument,
    # so it is not part of the per-type defaults
    default_params = {
        "dna": {
            "matrix": "NUC.4.4",
            "open_gap_score": -10.0,
            "extend_gap_score": -0.5,
        },
        "rna": {
            "matrix": "NUC.4.4",
            "open_gap_score": -10.0,
            "extend_gap_score": -0.5,
        },
        "protein": {
            "matrix": "BLOSUM62",
            "open_gap_score": -11.0,
            "extend_gap_score": -1.0,
        },
    }

    available_matrices = {
        "dna": ["NUC.4.4"],
        "rna": ["NUC.4.4"],
        "protein": ["BLOSUM45", "BLOSUM50", "BLOSUM62",
                    "BLOSUM80", "BLOSUM90",
                    "PAM30", "PAM70", "PAM250"]
    }

    seq1 = check_sequence(seq1)
    seq2 = check_sequence(seq2)

    params = default_params[seq_type].copy()
    a_mats = available_matrices[seq_type]

    if substitution_matrix is not None:
        if substitution_matrix not in a_mats:
            raise ValueError("substitution matrix `%s` not support for %s" % (substitution_matrix, seq_type))
        params["matrix"] = substitution_matrix

    if open_gap_score is not None:
        params["open_gap_score"] = open_gap_score
    if extend_gap_score is not None:
        params["extend_gap_score"] = extend_gap_score

    # Finish parameters checking and setting
    aligner = PairwiseAligner()
    aligner.mode = mode
    aligner.substitution_matrix = substitution_matrices.load(params["matrix"])
    aligner.open_gap_score = params["open_gap_score"]
    aligner.extend_gap_score = params["extend_gap_score"]

    best_alignment = aligner.align(seq1, seq2)[0]

    # NOTE(review): relies on the alignment unpacking into its two gapped rows
    # (recent Biopython releases) - confirm against the pinned version
    aligned_seq1, aligned_seq2 = best_alignment

    ins_letters = string.ascii_uppercase + string.ascii_lowercase

    # positions inside the aligned region, start from 1
    aa_mapper = dict()
    i = 0  # residues of seq1 consumed so far
    j = 0  # residues of seq2 consumed so far
    k = 0  # index of the next insertion letter within the current run

    for aa1, aa2 in zip(aligned_seq1, aligned_seq2):
        if aa1 != "-":
            i += 1
        if aa2 != "-":
            j += 1
            # a residue of seq2 ends any running insertion
            k = 0

        if aa1 != "-" and aa2 != "-":
            aa_mapper[i] = (j, "")
        elif aa1 != "-":
            # insertion of seq1 (gap in seq2)
            if k >= len(ins_letters):
                raise IndexError("insertion in seq1 after position %d of seq2 is longer than "
                                 "%d residues, no insertion letter left" % (j, len(ins_letters)))
            aa_mapper[i] = (j, ins_letters[k])
            k += 1

    # shift from alignment index to sequence index:
    # column 0 of the coordinates holds the start offset of each sequence
    start_1, start_2 = best_alignment.coordinates[:, 0]
    _mapper = {pos + start_1: "%d%s" % (val[0] + start_2, val[1]) for pos, val in aa_mapper.items()}

    # head and tail of seq1 outside the aligned region get the E prefix
    out_mapper = dict()
    for pos in range(1, len(seq1) + 1):
        if pos not in _mapper:
            out_mapper[pos] = "E%d" % pos
        else:
            out_mapper[pos] = _mapper[pos]

    # count once, use for both identity and coverage
    counts = best_alignment.counts()
    ident = counts.identities / best_alignment.length
    n_aligned = best_alignment.length - counts.gaps

    coverage_1 = n_aligned / len(seq1)
    coverage_2 = n_aligned / len(seq2)

    return dict(seq1=seq1,
                seq2=seq2,
                aligned_seq1=aligned_seq1,
                aligned_seq2=aligned_seq2,
                alignment_length=best_alignment.length,
                aligned_aa_mapper=out_mapper,
                identity=round(ident, 3),
                coverage_1=round(coverage_1, 3),
                coverage_2=round(coverage_2, 3),
                )
168
+
169
+
170
class StructureAligner(object):
    """
    Align a query structure onto a reference structure with the external
    MMalign program and keep the parsed results.

    Attributes set by make_alignment:
        values: dict parsed from the MMalign terminal output
        rot_mat: dict(R=3x3 rotation, T=translation) read from the matrix file
        is_aligned: True once an alignment finished successfully
        by_query, by_ref: chain IDs used for the query / reference
    """

    def __init__(self, query_path: str, ref_path: str):
        """
        :param query_path: str
            structure file of the query
        :param ref_path: str
            structure file of the reference
        """
        self._query_st = StructureParser()
        self._query_st.load_from_file(query_path)

        self._ref_st = StructureParser()
        self._ref_st.load_from_file(ref_path)

        self.values = dict()
        self.rot_mat = None
        self.is_aligned = False
        self.by_query = None
        self.by_ref = None
        self.query_path = query_path
        self.ref_path = ref_path

    @property
    def __mmalign_path(self):
        """
        Path of the MMalign executable found on PATH.

        :raises RuntimeError: if neither MMAlign nor MMalign is found
        """
        _path = shutil.which("MMAlign") or shutil.which("MMalign")
        if _path is None:
            raise RuntimeError("Executable program MMAlign is not found. "
                               "Download from https://zhanggroup.org/MM-align/ ."
                               "Build it and add MMAlign to environment PATH")
        return _path

    @staticmethod
    def __parser_rotation_matrix(matrix_file: str):
        """
        Read the rotation matrix and translation vector from a matrix file.

        Lines 3 to 5 of the file are expected to hold five whitespace separated
        columns each: row index, translation, and three rotation values.

        :param matrix_file: str
            file written by the `-m` option of MMalign
        :return: dict
            R: float32 array of the rotation values, one row per line
            T: float32 array of the translation values
        :raises ValueError: if the three rows are missing or malformed
        """
        rotation_matrix = []
        translation_vector = []

        with open(matrix_file, 'r') as file:
            lines = file.readlines()

        rows = lines[2:5]
        if len(rows) != 3:
            raise ValueError("Matrix file %s is truncated: expected 3 matrix rows, found %d"
                             % (matrix_file, len(rows)))

        for cur_line in rows:
            tmp = re.split(pattern=r"\s+", string=cur_line.strip())
            # explicit check instead of assert, which is stripped under -O
            if len(tmp) != 5:
                raise ValueError("Unexpected matrix row in %s: %r" % (matrix_file, cur_line))
            rotation_matrix.append(tmp[2:])
            translation_vector.append(tmp[1])

        return dict(R=np.array(rotation_matrix).astype(np.float32),
                    T=np.array(translation_vector).astype(np.float32))

    @staticmethod
    def __parse_terminal_outputs(output_string: str) -> Dict[str, Any]:
        """
        Extract chain IDs, lengths, scores and aligned sequences from the
        terminal output of MMalign.

        Every pattern is used once: the first line it matches wins.
        Keys whose pattern never matches are absent from the result.

        :param output_string: str
            decoded stdout of MMalign
        :return: dict
            query_chain_ids, ref_chain_ids: list of str
            query_total_length, ref_total_length, aligned_length: int
            rmsd, tmscore_by_query, tmscore_by_ref: float
            query_sequences, ref_sequences: list of str, the aligned
                sequence line split on "*"
        """
        lines = re.split(pattern=r"\n", string=output_string)

        patterns = dict(query_chain_ids=r"Structure_1.+\.pdb:([\w:]+)",
                        ref_chain_ids=r"Structure_2.+\.pdb:([\w:]+)",
                        query_total_length=r"Length of Structure_1.*?(\d+).*residues",
                        ref_total_length=r"Length of Structure_2.*?(\d+).*residues",
                        aligned_length=r"Aligned length=.*?(\d+)",
                        rmsd=r"RMSD=.*?([\d.]+)",
                        tmscore_by_query=r"TM-score=.*?([\d.]+).+Structure_1",
                        tmscore_by_ref=r"TM-score=.*?([\d.]+).+Structure_2",
                        aligned_seq_start=r"denotes other aligned residues",
                        )
        chain_keys = ('query_chain_ids', 'ref_chain_ids')
        int_keys = ('query_total_length', 'ref_total_length', 'aligned_length')
        float_keys = ('rmsd', 'tmscore_by_query', 'tmscore_by_ref')

        values = dict()
        for idx, line in enumerate(lines):
            # iterate over a snapshot, matched patterns are removed below
            for key in list(patterns.keys()):
                tmp = re.search(patterns[key], line)
                if not tmp:
                    continue

                if key in chain_keys:
                    values[key] = re.split(pattern=":", string=tmp.groups()[0])
                elif key in int_keys:
                    values[key] = int(tmp.groups()[0])
                elif key in float_keys:
                    values[key] = float(tmp.groups()[0])
                elif key == "aligned_seq_start":
                    # idx + 1 and idx + 3 for aligned sequences 1 and 2
                    seq_1 = lines[idx + 1]
                    seq_2 = lines[idx + 3]

                    sp1 = re.split(pattern=r"\*", string=seq_1)
                    sp2 = re.split(pattern=r"\*", string=seq_2)
                    # drop the last piece when the line contains a "*"
                    values["query_sequences"] = sp1[:-1] if "*" in seq_1 else sp1
                    values["ref_sequences"] = sp2[:-1] if "*" in seq_2 else sp2
                del patterns[key]
        return values

    def make_alignment(self, query_chains: Optional[List[str]] = None,
                       ref_chains: Optional[List[str]] = None, timeout=300.0):
        """
        Run MMalign on the selected chains and store the parsed results.

        On failure (non-zero exit, timeout, ...) the error is printed and the
        attributes of the instance are left untouched.

        :param query_chains: list, None
            None for all chains
        :param ref_chains: list, None
            None for all chains
        :param timeout: float
            seconds to wait for MMalign, default 300
        :return: None
        :raises RuntimeError: if the MMalign executable is not found
        """

        program_path = self.__mmalign_path

        if isinstance(query_chains, list):
            q_st = self._query_st.pick_chains(query_chains)
        else:
            # NOTE(review): no copy is made here, so make_one_letter_chain below
            # presumably renames chains of the loaded query itself - confirm
            q_st = self._query_st

        if isinstance(ref_chains, list):
            r_st = self._ref_st.pick_chains(ref_chains)
        else:
            r_st = self._ref_st

        q_ch_mapper = q_st.make_one_letter_chain()
        r_ch_mapper = r_st.make_one_letter_chain()

        # reversed mappers, used to translate the chain IDs reported by MMalign
        q_ch_mapper_r = {v: k for k, v in q_ch_mapper.items()}
        r_ch_mapper_r = {v: k for k, v in r_ch_mapper.items()}

        with tempfile.TemporaryDirectory() as tmp_dir:
            _tmp_a = os.path.join(tmp_dir, "a.pdb")
            q_st.to_pdb(_tmp_a)

            _tmp_b = os.path.join(tmp_dir, "b.pdb")
            r_st.to_pdb(_tmp_b)

            matrix_file = os.path.join(tmp_dir, "m.txt")
            # argument list without a shell: paths with spaces or shell
            # metacharacters are passed through unchanged
            _command = [program_path, _tmp_a, _tmp_b, "-m", matrix_file]

            try:
                result = subprocess.run(_command, check=True,
                                        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                                        timeout=timeout)
            except Exception as e:
                print("%s: between files %s and %s; between chains: %s and %s" % (
                    str(e), self.query_path, self.ref_path,
                    str(q_st.chain_ids), str(r_st.chain_ids))
                      )
            else:
                # the matrix file lives in tmp_dir, read it before leaving the block
                self.values = self.__parse_terminal_outputs(result.stdout.decode())
                self.rot_mat = self.__parser_rotation_matrix(matrix_file)
                self.is_aligned = True
                self.by_query = q_st.chain_ids if query_chains is None else query_chains
                self.by_ref = r_st.chain_ids if ref_chains is None else ref_chains
                self.values["query_chain_ids"] = [q_ch_mapper_r.get(ch, ch) for ch in self.values["query_chain_ids"]]
                self.values["ref_chain_ids"] = [r_ch_mapper_r.get(ch, ch) for ch in self.values["ref_chain_ids"]]

    def save_aligned_query(self, out_file: str):
        """
        Apply the stored rotation and translation to the query structure and
        write it as mmCIF.

        :param out_file: str
            path of the output .cif file
        :return: None
        :raises RuntimeError: if make_alignment has not succeeded yet
        """
        if not self.is_aligned:
            raise RuntimeError("structure not aligned, run make_alignment first")

        super_imposer = Superimposer()
        # the transposed rotation is handed to Superimposer together with the translation
        super_imposer.rotran = (self.rot_mat["R"].T, self.rot_mat["T"])

        bio_s = gemmi2bio(self._query_st.STRUCT)
        super_imposer.apply(bio_s)
        query_st_aligned = bio2gemmi(bio_s)

        block = query_st_aligned.make_mmcif_block()
        block.write_file(out_file)
@@ -0,0 +1,128 @@
1
+ """
2
+ @Author: Luo Jiejian
3
+ """
4
+ import json
5
+ import os
6
+ import shutil
7
+ import subprocess
8
+ import tempfile
9
+ from copy import deepcopy
10
+ from typing import List, Tuple
11
+
12
+ import gemmi
13
+ import pandas as pd
14
+
15
+ from gemmi_protools.io.reader import StructureParser
16
+
17
+
18
def dockq_score_interface(query_model: str,
                          native_model: str,
                          partner_1_mapping: List[Tuple[str, str]],
                          partner_2_mapping: List[Tuple[str, str]],
                          ):
    """
    Calculate Dockq Score for an interface (partner 1 vs partner 2)

    The chains of each partner are merged into a single chain (A for partner 1,
    B for partner 2, residues renumbered from 1) in both models, then the
    external DockQ program is run with the mapping AB:AB.

    :param query_model: str
        path of query model, support .pdb, .pdb.gz, .cif, .cif.gz
    :param native_model: str
        path of native model
    :param partner_1_mapping: a list of chain ID mapping between query and native for partner1 of the interface
        e.g. [(q chain1, n chain1), (q chain2, n chain2)]
    :param partner_2_mapping: same as partner_1_mapping, for partner2 of the interface
    :return: dict
        score: DockQ value formatted with 4 decimals, "" when DockQ failed
        status: "Finished", or the error message when DockQ failed
    :raises ValueError:
        if a mapping is empty, or a chain is not found in its model
    :raises RuntimeError:
        if the DockQ executable is not found on PATH
    """
    # explicit checks instead of assert, which is stripped under -O
    if len(partner_1_mapping) == 0:
        raise ValueError("partner_1_mapping must be a list of chain ID tuples, can't be empty")
    if len(partner_2_mapping) == 0:
        raise ValueError("partner_2_mapping must be a list of chain ID tuples, can't be empty")

    dockq_program = shutil.which("DockQ")
    if dockq_program is None:
        raise RuntimeError("DockQ is need")

    def merge_chains(st, chain_ids: List[str], new_name: str):
        # copy all residues of chain_ids into one chain, renumbered from 1
        merged = gemmi.Chain(new_name)
        idx = 1
        for ch in chain_ids:
            for res in st.get_chain(ch):
                nr = deepcopy(res)
                nr.seqid.icode = " "
                nr.seqid.num = idx
                merged.add_residue(nr)
                idx += 1
        return merged

    def load_struct(path: str, partner_1: List[str], partner_2: List[str]):
        # load a model and reduce it to chain A (partner_1) and chain B (partner_2)
        st = StructureParser()
        st.load_from_file(path)
        st.clean_structure()

        for ch in partner_1 + partner_2:
            if ch not in st.chain_ids:
                raise ValueError("Chain %s not found for %s (only [%s])" % (ch, path, " ".join(st.chain_ids)))

        model = gemmi.Model(1)
        model.add_chain(merge_chains(st, partner_1, "A"))
        model.add_chain(merge_chains(st, partner_2, "B"))

        struct = gemmi.Structure()
        struct.add_model(model)

        return StructureParser(struct)

    partner_1_query, partner_1_native = list(zip(*partner_1_mapping))
    partner_2_query, partner_2_native = list(zip(*partner_2_mapping))

    q_st = load_struct(query_model, list(partner_1_query), list(partner_2_query))
    n_st = load_struct(native_model, list(partner_1_native), list(partner_2_native))

    with tempfile.TemporaryDirectory() as tmp_dir:
        result_file = os.path.join(tmp_dir, "result.json")
        q_file = os.path.join(tmp_dir, "q.pdb")
        n_file = os.path.join(tmp_dir, "n.pdb")
        q_st.to_pdb(q_file, write_minimal_pdb=True)
        n_st.to_pdb(n_file, write_minimal_pdb=True)

        mapping = "AB:AB"

        # argument list without a shell: no quoting issues with the paths
        _command = [dockq_program, "--mapping", mapping, "--json", result_file, q_file, n_file]
        metrics = ['DockQ', 'F1', 'chain1', 'chain2']

        try:
            _ = subprocess.run(_command, check=True,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                               timeout=300.0)
        except subprocess.CalledProcessError as e:
            # Handle errors in the called executable
            msg = e.stderr.decode()
            outputs = pd.DataFrame(columns=metrics)
        except Exception as e:
            # Handle other exceptions such as file not found or permissions issues
            msg = str(e)
            outputs = pd.DataFrame(columns=metrics)
        else:
            # the result file lives in tmp_dir, read it before leaving the block
            with open(result_file, "r") as fin:
                vals = json.load(fin)
            msg = "Finished"
            result = list(vals["best_result"].values())
            outputs = pd.DataFrame(result)[metrics]

    if len(outputs) > 0:
        score = "%.4f" % outputs.iloc[0]["DockQ"]
    else:
        score = ""

    return dict(score=score, status=msg)
@@ -0,0 +1,197 @@
1
+ """
2
+ @Author: Luo Jiejian
3
+ """
4
+ import os
5
+ import subprocess
6
+ import tempfile
7
+ from collections import defaultdict
8
+ from typing import List, Optional, Union
9
+
10
+ import freesasa
11
+ import numpy as np
12
+ import trimesh
13
+ from Bio.PDB import Selection
14
+ from Bio.PDB.ResidueDepth import _get_atom_radius, _read_vertex_array
15
+
16
+ from gemmi_protools import StructureParser
17
+ from gemmi_protools import gemmi2bio
18
+
19
+
20
+ def _read_face_array(filename: str):
21
+ with open(filename) as fp:
22
+ face_list = []
23
+ for line in fp:
24
+ sl = line.split()
25
+ if len(sl) != 5:
26
+ # skip header
27
+ continue
28
+ vl = [int(x) for x in sl[0:3]]
29
+ face_list.append(vl)
30
+ return np.array(face_list)
31
+
32
+
33
def get_mesh(struct_file: str, chains: Optional[List[str]] = None, MSMS: str = "msms"):
    """
    Build the surface mesh of a structure with the external msms program.

    :param struct_file: str
        .pdb, .cif, .pdb.gz, .cif.gz
    :param chains: a list of chain names
        default None to include all chains
    :param MSMS: str
        path of msms executable
        https://ccsb.scripps.edu/msms/downloads/
    :return: trimesh.Trimesh, or None when loading the structure or running
        msms failed (the error is printed)
    """
    # all intermediate files live in one directory that is removed on exit,
    # also when building the mesh raises
    with tempfile.TemporaryDirectory() as tmp_dir:
        xyz_tmp = os.path.join(tmp_dir, "atoms.xyzr")
        surface_tmp = os.path.join(tmp_dir, "surface")
        msms_tmp = os.path.join(tmp_dir, "msms.log")
        face_file = surface_tmp + ".face"
        surface_file = surface_tmp + ".vert"

        try:
            st = StructureParser()
            st.load_from_file(struct_file)
            st.clean_structure(remove_ligand=True)

            if chains is None:
                st_p = st
            else:
                for ch in chains:
                    if ch not in st.chain_ids:
                        raise ValueError("Chain %s not found (only [%s])" % (ch, " ".join(st.chain_ids)))
                st_p = st.pick_chains(chains)

            bio_st = gemmi2bio(st_p.STRUCT)
            model = bio_st[0]

            # Replace pdb_to_xyzr
            # Make x,y,z,radius file
            atom_list = Selection.unfold_entities(model, "A")

            with open(xyz_tmp, "w") as pdb_to_xyzr:
                for atom in atom_list:
                    x, y, z = atom.coord
                    radius = _get_atom_radius(atom, rtype="united")
                    pdb_to_xyzr.write(f"{x:6.3f}\t{y:6.3f}\t{z:6.3f}\t{radius:1.2f}\n")

            # Make surface
            # MSMS is a free-form command string, so the shell invocation is kept;
            # the f-string (instead of %-formatting) tolerates "%" in the paths
            make_surface = (f"{MSMS} -no_header -probe_radius 1.5 "
                            f"-if {xyz_tmp} -of {surface_tmp} > {msms_tmp}")
            subprocess.call(make_surface, shell=True)
            if not os.path.isfile(surface_file):
                raise RuntimeError(
                    f"Failed to generate surface file using command:\n{make_surface}"
                )

        except Exception as e:
            print(str(e))
            return None

        # Read surface vertices and faces written by msms
        vertices = _read_vertex_array(surface_file)
        faces = _read_face_array(face_file)
        # faces - 1: shift the vertex indices read from the .face file down by one
        mesh = trimesh.Trimesh(vertices=vertices, faces=faces - 1)
        mesh.merge_vertices()
        mesh.update_faces(mesh.unique_faces())
        mesh.update_faces(mesh.nondegenerate_faces())
        mesh.remove_unreferenced_vertices()

    return mesh
107
+
108
+
109
def get_surface_residues(struct_file: str,
                         chains: Optional[List[str]] = None,
                         relative_sasa_cutoff: Union[int, float] = 0.15):
    """
    Find the surface residues of a structure with freesasa.

    A residue is reported when its relative total SASA is greater than
    relative_sasa_cutoff and at least one of its atoms has SASA > 0.

    :param struct_file: str
        path of the structure file
    :param chains: a list of chain names
        default None to include all chains
    :param relative_sasa_cutoff: int or float
        cutoff of the relative total SASA of a residue, default 0.15
    :return: structured np.ndarray with fields
        chain_name: chain ID as in the input structure
        residue_numi: residue number plus insertion code
        residue_name: residue name
        sequential_residue_num: index of the residue in its chain, start from 1
        centroid: mean position of the (up to) three atoms of the residue
            with the largest SASA
        relative_sasa: relative total SASA of the residue
    :raises ValueError:
        if chains is an empty list, or a chain is not found
    """
    ####################
    # check and pick
    ####################
    st = StructureParser()
    st.load_from_file(struct_file)
    st.clean_structure()

    if chains is None:
        chains = st.chain_ids

    if isinstance(chains, list):
        if len(chains) == 0:
            raise ValueError("chains is not set")
        # check if chains valid
        for ch in chains:
            if ch not in st.chain_ids:
                raise ValueError("Chain %s not found" % ch)

    st_p = st.pick_chains(chains)

    # sequential index of every residue, start from 1,
    # collected before the chains are renamed below
    seq_num_mapper = dict()
    for chain in st_p.MODEL:
        for i, res in enumerate(chain):
            key = (chain.name, str(res.seqid.num) + res.seqid.icode.strip(), res.name)
            seq_num_mapper[key] = i + 1

    # make one upper letter chain ID
    mapper = st_p.make_one_letter_chain(only_uppercase=True)
    mapper_r = {v: k for k, v in mapper.items()}

    ####################
    # save to pdb
    ####################
    # a private directory instead of an open NamedTemporaryFile: the file is
    # written and re-read by name, which an open handle prevents on some platforms
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_pdb = os.path.join(tmp_dir, "structure.pdb")
        st_p.to_pdb(tmp_pdb)
        structure = freesasa.Structure(tmp_pdb)

    result = freesasa.calc(structure)

    residue_areas = result.residueAreas()

    surface_residues_relative_sasa = dict()
    surface_atoms = defaultdict(list)
    for atom_index in range(structure.nAtoms()):
        # chain label as written to the pdb file, and its name in the input structure
        label = structure.chainLabel(atom_index)
        ch = mapper_r.get(label, label)

        res_num = structure.residueNumber(atom_index).strip()
        res_name = structure.residueName(atom_index)
        atom_sasa = result.atomArea(atom_index)

        res_id = (ch, res_num, res_name)
        # residue_areas comes from the same freesasa structure, so it has to be
        # indexed with the freesasa chain label, not with the restored name
        res_relative_total = residue_areas[label][res_num].relativeTotal
        if res_relative_total > relative_sasa_cutoff:
            if res_id not in surface_residues_relative_sasa:
                surface_residues_relative_sasa[res_id] = res_relative_total
            if atom_sasa > 0:
                atom_name = structure.atomName(atom_index).strip()
                pos = structure.coord(atom_index)
                surface_atoms[res_id].append((atom_sasa, atom_name, pos))

    results = []
    for res_id, query_atoms in surface_atoms.items():
        seq_loc = seq_num_mapper[res_id]

        # largest SASA first, the top three atoms define the centroid
        query_atoms.sort(reverse=True)
        centroid = tuple(np.array([a[2] for a in query_atoms[0:3]]).mean(axis=0).tolist())
        results.append((res_id[0],
                        res_id[1],
                        res_id[2],
                        seq_loc,
                        centroid,
                        surface_residues_relative_sasa[res_id]
                        )
                       )
    dtype = [("chain_name", "U5"),
             ("residue_numi", "U8"),
             ("residue_name", "U5"),
             ("sequential_residue_num", "i4"),
             ("centroid", ("f4", (3,))),
             ("relative_sasa", "f4"),
             ]
    return np.array(results, dtype=dtype)