EntDetect 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. EntDetect/Jwalk/GridTools.py +567 -0
  2. EntDetect/Jwalk/PDBTools.py +532 -0
  3. EntDetect/Jwalk/SASDTools.py +543 -0
  4. EntDetect/Jwalk/SurfaceTools.py +150 -0
  5. EntDetect/Jwalk/__init__.py +19 -0
  6. EntDetect/Jwalk/naccess.config.txt +255 -0
  7. EntDetect/__init__.py +10 -0
  8. EntDetect/_logging.py +71 -0
  9. EntDetect/change_resolution.py +2361 -0
  10. EntDetect/clustering.py +2626 -0
  11. EntDetect/compare_sim2exp.py +1927 -0
  12. EntDetect/entanglement_features.py +478 -0
  13. EntDetect/gaussian_entanglement.py +2067 -0
  14. EntDetect/order_params.py +1048 -0
  15. EntDetect/resources/__init__.py +11 -0
  16. EntDetect/resources/__pycache__/__init__.cpython-311.pyc +0 -0
  17. EntDetect/resources/calc_K.pl +712 -0
  18. EntDetect/resources/calc_Q.pl +962 -0
  19. EntDetect/resources/pulchra +0 -0
  20. EntDetect/resources/shared_files/__init__.py +2 -0
  21. EntDetect/resources/shared_files/bt_contact_potential.dat +22 -0
  22. EntDetect/resources/shared_files/karanicolas_dihe_parm.dat +1600 -0
  23. EntDetect/resources/shared_files/kgs_contact_potential.dat +22 -0
  24. EntDetect/resources/shared_files/mj_contact_potential.dat +22 -0
  25. EntDetect/resources/stride +0 -0
  26. EntDetect/statistics.py +1344 -0
  27. EntDetect/utilities.py +201 -0
  28. entdetect-1.2.0.dist-info/METADATA +26 -0
  29. entdetect-1.2.0.dist-info/RECORD +45 -0
  30. entdetect-1.2.0.dist-info/WHEEL +5 -0
  31. entdetect-1.2.0.dist-info/entry_points.txt +11 -0
  32. entdetect-1.2.0.dist-info/licenses/LICENSE +674 -0
  33. entdetect-1.2.0.dist-info/top_level.txt +2 -0
  34. scripts/__init__.py +5 -0
  35. scripts/convert_cor_psf_to_pdb.py +103 -0
  36. scripts/run_Foldingpathway.py +162 -0
  37. scripts/run_MSM.py +152 -0
  38. scripts/run_OP_on_simulation_traj.py +194 -0
  39. scripts/run_change_resolution.py +63 -0
  40. scripts/run_compare_sim2exp.py +215 -0
  41. scripts/run_montecarlo.py +158 -0
  42. scripts/run_nativeNCLE.py +179 -0
  43. scripts/run_nonnative_entanglement_clustering.py +110 -0
  44. scripts/run_population_modeling.py +117 -0
  45. scripts/run_workflow4_nativeNCLE_batch.py +412 -0
@@ -0,0 +1,543 @@
1
+ # ===============================================================================
2
+ # This file is part of Jwalk (Python 3).
3
+ #
4
+ # Jwalk - A tool to calculate the solvent accessible surface distance (SASD)
5
+ # between crosslinked residues.
6
+ #
7
+ # Copyright 2016 Josh Bullock and Birkbeck College University of London.
8
+ #
9
+ # Jwalk is available under Public Licence.
10
+ # This software is made available under GPL V3
11
+ #
12
+ # Please cite your use of Jwalk in published work:
13
+ #
14
+ # J.Bullock, J. Schwab, K. Thalassinos, M. Topf (2016)
15
+ # The importance of non-accessible crosslinks and solvent accessible surface distance
16
+ # in modelling proteins with restraints from crosslinking mass spectrometry.
17
+ # Molecular and Cellular Proteomics (15) pp.2491-2500
18
+ #
19
+ # ===============================================================================
20
+
21
+ import math
22
+ import itertools
23
+ from collections import deque
24
+ from multiprocessing import Pool, freeze_support
25
+
26
def calculate_specific_SASD(single_crosslink, aa1_voxels, aa2_voxels, dens_map, aa1_CA, aa2_CA,
                            max_dist, vox):
    """
    Breadth First Search of grid for one specific crosslink. For general info
    on the algorithm see: https://en.wikipedia.org/wiki/Breadth-first_search

    Returns a dictionary with at most one entry: the shortest solvent
    accessible surface distance found between the start and the end residue.
    The dictionary is empty when no end voxel was reached by the search.

    {(start res, end res, length) : voxel path of sasd}

    The path is a list that begins with the start residue's C-alpha voxel,
    continues with the searched voxels as [x, y, z] lists and finishes with
    the end residue's C-alpha voxel.

    Arguments:

       *single_crosslink*
           start and end residue.
           start is key of aa1_voxels. aa1_voxels[start_residue] = all the
           starting voxels for that residue
       *aa1_voxels*
           dictionary containing starting voxels {start_residue : starting voxels}
       *aa2_voxels*
           dictionary containing ending voxels {end_residue : ending voxels}
       *dens_map*
           grid object providing x_size(), y_size(), z_size() and fullMap;
           voxels whose fullMap value is <= 0 are treated as empty space
       *aa1_CA*
           dictionary containing voxel of C-alpha of the start residue
       *aa2_CA*
           dictionary containing voxel of C-alpha of the end residue
       *max_dist*
           voxels further than this from the start are not expanded
       *vox*
           number of angstroms per voxel

    """
    start_residue = single_crosslink[0]
    end_residue = single_crosslink[1]

    # order of voxels to search: 6 face moves, then 12 edge moves, then 8 corner moves
    comb = (
        (+1, +0, +0), (-1, +0, +0),
        (+0, +1, +0), (+0, -1, +0),
        (+0, +0, +1), (+0, +0, -1),
        (+1, +0, +1), (-1, +0, +1),
        (+0, +1, +1), (+0, -1, +1),
        (+1, -1, +0), (-1, -1, +0),
        (+1, +1, +0), (-1, +1, +0),
        (+1, +0, -1), (-1, +0, -1),
        (+0, +1, -1), (+0, -1, -1),
        (+1, +1, +1), (+1, -1, +1),
        (-1, +1, +1), (-1, -1, +1),
        (+1, +1, -1), (+1, -1, -1),
        (-1, +1, -1), (-1, -1, -1),
    )

    # step length indexed by the number of axes a move changes (1, 2 or 3)
    diag1 = math.sqrt((vox ** 2) * 2)  # 2d diagonal
    diag2 = math.sqrt((vox ** 2) * 3)  # 3d diagonal
    step_length = (None, vox, diag1, diag2)

    x_size = dens_map.x_size()
    y_size = dens_map.y_size()
    z_size = dens_map.z_size()
    full_map = dens_map.fullMap

    queue = deque()  # voxels waiting to be expanded (deque gives O(1) pops from the left)
    parent = {}      # voxel -> voxel it was reached from (None for start voxels); also the visited set
    distance = {}    # voxel -> accumulated step length from the start voxels

    # place starting voxels into queue and initialise parent and distance
    for j in aa1_voxels[start_residue]:
        seed = (j[0], j[1], j[2])
        if seed not in parent:
            queue.append(seed)
            parent[seed] = None
            distance[seed] = 0

    while queue:
        current = queue.popleft()
        d_n = distance[current]
        if d_n > max_dist:
            continue
        x_n, y_n, z_n = current
        for c in comb:
            x_temp = x_n + c[0]
            y_temp = y_n + c[1]
            z_temp = z_n + c[2]
            neighbour = (x_temp, y_temp, z_temp)
            if neighbour in parent:
                continue
            if not (0 <= x_temp < x_size and 0 <= y_temp < y_size and 0 <= z_temp < z_size):
                continue
            # the first visit fixes both the path and the distance of a voxel
            parent[neighbour] = current
            distance[neighbour] = d_n + step_length[abs(c[0]) + abs(c[1]) + abs(c[2])]
            if full_map[z_temp][y_temp][x_temp] <= 0:  # only empty space is searched further
                queue.append(neighbour)

    def trace(voxel):
        """Rebuild the start-to-voxel path as a list of [x, y, z] lists."""
        path = []
        while voxel is not None:
            path.append([voxel[0], voxel[1], voxel[2]])
            voxel = parent[voxel]
        path.reverse()
        return path

    def gap(p, q):
        """Straight-line distance between two voxel coordinates."""
        (x_1, y_1, z_1) = p
        (x_2, y_2, z_2) = q
        return math.sqrt((x_1 - x_2) ** 2 + (y_1 - y_2) ** 2 + (z_1 - z_2) ** 2)

    # Extract the shortest path to the end residue. Everything is computed on
    # local values: the search results are never modified, so an end voxel
    # that is listed more than once cannot corrupt the reported path.
    best_distance = None
    best_path = None

    for j in aa2_voxels[end_residue]:
        (x, y, z) = j
        if (x, y, z) not in parent:
            continue

        ca_start = aa1_CA[start_residue]
        ca_end = aa2_CA[end_residue]
        path = trace((x, y, z))

        # add the distance between starting/ending residue CA voxel and start/end voxel in path
        # NOTE(review): these two terms are measured in voxel indices and are
        # not scaled by vox, unlike the search steps - confirm this is intended.
        total = distance[(x, y, z)]
        total += gap(ca_start, path[0])
        total += gap(path[-1], ca_end)

        # "<=" keeps the last of several equally short candidates
        if best_distance is None or total <= best_distance:
            best_distance = total
            best_path = [ca_start] + path + [ca_end]

    specific_xl = {}
    if best_path is not None:
        # start res, end res, length of xl = path of xl
        specific_xl[start_residue, end_residue, best_distance] = best_path

    return specific_xl
161
+
162
+
163
def calculate_SASDs(start_residue, aa1_voxels, aa2_voxels, dens_map, aa1_CA, aa2_CA,
                    max_dist, vox):
    """
    Breadth First Search of grid. For general info on the algorithm see:
    https://en.wikipedia.org/wiki/Breadth-first_search

    Returns a dictionary containing the shortest solvent accessible surface
    distance found between the starting residue and every other residue in
    aa2_voxels that the search reached.

    {(res, res, length) : voxel path of sasd}

    The two residues in a key are ordered by chain (element [1]) and, on the
    same chain, by residue number (element [0]). The path always runs from the
    start residue's C-alpha voxel, through the searched voxels (as [x, y, z]
    lists), to the end residue's C-alpha voxel, whatever the key order is.

    Arguments:

       *start_residue*
           key of aa1_voxels. aa1_voxels[start_residue] = all the starting
           voxels for that residue
       *aa1_voxels*
           dictionary containing starting voxels {start_residue : starting voxels}
       *aa2_voxels*
           dictionary containing ending voxels {end_residue : ending voxels}
       *dens_map*
           grid object providing x_size(), y_size(), z_size() and fullMap;
           voxels whose fullMap value is <= 0 are treated as empty space
       *aa1_CA*
           dictionary containing voxel of C-alpha
       *aa2_CA*
           dictionary containing voxel of C-alpha
       *max_dist*
           voxels further than this from the start are not expanded
       *vox*
           number of angstroms per voxel

    """
    # order of voxels to search: 6 face moves, then 12 edge moves, then 8 corner moves
    comb = (
        (+1, +0, +0), (-1, +0, +0),
        (+0, +1, +0), (+0, -1, +0),
        (+0, +0, +1), (+0, +0, -1),
        (+1, +0, +1), (-1, +0, +1),
        (+0, +1, +1), (+0, -1, +1),
        (+1, -1, +0), (-1, -1, +0),
        (+1, +1, +0), (-1, +1, +0),
        (+1, +0, -1), (-1, +0, -1),
        (+0, +1, -1), (+0, -1, -1),
        (+1, +1, +1), (+1, -1, +1),
        (-1, +1, +1), (-1, -1, +1),
        (+1, +1, -1), (+1, -1, -1),
        (-1, +1, -1), (-1, -1, -1),
    )

    # step length indexed by the number of axes a move changes (1, 2 or 3)
    diag1 = math.sqrt((vox ** 2) * 2)  # 2d diagonal
    diag2 = math.sqrt((vox ** 2) * 3)  # 3d diagonal
    step_length = (None, vox, diag1, diag2)

    x_size = dens_map.x_size()
    y_size = dens_map.y_size()
    z_size = dens_map.z_size()
    full_map = dens_map.fullMap

    queue = deque()  # voxels waiting to be expanded (deque gives O(1) pops from the left)
    parent = {}      # voxel -> voxel it was reached from (None for start voxels); also the visited set
    distance = {}    # voxel -> accumulated step length from the start voxels

    # place starting voxels into queue and initialise parent and distance
    for j in aa1_voxels[start_residue]:
        seed = (j[0], j[1], j[2])
        if seed not in parent:
            queue.append(seed)
            parent[seed] = None
            distance[seed] = 0

    # grid is searched until queue is empty
    while queue:
        current = queue.popleft()
        d_n = distance[current]
        if d_n > max_dist:
            continue
        x_n, y_n, z_n = current
        for c in comb:  # expand in all directions from voxel - in order of comb
            x_temp = x_n + c[0]
            y_temp = y_n + c[1]
            z_temp = z_n + c[2]
            neighbour = (x_temp, y_temp, z_temp)
            if neighbour in parent:
                continue
            if not (0 <= x_temp < x_size and 0 <= y_temp < y_size and 0 <= z_temp < z_size):
                continue
            # the first visit fixes both the path and the distance of a voxel
            parent[neighbour] = current
            distance[neighbour] = d_n + step_length[abs(c[0]) + abs(c[1]) + abs(c[2])]
            if full_map[z_temp][y_temp][x_temp] <= 0:  # only empty space is searched further
                queue.append(neighbour)

    def trace(voxel):
        """Rebuild the start-to-voxel path as a list of [x, y, z] lists."""
        path = []
        while voxel is not None:
            path.append([voxel[0], voxel[1], voxel[2]])
            voxel = parent[voxel]
        path.reverse()
        return path

    def gap(p, q):
        """Straight-line distance between two voxel coordinates."""
        (x_1, y_1, z_1) = p
        (x_2, y_2, z_2) = q
        return math.sqrt((x_1 - x_2) ** 2 + (y_1 - y_2) ** 2 + (z_1 - z_2) ** 2)

    sasds = {}

    # Extract the shortest path to every end residue. All corrections are
    # applied to local values: the search results stay untouched, so a surface
    # voxel shared by several end residues yields an independent, correct
    # distance and path for each of them.
    for end_residue in aa2_voxels:
        if start_residue == end_residue:
            continue

        best_distance = None
        best_path = None

        # cycling through possible end coords of end_residue to get shortest sasd
        for j in aa2_voxels[end_residue]:
            (x, y, z) = j
            if (x, y, z) not in parent:
                continue

            ca_start = aa1_CA[start_residue]
            ca_end = aa2_CA[end_residue]
            path = trace((x, y, z))

            # add the distance between starting/ending residue CA voxel and start/end voxel in path
            # NOTE(review): these two terms are measured in voxel indices and
            # are not scaled by vox, unlike the search steps - confirm intent.
            total = distance[(x, y, z)]
            total += gap(ca_start, path[0])
            total += gap(path[-1], ca_end)

            # "<=" keeps the last of several equally short candidates
            if best_distance is None or total <= best_distance:
                best_distance = total
                best_path = [ca_start] + path + [ca_end]

        if best_path is None:
            continue

        # order the key so that chains go alphabetically and, within one
        # chain, residues go numerically
        if start_residue[1] < end_residue[1]:
            start_first = True
        elif end_residue[1] < start_residue[1]:
            start_first = False
        else:
            start_first = start_residue[0] < end_residue[0]

        if start_first:
            sasds[start_residue, end_residue, best_distance] = best_path
        else:
            sasds[end_residue, start_residue, best_distance] = best_path

    return sasds
309
+
310
def calculate_SASDs_star(a_b):
    """Unpack the argument sequence *a_b* and pass its items to calculate_SASDs.

    Lets a single-argument mapper (such as Pool.map) drive the
    multi-argument calculate_SASDs.
    """
    arguments = tuple(a_b)
    return calculate_SASDs(*arguments)
313
+
314
def calculate_specific_SASD_star(a_b):
    """Unpack the argument sequence *a_b* and pass its items to calculate_specific_SASD.

    Lets a single-argument mapper (such as Pool.map) drive the
    multi-argument calculate_specific_SASD.
    """
    arguments = tuple(a_b)
    return calculate_specific_SASD(*arguments)
317
+
318
+ # ---------------------------------------------------------------------------
319
+ # Fast BFS helpers (deque queue, no full-path storage, grouped by start residue)
320
+ # ---------------------------------------------------------------------------
321
+
322
# Offsets to the 26 neighbours of a voxel, in search order.
_COMB = (
    # moves along one axis
    (1, 0, 0), (-1, 0, 0),
    (0, 1, 0), (0, -1, 0),
    (0, 0, 1), (0, 0, -1),
    # moves along two axes
    (1, 0, 1), (-1, 0, 1),
    (0, 1, 1), (0, -1, 1),
    (1, -1, 0), (-1, -1, 0),
    (1, 1, 0), (-1, 1, 0),
    (1, 0, -1), (-1, 0, -1),
    (0, 1, -1), (0, -1, -1),
    # moves along three axes
    (1, 1, 1), (1, -1, 1),
    (-1, 1, 1), (-1, -1, 1),
    (1, 1, -1), (1, -1, -1),
    (-1, 1, -1), (-1, -1, -1),
)
# number of non-zero components per move (determines step size)
_COMB_N = tuple(sum(1 for delta in move if delta != 0) for move in _COMB)
331
+
332
+
333
def _bfs_fast(start_residue, aa1_voxels, dens_map, max_dist, vox):
    """
    Breadth-first search from the start voxels of *start_residue* that records
    distances only (no paths).

    Every in-bounds voxel is recorded the first time it is seen; only voxels
    whose dens_map.fullMap value is <= 0 are expanded further, and a voxel
    whose recorded distance exceeds *max_dist* is not expanded.

    Returns:
        distance    : dict {(x,y,z): float} - accumulated step length from the
                      start voxels to each recorded voxel.
        start_origin: dict {(x,y,z): (sx,sy,sz)} - the start voxel from which
                      each recorded voxel was reached (needed for the
                      CA-to-surface correction).
    """
    # step length indexed by the number of axes a move changes (1, 2 or 3)
    step_by_axes = (None, vox, math.sqrt(vox * vox * 2), math.sqrt(vox * vox * 3))

    pending = deque()
    distance = {}      # also serves as the set of voxels already seen
    start_origin = {}

    for seed in aa1_voxels[start_residue]:
        voxel = (seed[0], seed[1], seed[2])
        if voxel in distance:
            continue
        pending.append(voxel)
        distance[voxel] = 0.0
        start_origin[voxel] = voxel

    nx = dens_map.x_size()
    ny = dens_map.y_size()
    nz = dens_map.z_size()
    grid = dens_map.fullMap

    moves = tuple(zip(_COMB, _COMB_N))

    while pending:
        here = pending.popleft()
        travelled = distance[here]
        if travelled > max_dist:
            continue
        source = start_origin[here]
        hx, hy, hz = here
        for (dx, dy, dz), axes in moves:
            tx = hx + dx
            ty = hy + dy
            tz = hz + dz
            target = (tx, ty, tz)
            if target in distance:
                continue
            if not (0 <= tx < nx and 0 <= ty < ny and 0 <= tz < nz):
                continue
            distance[target] = travelled + step_by_axes[axes]
            start_origin[target] = source
            if grid[tz][ty][tx] <= 0:
                pending.append(target)

    return distance, start_origin
386
+
387
+
388
def calculate_SASDs_for_start_fast(args):
    """
    Run a single _bfs_fast search from one start residue and read off the
    shortest distance to each requested end residue.

    Args (packed in one tuple so the function can be used with a mapper):
        (start_residue, end_residues, aa1_voxels, aa2_voxels, dens_map,
         aa1_CA, aa2_CA, max_dist, vox)

    Returns a dict {(res, res, distance): []}. The two residues in a key are
    ordered by chain (element [1]) and, on the same chain, by residue number
    (element [0]). The value is always an empty list because no path is
    stored. End residues equal to the start residue, missing from aa2_voxels,
    or without a distance below 9999.0 produce no entry.
    """
    (start_residue, end_residues, aa1_voxels, aa2_voxels, dens_map,
     aa1_CA, aa2_CA, max_dist, vox) = args

    distance, start_origin = _bfs_fast(start_residue, aa1_voxels, dens_map, max_dist, vox)

    start_ca = aa1_CA[start_residue]
    result = {}

    for end_residue in end_residues:
        if end_residue == start_residue or end_residue not in aa2_voxels:
            continue

        end_ca = aa2_CA[end_residue]
        shortest = 9999.0

        for surface in aa2_voxels[end_residue]:
            voxel = (surface[0], surface[1], surface[2])
            if voxel not in distance:
                continue
            seed = start_origin[voxel]
            total = distance[voxel]
            # start CA -> the start voxel this voxel was reached from
            total += math.sqrt((start_ca[0] - seed[0]) ** 2
                               + (start_ca[1] - seed[1]) ** 2
                               + (start_ca[2] - seed[2]) ** 2)
            # end surface voxel -> end CA
            total += math.sqrt((surface[0] - end_ca[0]) ** 2
                               + (surface[1] - end_ca[1]) ** 2
                               + (surface[2] - end_ca[2]) ** 2)
            if total < shortest:
                shortest = total

        if not shortest < 9999.0:
            continue

        # chain-alphabetical first, residue-numerical within one chain
        if start_residue[1] < end_residue[1]:
            start_first = True
        elif end_residue[1] < start_residue[1]:
            start_first = False
        else:
            start_first = start_residue[0] < end_residue[0]

        if start_first:
            ordered = (start_residue, end_residue)
        else:
            ordered = (end_residue, start_residue)
        result[ordered + (shortest,)] = []

    return result
437
+
438
+ # ---------------------------------------------------------------------------
439
+
440
def parallel_BFS(aa1_voxels, aa2_voxels, dens_map, aa1_CA, aa2_CA, crosslink_pairs,
                 max_dist, vox, ncpus, xl_list):
    """
    Parallelised Breadth First Search of grid.

    Returns one dictionary merging the results of all searches
    {(res, res, length) : voxel path of sasd}

    When xl_list is not "NULL", the pairs in crosslink_pairs are grouped by
    their first residue and calculate_SASDs_for_start_fast is run once per
    unique start residue; the values in the returned dictionary are then
    empty lists. Otherwise calculate_SASDs is run for every key of
    aa1_voxels.

    With ncpus > 1 the searches are distributed over a multiprocessing Pool of
    ncpus workers; otherwise they run one after another in this process
    (which also allows single cpu running on Windows machines).
    """
    freeze_support()

    if xl_list != "NULL":
        # grouped fast path: one search per unique start residue
        pairs_by_start = {}
        for pair in crosslink_pairs:
            pairs_by_start.setdefault(pair[0], []).append(pair[1])

        worker = calculate_SASDs_for_start_fast
        tasks = [
            (start, ends, aa1_voxels, aa2_voxels, dens_map, aa1_CA, aa2_CA, max_dist, vox)
            for start, ends in pairs_by_start.items()
        ]
    else:
        worker = calculate_SASDs_star
        tasks = [
            (start_residue, aa1_voxels, aa2_voxels, dens_map, aa1_CA, aa2_CA, max_dist, vox)
            for start_residue in aa1_voxels
        ]

    if ncpus > 1:
        pool = Pool(ncpus)
        try:
            xl_dictionaries = pool.map(worker, tasks)
        except BaseException:
            # do not leave worker processes behind when a search fails or the
            # run is interrupted
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
    else:
        xl_dictionaries = [worker(task) for task in tasks]

    final_XL = {}
    for c in xl_dictionaries:
        final_XL.update(c)

    return final_XL
510
+
511
def calculate_distance(cords):
    """Return the distance between two points in 3d, input e.g. [[x1,y1,z1],[x2,y2,z2]]."""
    first, second = cords[0], cords[1]
    dx = first[0] - second[0]
    dy = first[1] - second[1]
    dz = first[2] - second[2]
    return math.sqrt((dx ** 2) + (dy ** 2) + (dz ** 2))
514
+
515
def get_euclidean_distances(sasds, pdb, aa1, aa2):
    """
    Add the C-alpha to C-alpha Euclidean distance to every entry of *sasds*.

    Arguments:

       *sasds*
           dictionary keyed by (residue 1, residue 2, sasd); elements [0] and
           [1] of each residue are looked up as residue number and chain
       *pdb*
           path of the PDB file the C-alpha coordinates are read from
       *aa1*, *aa2*
           not used by this function

    Returns a dictionary {(residue 1, residue 2, sasd, euclidean distance) :
    sasds value}. Entries whose two residues are the same residue, or for
    which a C-alpha was not found in the file, are left out.
    """
    # (residue number, chain) -> [x, y, z] of the CA atom; a later ATOM record
    # for the same residue replaces an earlier one
    ca_coords = {}
    with open(pdb) as inf:
        for line in inf:
            if line.startswith('ATOM') and (line[12:16].strip() == 'CA'):
                chain = line[21:22].strip() or " "  # blank chain id is kept as a single space
                ca_coords[int(line[22:26].strip()), chain] = [float(line[30:38].strip()),
                                                              float(line[38:46].strip()),
                                                              float(line[46:54].strip())]

    sasds_and_eucs = {}

    # Only the pairs present in sasds are evaluated; an all-against-all table
    # of CA distances would grow quadratically with the number of residues.
    for s in sasds:
        first = (s[0][0], s[0][1])
        second = (s[1][0], s[1][1])
        if first == second:
            continue
        if first not in ca_coords or second not in ca_coords:
            continue
        v = ca_coords[first]
        v1 = ca_coords[second]
        euc = math.sqrt(((v[0] - v1[0]) ** 2) + ((v[1] - v1[1]) ** 2) + ((v[2] - v1[2]) ** 2))
        sasds_and_eucs[s[0], s[1], s[2], euc] = sasds[s]

    return sasds_and_eucs
543
+
@@ -0,0 +1,150 @@
1
+ # ===============================================================================
2
+ # This file is part of Jwalk (Python 3).
3
+ #
4
+ # Jwalk - A tool to calculate the solvent accessible surface distance (SASD)
5
+ # between crosslinked residues.
6
+ #
7
+ # Copyright 2016 Josh Bullock and Birkbeck College University of London.
8
+ #
9
+ # Jwalk is available under Public Licence.
10
+ # This software is made available under GPL V3
11
+ #
12
+ # Please cite your use of Jwalk in published work:
13
+ #
14
+ # J.Bullock, J. Schwab, K. Thalassinos, M. Topf (2016)
15
+ # The importance of non-accessible crosslinks and solvent accessible surface distance
16
+ # in modelling proteins with restraints from crosslinking mass spectrometry.
17
+ # Molecular and Cellular Proteomics (15) pp.2491-2500
18
+ #
19
+ # ===============================================================================
20
+
21
+ import os
22
+ import freesasa
23
+
24
def update_crosslink_pairs(crosslink_pairs, aa1_CA, aa2_CA, remove_aa1, remove_aa2):
    """
    Removes crosslinks that cannot be calculated from crosslink_pairs.

    A pair is removed when its first residue is not a key of aa1_CA, when its
    second residue is not a key of aa2_CA, or when the [number, chain] of the
    first / second residue is listed in remove_aa1 / remove_aa2.

    The removed pairs are reported on standard output. Returns the list of
    remaining pairs in their original order.
    """
    crosslink_pairs_final = []
    no_sasd_possible = []

    # one pass, no index bookkeeping: each pair is judged exactly once
    for pair in crosslink_pairs:
        xl_pair_1, xl_pair_2 = pair

        cannot_be_calculated = (
            xl_pair_1 not in aa1_CA
            or xl_pair_2 not in aa2_CA
            or [xl_pair_1[0], xl_pair_1[1]] in remove_aa1
            or [xl_pair_2[0], xl_pair_2[1]] in remove_aa2
        )

        if cannot_be_calculated:
            no_sasd_possible.append(pair)
        else:
            crosslink_pairs_final.append(pair)

    if no_sasd_possible:
        print("the following crosslinks cannot be calculated:")
        for s in no_sasd_possible:
            print("{}-{}-{} - {}-{}-{}".format(s[0][2],s[0][0],s[0][1],s[1][2],s[1][0],s[1][1]))

    return crosslink_pairs_final
72
+
73
def check_solvent_accessibility_freesasa(prot, aa_CA, xl_list, aa_dict, ncpus):
    """
    Keep only the residues of aa_CA that freesasa reports as solvent accessible.

    freesasa is run on the structure file *prot* with the classifier read from
    naccess.config.txt located next to this module. A residue counts as
    accessible when its total SASA is greater than 7.0.

    Arguments:

       *prot*
           path of the structure file
       *aa_CA*
           dictionary keyed by (residue number, chain, residue name)
       *xl_list*
           when equal to "NULL", a summary line is printed
       *aa_dict*
           dictionary giving the text used for a residue name in the summary
       *ncpus*
           number of threads requested from freesasa

    Returns the sub-dictionary of aa_CA holding the accessible residues. Every
    buried residue is reported on standard output.
    """
    # The parameters have to be handed to freesasa.calc to take effect;
    # setting them on a temporary Parameters object changes nothing.
    parameters = freesasa.Parameters()
    parameters.setNSlices(50)
    parameters.setNThreads(ncpus)

    pt = os.path.dirname(os.path.realpath(__file__))
    classifier = freesasa.Classifier(os.path.join(pt, "naccess.config.txt"))
    structure = freesasa.Structure(os.path.normpath(prot), classifier)
    result = freesasa.calc(structure, parameters)

    solv_access_residue = set()
    for chain, residue in result.residueAreas().items():
        for res_sasa_info in residue.values():
            if res_sasa_info.total > 7.0:  # if total residue SASA is greater than 7.0 ...
                solv_access_residue.add(
                    (int(res_sasa_info.residueNumber), chain, res_sasa_info.residueType))

    surface_solv_access_residue = {}
    sd_res = None  # name of the last residue looked at; stays None when aa_CA is empty

    for res_num, chain, res_name in aa_CA:
        sd_res = res_name
        if (res_num, chain, res_name) in solv_access_residue:
            surface_solv_access_residue[(res_num, chain, res_name)] = aa_CA[(res_num, chain, res_name)]
        else:
            print("Residue {}-{}-{} is buried".format(res_num, chain, res_name))

    # inform user on buried residues (nothing to summarise when aa_CA is empty)
    if xl_list == "NULL" and sd_res is not None:
        if sd_res == "LYS":
            print("{} {} and 1 N-terminus of which {} are on the surface".format(len(aa_CA)-1, aa_dict[sd_res], len(surface_solv_access_residue)))
        else:
            print("{} {} of which {} are on the surface".format(len(aa_CA), aa_dict[sd_res], len(surface_solv_access_residue)))

    return surface_solv_access_residue
108
+
109
+
110
def check_solvent_accessibility_freesasa_both(prot, aa1_CA, aa2_CA, xl_list, aa_dict, ncpus):
    """
    Run freesasa ONCE for the structure file and return the filtered
    surface-accessible dictionaries for both aa1_CA and aa2_CA. Avoids the
    duplicate freesasa call that occurs when the filtering is done separately
    for each residue set.

    freesasa uses the classifier read from naccess.config.txt located next to
    this module. A residue counts as accessible when its total SASA is
    greater than 7.0. Every buried residue is reported on standard output;
    when xl_list equals "NULL" and a set has accessible residues, a summary
    line is printed for it as well.

    Returns: (surface_aa1_CA, surface_aa2_CA)
    """
    # The parameters have to be handed to freesasa.calc to take effect;
    # setting them on a temporary Parameters object changes nothing.
    parameters = freesasa.Parameters()
    parameters.setNSlices(50)
    parameters.setNThreads(ncpus)

    pt = os.path.dirname(os.path.realpath(__file__))
    classifier = freesasa.Classifier(os.path.join(pt, "naccess.config.txt"))
    structure = freesasa.Structure(os.path.normpath(prot), classifier)
    result = freesasa.calc(structure, parameters)

    solv_access_residue = set()
    for chain, residue in result.residueAreas().items():
        for res_sasa_info in residue.values():
            if res_sasa_info.total > 7.0:
                solv_access_residue.add(
                    (int(res_sasa_info.residueNumber), chain, res_sasa_info.residueType))

    def _filter(aa_CA):
        """Return the entries of aa_CA whose key is an accessible residue."""
        out = {}
        for res_num, chain, res_name in aa_CA:
            if (res_num, chain, res_name) in solv_access_residue:
                out[(res_num, chain, res_name)] = aa_CA[(res_num, chain, res_name)]
            else:
                print("Residue {}-{}-{} is buried".format(res_num, chain, res_name))
        if xl_list == "NULL" and out:
            # the residue name of the first accessible entry selects the summary wording
            sd_res = next(iter(out))[2]
            if sd_res == "LYS":
                print("{} {} and 1 N-terminus of which {} are on the surface".format(
                    len(aa_CA) - 1, aa_dict[sd_res], len(out)))
            else:
                print("{} {} of which {} are on the surface".format(
                    len(aa_CA), aa_dict[sd_res], len(out)))
        return out

    return _filter(aa1_CA), _filter(aa2_CA)
150
+