EntDetect 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. EntDetect/Jwalk/GridTools.py +567 -0
  2. EntDetect/Jwalk/PDBTools.py +532 -0
  3. EntDetect/Jwalk/SASDTools.py +543 -0
  4. EntDetect/Jwalk/SurfaceTools.py +150 -0
  5. EntDetect/Jwalk/__init__.py +19 -0
  6. EntDetect/Jwalk/naccess.config.txt +255 -0
  7. EntDetect/__init__.py +10 -0
  8. EntDetect/_logging.py +71 -0
  9. EntDetect/change_resolution.py +2361 -0
  10. EntDetect/clustering.py +2626 -0
  11. EntDetect/compare_sim2exp.py +1927 -0
  12. EntDetect/entanglement_features.py +478 -0
  13. EntDetect/gaussian_entanglement.py +2067 -0
  14. EntDetect/order_params.py +1048 -0
  15. EntDetect/resources/__init__.py +11 -0
  16. EntDetect/resources/__pycache__/__init__.cpython-311.pyc +0 -0
  17. EntDetect/resources/calc_K.pl +712 -0
  18. EntDetect/resources/calc_Q.pl +962 -0
  19. EntDetect/resources/pulchra +0 -0
  20. EntDetect/resources/shared_files/__init__.py +2 -0
  21. EntDetect/resources/shared_files/bt_contact_potential.dat +22 -0
  22. EntDetect/resources/shared_files/karanicolas_dihe_parm.dat +1600 -0
  23. EntDetect/resources/shared_files/kgs_contact_potential.dat +22 -0
  24. EntDetect/resources/shared_files/mj_contact_potential.dat +22 -0
  25. EntDetect/resources/stride +0 -0
  26. EntDetect/statistics.py +1344 -0
  27. EntDetect/utilities.py +201 -0
  28. entdetect-1.2.0.dist-info/METADATA +26 -0
  29. entdetect-1.2.0.dist-info/RECORD +45 -0
  30. entdetect-1.2.0.dist-info/WHEEL +5 -0
  31. entdetect-1.2.0.dist-info/entry_points.txt +11 -0
  32. entdetect-1.2.0.dist-info/licenses/LICENSE +674 -0
  33. entdetect-1.2.0.dist-info/top_level.txt +2 -0
  34. scripts/__init__.py +5 -0
  35. scripts/convert_cor_psf_to_pdb.py +103 -0
  36. scripts/run_Foldingpathway.py +162 -0
  37. scripts/run_MSM.py +152 -0
  38. scripts/run_OP_on_simulation_traj.py +194 -0
  39. scripts/run_change_resolution.py +63 -0
  40. scripts/run_compare_sim2exp.py +215 -0
  41. scripts/run_montecarlo.py +158 -0
  42. scripts/run_nativeNCLE.py +179 -0
  43. scripts/run_nonnative_entanglement_clustering.py +110 -0
  44. scripts/run_population_modeling.py +117 -0
  45. scripts/run_workflow4_nativeNCLE_batch.py +412 -0
@@ -0,0 +1,567 @@
1
+ # ===============================================================================
2
+ # This file is part of Jwalk (Python 3).
3
+ #
4
+ # Jwalk - A tool to calculate the solvent accessible surface distance (SASD)
5
+ # between crosslinked residues.
6
+ #
7
+ # Copyright 2016 Josh Bullock and Birkbeck College University of London.
8
+ #
9
+ # Jwalk is available under Public Licence.
10
+ # This software is made available under GPL V3
11
+ #
12
+ # Please cite your use of Jwalk in published work:
13
+ #
14
+ # J.Bullock, J. Schwab, K. Thalassinos, M. Topf (2016)
15
+ # The importance of non-accessible crosslinks and solvent accessible surface distance
16
+ # in modelling proteins with restraints from crosslinking mass spectrometry.
17
+ # Molecular and Cellular Proteomics (15) pp.2491-2500
18
+ #
19
+ # ===============================================================================
20
+
21
+ from numpy import zeros
22
+ from collections import OrderedDict
23
+
24
class Map:
    """

    A voxel grid together with the parameters that place it in real space.

    The grid is held in *fullMap* and is read in ZYX order: ``shape[0]`` is
    the z size, ``shape[1]`` the y size and ``shape[2]`` the x size.

    """

    def __init__(self, fullMap, origin, apix, filename, header=None):
        """

        Store a grid and its parameters on a new Map instance.

        *fullMap*
            3-D array of voxel values (the methods below use its ``shape`` and ``copy()``).
        *origin*
            origin co-ordinates of the map (x_origin, y_origin, z_origin).
        *apix*
            grid spacing of map.
        *filename*
            filename of the Map instance
        *header*
            optional list of header values. When omitted, each instance gets
            its own new empty list.

        NOTE: The *filename* 'build++copy' is reserved for copying of other Map class instances."""
        # A literal [] default would be created once and shared by every Map
        # built without a header, so appending to one header would change all.
        # A header supplied by the caller is stored as-is, not copied.
        self.header = [] if header is None else header
        self.origin = origin
        self.apix = apix
        self.filename = filename
        self.fullMap = fullMap

    def copy(self):
        """

        Return:
            copy of the Map. The grid, origin and header are duplicated so that
            editing the copy leaves this instance untouched.

        """
        return Map(
            self.fullMap.copy(),
            self.origin[:],
            self.apix,
            self.filename,
            self.header[:],
        )

    def box_size(self):
        """

        Return:
            size of the map array, in ZYX format.

        """
        return self.fullMap.shape

    def x_size(self):
        """

        Return:
            size of the map array in x direction (last axis of the grid).

        """
        return self.fullMap.shape[2]

    def y_size(self):
        """

        Return:
            size of the map array in y direction (middle axis of the grid).

        """
        return self.fullMap.shape[1]

    def z_size(self):
        """

        Return:
            size of the map array in z direction (first axis of the grid).

        """
        return self.fullMap.shape[0]
98
+
99
def makeGrid(struct, apix, resolution=3, filename="None"):
    """

    Returns an all-zero Map large enough to enclose a structure.

    Arguments:

       *struct*
           structure instance; must provide ``get_extreme_values()``, whose
           result is read as (x_min, x_max, y_min, y_max, z_min, z_max).
       *apix*
           angstroms per voxel
       *resolution*
           sets the margin added to every axis, in voxels:
           ``int(2*resolution/apix) + 2``.
       *filename*
           name stored on the returned Map.

    """
    bounds = struct.get_extreme_values()
    margin = int(2 * resolution / apix) + 2

    # Work through x, y, z in turn; bounds holds (low, high) pairs per axis.
    voxel_counts = []
    origin = []
    for axis in range(3):
        low = bounds[2 * axis]
        high = bounds[2 * axis + 1]
        n_voxels = int((high - low) / apix) + margin
        voxel_counts.append(n_voxels)
        # Place the grid so its centre sits on the midpoint of the two extremes.
        origin.append((high + low) / 2 - (apix * n_voxels / 2.0))

    x_size, y_size, z_size = voxel_counts

    # The array is laid out ZYX, while the origin is stored as [x, y, z].
    return Map(zeros((z_size, y_size, x_size)), origin, apix, filename)
127
+
128
def mapGridPosition(densMap, atom):
    """

    Returns the index of the nearest voxel to an atom, and the atom mass, as
    the tuple (x_pos, y_pos, z_pos, mass). Returns 0 when the nearest voxel
    lies outside the grid, so callers can test the result for truthiness.

    Arguments:

       *densMap*
           Map instance the atom is to be placed on.
       *atom*
           Atom instance (its x, y, z and mass attributes are read).

    """
    origin = densMap.origin
    apix = densMap.apix

    # Offset from the grid origin in voxel units, rounded to the nearest voxel.
    x_pos = int(round((atom.x - origin[0]) / apix, 0))
    y_pos = int(round((atom.y - origin[1]) / apix, 0))
    z_pos = int(round((atom.z - origin[2]) / apix, 0))

    inside = (
        (densMap.x_size() > x_pos >= 0)
        and (densMap.y_size() > y_pos >= 0)
        and (densMap.z_size() > z_pos >= 0)
    )
    if inside:
        return (x_pos, y_pos, z_pos, atom.mass)
    return 0
153
+
154
def mark_CAlphas(densMap, prot, aa1, aa2):
    """

    Collects the grid position of every 'CA' atom whose residue name matches
    aa1 or aa2.

    Returns two ordered dictionaries, one for aa1 and one for aa2, each mapping
    (residue_number, chain, residue name) to [x, y, z] voxel indices.

    Arguments:

       *densMap*
           Protein grid
       *prot*
           structure instance; its ``atomList`` is iterated
       *aa1*
           Residue type 1
       *aa2*
           Residue type 2

    """
    hits_aa1 = OrderedDict()
    hits_aa2 = OrderedDict()

    for atom in prot.atomList:
        if atom.atom_name != 'CA':
            continue

        # The two residue types are tested independently, so when aa1 and aa2
        # are the same the atom is recorded in both dictionaries.
        for wanted, hits in ((aa1, hits_aa1), (aa2, hits_aa2)):
            if atom.res == wanted:
                voxel = mapGridPosition(densMap, atom)
                hits[atom.res_no, atom.chain, atom.res] = [voxel[0], voxel[1], voxel[2]]

    return hits_aa1, hits_aa2
191
+
192
def process_input_crosslinks(uv_xl):
    """
    Processes crosslink input .txt file and returns list of residues and crosslinked pairs.

    Each line is split on "|" and read as: residue number 1 | chain 1 |
    residue number 2 | chain 2. Chains are stripped of surrounding whitespace
    and an empty chain is stored as a single space.

    Arguments:

       *uv_xl*
           path of the crosslink .txt file

    Returns (aa1, aa2, crosslink_pairs):
        aa1 and aa2 are lists of [residue_number, chain], one entry per line;
        crosslink_pairs is a list of
        [(residue_number_1, chain_1), (residue_number_2, chain_2)].

    A line with fewer than four "|"-separated fields prints an error and exits
    with status 1. A residue number that is not an integer raises ValueError.
    """
    # Imported here so the block does not depend on a file-level import.
    # sys.exit is used because the bare exit() helper is installed by the
    # site module and is not guaranteed to exist.
    import sys

    aa1 = []
    aa2 = []
    crosslink_pairs = []

    with open(uv_xl) as xl_in:
        for count, line in enumerate(xl_in, start=1):
            col = line.split("|")
            try:
                chain1 = col[1].strip()
                chain2 = col[3].strip()
            except IndexError:
                # Only a missing field can fail here; anything else should propagate.
                print("ERROR: formatting error on line {} : {}".format(str(count),line))
                sys.exit(1)

            # if no chain is given
            if len(chain1) == 0:
                chain1 = " "
            if len(chain2) == 0:
                chain2 = " "

            res_no1 = int(col[0])
            res_no2 = int(col[2])

            aa1.append([res_no1, chain1])
            aa2.append([res_no2, chain2])
            crosslink_pairs.append([(res_no1, chain1), (res_no2, chain2)])

    return aa1, aa2, crosslink_pairs
229
+
230
def mark_CAlphas_pairs(densMap, prot, uv_xl):
    """
    Processes input txt file. Checks each listed residue has a 'CA' atom in the
    structure and returns the pairs of crosslinks as well as the Calpha
    positions on the grid.

    Arguments:
       *densMap*
           grid that encompasses protein
       *prot*
           structure instance; its ``atomList`` is iterated
       *uv_xl*
           .txt input file

    Returns (crosslink_pairs_final, aa1_CA, aa2_CA):
        aa1_CA and aa2_CA are ordered dictionaries mapping
        (residue_number, chain, residue name) to [x, y, z] voxel indices.
        crosslink_pairs_final lists, for every crosslink whose two residues
        were both found, the pair of dictionary keys for those residues.

    Residues that are listed in the input but have no 'CA' atom in the
    structure are reported on stdout and their crosslinks are dropped.
    """
    # process txt file
    # crosslink_pairs is a list of [crosslinked aa1, aa2]
    aa1, aa2, crosslink_pairs = process_input_crosslinks(uv_xl)

    # Sets give constant-time membership tests in the per-atom loop below.
    wanted_aa1 = {(res_no, chain) for res_no, chain in aa1}
    wanted_aa2 = {(res_no, chain) for res_no, chain in aa2}

    aa1_CA = OrderedDict()
    aa2_CA = OrderedDict()
    found = set()

    for atom in prot.atomList:
        if atom.atom_name != "CA":
            continue
        residue = (atom.res_no, atom.chain)
        # A residue may be listed on both sides, so both sides are checked.
        for wanted, ca_voxels in ((wanted_aa1, aa1_CA), (wanted_aa2, aa2_CA)):
            if residue in wanted:
                pos = mapGridPosition(densMap, atom)
                ca_voxels[atom.res_no, atom.chain, atom.res] = [pos[0], pos[1], pos[2]]
                found.add(residue)

    # check that all the residues listed are in the structure
    # (one message per listed occurrence, aa1 entries first, then aa2)
    missing = set()
    for x in aa1 + aa2:
        if (x[0], x[1]) not in found:
            print("ERROR ! Residue {} - {} not in pdb structure - please check input files".format(x[0], x[1]))
            missing.add((x[0], x[1]))

    # look-up from (residue_number, chain) to the full key that carries the residue name
    aa_d = {}
    for key in aa1_CA:
        aa_d[key[0], key[1]] = key
    for key in aa2_CA:
        aa_d[key[0], key[1]] = key

    # keep only crosslinks whose two residues are both in the structure,
    # expressed with the full (residue_number, chain, residue name) keys
    crosslink_pairs_final = [
        [aa_d[x1], aa_d[x2]]
        for x1, x2 in crosslink_pairs
        if x1 not in missing and x2 not in missing
    ]

    return crosslink_pairs_final, aa1_CA, aa2_CA
308
+
309
def generate_solvent_accessible_surface(densMap,prot,aa1_CA, aa2_CA):
    """

    Marks, in place, every voxel that lies within an atom's expanded radius
    and returns the same grid. Voxels left at zero are the ones outside all
    atoms.

    Arguments:

       *densMap*
           Protein Grid (its fullMap is incremented in place)
       *prot*
           structure instance; its ``atomList`` is iterated
       *aa1_CA*
           voxel positions of each C_alpha atom of interest
       *aa2_CA*
           voxel positions of each C_alpha atom of interest

    """
    # every radius is enlarged by the same constant before being voxelised
    extra = 0.8
    radii = {
        'CA': 1.73 + extra,
        'S': 1.67 + extra,
        'N': 1.43 + extra,
        'OH': 1.30 + extra,
    }

    # one list of [x, y, z] voxel offsets per radius, covering a solid ball
    offsets_by_radius = {}
    for label, value in radii.items():
        reach = int(round(value/densMap.apix))
        span = range(-reach, reach + 1)
        offsets_by_radius[label] = [
            [dx, dy, dz]
            for dx in span
            for dy in span
            for dz in span
            if (dx**2 + dy**2 + dz**2) <= (reach**2)
        ]

    # the first letter of the atom name selects the ball;
    # atoms starting with any other letter are not placed at all
    ball_for_first_letter = {
        'C': offsets_by_radius['CA'],
        'O': offsets_by_radius['OH'],
        'N': offsets_by_radius['N'],
        'S': offsets_by_radius['S'],
    }

    backbone = ['N','CA','C','O']

    x_limit = densMap.x_size()
    y_limit = densMap.y_size()
    z_limit = densMap.z_size()

    for atom in prot.atomList:
        pos = mapGridPosition(densMap, atom)
        if not pos:
            # atom falls outside the grid
            continue

        # don't place side chain atoms of residues of interest in the surface
        residue = (atom.res_no, atom.chain, atom.res)
        if atom.atom_name not in backbone and (residue in aa1_CA or residue in aa2_CA):
            continue

        ball = ball_for_first_letter.get(atom.atom_name[:1])
        if ball is None:
            continue

        # expand the ball around the atom's voxel, clipping at the grid edges
        for (dx, dy, dz) in ball:
            vx = pos[0] + dx
            vy = pos[1] + dy
            vz = pos[2] + dz
            if (x_limit > vx >= 0) and (y_limit > vy >= 0) and (z_limit > vz >= 0):
                densMap.fullMap[vz][vy][vx] += 1

    return densMap
382
+
383
def find_empty_space(res,sphere,densMap,CA):
    """

    Returns the [x, y, z] grid positions, among a set of offsets around one
    residue's Calpha voxel, whose grid value is zero or less.

    Arguments:

       *res*
           key into CA of the residue being searched around
       *sphere*
           voxel offsets (x, y, z) to try around the CA voxel
       *densMap*
           grid whose fullMap is read as [z][y][x]
       *CA*
           mapping from residue key to its Calpha voxel (x, y, z)

    """
    centre_x, centre_y, centre_z = CA[res]
    free_voxels = []

    for (off_x, off_y, off_z) in sphere:
        vx = centre_x + off_x
        vy = centre_y + off_y
        vz = centre_z + off_z

        # skip offsets that land outside the grid
        if not (densMap.x_size() > vx >= 0):
            continue
        if not (densMap.y_size() > vy >= 0):
            continue
        if not (densMap.z_size() > vz >= 0):
            continue

        if densMap.fullMap[vz][vy][vx] <= 0:
            free_voxels.append([vx, vy, vz])

    return free_voxels
409
+
410
def find_surface_voxels(aa1_CA, densMap, xl_list=None):
    """

    Returns an ordered dictionary containing the possible starting voxels for
    each Calpha of interest, together with a list of entries to be removed.

    Six concentric shells of voxel offsets are tried around each Calpha, from
    the innermost outwards; the first shell that contains any voxel with a
    grid value of zero or less supplies the starting voxels. If no shell does,
    the residue is kept with an empty list.

    Arguments:

       *aa1_CA*
           Calpha voxels for amino acid type 1
       *densMap*
           Solvent accessible surface (grid)
       *xl_list*
           optional crosslink list; only its length is used here. Defaults to
           no list.

    Returns (start_voxels, rem_x). As written, rem_x is always an empty list
    (see the review note in the body).

    """
    # None replaces a mutable [] default; the list is only ever measured.
    if xl_list is None:
        xl_list = []

    # generate voxel sphere shells to progressively extend search for starting voxels
    # this is to keep starting voxels as close to CA as possible.
    C = 1.68
    radius = 1.73 + C

    # radius rounded up = 4 with apix = 1; this is the fourth of six radii
    fourth_radius = int(round(radius/densMap.apix))+1
    shell_radii = range(fourth_radius - 3, fourth_radius + 3)

    shells = []
    claimed = set()  # offsets already assigned to an inner shell
    for shell_radius in shell_radii:
        span = range(-shell_radius, shell_radius + 1)
        shell = [
            [x, y, z]
            for x in span
            for y in span
            for z in span
            if (x**2 + y**2 + z**2) <= (shell_radius**2) and (x, y, z) not in claimed
        ]
        claimed.update((x, y, z) for x, y, z in shell)
        shells.append(shell)

    # iterate through sphere shells and record the first non-empty result
    aa_1_start_voxels = OrderedDict()
    for k in aa1_CA:
        voxels = []
        for shell in shells:
            voxels = find_empty_space(k, shell, densMap, aa1_CA)
            if voxels:
                break
        aa_1_start_voxels[k] = voxels

    # NOTE(review): nothing in this function adds to `buried` or changes
    # `k_buried`, so the report below never runs and rem_x stays empty even
    # when a residue has no starting voxels. It looks as though residues with
    # no free voxel in any shell were meant to be recorded here - confirm
    # against the callers before enabling that.
    buried = []
    k_buried = 0
    rem_x = []

    if len(buried) > 0 and len(xl_list) > 0:
        print("ERROR - {} buried residue(s) in xl_list:".format(k_buried))
        for t in buried:
            print("{} - {} - {}".format(str(t[0]), str(t[1]), str(t[2])))
            rem_x.append([t[0],t[1]])

    return aa_1_start_voxels, rem_x
530
+
531
def remove_duplicates(sasds):
    """

    Returns sasds reduced to one entry per (start, end) pair: the entry with
    the smallest distance.

    Arguments:

       *sasds*
           dictionary of sasds, keyed by (start, end, distance)

    """
    # smallest distance seen so far for each (start, end)
    shortest = {}
    for (start, end, distance) in sasds:
        pair = (start, end)
        if pair not in shortest or distance < shortest[pair]:
            shortest[pair] = distance

    # rebuild the three-part keys and pull the matching values from the input
    return {
        (start, end, distance): sasds[start, end, distance]
        for (start, end), distance in shortest.items()
    }