EntDetect 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. EntDetect/Jwalk/GridTools.py +567 -0
  2. EntDetect/Jwalk/PDBTools.py +532 -0
  3. EntDetect/Jwalk/SASDTools.py +543 -0
  4. EntDetect/Jwalk/SurfaceTools.py +150 -0
  5. EntDetect/Jwalk/__init__.py +19 -0
  6. EntDetect/Jwalk/naccess.config.txt +255 -0
  7. EntDetect/__init__.py +10 -0
  8. EntDetect/_logging.py +71 -0
  9. EntDetect/change_resolution.py +2361 -0
  10. EntDetect/clustering.py +2626 -0
  11. EntDetect/compare_sim2exp.py +1927 -0
  12. EntDetect/entanglement_features.py +478 -0
  13. EntDetect/gaussian_entanglement.py +2067 -0
  14. EntDetect/order_params.py +1048 -0
  15. EntDetect/resources/__init__.py +11 -0
  16. EntDetect/resources/__pycache__/__init__.cpython-311.pyc +0 -0
  17. EntDetect/resources/calc_K.pl +712 -0
  18. EntDetect/resources/calc_Q.pl +962 -0
  19. EntDetect/resources/pulchra +0 -0
  20. EntDetect/resources/shared_files/__init__.py +2 -0
  21. EntDetect/resources/shared_files/bt_contact_potential.dat +22 -0
  22. EntDetect/resources/shared_files/karanicolas_dihe_parm.dat +1600 -0
  23. EntDetect/resources/shared_files/kgs_contact_potential.dat +22 -0
  24. EntDetect/resources/shared_files/mj_contact_potential.dat +22 -0
  25. EntDetect/resources/stride +0 -0
  26. EntDetect/statistics.py +1344 -0
  27. EntDetect/utilities.py +201 -0
  28. entdetect-1.2.0.dist-info/METADATA +26 -0
  29. entdetect-1.2.0.dist-info/RECORD +45 -0
  30. entdetect-1.2.0.dist-info/WHEEL +5 -0
  31. entdetect-1.2.0.dist-info/entry_points.txt +11 -0
  32. entdetect-1.2.0.dist-info/licenses/LICENSE +674 -0
  33. entdetect-1.2.0.dist-info/top_level.txt +2 -0
  34. scripts/__init__.py +5 -0
  35. scripts/convert_cor_psf_to_pdb.py +103 -0
  36. scripts/run_Foldingpathway.py +162 -0
  37. scripts/run_MSM.py +152 -0
  38. scripts/run_OP_on_simulation_traj.py +194 -0
  39. scripts/run_change_resolution.py +63 -0
  40. scripts/run_compare_sim2exp.py +215 -0
  41. scripts/run_montecarlo.py +158 -0
  42. scripts/run_nativeNCLE.py +179 -0
  43. scripts/run_nonnative_entanglement_clustering.py +110 -0
  44. scripts/run_population_modeling.py +117 -0
  45. scripts/run_workflow4_nativeNCLE_batch.py +412 -0
@@ -0,0 +1,532 @@
1
+ # ===============================================================================
2
+ # This file is part of Jwalk (Python 3).
3
+ #
4
+ # Jwalk - A tool to calculate the solvent accessible surface distance (SASD)
5
+ # between crosslinked residues.
6
+ #
7
+ # Copyright 2016 Josh Bullock and Birkbeck College University of London.
8
+ #
9
+ # Jwalk is available under Public Licence.
10
+ # This software is made available under GPL V3
11
+ #
12
+ # Please cite your use of Jwalk in published work:
13
+ #
14
+ # J.Bullock, J. Schwab, K. Thalassinos, M. Topf (2016)
15
+ # The importance of non-accessible crosslinks and solvent accessible surface distance
16
+ # in modelling proteins with restraints from crosslinking mass spectrometry.
17
+ # Molecular and Cellular Proteomics (15) pp.2491-2500
18
+ #
19
+ # ===============================================================================
20
+
21
+ import os, pathlib
22
+ from string import ascii_uppercase
23
+ from numpy import array, append
24
+ from Bio.PDB import PDBParser as PDBParserBiopy
25
+
26
class Vector:
    """A 3-dimensional Cartesian vector stored as ``x``, ``y`` and ``z``."""

    def __init__(self, x,y,z):
        """Store the three Cartesian components of the vector."""
        self.x, self.y, self.z = x, y, z

    def copy(self):
        """
        Return:
            A new Vector instance carrying the same x, y and z values.
        """
        return Vector(x=self.x, y=self.y, z=self.z)

    def to_atom(self):
        """
        Build an atom object positioned at this vector.

        Return:
            A BioPyAtom constructed from an empty list, on which only the
            x, y and z attributes are assigned by this method.
        """
        placeholder = BioPyAtom([])
        for axis in ("x", "y", "z"):
            setattr(placeholder, axis, getattr(self, axis))
        return placeholder
54
+
55
class BioPy_Structure:
    """
    A Structure object: an ordered collection of atoms, as read from a PDB
    file using Bio.PDB in Biopython.

    Attributes set by ``__init__``:
        header, footer, filename -- stored exactly as passed in.
        atomList -- numpy array built from the supplied atoms.
        CoM      -- centre of mass (Vector) computed at construction time.
        initCoM  -- copy of ``CoM`` taken at construction time.
    """

    def __init__(self, atomList, filename='Unknown', header='', footer =''):
        """
        Initialise from a sequence of atom objects.

        Arguments:
            *atomList*
                Sequence of atom objects; converted to a numpy array.
            *filename*
                Name of the file the atoms came from ('Unknown' by default).
            *header*, *footer*
                Free-form text stored alongside the atoms.

        Raises:
            ZeroDivisionError if the total mass of the atoms is zero
            (for example when *atomList* is empty), because the centre of
            mass is computed here.
        """
        self.header = header
        self.footer = footer
        self.filename = filename
        self.atomList = array(atomList)
        # Centre of mass calculations
        self.CoM = self.calculate_centre_of_mass()
        self.initCoM = self.CoM.copy()

    def __getitem__(self, index):
        """Return the atom(s) at *index* of the underlying atom array."""
        return self.atomList[index]

    def __len__(self):
        """Return the number of atoms in the structure."""
        return len(self.atomList)

    def __repr__(self):
        """Summarise the structure: file name (if known), size, end atoms."""
        parts = []
        if self.filename != 'Unknown':
            # str() so that path objects can be used as the file name too.
            parts.append('Filename: ' + str(self.filename) + '\n')
        parts.append('No Of Atoms: ' + str(len(self)) + '\n')
        # Indexing an empty atom array would raise, so only report the
        # first/last atoms when there is at least one.
        if len(self) > 0:
            parts.append('First Atom: ' + str(self.atomList[0]) + '\n')
            parts.append('Last Atom: ' + str(self.atomList[-1]) + '\n')
        return ''.join(parts)

    def copy(self):
        """
        Return:
            Copy of the Structure instance. Every atom is copied through its
            own ``copy()`` method, and the file name, header and footer are
            carried over to the new instance.
        """
        duplicated_atoms = [atom.copy() for atom in self.atomList]
        return BioPy_Structure(
            duplicated_atoms, self.filename, self.header, self.footer
        )

    def calculate_centre_of_mass(self):
        """
        Return:
            Centre of mass of the structure as a Vector instance.

        Raises:
            ZeroDivisionError if the summed atom mass is zero.
        """
        x_momentTotal = 0.0
        y_momentTotal = 0.0
        z_momentTotal = 0.0
        massTotal = 0.0
        for atom in self.atomList:
            m = atom.get_mass()
            x_momentTotal += atom.get_x()*m
            y_momentTotal += atom.get_y()*m
            z_momentTotal += atom.get_z()*m
            massTotal += m
        return Vector(
            x_momentTotal/massTotal,
            y_momentTotal/massTotal,
            z_momentTotal/massTotal,
        )

    def get_extreme_values(self):
        """
        Return:
            A 6-tuple containing the minimum and maximum of the x, y and z
            co-ordinates of the structure, in the order
            (min_x, max_x, min_y, max_y, min_z, max_z).

        Raises:
            IndexError if the structure holds no atoms.
        """
        first = self.atomList[0]
        min_x = max_x = first.get_x()
        min_y = max_y = first.get_y()
        min_z = max_z = first.get_z()
        for atom in self.atomList[1:]:
            # Read each co-ordinate once per atom instead of once per test.
            x, y, z = atom.get_x(), atom.get_y(), atom.get_z()
            min_x, max_x = min(min_x, x), max(max_x, x)
            min_y, max_y = min(min_y, y), max(max_y, y)
            min_z, max_z = min(min_z, z), max(max_z, z)
        return (min_x, max_x, min_y, max_y, min_z, max_z)
165
+
166
class BioPyAtom:
    """
    A class representing an atom, as read from a PDB file using Biopython.
    """

    def __init__(self, atom):
        """
        Populate the atom from a Biopython atom object.

        Arguments:
            *atom*
                Biopython atom to read from, or an empty list ``[]`` to get
                a blank placeholder on which NO attributes are set (the
                caller is expected to assign the ones it needs).
        """
        if atom == []:
            return

        # Record layout: http://deposit.rcsb.org/adit/docs/pdb_atom_format.html
        residue = atom.get_parent()
        # Water ("W") and hetero ("H...") residues become HETATM records.
        if residue.get_id()[0][0] in ("W", "H"):
            self.record_name = "HETATM"
        else:
            self.record_name = "ATOM"
        self.serial = atom.get_serial_number()
        self.atom_name = atom.get_name()
        self.alt_loc = atom.get_altloc()  # alternative location specifier
        # e.g. ('3ukr_test', 0, 'G', (' ', 113, ' '), ('CA', ' '))
        self.fullid = atom.get_full_id()
        self.res = residue.get_resname()
        self.chain = self.fullid[2]
        self.res_no = int(self.fullid[3][1])
        # NOTE(review): this field carries "D" for disordered atoms rather
        # than the residue insertion code -- confirm that this is intended.
        self.icode = ""
        if atom.is_disordered()==1:
            self.icode = "D"
        coord = atom.get_coord()
        # Starting co-ordinates of atom.
        self.init_x = coord[0]
        self.init_y = coord[1]
        self.init_z = coord[2]
        # Current co-ordinates of atom.
        self.x = float(coord[0])
        self.y = float(coord[1])
        self.z = float(coord[2])
        self.occ = atom.get_occupancy()
        self.temp_fac = atom.get_bfactor()
        # Best effort: fall back to an empty element when get_element() is
        # unavailable or fails, without swallowing KeyboardInterrupt/SystemExit.
        try:
            self.elem = atom.get_element()
        except Exception:
            self.elem = ""
        self.charge = ""
        # Every atom gets a unit mass.
        self.mass = 1.0
        # True if atom is the terminal of a chain. False until modified.
        self.isTerm = False

    def __repr__(self):
        """Return '(RES resno chain: x, y, z)' for this atom."""
        return '('+ self.get_res() +' '+ str(self.res_no) + ' '+self.chain + ': ' + str(self.x) + ', ' + str(self.y) + ', ' + str(self.z) + ')'

    def copy(self):
        """
        Return:
            Copy of the Atom instance. All attributes currently present on
            this atom (including ``fullid``) are carried over, so partially
            populated placeholder atoms can be copied as well.
        """
        duplicate = BioPyAtom([])
        vars(duplicate).update(vars(self))
        return duplicate

    def get_mass(self):
        """
        Return:
            Atom mass.
        """
        return self.mass

    def map_grid_position(self, densMap):
        """
        Arguments:
            *densMap*
                Map object exposing ``x_origin``, ``y_origin``, ``z_origin``,
                ``apix``, ``x_size``, ``y_size`` and ``z_size``.

        Return:
            Tuple ``(x_pos, y_pos, z_pos, mass)`` giving the grid indices of
            this atom in the map together with the atom mass.
            Return 0 if atom is outside of map.
        """
        apix = densMap.apix
        # int() truncates toward zero, so offsets in (-apix, 0) map to index 0.
        x_pos = int((self.get_x()-densMap.x_origin)/apix)
        y_pos = int((self.get_y()-densMap.y_origin)/apix)
        z_pos = int((self.get_z()-densMap.z_origin)/apix)
        inside = (
            (densMap.x_size > x_pos >= 0)
            and (densMap.y_size > y_pos >= 0)
            and (densMap.z_size > z_pos >= 0)
        )
        if inside:
            return (x_pos, y_pos, z_pos, self.mass)
        return 0

    def get_x(self):
        """
        Return:
            x co-ordinate of atom.
        """
        return float(self.x)

    def get_y(self):
        """
        Return:
            y co-ordinate of atom.
        """
        return float(self.y)

    def get_z(self):
        """
        Return:
            z co-ordinate of atom.
        """
        return float(self.z)

    def get_name(self):
        """
        Return:
            atom name (ie. 'CA' or 'O').
        """
        return self.atom_name

    def get_res(self):
        """
        Return:
            three letter residue code corresponding to the atom (i.e 'ARG').
        """
        return self.res

    def get_res_no(self):
        """
        Return:
            residue number corresponding to the atom.
        """
        return self.res_no

    def get_id_no(self):
        """
        Return:
            atom serial number, as stored on the atom.
        """
        return self.serial

    def write_to_PDB(self):
        """
        Format this atom as one fixed-column PDB ATOM/HETATM record.

        Nothing is written to disk; the caller writes the returned text.

        Return:
            The record as a string terminated by a newline. With field
            values of standard width the record is 80 characters long:
            co-ordinates occupy columns 31-54, occupancy 55-60, temperature
            factor 61-66, element 77-78 and charge 79-80.
        """
        fields = [
            self.record_name.ljust(6),             # columns 1-6
            str(self.serial).rjust(5) + ' ',       # 7-11, blank 12
            self.atom_name.center(4),              # 13-16
            self.alt_loc.ljust(1),                 # 17
            self.res.ljust(3) + ' ',               # 18-20, blank 21
            self.chain.ljust(1),                   # 22
            str(self.res_no).rjust(4),             # 23-26
            str(self.icode).ljust(1) + ' ' * 3,    # 27, blanks 28-30
            ('%.3f' % self.x).rjust(8),            # 31-38
            ('%.3f' % self.y).rjust(8),            # 39-46
            ('%.3f' % self.z).rjust(8),            # 47-54
            ('%.2f' % float(self.occ)).rjust(6),   # 55-60
            ('%.2f' % float(self.temp_fac)).rjust(6) + ' ' * 10,  # 61-66, blanks 67-76
            self.elem.strip().rjust(2),            # 77-78
            self.charge.strip().ljust(2),          # 79-80
        ]
        return ''.join(fields) + '\n'
374
+
375
def read_PDB_file(filename,hetatm=False,water=False):
    """
    Parse a PDB file with Biopython and wrap its atoms in a BioPy_Structure.

    Only the first model (index 0) of the parsed structure is read. In
    ordinary residues, atoms whose id starts with "H" (hydrogens) are
    omitted. Hetero and water residues are collected separately and only
    appended, in that order, when requested.

    Arguments:
        *filename*
            Path of the PDB file to read.
        *hetatm*
            If True, append atoms of hetero residues after the regular atoms.
        *water*
            If True, append atoms of water residues at the end.

    Return:
        BioPy_Structure built from the selected atoms, with *filename*
        stored on it and empty header and footer.
    """
    parser = PDBParserBiopy(QUIET=True)  # PERMISSIVE is left at its default
    # The handle is only needed while parsing; close it deterministically.
    with open(filename, "r") as struct_file:
        structure = parser.get_structure("id", struct_file)

    atomList = []
    hetatomList = []
    wateratomList = []
    footer = ''
    header = ''

    for res in structure[0].get_residues():
        hetfield = res.get_id()[0]
        if hetfield[0] == "H":
            hetatomList.extend(BioPyAtom(atom) for atom in res)
        elif hetfield[0] == "W":
            wateratomList.extend(BioPyAtom(atom) for atom in res)
        else:
            # hydrogens are omitted.
            atomList.extend(
                BioPyAtom(atom) for atom in res if atom.id[0] != "H"
            )

    if hetatm:
        atomList.extend(hetatomList)
    if water:
        atomList.extend(wateratomList)

    return BioPy_Structure(atomList, filename, header, footer)
409
+
410
def write_sasd_to_txt(sasds,pdb,result_dir):
    """
    Outputs sasds to a .txt file.

    The file is written to ``<result_dir>/Jwalk_results/<stem>_crosslink_list.txt``
    where ``<stem>`` is the file name of *pdb* without its suffix. The output
    directory (including missing parents) is created when needed, and an
    existing file of the same name is overwritten.

    Arguments:
        *sasds*
            Iterable (e.g. the keys of a dictionary) of crosslink entries
            ``((aa1, chain1, res1), (aa2, chain2, res2), sasd, ed)``, where
            aa1/aa2 are integer residue numbers.
        *pdb*
            .pdb file sasds were calculated on
        *result_dir*
            Directory under which the ``Jwalk_results`` folder is placed.
    """
    jwalk_path = pathlib.Path(result_dir, 'Jwalk_results')
    # Creating unconditionally avoids the check-then-create race and also
    # works when result_dir itself does not exist yet.
    jwalk_path.mkdir(parents=True, exist_ok=True)

    pdb = pathlib.Path(pdb)
    write_path = jwalk_path / '{}_crosslink_list.txt'.format(pdb.stem)

    def format_row(columns):
        # Left-align every column to 13 characters, separated by one space.
        return ' '.join('{0:<13}'.format(col) for col in columns) + '\n'

    with open(write_path,'w') as outf:
        outf.write(format_row(['Index','Model','Atom1','Atom2','SASD','Euclidean Distance']))

        for index, xl in enumerate(sasds, start=1):
            (aa1,chain1,res1)=xl[0]
            (aa2,chain2,res2)=xl[1]
            atom1 = ('%s-%d-%s-CA' % (res1,aa1,chain1) )
            atom2 = ('%s-%d-%s-CA' % (res2,aa2,chain2) )
            sasd=xl[2]
            ed=xl[3]
            outf.write(format_row([index,pdb.stem,atom1,atom2,sasd,ed]))
448
+
449
def write_sasd_to_pdb(dens_map,sasds,pdb,result_dir):
    """
    Outputs sasds to a .pdb file.

    The file is written to ``<result_dir>/Jwalk_results/<stem>_crosslinks.pdb``
    where ``<stem>`` is the file name of *pdb* without its suffix. The output
    directory (including missing parents) is created when needed. Each
    crosslink is written as a ``# MODEL`` comment line, one ATOM record per
    path point, CONECT records joining consecutive points, and an END line.

    Arguments:
        *dens_map*
            Solvent accessible surface on masked array; its ``apix`` and
            ``origin`` attributes are used to convert grid indices to
            Cartesian co-ordinates.
        *sasds*
            dictionary of sasds, keyed by
            ``((aa1, chain1, res1), (aa2, chain2, res2), ...)`` with the list
            of ``(x, y, z)`` grid points of the path as value.
        *pdb*
            .pdb file sasds were calculated on
        *result_dir*
            Directory under which the ``Jwalk_results`` folder is placed.
    """
    jwalk_path = pathlib.Path(result_dir, 'Jwalk_results')
    # Creating unconditionally avoids the check-then-create race and also
    # works when result_dir itself does not exist yet.
    jwalk_path.mkdir(parents=True, exist_ok=True)

    apix = dens_map.apix
    origin = dens_map.origin

    # Convert every path from grid indices to Cartesian co-ordinates.
    path_coord = {}
    for xl in sasds:
        path_coord[xl] = [
            [(x*apix)+origin[0], (y*apix)+origin[1], (z*apix)+origin[2]]
            for (x,y,z) in sasds[xl]
        ]

    write_path = jwalk_path / '{}_crosslinks.pdb'.format(pathlib.Path(pdb).stem)
    n_letters = len(ascii_uppercase)
    with open(write_path,'w') as out_pdb:
        # Little trick to id all crosslink points with false
        # ATOM ([A-Z]X) / CHAIN ([A-Z]) name pairs. The running point counter
        # spans all crosslinks; pairs are unique for the first 26*26 points,
        # after which the chain letter wraps around instead of raising
        # IndexError.
        point_cnt = 0
        model_cnt = 1
        for xl in path_coord:
            (aa1,chain1,res1)=xl[0]
            (aa2,chain2,res2)=xl[1]

            out_pdb.write('# MODEL {:d} {:s}{:d}{:s}-{:s}{:d}{:s}\n'.format(model_cnt,res1,aa1,chain1,res2,aa2,chain2))
            model_cnt += 1

            points_in_model = 0
            for serial, (x,y,z) in enumerate(path_coord[xl], start=1):
                chain_idx, atom_idx = divmod(point_cnt, n_letters)
                atom_tmp = ascii_uppercase[atom_idx]+'X'
                chain_tmp = ascii_uppercase[chain_idx % n_letters]

                a = Vector(x,y,z).to_atom()
                a.record_name = 'ATOM'
                a.serial = serial
                a.atom_name = atom_tmp
                a.alt_loc = ''
                a.res = chain_tmp+atom_tmp
                a.chain = chain_tmp
                a.res_no = serial
                a.icode = ''
                a.occ = 1
                a.temp_fac = 0
                a.elem = 'X'
                a.charge = ''
                out_pdb.write(a.write_to_PDB())

                point_cnt += 1
                points_in_model = serial

            # Join consecutive path points; added for better visualization
            # in pymol.
            for i in range(1, points_in_model):
                out_pdb.write('CONECT {:4d} {:4d}\n'.format(i,i+1))
            # NOTE(review): END is emitted once per crosslink here -- confirm
            # that this matches the intended file layout.
            out_pdb.write('END\n')
532
+