EntDetect 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. EntDetect/Jwalk/GridTools.py +567 -0
  2. EntDetect/Jwalk/PDBTools.py +532 -0
  3. EntDetect/Jwalk/SASDTools.py +543 -0
  4. EntDetect/Jwalk/SurfaceTools.py +150 -0
  5. EntDetect/Jwalk/__init__.py +19 -0
  6. EntDetect/Jwalk/naccess.config.txt +255 -0
  7. EntDetect/__init__.py +10 -0
  8. EntDetect/_logging.py +71 -0
  9. EntDetect/change_resolution.py +2361 -0
  10. EntDetect/clustering.py +2626 -0
  11. EntDetect/compare_sim2exp.py +1927 -0
  12. EntDetect/entanglement_features.py +478 -0
  13. EntDetect/gaussian_entanglement.py +2067 -0
  14. EntDetect/order_params.py +1048 -0
  15. EntDetect/resources/__init__.py +11 -0
  16. EntDetect/resources/__pycache__/__init__.cpython-311.pyc +0 -0
  17. EntDetect/resources/calc_K.pl +712 -0
  18. EntDetect/resources/calc_Q.pl +962 -0
  19. EntDetect/resources/pulchra +0 -0
  20. EntDetect/resources/shared_files/__init__.py +2 -0
  21. EntDetect/resources/shared_files/bt_contact_potential.dat +22 -0
  22. EntDetect/resources/shared_files/karanicolas_dihe_parm.dat +1600 -0
  23. EntDetect/resources/shared_files/kgs_contact_potential.dat +22 -0
  24. EntDetect/resources/shared_files/mj_contact_potential.dat +22 -0
  25. EntDetect/resources/stride +0 -0
  26. EntDetect/statistics.py +1344 -0
  27. EntDetect/utilities.py +201 -0
  28. entdetect-1.2.0.dist-info/METADATA +26 -0
  29. entdetect-1.2.0.dist-info/RECORD +45 -0
  30. entdetect-1.2.0.dist-info/WHEEL +5 -0
  31. entdetect-1.2.0.dist-info/entry_points.txt +11 -0
  32. entdetect-1.2.0.dist-info/licenses/LICENSE +674 -0
  33. entdetect-1.2.0.dist-info/top_level.txt +2 -0
  34. scripts/__init__.py +5 -0
  35. scripts/convert_cor_psf_to_pdb.py +103 -0
  36. scripts/run_Foldingpathway.py +162 -0
  37. scripts/run_MSM.py +152 -0
  38. scripts/run_OP_on_simulation_traj.py +194 -0
  39. scripts/run_change_resolution.py +63 -0
  40. scripts/run_compare_sim2exp.py +215 -0
  41. scripts/run_montecarlo.py +158 -0
  42. scripts/run_nativeNCLE.py +179 -0
  43. scripts/run_nonnative_entanglement_clustering.py +110 -0
  44. scripts/run_population_modeling.py +117 -0
  45. scripts/run_workflow4_nativeNCLE_batch.py +412 -0
@@ -0,0 +1,2361 @@
1
+ #!/usr/bin/env python3
2
+ try:
3
+ from openmm.app import *
4
+ from openmm import *
5
+ from openmm.unit import *
6
+ except:
7
+ from simtk.openmm.app import *
8
+ from simtk.openmm import *
9
+ from simtk.unit import *
10
+ from sys import stdout, exit, stderr
11
+ import getopt, os, time, random, math, traceback, io, sys, string
12
+ import parmed as pmd
13
+ import numpy as np
14
+ from importlib.resources import files
15
+ import xml.etree.cElementTree as ET
16
+ import xml.dom.minidom as MD
17
+ import numpy
18
+ import pathlib
19
+ import subprocess
20
+ import logging
21
+ from EntDetect._logging import setup_logger
22
+
23
+ sys.setrecursionlimit(int(1e6))
24
+
25
+ class CoarseGrain:
26
+ """
27
+ Processes biological data including PDB files, sequence data, and interaction potentials.
28
+ """
29
+ #############################################################################################################
30
def __init__(self, pdbfile:str, ID:str='ID', nscal:float = 1.5, outdir:str = './', fnn:int = 1,
             potential_name:str = 'bt', casm:int = 0, domain_file:str = 'None', ca_prefix:str = 'A',
             sc_prefix:str = 'B', log_level:int = logging.INFO, logdir:str = None):
    """
    Store the coarse-graining configuration and load the static parameter tables.

    Parameters
    ----------
    pdbfile : path of the input structure (stored only; not read here).
    ID : identifier handed to the logger.
    nscal : scale factor multiplied into every contact energy. It is forced
        to 1 when ``domain_file`` is given.
    outdir : output directory; created when it does not exist.
    fnn : stored as ``self.fnn`` (not used in the constructor).
    potential_name : 'bt', 'mj' or 'kgs' (case-insensitive). 'generic-*'
        names are recognised but rejected as unsupported.
    casm : 0 for the C-alpha model, 1 for the C-alpha/side-chain model.
    domain_file : the string 'None', or the path of a domain definition
        file holding 'domain = i-j' and 'scale factor = x' lines.
    ca_prefix, sc_prefix : stored as given (not used in the constructor).
    log_level, logdir : passed to ``setup_logger``; ``logdir`` falls back
        to ``outdir`` when it is None.

    Every configuration error is logged and then terminates the process
    through ``sys.exit()``.
    """
    self.pdbfile = pdbfile
    self.ID = ID
    self.nscal = nscal
    self.outdir = outdir
    self.logger = setup_logger('CoarseGrain', outdir=logdir if logdir is not None else outdir, ID=ID, log_level=log_level)
    self.fnn = fnn
    self.potential_name = potential_name
    self.casm = casm
    self.domain_file = domain_file
    self.ca_prefix = ca_prefix
    self.sc_prefix = sc_prefix
    self.heav_cut = 4.5

    if not os.path.exists(self.outdir):
        os.makedirs(self.outdir)
        self.logger.info(f'Made directory: {self.outdir}')

    ######################## Data #########################
    ## Look-up table for uniquely identifying residues #
    self.aa = ["GLY","ALA","VAL","LEU","ILE","MET","PHE","PRO","SER","THR","CYS","ASN","GLN","TYR","TRP","ASP","GLU","HIS","LYS","ARG"]
    if len(self.aa) != 20:
        self.logger.error('ERROR')
        sys.exit()
    self.res2n = {a: i for i, a in enumerate(self.aa)}
    self.n2res = {i: a for i, a in enumerate(self.aa)}

    self.Mass = {"N": 14.0067,
                 "H": 1.00794,
                 "C": 12.011,
                 "O": 15.9994,
                 "S": 32.06,}

    # number of heavy atoms in sidechains
    self.refNscat = {"ALA": 1,
                     "CYS": 2,
                     "ASP": 4,
                     "GLU": 5,
                     "PHE": 7,
                     "GLY": 0,
                     "HIS": 6,
                     "HSD": 6,
                     "HSE": 6,
                     "HSP": 6,
                     "ILE": 4,
                     "LYS": 5,
                     "LEU": 4,
                     "MET": 4,
                     "ASN": 4,
                     "PRO": 3,
                     "GLN": 5,
                     "ARG": 7,
                     "SER": 2,
                     "THR": 3,
                     "VAL": 3,
                     "TRP": 10,
                     "TYR": 8}

    # charges on side chains at pH 7
    self.refcharge = {"ALA": 0.0,
                      "CYS": 0.0,
                      "ASP": -1.0,
                      "GLU": -1.0,
                      "PHE": 0.0,
                      "GLY": 0.0,
                      "HIS": 0.0,
                      "HSD": 0.0,
                      "HSE": 0.0,
                      "HSP": 0.0,
                      "ILE": 0.0,
                      "LYS": 1.0,
                      "LEU": 0.0,
                      "MET": 0.0,
                      "ASN": 0.0,
                      "PRO": 0.0,
                      "GLN": 0.0,
                      "ARG": 1.0,
                      "SER": 0.0,
                      "THR": 0.0,
                      "VAL": 0.0,
                      "TRP": 0.0,
                      "TYR": 0.0}

    # Generic C_alpha side-chain center of mass distance
    self.lbs_nongo = {"ASP": 2.46916481058687,
                      "PRO": 1.87381801537346,
                      "LYS": 3.49738414814426,
                      "ILE": 2.25260184847053,
                      "TRP": 3.58251993741888,
                      "CYS": 2.06666004558289,
                      "HSD": 3.15209719417679,
                      "PHE": 3.38385541816659,
                      "HSP": 3.15209719417679,
                      "GLN": 3.08654121335,
                      "SER": 1.89840600762153,
                      "ASN": 2.46916481058687,
                      "VAL": 1.93953811063784,
                      "LEU": 2.56580983973678,
                      "TYR": 3.38981664391425,
                      "GLU": 3.07971386504681,
                      "ARG": 3.39687572938579,
                      "THR": 1.931721703272,
                      "ALA": 1.51146031725997,
                      "MET": 2.95389402456081,
                      "HIS": 3.15209719417679,
                      "HSE": 3.15209719417679}

    self.improper_nongo = {"ASP": 14.655341300544,
                           "PRO": 26.763068425539,
                           "LYS": 12.765248692601,
                           "ILE": 13.5446902008313,
                           "TRP": 11.4483488626106,
                           "CYS": 20.0484470024042,
                           "HSD": 14.9962640689562,
                           "PHE": 10.9217771918902,
                           "HSP": 14.9962640689562,
                           "GLN": 17.3050853491068,
                           "SER": 20.1390130256255,
                           "ASN": 14.655341300544,
                           "VAL": 13.3216022614598,
                           "LEU": 11.8137180266206,
                           "TYR": 12.2715081962165,
                           "GLU": 15.4130821146834,
                           "ARG": 15.5451613009777,
                           "THR": 16.2956083930276,
                           "ALA": 16.8418866013662,
                           "MET": 12.7046284165739,
                           "HIS": 14.9962640689562,
                           "HSE": 14.9962640689562}

    self.ang_sb_nongo = {"ASP": 120.380153696218,
                         "PRO": 125.127927161651,
                         "LYS": 119.523270610009,
                         "ILE": 118.791108398805,
                         "TRP": 130.018548241749,
                         "CYS": 110.512719347428,
                         "HSD": 116.815900172681,
                         "PHE": 122.937540996701,
                         "HSP": 116.815900172681,
                         "GLN": 116.182123224059,
                         "SER": 107.971234136647,
                         "ASN": 120.380153696218,
                         "VAL": 112.877421898116,
                         "LEU": 123.32179171436,
                         "TYR": 116.783314494739,
                         "GLU": 116.659068554985,
                         "ARG": 119.709740783191,
                         "THR": 111.719883260793,
                         "ALA": 108.623605160075,
                         "MET": 116.636559053295,
                         "HIS": 116.815900172681,
                         "HSE": 116.815900172681}

    self.ang_bs_nongo = {"ASP": 116.629356207687,
                         "PRO": 79.4932105625367,
                         "LYS": 119.779735484239,
                         "ILE": 116.923861483529,
                         "TRP": 100.858690902849,
                         "CYS": 114.816253227757,
                         "HSD": 115.848569293979,
                         "PHE": 112.804608190743,
                         "HSP": 115.848569293979,
                         "GLN": 119.106753006548,
                         "SER": 116.361829754186,
                         "ASN": 116.629356207687,
                         "VAL": 121.299281732077,
                         "LEU": 117.587011217416,
                         "TYR": 116.72484692836,
                         "GLU": 119.507585037498,
                         "ARG": 117.532816176021,
                         "THR": 117.044133956143,
                         "ALA": 120.747734648009,
                         "MET": 123.234171432545,
                         "HIS": 115.848569293979,
                         "HSE": 115.848569293979}

    # segment id relationships
    self.alphabet = list(map(chr, range(ord('A'), ord('Z')+1)))
    self.segid2num = {letter: nseg for nseg, letter in enumerate(self.alphabet)}

    # mass of amino acids
    # UNSURE! about pro, arg, his and cys weights
    self.aaSCmass = {"ALA": 71.000000,
                     "CYS": 114.000000,
                     "ASP": 114.000000,
                     "GLU": 128.000000,
                     "PHE": 147.000000,
                     "GLY": 57.000000,
                     "HIS": 114.000000,
                     "HSD": 114.000000,
                     "HSE": 114.000000,
                     "HSP": 114.000000,
                     "ILE": 113.000000,
                     "LYS": 128.000000,
                     "LEU": 113.000000,
                     "MET": 131.000000,
                     "ASN": 114.000000,
                     "PRO": 114.000000,
                     "GLN": 128.000000,
                     "ARG": 114.000000,
                     "SER": 87.000000,
                     "THR": 101.000000,
                     "VAL": 99.000000,
                     "TRP": 186.000000,
                     "TYR": 163.000000}

    # vdw radius of sidechains
    self.rvdw = {"ALA": 2.51958406732374,
                 "CYS": 2.73823091624513,
                 "ASP": 2.79030096923572,
                 "GLU": 2.96332591119925,
                 "PHE": 3.18235414984794,
                 "GLY": 2.25450393833984,
                 "HIS": 3.04273820988499,
                 "HSD": 3.04273820988499,
                 "HSE": 3.04273820988499,
                 "HSP": 3.04273820988499,
                 "ILE": 3.09345983013354,
                 "LYS": 3.18235414984794,
                 "LEU": 3.09345983013354,
                 "MET": 3.09345983013354,
                 "ASN": 2.84049696898525,
                 "PRO": 2.78004241717965,
                 "GLN": 3.00796101305807,
                 "ARG": 3.28138980397453,
                 "SER": 2.59265585208464,
                 "THR": 2.81059478021734,
                 "VAL": 2.92662460060742,
                 "TRP": 3.38869998431408,
                 "TYR": 3.22881842919248}

    ## Check dependency installation ##
    # find stride resource
    self.stride_path = files('EntDetect.resources').joinpath('stride')
    self.logger.debug(f'stride_path: {self.stride_path}')

    # Probe the bundled binary through the shell. The pipe is closed
    # deterministically and an empty output no longer raises IndexError.
    with os.popen(f'{self.stride_path} 2>&1') as probe:
        probe_lines = probe.readlines()
    first_line = probe_lines[0].strip() if probe_lines else ''
    if first_line.endswith('command not found'):
        self.logger.error('Error: Essential software "stride" is not installed.\nPlease install stride before coarse-graining.')
        sys.exit()
    else:
        self.logger.info(f'STRIDE found')

    Header = f"""

# Build CG Protein Model: Python version #
# Yang Jiang & Edward P. O'Brien Jr. #
# Dept. of Chemistry #
# Penn State University #

Configuration:
pdbfile = {self.pdbfile}
casm = {self.casm}
nscal = {self.nscal}
fnn = {self.fnn}
potential_name = {self.potential_name}
domain_file = {self.domain_file}
sc_prefix = {self.sc_prefix}
ca_prefix = {self.ca_prefix}
"""
    self.logger.debug(Header)

    if self.domain_file != "None":
        self.nscal_0 = '1'
        self.nscal = 1
        self.logger.info('domain_file is defined, nscal will be ignored.\n')

    if self.casm != 0 and self.casm != 1:
        self.logger.error('ERROR: casm can only be either 0 (ca model) or 1 (ca-sidechain model).')
        # Terminate like every other configuration error instead of silently
        # falling through to the C-alpha defaults.
        sys.exit()

    if self.potential_name.upper().startswith('GENERIC'):
        words = self.potential_name.split('-')
        if len(words) == 1:
            self.logger.error("ERROR: Generic potential keyword must be invoked as 'generic-bt'")
            sys.exit()
        elif words[-1].upper() not in ('BT', 'MJ', 'KGS'):
            self.logger.error("ERROR: You can only invoke Generic potential keyword as 'generic-bt' or 'generic-mj' or 'generic-kgs'")
            sys.exit()
        else:
            self.potential_name = self.potential_name.upper()
            self.logger.error("ERROR: The generic potential is not supported in this version.\nCoarse-graining terminated.")
            sys.exit()
    else:
        self.potential_name = self.potential_name.upper()
    ### END: get info from control file ###

    ## BEGIN: Conditional Defaults ##
    self.ene_bsc = 0.37  # energy of a backbone-sidechain native contact (0.03 in old version)
    self.single_hbond_ene = 0.75  # energy of a hydrogen bond for everything but helices (0.50 in old version)
    self.single_hbond_ene_helix = 0.75  # energy of a hydrogen bond in a helix (0.50 in old version)
    self.bondlength_go = 0  # non-Go bond length
    if self.casm == 1:
        self.angle_dw = 0  # Go angle potential
        self.dihedral_go = 1  # Go dihedral potential
        self.improperdihed_go = 1  # Go improper dihedral potential
    else:
        self.angle_dw = 1  # double-well angle potential
        self.dihedral_go = 0  # non-Go dihedral potential
        self.improperdihed_go = 0  # non-Go improper dihedral potential

    # read domain nscal values if domain is defined
    dom_nscal = []
    ndomain = 0
    dom = []
    if self.domain_file != "None":
        if not os.path.exists(self.domain_file):
            self.logger.error("ERROR: File %s does not exist"%self.domain_file)
            sys.exit()
        with open(self.domain_file) as f:
            lines = f.readlines()
        for line in lines:
            line = line.strip()
            if line.startswith('scale factor'):
                words = line.split('=')
                dom_nscal.append(float(words[-1]))
            if line.startswith('domain'):
                ndomain += 1
                words = [int(w) for w in line.split('=')[-1].split('-')]
                dom.append(words)
                if words[0] > words[1]:
                    self.logger.error("ERROR: When defining the domains in the interface file, index %d is Greater than %d!"%(words[0], words[1]))
                    sys.exit()
        self.logger.info('%d domain(s) defined in the Domain file %s'%(ndomain, self.domain_file))
        if ndomain == 0:
            self.logger.error("ERROR: No domain definitions were read. Check the domain definition file!")
            sys.exit()
        self.logger.info("Domain information:")
        for i, d in enumerate(dom):
            self.logger.info("Domain %d: %d to %d"%(i+1, d[0], d[1]))
        self.logger.info("")
        # one scale factor per domain plus one per pair of domains
        if len(dom_nscal) != (1+ndomain)*ndomain/2:
            self.logger.error("ERROR: Incorrect number of interfaces assigned. (%d, should be %d)"%(len(dom_nscal)-ndomain, (ndomain-1)*ndomain/2))
            sys.exit()
    self.dom_nscal = dom_nscal
    self.ndomain = ndomain
    self.dom = dom
    # END read domain nscal values if domain is defined

    # initialize nonbonding potential
    # Each table's raw values are shifted by a table-specific constant
    # before the absolute value is taken.
    if self.potential_name.startswith('MJ'):
        miya = files('EntDetect.resources.shared_files').joinpath('mj_contact_potential.dat')
        energy_shift = 1.2
    elif self.potential_name.startswith('KGS'):
        miya = files('EntDetect.resources.shared_files').joinpath('kgs_contact_potential.dat')
        energy_shift = 1.8
    elif self.potential_name.startswith('BT'):
        miya = files('EntDetect.resources.shared_files').joinpath('bt_contact_potential.dat')
        energy_shift = 0.6
    else:
        self.logger.error("ERROR: Unrecognized force-field %s"%self.potential_name)
        sys.exit()
    self.logger.debug(miya)

    eps = np.zeros((20,20))

    with open(miya) as f:
        lines = f.readlines()
    nrows = 0
    avg_mj = 0
    nmj = 0
    words = []
    for line in lines:
        line = line.strip()
        if line.startswith('#'):
            continue
        if line.startswith('AA'):
            # header row: residue order of the matrix columns/rows
            words = line.split()
            vec = [self.res2n[w.upper()] for w in words[1:]]
            if len(vec) != 20:
                self.logger.error("ERROR: missing residues in file %s"%miya)
                sys.exit()
        else:
            words = line.split()
            for tc, w in enumerate(words):
                # Use self.nscal (not the raw argument) so the reset to 1
                # for domain-defined models is honoured.
                scaled = self.nscal * abs(float(w)-energy_shift)
                eps[vec[nrows]][vec[tc]] = scaled
                eps[vec[tc]][vec[nrows]] = scaled
                avg_mj += scaled
                nmj += 1
            nrows += 1
            if nrows > 20:
                self.logger.error("ERROR 2: missing residues in file %s: %d"%(miya, nrows))
                sys.exit()
    if len(words) != nrows:
        self.logger.error("ERROR 3: missing residues in file %s, %d != %d"%(miya, len(words), nrows))
        sys.exit()
    if nmj == 0:
        # nothing was read: avoid a ZeroDivisionError below
        self.logger.error("ERROR: missing residues in file %s"%miya)
        sys.exit()
    self.eps = eps
    avg_mj = avg_mj/nmj
    self.avg_mj = avg_mj
    self.logger.info("The average %s interaction energy is %.4f\n"%(self.potential_name, self.avg_mj))
    # END initialize nonbonding potential

    # Read in the generic backbone dihedral potential of CL Brooks if NON-GO dihedrals
    # requested by user.
    if self.dihedral_go == 0:
        dihedb_nongo = [[[] for j in range(20)] for i in range(20)]
        kpot_f = files('EntDetect.resources.shared_files').joinpath('karanicolas_dihe_parm.dat')
        with open(kpot_f) as f:
            lines = f.readlines()
        nphi = 0
        r1_old = None
        r2_old = None
        for line in lines:
            line = line.strip()
            dat = line.split()
            if not dat:
                # tolerate blank lines instead of failing on dat[0]
                continue
            r1 = dat[0].upper()
            r2 = dat[1].upper()
            if r1 != r1_old or r2 != r2_old:
                # a new residue pair starts: restart the term counter
                nphi = 0
            dihedb_nongo[self.res2n[r1]][self.res2n[r2]].append([0.756*float(dat[2]), int(dat[3]), float(dat[4])])
            nphi += 1
            r1_old = r1
            r2_old = r2
            if nphi > 4:
                self.logger.error("ERROR: nphi = %d upon reading in generic dihedral file"%nphi)
                self.logger.debug(line)
                sys.exit()
        self.dihedb_nongo = dihedb_nongo
    # END Read in the generic backbone dihedral potential
478
+ #############################################################################################################
479
+
480
+ ###################################################################################################
481
+ # generate charmm .psf
482
def create_psf(self, struct, ca_list, name):
    """
    Add bonds, angles, dihedrals and impropers to ``struct`` and save it as
    ``<outdir>/<name>.psf``.

    ``ca_list`` is the ordered list of backbone atoms. Terms that connect
    consecutive backbone atoms are only created when all atoms involved
    share one ``residue.segid``. Returns the path of the written file.
    """
    topo = pmd.topologyobjects
    n_ca = len(ca_list)

    def same_segment(start, count):
        # True when `count` consecutive backbone atoms share a segid
        first_segid = ca_list[start].residue.segid
        return all(ca_list[start+k].residue.segid == first_segid for k in range(1, count))

    # backbone bonds
    for pos in range(n_ca-1):
        if same_segment(pos, 2):
            struct.bonds.append(topo.Bond(ca_list[pos], ca_list[pos+1]))

    # backbone-sidechain bonds, when the residue has a second bead
    for backbone in ca_list:
        beads = backbone.residue.atoms
        if len(beads) > 1:
            struct.bonds.append(topo.Bond(backbone, beads[1]))

    # angles: one per unordered pair of bonded partners around each atom
    for centre in struct.atoms:
        partners = centre.bond_partners
        for first_idx, first in enumerate(partners[:-1]):
            for second in partners[first_idx+1:]:
                struct.angles.append(topo.Angle(first, centre, second))

    # dihedrals over four consecutive backbone atoms
    for pos in range(n_ca-3):
        if same_segment(pos, 4):
            struct.dihedrals.append(topo.Dihedral(ca_list[pos], ca_list[pos+1], ca_list[pos+2], ca_list[pos+3]))

    # impropers centred on interior backbone atoms that carry a side-chain bead
    for pos in range(1, n_ca-1):
        if not same_segment(pos-1, 3):
            continue
        beads = ca_list[pos].residue.atoms
        if len(beads) > 1:
            struct.impropers.append(topo.Improper(ca_list[pos], ca_list[pos-1], ca_list[pos+1], beads[1]))

    psffile = os.path.join(self.outdir, name+'.psf')
    self.logger.info(f'Writing {psffile}')
    struct.save(psffile, overwrite=True, vmd=False)
    return psffile
518
+ # END generate charmm .psf
519
+ ###################################################################################################
520
+
521
+ ###################################################################################################
522
+ # generate charmm .top
523
def Create_rtf(self, struct, out_name):
    """
    Write a CHARMM topology file ``<outdir>/<out_name>.top`` for ``struct``.

    The file contains one MASS line per atom, three DECL lines built from
    the name of ``struct[0]``, and one RESI entry per residue. Residues
    with more than one atom get the side-chain connectivity only when
    ``self.casm == 1``. Returns the path of the written file.
    """
    topfile = os.path.join(self.outdir, out_name+'.top')
    self.logger.info(f'Writing {topfile}')
    model = 'Ca-Cb' if self.casm == 1 else 'Ca'
    # The context manager closes the file even if a write raises.
    with open(topfile, 'w') as fo:
        fo.write('* This CHARMM .top file describes a %s Go model of %s\n*\n20 1\n'%(model, self.pdbfile))
        # MASS section
        fo.write('! backbone masses\n')
        for idx, atm in enumerate(struct.atoms):
            fo.write('MASS %-4s %-8s %.6f\n'%(str(idx+1), atm.type, atm.mass))
        fo.write('\n')
        fo.write('DECL +%s\n'%struct[0].name)
        fo.write('DECL -%s\n'%struct[0].name)
        fo.write('DECL #%s\n'%struct[0].name)
        # residue section
        for res in struct.residues:
            res_charge = sum(atm.charge for atm in res.atoms)
            fo.write('RESI %-6s %.1f\n'%(res.name, res_charge))
            fo.write('GROUP\n')
            for atm in res.atoms:
                fo.write('ATOM %s %-6s %.1f\n'%(atm.name, atm.type, atm.charge))
            ca = res.atoms[0].name
            if self.casm == 1 and len(res.atoms) != 1:
                sc = res.atoms[1].name
                fo.write("Bond %s %s %s +%s\n"%(ca, sc, ca, ca))
                fo.write("Angle -%s %s %s %s %s +%s -%s %s +%s\n"%(ca, ca, sc, sc, ca, ca, ca, ca, ca))
                fo.write("DIHE -%s %s +%s #%s\n"%(ca, ca, ca, ca))
                fo.write("IMPH %s -%s +%s %s\n\n"%(ca, ca, ca, sc))
            else:
                fo.write('Bond %s +%s\n'%(ca, ca))
                fo.write('Angle -%s %s +%s\n'%(ca, ca, ca))
                fo.write('DIHE -%s %s +%s #%s\n\n'%(ca, ca, ca, ca))
        # end section
        fo.write('END\n')
    return topfile
565
+ # END generate charmm .top
566
+ ###################################################################################################
567
+
568
+ ###################################################################################################
569
def calc_distance(self, atom_1, atom_2):
    """Return the Euclidean distance between two atoms from their xx/xy/xz coordinates."""
    offsets = (atom_1.xx - atom_2.xx, atom_1.xy - atom_2.xy, atom_1.xz - atom_2.xz)
    squared_norm = sum(component**2 for component in offsets)
    return squared_norm**0.5
572
+ ###################################################################################################
573
+
574
+ ###################################################################################################
575
def cg_energy_minimization(self, cor, prefix, prm_file):
    """
    Energy-minimize a coarse-grained model with OpenMM and return the
    minimized positions.

    ``cor`` holds the starting coordinates, ``prefix`` is the common stem
    of the ``.psf`` and ``.top`` files, and ``prm_file`` is the CHARMM
    parameter file. Atoms named 'A' are restrained to their starting
    positions. The per-force energy breakdown is written to stdout before
    and after minimization.
    """
    n_threads = '1'  # named n_threads so the module-level numpy alias `np` is not shadowed
    timestep = 0.015*picoseconds
    fbsolu = 0.05/picosecond
    temp = 310*kelvin

    psf = CharmmPsfFile(prefix+'.psf')
    top = psf.topology
    # The external converter is expected to write <name>.xml next to the
    # .prm file. NOTE(review): its exit status is not checked — confirm
    # whether a failure should abort here.
    os.system('parse_cg_cacb_prm.py -p '+prm_file+' -t '+prefix+'.top')
    name = prm_file.split('.prm')[0]
    forcefield = ForceField(name+'.xml')

    # match every residue to the template that carries its own name
    template_map = {}
    for chain in top.chains():
        for res in chain.residues():
            template_map[res] = res.name

    system = forcefield.createSystem(top, nonbondedCutoff=2.0*nanometer,
                                     constraints=None, removeCMMotion=False, ignoreExternalBonds=True,
                                     residueTemplates=template_map)
    # NOTE(review): assumes the force at index 4 is the custom nonbonded
    # force generated from the XML — confirm against the converter output.
    custom_nb_force = system.getForce(4)
    custom_nb_force.setUseSwitchingFunction(True)
    custom_nb_force.setSwitchingDistance(1.8*nanometer)
    custom_nb_force.setNonbondedMethod(custom_nb_force.CutoffNonPeriodic)

    # add position restraints
    force = CustomExternalForce("k*((x-x0)^2+(y-y0)^2+(z-z0)^2)")
    force.addPerParticleParameter("k")
    force.addPerParticleParameter("x0")
    force.addPerParticleParameter("y0")
    force.addPerParticleParameter("z0")
    system.addForce(force)
    # END add position restraints

    # add position restraints for CA
    force = system.getForces()[-1]
    k = 100*kilocalorie/mole/angstrom**2
    for atm in top.atoms():
        if atm.name == 'A':
            force.addParticle(atm.index, (k, cor[atm.index][0], cor[atm.index][1], cor[atm.index][2]))

    integrator = LangevinIntegrator(temp, fbsolu, timestep)
    integrator.setConstraintTolerance(0.00001)
    # prepare simulation
    platform = Platform.getPlatformByName('CPU')
    properties = {'Threads': n_threads}
    simulation = Simulation(top, system, integrator, platform, properties)
    simulation.context.setPositions(cor)
    simulation.context.setVelocitiesToTemperature(temp)
    energy = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilocalorie/mole)
    # getEnergyDecomposition is a method: the bare name raised NameError
    self.getEnergyDecomposition(stdout, simulation.context, system)
    self.logger.info(' Potential energy before minimization: %.4f kcal/mol'%energy)
    simulation.minimizeEnergy(tolerance=0.1*kilocalories_per_mole)
    energy = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilocalorie/mole)
    self.getEnergyDecomposition(stdout, simulation.context, system)
    self.logger.info(' Potential energy after minimization: %.4f kcal/mol'%energy)
    current_cor = simulation.context.getState(getPositions=True).getPositions()
    return current_cor
636
+ ###################################################################################################
637
+
638
+ ###################################################################################################
639
+ # remove bond constraints of 0 mass atoms
640
def rm_cons_0_mass(self, system):
    """
    Remove every constraint that involves a zero-mass particle.

    A constraint between two zero-mass particles is simply dropped. When
    only one particle is massless, the constraint is replaced by a
    harmonic bond added to force 0 of ``system``. Scanning restarts after
    each removal because the constraint indices shift.
    """
    while system.getNumConstraints() != 0:
        for idx in range(system.getNumConstraints()):
            params = system.getConstraintParameters(idx)
            p_a, p_b = params[0], params[1]
            a_massless = system.getParticleMass(p_a).value_in_unit(dalton) == 0
            b_massless = system.getParticleMass(p_b).value_in_unit(dalton) == 0
            if not (a_massless or b_massless):
                continue
            system.removeConstraint(idx)
            #print('Constraint %d is removed, range is %d'%(idx, system.getNumConstraints()))
            if not (a_massless and b_massless):
                # exactly one massless particle: keep the pair connected
                system.getForce(0).addBond(p_a, p_b, 3.81*angstroms, 50*kilocalories/mole/angstroms**2)
            break
        else:
            # full pass without touching anything: done
            return
661
+ # END remove bond constraints of 0 mass atoms
662
+ ###################################################################################################
663
+
664
+ ###################################################################################################
665
+ # energy decomposition
666
def forcegroupify(self, system):
    """
    Put each force of ``system`` into the force group equal to its index.

    Returns a dict mapping that index to the force's class name, taken
    from the last dotted component of ``str(type(force))``.
    """
    names_by_group = {}
    for group in range(system.getNumForces()):
        current = system.getForce(group)
        current.setForceGroup(group)
        qualified = str(type(current)).split('\'')[1]
        names_by_group[group] = qualified.split('.')[-1]
    return names_by_group
678
+ ###################################################################################################
679
+
680
+ ###################################################################################################
681
def getEnergyDecomposition(self, handle, context, system):
    """
    Write the potential energy of each force in ``system`` to ``handle``.

    Every force is first assigned its own force group through
    ``self.forcegroupify``, then the energy of each group is queried from
    ``context``. A group whose query raises ValueError is reported as NaN.
    Returns a dict mapping group index to the energy quantity.
    """
    # forcegroupify is a method: the bare name raised NameError
    forcegroups = self.forcegroupify(system)
    energies = {}
    for group in forcegroups:
        try:
            states = context.getState(getEnergy=True, groups={group})
        except ValueError as e:
            self.logger.debug(str(e))
            energies[group] = Quantity(np.nan, kilocalories/mole)
        else:
            energies[group] = states.getPotentialEnergy()
    handle.write(' Potential Energy:\n')
    for group, energy in energies.items():
        handle.write(' %s: %.4f kcal/mol\n'%(forcegroups[group], energy.value_in_unit(kilocalories/mole)))
    return energies
697
+ ###################################################################################################
698
+
699
+ ###################################################################################################
700
+ def parse_cg_prm(self, prmfile:str, topfile:str):
701
+ """
702
+ Parse CHARMM parameter file and generate OpenMM XML file.
703
+ """
704
+
705
+ top_file_list = topfile.strip().split()
706
+
707
+ command = 'pmd.charmm.CharmmParameterSet('
708
+ for tf in top_file_list:
709
+ command += '"'+tf + '", '
710
+ command += 'prmfile)'
711
+ self.logger.debug(command)
712
+
713
+ param=eval(command)
714
+
715
+ name = prmfile.split('.prm')
716
+ file_name = name[0]
717
+
718
+ openmm_param=pmd.openmm.parameters.OpenMMParameterSet.from_parameterset(param)
719
+ openmm_param.write(file_name+'_tmp.xml', skip_duplicates=False)
720
+ self.logger.info(f'Writing {file_name}_tmp.xml')
721
+
722
+ dom = MD.parse(file_name+'_tmp.xml')
723
+ root = dom.documentElement
724
+ atom_type = root.getElementsByTagName('AtomTypes')
725
+ residue = root.getElementsByTagName('Residues')
726
+ os.remove(file_name+'_tmp.xml')
727
+
728
+ root = ET.Element("ForceField")
729
+
730
+ pf = open(prmfile, 'r')
731
+ section = None
732
+ node = None
733
+ nbxmod = None
734
+ ep = None
735
+ kc = 138.935485
736
+ ld = 1 # 10 Angstrom
737
+ atom_type_list = [];
738
+ num_atom = 0
739
+ dihedral_array = []
740
+ acoef_array = None
741
+ bcoef_array = None
742
+ ccoef_array = None
743
+ nb_table = []
744
+ nbfix_table = []
745
+ try:
746
+ for line in pf:
747
+ line = line.strip()
748
+ if not line:
749
+ # This is a blank line
750
+ continue
751
+ if line.startswith('!'):
752
+ # This is a comment line
753
+ continue
754
+ if line.startswith('ATOM'):
755
+ section = 'ATOM'
756
+ #node = ET.SubElement(root, "AtomTypes")
757
+ continue
758
+ if line.startswith('BOND'):
759
+ section = 'BOND'
760
+ node = ET.SubElement(root, 'HarmonicBondForce')
761
+ continue
762
+ if line.startswith('ANGLE'):
763
+ section = 'ANGLE'
764
+ node = ET.SubElement(root, 'CustomAngleForce',
765
+ energy='-1/gamma*log(e); e=exp(-gamma*(k_alpha*(theta-theta_alpha)^2+epsilon_alpha))+exp(-gamma*k_betta*(theta-theta_betta)^2)')
766
+ ET.SubElement(node, 'PerAngleParameter', name='k_alpha')
767
+ ET.SubElement(node, 'PerAngleParameter', name='theta_alpha')
768
+ ET.SubElement(node, 'PerAngleParameter', name='k_betta')
769
+ ET.SubElement(node, 'PerAngleParameter', name='theta_betta')
770
+ ET.SubElement(node, 'PerAngleParameter', name='gamma')
771
+ ET.SubElement(node, 'PerAngleParameter', name='epsilon_alpha')
772
+ continue
773
+ if line.startswith('DIHEDRAL'):
774
+ section = 'DIHEDRAL'
775
+ node = ET.SubElement(root, 'PeriodicTorsionForce')
776
+ continue
777
+ if line.startswith('IMPHI'):
778
+ section = 'IMPROPER'
779
+ if len(dihedral_array) != 0:
780
+ proper_node = ET.SubElement(node, 'Proper', type1=dihedral_array[0], type2=dihedral_array[1],
781
+ type3=dihedral_array[2], type4=dihedral_array[3])
782
+ n0 = 1
783
+ for index in range(4, len(dihedral_array), 3):
784
+ proper_node.set('k'+str(n0), dihedral_array[index])
785
+ proper_node.set('periodicity'+str(n0), dihedral_array[index+1])
786
+ proper_node.set('phase'+str(n0), dihedral_array[index+2])
787
+ n0 += 1
788
+ node = ET.SubElement(root, 'CustomTorsionForce',
789
+ energy='k*min(dtheta, 2*pi-dtheta)^2; dtheta = abs(theta-theta0); pi = 3.1415926535')
790
+ ET.SubElement(node, 'PerTorsionParameter', name='k')
791
+ ET.SubElement(node, 'PerTorsionParameter', name='theta0')
792
+ continue
793
+ if line.startswith('NONBONDED'):
794
+ section = 'NONBONDED'
795
+ words = line.split()
796
+ nbxmod = int(words[2])
797
+ continue
798
+ if line.startswith('CUTNB'):
799
+ words = line.split()
800
+ ep = float(words[7])
801
+ node = ET.SubElement(root, 'CustomNonbondedForce',
802
+ energy='ke*charge1*charge2/ep/r*exp(-r/ld)+kv*(a/r^12+b/r^10+c/r^6); '+
803
+ 'ke=ke1*ke2; ep=ep1*ep2; ld=ld1*ld2; kv=kv1*kv2; '+
804
+ 'a=acoef(index1, index2); b=bcoef(index1, index2); c=ccoef(index1, index2)',
805
+ bondCutoff=str(nbxmod-1))
806
+ ET.SubElement(node, 'PerParticleParameter', name='ke')
807
+ ET.SubElement(node, 'PerParticleParameter', name='kv')
808
+ ET.SubElement(node, 'PerParticleParameter', name='ep')
809
+ ET.SubElement(node, 'PerParticleParameter', name='ld')
810
+ ET.SubElement(node, 'PerParticleParameter', name='charge')
811
+ ET.SubElement(node, 'PerParticleParameter', name='index')
812
+ acoef_array = numpy.zeros((num_atom, num_atom))
813
+ bcoef_array = numpy.zeros((num_atom, num_atom))
814
+ ccoef_array = numpy.zeros((num_atom, num_atom))
815
+ nb_table = [[] for i in atom_type_list]
816
+ continue
817
+ if line.startswith('NBFIX'):
818
+ section = 'NBFIX'
819
+ continue
820
+ # It seems like files? sections? can be terminated with 'END'
821
+ if line.startswith('END'): # should this be case-insensitive?
822
+ section = None
823
+ continue
824
+ # If we have no section, skip
825
+ if section is None: continue
826
+ # Now handle each section specifically
827
+ if section == 'ATOM':
828
+ words = line.split()
829
+ idx = int(words[1])
830
+ name = words[2]
831
+ mass = float(words[3])
832
+ #atom_node = ET.SubElement(node, 'Type', name=name, element='C', mass=str(mass))
833
+ #atom_node.set('class', name)
834
+ num_atom += 1
835
+ atom_type_list.append(name)
836
+ if section == 'BOND':
837
+ words = line.split()
838
+ ET.SubElement(node, 'Bond', type1=words[0], type2=words[1], length=str(float(words[3])/10), k=str(float(words[2])*4.184*100*2))
839
+ if section == 'ANGLE':
840
+ words = line.split()
841
+ ET.SubElement(node, 'Angle', type1=words[0], type2=words[1], type3= words[2],
842
+ k_alpha=str(float(words[3])*4.184), theta_alpha=str(float(words[4])/180*math.pi),
843
+ k_betta=str(float(words[5])*4.184), theta_betta=str(float(words[6])/180*math.pi),
844
+ gamma=str(float(words[7])/4.184), epsilon_alpha=str(float(words[8])*4.184))
845
+ if section == 'DIHEDRAL':
846
+ words = line.split()
847
+ type1 = words[0]
848
+ type2 = words[1]
849
+ type3 = words[2]
850
+ type4 = words[3]
851
+ k = str(float(words[4])*4.184)
852
+ n = words[5]
853
+ phase = str(float(words[6])/180*math.pi)
854
+ if len(dihedral_array) == 0:
855
+ dihedral_array.append(type1)
856
+ dihedral_array.append(type2)
857
+ dihedral_array.append(type3)
858
+ dihedral_array.append(type4)
859
+ dihedral_array.append(k)
860
+ dihedral_array.append(n)
861
+ dihedral_array.append(phase)
862
+ elif (type1 == dihedral_array[0] and type2 == dihedral_array[1] and
863
+ type3 == dihedral_array[2] and type4 == dihedral_array[3]):
864
+ dihedral_array.append(k)
865
+ dihedral_array.append(n)
866
+ dihedral_array.append(phase)
867
+ else:
868
+ proper_node = ET.SubElement(node, 'Proper', type1=dihedral_array[0], type2=dihedral_array[1],
869
+ type3=dihedral_array[2], type4=dihedral_array[3])
870
+ n0 = 1
871
+ for index in range(4, len(dihedral_array), 3):
872
+ proper_node.set('k'+str(n0), dihedral_array[index])
873
+ proper_node.set('periodicity'+str(n0), dihedral_array[index+1])
874
+ proper_node.set('phase'+str(n0), dihedral_array[index+2])
875
+ n0 += 1
876
+ dihedral_array = []
877
+ dihedral_array.append(type1)
878
+ dihedral_array.append(type2)
879
+ dihedral_array.append(type3)
880
+ dihedral_array.append(type4)
881
+ dihedral_array.append(k)
882
+ dihedral_array.append(n)
883
+ dihedral_array.append(phase)
884
+ if section == 'IMPROPER':
885
+ # No improper torsion energy term for Ca model
886
+ continue
887
+ if section == 'NONBONDED':
888
+ words = line.split()
889
+ name = words[0]
890
+ epsilon = -float(words[2])*4.184
891
+ R_min_half = float(words[3])/10
892
+ index = atom_type_list.index(name)
893
+ nb_table[index] = [epsilon, R_min_half]
894
+ if section == 'NBFIX':
895
+ words = line.split()
896
+ type1 = words[0]
897
+ type2 = words[1]
898
+ index1 = atom_type_list.index(type1)
899
+ index2 = atom_type_list.index(type2)
900
+ epsilon = -float(words[2])*4.184
901
+ R_min_half = float(words[3])/10
902
+ nbfix_table.append([index1, index2, epsilon, R_min_half])
903
+ finally:
904
+ pf.close()
905
+
906
+ #Build acoef, bcoef, ccoef tables
907
+ for index1 in range(num_atom):
908
+ epsilon1 = nb_table[index1][0]
909
+ R_min1 = nb_table[index1][1]
910
+ for index2 in range(num_atom):
911
+ epsilon2 = nb_table[index2][0]
912
+ R_min2 = nb_table[index2][1]
913
+ epsilon = numpy.sqrt(epsilon1 * epsilon2)
914
+ R_min = R_min1 + R_min2
915
+ a = 13 * epsilon * pow(R_min, 12)
916
+ b = -18 * epsilon * pow(R_min, 10)
917
+ c = 4 * epsilon * pow(R_min, 6)
918
+ acoef_array[index1, index2] = a
919
+ bcoef_array[index1, index2] = b
920
+ ccoef_array[index1, index2] = c
921
+ for nbfix_list in nbfix_table:
922
+ index1 = nbfix_list[0]
923
+ index2 = nbfix_list[1]
924
+ epsilon = nbfix_list[2]
925
+ R_min = nbfix_list[3]
926
+ a = 13 * epsilon * pow(R_min, 12)
927
+ b = -18 * epsilon * pow(R_min, 10)
928
+ c = 4 * epsilon * pow(R_min, 6)
929
+ acoef_array[index1, index2] = a
930
+ bcoef_array[index1, index2] = b
931
+ ccoef_array[index1, index2] = c
932
+ acoef_array[index2, index1] = a
933
+ bcoef_array[index2, index1] = b
934
+ ccoef_array[index2, index1] = c
935
+
936
+ #build tabulated function for acoef, bcoef, ccoef
937
+ acoef_node = ET.SubElement(node, "Function", name='acoef', type='Discrete2D',
938
+ xsize=str(num_atom), ysize=str(num_atom))
939
+ text = ''
940
+ for index1 in range(num_atom):
941
+ for index2 in range(num_atom):
942
+ text += str(acoef_array[index1, index2]) + " "
943
+ acoef_node.text = text
944
+
945
+ bcoef_node = ET.SubElement(node, "Function", name='bcoef', type='Discrete2D',
946
+ xsize=str(num_atom), ysize=str(num_atom))
947
+ text = ''
948
+ for index1 in range(num_atom):
949
+ for index2 in range(num_atom):
950
+ text += str(bcoef_array[index1, index2]) + " "
951
+ bcoef_node.text = text
952
+
953
+ ccoef_node = ET.SubElement(node, "Function", name='ccoef', type='Discrete2D',
954
+ xsize=str(num_atom), ysize=str(num_atom))
955
+ text = ''
956
+ for index1 in range(num_atom):
957
+ for index2 in range(num_atom):
958
+ text += str(ccoef_array[index1, index2]) + " "
959
+ ccoef_node.text = text
960
+
961
+ #add custom nonbond parameters
962
+ ET.SubElement(node, 'UseAttributeFromResidue', name='charge')
963
+ for index in range(num_atom):
964
+ name = atom_type_list[index]
965
+ ET.SubElement(node, 'Atom', type=name, index=str(index), ke=str(kc**0.5), ep=str(ep**0.5), ld=str(ld**0.5), kv='1')
966
+
967
+ dom = MD.parseString(ET.tostring(root))
968
+ root = dom.documentElement
969
+ root = root.toprettyxml(indent=' ', newl='\n')
970
+ dom = MD.parseString(root)
971
+ root = dom.documentElement
972
+ bond = root.getElementsByTagName('HarmonicBondForce')
973
+ root.insertBefore(atom_type[0], bond[0])
974
+ if len(residue) > 0:
975
+ root.insertBefore(residue[0], bond[0])
976
+
977
+ xf = open(file_name+'.xml', 'w')
978
+ #dom.writexml(xf, indent='', addindent=' ', newl='\n')
979
+ dom.writexml(xf, indent='')
980
+ ###################################################################################################
981
+
982
+ ###################################################################################################
983
def parse_cg_cacb_prm(self, prmfile:str, topfile:str):
    """Convert a CHARMM ``.prm`` file of a Ca-Cb coarse-grained model to an XML force field.

    The method works in three stages:

    1. ParmEd loads the topology file(s) together with ``prmfile`` and writes a
       temporary ``<stem>_tmp.xml``; only its ``AtomTypes`` and ``Residues``
       elements are kept, and the temporary file is deleted.
    2. ``prmfile`` is re-read line by line and a ``ForceField`` tree is built with
       ``HarmonicBondForce``, ``HarmonicAngleForce``, ``PeriodicTorsionForce``,
       ``CustomTorsionForce`` (impropers) and ``CustomNonbondedForce`` elements.
       Pairwise ``acoef``/``bcoef`` tables are filled from the NONBONDED entries
       and then overridden (symmetrically) by NBFIX entries.
    3. The tree is pretty-printed, the ParmEd ``AtomTypes``/``Residues`` elements
       are inserted before ``HarmonicBondForce``, and the result is written to
       ``<stem>.xml`` where ``<stem>`` is ``prmfile.split('.prm')[0]``.

    Args:
        prmfile: Path of the CHARMM parameter file to convert.
        topfile: One or more topology file paths separated by whitespace.

    Returns:
        None. The result is the ``<stem>.xml`` file written to disk.

    Raises:
        ValueError: If the file has no ``CUTNB`` line (so no nonbonded force
            could be created) or an atom type has no NONBONDED entry.
    """

    def emit_proper(parent, types, terms):
        # Write one <Proper> element carrying every (k, periodicity, phase)
        # term collected for a single quadruple of atom types.
        proper_node = ET.SubElement(parent, 'Proper', type1=types[0], type2=types[1],
                                    type3=types[2], type4=types[3])
        for n0, (k_term, n_term, phase_term) in enumerate(terms, start=1):
            proper_node.set('k'+str(n0), k_term)
            proper_node.set('periodicity'+str(n0), n_term)
            proper_node.set('phase'+str(n0), phase_term)

    top_file_list = topfile.strip().split()

    # Call the constructor directly. The previous implementation assembled the
    # call as a string and ran it through eval(), which broke on (or executed)
    # quotes embedded in a file name.
    param = pmd.charmm.CharmmParameterSet(*top_file_list, prmfile)

    file_name = prmfile.split('.prm')[0]
    tmp_xml = file_name+'_tmp.xml'

    openmm_param = pmd.openmm.parameters.OpenMMParameterSet.from_parameterset(param)
    openmm_param.write(tmp_xml, skip_duplicates=False)
    try:
        dom = MD.parse(tmp_xml)
    finally:
        # Remove the temporary file even when parsing it fails.
        os.remove(tmp_xml)
    parmed_root = dom.documentElement
    atom_type = parmed_root.getElementsByTagName('AtomTypes')
    residue = parmed_root.getElementsByTagName('Residues')

    root = ET.Element("ForceField")

    section = None
    node = None              # element receiving entries of the current section
    torsion_node = None      # PeriodicTorsionForce element
    nonbonded_node = None    # CustomNonbondedForce element
    nbxmod = None
    ep = None
    kc = 138.935485
    ld = 1 # 10 Angstrom
    atom_type_list = []
    num_atom = 0
    pending_types = None     # atom types of the dihedral group being collected
    pending_terms = []       # (k, periodicity, phase) terms of that group
    acoef_array = None
    bcoef_array = None
    nb_table = []
    nbfix_table = []

    with open(prmfile, 'r') as pf:
        for line in pf:
            line = line.strip()
            # Skip blank lines and comment lines
            if not line or line.startswith('!'):
                continue

            # ---- section headers ----
            if line.startswith('ATOM'):
                section = 'ATOM'
                continue
            if line.startswith('BOND'):
                section = 'BOND'
                node = ET.SubElement(root, 'HarmonicBondForce')
                continue
            if line.startswith('ANGLE'):
                section = 'ANGLE'
                node = ET.SubElement(root, 'HarmonicAngleForce')
                continue
            if line.startswith('DIHEDRAL'):
                section = 'DIHEDRAL'
                node = torsion_node = ET.SubElement(root, 'PeriodicTorsionForce')
                continue
            if line.startswith('IMPHI'):
                section = 'IMPROPER'
                # The last dihedral group is still pending; write it out now.
                if pending_types is not None:
                    emit_proper(torsion_node, pending_types, pending_terms)
                    pending_types = None
                    pending_terms = []
                node = ET.SubElement(root, 'CustomTorsionForce',
                                     energy='k*min(dtheta, 2*pi-dtheta)^2; dtheta = abs(theta-theta0); pi = 3.1415926535')
                ET.SubElement(node, 'PerTorsionParameter', name='k')
                ET.SubElement(node, 'PerTorsionParameter', name='theta0')
                continue
            if line.startswith('NONBONDED'):
                section = 'NONBONDED'
                nbxmod = int(line.split()[2])
                continue
            if line.startswith('CUTNB'):
                words = line.split()
                ep = float(words[7])
                node = nonbonded_node = ET.SubElement(root, 'CustomNonbondedForce',
                                     energy='ke*charge1*charge2/ep/r*exp(-r/ld)+kv*(a/r^12+b/r^6); '+
                                            'ke=ke1*ke2; ep=ep1*ep2; ld=ld1*ld2; kv=kv1*kv2; '+
                                            'a=acoef(index1, index2); b=bcoef(index1, index2)',
                                     bondCutoff=str(nbxmod-1))
                for par_name in ('ke', 'kv', 'ep', 'ld', 'charge', 'index'):
                    ET.SubElement(node, 'PerParticleParameter', name=par_name)
                # Tables are sized from the atom types read so far, so the ATOM
                # section has to precede this line.
                acoef_array = numpy.zeros((num_atom, num_atom))
                bcoef_array = numpy.zeros((num_atom, num_atom))
                nb_table = [[] for _ in atom_type_list]
                continue
            if line.startswith('NBFIX'):
                section = 'NBFIX'
                continue
            # It seems like files? sections? can be terminated with 'END'
            if line.startswith('END'): # should this be case-insensitive?
                section = None
                continue
            # If we have no section, skip
            if section is None:
                continue

            # ---- section content ----
            words = line.split()
            if section == 'ATOM':
                # Only the type name is needed here; masses come from ParmEd.
                num_atom += 1
                atom_type_list.append(words[2])
            elif section == 'BOND':
                ET.SubElement(node, 'Bond', type1=words[0], type2=words[1],
                              length=str(float(words[3])/10), k=str(float(words[2])*4.184*100*2))
            elif section == 'ANGLE':
                ET.SubElement(node, 'Angle', type1=words[0], type2=words[1], type3=words[2],
                              k=str(float(words[3])*4.184*2), angle=str(float(words[4])/180*math.pi))
            elif section == 'DIHEDRAL':
                types = (words[0], words[1], words[2], words[3])
                term = (str(float(words[4])*4.184), words[5], str(float(words[6])/180*math.pi))
                if pending_types is not None and types != pending_types:
                    # A new quadruple starts: write out the finished group.
                    emit_proper(torsion_node, pending_types, pending_terms)
                    pending_terms = []
                pending_types = types
                pending_terms.append(term)
            elif section == 'IMPROPER':
                k = str(float(words[4])*4.184)
                phase = str((float(words[6])-180)/180*math.pi)
                ET.SubElement(node, 'Improper', type1=words[0], type2=words[1],
                              type3=words[2], type4=words[3], k=k, theta0=phase)
            elif section == 'NONBONDED':
                epsilon = -float(words[2])*4.184
                R_min_half = float(words[3])/10
                nb_table[atom_type_list.index(words[0])] = [epsilon, R_min_half]
            elif section == 'NBFIX':
                index1 = atom_type_list.index(words[0])
                index2 = atom_type_list.index(words[1])
                epsilon = -float(words[2])*4.184
                R_min = float(words[3])/10
                nbfix_table.append([index1, index2, epsilon, R_min])

    # A DIHEDRAL section that is not followed by an IMPHI header would otherwise
    # lose its last group of terms.
    if pending_types is not None:
        emit_proper(torsion_node, pending_types, pending_terms)

    if nonbonded_node is None:
        raise ValueError('No CUTNB line found in %s; cannot build the nonbonded force' % prmfile)
    missing = [name for name, entry in zip(atom_type_list, nb_table) if not entry]
    if missing or len(nb_table) != num_atom:
        raise ValueError('Missing NONBONDED parameters in %s for atom type(s): %s'
                         % (prmfile, ', '.join(missing) if missing else 'unknown'))

    # Build acoef and bcoef tables from the per-type NONBONDED entries
    for index1 in range(num_atom):
        epsilon1, R_min1 = nb_table[index1][0], nb_table[index1][1]
        for index2 in range(num_atom):
            epsilon2, R_min2 = nb_table[index2][0], nb_table[index2][1]
            epsilon = numpy.sqrt(epsilon1 * epsilon2)
            R_min = R_min1 + R_min2
            acoef_array[index1, index2] = epsilon * pow(R_min, 12)
            bcoef_array[index1, index2] = -2 * epsilon * pow(R_min, 6)
    # NBFIX entries override the combined values for specific pairs (both orders)
    for index1, index2, epsilon, R_min in nbfix_table:
        a = epsilon * pow(R_min, 12)
        b = -2 * epsilon * pow(R_min, 6)
        acoef_array[index1, index2] = a
        bcoef_array[index1, index2] = b
        acoef_array[index2, index1] = a
        bcoef_array[index2, index1] = b

    # Build tabulated functions for acoef and bcoef (row-major, space-terminated values)
    for func_name, table in (('acoef', acoef_array), ('bcoef', bcoef_array)):
        func_node = ET.SubElement(nonbonded_node, "Function", name=func_name, type='Discrete2D',
                                  xsize=str(num_atom), ysize=str(num_atom))
        func_node.text = ''.join(str(value) + " " for value in table.flat)

    # Add custom nonbond parameters
    ET.SubElement(nonbonded_node, 'UseAttributeFromResidue', name='charge')
    # The energy expression multiplies the two per-particle values, hence the square roots
    ke_text = str(kc**0.5)
    ep_text = str(ep**0.5)
    ld_text = str(ld**0.5)
    for index, name in enumerate(atom_type_list):
        ET.SubElement(nonbonded_node, 'Atom', type=name, index=str(index),
                      ke=ke_text, ep=ep_text, ld=ld_text, kv='1')

    # Pretty-print, then re-parse so the ParmEd elements can be spliced in
    dom = MD.parseString(ET.tostring(root))
    pretty_xml = dom.documentElement.toprettyxml(indent=' ', newl='\n')
    dom = MD.parseString(pretty_xml)
    out_root = dom.documentElement
    bond = out_root.getElementsByTagName('HarmonicBondForce')
    out_root.insertBefore(atom_type[0], bond[0])
    if len(residue) > 0:
        out_root.insertBefore(residue[0], bond[0])

    # Close the output file deterministically so the XML is fully flushed
    with open(file_name+'.xml', 'w') as xf:
        dom.writexml(xf, indent='')
1243
+ ###################################################################################################
1244
+
1245
+ ###################################################################################################
1246
+ def run(self,):
1247
+
1248
+ resname_prefix = 'G'
1249
+ atomname_prefix = ''
1250
+
1251
+ # Read PDB file
1252
+ cg_structure = pmd.Structure()
1253
+ self.logger.info("Reading in PDB file %s"%self.pdbfile)
1254
+
1255
+ struct = pmd.load_file(self.pdbfile)
1256
+ sel_idx = np.zeros(len(struct.atoms))
1257
+ for idx, res in enumerate(struct.residues):
1258
+ res.number = idx+1
1259
+ if res.name in self.aa:
1260
+ for atm in res.atoms:
1261
+ if atm.element != 1:
1262
+ sel_idx[atm.idx] = 1
1263
+ heavy_protein = struct[sel_idx]
1264
+
1265
+ for idx, res in enumerate(heavy_protein.residues):
1266
+ num_backbone = 0
1267
+ num_sidechain = 0
1268
+ for atm in res.atoms:
1269
+ if atm.name in ['C', 'N', 'O', 'CA']:
1270
+ num_backbone += 1
1271
+ elif atm.name != 'OXT':
1272
+ num_sidechain += 1
1273
+ if num_backbone != 4:
1274
+ self.logger.error("ERROR: In pdb the number of backbone atoms in residue %d is incorrect: %d != 4"%(idx+1, num_backbone))
1275
+ sys.exit()
1276
+ if num_sidechain != self.refNscat[res.name]:
1277
+ self.logger.error("ERROR: In pdb the number of sidechain atoms in residue %d is incorrect: %d != %d"%(idx+1, num_sidechain, self.refNscat[res.name]))
1278
+ sys.exit()
1279
+
1280
+ idx_atm = 0
1281
+ ca_list = []
1282
+ chain_id_list = []
1283
+ for res in heavy_protein.residues:
1284
+ if not res.chain in chain_id_list:
1285
+ chain_id_list.append(res.chain)
1286
+ if len(chain_id_list) > len(self.alphabet):
1287
+ self.logger.error('ERROR: The number of chains in pdb file (%d) exceeds the maximum (%d)'%(len(chain_id_list), len(self.alphabet)))
1288
+ sys.exit()
1289
+ resid = 0
1290
+ chainid = chain_id_list[0]
1291
+ for idx, res in enumerate(heavy_protein.residues):
1292
+ if res.segid == '':
1293
+ segid = self.alphabet[chain_id_list.index(res.chain)]
1294
+ else:
1295
+ segid = res.segid
1296
+
1297
+ if res.chain != chainid:
1298
+ chainid = res.chain
1299
+ resid = 1
1300
+ else:
1301
+ resid += 1
1302
+
1303
+ SC_Mass = self.aaSCmass[res.name] - self.aaSCmass['GLY']
1304
+ CA_Mass = self.aaSCmass['GLY']
1305
+ SC_COM = np.zeros(3)
1306
+ CA_COM = np.zeros(3)
1307
+ sum_SC_Mass = 0
1308
+
1309
+ for atm in res.atoms:
1310
+ if atm.name not in ['C', 'N', 'O', 'CA', 'OXT']:
1311
+ sum_SC_Mass += atm.mass
1312
+ SC_COM += atm.mass * np.array([atm.xx, atm.xy, atm.xz])
1313
+ elif atm.name == 'CA':
1314
+ CA_COM[0] = atm.xx
1315
+ CA_COM[1] = atm.xy
1316
+ CA_COM[2] = atm.xz
1317
+ if sum_SC_Mass == 0:
1318
+ is_gly = True
1319
+ else:
1320
+ is_gly = False
1321
+ SC_COM /= sum_SC_Mass
1322
+
1323
+ if self.casm == 0:
1324
+ cg_atm = pmd.topologyobjects.Atom(name=atomname_prefix+self.ca_prefix,
1325
+ type=self.ca_prefix+str(idx+1), charge=self.refcharge[res.name],
1326
+ mass=self.aaSCmass[res.name], number=idx_atm+1)
1327
+ cg_atm.xx = CA_COM[0]
1328
+ cg_atm.xy = CA_COM[1]
1329
+ cg_atm.xz = CA_COM[2]
1330
+ cg_structure.add_atom(cg_atm, resname_prefix+str(idx+1), resid, segid=segid, chain=res.chain)
1331
+ idx_atm += 1
1332
+ ca_list.append(cg_atm)
1333
+ else:
1334
+ ca_atm = pmd.topologyobjects.Atom(name=atomname_prefix+self.ca_prefix,
1335
+ type=self.ca_prefix+str(idx+1), charge=0.0,
1336
+ mass=CA_Mass, number=idx_atm+1)
1337
+ ca_atm.xx = CA_COM[0]
1338
+ ca_atm.xy = CA_COM[1]
1339
+ ca_atm.xz = CA_COM[2]
1340
+ cg_structure.add_atom(ca_atm, resname_prefix+str(idx+1), resid, segid=segid, chain=res.chain)
1341
+ idx_atm += 1
1342
+ ca_list.append(ca_atm)
1343
+
1344
+ if not is_gly:
1345
+ sc_atm = pmd.topologyobjects.Atom(name=atomname_prefix+self.sc_prefix,
1346
+ type=self.sc_prefix+str(idx+1), charge=self.refcharge[res.name],
1347
+ mass=SC_Mass, number=idx_atm+1)
1348
+ sc_atm.xx = SC_COM[0]
1349
+ sc_atm.xy = SC_COM[1]
1350
+ sc_atm.xz = SC_COM[2]
1351
+ cg_structure.add_atom(sc_atm, resname_prefix+str(idx+1), resid, segid=segid, chain=res.chain)
1352
+ idx_atm += 1
1353
+
1354
+ # Assign domain id to atom
1355
+ if self.ndomain != 0:
1356
+ self.logger.debug('Assign domain id to each atom')
1357
+ id_domain = []
1358
+ for atm in cg_structure.atoms:
1359
+ res_id = atm.residue.idx+1
1360
+ found = False
1361
+ for i, di in enumerate(self.dom):
1362
+ if res_id >= di[0] and res_id <= di[1]:
1363
+ id_domain.append(i)
1364
+ found = True
1365
+ break
1366
+ if not found:
1367
+ self.logger.error('ERROR: %s is not located in any domain.'%atm)
1368
+ sys.exit()
1369
+ self.logger.debug('')
1370
+
1371
+ # Write psf, cor and top
1372
+ output_prefix = self.pdbfile.strip().split('/')[-1].split('.pdb')[0]
1373
+ if self.casm == 1:
1374
+ output_prefix += '_ca-cb'
1375
+ else:
1376
+ output_prefix += '_ca'
1377
+ self.logger.info('Create psf')
1378
+ psffile = self.create_psf(cg_structure, ca_list, output_prefix)
1379
+
1380
+ self.logger.debug('Create cor')
1381
+ corfile = os.path.join(self.outdir, output_prefix+'.cor')
1382
+ self.logger.info(f'Writing {corfile}')
1383
+ cg_structure.save(corfile, overwrite=True, format='charmmcrd')
1384
+
1385
+ self.logger.debug('Create top')
1386
+ topfile = self.Create_rtf(cg_structure, output_prefix)
1387
+
1388
+ # Prepare FF parameters
1389
+ self.logger.info("Determining native contacts")
1390
+ dist_map = np.zeros((len(cg_structure.atoms), len(cg_structure.atoms)))
1391
+ for idx_1, atm_1 in enumerate(cg_structure.atoms):
1392
+ for idx_2, atm_2 in enumerate(cg_structure.atoms):
1393
+ dist_map[idx_1, idx_2] = self.calc_distance(atm_1, atm_2)
1394
+ self.logger.info("Finished calculating distance matrix")
1395
+
1396
+ ## Compute native contacts between side-chains
1397
+ self.logger.info("Determining side-chains - side-chains contacts")
1398
+ native_ss_map = np.zeros((len(cg_structure.residues), len(cg_structure.residues)))
1399
+ for i in range(len(cg_structure.residues)-3):
1400
+ res_1 = heavy_protein.residues[i]
1401
+ for j in range(i+3, len(cg_structure.residues)): # separate by 2 residues
1402
+ res_2 = heavy_protein.residues[j]
1403
+ found = False
1404
+ for atm_1 in res_1.atoms:
1405
+ for atm_2 in res_2.atoms:
1406
+ if not atm_1.name in ['C', 'N', 'O', 'CA', 'OXT'] and not atm_2.name in ['C', 'N', 'O', 'CA', 'OXT']:
1407
+ dij = self.calc_distance(atm_1, atm_2)
1408
+ if dij <= self.heav_cut:
1409
+ native_ss_map[i,j] = 1
1410
+ native_ss_map[j,i] = 1
1411
+ found = True
1412
+ break
1413
+ if found:
1414
+ break
1415
+ ## Compute native contacts between backbone and side-chains
1416
+ self.logger.info("Determining backbone - side-chains contacts")
1417
+ native_bsc_map = np.zeros((len(cg_structure.residues), len(cg_structure.residues)))
1418
+ for i in range(len(cg_structure.residues)):
1419
+ res_1 = heavy_protein.residues[i]
1420
+ for j in range(len(cg_structure.residues)):
1421
+ res_2 = heavy_protein.residues[j]
1422
+ if i < j-2 or i > j+2: # separate by 2 residues
1423
+ found = False
1424
+ for atm_1 in res_1.atoms:
1425
+ for atm_2 in res_2.atoms:
1426
+ if atm_1.name in ['C', 'N', 'O', 'CA', 'OXT'] and atm_2.name not in ['C', 'N', 'O', 'CA', 'OXT']:
1427
+ dij = self.calc_distance(atm_1, atm_2)
1428
+ if dij <= self.heav_cut:
1429
+ native_bsc_map[i,j] = 1
1430
+ found = True
1431
+ break
1432
+ if found:
1433
+ break
1434
+ self.logger.info('# nat sc-sc contacts %d, # nat bb-sc contacts %d, and # non-nat sc-sc %d' % (np.sum(native_ss_map)/2,
1435
+ np.sum(native_bsc_map), (len(cg_structure.residues)-3)*(len(cg_structure.residues)-2)/2 - np.sum(native_ss_map)/2))
1436
+
1437
+ ## Determine hydrogen bonds that are present using STRIDE,
1438
+ ## and assign to Calpha-Calpha pairs. Also secondary structural elements
1439
+ ## within the native structure.
1440
+ self.logger.info("Determining the presence of hydrogen bonds using STRIDE")
1441
+ native_hb_map = np.zeros((len(cg_structure.residues), len(cg_structure.residues)))
1442
+ helical_list = np.zeros(len(cg_structure.residues))
1443
+ hb_ene_map = np.zeros((len(cg_structure.residues), len(cg_structure.residues)))
1444
+ #screen_out = os.popen(f'stride -h %s'%self.pdbfile).readlines()
1445
+ stride_cmd = f'{self.stride_path} -h {self.pdbfile}'
1446
+ #print(stride_cmd)
1447
+ screen_out = subprocess.run(stride_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1448
+ screen_out = screen_out.stdout.decode('utf-8', errors='replace').splitlines()
1449
+ #screen_out = os.popen(stride_cmd).read().splitlines()
1450
+ #print(screen_out)
1451
+
1452
+ for line in screen_out:
1453
+ line = line.strip()
1454
+ resid = 0
1455
+ if line.startswith('ASD '):
1456
+ if 'Helix' in line.split()[6]:
1457
+ helical_list[resid] = 1
1458
+ resid += 1
1459
+ if line.startswith('ACC ') or line.startswith('DNR '):
1460
+ # Get H-bonding info
1461
+ resid_1 = int(line[16:20])+1
1462
+ resid_2 = int(line[36:40])+1
1463
+ chainid_1 = line[8:10].strip()
1464
+ if chainid_1 == '-':
1465
+ chainid_1 = ''
1466
+ chainid_2 = line[28:30].strip()
1467
+ if chainid_2 == '-':
1468
+ chainid_2 = ''
1469
+ found = [0, 0]
1470
+ for idx, res in enumerate(cg_structure.residues):
1471
+ if res.number == resid_1 and res.chain == chainid_1:
1472
+ idx_1 = idx
1473
+ found[0] = 1
1474
+ elif res.number == resid_2 and res.chain == chainid_2:
1475
+ idx_2 = idx
1476
+ found[1] = 1
1477
+ if sum(found) == 2:
1478
+ break
1479
+ if sum(found) != 2:
1480
+ self.logger.error("ERROR: Cannot find residue in parmed structure according to the Hbond info.\n %s"%line)
1481
+ sys.exit()
1482
+ if chainid_1 == chainid_2:
1483
+ if idx_1 < idx_2:
1484
+ if native_hb_map[idx_1, idx_2] == 1:
1485
+ if helical_list[idx_1] == 1 and helical_list[idx_2] == 1:
1486
+ hb_ene_map[idx_1, idx_2] = 2*self.single_hbond_ene_helix
1487
+ hb_ene_map[idx_2, idx_1] = 2*self.single_hbond_ene_helix
1488
+ else:
1489
+ hb_ene_map[idx_1, idx_2] = 2*self.single_hbond_ene
1490
+ hb_ene_map[idx_2, idx_1] = 2*self.single_hbond_ene
1491
+ else:
1492
+ native_hb_map[idx_1, idx_2] = 1
1493
+ native_hb_map[idx_2, idx_1] = 1
1494
+ if helical_list[idx_1] == 1 and helical_list[idx_2] == 1:
1495
+ hb_ene_map[idx_1, idx_2] = self.single_hbond_ene_helix
1496
+ hb_ene_map[idx_2, idx_1] = self.single_hbond_ene_helix
1497
+ else:
1498
+ hb_ene_map[idx_1, idx_2] = self.single_hbond_ene
1499
+ hb_ene_map[idx_2, idx_1] = self.single_hbond_ene
1500
+ else:
1501
+ native_hb_map[idx_1, idx_2] = 1
1502
+ native_hb_map[idx_2, idx_1] = 1
1503
+ hb_ene_map[idx_1, idx_2] = self.single_hbond_ene
1504
+ hb_ene_map[idx_2, idx_1] = self.single_hbond_ene
1505
+ num_hb = 0
1506
+ for i in range(len(cg_structure.residues)-1):
1507
+ for j in range(i+1, len(cg_structure.residues)):
1508
+ if native_hb_map[i,j] == 1:
1509
+ num_hb += 1
1510
+ #print('%d %.4f, %d %d'%(num_hb, hb_ene_map[i,j], i+1, j+1))
1511
+ self.logger.info('# of unique Hbonds %d'%num_hb)
1512
+ native_contact_map = np.zeros((len(cg_structure.residues), len(cg_structure.residues)))
1513
+ for i in range(len(cg_structure.residues)):
1514
+ for j in range(len(cg_structure.residues)):
1515
+ if native_ss_map[i,j] == 1 or native_bsc_map[i,j] == 1 or native_hb_map[i,j] == 1:
1516
+ native_contact_map[i,j] == 1
1517
+
1518
+ ## Write prm file ##
1519
+ self.logger.debug('Create prm')
1520
+ prmfile = self.pdbfile.strip().split('/')[-1].split('.pdb')[0] + '_nscal' + str(self.nscal) + '_fnn' + str(self.fnn) + '_go_' + self.potential_name.lower() + '.prm'
1521
+ prmfile = os.path.join(self.outdir, prmfile)
1522
+ self.logger.info(f'Writing {prmfile}')
1523
+
1524
+ f = open(prmfile, 'w')
1525
+ f.write('* This CHARMM .param file describes a Go model of %s\n'%(self.pdbfile.split('/')[-1]))
1526
+ f.write('*\n\n')
1527
+ # Atomic mass
1528
+ f.write('ATOM\n')
1529
+ for idx, atm in enumerate(cg_structure.atoms):
1530
+ f.write('MASS %-5s %-8s %-10.6f\n'%(str(idx+1), atm.type, atm.mass))
1531
+ f.write('\n')
1532
+ # Bond section (non-go bondlength for both models)
1533
+ f.write('BOND\n')
1534
+ kb = 50.0
1535
+ for idx, bond in enumerate(cg_structure.bonds):
1536
+ if self.bondlength_go == 0:
1537
+ if bond.atom2.name == (atomname_prefix+self.sc_prefix):
1538
+ res_idx = bond.atom2.residue.idx
1539
+ bond_length = self.lbs_nongo[heavy_protein.residues[res_idx].name]
1540
+ f.write('%-8s%-10s%-12.6f%-9.6f\n'%(bond.atom1.type, bond.atom2.type, kb, bond_length))
1541
+ else:
1542
+ f.write('%-8s%-10s%-12.6f%-9.6f\n'%(bond.atom1.type, bond.atom2.type, kb, 3.81))
1543
+ else:
1544
+ f.write('%-8s%-10s%-12.6f%-9.6f\n'%(bond.atom1.type, bond.atom2.type, kb, bond.measure()))
1545
+ f.write('\n')
1546
+ # Angle section
1547
+ f.write('ANGLE\n')
1548
+ ka = 30.0
1549
+ for idx, angle in enumerate(cg_structure.angles):
1550
+ if self.angle_dw == 0:
1551
+ f.write('%-8s%-8s%-10s%11.6f%11.6f\n'%(angle.atom1.type, angle.atom2.type, angle.atom3.type,
1552
+ ka, angle.measure()))
1553
+ else:
1554
+ if angle.atom1 == (atomname_prefix+self.sc_prefix):
1555
+ res_idx = angle.atom1.residue.idx
1556
+ angle_value = self.ang_sb_nongo[heavy_protein.residues[res_idx].name]
1557
+ f.write('%-8s%-8s%-10s%11.6f%11.6f\n'%(angle.atom1.type, angle.atom2.type, angle.atom3.type,
1558
+ ka, angle_value))
1559
+ elif angle.atom3 == (atomname_prefix+self.sc_prefix):
1560
+ res_idx = angle.atom3.residue.idx
1561
+ angle_value = self.ang_bs_nongo[heavy_protein.residues[res_idx].name]
1562
+ f.write('%-8s%-8s%-10s%11.6f%11.6f\n'%(angle.atom1.type, angle.atom2.type, angle.atom3.type,
1563
+ ka, angle_value))
1564
+ else:
1565
+ f.write('%-8s%-8s%-10s 106.4 91.7 26.3 130.0 0.1 4.3\n'%(angle.atom1.type, angle.atom2.type, angle.atom3.type))
1566
+ f.write('\n')
1567
+ # Dihedral section
1568
+ f.write('DIHEDRAL\n')
1569
+ f.write('! backbone dihedrals\n')
1570
+ for idx, dihedral in enumerate(cg_structure.dihedrals):
1571
+ if self.dihedral_go == 1: # Use Go backbone dihedral angles
1572
+ delta = 1*dihedral.measure()-180
1573
+ if self.casm == 1:
1574
+ if helical_list[dihedral.atom2.residue.idx] == 1 and helical_list[dihedral.atom3.residue.idx] == 1: # helical
1575
+ kd = 0.30
1576
+ else: # not helical
1577
+ kd = 0.55
1578
+ f.write('%-5s %-5s %-5s %-7s%-10.6f%-3d%-10.5f\n'%(dihedral.atom1.type, dihedral.atom2.type, dihedral.atom3.type,
1579
+ dihedral.atom4.type, kd, 1, delta))
1580
+ delta = 3*dihedral.measure()-180
1581
+ if helical_list[dihedral.atom2.residue.idx] == 1 and helical_list[dihedral.atom3.residue.idx] == 1: # helical
1582
+ kd = 0.15
1583
+ else: # not helical
1584
+ kd = 0.275
1585
+ f.write('%-5s %-5s %-5s %-7s%-10.6f%-3d%-10.5f\n'%(dihedral.atom1.type, dihedral.atom2.type, dihedral.atom3.type,
1586
+ dihedral.atom4.type, kd, 3, delta))
1587
+ else:
1588
+ if helical_list[dihedral.atom2.residue.idx] == 1 and helical_list[dihedral.atom3.residue.idx] == 1: # helical
1589
+ kd = 0.75
1590
+ else: # not helical
1591
+ kd = 0.75
1592
+ f.write('%-5s %-5s %-5s %-7s%-10.6f%-3d%-10.5f\n'%(dihedral.atom1.type, dihedral.atom2.type, dihedral.atom3.type,
1593
+ dihedral.atom4.type, kd, 1, delta))
1594
+ delta = 3*dihedral.measure()-180
1595
+ if helical_list[dihedral.atom2.residue.idx] == 1 and helical_list[dihedral.atom3.residue.idx] == 1: # helical
1596
+ kd = 0.275
1597
+ else: # not helical
1598
+ kd = 0.275
1599
+ f.write('%-5s %-5s %-5s %-7s%-10.6f%-3d%-10.5f\n'%(dihedral.atom1.type, dihedral.atom2.type, dihedral.atom3.type,
1600
+ dihedral.atom4.type, kd, 3, delta))
1601
+ else: # Use Non-go dihedrals
1602
+ for i in range(4):
1603
+ res_idx_1 = dihedral.atom2.residue.idx
1604
+ res_idx_2 = dihedral.atom3.residue.idx
1605
+ [kd, period, delta] = self.dihedb_nongo[self.res2n[heavy_protein.residues[res_idx_1].name]][self.res2n[heavy_protein.residues[res_idx_2].name]][i]
1606
+ f.write('%-5s %-5s %-5s %-7s%-10.6f%-3d%-10.5f\n'%(dihedral.atom1.type, dihedral.atom2.type, dihedral.atom3.type,
1607
+ dihedral.atom4.type, kd, period, delta))
1608
+ f.write('\n')
1609
+ # Improper dihedral section
1610
+ f.write('IMPHI\n')
1611
+ f.write('! sidechain improper dihedrals to maintain chirality\n')
1612
+ if self.casm == 1:
1613
+ for idx, improper in enumerate(cg_structure.impropers):
1614
+ if self.improperdihed_go == 1:
1615
+ angle = improper.measure()
1616
+ else:
1617
+ res_idx = improper.atom1.residue.idx
1618
+ angle = self.improper_nongo[heavy_protein.residues[res_idx].name] # use transferable improper dihedral
1619
+ delta = angle + 180
1620
+ kd = 20*abs(self.avg_mj)
1621
+ f.write('%-5s %-5s %-5s %-7s%.6f %-3d%-10.5f\n'%(improper.atom1.type, improper.atom2.type, improper.atom3.type,
1622
+ improper.atom4.type, kd, 1, delta))
1623
+ f.write('\n')
1624
+
1625
+ ## nonbonded section
1626
+ f.write('NONBONDED NBXMOD 3 ATOM CDIEL SWITCH VATOM VDISTANCE VSWITCH -\n')
1627
+ f.write('CUTNB 32 CTOFNB 20 CTONNB 18 EPS 78.5 WMIN 1.5 E14FAC 1.0\n')
1628
+ f.write('!atom e_min r_min/2\n')
1629
+ # if using the C-alpha only model do some preprocessing to determine the collision
1630
+ # diameter of non-native interactions according to the Karanacolis-Brooks
1631
+ # algorithm
1632
+ if self.casm != 1:
1633
+ sigmin = 1000000*np.ones(len(cg_structure.residues))
1634
+ if self.potential_name.startswith('GENERIC'):
1635
+ for idx, res in enumerate(cg_structure.residues):
1636
+ sigmin[idx] = 2*self.rvdw[heavy_protein.residues[idx].name]
1637
+ else:
1638
+ # determine the collision diameter
1639
+ for i in range(len(cg_structure.residues)):
1640
+ for j in range(len(cg_structure.residues)):
1641
+ if native_contact_map[i,j] != 1 and (j < i-2 or j > i+2):
1642
+ if dist_map[i,j] < sigmin[i]:
1643
+ sigmin[i] = dist_map[i,j]
1644
+ for idx, atm in enumerate(cg_structure.atoms):
1645
+ eps2 = -0.000132
1646
+ rmin2 = sigmin[idx]*2**(1/6)/2
1647
+ temp = self.fnn*rmin2
1648
+ f.write("%-9s%-5.1f%-9.6f %-10.6f\n"%(atm.type, 0.0, eps2, temp))
1649
+ else:
1650
+ eps2 = '-1e-12' #!!!! SYSTem dependent !!!!!!!!
1651
+ rmin2 = 20.0
1652
+ for idx, atm in enumerate(cg_structure.atoms):
1653
+ if atm.name == (atomname_prefix+self.ca_prefix):
1654
+ f.write("%-9s%-5.1f%-s %-10.6f\n"%(atm.type, 0.0, eps2, rmin2))
1655
+ else:
1656
+ t1 = 1
1657
+ t2 = (t1*(2*self.rvdw[heavy_protein.residues[atm.residue.idx].name]*2**(1/6))**12/(1e-12))**(1/12)
1658
+ temp = self.fnn*t2/2
1659
+ f.write("%-9s%-5.1f%-s %-10.6f\n"%(atm.type, 0.0, eps2, temp))
1660
+ f.write('\n')
1661
+ ## NBFIX section
1662
+ f.write('NBFIX\n')
1663
+ ### native side-chain pairs and backbone Hbonding
1664
+ if self.casm == 1:
1665
+ f.write('! b-b due to Hbonding\n')
1666
+ totene_bb = 0
1667
+ for i in range(len(cg_structure.residues)-1):
1668
+ for j in range(i+1, len(cg_structure.residues)):
1669
+ if native_hb_map[i,j] == 1:
1670
+ atm_i = cg_structure.residues[i].atoms[0]
1671
+ atm_j = cg_structure.residues[j].atoms[0]
1672
+ comment = ''
1673
+ if self.ndomain == 0: # No domain defined
1674
+ ene = hb_ene_map[i,j]
1675
+ else: # Domain defined
1676
+ if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
1677
+ di = id_domain[atm_i.idx]
1678
+ comment = '! in Domain %d'%(di+1)
1679
+ ene = hb_ene_map[i,j]
1680
+ else: # in the interface
1681
+ di = id_domain[atm_i.idx]
1682
+ dj = id_domain[atm_j.idx]
1683
+ comment = '! in Interface %d | %d'%(di+1, dj+1)
1684
+ ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
1685
+ #ene = self.dom_nscal[ii] # ??? Use nscal at interface
1686
+ ene = hb_ene_map[i,j] # ??? Use the same energy
1687
+ f.write('%-8s%-11s%-13.6f%-11.6f%s\n'%(atm_i.type, atm_j.type, -ene, dist_map[atm_i.idx, atm_j.idx], comment))
1688
+ totene_bb += ene
1689
+
1690
+ totene_sc = 0
1691
+ totene_bsc = 0
1692
+ if self.potential_name.startswith('GENERIC'): # C-alpha - side chain model Generic non-bond interactions
1693
+ f.write('!Generic interactions between unstructured portions of this protein\n')
1694
+ # Print out NBFIX energy values
1695
+ for i in range(len(cg_structure.residues)-3):
1696
+ resname_1 = heavy_protein.residues[cg_structure.residues[i].idx].name
1697
+ for j in range(i+3, len(cg_structure.residues)):
1698
+ resname_2 = heavy_protein.residues[cg_structure.residues[j].idx].name
1699
+ atm_i = cg_structure.residues[i].atoms[1] # ??? should be side-chain
1700
+ atm_j = cg_structure.residues[j].atoms[1] # ??? should be side-chain
1701
+ temp = self.rvdw[resname_1] + self.rvdw[resname_2]
1702
+ ene=(0.3/10)*self.eps[self.res2n[resname_1]][self.res2n[resname_2]]
1703
+ f.write('%-8s%-11s%-13.6f%-11.6f\n'%(atm_i.type, atm_j.type, -ene, temp))
1704
+ else: # Go non-bond interactions
1705
+ f.write('! native side-chain interactions\n')
1706
+ for i in range(len(cg_structure.residues)-1):
1707
+ resname_1 = heavy_protein.residues[cg_structure.residues[i].idx].name
1708
+ for j in range(i+1, len(cg_structure.residues)):
1709
+ resname_2 = heavy_protein.residues[cg_structure.residues[j].idx].name
1710
+ if native_ss_map[i,j] == 1:
1711
+ atm_i = cg_structure.residues[i].atoms[1]
1712
+ atm_j = cg_structure.residues[j].atoms[1]
1713
+ if self.eps[self.res2n[resname_1]][self.res2n[resname_2]] == 0:
1714
+ self.logger.error('ERROR 1: Well depth equal to zero!!! %s - %s'%(resname_1, resname_2))
1715
+ sys.exit()
1716
+ comment = ''
1717
+ if self.ndomain == 0: # No domain defined
1718
+ ene = self.eps[self.res2n[resname_1]][self.res2n[resname_2]]
1719
+ else: # If domain is defined
1720
+ if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
1721
+ di = id_domain[atm_i.idx]
1722
+ comment = '! in Domain %d'%(di+1)
1723
+ ene = self.eps[self.res2n[resname_1]][self.res2n[resname_2]] * self.dom_nscal[di]
1724
+ else: # in the interface
1725
+ di = id_domain[atm_i.idx]
1726
+ dj = id_domain[atm_j.idx]
1727
+ comment = '! in Interface %d | %d'%(di+1, dj+1)
1728
+ ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
1729
+ ene = self.eps[self.res2n[resname_1]][self.res2n[resname_2]] * self.dom_nscal[ii]
1730
+ f.write('%-8s%-11s%-13.6f%-11.6f%s\n'%(atm_i.type, atm_j.type, -ene, dist_map[atm_i.idx, atm_j.idx], comment))
1731
+ totene_sc += ene
1732
+
1733
+ f.write('! backbone-sidechain interactions\n')
1734
+ for i in range(len(cg_structure.residues)):
1735
+ resname_1 = heavy_protein.residues[cg_structure.residues[i].idx].name
1736
+ for j in range(len(cg_structure.residues)):
1737
+ resname_2 = heavy_protein.residues[cg_structure.residues[j].idx].name
1738
+ if native_bsc_map[i,j] == 1:
1739
+ atm_i = cg_structure.residues[i].atoms[0] # backbone
1740
+ atm_j = cg_structure.residues[j].atoms[1] # sidechain
1741
+ comment = ''
1742
+ if self.ndomain == 0: # No domain defined
1743
+ ene = self.ene_bsc
1744
+ else: # If domain is defined
1745
+ if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
1746
+ di = id_domain[atm_i.idx]
1747
+ comment = '! in Domain %d'%(di+1)
1748
+ ene = self.ene_bsc
1749
+ else: # in the interface
1750
+ di = id_domain[atm_i.idx]
1751
+ dj = id_domain[atm_j.idx]
1752
+ comment = '! in Interface %d | %d'%(di+1, dj+1)
1753
+ ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
1754
+ ene = self.ene_bsc * self.dom_nscal[ii] # Rescaled energy
1755
+ f.write('%-8s%-11s%-13.6f%-11.6f%s\n'%(atm_i.type, atm_j.type, -ene, dist_map[atm_i.idx, atm_j.idx], comment))
1756
+ totene_bsc += ene
1757
+
1758
+ f.write('\n')
1759
+ f.write('! %.4f, %.4f, %.4f\n'%(totene_bb, totene_sc, totene_bsc))
1760
+ else:
1761
+ if not self.potential_name.startswith('GENERIC'): # C-alpha model
1762
+ f.write('! b-b due to Hbonding plus native side-chain interactions plus backbone-sidechain interactions\n')
1763
+ # Add up non-bonded energies
1764
+ for i in range(len(cg_structure.residues)-1):
1765
+ resname_1 = heavy_protein.residues[cg_structure.residues[i].idx].name
1766
+ for j in range(i+1, len(cg_structure.residues)):
1767
+ resname_2 = heavy_protein.residues[cg_structure.residues[j].idx].name
1768
+ atm_i = cg_structure.residues[i].atoms[0]
1769
+ atm_j = cg_structure.residues[j].atoms[0]
1770
+ ene = 0
1771
+ # hydrogen bonds
1772
+ if native_hb_map[i,j] == 1:
1773
+ if self.ndomain == 0: # No domain defined
1774
+ ene += hb_ene_map[i,j]
1775
+ else: # Domain defined
1776
+ if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
1777
+ di = id_domain[atm_i.idx]
1778
+ ene += hb_ene_map[i,j]
1779
+ else: # in the interface
1780
+ di = id_domain[atm_i.idx]
1781
+ dj = id_domain[atm_j.idx]
1782
+ ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
1783
+ ene += hb_ene_map[i,j] # Use the same energy
1784
+ # sc-sc interactions
1785
+ if native_ss_map[i,j] == 1:
1786
+ if self.eps[self.res2n[resname_1]][self.res2n[resname_2]] == 0:
1787
+ self.logger.error('ERROR 1: Well depth equal to zero!!! %s - %s'%(resname_1, resname_2))
1788
+ sys.exit()
1789
+ if self.ndomain == 0: # No domain defined
1790
+ ene += self.eps[self.res2n[resname_1]][self.res2n[resname_2]]
1791
+ else: # Domain defined
1792
+ if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
1793
+ di = id_domain[atm_i.idx]
1794
+ ene += self.eps[self.res2n[resname_1]][self.res2n[resname_2]] * self.dom_nscal[di]
1795
+ else: # in the interface
1796
+ di = id_domain[atm_i.idx]
1797
+ dj = id_domain[atm_j.idx]
1798
+ ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
1799
+ ene += self.eps[self.res2n[resname_1]][self.res2n[resname_2]] * self.dom_nscal[ii]
1800
+ # b-sc interactions
1801
+ if native_bsc_map[i,j] == 1:
1802
+ if self.ndomain == 0: # No domain defined
1803
+ ene += self.ene_bsc
1804
+ else: # Domain defined
1805
+ if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
1806
+ di = id_domain[atm_i.idx]
1807
+ ene += self.ene_bsc
1808
+ else: # in the interface
1809
+ di = id_domain[atm_i.idx]
1810
+ dj = id_domain[atm_j.idx]
1811
+ ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
1812
+ ene += self.ene_bsc # Use the same energy
1813
+ if native_bsc_map[j,i] == 1:
1814
+ if self.ndomain == 0: # No domain defined
1815
+ ene += self.ene_bsc
1816
+ else: # Domain defined
1817
+ if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
1818
+ di = id_domain[atm_i.idx]
1819
+ ene += self.ene_bsc
1820
+ else: # in the interface
1821
+ di = id_domain[atm_i.idx]
1822
+ dj = id_domain[atm_j.idx]
1823
+ ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
1824
+ ene += self.ene_bsc # Use the same energy
1825
+
1826
+ # Write NBFIX
1827
+ if ene != 0:
1828
+ comment = ''
1829
+ if self.ndomain != 0:
1830
+ if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
1831
+ di = id_domain[atm_i.idx]
1832
+ comment = '! in Domain %d'%(di+1)
1833
+ else: # in the interface
1834
+ di = id_domain[atm_i.idx]
1835
+ dj = id_domain[atm_j.idx]
1836
+ comment = '! in Interface %d | %d'%(di+1, dj+1)
1837
+ f.write('%-8s%-11s%-13.6f%-11.6f%s\n'%(atm_i.type, atm_j.type, -ene, dist_map[atm_i.idx, atm_j.idx], comment))
1838
+ else:
1839
+ f.write('!Generic interactions between unstructured portions of this protein\n')
1840
+ # Print out NBFIX energy values
1841
+ for i in range(len(cg_structure.residues)-3):
1842
+ resname_1 = heavy_protein.residues[cg_structure.residues[i].idx].name
1843
+ for j in range(i+3, len(cg_structure.residues)):
1844
+ resname_2 = heavy_protein.residues[cg_structure.residues[j].idx].name
1845
+ atm_i = cg_structure.residues[i].atoms[0]
1846
+ atm_j = cg_structure.residues[j].atoms[0]
1847
+ temp = self.rvdw[resname_1] + self.rvdw[resname_2]
1848
+ ene=(0.3/10)*self.eps[self.res2n[resname_1]][self.res2n[resname_2]]
1849
+ f.write('%-8s%-11s%-13.6f%-11.6f\n'%(atm_i.type, atm_j.type, -ene, temp))
1850
+ f.write('\nEND\n')
1851
+ f.close()
1852
+
1853
+ self.logger.debug('All done.')
1854
+ return {'cor': corfile, 'prm': prmfile, 'psf': psffile, 'top': topfile,}
1855
+ ##########################################################################################
1856
+
1857
+
1858
+ class BackMapping:
1859
+ """
1860
+ Take a C-alpha coarse grained structure and backmap it to the all-atom resolution
1861
+ """
1862
+ #############################################################################################################
1863
def __init__(self, nproc:int=1, outdir:str='./'):
    """
    Set up a backmapping run.

    Parameters
    ----------
    nproc : int
        Number of CPU threads. Stored as a string because the minimization
        methods pass it as the OpenMM ``'Threads'`` platform property.
    outdir : str
        Directory that receives the results; created if it does not exist.
    """
    import logging

    self.nproc = str(nproc)
    self.outdir = outdir

    # NOTE(review): no logger assignment is visible in this class. Fall back
    # to a module logger so the logging calls below (and in the other
    # methods) cannot raise AttributeError; an existing logger is left alone.
    if getattr(self, 'logger', None) is None:
        self.logger = logging.getLogger(__name__)

    if not os.path.exists(self.outdir):
        # exist_ok guards against another process creating it in between.
        os.makedirs(self.outdir, exist_ok=True)
        self.logger.info(f'Made directory: {self.outdir}')
1870
+
1871
+ #######################################################################################################
1872
+
1873
+ #######################################################################################################
1874
def backmap(self, cg_pdb:str, aa_pdb:str, ID:str):
    """
    Backmap a C-alpha coarse-grained structure to an all-atom structure.

    Parameters
    ----------
    cg_pdb : str
        Path to the coarse-grained structure whose coordinates are backmapped.
    aa_pdb : str
        Path to the all-atom reference structure. It is cleaned, used to build
        the ca-cb model, and supplies the residue names.
    ID : str
        Tag appended to the stem of ``cg_pdb`` to name the working directory
        and the output files; also forwarded to ``create_cg_model``.

    Notes
    -----
    All intermediate files are written to ``<outdir>/rebuild_<stem>_<ID>`` and
    the final structure is copied into its parent directory. The process
    working directory is changed during the call and is left at that parent
    directory afterwards, also when a step raises.
    """
    import shutil

    ##########################################################################
    self.logger.info(f"-> Cleaning PDB file {aa_pdb}")
    name = pathlib.Path(cg_pdb).stem + f'_{ID}'
    # Resolve everything before the first chdir: relative paths would
    # otherwise point to the wrong place once we are inside work_dir.
    cg_pdb_path = os.path.abspath(cg_pdb)
    work_dir = os.path.abspath(os.path.join(self.outdir, 'rebuild_'+name))
    parent_dir = os.path.dirname(work_dir)
    self.logger.info('%s %s', name, work_dir)

    os.makedirs(work_dir, exist_ok=True)

    aa_clean_pdb, aa_clean_pdb_outfile = self.clean_pdb(aa_pdb, work_dir, name)
    os.chdir(work_dir)
    try:
        self.logger.debug(' Done')
        ######################################################################

        ######################################################################
        # build ca-cb model
        self.logger.info(f"-> Building ca-cb model for {aa_clean_pdb_outfile}")
        (prefix, prm_file) = self.create_cg_model(aa_clean_pdb_outfile, ID)
        self.logger.debug(' Done')

        cacb_struct = pmd.load_file(prefix+'.psf')
        cacb_cor = pmd.load_file(prefix+'.cor')
        cacb_struct.coordinates = cacb_cor.coordinates
        ######################################################################

        ######################################################################
        # add SC beads to cg pdb
        self.logger.info("-> Adding side chain beads")
        cg_sc_struct = self.add_sc_beads(cg_pdb_path, cacb_struct)
        self.logger.debug(' Done')
        ######################################################################

        ######################################################################
        # run energy minimization for cacb model
        self.logger.info("-> Running energy minimization for ca-cb model")
        cg_sc_min_cor = self.cacb_energy_minimization(cg_sc_struct.positions, prefix, prm_file)
        aa_pdb_struct = pmd.load_file(aa_clean_pdb)
        # Copy the residue names of the cleaned all-atom structure onto the CG model.
        for index, aa_res in enumerate(aa_pdb_struct.residues):
            cg_sc_struct.residues[index].name = aa_res.name
        # Rename the beads to the ' CA' / ' SC' atom names written to the PDB.
        for atm in cg_sc_struct.atoms:
            if atm.name == 'A':
                atm.name = ' CA'
            elif atm.name == 'B':
                atm.name = ' SC'

        # Remove units to perform arithmetic
        coordinates_values = cg_sc_min_cor.value_in_unit(nanometer)

        # Shift the geometric center (unweighted mean position) to the origin
        geometric_center = np.mean(coordinates_values, axis=0)
        centered_coordinates = coordinates_values - geometric_center

        # Reapply the original unit
        centered_coordinates = centered_coordinates * nanometer

        cg_sc_struct.positions = centered_coordinates
        self.logger.info(f'cg_sc_struct.positions: {cg_sc_struct.positions[:10]}')
        target_name_mini_pdb = name+'_mini.pdb'
        cg_sc_struct.save(target_name_mini_pdb, overwrite=True)
        self.logger.debug(f'SAVED: {target_name_mini_pdb}')
        self.logger.debug(' Done')

        self.logger.info(f"-> Running Pulchra for {target_name_mini_pdb}")
        output_from_Pultra = self.Call_Pulchra(target_name_mini_pdb)

        ## remove the left over SC atoms that cause a template issue
        output_from_Pultra_cleaned = output_from_Pultra.replace('.pdb', '_cleaned.pdb')
        self.remove_sc_beads(output_from_Pultra, output_from_Pultra_cleaned)

        try:
            rec_pdb = self.OpenMM_vacuum_minimization(output_from_Pultra_cleaned, 500000)
            shutil.copy(rec_pdb, os.path.join(parent_dir, os.path.basename(rec_pdb)))
        except Exception:
            # Best effort: fall back to the un-minimized Pulchra structure,
            # but keep the traceback in the log.
            self.logger.warning('Failed to run OpenMM minimization. Use Pulchra result instead.',
                                exc_info=True)
            rec_pdb = name+'_rebuilt.pdb'
            shutil.copy(output_from_Pultra_cleaned, os.path.join(parent_dir, rec_pdb))
    finally:
        # Same end state as the former os.chdir('../'), now also on errors.
        os.chdir(parent_dir)

    self.logger.info(f'Backmapping from {cg_pdb} -> {rec_pdb}')
1967
+ #######################################################################################################
1968
+
1969
+ #######################################################################################################
1970
def clean_pdb(self, pdb, out_dir, name):
    """
    Write a copy of ``pdb`` that keeps only the listed amino-acid residues.

    Residues are renumbered 1..N in file order (on the loaded structure,
    before filtering). The result is saved as ``<out_dir>/<name>_clean.pdb``.

    Returns
    -------
    tuple
        ``(file name, path including out_dir)`` of the cleaned PDB.
    """
    standard_residues = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE',
                         'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL',
                         'HIE', 'HID', 'HIP']
    struct = pmd.load_file(pdb)

    # Per-atom selection mask: 1 keeps the atom, 0 drops it.
    keep_mask = np.zeros(len(struct.atoms))
    for position, residue in enumerate(struct.residues, start=1):
        residue.number = position
        if residue.name not in standard_residues:
            continue
        for atom in residue.atoms:
            keep_mask[atom.idx] = 1

    clean_name = f'{name}_clean.pdb'
    clean_pdb_outfile = os.path.join(out_dir, clean_name)
    self.logger.info(f'Writing {clean_pdb_outfile}')
    struct[keep_mask].save(clean_pdb_outfile, overwrite=True)
    return clean_name, clean_pdb_outfile
1987
+ #######################################################################################################
1988
+
1989
+ #######################################################################################################
1990
def create_psf(self, name):
    """
    Build ``<name>.psf`` from ``<name>.top`` and ``<name>.seq``.

    The first line of the sequence file is read as whitespace-separated
    residue names; each residue template is taken from the CHARMM topology.
    Bonds, angles, dihedrals and impropers are then generated around the
    beads named 'A', and every residue is put in segment 'A'.

    Raises
    ------
    ValueError
        If the first line of ``<name>.seq`` contains no residue names.
    """
    segid = 'A'
    parm = pmd.charmm.CharmmParameterSet(name+'.top')
    # Context manager closes the file even if parsing fails; only the
    # first line is needed, so the rest of the file is not read.
    with open(name+'.seq', 'r') as f:
        seq = f.readline().strip().split()
    if not seq:
        raise ValueError(f'No residue names found on the first line of {name}.seq')

    struct = pmd.Structure()
    for resname in seq:
        struct += parm.residues[resname].to_structure()

    # Assign masses from the atom types and collect the 'A' beads in order.
    ca_list = []
    for atm in struct.atoms:
        atm.mass = parm.atom_types[atm.type].mass
        if atm.name == 'A':
            ca_list.append(atm)

    # create backbone bonds between consecutive 'A' beads
    for first, second in zip(ca_list, ca_list[1:]):
        struct.bonds.append(pmd.topologyobjects.Bond(first, second))

    # create Angles: one per unordered pair of bond partners of each atom
    for atm in struct.atoms:
        partners = atm.bond_partners
        for i in range(len(partners)-1):
            for j in range(i+1, len(partners)):
                struct.angles.append(pmd.topologyobjects.Angle(partners[i], atm, partners[j]))

    # create Dihedrals over every four consecutive 'A' beads
    for i in range(len(ca_list)-3):
        struct.dihedrals.append(pmd.topologyobjects.Dihedral(ca_list[i], ca_list[i+1],
                                                             ca_list[i+2], ca_list[i+3]))

    # create Impropers for interior residues that have a second bead
    for i in range(1, len(ca_list)-1):
        if len(ca_list[i].residue.atoms) > 1:
            b_bead = ca_list[i].residue.atoms[1]
            struct.impropers.append(pmd.topologyobjects.Improper(ca_list[i], ca_list[i-1],
                                                                 ca_list[i+1], b_bead))

    for res in struct.residues:
        res.segid = segid
    struct.save(name+'.psf', overwrite=True)
2025
+ #######################################################################################################
2026
+
2027
+ #######################################################################################################
2028
def create_cg_model(self, pdb, ID):
    """
    Build the ca-cb coarse-grained model for ``pdb`` in ``./create_model``.

    ``CoarseGrain`` is run inside ``./create_model`` (relative to the current
    working directory) with ``nscal=10.0``, ``potential_name='mj'`` and
    ``casm=1``. The generated ``*.psf``, ``*.cor``, ``*.top`` and ``*.prm``
    files are then copied one level up, and the working directory is
    returned to where it was on entry.

    Returns
    -------
    tuple
        ``(prefix, prm_name)``: ``<stem>_ca-cb`` and
        ``<stem>_nscal10.0_fnn1_go_mj.prm``, where ``<stem>`` is the file
        stem of ``pdb``.

    Raises
    ------
    SystemExit
        With a non-zero status if ``<prefix>.psf`` was not produced.
    """
    import glob
    import shutil

    self.logger.info(f'Creating CASM CG model from {pdb}')
    # Resolve before chdir so a relative path still points to the same file.
    pdb = os.path.abspath(pdb)
    parent_dir = os.getcwd()
    build_dir = os.path.join(parent_dir, 'create_model')
    os.makedirs(build_dir, exist_ok=True)
    os.chdir(build_dir)
    try:
        self.logger.debug(os.getcwd())

        CoarseGrainer = CoarseGrain(outdir='./',
                                    ID=ID,
                                    pdbfile=pdb,
                                    nscal=10.0,
                                    potential_name='mj',
                                    casm=1)
        self.logger.debug(CoarseGrainer)

        CoarseGrainer.run()
        self.logger.debug(os.getcwd())

        name = pathlib.Path(pdb).stem
        prefix = name+'_ca-cb'
        # NOTE(review): file name is assumed to follow CoarseGrain's naming
        # for the options used above - confirm if those options change.
        prm_name = name + '_nscal10.0_fnn1_go_mj.prm'
        self.logger.debug(f'name: {name}')
        self.logger.debug(f'prefix: {prefix}')
        self.logger.debug(f'prm_name: {prm_name}')

        if not os.path.exists(prefix+'.psf'):
            self.logger.error("Error: failed to create CG model from %s\n\n"%pdb)
            # Non-zero status: a bare sys.exit() would report success.
            sys.exit(1)

        for pattern in ('*.psf', '*.cor', '*.top', '*.prm'):
            for path in glob.glob(pattern):
                shutil.copy(path, parent_dir)
    finally:
        os.chdir(parent_dir)
    return (prefix, prm_name)
2064
+ #######################################################################################################
2065
+
2066
+ #######################################################################################################
2067
def add_sc_beads(self, cg_pdb, cacb_struct):
    """
    Place the ca-cb template onto the coordinates stored in ``cg_pdb``.

    For each residue of a copy of ``cacb_struct``, the first atom is moved to
    the matching row of the ``cg_pdb`` coordinates. If the residue has a
    second atom, it is placed at the same position shifted along +x by the
    distance between the two atoms in ``cacb_struct``.

    Parameters
    ----------
    cg_pdb : str
        Coordinate file readable by ``pmd.load_file``; row ``i`` is used for
        residue ``i`` of the template.
    cacb_struct : parmed Structure
        ca-cb template with coordinates; it is not modified.

    Returns
    -------
    parmed Structure
        The repositioned copy.
    """
    self.logger.info(f'Adding SC beads to {cg_pdb}')
    cor = np.asarray(pmd.load_file(cg_pdb).coordinates)
    # NOTE(review): the previous code always took coordinates[0], which only
    # yields an (natom, 3) array for frames-first input. Taking the first
    # frame only when a frame axis exists keeps that case and also accepts
    # plain (natom, 3) arrays - confirm which file types are passed here.
    if cor.ndim == 3:
        cor = cor[0]
    self.logger.info('%s %s', cor, cor.shape)

    new_cacb_struct = cacb_struct.copy(pmd.Structure)
    # The template is never modified, so read its coordinates once.
    template_cor = cacb_struct.coordinates
    for idx, res in enumerate(new_cacb_struct.residues):
        backbone = res.atoms[0]
        backbone.xx = cor[idx,0]
        backbone.xy = cor[idx,1]
        backbone.xz = cor[idx,2]
        if len(res.atoms) > 1:
            sidechain = res.atoms[1]
            bond_length = np.linalg.norm(template_cor[backbone.idx,:] - template_cor[sidechain.idx,:])
            sidechain.xx = cor[idx,0] + bond_length
            sidechain.xy = cor[idx,1]
            sidechain.xz = cor[idx,2]
    return new_cacb_struct
2088
+ #######################################################################################################
2089
+
2090
+ #######################################################################################################
2091
def cacb_energy_minimization(self, cor, prefix, prm_file):
    """
    Minimize the ca-cb model with OpenMM while restraining the 'A' beads.

    Parameters
    ----------
    cor : positions
        Starting positions, indexable by atom index; also used as the
        restraint reference for every atom named 'A'.
    prefix : str
        Base name of the ``.psf`` and ``.top`` files in the current directory.
    prm_file : str
        CHARMM-style parameter file. It is converted by
        ``CoarseGrain.parse_cg_cacb_prm`` and the matching ``.xml`` file is
        then loaded as the force field.

    Returns
    -------
    positions
        Positions read from the simulation context after minimization.

    Raises
    ------
    ValueError
        If the expected ``.xml`` force-field file does not exist.
    RuntimeError
        If the created system contains no ``CustomNonbondedForce``.
    """
    timestep = 0.015*picoseconds
    fbsolu = 0.05/picosecond
    temp = 310*kelvin

    psf_pmd = pmd.charmm.CharmmPsfFile(prefix+'.psf')
    psf = CharmmPsfFile(prefix+'.psf')
    top = psf.topology

    # parse the cg cacb prm file
    topfile = f'{prefix}.top'
    self.logger.debug(os.getcwd())
    self.logger.debug(f'prm_file: {prm_file}')
    self.logger.debug(f'topfile: {topfile}')
    CoarseGrain.parse_cg_cacb_prm(self, prmfile=prm_file, topfile=topfile)
    xml_file = prm_file.split('.prm')[0]+'.xml'
    self.logger.debug(f'xml_file: {xml_file}')
    if not os.path.exists(xml_file):
        raise ValueError(f"Error: {xml_file} not found. Please check the file path.")

    forcefield = ForceField(xml_file)
    self.logger.debug(f'forcefield: {forcefield}')

    # re-name residues that are changed by openmm
    for resid, res in enumerate(top.residues()):
        if res.name != psf_pmd.residues[resid].name:
            res.name = psf_pmd.residues[resid].name

    # Map every residue to the template that carries its own name.
    template_map = {}
    for chain in top.chains():
        for res in chain.residues():
            template_map[res] = res.name

    system = forcefield.createSystem(top, nonbondedCutoff=2.0*nanometer, constraints=None,
                                     removeCMMotion=False, ignoreExternalBonds=True,
                                     residueTemplates=template_map)

    # Fail with a clear message instead of an unbound-variable error.
    custom_nb_force = next((frc for frc in system.getForces()
                            if frc.getName() == 'CustomNonbondedForce'), None)
    if custom_nb_force is None:
        raise RuntimeError(f'No CustomNonbondedForce found in the system built from {xml_file}')
    custom_nb_force.setUseSwitchingFunction(True)
    custom_nb_force.setSwitchingDistance(1.8*nanometer)
    custom_nb_force.setNonbondedMethod(custom_nb_force.CutoffNonPeriodic)

    # add position restraints
    force = CustomExternalForce("k*((x-x0)^2+(y-y0)^2+(z-z0)^2)")
    force.addPerParticleParameter("k")
    force.addPerParticleParameter("x0")
    force.addPerParticleParameter("y0")
    force.addPerParticleParameter("z0")
    system.addForce(force)
    # END add position restraints

    # add position restraints for CA (beads named 'A'), using the force
    # object as held by the system
    force = system.getForces()[-1]
    k = 100*kilocalorie/mole/angstrom**2
    for atm in top.atoms():
        if atm.name == 'A':
            force.addParticle(atm.index, (k, cor[atm.index][0], cor[atm.index][1], cor[atm.index][2]))

    integrator = LangevinIntegrator(temp, fbsolu, timestep)
    integrator.setConstraintTolerance(0.00001)
    # prepare simulation
    platform = Platform.getPlatformByName('CPU')
    properties = {'Threads': self.nproc}
    simulation = Simulation(top, system, integrator, platform, properties)
    simulation.context.setPositions(cor)
    simulation.context.setVelocitiesToTemperature(temp)
    energy = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilocalorie/mole)
    self.getEnergyDecomposition(stdout, simulation.context, system)
    self.logger.info(' Potential energy before minimization: %.4f kcal/mol'%energy)
    simulation.minimizeEnergy(tolerance=0.1*kilocalories_per_mole)
    energy = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilocalorie/mole)
    self.getEnergyDecomposition(stdout, simulation.context, system)
    self.logger.info(' Potential energy after minimization: %.4f kcal/mol'%energy)
    current_cor = simulation.context.getState(getPositions=True).getPositions()
    return current_cor
2175
+ #######################################################################################################
2176
+
2177
+ #######################################################################################################
2178
+ # energy decomposition
2179
def forcegroupify(self, system):
    """
    Put every force of ``system`` in its own force group.

    Force ``i`` is assigned to group ``i``.

    Returns
    -------
    dict
        Maps each group index to the class name of its force.
    """
    group_names = {}
    for group in range(system.getNumForces()):
        current = system.getForce(group)
        current.setForceGroup(group)
        # Bare class name, without the module path.
        group_names[group] = type(current).__name__
    return group_names
2191
+ #######################################################################################################
2192
+
2193
+ #######################################################################################################
2194
def getEnergyDecomposition(self, handle, context, system):
    """
    Write the potential energy of each force group to ``handle``.

    ``forcegroupify`` is called first, so the force groups of ``system`` are
    reassigned as a side effect. A group whose state query raises
    ``ValueError`` is reported as NaN.

    Returns
    -------
    dict
        Maps each force-group index to its potential energy.
    """
    group_names = self.forcegroupify(system)
    energies = {}
    for group in group_names:
        try:
            state = context.getState(getEnergy=True, groups={group})
        except ValueError as err:
            self.logger.debug(str(err))
            energies[group] = Quantity(np.nan, kilocalories/mole)
            continue
        energies[group] = state.getPotentialEnergy()

    handle.write(' Potential Energy:\n')
    for group, energy in energies.items():
        handle.write(' %s: %.4f kcal/mol\n'%(group_names[group], energy.value_in_unit(kilocalories/mole)))
    return energies
2210
+ #######################################################################################################
2211
+
2212
+ #######################################################################################################
2213
def Call_Pulchra(self, rebult_pdb):
    """
    Run the bundled ``pulchra`` executable on ``rebult_pdb``.

    The executable is taken from the ``EntDetect.resources`` package and run
    with ``-v -g -q``; its standard output goes to ``pulchra.log`` in the
    current directory. The ``<stem>.rebuilt.pdb`` file is then renamed to
    ``<stem>_pulchra.pdb``, where ``<stem>`` is ``rebult_pdb`` up to the
    first ``.pdb``.

    Returns
    -------
    str
        Name of the renamed all-atom PDB file.

    Raises
    ------
    FileNotFoundError
        If the ``<stem>.rebuilt.pdb`` file does not exist after the run.
    """
    import subprocess

    self.logger.info("-> Calling pulchra to reconstruct all-atom PDB")
    self.pulchra = files('EntDetect.resources').joinpath('pulchra')
    self.logger.debug(f'pulchra: {self.pulchra}')
    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed through unchanged.
    pulchra_cmd = [str(self.pulchra), '-v', '-g', '-q', rebult_pdb]
    self.logger.debug('CALL: %s > pulchra.log', ' '.join(pulchra_cmd))
    with open('pulchra.log', 'w') as log:
        status = subprocess.run(pulchra_cmd, stdout=log).returncode

    pdb_code = rebult_pdb.split('.pdb')[0]
    old_name = pdb_code + ".rebuilt.pdb"
    new_name = pdb_code + "_pulchra.pdb"
    if not os.path.exists(old_name):
        # Previously the failed rename went unnoticed and a missing file
        # name was returned to the caller.
        raise FileNotFoundError(
            f'pulchra (exit status {status}) did not produce {old_name}; see pulchra.log')
    os.replace(old_name, new_name)
    self.logger.info(" Reconstructed all-atom PDB "+new_name)

    return new_name
2228
+ #######################################################################################################
2229
+
2230
+ #######################################################################################################
2231
def OpenMM_vacuum_minimization(self, input_pdb, maxcyc):
    """
    Energy-minimize an all-atom structure in vacuum with OpenMM.

    The structure is read from ``input_pdb``; an OXT atom is added to the last
    residue of every chain that lacks one; hydrogens are added at pH 7.0; and
    a system is created with the ``amber14-all.xml`` force field, no nonbonded
    cutoff and no constraints. Every CA atom is tethered to its starting
    position through the energy term ``k*((x-x0)^2+(y-y0)^2+(z-z0)^2)`` with
    k = 500 kcal/mol/A^2. Minimization runs on the CPU platform using
    ``self.nproc`` threads, and the result is saved with the ``'!@/H'``
    selection mask applied.

    Parameters:
        input_pdb (str): Path to the all-atom PDB file to minimize.
        maxcyc (int): Maximum number of minimization iterations.

    Returns:
        str: Path of the minimized PDB file, ``<prefix>_OpenMM_min.pdb``, where
        ``<prefix>`` is everything before the first ``.pdb`` in ``input_pdb``.
    """
    pdb_code = input_pdb.split('.pdb')[0]

    self.logger.info("-> Running all-atom energy minimization for %d steps in vacuum via OpenMM"%maxcyc)

    platform = Platform.getPlatformByName('CPU')
    # Platform property values are passed to OpenMM as strings.
    properties = {'Threads': str(self.nproc)}

    forcefield = ForceField('amber14-all.xml')
    self.logger.debug(f'input_pdb: {input_pdb}')
    pdb = pdbfile.PDBFile(input_pdb)
    self.logger.debug('FF made and PDB file loaded')

    # Check if the end residue has missing OXT atom and add if needed
    for chain in pdb.topology.chains():
        chain_residues = list(chain.residues())
        if not chain_residues:
            continue
        end_res = chain_residues[-1]

        # Collect the atoms of interest per chain so that nothing leaks over
        # from a previously processed chain.
        terminal_atoms = {atom.name: atom for atom in end_res.atoms()
                          if atom.name in ('OXT', 'C', 'CA', 'O')}
        if 'OXT' in terminal_atoms:
            continue
        if not all(name in terminal_atoms for name in ('C', 'CA', 'O')):
            # Without C, CA and O the OXT position cannot be constructed.
            self.logger.debug(f'Skipping OXT placement for chain {chain.id}: last residue lacks C/CA/O')
            continue

        C_atom = terminal_atoms['C']
        CA_atom = terminal_atoms['CA']
        O_atom = terminal_atoms['O']
        C_position = np.array(pdb.positions[C_atom.index].value_in_unit(nanometer))
        CA_position = np.array(pdb.positions[CA_atom.index].value_in_unit(nanometer))
        O_position = np.array(pdb.positions[O_atom.index].value_in_unit(nanometer))

        new_atom = pdb.topology.addAtom('OXT', element.oxygen, end_res)
        pdb.topology.addBond(C_atom, new_atom)
        # Place OXT by rotating the C->O vector by pi about the CA->C axis.
        new_position = np.dot(self.rotation_matrix(C_position-CA_position, np.pi), O_position-C_position) + C_position
        new_position = Quantity(value=Vec3(x=new_position[0], y=new_position[1], z=new_position[2]), unit=nanometer)
        # Topology.addAtom gives the new atom the next free index (the end of
        # the atom list), so its coordinates must be appended. Inserting them
        # right after O would shift the coordinates of every later atom.
        pdb.positions.append(new_position)
    self.logger.debug('QC for OXT complete')

    model = modeller.Modeller(pdb.topology, pdb.positions)
    self.logger.debug(f'model: {model}')
    model.addHydrogens(forcefield=forcefield, pH=7.0)
    self.logger.debug('Hydrogens added')

    top = model.topology
    structure = pmd.openmm.load_topology(top)
    cor = model.positions

    system = forcefield.createSystem(top, nonbondedMethod=NoCutoff, constraints=None)
    self.logger.debug('System created')

    # add position restraints
    force = CustomExternalForce("k*((x-x0)^2+(y-y0)^2+(z-z0)^2)")
    force.addPerParticleParameter("k")
    force.addPerParticleParameter("x0")
    force.addPerParticleParameter("y0")
    force.addPerParticleParameter("z0")
    system.addForce(force)
    self.logger.debug('Position restraints added')
    # END add position restraints

    # add position restraints for CA
    # Re-fetch the force from the system (it was added last) before adding particles.
    force = system.getForces()[-1]
    k = 500*kilocalorie/mole/angstrom**2
    for atm in top.atoms():
        if atm.name == 'CA':
            force.addParticle(atm.index, (k, cor[atm.index][0], cor[atm.index][1], cor[atm.index][2]))

    integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 0.002*picoseconds)
    integrator.setConstraintTolerance(0.00001)
    self.logger.debug('Integrator set')

    simulation = Simulation(top, system, integrator, platform, properties)
    simulation.context.setPositions(cor)
    energy = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilocalorie/mole)
    self.getEnergyDecomposition(stdout, simulation.context, system)
    self.logger.info(' Potential energy before minimization: %.4f kcal/mol'%energy)

    simulation.minimizeEnergy(maxIterations=maxcyc)
    energy = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilocalorie/mole)
    self.getEnergyDecomposition(stdout, simulation.context, system)
    self.logger.info(' Potential energy after minimization: %.4f kcal/mol'%energy)
    current_cor = simulation.context.getState(getPositions=True).getPositions()

    structure.positions = current_cor
    outfile = pdb_code+'_OpenMM_min.pdb'
    # Save through the '!@/H' selection mask rather than the full structure.
    structure['!@/H'].save(outfile, overwrite=True)
    self.logger.debug(f'SAVED: {outfile}')
    return outfile
2324
+ #######################################################################################################
2325
+
2326
+ #######################################################################################################
2327
def remove_sc_beads(self, input_pdb, output_pdb):
    """
    Copy a PDB file, leaving out every ATOM/HETATM record named 'SC'.

    The atom name is read from columns 13-16 of each coordinate record
    (string slice ``[12:16]``) and compared after stripping whitespace.
    All other lines are copied unchanged and in their original order.

    Parameters:
        input_pdb (str): Path to the PDB file to read.
        output_pdb (str): Path the filtered PDB file is written to.
    """
    coordinate_tags = ("ATOM", "HETATM")

    with open(input_pdb, 'r') as source, open(output_pdb, 'w') as target:
        for record in source:
            # Only coordinate records can be SC beads; everything else passes through.
            is_sc_bead = (record.startswith(coordinate_tags)
                          and record[12:16].strip() == "SC")
            if not is_sc_bead:
                target.write(record)

    self.logger.info(f"Cleaned PDB file saved to {output_pdb}")
2344
+ #######################################################################################################
2345
+
2346
+ #######################################################################################################
2347
def rotation_matrix(self, axis, theta):
    """
    Build the 3x3 matrix for a counterclockwise rotation about ``axis``.

    The axis is normalized internally, so only its direction matters. The
    matrix is assembled from the four Euler-Rodrigues parameters of the
    rotation.

    Parameters:
        axis (array-like): Three components of the rotation axis.
        theta (float): Rotation angle in radians.

    Returns:
        numpy.ndarray: The 3x3 rotation matrix.
    """
    direction = np.asarray(axis)
    direction = direction / math.sqrt(np.dot(direction, direction))

    half_angle = theta / 2.0
    w = math.cos(half_angle)
    x, y, z = -direction * math.sin(half_angle)

    # Squares and pairwise products of the four parameters.
    ww, xx, yy, zz = w * w, x * x, y * y, z * z
    xy, wz, wy, wx, xz, yz = x * y, w * z, w * y, w * x, x * z, y * z

    top_row = [ww + xx - yy - zz, 2 * (xy + wz), 2 * (xz - wy)]
    middle_row = [2 * (xy - wz), ww + yy - xx - zz, 2 * (yz + wx)]
    bottom_row = [2 * (xz + wy), 2 * (yz - wx), ww + zz - xx - yy]
    return np.array([top_row, middle_row, bottom_row])
2361
+ #######################################################################################################