EntDetect 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- EntDetect/Jwalk/GridTools.py +567 -0
- EntDetect/Jwalk/PDBTools.py +532 -0
- EntDetect/Jwalk/SASDTools.py +543 -0
- EntDetect/Jwalk/SurfaceTools.py +150 -0
- EntDetect/Jwalk/__init__.py +19 -0
- EntDetect/Jwalk/naccess.config.txt +255 -0
- EntDetect/__init__.py +10 -0
- EntDetect/_logging.py +71 -0
- EntDetect/change_resolution.py +2361 -0
- EntDetect/clustering.py +2626 -0
- EntDetect/compare_sim2exp.py +1927 -0
- EntDetect/entanglement_features.py +478 -0
- EntDetect/gaussian_entanglement.py +2067 -0
- EntDetect/order_params.py +1048 -0
- EntDetect/resources/__init__.py +11 -0
- EntDetect/resources/__pycache__/__init__.cpython-311.pyc +0 -0
- EntDetect/resources/calc_K.pl +712 -0
- EntDetect/resources/calc_Q.pl +962 -0
- EntDetect/resources/pulchra +0 -0
- EntDetect/resources/shared_files/__init__.py +2 -0
- EntDetect/resources/shared_files/bt_contact_potential.dat +22 -0
- EntDetect/resources/shared_files/karanicolas_dihe_parm.dat +1600 -0
- EntDetect/resources/shared_files/kgs_contact_potential.dat +22 -0
- EntDetect/resources/shared_files/mj_contact_potential.dat +22 -0
- EntDetect/resources/stride +0 -0
- EntDetect/statistics.py +1344 -0
- EntDetect/utilities.py +201 -0
- entdetect-1.2.0.dist-info/METADATA +26 -0
- entdetect-1.2.0.dist-info/RECORD +45 -0
- entdetect-1.2.0.dist-info/WHEEL +5 -0
- entdetect-1.2.0.dist-info/entry_points.txt +11 -0
- entdetect-1.2.0.dist-info/licenses/LICENSE +674 -0
- entdetect-1.2.0.dist-info/top_level.txt +2 -0
- scripts/__init__.py +5 -0
- scripts/convert_cor_psf_to_pdb.py +103 -0
- scripts/run_Foldingpathway.py +162 -0
- scripts/run_MSM.py +152 -0
- scripts/run_OP_on_simulation_traj.py +194 -0
- scripts/run_change_resolution.py +63 -0
- scripts/run_compare_sim2exp.py +215 -0
- scripts/run_montecarlo.py +158 -0
- scripts/run_nativeNCLE.py +179 -0
- scripts/run_nonnative_entanglement_clustering.py +110 -0
- scripts/run_population_modeling.py +117 -0
- scripts/run_workflow4_nativeNCLE_batch.py +412 -0
|
@@ -0,0 +1,2361 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
try:
|
|
3
|
+
from openmm.app import *
|
|
4
|
+
from openmm import *
|
|
5
|
+
from openmm.unit import *
|
|
6
|
+
except:
|
|
7
|
+
from simtk.openmm.app import *
|
|
8
|
+
from simtk.openmm import *
|
|
9
|
+
from simtk.unit import *
|
|
10
|
+
from sys import stdout, exit, stderr
|
|
11
|
+
import getopt, os, time, random, math, traceback, io, sys, string
|
|
12
|
+
import parmed as pmd
|
|
13
|
+
import numpy as np
|
|
14
|
+
from importlib.resources import files
|
|
15
|
+
import xml.etree.cElementTree as ET
|
|
16
|
+
import xml.dom.minidom as MD
|
|
17
|
+
import numpy
|
|
18
|
+
import pathlib
|
|
19
|
+
import subprocess
|
|
20
|
+
import logging
|
|
21
|
+
from EntDetect._logging import setup_logger
|
|
22
|
+
|
|
23
|
+
sys.setrecursionlimit(int(1e6))
|
|
24
|
+
|
|
25
|
+
class CoarseGrain:
|
|
26
|
+
"""
|
|
27
|
+
Processes biological data including PDB files, sequence data, and interaction potentials.
|
|
28
|
+
"""
|
|
29
|
+
#############################################################################################################
|
|
30
|
+
def __init__(self, pdbfile:str, ID:str='ID', nscal:float = 1.5, outdir:str = './', fnn:int = 1,
             potential_name:str = 'bt', casm:int = 0, domain_file:str = 'None', ca_prefix:str = 'A', sc_prefix:str = 'B', log_level:int = logging.INFO, logdir:str = None):
    """
    Store the coarse-graining configuration and load the reference force-field data.

    Parameters
    ----------
    pdbfile : str
        Path of the input structure; stored and echoed in log/output headers.
    ID : str
        Identifier forwarded to ``setup_logger``.
    nscal : float
        Scale factor applied to every contact energy in ``self.eps``.
        Forced to 1 when ``domain_file`` is given.
    outdir : str
        Output directory; created if it does not exist.
    fnn : int
        Stored as ``self.fnn``; not used inside this method.
    potential_name : str
        Contact potential, one of 'bt', 'mj' or 'kgs' (case-insensitive prefix match).
        'generic-*' names are recognised but rejected as unsupported.
    casm : int
        0 for the CA model, 1 for the CA-sidechain model.
    domain_file : str
        Path of a domain definition file, or the literal string 'None'.
    ca_prefix, sc_prefix : str
        Stored as ``self.ca_prefix`` / ``self.sc_prefix``; not used inside this method.
    log_level : int
        Logging level forwarded to ``setup_logger``.
    logdir : str
        Directory for the log file; falls back to ``outdir`` when None.

    Raises
    ------
    SystemExit
        On any configuration or data-file error (the error is logged first).
    """

    self.pdbfile = pdbfile
    self.ID = ID
    self.nscal = nscal
    self.outdir = outdir
    self.logger = setup_logger('CoarseGrain', outdir=logdir if logdir is not None else outdir, ID=ID, log_level=log_level)
    self.fnn = fnn
    self.potential_name = potential_name
    self.casm = casm
    self.domain_file = domain_file
    self.ca_prefix = ca_prefix
    self.sc_prefix = sc_prefix
    self.heav_cut = 4.5

    if not os.path.exists(self.outdir):
        os.makedirs(self.outdir)
        self.logger.info(f'Made directory: {self.outdir}')

    ######################## Data #########################
    ## Look-up tables for uniquely identifying residues ##
    self.aa = ["GLY","ALA","VAL","LEU","ILE","MET","PHE","PRO","SER","THR","CYS","ASN","GLN","TYR","TRP","ASP","GLU","HIS","LYS","ARG"]
    if len(self.aa) != 20:
        self.logger.error('ERROR')
        sys.exit()
    self.res2n = {a: i for i, a in enumerate(self.aa)}
    self.n2res = {i: a for i, a in enumerate(self.aa)}

    self.Mass = {"N": 14.0067,
                 "H": 1.00794,
                 "C": 12.011,
                 "O": 15.9994,
                 "S": 32.06,}

    # number of heavy atoms in sidechains
    self.refNscat = {"ALA": 1,
                     "CYS": 2,
                     "ASP": 4,
                     "GLU": 5,
                     "PHE": 7,
                     "GLY": 0,
                     "HIS": 6,
                     "HSD": 6,
                     "HSE": 6,
                     "HSP": 6,
                     "ILE": 4,
                     "LYS": 5,
                     "LEU": 4,
                     "MET": 4,
                     "ASN": 4,
                     "PRO": 3,
                     "GLN": 5,
                     "ARG": 7,
                     "SER": 2,
                     "THR": 3,
                     "VAL": 3,
                     "TRP": 10,
                     "TYR": 8}

    # charges on side chains at pH 7
    self.refcharge = {"ALA": 0.0,
                      "CYS": 0.0,
                      "ASP": -1.0,
                      "GLU": -1.0,
                      "PHE": 0.0,
                      "GLY": 0.0,
                      "HIS": 0.0,
                      "HSD": 0.0,
                      "HSE": 0.0,
                      "HSP": 0.0,
                      "ILE": 0.0,
                      "LYS": 1.0,
                      "LEU": 0.0,
                      "MET": 0.0,
                      "ASN": 0.0,
                      "PRO": 0.0,
                      "GLN": 0.0,
                      "ARG": 1.0,
                      "SER": 0.0,
                      "THR": 0.0,
                      "VAL": 0.0,
                      "TRP": 0.0,
                      "TYR": 0.0}

    # Generic C_alpha side-chain center of mass distance
    self.lbs_nongo = {"ASP": 2.46916481058687,
                      "PRO": 1.87381801537346,
                      "LYS": 3.49738414814426,
                      "ILE": 2.25260184847053,
                      "TRP": 3.58251993741888,
                      "CYS": 2.06666004558289,
                      "HSD": 3.15209719417679,
                      "PHE": 3.38385541816659,
                      "HSP": 3.15209719417679,
                      "GLN": 3.08654121335,
                      "SER": 1.89840600762153,
                      "ASN": 2.46916481058687,
                      "VAL": 1.93953811063784,
                      "LEU": 2.56580983973678,
                      "TYR": 3.38981664391425,
                      "GLU": 3.07971386504681,
                      "ARG": 3.39687572938579,
                      "THR": 1.931721703272,
                      "ALA": 1.51146031725997,
                      "MET": 2.95389402456081,
                      "HIS": 3.15209719417679,
                      "HSE": 3.15209719417679}

    self.improper_nongo = {"ASP": 14.655341300544,
                           "PRO": 26.763068425539,
                           "LYS": 12.765248692601,
                           "ILE": 13.5446902008313,
                           "TRP": 11.4483488626106,
                           "CYS": 20.0484470024042,
                           "HSD": 14.9962640689562,
                           "PHE": 10.9217771918902,
                           "HSP": 14.9962640689562,
                           "GLN": 17.3050853491068,
                           "SER": 20.1390130256255,
                           "ASN": 14.655341300544,
                           "VAL": 13.3216022614598,
                           "LEU": 11.8137180266206,
                           "TYR": 12.2715081962165,
                           "GLU": 15.4130821146834,
                           "ARG": 15.5451613009777,
                           "THR": 16.2956083930276,
                           "ALA": 16.8418866013662,
                           "MET": 12.7046284165739,
                           "HIS": 14.9962640689562,
                           "HSE": 14.9962640689562}

    self.ang_sb_nongo = {"ASP": 120.380153696218,
                         "PRO": 125.127927161651,
                         "LYS": 119.523270610009,
                         "ILE": 118.791108398805,
                         "TRP": 130.018548241749,
                         "CYS": 110.512719347428,
                         "HSD": 116.815900172681,
                         "PHE": 122.937540996701,
                         "HSP": 116.815900172681,
                         "GLN": 116.182123224059,
                         "SER": 107.971234136647,
                         "ASN": 120.380153696218,
                         "VAL": 112.877421898116,
                         "LEU": 123.32179171436,
                         "TYR": 116.783314494739,
                         "GLU": 116.659068554985,
                         "ARG": 119.709740783191,
                         "THR": 111.719883260793,
                         "ALA": 108.623605160075,
                         "MET": 116.636559053295,
                         "HIS": 116.815900172681,
                         "HSE": 116.815900172681}

    self.ang_bs_nongo = {"ASP": 116.629356207687,
                         "PRO": 79.4932105625367,
                         "LYS": 119.779735484239,
                         "ILE": 116.923861483529,
                         "TRP": 100.858690902849,
                         "CYS": 114.816253227757,
                         "HSD": 115.848569293979,
                         "PHE": 112.804608190743,
                         "HSP": 115.848569293979,
                         "GLN": 119.106753006548,
                         "SER": 116.361829754186,
                         "ASN": 116.629356207687,
                         "VAL": 121.299281732077,
                         "LEU": 117.587011217416,
                         "TYR": 116.72484692836,
                         "GLU": 119.507585037498,
                         "ARG": 117.532816176021,
                         "THR": 117.044133956143,
                         "ALA": 120.747734648009,
                         "MET": 123.234171432545,
                         "HIS": 115.848569293979,
                         "HSE": 115.848569293979}

    # segment id relationships
    self.alphabet = list(map(chr, range(ord('A'), ord('Z')+1)))
    self.segid2num = {letter: nseg for nseg, letter in enumerate(self.alphabet)}

    # mass of amino acids
    # UNSURE! about pro, arg, his and cys weights
    self.aaSCmass = {"ALA": 71.000000,
                     "CYS": 114.000000,
                     "ASP": 114.000000,
                     "GLU": 128.000000,
                     "PHE": 147.000000,
                     "GLY": 57.000000,
                     "HIS": 114.000000,
                     "HSD": 114.000000,
                     "HSE": 114.000000,
                     "HSP": 114.000000,
                     "ILE": 113.000000,
                     "LYS": 128.000000,
                     "LEU": 113.000000,
                     "MET": 131.000000,
                     "ASN": 114.000000,
                     "PRO": 114.000000,
                     "GLN": 128.000000,
                     "ARG": 114.000000,
                     "SER": 87.000000,
                     "THR": 101.000000,
                     "VAL": 99.000000,
                     "TRP": 186.000000,
                     "TYR": 163.000000}

    # vdw radius of sidechains
    self.rvdw = {"ALA": 2.51958406732374,
                 "CYS": 2.73823091624513,
                 "ASP": 2.79030096923572,
                 "GLU": 2.96332591119925,
                 "PHE": 3.18235414984794,
                 "GLY": 2.25450393833984,
                 "HIS": 3.04273820988499,
                 "HSD": 3.04273820988499,
                 "HSE": 3.04273820988499,
                 "HSP": 3.04273820988499,
                 "ILE": 3.09345983013354,
                 "LYS": 3.18235414984794,
                 "LEU": 3.09345983013354,
                 "MET": 3.09345983013354,
                 "ASN": 2.84049696898525,
                 "PRO": 2.78004241717965,
                 "GLN": 3.00796101305807,
                 "ARG": 3.28138980397453,
                 "SER": 2.59265585208464,
                 "THR": 2.81059478021734,
                 "VAL": 2.92662460060742,
                 "TRP": 3.38869998431408,
                 "TYR": 3.22881842919248}

    ## Check dependency installation ##
    # find stride resource bundled with the package
    self.stride_path = files('EntDetect.resources').joinpath('stride')
    self.logger.debug(f'stride_path: {self.stride_path}')

    # Probe the binary. A missing file addressed by full path is not reported by
    # the shell as "command not found", so the file's existence is checked as well;
    # an empty probe output is tolerated instead of raising IndexError.
    probe = os.popen(f'{self.stride_path} 2>&1').readlines()
    first_line = probe[0].strip() if probe else ''
    if not self.stride_path.is_file() or first_line.endswith('command not found'):
        self.logger.error('Error: Essential software "stride" is not installed.\nPlease install stride before coarse-graining.')
        sys.exit()
    else:
        self.logger.info(f'STRIDE found')

    Header = f"""

# Build CG Protein Model: Python version #
# Yang Jiang & Edward P. O'Brien Jr. #
# Dept. of Chemistry #
# Penn State University #

Configuration:
pdbfile = {self.pdbfile}
casm = {self.casm}
nscal = {self.nscal}
fnn = {self.fnn}
potential_name = {self.potential_name}
domain_file = {self.domain_file}
sc_prefix = {self.sc_prefix}
ca_prefix = {self.ca_prefix}
"""
    self.logger.debug(Header)

    if self.domain_file != "None":
        # per-domain scale factors from the domain file replace the global one
        self.nscal_0 = '1'
        self.nscal = 1
        self.logger.info('domain_file is defined, nscal will be ignored.\n')

    if self.casm != 0 and self.casm != 1:
        self.logger.error('ERROR: casm can only be either 0 (ca model) or 1 (ca-sidechain model).')
        # abort like every other configuration error instead of silently
        # continuing with the CA-model defaults
        sys.exit()

    if self.potential_name.upper().startswith('GENERIC'):
        words = self.potential_name.split('-')
        if len(words) == 1:
            self.logger.error("ERROR: Generic potential keyword must be invoked as 'generic-bt'")
            sys.exit()
        if words[-1].upper() not in ('BT', 'MJ', 'KGS'):
            self.logger.error("ERROR: You can only invoke Generic potential keyword as 'generic-bt' or 'generic-mj' or 'generic-kgs'")
            sys.exit()
        self.potential_name = self.potential_name.upper()
        self.logger.error("ERROR: The generic potential is not supported in this version.\nCoarse-graining terminated.")
        sys.exit()
    else:
        self.potential_name = self.potential_name.upper()
    ### END: get info from control file ###

    ## BEGIN: Conditional Defaults ##
    self.ene_bsc = 0.37 # energy of a backbone-sidechain native contact (0.03 in old version)
    self.single_hbond_ene = 0.75 # energy of a hydrogen bond for everything but helices (0.50 in old version)
    self.single_hbond_ene_helix = 0.75 # energy of a hydrogen bond in a helix (0.50 in old version)
    self.bondlength_go = 0 # non-Go bond length
    if self.casm == 1:
        self.angle_dw = 0 # Go angle potential
        self.dihedral_go = 1 # Go dihedral potential
        self.improperdihed_go = 1 # Go improper dihedral potential
    else:
        self.angle_dw = 1 # double-well angle potential
        self.dihedral_go = 0 # non-Go dihedral potential
        self.improperdihed_go = 0 # non-Go improper dihedral potential

    # read domain nscal values if domain is defined
    dom_nscal = []
    ndomain = 0
    dom = []
    if self.domain_file != "None":
        if not os.path.exists(self.domain_file):
            self.logger.error("ERROR: File %s does not exist"%self.domain_file)
            sys.exit()
        with open(self.domain_file) as f:
            lines = f.readlines()
        for line in lines:
            line = line.strip()
            if line.startswith('scale factor'):
                words = line.split('=')
                dom_nscal.append(float(words[-1]))
            if line.startswith('domain'):
                ndomain += 1
                # "domain ... = <first>-<last>"
                words = [int(w) for w in line.split('=')[-1].split('-')]
                dom.append(words)
                if words[0] > words[1]:
                    self.logger.error("ERROR: When defining the domains in the interface file, index %d is Greater than %d!"%(words[0], words[1]))
                    sys.exit()
        self.logger.info('%d domain(s) defined in the Domain file %s'%(ndomain, self.domain_file))
        if ndomain == 0:
            self.logger.error("ERROR: No domain definitions were read. Check the domain definition file!")
            sys.exit()
        self.logger.info("Domain information:")
        for i, d in enumerate(dom):
            self.logger.info("Domain %d: %d to %d"%(i+1, d[0], d[1]))
        self.logger.info("")
        # one scale factor per domain plus one per domain pair
        if len(dom_nscal) != (1+ndomain)*ndomain/2:
            self.logger.error("ERROR: Incorrect number of interfaces assigned. (%d, should be %d)"%(len(dom_nscal)-ndomain, (ndomain-1)*ndomain/2))
            sys.exit()
    self.dom_nscal = dom_nscal
    self.ndomain = ndomain
    self.dom = dom
    # END read domain nscal values if domain is defined

    # initialize nonbonding potential
    # (name prefix, bundled data file, offset subtracted from each tabulated energy)
    contact_potentials = (('MJ', 'mj_contact_potential.dat', 1.2),
                          ('KGS', 'kgs_contact_potential.dat', 1.8),
                          ('BT', 'bt_contact_potential.dat', 0.6))
    for prefix, fname, offset in contact_potentials:
        if self.potential_name.startswith(prefix):
            miya = files('EntDetect.resources.shared_files').joinpath(fname)
            break
    else:
        self.logger.error("ERROR: Unrecognized force-field %s"%self.potential_name)
        sys.exit()
    self.logger.debug(miya)

    eps = np.zeros((20,20))

    with open(miya) as f:
        lines = f.readlines()
    nrows = 0
    avg_mj = 0
    nmj = 0
    for line in lines:
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        if line.startswith('AA'):
            # header row: residue order of the matrix columns
            vec = [self.res2n[w.upper()] for w in line.split()[1:]]
            if len(vec) != 20:
                self.logger.error("ERROR: missing residues in file %s"%miya)
                sys.exit()
        else:
            # data row k holds k entries (triangular matrix)
            words = line.split()
            for tc, w in enumerate(words):
                # use self.nscal so the domain-file override (nscal = 1) takes effect
                energy = self.nscal * abs(float(w) - offset)
                eps[vec[nrows]][vec[tc]] = energy
                eps[vec[tc]][vec[nrows]] = energy
                avg_mj += energy
                nmj += 1
            nrows += 1
            if nrows > 20:
                self.logger.error("ERROR 2: missing residues in file %s: %d"%(miya, nrows))
                sys.exit()
            if len(words) != nrows:
                self.logger.error("ERROR 3: missing residues in file %s, %d != %d"%(miya, len(words), nrows))
                sys.exit()
    if nmj == 0:
        self.logger.error("ERROR: missing residues in file %s"%miya)
        sys.exit()
    self.eps = eps
    self.avg_mj = avg_mj/nmj
    self.logger.info("The average %s interaction energy is %.4f\n"%(self.potential_name, self.avg_mj))
    # END initialize nonbonding potential

    # Read in the generic backbone dihedral potential of CL Brooks if NON-GO dihedrals
    # requested by user.
    if self.dihedral_go == 0:
        dihedb_nongo = [[[] for j in range(20)] for i in range(20)]
        kpot_f = files('EntDetect.resources.shared_files').joinpath('karanicolas_dihe_parm.dat')
        with open(kpot_f) as f:
            lines = f.readlines()
        nphi = 0
        r1_old = None
        r2_old = None
        for line in lines:
            dat = line.split()
            if not dat:
                continue
            r1 = dat[0].upper()
            r2 = dat[1].upper()
            if r1 != r1_old or r2 != r2_old:
                # new residue pair: restart the term counter
                nphi = 0
            dihedb_nongo[self.res2n[r1]][self.res2n[r2]].append([0.756*float(dat[2]), int(dat[3]), float(dat[4])])
            nphi += 1
            r1_old = r1
            r2_old = r2
            if nphi > 4:
                self.logger.error("ERROR: nphi = %d upon reading in generic dihedral file"%nphi)
                self.logger.debug(line.strip())
                sys.exit()
        self.dihedb_nongo = dihedb_nongo
|
|
477
|
+
# END Read in the generic backbone dihedral potential
|
|
478
|
+
#############################################################################################################
|
|
479
|
+
|
|
480
|
+
###################################################################################################
|
|
481
|
+
# generate charmm .psf
|
|
482
|
+
def create_psf(self, struct, ca_list, name):
    """
    Add bonded terms to ``struct`` and save it as ``<outdir>/<name>.psf``.

    Bonds, dihedrals and impropers are only created between CA beads that
    share the same residue ``segid``. Angles are created for every pair of
    bond partners of each atom. Returns the path of the written PSF file.
    """
    topo = pmd.topologyobjects

    def one_segment(first, count):
        # True when `count` consecutive CA beads starting at `first` share one segid
        return len({ca_list[first + k].residue.segid for k in range(count)}) == 1

    # backbone bonds
    for idx in range(len(ca_list) - 1):
        if one_segment(idx, 2):
            struct.bonds.append(topo.Bond(ca_list[idx], ca_list[idx + 1]))

    # backbone-sidechain bonds, when the residue has a second bead
    for ca_atom in ca_list:
        beads = ca_atom.residue.atoms
        if len(beads) > 1:
            struct.bonds.append(topo.Bond(ca_atom, beads[1]))

    # angles: every unordered pair of bond partners around each atom
    for centre in struct.atoms:
        partners = centre.bond_partners
        for pos, left in enumerate(partners[:-1]):
            for right in partners[pos + 1:]:
                struct.angles.append(topo.Angle(left, centre, right))

    # dihedrals over four consecutive CA beads
    for idx in range(len(ca_list) - 3):
        if one_segment(idx, 4):
            struct.dihedrals.append(
                topo.Dihedral(ca_list[idx], ca_list[idx + 1], ca_list[idx + 2], ca_list[idx + 3]))

    # impropers centred on interior CA beads that carry a second bead
    for idx in range(1, len(ca_list) - 1):
        if one_segment(idx - 1, 3) and len(ca_list[idx].residue.atoms) > 1:
            side_bead = ca_list[idx].residue.atoms[1]
            struct.impropers.append(
                topo.Improper(ca_list[idx], ca_list[idx - 1], ca_list[idx + 1], side_bead))

    psffile = os.path.join(self.outdir, name+'.psf')
    self.logger.info(f'Writing {psffile}')
    struct.save(psffile, overwrite=True, vmd=False)
    return psffile
|
|
518
|
+
# END generate charmm .psf
|
|
519
|
+
###################################################################################################
|
|
520
|
+
|
|
521
|
+
###################################################################################################
|
|
522
|
+
# generate charmm .top
|
|
523
|
+
def Create_rtf(self, struct, out_name):
    """
    Write a CHARMM topology file ``<outdir>/<out_name>.top`` for ``struct``.

    The file holds one MASS line per atom, DECL lines for the first atom's
    name, and one RESI entry per residue. Residues with more than one bead
    get the two-bead connectivity when ``self.casm == 1``; all others get
    the single-bead connectivity. Returns the path of the written file.
    """
    topfile = os.path.join(self.outdir, out_name+'.top')
    self.logger.info(f'Writing {topfile}')
    model = 'Ca-Cb' if self.casm == 1 else 'Ca'
    # the context manager closes the file even when a write raises
    with open(topfile, 'w') as fo:
        fo.write('* This CHARMM .top file describes a %s Go model of %s\n*\n20 1\n'%(model, self.pdbfile))
        # MASS section
        fo.write('! backbone masses\n')
        for idx, atm in enumerate(struct.atoms):
            fo.write('MASS %-4s %-8s %.6f\n'%(str(idx+1), atm.type, atm.mass))
        fo.write('\n')
        first_name = struct[0].name
        fo.write('DECL +%s\n'%first_name)
        fo.write('DECL -%s\n'%first_name)
        fo.write('DECL #%s\n'%first_name)
        # residue section
        for res in struct.residues:
            res_charge = sum(atm.charge for atm in res.atoms)
            fo.write('RESI %-6s %.1f\n'%(res.name, res_charge))
            fo.write('GROUP\n')
            for atm in res.atoms:
                fo.write('ATOM %s %-6s %.1f\n'%(atm.name, atm.type, atm.charge))
            ca = res.atoms[0].name
            if self.casm == 1 and len(res.atoms) != 1:
                sc = res.atoms[1].name
                fo.write("Bond %s %s %s +%s\n"%(ca, sc, ca, ca))
                fo.write("Angle -%s %s %s %s %s +%s -%s %s +%s\n"%(ca, ca, sc, sc, ca, ca, ca, ca, ca))
                fo.write("DIHE -%s %s +%s #%s\n"%(ca, ca, ca, ca))
                fo.write("IMPH %s -%s +%s %s\n\n"%(ca, ca, ca, sc))
            else:
                fo.write('Bond %s +%s\n'%(ca, ca))
                fo.write('Angle -%s %s +%s\n'%(ca, ca, ca))
                fo.write('DIHE -%s %s +%s #%s\n\n'%(ca, ca, ca, ca))
        # end section
        fo.write('END\n')
    return topfile
|
|
565
|
+
# END generate charmm .top
|
|
566
|
+
###################################################################################################
|
|
567
|
+
|
|
568
|
+
###################################################################################################
|
|
569
|
+
def calc_distance(self, atom_1, atom_2):
    """
    Return the Euclidean distance between two atoms.

    Both arguments must expose their Cartesian coordinates as the
    attributes ``xx``, ``xy`` and ``xz``.
    """
    dx = atom_1.xx - atom_2.xx
    dy = atom_1.xy - atom_2.xy
    dz = atom_1.xz - atom_2.xz
    return (dx**2 + dy**2 + dz**2)**0.5
|
|
572
|
+
###################################################################################################
|
|
573
|
+
|
|
574
|
+
###################################################################################################
|
|
575
|
+
def cg_energy_minimization(self, cor, prefix, prm_file):
    """
    Energy-minimize a coarse-grained model with restrained 'A' beads.

    Parameters
    ----------
    cor
        Starting coordinates, indexable by atom index and accepted by
        ``Context.setPositions``.
    prefix : str
        Path prefix; ``<prefix>.psf`` and ``<prefix>.top`` are read.
    prm_file : str
        CHARMM parameter file; the matching ``.xml`` force field (same name
        with ``.prm`` stripped) is loaded after running the external
        ``parse_cg_cacb_prm.py`` script.

    Returns
    -------
    The minimized positions from the simulation context.
    """
    temperature = 310*kelvin
    n_threads = '1'  # kept as a string: it is passed as a platform property
    timestep = 0.015*picoseconds
    fbsolu = 0.05/picosecond

    psf = CharmmPsfFile(prefix+'.psf')
    top = psf.topology
    # NOTE(review): relies on parse_cg_cacb_prm.py being on PATH and on the
    # paths containing no shell metacharacters — confirm against callers.
    os.system('parse_cg_cacb_prm.py -p '+prm_file+' -t '+prefix+'.top')
    name = prm_file.split('.prm')[0]
    forcefield = ForceField(name+'.xml')

    # every residue uses the template named after itself
    template_map = {}
    for chain in top.chains():
        for res in chain.residues():
            template_map[res] = res.name

    system = forcefield.createSystem(top, nonbondedCutoff=2.0*nanometer,
                                     constraints=None, removeCMMotion=False, ignoreExternalBonds=True,
                                     residueTemplates=template_map)
    # NOTE(review): assumes force index 4 is the custom nonbonded force — TODO confirm
    custom_nb_force = system.getForce(4)
    custom_nb_force.setUseSwitchingFunction(True)
    custom_nb_force.setSwitchingDistance(1.8*nanometer)
    custom_nb_force.setNonbondedMethod(custom_nb_force.CutoffNonPeriodic)

    # add position restraints
    force = CustomExternalForce("k*((x-x0)^2+(y-y0)^2+(z-z0)^2)")
    force.addPerParticleParameter("k")
    force.addPerParticleParameter("x0")
    force.addPerParticleParameter("y0")
    force.addPerParticleParameter("z0")
    system.addForce(force)
    # END add position restraints

    # add position restraints for atoms named 'A'
    force = system.getForces()[-1]
    k = 100*kilocalorie/mole/angstrom**2
    for atm in top.atoms():
        if atm.name == 'A':
            force.addParticle(atm.index, (k, cor[atm.index][0], cor[atm.index][1], cor[atm.index][2]))

    integrator = LangevinIntegrator(temperature, fbsolu, timestep)
    integrator.setConstraintTolerance(0.00001)
    # prepare simulation
    platform = Platform.getPlatformByName('CPU')
    properties = {'Threads': n_threads}
    simulation = Simulation(top, system, integrator, platform, properties)
    simulation.context.setPositions(cor)
    simulation.context.setVelocitiesToTemperature(temperature)
    energy = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilocalorie/mole)
    # getEnergyDecomposition is a method of this class, so it is called through self
    self.getEnergyDecomposition(stdout, simulation.context, system)
    self.logger.info(' Potential energy before minimization: %.4f kcal/mol'%energy)
    simulation.minimizeEnergy(tolerance=0.1*kilocalories_per_mole)
    energy = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilocalorie/mole)
    self.getEnergyDecomposition(stdout, simulation.context, system)
    self.logger.info(' Potential energy after minimization: %.4f kcal/mol'%energy)
    current_cor = simulation.context.getState(getPositions=True).getPositions()
    return current_cor
|
|
636
|
+
###################################################################################################
|
|
637
|
+
|
|
638
|
+
###################################################################################################
|
|
639
|
+
# remove bond constraints of 0 mass atoms
|
|
640
|
+
def rm_cons_0_mass(self, system):
    """
    Remove every constraint of ``system`` that involves a zero-mass particle.

    When only one of the two constrained particles is massless, the removed
    constraint is replaced by a bond added through ``system.getForce(0)``
    (3.81 angstrom, 50 kcal/mol/angstrom^2).
    NOTE(review): presumably force 0 is the harmonic bond force — confirm.
    """
    # Removing a constraint shifts the indices, so rescan from the start after
    # each removal and stop once a full pass finds nothing to remove.
    while system.getNumConstraints() != 0:
        for idx in range(system.getNumConstraints()):
            params = system.getConstraintParameters(idx)
            first, second = params[0], params[1]
            first_massless = system.getParticleMass(first).value_in_unit(dalton) == 0
            second_massless = system.getParticleMass(second).value_in_unit(dalton) == 0
            if not (first_massless or second_massless):
                continue
            system.removeConstraint(idx)
            if not (first_massless and second_massless):
                system.getForce(0).addBond(first, second, 3.81*angstroms, 50*kilocalories/mole/angstroms**2)
            break
        else:
            return
|
|
661
|
+
# END remove bond constraints of 0 mass atoms
|
|
662
|
+
###################################################################################################
|
|
663
|
+
|
|
664
|
+
###################################################################################################
|
|
665
|
+
# energy decomposition
|
|
666
|
+
def forcegroupify(self, system):
    """Put each force of ``system`` in its own force group and name the groups.

    Force number ``i`` is assigned to force group ``i``.  The returned dict
    maps that index to the class name of the force, taken from the text of
    ``str(type(force))`` with any module path stripped.
    """
    group_names = {}
    for group in range(system.getNumForces()):
        current = system.getForce(group)
        current.setForceGroup(group)
        # "<class 'pkg.module.ClassName'>" -> "pkg.module.ClassName" -> "ClassName"
        dotted_name = str(type(current)).split('\'')[1]
        group_names[group] = dotted_name.rpartition('.')[2]
    return group_names
|
|
678
|
+
###################################################################################################
|
|
679
|
+
|
|
680
|
+
###################################################################################################
|
|
681
|
+
def getEnergyDecomposition(self, handle, context, system):
    """Write the potential energy of every force to ``handle`` and return them.

    Each force of ``system`` is first placed in its own force group through
    ``self.forcegroupify``; the energy of each group is then queried from
    ``context`` one group at a time.

    Parameters
    ----------
    handle
        Object with a ``write`` method that receives the text report.
    context
        Object providing ``getState(getEnergy=True, groups={...})``.
    system
        Object handed to ``self.forcegroupify``.

    Returns
    -------
    dict
        Force-group index -> value of ``getPotentialEnergy()`` for that
        group.  A group whose ``getState`` call raised ``ValueError`` is
        recorded as a NaN quantity in kcal/mol, and the error text is logged
        at debug level.
    """
    # forcegroupify is a method of this object; calling it as a bare name
    # (as the previous code did) cannot work.
    # NOTE(review): the force groups are changed after ``context`` was created;
    # confirm the context picks up the new grouping.
    forcegroups = self.forcegroupify(system)
    energies = {}
    for group in forcegroups:
        try:
            state = context.getState(getEnergy=True, groups={group})
        except ValueError as e:
            self.logger.debug(str(e))
            energies[group] = Quantity(np.nan, kilocalories/mole)
        else:
            energies[group] = state.getPotentialEnergy()
    handle.write(' Potential Energy:\n')
    for group, energy in energies.items():
        handle.write(' %s: %.4f kcal/mol\n'%(forcegroups[group], energy.value_in_unit(kilocalories/mole)))
    return energies
|
|
697
|
+
###################################################################################################
|
|
698
|
+
|
|
699
|
+
###################################################################################################
|
|
700
|
+
def parse_cg_prm(self, prmfile:str, topfile:str):
    """
    Parse CHARMM parameter file and generate OpenMM XML file.

    The ``AtomTypes`` (and, when present, ``Residues``) elements are taken
    from a temporary ``<prefix>_tmp.xml`` written by ParmEd and then deleted.
    All force sections are built here from the text of ``prmfile``:
    ``HarmonicBondForce``, a double-well ``CustomAngleForce``,
    ``PeriodicTorsionForce``, an (empty) ``CustomTorsionForce`` and a
    ``CustomNonbondedForce`` with tabulated 12-10-6 coefficients.
    The result is written to ``<prefix>.xml``, where ``<prefix>`` is the part
    of ``prmfile`` before the first ``'.prm'``.

    Parameters
    ----------
    prmfile : str
        Path of the CHARMM parameter file.
    topfile : str
        One or more topology file paths separated by whitespace.

    Returns
    -------
    None

    Notes
    -----
    The numeric factors (4.184, /10, *100, /180*pi) presumably convert CHARMM
    units (kcal/mol, angstrom, degree) to OpenMM units (kJ/mol, nm, radian).
    The accumulated dihedral terms of the last dihedral type are only written
    when an ``IMPHI`` header is met.
    """
    top_file_list = topfile.strip().split()

    # Same text the previous eval()-based implementation logged.
    self.logger.debug('pmd.charmm.CharmmParameterSet('
                      + ''.join('"' + tf + '", ' for tf in top_file_list)
                      + 'prmfile)')

    # Call the constructor directly: no eval() on a string built from file
    # names, so paths containing quotes or code fragments are handled safely.
    param = pmd.charmm.CharmmParameterSet(*top_file_list, prmfile)

    file_name = prmfile.split('.prm')[0]
    tmp_xml = file_name + '_tmp.xml'

    openmm_param = pmd.openmm.parameters.OpenMMParameterSet.from_parameterset(param)
    openmm_param.write(tmp_xml, skip_duplicates=False)
    self.logger.info(f'Writing {file_name}_tmp.xml')

    # Only the AtomTypes / Residues elements of the ParmEd output are reused.
    parmed_root = MD.parse(tmp_xml).documentElement
    atom_type = parmed_root.getElementsByTagName('AtomTypes')
    residue = parmed_root.getElementsByTagName('Residues')
    os.remove(tmp_xml)

    def flush_dihedral(parent, entries):
        # Emit one <Proper> element: four types followed by (k, n, phase) triples.
        proper_node = ET.SubElement(parent, 'Proper', type1=entries[0], type2=entries[1],
                                    type3=entries[2], type4=entries[3])
        for term, index in enumerate(range(4, len(entries), 3), start=1):
            proper_node.set('k'+str(term), entries[index])
            proper_node.set('periodicity'+str(term), entries[index+1])
            proper_node.set('phase'+str(term), entries[index+2])

    root = ET.Element("ForceField")

    section = None
    node = None
    nbxmod = None
    ep = None
    kc = 138.935485
    ld = 1 # 10 Angstrom
    atom_type_list = []
    dihedral_array = []
    acoef_array = None
    bcoef_array = None
    ccoef_array = None
    nb_table = []
    nbfix_table = []

    with open(prmfile, 'r') as pf:
        for line in pf:
            line = line.strip()
            # Skip blank lines and comment lines
            if not line or line.startswith('!'):
                continue

            # ---- section headers ----
            if line.startswith('ATOM'):
                section = 'ATOM'
                continue
            if line.startswith('BOND'):
                section = 'BOND'
                node = ET.SubElement(root, 'HarmonicBondForce')
                continue
            if line.startswith('ANGLE'):
                section = 'ANGLE'
                node = ET.SubElement(root, 'CustomAngleForce',
                    energy='-1/gamma*log(e); e=exp(-gamma*(k_alpha*(theta-theta_alpha)^2+epsilon_alpha))+exp(-gamma*k_betta*(theta-theta_betta)^2)')
                for param_name in ('k_alpha', 'theta_alpha', 'k_betta', 'theta_betta',
                                   'gamma', 'epsilon_alpha'):
                    ET.SubElement(node, 'PerAngleParameter', name=param_name)
                continue
            if line.startswith('DIHEDRAL'):
                section = 'DIHEDRAL'
                node = ET.SubElement(root, 'PeriodicTorsionForce')
                continue
            if line.startswith('IMPHI'):
                section = 'IMPROPER'
                # Write out the dihedral type still being accumulated
                if dihedral_array:
                    flush_dihedral(node, dihedral_array)
                node = ET.SubElement(root, 'CustomTorsionForce',
                    energy='k*min(dtheta, 2*pi-dtheta)^2; dtheta = abs(theta-theta0); pi = 3.1415926535')
                ET.SubElement(node, 'PerTorsionParameter', name='k')
                ET.SubElement(node, 'PerTorsionParameter', name='theta0')
                continue
            if line.startswith('NONBONDED'):
                section = 'NONBONDED'
                nbxmod = int(line.split()[2])
                continue
            if line.startswith('CUTNB'):
                # presumably field 7 of the CUTNB line is the dielectric constant — verify
                ep = float(line.split()[7])
                node = ET.SubElement(root, 'CustomNonbondedForce',
                    energy='ke*charge1*charge2/ep/r*exp(-r/ld)+kv*(a/r^12+b/r^10+c/r^6); '+
                           'ke=ke1*ke2; ep=ep1*ep2; ld=ld1*ld2; kv=kv1*kv2; '+
                           'a=acoef(index1, index2); b=bcoef(index1, index2); c=ccoef(index1, index2)',
                    bondCutoff=str(nbxmod-1))
                for param_name in ('ke', 'kv', 'ep', 'ld', 'charge', 'index'):
                    ET.SubElement(node, 'PerParticleParameter', name=param_name)
                n_types = len(atom_type_list)
                acoef_array = numpy.zeros((n_types, n_types))
                bcoef_array = numpy.zeros((n_types, n_types))
                ccoef_array = numpy.zeros((n_types, n_types))
                nb_table = [[] for _ in atom_type_list]
                continue
            if line.startswith('NBFIX'):
                section = 'NBFIX'
                continue
            # It seems like files? sections? can be terminated with 'END'
            if line.startswith('END'): # should this be case-insensitive?
                section = None
                continue

            # ---- section bodies ----
            if section is None:
                continue
            words = line.split()
            if section == 'ATOM':
                # Third field is the atom type name; list position is its table index
                atom_type_list.append(words[2])
            elif section == 'BOND':
                ET.SubElement(node, 'Bond', type1=words[0], type2=words[1],
                              length=str(float(words[3])/10), k=str(float(words[2])*4.184*100*2))
            elif section == 'ANGLE':
                ET.SubElement(node, 'Angle', type1=words[0], type2=words[1], type3= words[2],
                    k_alpha=str(float(words[3])*4.184), theta_alpha=str(float(words[4])/180*math.pi),
                    k_betta=str(float(words[5])*4.184), theta_betta=str(float(words[6])/180*math.pi),
                    gamma=str(float(words[7])/4.184), epsilon_alpha=str(float(words[8])*4.184))
            elif section == 'DIHEDRAL':
                types = words[0:4]
                k = str(float(words[4])*4.184)
                n = words[5]
                phase = str(float(words[6])/180*math.pi)
                if dihedral_array and dihedral_array[:4] != types:
                    # A new dihedral type starts: write out the previous one
                    flush_dihedral(node, dihedral_array)
                    dihedral_array = []
                if not dihedral_array:
                    dihedral_array.extend(types)
                dihedral_array.extend([k, n, phase])
            elif section == 'IMPROPER':
                # No improper torsion energy term for Ca model
                continue
            elif section == 'NONBONDED':
                epsilon = -float(words[2])*4.184
                R_min_half = float(words[3])/10
                nb_table[atom_type_list.index(words[0])] = [epsilon, R_min_half]
            elif section == 'NBFIX':
                index1 = atom_type_list.index(words[0])
                index2 = atom_type_list.index(words[1])
                epsilon = -float(words[2])*4.184
                R_min = float(words[3])/10
                nbfix_table.append([index1, index2, epsilon, R_min])

    num_atom = len(atom_type_list)

    #Build acoef, bcoef, ccoef tables from the per-type NONBONDED entries
    for index1 in range(num_atom):
        epsilon1, R_min1 = nb_table[index1][0], nb_table[index1][1]
        for index2 in range(num_atom):
            epsilon2, R_min2 = nb_table[index2][0], nb_table[index2][1]
            epsilon = numpy.sqrt(epsilon1 * epsilon2)
            R_min = R_min1 + R_min2
            acoef_array[index1, index2] = 13 * epsilon * pow(R_min, 12)
            bcoef_array[index1, index2] = -18 * epsilon * pow(R_min, 10)
            ccoef_array[index1, index2] = 4 * epsilon * pow(R_min, 6)
    # NBFIX pairs override the combined values, symmetrically
    for index1, index2, epsilon, R_min in nbfix_table:
        a = 13 * epsilon * pow(R_min, 12)
        b = -18 * epsilon * pow(R_min, 10)
        c = 4 * epsilon * pow(R_min, 6)
        acoef_array[index1, index2] = acoef_array[index2, index1] = a
        bcoef_array[index1, index2] = bcoef_array[index2, index1] = b
        ccoef_array[index1, index2] = ccoef_array[index2, index1] = c

    #build tabulated function for acoef, bcoef, ccoef
    nonbonded_node = node
    for table_name, table in (('acoef', acoef_array), ('bcoef', bcoef_array),
                              ('ccoef', ccoef_array)):
        table_node = ET.SubElement(nonbonded_node, "Function", name=table_name, type='Discrete2D',
                                   xsize=str(num_atom), ysize=str(num_atom))
        # Row-major list of values, each followed by one space
        table_node.text = ''.join(str(table[index1, index2]) + " "
                                  for index1 in range(num_atom)
                                  for index2 in range(num_atom))

    #add custom nonbond parameters
    ET.SubElement(nonbonded_node, 'UseAttributeFromResidue', name='charge')
    for index, type_name in enumerate(atom_type_list):
        # Square roots are stored because the energy expression multiplies the
        # per-particle values of both particles (ke=ke1*ke2, ep=ep1*ep2, ...).
        ET.SubElement(nonbonded_node, 'Atom', type=type_name, index=str(index), ke=str(kc**0.5),
                      ep=str(ep**0.5), ld=str(ld**0.5), kv='1')

    # Round-trip through minidom to pretty-print, then splice the ParmEd
    # AtomTypes / Residues elements in front of the bond force.
    pretty_xml = MD.parseString(ET.tostring(root)).documentElement.toprettyxml(indent=' ', newl='\n')
    dom = MD.parseString(pretty_xml)
    out_root = dom.documentElement
    bond = out_root.getElementsByTagName('HarmonicBondForce')
    out_root.insertBefore(atom_type[0], bond[0])
    if len(residue) > 0:
        out_root.insertBefore(residue[0], bond[0])

    # The context manager guarantees the output file is flushed and closed.
    with open(file_name+'.xml', 'w') as xf:
        dom.writexml(xf, indent='')
|
|
980
|
+
###################################################################################################
|
|
981
|
+
|
|
982
|
+
###################################################################################################
|
|
983
|
+
def parse_cg_cacb_prm(self, prmfile:str, topfile:str):
    """
    Parse a CHARMM parameter file of the Ca-Cb model and generate an OpenMM XML file.

    The ``AtomTypes`` (and, when present, ``Residues``) elements are taken
    from a temporary ``<prefix>_tmp.xml`` written by ParmEd and then deleted.
    All force sections are built here from the text of ``prmfile``:
    ``HarmonicBondForce``, ``HarmonicAngleForce``, ``PeriodicTorsionForce``,
    a ``CustomTorsionForce`` holding the improper terms and a
    ``CustomNonbondedForce`` with tabulated 12-6 coefficients.
    The result is written to ``<prefix>.xml``, where ``<prefix>`` is the part
    of ``prmfile`` before the first ``'.prm'``.

    Parameters
    ----------
    prmfile : str
        Path of the CHARMM parameter file.
    topfile : str
        One or more topology file paths separated by whitespace.

    Returns
    -------
    None

    Notes
    -----
    The numeric factors (4.184, /10, *100, /180*pi) presumably convert CHARMM
    units (kcal/mol, angstrom, degree) to OpenMM units (kJ/mol, nm, radian).
    The accumulated dihedral terms of the last dihedral type are only written
    when an ``IMPHI`` header is met.
    """
    top_file_list = topfile.strip().split()

    # Call the constructor directly: no eval() on a string built from file
    # names, so paths containing quotes or code fragments are handled safely.
    param = pmd.charmm.CharmmParameterSet(*top_file_list, prmfile)

    file_name = prmfile.split('.prm')[0]
    tmp_xml = file_name + '_tmp.xml'

    openmm_param = pmd.openmm.parameters.OpenMMParameterSet.from_parameterset(param)
    openmm_param.write(tmp_xml, skip_duplicates=False)

    # Only the AtomTypes / Residues elements of the ParmEd output are reused.
    parmed_root = MD.parse(tmp_xml).documentElement
    atom_type = parmed_root.getElementsByTagName('AtomTypes')
    residue = parmed_root.getElementsByTagName('Residues')
    os.remove(tmp_xml)

    def flush_dihedral(parent, entries):
        # Emit one <Proper> element: four types followed by (k, n, phase) triples.
        proper_node = ET.SubElement(parent, 'Proper', type1=entries[0], type2=entries[1],
                                    type3=entries[2], type4=entries[3])
        for term, index in enumerate(range(4, len(entries), 3), start=1):
            proper_node.set('k'+str(term), entries[index])
            proper_node.set('periodicity'+str(term), entries[index+1])
            proper_node.set('phase'+str(term), entries[index+2])

    root = ET.Element("ForceField")

    section = None
    node = None
    nbxmod = None
    ep = None
    kc = 138.935485
    ld = 1 # 10 Angstrom
    atom_type_list = []
    dihedral_array = []
    acoef_array = None
    bcoef_array = None
    nb_table = []
    nbfix_table = []

    with open(prmfile, 'r') as pf:
        for line in pf:
            line = line.strip()
            # Skip blank lines and comment lines
            if not line or line.startswith('!'):
                continue

            # ---- section headers ----
            if line.startswith('ATOM'):
                section = 'ATOM'
                continue
            if line.startswith('BOND'):
                section = 'BOND'
                node = ET.SubElement(root, 'HarmonicBondForce')
                continue
            if line.startswith('ANGLE'):
                section = 'ANGLE'
                node = ET.SubElement(root, 'HarmonicAngleForce')
                continue
            if line.startswith('DIHEDRAL'):
                section = 'DIHEDRAL'
                node = ET.SubElement(root, 'PeriodicTorsionForce')
                continue
            if line.startswith('IMPHI'):
                section = 'IMPROPER'
                # Write out the dihedral type still being accumulated
                if dihedral_array:
                    flush_dihedral(node, dihedral_array)
                node = ET.SubElement(root, 'CustomTorsionForce',
                    energy='k*min(dtheta, 2*pi-dtheta)^2; dtheta = abs(theta-theta0); pi = 3.1415926535')
                ET.SubElement(node, 'PerTorsionParameter', name='k')
                ET.SubElement(node, 'PerTorsionParameter', name='theta0')
                continue
            if line.startswith('NONBONDED'):
                section = 'NONBONDED'
                nbxmod = int(line.split()[2])
                continue
            if line.startswith('CUTNB'):
                # presumably field 7 of the CUTNB line is the dielectric constant — verify
                ep = float(line.split()[7])
                node = ET.SubElement(root, 'CustomNonbondedForce',
                    energy='ke*charge1*charge2/ep/r*exp(-r/ld)+kv*(a/r^12+b/r^6); '+
                           'ke=ke1*ke2; ep=ep1*ep2; ld=ld1*ld2; kv=kv1*kv2; '+
                           'a=acoef(index1, index2); b=bcoef(index1, index2)',
                    bondCutoff=str(nbxmod-1))
                for param_name in ('ke', 'kv', 'ep', 'ld', 'charge', 'index'):
                    ET.SubElement(node, 'PerParticleParameter', name=param_name)
                n_types = len(atom_type_list)
                acoef_array = numpy.zeros((n_types, n_types))
                bcoef_array = numpy.zeros((n_types, n_types))
                nb_table = [[] for _ in atom_type_list]
                continue
            if line.startswith('NBFIX'):
                section = 'NBFIX'
                continue
            # It seems like files? sections? can be terminated with 'END'
            if line.startswith('END'): # should this be case-insensitive?
                section = None
                continue

            # ---- section bodies ----
            if section is None:
                continue
            words = line.split()
            if section == 'ATOM':
                # Third field is the atom type name; list position is its table index
                atom_type_list.append(words[2])
            elif section == 'BOND':
                ET.SubElement(node, 'Bond', type1=words[0], type2=words[1],
                              length=str(float(words[3])/10), k=str(float(words[2])*4.184*100*2))
            elif section == 'ANGLE':
                ET.SubElement(node, 'Angle', type1=words[0], type2=words[1], type3= words[2],
                    k=str(float(words[3])*4.184*2), angle=str(float(words[4])/180*math.pi))
            elif section == 'DIHEDRAL':
                types = words[0:4]
                k = str(float(words[4])*4.184)
                n = words[5]
                phase = str(float(words[6])/180*math.pi)
                if dihedral_array and dihedral_array[:4] != types:
                    # A new dihedral type starts: write out the previous one
                    flush_dihedral(node, dihedral_array)
                    dihedral_array = []
                if not dihedral_array:
                    dihedral_array.extend(types)
                dihedral_array.extend([k, n, phase])
            elif section == 'IMPROPER':
                k = str(float(words[4])*4.184)
                # The file's angle is shifted by 180 degrees before conversion
                phase = str((float(words[6])-180)/180*math.pi)
                ET.SubElement(node, 'Improper', type1=words[0], type2=words[1],
                              type3=words[2], type4=words[3], k=k,theta0=phase)
            elif section == 'NONBONDED':
                epsilon = -float(words[2])*4.184
                R_min_half = float(words[3])/10
                nb_table[atom_type_list.index(words[0])] = [epsilon, R_min_half]
            elif section == 'NBFIX':
                index1 = atom_type_list.index(words[0])
                index2 = atom_type_list.index(words[1])
                epsilon = -float(words[2])*4.184
                R_min = float(words[3])/10
                nbfix_table.append([index1, index2, epsilon, R_min])

    num_atom = len(atom_type_list)

    #Build acoef, bcoef tables from the per-type NONBONDED entries
    for index1 in range(num_atom):
        epsilon1, R_min1 = nb_table[index1][0], nb_table[index1][1]
        for index2 in range(num_atom):
            epsilon2, R_min2 = nb_table[index2][0], nb_table[index2][1]
            epsilon = numpy.sqrt(epsilon1 * epsilon2)
            R_min = R_min1 + R_min2
            acoef_array[index1, index2] = epsilon * pow(R_min, 12)
            bcoef_array[index1, index2] = -2 * epsilon * pow(R_min, 6)
    # NBFIX pairs override the combined values, symmetrically
    for index1, index2, epsilon, R_min in nbfix_table:
        a = epsilon * pow(R_min, 12)
        b = -2 * epsilon * pow(R_min, 6)
        acoef_array[index1, index2] = acoef_array[index2, index1] = a
        bcoef_array[index1, index2] = bcoef_array[index2, index1] = b

    #build tabulated function for acoef, bcoef
    nonbonded_node = node
    for table_name, table in (('acoef', acoef_array), ('bcoef', bcoef_array)):
        table_node = ET.SubElement(nonbonded_node, "Function", name=table_name, type='Discrete2D',
                                   xsize=str(num_atom), ysize=str(num_atom))
        # Row-major list of values, each followed by one space
        table_node.text = ''.join(str(table[index1, index2]) + " "
                                  for index1 in range(num_atom)
                                  for index2 in range(num_atom))

    #add custom nonbond parameters
    ET.SubElement(nonbonded_node, 'UseAttributeFromResidue', name='charge')
    for index, type_name in enumerate(atom_type_list):
        # Square roots are stored because the energy expression multiplies the
        # per-particle values of both particles (ke=ke1*ke2, ep=ep1*ep2, ...).
        ET.SubElement(nonbonded_node, 'Atom', type=type_name, index=str(index), ke=str(kc**0.5),
                      ep=str(ep**0.5), ld=str(ld**0.5), kv='1')

    # Round-trip through minidom to pretty-print, then splice the ParmEd
    # AtomTypes / Residues elements in front of the bond force.
    pretty_xml = MD.parseString(ET.tostring(root)).documentElement.toprettyxml(indent=' ', newl='\n')
    dom = MD.parseString(pretty_xml)
    out_root = dom.documentElement
    bond = out_root.getElementsByTagName('HarmonicBondForce')
    out_root.insertBefore(atom_type[0], bond[0])
    if len(residue) > 0:
        out_root.insertBefore(residue[0], bond[0])

    # The context manager guarantees the output file is flushed and closed.
    with open(file_name+'.xml', 'w') as xf:
        dom.writexml(xf, indent='')
|
|
1243
|
+
###################################################################################################
|
|
1244
|
+
|
|
1245
|
+
###################################################################################################
|
|
1246
|
+
def run(self,):
|
|
1247
|
+
|
|
1248
|
+
resname_prefix = 'G'
|
|
1249
|
+
atomname_prefix = ''
|
|
1250
|
+
|
|
1251
|
+
# Read PDB file
|
|
1252
|
+
cg_structure = pmd.Structure()
|
|
1253
|
+
self.logger.info("Reading in PDB file %s"%self.pdbfile)
|
|
1254
|
+
|
|
1255
|
+
struct = pmd.load_file(self.pdbfile)
|
|
1256
|
+
sel_idx = np.zeros(len(struct.atoms))
|
|
1257
|
+
for idx, res in enumerate(struct.residues):
|
|
1258
|
+
res.number = idx+1
|
|
1259
|
+
if res.name in self.aa:
|
|
1260
|
+
for atm in res.atoms:
|
|
1261
|
+
if atm.element != 1:
|
|
1262
|
+
sel_idx[atm.idx] = 1
|
|
1263
|
+
heavy_protein = struct[sel_idx]
|
|
1264
|
+
|
|
1265
|
+
for idx, res in enumerate(heavy_protein.residues):
|
|
1266
|
+
num_backbone = 0
|
|
1267
|
+
num_sidechain = 0
|
|
1268
|
+
for atm in res.atoms:
|
|
1269
|
+
if atm.name in ['C', 'N', 'O', 'CA']:
|
|
1270
|
+
num_backbone += 1
|
|
1271
|
+
elif atm.name != 'OXT':
|
|
1272
|
+
num_sidechain += 1
|
|
1273
|
+
if num_backbone != 4:
|
|
1274
|
+
self.logger.error("ERROR: In pdb the number of backbone atoms in residue %d is incorrect: %d != 4"%(idx+1, num_backbone))
|
|
1275
|
+
sys.exit()
|
|
1276
|
+
if num_sidechain != self.refNscat[res.name]:
|
|
1277
|
+
self.logger.error("ERROR: In pdb the number of sidechain atoms in residue %d is incorrect: %d != %d"%(idx+1, num_sidechain, self.refNscat[res.name]))
|
|
1278
|
+
sys.exit()
|
|
1279
|
+
|
|
1280
|
+
idx_atm = 0
|
|
1281
|
+
ca_list = []
|
|
1282
|
+
chain_id_list = []
|
|
1283
|
+
for res in heavy_protein.residues:
|
|
1284
|
+
if not res.chain in chain_id_list:
|
|
1285
|
+
chain_id_list.append(res.chain)
|
|
1286
|
+
if len(chain_id_list) > len(self.alphabet):
|
|
1287
|
+
self.logger.error('ERROR: The number of chains in pdb file (%d) exceeds the maximum (%d)'%(len(chain_id_list), len(self.alphabet)))
|
|
1288
|
+
sys.exit()
|
|
1289
|
+
resid = 0
|
|
1290
|
+
chainid = chain_id_list[0]
|
|
1291
|
+
for idx, res in enumerate(heavy_protein.residues):
|
|
1292
|
+
if res.segid == '':
|
|
1293
|
+
segid = self.alphabet[chain_id_list.index(res.chain)]
|
|
1294
|
+
else:
|
|
1295
|
+
segid = res.segid
|
|
1296
|
+
|
|
1297
|
+
if res.chain != chainid:
|
|
1298
|
+
chainid = res.chain
|
|
1299
|
+
resid = 1
|
|
1300
|
+
else:
|
|
1301
|
+
resid += 1
|
|
1302
|
+
|
|
1303
|
+
SC_Mass = self.aaSCmass[res.name] - self.aaSCmass['GLY']
|
|
1304
|
+
CA_Mass = self.aaSCmass['GLY']
|
|
1305
|
+
SC_COM = np.zeros(3)
|
|
1306
|
+
CA_COM = np.zeros(3)
|
|
1307
|
+
sum_SC_Mass = 0
|
|
1308
|
+
|
|
1309
|
+
for atm in res.atoms:
|
|
1310
|
+
if atm.name not in ['C', 'N', 'O', 'CA', 'OXT']:
|
|
1311
|
+
sum_SC_Mass += atm.mass
|
|
1312
|
+
SC_COM += atm.mass * np.array([atm.xx, atm.xy, atm.xz])
|
|
1313
|
+
elif atm.name == 'CA':
|
|
1314
|
+
CA_COM[0] = atm.xx
|
|
1315
|
+
CA_COM[1] = atm.xy
|
|
1316
|
+
CA_COM[2] = atm.xz
|
|
1317
|
+
if sum_SC_Mass == 0:
|
|
1318
|
+
is_gly = True
|
|
1319
|
+
else:
|
|
1320
|
+
is_gly = False
|
|
1321
|
+
SC_COM /= sum_SC_Mass
|
|
1322
|
+
|
|
1323
|
+
if self.casm == 0:
|
|
1324
|
+
cg_atm = pmd.topologyobjects.Atom(name=atomname_prefix+self.ca_prefix,
|
|
1325
|
+
type=self.ca_prefix+str(idx+1), charge=self.refcharge[res.name],
|
|
1326
|
+
mass=self.aaSCmass[res.name], number=idx_atm+1)
|
|
1327
|
+
cg_atm.xx = CA_COM[0]
|
|
1328
|
+
cg_atm.xy = CA_COM[1]
|
|
1329
|
+
cg_atm.xz = CA_COM[2]
|
|
1330
|
+
cg_structure.add_atom(cg_atm, resname_prefix+str(idx+1), resid, segid=segid, chain=res.chain)
|
|
1331
|
+
idx_atm += 1
|
|
1332
|
+
ca_list.append(cg_atm)
|
|
1333
|
+
else:
|
|
1334
|
+
ca_atm = pmd.topologyobjects.Atom(name=atomname_prefix+self.ca_prefix,
|
|
1335
|
+
type=self.ca_prefix+str(idx+1), charge=0.0,
|
|
1336
|
+
mass=CA_Mass, number=idx_atm+1)
|
|
1337
|
+
ca_atm.xx = CA_COM[0]
|
|
1338
|
+
ca_atm.xy = CA_COM[1]
|
|
1339
|
+
ca_atm.xz = CA_COM[2]
|
|
1340
|
+
cg_structure.add_atom(ca_atm, resname_prefix+str(idx+1), resid, segid=segid, chain=res.chain)
|
|
1341
|
+
idx_atm += 1
|
|
1342
|
+
ca_list.append(ca_atm)
|
|
1343
|
+
|
|
1344
|
+
if not is_gly:
|
|
1345
|
+
sc_atm = pmd.topologyobjects.Atom(name=atomname_prefix+self.sc_prefix,
|
|
1346
|
+
type=self.sc_prefix+str(idx+1), charge=self.refcharge[res.name],
|
|
1347
|
+
mass=SC_Mass, number=idx_atm+1)
|
|
1348
|
+
sc_atm.xx = SC_COM[0]
|
|
1349
|
+
sc_atm.xy = SC_COM[1]
|
|
1350
|
+
sc_atm.xz = SC_COM[2]
|
|
1351
|
+
cg_structure.add_atom(sc_atm, resname_prefix+str(idx+1), resid, segid=segid, chain=res.chain)
|
|
1352
|
+
idx_atm += 1
|
|
1353
|
+
|
|
1354
|
+
# Assign domain id to atom
|
|
1355
|
+
if self.ndomain != 0:
|
|
1356
|
+
self.logger.debug('Assign domain id to each atom')
|
|
1357
|
+
id_domain = []
|
|
1358
|
+
for atm in cg_structure.atoms:
|
|
1359
|
+
res_id = atm.residue.idx+1
|
|
1360
|
+
found = False
|
|
1361
|
+
for i, di in enumerate(self.dom):
|
|
1362
|
+
if res_id >= di[0] and res_id <= di[1]:
|
|
1363
|
+
id_domain.append(i)
|
|
1364
|
+
found = True
|
|
1365
|
+
break
|
|
1366
|
+
if not found:
|
|
1367
|
+
self.logger.error('ERROR: %s is not located in any domain.'%atm)
|
|
1368
|
+
sys.exit()
|
|
1369
|
+
self.logger.debug('')
|
|
1370
|
+
|
|
1371
|
+
# Write psf, cor and top
|
|
1372
|
+
output_prefix = self.pdbfile.strip().split('/')[-1].split('.pdb')[0]
|
|
1373
|
+
if self.casm == 1:
|
|
1374
|
+
output_prefix += '_ca-cb'
|
|
1375
|
+
else:
|
|
1376
|
+
output_prefix += '_ca'
|
|
1377
|
+
self.logger.info('Create psf')
|
|
1378
|
+
psffile = self.create_psf(cg_structure, ca_list, output_prefix)
|
|
1379
|
+
|
|
1380
|
+
self.logger.debug('Create cor')
|
|
1381
|
+
corfile = os.path.join(self.outdir, output_prefix+'.cor')
|
|
1382
|
+
self.logger.info(f'Writing {corfile}')
|
|
1383
|
+
cg_structure.save(corfile, overwrite=True, format='charmmcrd')
|
|
1384
|
+
|
|
1385
|
+
self.logger.debug('Create top')
|
|
1386
|
+
topfile = self.Create_rtf(cg_structure, output_prefix)
|
|
1387
|
+
|
|
1388
|
+
# Prepare FF parameters
|
|
1389
|
+
self.logger.info("Determining native contacts")
|
|
1390
|
+
dist_map = np.zeros((len(cg_structure.atoms), len(cg_structure.atoms)))
|
|
1391
|
+
for idx_1, atm_1 in enumerate(cg_structure.atoms):
|
|
1392
|
+
for idx_2, atm_2 in enumerate(cg_structure.atoms):
|
|
1393
|
+
dist_map[idx_1, idx_2] = self.calc_distance(atm_1, atm_2)
|
|
1394
|
+
self.logger.info("Finished calculating distance matrix")
|
|
1395
|
+
|
|
1396
|
+
## Compute native contacts between side-chains
|
|
1397
|
+
self.logger.info("Determining side-chains - side-chains contacts")
|
|
1398
|
+
native_ss_map = np.zeros((len(cg_structure.residues), len(cg_structure.residues)))
|
|
1399
|
+
for i in range(len(cg_structure.residues)-3):
|
|
1400
|
+
res_1 = heavy_protein.residues[i]
|
|
1401
|
+
for j in range(i+3, len(cg_structure.residues)): # separate by 2 residues
|
|
1402
|
+
res_2 = heavy_protein.residues[j]
|
|
1403
|
+
found = False
|
|
1404
|
+
for atm_1 in res_1.atoms:
|
|
1405
|
+
for atm_2 in res_2.atoms:
|
|
1406
|
+
if not atm_1.name in ['C', 'N', 'O', 'CA', 'OXT'] and not atm_2.name in ['C', 'N', 'O', 'CA', 'OXT']:
|
|
1407
|
+
dij = self.calc_distance(atm_1, atm_2)
|
|
1408
|
+
if dij <= self.heav_cut:
|
|
1409
|
+
native_ss_map[i,j] = 1
|
|
1410
|
+
native_ss_map[j,i] = 1
|
|
1411
|
+
found = True
|
|
1412
|
+
break
|
|
1413
|
+
if found:
|
|
1414
|
+
break
|
|
1415
|
+
## Compute native contacts between backbone and side-chains
|
|
1416
|
+
self.logger.info("Determining backbone - side-chains contacts")
|
|
1417
|
+
native_bsc_map = np.zeros((len(cg_structure.residues), len(cg_structure.residues)))
|
|
1418
|
+
for i in range(len(cg_structure.residues)):
|
|
1419
|
+
res_1 = heavy_protein.residues[i]
|
|
1420
|
+
for j in range(len(cg_structure.residues)):
|
|
1421
|
+
res_2 = heavy_protein.residues[j]
|
|
1422
|
+
if i < j-2 or i > j+2: # separate by 2 residues
|
|
1423
|
+
found = False
|
|
1424
|
+
for atm_1 in res_1.atoms:
|
|
1425
|
+
for atm_2 in res_2.atoms:
|
|
1426
|
+
if atm_1.name in ['C', 'N', 'O', 'CA', 'OXT'] and atm_2.name not in ['C', 'N', 'O', 'CA', 'OXT']:
|
|
1427
|
+
dij = self.calc_distance(atm_1, atm_2)
|
|
1428
|
+
if dij <= self.heav_cut:
|
|
1429
|
+
native_bsc_map[i,j] = 1
|
|
1430
|
+
found = True
|
|
1431
|
+
break
|
|
1432
|
+
if found:
|
|
1433
|
+
break
|
|
1434
|
+
self.logger.info('# nat sc-sc contacts %d, # nat bb-sc contacts %d, and # non-nat sc-sc %d' % (np.sum(native_ss_map)/2,
|
|
1435
|
+
np.sum(native_bsc_map), (len(cg_structure.residues)-3)*(len(cg_structure.residues)-2)/2 - np.sum(native_ss_map)/2))
|
|
1436
|
+
|
|
1437
|
+
## Determine hydrogen bonds that are present using STRIDE,
|
|
1438
|
+
## and assign to Calpha-Calpha pairs. Also secondary structural elements
|
|
1439
|
+
## within the native structure.
|
|
1440
|
+
self.logger.info("Determining the presence of hydrogen bonds using STRIDE")
|
|
1441
|
+
native_hb_map = np.zeros((len(cg_structure.residues), len(cg_structure.residues)))
|
|
1442
|
+
helical_list = np.zeros(len(cg_structure.residues))
|
|
1443
|
+
hb_ene_map = np.zeros((len(cg_structure.residues), len(cg_structure.residues)))
|
|
1444
|
+
#screen_out = os.popen(f'stride -h %s'%self.pdbfile).readlines()
|
|
1445
|
+
stride_cmd = f'{self.stride_path} -h {self.pdbfile}'
|
|
1446
|
+
#print(stride_cmd)
|
|
1447
|
+
screen_out = subprocess.run(stride_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
1448
|
+
screen_out = screen_out.stdout.decode('utf-8', errors='replace').splitlines()
|
|
1449
|
+
#screen_out = os.popen(stride_cmd).read().splitlines()
|
|
1450
|
+
#print(screen_out)
|
|
1451
|
+
|
|
1452
|
+
for line in screen_out:
|
|
1453
|
+
line = line.strip()
|
|
1454
|
+
resid = 0
|
|
1455
|
+
if line.startswith('ASD '):
|
|
1456
|
+
if 'Helix' in line.split()[6]:
|
|
1457
|
+
helical_list[resid] = 1
|
|
1458
|
+
resid += 1
|
|
1459
|
+
if line.startswith('ACC ') or line.startswith('DNR '):
|
|
1460
|
+
# Get H-bonding info
|
|
1461
|
+
resid_1 = int(line[16:20])+1
|
|
1462
|
+
resid_2 = int(line[36:40])+1
|
|
1463
|
+
chainid_1 = line[8:10].strip()
|
|
1464
|
+
if chainid_1 == '-':
|
|
1465
|
+
chainid_1 = ''
|
|
1466
|
+
chainid_2 = line[28:30].strip()
|
|
1467
|
+
if chainid_2 == '-':
|
|
1468
|
+
chainid_2 = ''
|
|
1469
|
+
found = [0, 0]
|
|
1470
|
+
for idx, res in enumerate(cg_structure.residues):
|
|
1471
|
+
if res.number == resid_1 and res.chain == chainid_1:
|
|
1472
|
+
idx_1 = idx
|
|
1473
|
+
found[0] = 1
|
|
1474
|
+
elif res.number == resid_2 and res.chain == chainid_2:
|
|
1475
|
+
idx_2 = idx
|
|
1476
|
+
found[1] = 1
|
|
1477
|
+
if sum(found) == 2:
|
|
1478
|
+
break
|
|
1479
|
+
if sum(found) != 2:
|
|
1480
|
+
self.logger.error("ERROR: Cannot find residue in parmed structure according to the Hbond info.\n %s"%line)
|
|
1481
|
+
sys.exit()
|
|
1482
|
+
if chainid_1 == chainid_2:
|
|
1483
|
+
if idx_1 < idx_2:
|
|
1484
|
+
if native_hb_map[idx_1, idx_2] == 1:
|
|
1485
|
+
if helical_list[idx_1] == 1 and helical_list[idx_2] == 1:
|
|
1486
|
+
hb_ene_map[idx_1, idx_2] = 2*self.single_hbond_ene_helix
|
|
1487
|
+
hb_ene_map[idx_2, idx_1] = 2*self.single_hbond_ene_helix
|
|
1488
|
+
else:
|
|
1489
|
+
hb_ene_map[idx_1, idx_2] = 2*self.single_hbond_ene
|
|
1490
|
+
hb_ene_map[idx_2, idx_1] = 2*self.single_hbond_ene
|
|
1491
|
+
else:
|
|
1492
|
+
native_hb_map[idx_1, idx_2] = 1
|
|
1493
|
+
native_hb_map[idx_2, idx_1] = 1
|
|
1494
|
+
if helical_list[idx_1] == 1 and helical_list[idx_2] == 1:
|
|
1495
|
+
hb_ene_map[idx_1, idx_2] = self.single_hbond_ene_helix
|
|
1496
|
+
hb_ene_map[idx_2, idx_1] = self.single_hbond_ene_helix
|
|
1497
|
+
else:
|
|
1498
|
+
hb_ene_map[idx_1, idx_2] = self.single_hbond_ene
|
|
1499
|
+
hb_ene_map[idx_2, idx_1] = self.single_hbond_ene
|
|
1500
|
+
else:
|
|
1501
|
+
native_hb_map[idx_1, idx_2] = 1
|
|
1502
|
+
native_hb_map[idx_2, idx_1] = 1
|
|
1503
|
+
hb_ene_map[idx_1, idx_2] = self.single_hbond_ene
|
|
1504
|
+
hb_ene_map[idx_2, idx_1] = self.single_hbond_ene
|
|
1505
|
+
num_hb = 0
|
|
1506
|
+
for i in range(len(cg_structure.residues)-1):
|
|
1507
|
+
for j in range(i+1, len(cg_structure.residues)):
|
|
1508
|
+
if native_hb_map[i,j] == 1:
|
|
1509
|
+
num_hb += 1
|
|
1510
|
+
#print('%d %.4f, %d %d'%(num_hb, hb_ene_map[i,j], i+1, j+1))
|
|
1511
|
+
self.logger.info('# of unique Hbonds %d'%num_hb)
|
|
1512
|
+
native_contact_map = np.zeros((len(cg_structure.residues), len(cg_structure.residues)))
|
|
1513
|
+
for i in range(len(cg_structure.residues)):
|
|
1514
|
+
for j in range(len(cg_structure.residues)):
|
|
1515
|
+
if native_ss_map[i,j] == 1 or native_bsc_map[i,j] == 1 or native_hb_map[i,j] == 1:
|
|
1516
|
+
native_contact_map[i,j] == 1
|
|
1517
|
+
|
|
1518
|
+
## Write prm file ##
|
|
1519
|
+
self.logger.debug('Create prm')
|
|
1520
|
+
prmfile = self.pdbfile.strip().split('/')[-1].split('.pdb')[0] + '_nscal' + str(self.nscal) + '_fnn' + str(self.fnn) + '_go_' + self.potential_name.lower() + '.prm'
|
|
1521
|
+
prmfile = os.path.join(self.outdir, prmfile)
|
|
1522
|
+
self.logger.info(f'Writing {prmfile}')
|
|
1523
|
+
|
|
1524
|
+
f = open(prmfile, 'w')
|
|
1525
|
+
f.write('* This CHARMM .param file describes a Go model of %s\n'%(self.pdbfile.split('/')[-1]))
|
|
1526
|
+
f.write('*\n\n')
|
|
1527
|
+
# Atomic mass
|
|
1528
|
+
f.write('ATOM\n')
|
|
1529
|
+
for idx, atm in enumerate(cg_structure.atoms):
|
|
1530
|
+
f.write('MASS %-5s %-8s %-10.6f\n'%(str(idx+1), atm.type, atm.mass))
|
|
1531
|
+
f.write('\n')
|
|
1532
|
+
# Bond section (non-go bondlength for both models)
|
|
1533
|
+
f.write('BOND\n')
|
|
1534
|
+
kb = 50.0
|
|
1535
|
+
for idx, bond in enumerate(cg_structure.bonds):
|
|
1536
|
+
if self.bondlength_go == 0:
|
|
1537
|
+
if bond.atom2.name == (atomname_prefix+self.sc_prefix):
|
|
1538
|
+
res_idx = bond.atom2.residue.idx
|
|
1539
|
+
bond_length = self.lbs_nongo[heavy_protein.residues[res_idx].name]
|
|
1540
|
+
f.write('%-8s%-10s%-12.6f%-9.6f\n'%(bond.atom1.type, bond.atom2.type, kb, bond_length))
|
|
1541
|
+
else:
|
|
1542
|
+
f.write('%-8s%-10s%-12.6f%-9.6f\n'%(bond.atom1.type, bond.atom2.type, kb, 3.81))
|
|
1543
|
+
else:
|
|
1544
|
+
f.write('%-8s%-10s%-12.6f%-9.6f\n'%(bond.atom1.type, bond.atom2.type, kb, bond.measure()))
|
|
1545
|
+
f.write('\n')
|
|
1546
|
+
# Angle section
|
|
1547
|
+
f.write('ANGLE\n')
|
|
1548
|
+
ka = 30.0
|
|
1549
|
+
for idx, angle in enumerate(cg_structure.angles):
|
|
1550
|
+
if self.angle_dw == 0:
|
|
1551
|
+
f.write('%-8s%-8s%-10s%11.6f%11.6f\n'%(angle.atom1.type, angle.atom2.type, angle.atom3.type,
|
|
1552
|
+
ka, angle.measure()))
|
|
1553
|
+
else:
|
|
1554
|
+
if angle.atom1 == (atomname_prefix+self.sc_prefix):
|
|
1555
|
+
res_idx = angle.atom1.residue.idx
|
|
1556
|
+
angle_value = self.ang_sb_nongo[heavy_protein.residues[res_idx].name]
|
|
1557
|
+
f.write('%-8s%-8s%-10s%11.6f%11.6f\n'%(angle.atom1.type, angle.atom2.type, angle.atom3.type,
|
|
1558
|
+
ka, angle_value))
|
|
1559
|
+
elif angle.atom3 == (atomname_prefix+self.sc_prefix):
|
|
1560
|
+
res_idx = angle.atom3.residue.idx
|
|
1561
|
+
angle_value = self.ang_bs_nongo[heavy_protein.residues[res_idx].name]
|
|
1562
|
+
f.write('%-8s%-8s%-10s%11.6f%11.6f\n'%(angle.atom1.type, angle.atom2.type, angle.atom3.type,
|
|
1563
|
+
ka, angle_value))
|
|
1564
|
+
else:
|
|
1565
|
+
f.write('%-8s%-8s%-10s 106.4 91.7 26.3 130.0 0.1 4.3\n'%(angle.atom1.type, angle.atom2.type, angle.atom3.type))
|
|
1566
|
+
f.write('\n')
|
|
1567
|
+
# Dihedral section
|
|
1568
|
+
f.write('DIHEDRAL\n')
|
|
1569
|
+
f.write('! backbone dihedrals\n')
|
|
1570
|
+
for idx, dihedral in enumerate(cg_structure.dihedrals):
|
|
1571
|
+
if self.dihedral_go == 1: # Use Go backbone dihedral angles
|
|
1572
|
+
delta = 1*dihedral.measure()-180
|
|
1573
|
+
if self.casm == 1:
|
|
1574
|
+
if helical_list[dihedral.atom2.residue.idx] == 1 and helical_list[dihedral.atom3.residue.idx] == 1: # helical
|
|
1575
|
+
kd = 0.30
|
|
1576
|
+
else: # not helical
|
|
1577
|
+
kd = 0.55
|
|
1578
|
+
f.write('%-5s %-5s %-5s %-7s%-10.6f%-3d%-10.5f\n'%(dihedral.atom1.type, dihedral.atom2.type, dihedral.atom3.type,
|
|
1579
|
+
dihedral.atom4.type, kd, 1, delta))
|
|
1580
|
+
delta = 3*dihedral.measure()-180
|
|
1581
|
+
if helical_list[dihedral.atom2.residue.idx] == 1 and helical_list[dihedral.atom3.residue.idx] == 1: # helical
|
|
1582
|
+
kd = 0.15
|
|
1583
|
+
else: # not helical
|
|
1584
|
+
kd = 0.275
|
|
1585
|
+
f.write('%-5s %-5s %-5s %-7s%-10.6f%-3d%-10.5f\n'%(dihedral.atom1.type, dihedral.atom2.type, dihedral.atom3.type,
|
|
1586
|
+
dihedral.atom4.type, kd, 3, delta))
|
|
1587
|
+
else:
|
|
1588
|
+
if helical_list[dihedral.atom2.residue.idx] == 1 and helical_list[dihedral.atom3.residue.idx] == 1: # helical
|
|
1589
|
+
kd = 0.75
|
|
1590
|
+
else: # not helical
|
|
1591
|
+
kd = 0.75
|
|
1592
|
+
f.write('%-5s %-5s %-5s %-7s%-10.6f%-3d%-10.5f\n'%(dihedral.atom1.type, dihedral.atom2.type, dihedral.atom3.type,
|
|
1593
|
+
dihedral.atom4.type, kd, 1, delta))
|
|
1594
|
+
delta = 3*dihedral.measure()-180
|
|
1595
|
+
if helical_list[dihedral.atom2.residue.idx] == 1 and helical_list[dihedral.atom3.residue.idx] == 1: # helical
|
|
1596
|
+
kd = 0.275
|
|
1597
|
+
else: # not helical
|
|
1598
|
+
kd = 0.275
|
|
1599
|
+
f.write('%-5s %-5s %-5s %-7s%-10.6f%-3d%-10.5f\n'%(dihedral.atom1.type, dihedral.atom2.type, dihedral.atom3.type,
|
|
1600
|
+
dihedral.atom4.type, kd, 3, delta))
|
|
1601
|
+
else: # Use Non-go dihedrals
|
|
1602
|
+
for i in range(4):
|
|
1603
|
+
res_idx_1 = dihedral.atom2.residue.idx
|
|
1604
|
+
res_idx_2 = dihedral.atom3.residue.idx
|
|
1605
|
+
[kd, period, delta] = self.dihedb_nongo[self.res2n[heavy_protein.residues[res_idx_1].name]][self.res2n[heavy_protein.residues[res_idx_2].name]][i]
|
|
1606
|
+
f.write('%-5s %-5s %-5s %-7s%-10.6f%-3d%-10.5f\n'%(dihedral.atom1.type, dihedral.atom2.type, dihedral.atom3.type,
|
|
1607
|
+
dihedral.atom4.type, kd, period, delta))
|
|
1608
|
+
f.write('\n')
|
|
1609
|
+
# Improper dihedral section
|
|
1610
|
+
f.write('IMPHI\n')
|
|
1611
|
+
f.write('! sidechain improper dihedrals to maintain chirality\n')
|
|
1612
|
+
if self.casm == 1:
|
|
1613
|
+
for idx, improper in enumerate(cg_structure.impropers):
|
|
1614
|
+
if self.improperdihed_go == 1:
|
|
1615
|
+
angle = improper.measure()
|
|
1616
|
+
else:
|
|
1617
|
+
res_idx = improper.atom1.residue.idx
|
|
1618
|
+
angle = self.improper_nongo[heavy_protein.residues[res_idx].name] # use transferable improper dihedral
|
|
1619
|
+
delta = angle + 180
|
|
1620
|
+
kd = 20*abs(self.avg_mj)
|
|
1621
|
+
f.write('%-5s %-5s %-5s %-7s%.6f %-3d%-10.5f\n'%(improper.atom1.type, improper.atom2.type, improper.atom3.type,
|
|
1622
|
+
improper.atom4.type, kd, 1, delta))
|
|
1623
|
+
f.write('\n')
|
|
1624
|
+
|
|
1625
|
+
## nonbonded section
|
|
1626
|
+
f.write('NONBONDED NBXMOD 3 ATOM CDIEL SWITCH VATOM VDISTANCE VSWITCH -\n')
|
|
1627
|
+
f.write('CUTNB 32 CTOFNB 20 CTONNB 18 EPS 78.5 WMIN 1.5 E14FAC 1.0\n')
|
|
1628
|
+
f.write('!atom e_min r_min/2\n')
|
|
1629
|
+
# if using the C-alpha only model do some preprocessing to determine the collision
|
|
1630
|
+
# diameter of non-native interactions according to the Karanacolis-Brooks
|
|
1631
|
+
# algorithm
|
|
1632
|
+
if self.casm != 1:
|
|
1633
|
+
sigmin = 1000000*np.ones(len(cg_structure.residues))
|
|
1634
|
+
if self.potential_name.startswith('GENERIC'):
|
|
1635
|
+
for idx, res in enumerate(cg_structure.residues):
|
|
1636
|
+
sigmin[idx] = 2*self.rvdw[heavy_protein.residues[idx].name]
|
|
1637
|
+
else:
|
|
1638
|
+
# determine the collision diameter
|
|
1639
|
+
for i in range(len(cg_structure.residues)):
|
|
1640
|
+
for j in range(len(cg_structure.residues)):
|
|
1641
|
+
if native_contact_map[i,j] != 1 and (j < i-2 or j > i+2):
|
|
1642
|
+
if dist_map[i,j] < sigmin[i]:
|
|
1643
|
+
sigmin[i] = dist_map[i,j]
|
|
1644
|
+
for idx, atm in enumerate(cg_structure.atoms):
|
|
1645
|
+
eps2 = -0.000132
|
|
1646
|
+
rmin2 = sigmin[idx]*2**(1/6)/2
|
|
1647
|
+
temp = self.fnn*rmin2
|
|
1648
|
+
f.write("%-9s%-5.1f%-9.6f %-10.6f\n"%(atm.type, 0.0, eps2, temp))
|
|
1649
|
+
else:
|
|
1650
|
+
eps2 = '-1e-12' #!!!! SYSTem dependent !!!!!!!!
|
|
1651
|
+
rmin2 = 20.0
|
|
1652
|
+
for idx, atm in enumerate(cg_structure.atoms):
|
|
1653
|
+
if atm.name == (atomname_prefix+self.ca_prefix):
|
|
1654
|
+
f.write("%-9s%-5.1f%-s %-10.6f\n"%(atm.type, 0.0, eps2, rmin2))
|
|
1655
|
+
else:
|
|
1656
|
+
t1 = 1
|
|
1657
|
+
t2 = (t1*(2*self.rvdw[heavy_protein.residues[atm.residue.idx].name]*2**(1/6))**12/(1e-12))**(1/12)
|
|
1658
|
+
temp = self.fnn*t2/2
|
|
1659
|
+
f.write("%-9s%-5.1f%-s %-10.6f\n"%(atm.type, 0.0, eps2, temp))
|
|
1660
|
+
f.write('\n')
|
|
1661
|
+
## NBFIX section
|
|
1662
|
+
f.write('NBFIX\n')
|
|
1663
|
+
### native side-chain pairs and backbone Hbonding
|
|
1664
|
+
if self.casm == 1:
|
|
1665
|
+
f.write('! b-b due to Hbonding\n')
|
|
1666
|
+
totene_bb = 0
|
|
1667
|
+
for i in range(len(cg_structure.residues)-1):
|
|
1668
|
+
for j in range(i+1, len(cg_structure.residues)):
|
|
1669
|
+
if native_hb_map[i,j] == 1:
|
|
1670
|
+
atm_i = cg_structure.residues[i].atoms[0]
|
|
1671
|
+
atm_j = cg_structure.residues[j].atoms[0]
|
|
1672
|
+
comment = ''
|
|
1673
|
+
if self.ndomain == 0: # No domain defined
|
|
1674
|
+
ene = hb_ene_map[i,j]
|
|
1675
|
+
else: # Domain defined
|
|
1676
|
+
if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
|
|
1677
|
+
di = id_domain[atm_i.idx]
|
|
1678
|
+
comment = '! in Domain %d'%(di+1)
|
|
1679
|
+
ene = hb_ene_map[i,j]
|
|
1680
|
+
else: # in the interface
|
|
1681
|
+
di = id_domain[atm_i.idx]
|
|
1682
|
+
dj = id_domain[atm_j.idx]
|
|
1683
|
+
comment = '! in Interface %d | %d'%(di+1, dj+1)
|
|
1684
|
+
ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
|
|
1685
|
+
#ene = self.dom_nscal[ii] # ??? Use nscal at interface
|
|
1686
|
+
ene = hb_ene_map[i,j] # ??? Use the same energy
|
|
1687
|
+
f.write('%-8s%-11s%-13.6f%-11.6f%s\n'%(atm_i.type, atm_j.type, -ene, dist_map[atm_i.idx, atm_j.idx], comment))
|
|
1688
|
+
totene_bb += ene
|
|
1689
|
+
|
|
1690
|
+
totene_sc = 0
|
|
1691
|
+
totene_bsc = 0
|
|
1692
|
+
if self.potential_name.startswith('GENERIC'): # C-alpha - side chain model Generic non-bond interactions
|
|
1693
|
+
f.write('!Generic interactions between unstructured portions of this protein\n')
|
|
1694
|
+
# Print out NBFIX energy values
|
|
1695
|
+
for i in range(len(cg_structure.residues)-3):
|
|
1696
|
+
resname_1 = heavy_protein.residues[cg_structure.residues[i].idx].name
|
|
1697
|
+
for j in range(i+3, len(cg_structure.residues)):
|
|
1698
|
+
resname_2 = heavy_protein.residues[cg_structure.residues[j].idx].name
|
|
1699
|
+
atm_i = cg_structure.residues[i].atoms[1] # ??? should be side-chain
|
|
1700
|
+
atm_j = cg_structure.residues[j].atoms[1] # ??? should be side-chain
|
|
1701
|
+
temp = self.rvdw[resname_1] + self.rvdw[resname_2]
|
|
1702
|
+
ene=(0.3/10)*self.eps[self.res2n[resname_1]][self.res2n[resname_2]]
|
|
1703
|
+
f.write('%-8s%-11s%-13.6f%-11.6f\n'%(atm_i.type, atm_j.type, -ene, temp))
|
|
1704
|
+
else: # Go non-bond interactions
|
|
1705
|
+
f.write('! native side-chain interactions\n')
|
|
1706
|
+
for i in range(len(cg_structure.residues)-1):
|
|
1707
|
+
resname_1 = heavy_protein.residues[cg_structure.residues[i].idx].name
|
|
1708
|
+
for j in range(i+1, len(cg_structure.residues)):
|
|
1709
|
+
resname_2 = heavy_protein.residues[cg_structure.residues[j].idx].name
|
|
1710
|
+
if native_ss_map[i,j] == 1:
|
|
1711
|
+
atm_i = cg_structure.residues[i].atoms[1]
|
|
1712
|
+
atm_j = cg_structure.residues[j].atoms[1]
|
|
1713
|
+
if self.eps[self.res2n[resname_1]][self.res2n[resname_2]] == 0:
|
|
1714
|
+
self.logger.error('ERROR 1: Well depth equal to zero!!! %s - %s'%(resname_1, resname_2))
|
|
1715
|
+
sys.exit()
|
|
1716
|
+
comment = ''
|
|
1717
|
+
if self.ndomain == 0: # No domain defined
|
|
1718
|
+
ene = self.eps[self.res2n[resname_1]][self.res2n[resname_2]]
|
|
1719
|
+
else: # If domain is defined
|
|
1720
|
+
if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
|
|
1721
|
+
di = id_domain[atm_i.idx]
|
|
1722
|
+
comment = '! in Domain %d'%(di+1)
|
|
1723
|
+
ene = self.eps[self.res2n[resname_1]][self.res2n[resname_2]] * self.dom_nscal[di]
|
|
1724
|
+
else: # in the interface
|
|
1725
|
+
di = id_domain[atm_i.idx]
|
|
1726
|
+
dj = id_domain[atm_j.idx]
|
|
1727
|
+
comment = '! in Interface %d | %d'%(di+1, dj+1)
|
|
1728
|
+
ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
|
|
1729
|
+
ene = self.eps[self.res2n[resname_1]][self.res2n[resname_2]] * self.dom_nscal[ii]
|
|
1730
|
+
f.write('%-8s%-11s%-13.6f%-11.6f%s\n'%(atm_i.type, atm_j.type, -ene, dist_map[atm_i.idx, atm_j.idx], comment))
|
|
1731
|
+
totene_sc += ene
|
|
1732
|
+
|
|
1733
|
+
f.write('! backbone-sidechain interactions\n')
|
|
1734
|
+
for i in range(len(cg_structure.residues)):
|
|
1735
|
+
resname_1 = heavy_protein.residues[cg_structure.residues[i].idx].name
|
|
1736
|
+
for j in range(len(cg_structure.residues)):
|
|
1737
|
+
resname_2 = heavy_protein.residues[cg_structure.residues[j].idx].name
|
|
1738
|
+
if native_bsc_map[i,j] == 1:
|
|
1739
|
+
atm_i = cg_structure.residues[i].atoms[0] # backbone
|
|
1740
|
+
atm_j = cg_structure.residues[j].atoms[1] # sidechain
|
|
1741
|
+
comment = ''
|
|
1742
|
+
if self.ndomain == 0: # No domain defined
|
|
1743
|
+
ene = self.ene_bsc
|
|
1744
|
+
else: # If domain is defined
|
|
1745
|
+
if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
|
|
1746
|
+
di = id_domain[atm_i.idx]
|
|
1747
|
+
comment = '! in Domain %d'%(di+1)
|
|
1748
|
+
ene = self.ene_bsc
|
|
1749
|
+
else: # in the interface
|
|
1750
|
+
di = id_domain[atm_i.idx]
|
|
1751
|
+
dj = id_domain[atm_j.idx]
|
|
1752
|
+
comment = '! in Interface %d | %d'%(di+1, dj+1)
|
|
1753
|
+
ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
|
|
1754
|
+
ene = self.ene_bsc * self.dom_nscal[ii] # Rescaled energy
|
|
1755
|
+
f.write('%-8s%-11s%-13.6f%-11.6f%s\n'%(atm_i.type, atm_j.type, -ene, dist_map[atm_i.idx, atm_j.idx], comment))
|
|
1756
|
+
totene_bsc += ene
|
|
1757
|
+
|
|
1758
|
+
f.write('\n')
|
|
1759
|
+
f.write('! %.4f, %.4f, %.4f\n'%(totene_bb, totene_sc, totene_bsc))
|
|
1760
|
+
else:
|
|
1761
|
+
if not self.potential_name.startswith('GENERIC'): # C-alpha model
|
|
1762
|
+
f.write('! b-b due to Hbonding plus native side-chain interactions plus backbone-sidechain interactions\n')
|
|
1763
|
+
# Add up non-bonded energies
|
|
1764
|
+
for i in range(len(cg_structure.residues)-1):
|
|
1765
|
+
resname_1 = heavy_protein.residues[cg_structure.residues[i].idx].name
|
|
1766
|
+
for j in range(i+1, len(cg_structure.residues)):
|
|
1767
|
+
resname_2 = heavy_protein.residues[cg_structure.residues[j].idx].name
|
|
1768
|
+
atm_i = cg_structure.residues[i].atoms[0]
|
|
1769
|
+
atm_j = cg_structure.residues[j].atoms[0]
|
|
1770
|
+
ene = 0
|
|
1771
|
+
# hydrogen bonds
|
|
1772
|
+
if native_hb_map[i,j] == 1:
|
|
1773
|
+
if self.ndomain == 0: # No domain defined
|
|
1774
|
+
ene += hb_ene_map[i,j]
|
|
1775
|
+
else: # Domain defined
|
|
1776
|
+
if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
|
|
1777
|
+
di = id_domain[atm_i.idx]
|
|
1778
|
+
ene += hb_ene_map[i,j]
|
|
1779
|
+
else: # in the interface
|
|
1780
|
+
di = id_domain[atm_i.idx]
|
|
1781
|
+
dj = id_domain[atm_j.idx]
|
|
1782
|
+
ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
|
|
1783
|
+
ene += hb_ene_map[i,j] # Use the same energy
|
|
1784
|
+
# sc-sc interactions
|
|
1785
|
+
if native_ss_map[i,j] == 1:
|
|
1786
|
+
if self.eps[self.res2n[resname_1]][self.res2n[resname_2]] == 0:
|
|
1787
|
+
self.logger.error('ERROR 1: Well depth equal to zero!!! %s - %s'%(resname_1, resname_2))
|
|
1788
|
+
sys.exit()
|
|
1789
|
+
if self.ndomain == 0: # No domain defined
|
|
1790
|
+
ene += self.eps[self.res2n[resname_1]][self.res2n[resname_2]]
|
|
1791
|
+
else: # Domain defined
|
|
1792
|
+
if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
|
|
1793
|
+
di = id_domain[atm_i.idx]
|
|
1794
|
+
ene += self.eps[self.res2n[resname_1]][self.res2n[resname_2]] * self.dom_nscal[di]
|
|
1795
|
+
else: # in the interface
|
|
1796
|
+
di = id_domain[atm_i.idx]
|
|
1797
|
+
dj = id_domain[atm_j.idx]
|
|
1798
|
+
ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
|
|
1799
|
+
ene += self.eps[self.res2n[resname_1]][self.res2n[resname_2]] * self.dom_nscal[ii]
|
|
1800
|
+
# b-sc interactions
|
|
1801
|
+
if native_bsc_map[i,j] == 1:
|
|
1802
|
+
if self.ndomain == 0: # No domain defined
|
|
1803
|
+
ene += self.ene_bsc
|
|
1804
|
+
else: # Domain defined
|
|
1805
|
+
if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
|
|
1806
|
+
di = id_domain[atm_i.idx]
|
|
1807
|
+
ene += self.ene_bsc
|
|
1808
|
+
else: # in the interface
|
|
1809
|
+
di = id_domain[atm_i.idx]
|
|
1810
|
+
dj = id_domain[atm_j.idx]
|
|
1811
|
+
ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
|
|
1812
|
+
ene += self.ene_bsc # Use the same energy
|
|
1813
|
+
if native_bsc_map[j,i] == 1:
|
|
1814
|
+
if self.ndomain == 0: # No domain defined
|
|
1815
|
+
ene += self.ene_bsc
|
|
1816
|
+
else: # Domain defined
|
|
1817
|
+
if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
|
|
1818
|
+
di = id_domain[atm_i.idx]
|
|
1819
|
+
ene += self.ene_bsc
|
|
1820
|
+
else: # in the interface
|
|
1821
|
+
di = id_domain[atm_i.idx]
|
|
1822
|
+
dj = id_domain[atm_j.idx]
|
|
1823
|
+
ii = int((2*self.ndomain - min(di, dj)) * (min(di, dj) + 1) / 2 + abs(di - dj) - 1)
|
|
1824
|
+
ene += self.ene_bsc # Use the same energy
|
|
1825
|
+
|
|
1826
|
+
# Write NBFIX
|
|
1827
|
+
if ene != 0:
|
|
1828
|
+
comment = ''
|
|
1829
|
+
if self.ndomain != 0:
|
|
1830
|
+
if id_domain[atm_i.idx] == id_domain[atm_j.idx]: # in the same domain
|
|
1831
|
+
di = id_domain[atm_i.idx]
|
|
1832
|
+
comment = '! in Domain %d'%(di+1)
|
|
1833
|
+
else: # in the interface
|
|
1834
|
+
di = id_domain[atm_i.idx]
|
|
1835
|
+
dj = id_domain[atm_j.idx]
|
|
1836
|
+
comment = '! in Interface %d | %d'%(di+1, dj+1)
|
|
1837
|
+
f.write('%-8s%-11s%-13.6f%-11.6f%s\n'%(atm_i.type, atm_j.type, -ene, dist_map[atm_i.idx, atm_j.idx], comment))
|
|
1838
|
+
else:
|
|
1839
|
+
f.write('!Generic interactions between unstructured portions of this protein\n')
|
|
1840
|
+
# Print out NBFIX energy values
|
|
1841
|
+
for i in range(len(cg_structure.residues)-3):
|
|
1842
|
+
resname_1 = heavy_protein.residues[cg_structure.residues[i].idx].name
|
|
1843
|
+
for j in range(i+3, len(cg_structure.residues)):
|
|
1844
|
+
resname_2 = heavy_protein.residues[cg_structure.residues[j].idx].name
|
|
1845
|
+
atm_i = cg_structure.residues[i].atoms[0]
|
|
1846
|
+
atm_j = cg_structure.residues[j].atoms[0]
|
|
1847
|
+
temp = self.rvdw[resname_1] + self.rvdw[resname_2]
|
|
1848
|
+
ene=(0.3/10)*self.eps[self.res2n[resname_1]][self.res2n[resname_2]]
|
|
1849
|
+
f.write('%-8s%-11s%-13.6f%-11.6f\n'%(atm_i.type, atm_j.type, -ene, temp))
|
|
1850
|
+
f.write('\nEND\n')
|
|
1851
|
+
f.close()
|
|
1852
|
+
|
|
1853
|
+
self.logger.debug('All done.')
|
|
1854
|
+
return {'cor': corfile, 'prm': prmfile, 'psf': psffile, 'top': topfile,}
|
|
1855
|
+
##########################################################################################
|
|
1856
|
+
|
|
1857
|
+
|
|
1858
|
+
class BackMapping:
|
|
1859
|
+
"""
|
|
1860
|
+
Take a C-alpha coarse grained structure and backmap it to the all-atom resolution
|
|
1861
|
+
"""
|
|
1862
|
+
#############################################################################################################
|
|
1863
|
+
def __init__(self, nproc:int=1, outdir:str='./'):
|
|
1864
|
+
|
|
1865
|
+
self.nproc = str(nproc)
|
|
1866
|
+
self.outdir = outdir
|
|
1867
|
+
if not os.path.exists(self.outdir):
|
|
1868
|
+
os.makedirs(self.outdir)
|
|
1869
|
+
self.logger.info(f'Made directory: {self.outdir}')
|
|
1870
|
+
|
|
1871
|
+
#######################################################################################################
|
|
1872
|
+
|
|
1873
|
+
#######################################################################################################
|
|
1874
|
+
def backmap(self, cg_pdb:str, aa_pdb:str, ID:str):
    """
    Backmap a C-alpha coarse-grained structure to an all-atom structure.

    Parameters
    ----------
    cg_pdb : str
        Path to the coarse-grained PDB file to be rebuilt.
    aa_pdb : str
        Path to the all-atom reference PDB file; it is cleaned and used to
        build the ca-cb model and to supply residue names.
    ID : str
        Tag appended to the stem of ``cg_pdb`` to name the work directory
        and the output files.

    Notes
    -----
    The method changes the working directory into
    ``<outdir>/rebuild_<name>`` while it runs and finishes with
    ``os.chdir('../')``, i.e. in the parent of that work directory.
    The rebuilt PDB is copied into that parent directory. Nothing is returned.
    """

    ##########################################################################
    self.logger.info(f"-> Cleaning PDB file {aa_pdb}")
    name = pathlib.Path(cg_pdb).stem + f'_{ID}'
    # Absolute paths: the directory is changed below, after which relative
    # paths (including the default outdir './') would no longer resolve.
    work_dir = os.path.abspath(os.path.join(self.outdir, 'rebuild_'+name))
    cg_pdb_path = os.path.abspath(cg_pdb)
    # Lazy %-formatting; passing work_dir as a bare extra argument makes
    # the logging module report a string-formatting error.
    self.logger.info('%s %s', name, work_dir)

    os.makedirs(work_dir, exist_ok=True)

    aa_clean_pdb, aa_clean_pdb_outfile = self.clean_pdb(aa_pdb, work_dir, name)
    os.chdir(work_dir)
    self.logger.debug(' Done')
    ##########################################################################

    ##########################################################################
    # build ca-cb model
    self.logger.info(f"-> Building ca-cb model for {aa_clean_pdb_outfile}")
    (prefix, prm_file) = self.create_cg_model(aa_clean_pdb_outfile, ID)
    self.logger.debug(' Done')

    cacb_struct = pmd.load_file(prefix+'.psf')
    cacb_cor = pmd.load_file(prefix+'.cor')
    cacb_struct.coordinates = cacb_cor.coordinates
    ##########################################################################

    ##########################################################################
    # add SC beads to cg pdb
    self.logger.info("-> Adding side chain beads")
    target_name = name
    cg_sc_struct = self.add_sc_beads(cg_pdb_path, cacb_struct)
    self.logger.debug(' Done')
    ##########################################################################

    ##########################################################################
    # run energy minimization for cacb model
    self.logger.info("-> Running energy minimization for ca-cb model")
    cg_sc_min_cor = self.cacb_energy_minimization(cg_sc_struct.positions, prefix, prm_file)
    # aa_clean_pdb is a bare file name; it lives in work_dir (the current directory)
    aa_pdb_struct = pmd.load_file(aa_clean_pdb)
    # copy residue names from the cleaned all-atom structure, by position
    for index, aa_res in enumerate(aa_pdb_struct.residues):
        cg_sc_struct.residues[index].name = aa_res.name
    # rename the bead atoms 'A' / 'B' to ' CA' / ' SC' before saving
    for atm in cg_sc_struct.atoms:
        if atm.name == 'A':
            atm.name = ' CA'
        elif atm.name == 'B':
            atm.name = ' SC'

    # Remove units to perform arithmetic
    coordinates_values = cg_sc_min_cor.value_in_unit(nanometer)

    # Calculate the geometric center (center of mass could be calculated similarly if masses are available)
    geometric_center = np.mean(coordinates_values, axis=0)

    # Shift coordinates to center at the origin
    centered_coordinates = coordinates_values - geometric_center

    # Reapply the original unit
    centered_coordinates = centered_coordinates * nanometer

    cg_sc_struct.positions = centered_coordinates
    self.logger.info(f'cg_sc_struct.positions: {cg_sc_struct.positions[:10]}')
    target_name_mini_pdb = target_name+'_mini.pdb'
    cg_sc_struct.save(target_name_mini_pdb, overwrite=True)
    self.logger.debug(f'SAVED: {target_name_mini_pdb}')
    self.logger.debug(' Done')

    self.logger.info(f"-> Running Pulchra for {target_name_mini_pdb}")
    output_from_Pultra = self.Call_Pulchra(target_name_mini_pdb)

    ## remove the left over SC atoms that cause a template issue
    output_from_Pultra_cleaned = output_from_Pultra.replace('.pdb', '_cleaned.pdb')
    self.remove_sc_beads(output_from_Pultra, output_from_Pultra_cleaned)

    try:
        rec_pdb = self.OpenMM_vacuum_minimization(output_from_Pultra_cleaned, 500000)
        os.system('cp '+rec_pdb+' ../'+rec_pdb)
    except Exception:
        # traceback.print_exc() returns None, so log the formatted
        # traceback text instead (it already includes the exception).
        self.logger.info(traceback.format_exc())
        self.logger.debug('Failed to run OpenMM minimization. Use Pulchra result instead.')
        rec_pdb = target_name+'_rebuilt.pdb'
        os.system('cp '+output_from_Pultra_cleaned+' ../'+rec_pdb)

    os.chdir('../')
    self.logger.info(f'Backmapping from {cg_pdb} -> {rec_pdb}')
|
|
1967
|
+
#######################################################################################################
|
|
1968
|
+
|
|
1969
|
+
#######################################################################################################
|
|
1970
|
+
def clean_pdb(self, pdb, out_dir, name):
    """
    Renumber the residues of ``pdb`` and save only the amino-acid residues.

    Every residue is renumbered 1..N in file order. Atoms belonging to
    residues whose name is in the amino-acid list below are written to
    ``<out_dir>/<name>_clean.pdb``.

    Returns
    -------
    tuple
        ``(file name, full path)`` of the cleaned PDB file.
    """
    amino_acid_names = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', 'ILE',
                        'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL',
                        'HIE', 'HID', 'HIP']
    structure = pmd.load_file(pdb)

    # one 0/1 flag per atom; 1 marks atoms that are kept
    keep_mask = np.zeros(len(structure.atoms))
    for serial, residue in enumerate(structure.residues, start=1):
        residue.number = serial
        if residue.name not in amino_acid_names:
            continue
        for atom in residue.atoms:
            keep_mask[atom.idx] = 1

    clean_name = f'{name}_clean.pdb'
    clean_pdb_outfile = os.path.join(out_dir, clean_name)
    self.logger.info(f'Writing {clean_pdb_outfile}')
    structure[keep_mask].save(clean_pdb_outfile, overwrite=True)
    return clean_name, clean_pdb_outfile
|
|
1987
|
+
#######################################################################################################
|
|
1988
|
+
|
|
1989
|
+
#######################################################################################################
|
|
1990
|
+
def create_psf(self, name):
    """
    Build a PSF file ``<name>.psf`` from ``<name>.top`` and ``<name>.seq``.

    The residue names on the first line of ``<name>.seq`` are looked up in
    the CHARMM parameter set read from ``<name>.top`` and concatenated into
    one structure. Bonds, angles, dihedrals and impropers are then added
    between the beads, every residue gets segid 'A', and the result is
    saved (overwriting an existing file).

    Raises
    ------
    IndexError
        If ``<name>.seq`` is empty.
    """
    segid = 'A'
    parm = pmd.charmm.CharmmParameterSet(name+'.top')
    # context manager: the handle is closed even if reading/parsing raises
    with open(name+'.seq','r') as f:
        seq = f.readlines()[0].strip().split()
    struct = pmd.Structure()
    for resname in seq:
        struct += parm.residues[resname].to_structure()
    # assign masses from the atom types and collect the atoms named 'A'
    ca_list = []
    for atm in struct.atoms:
        atm.mass = parm.atom_types[atm.type].mass
        if atm.name == 'A':
            ca_list.append(atm)
    # create backbone bonds between consecutive 'A' atoms
    for i in range(len(ca_list)-1):
        struct.bonds.append(pmd.topologyobjects.Bond(ca_list[i], ca_list[i+1]))
    # create Angles: one per unordered pair of bond partners around each atom
    for atm in struct.atoms:
        bond_list = atm.bond_partners
        if len(bond_list) > 1:
            for i in range(len(bond_list)-1):
                for j in range(i+1, len(bond_list)):
                    struct.angles.append(pmd.topologyobjects.Angle(bond_list[i], atm, bond_list[j]))
    # create Dihedrals over each run of four consecutive 'A' atoms
    for i in range(len(ca_list)-3):
        struct.dihedrals.append(pmd.topologyobjects.Dihedral(ca_list[i], ca_list[i+1], ca_list[i+2], ca_list[i+3]))
    # create Impropers for interior residues that have a second atom
    for i in range(1, len(ca_list)-1):
        if len(ca_list[i].residue.atoms) > 1:
            b_bead = ca_list[i].residue.atoms[1]
            struct.impropers.append(pmd.topologyobjects.Improper(ca_list[i], ca_list[i-1], ca_list[i+1], b_bead))
    for res in struct.residues:
        res.segid = segid
    struct.save(name+'.psf', overwrite=True)
|
|
2025
|
+
#######################################################################################################
|
|
2026
|
+
|
|
2027
|
+
#######################################################################################################
|
|
2028
|
+
def create_cg_model(self, pdb, ID):
    """
    Create a CASM coarse-grained model from an all-atom PDB file.

    The model is built inside ``./create_model`` (created if needed) by running
    ``CoarseGrain`` with ``nscal=10.0``, ``potential_name='mj'`` and ``casm=1``.
    On success all ``*.psf``, ``*.cor``, ``*.top`` and ``*.prm`` files found there
    are copied to the parent directory. The working directory is always restored
    before returning or raising.

    Parameters:
        pdb (str): Path to the input PDB file.
            NOTE(review): the path is handed to ``CoarseGrain`` after the
            ``chdir`` into ``./create_model``, so a relative path is resolved
            from there -- confirm callers pass an absolute path.
        ID: Identifier forwarded unchanged to ``CoarseGrain``.

    Returns:
        tuple: ``(prefix, prm_name)`` where ``prefix`` is ``<pdb stem>_ca-cb`` and
        ``prm_name`` is ``<pdb stem>_nscal10.0_fnn1_go_mj.prm``.

    Raises:
        SystemExit: With exit status 1 if ``<prefix>.psf`` was not produced.
    """
    self.logger.info(f'Creating CASM CG model from {pdb}')
    start_dir = os.getcwd()
    os.makedirs('./create_model', exist_ok=True)
    os.chdir("./create_model")
    try:
        self.logger.debug(os.getcwd())

        CoarseGrainer = CoarseGrain(outdir='./',
                                    ID=ID,
                                    pdbfile=pdb,
                                    nscal=10.0,
                                    potential_name='mj',
                                    casm=1)
        self.logger.debug(CoarseGrainer)

        CoarseGrainer.run()
        self.logger.debug(os.getcwd())

        name = pathlib.Path(pdb).stem
        prefix = name+'_ca-cb'
        prm_name = name + '_nscal10.0_fnn1_go_mj.prm'
        self.logger.debug(f'name: {name}')
        self.logger.debug(f'prefix: {prefix}')
        self.logger.debug(f'prm_name: {prm_name}')

        if not os.path.exists(prefix+'.psf'):
            self.logger.error("Error: failed to create CG model from %s\n\n"%pdb)
            # Non-zero status so that shells/pipelines see the failure.
            sys.exit(1)

        os.system('cp *.psf ../')
        os.system('cp *.cor ../')
        os.system('cp *.top ../')
        os.system('cp *.prm ../')
    finally:
        # Restore the caller's working directory on success and on any failure.
        os.chdir(start_dir)
    return (prefix, prm_name)
|
|
2064
|
+
#######################################################################################################
|
|
2065
|
+
|
|
2066
|
+
#######################################################################################################
|
|
2067
|
+
def add_sc_beads(self, cg_pdb, cacb_struct):
    """
    Place the beads of a two-bead-per-residue structure onto backbone coordinates.

    A copy of ``cacb_struct`` is made. For every residue, the first atom is moved
    to the corresponding row of the coordinates loaded from ``cg_pdb``. If the
    residue has a second atom, it is placed at the same position shifted along
    +x by the distance between the residue's first and second atoms in the
    original ``cacb_struct``.

    Parameters:
        cg_pdb (str): Coordinate file readable by ``pmd.load_file``. The first
            element of its ``coordinates`` is used and indexed as ``[residue, xyz]``
            -- presumably one backbone bead per residue in residue order; verify
            against the caller.
        cacb_struct: ParmEd structure providing topology and reference
            first-to-second-atom distances. It is not modified.

    Returns:
        The repositioned copy of ``cacb_struct``.
    """
    self.logger.info(f'Adding SC beads to {cg_pdb}')
    cor = pmd.load_file(cg_pdb).coordinates[0]
    # A plain formatted message: passing the array as the format string with
    # extra arguments triggers a logging formatting error.
    self.logger.info(f'{cor} {cor.shape}')

    new_cacb_struct = cacb_struct.copy(pmd.Structure)
    # Reference coordinates never change inside the loop, so fetch them once.
    ref_coords = cacb_struct.coordinates
    for idx, res in enumerate(new_cacb_struct.residues):
        x, y, z = cor[idx, 0], cor[idx, 1], cor[idx, 2]
        first_bead = res.atoms[0]
        first_bead.xx = x
        first_bead.xy = y
        first_bead.xz = z
        if len(res.atoms) > 1:
            second_bead = res.atoms[1]
            cor1 = ref_coords[first_bead.idx, :]
            cor2 = ref_coords[second_bead.idx, :]
            bond_length = np.sum((cor1-cor2)**2)**0.5
            # Only the bond length is preserved; orientation is set along +x.
            second_bead.xx = x + bond_length
            second_bead.xy = y
            second_bead.xz = z
    return new_cacb_struct
|
|
2088
|
+
#######################################################################################################
|
|
2089
|
+
|
|
2090
|
+
#######################################################################################################
|
|
2091
|
+
def cacb_energy_minimization(self, cor, prefix, prm_file):
    """
    Energy-minimize a coarse-grained structure with OpenMM on the CPU platform.

    The topology is read from ``<prefix>.psf``. ``prm_file`` and ``<prefix>.top``
    are passed to ``CoarseGrain.parse_cg_cacb_prm``, after which an OpenMM XML
    force field named like ``prm_file`` (``.prm`` replaced by ``.xml``) must
    exist. Atoms named 'A' are harmonically restrained to their input positions
    with k = 100 kcal/mol/A^2 during the minimization. Energy decompositions
    before and after minimization are written to ``stdout``.

    Parameters:
        cor: Per-atom positions, indexable as ``cor[atom_index][0..2]`` and
            accepted by ``Context.setPositions`` (presumably an OpenMM
            quantity with length units -- TODO confirm).
        prefix (str): Path prefix of the ``.psf`` and ``.top`` files.
        prm_file (str): Path to the CHARMM-style parameter file.

    Returns:
        The minimized positions as returned by ``State.getPositions()``.

    Raises:
        ValueError: If the XML force-field file is not found.
        RuntimeError: If the created system has no force named
            'CustomNonbondedForce'.
    """
    timestep = 0.015*picoseconds
    fbsolu = 0.05/picosecond
    temp = 310*kelvin

    psf_pmd = pmd.charmm.CharmmPsfFile(prefix+'.psf')
    psf = CharmmPsfFile(prefix+'.psf')
    top = psf.topology

    # parse the cg cacb prm file
    topfile = f'{prefix}.top'
    self.logger.debug(os.getcwd())
    self.logger.debug(f'prm_file: {prm_file}')
    self.logger.debug(f'topfile: {topfile}')
    CoarseGrain.parse_cg_cacb_prm(self, prmfile=prm_file, topfile=topfile)
    xml_file = prm_file.split('.prm')[0]+'.xml'
    self.logger.debug(f'xml_file: {xml_file}')
    if not os.path.exists(xml_file):
        raise ValueError(f"Error: {xml_file} not found. Please check the file path.")

    forcefield = ForceField(xml_file)
    self.logger.debug(f'forcefield: {forcefield}')

    # re-name residues that are changed by openmm
    for resid, res in enumerate(top.residues()):
        if res.name != psf_pmd.residues[resid].name:
            res.name = psf_pmd.residues[resid].name

    # Map every residue explicitly to the template carrying its own name.
    template_map = {}
    for chain in top.chains():
        for res in chain.residues():
            template_map[res] = res.name

    system = forcefield.createSystem(top, nonbondedCutoff=2.0*nanometer, constraints=None,
                                     removeCMMotion=False, ignoreExternalBonds=True,
                                     residueTemplates=template_map)
    for force in system.getForces():
        if force.getName() == 'CustomNonbondedForce':
            custom_nb_force = force
            break
    else:
        # Fail with a clear message instead of an unbound-variable error below.
        raise RuntimeError(f"No CustomNonbondedForce found in system created from {xml_file}")
    custom_nb_force.setUseSwitchingFunction(True)
    custom_nb_force.setSwitchingDistance(1.8*nanometer)
    custom_nb_force.setNonbondedMethod(custom_nb_force.CutoffNonPeriodic)

    # add position restraints
    force = CustomExternalForce("k*((x-x0)^2+(y-y0)^2+(z-z0)^2)")
    force.addPerParticleParameter("k")
    force.addPerParticleParameter("x0")
    force.addPerParticleParameter("y0")
    force.addPerParticleParameter("z0")
    system.addForce(force)
    # END add position restraints

    # add position restraints for CA (atoms named 'A'); the force is re-fetched
    # from the system so particles are added to the instance the system holds
    force = system.getForces()[-1]
    k = 100*kilocalorie/mole/angstrom**2
    for atm in top.atoms():
        if atm.name == 'A':
            force.addParticle(atm.index, (k, cor[atm.index][0], cor[atm.index][1], cor[atm.index][2]))

    integrator = LangevinIntegrator(temp, fbsolu, timestep)
    integrator.setConstraintTolerance(0.00001)
    # prepare simulation
    platform = Platform.getPlatformByName('CPU')
    # OpenMM platform properties are string-valued.
    properties = {'Threads': str(self.nproc)}
    simulation = Simulation(top, system, integrator, platform, properties)
    simulation.context.setPositions(cor)
    simulation.context.setVelocitiesToTemperature(temp)
    energy = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilocalorie/mole)
    self.getEnergyDecomposition(stdout, simulation.context, system)
    self.logger.info(' Potential energy before minimization: %.4f kcal/mol'%energy)
    simulation.minimizeEnergy(tolerance=0.1*kilocalories_per_mole)
    energy = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilocalorie/mole)
    self.getEnergyDecomposition(stdout, simulation.context, system)
    self.logger.info(' Potential energy after minimization: %.4f kcal/mol'%energy)
    current_cor = simulation.context.getState(getPositions=True).getPositions()
    return current_cor
|
|
2175
|
+
#######################################################################################################
|
|
2176
|
+
|
|
2177
|
+
#######################################################################################################
|
|
2178
|
+
# energy decomposition
|
|
2179
|
+
def forcegroupify(self, system):
    """
    Assign every force of ``system`` to its own force group.

    The i-th force (as returned by ``system.getForce(i)``) is put into force
    group ``i``.

    Parameters:
        system: Object exposing ``getNumForces()`` and ``getForce(i)``; each
            force must expose ``setForceGroup(i)``.

    Returns:
        dict: Maps the force/group index to the class name of that force.
    """
    forcegroups = {}
    for index in range(system.getNumForces()):
        force = system.getForce(index)
        force.setForceGroup(index)
        # Unqualified class name, e.g. 'HarmonicBondForce'.
        forcegroups[index] = type(force).__name__
    return forcegroups
|
|
2191
|
+
#######################################################################################################
|
|
2192
|
+
|
|
2193
|
+
#######################################################################################################
|
|
2194
|
+
def getEnergyDecomposition(self, handle, context, system):
    """
    Write the potential energy of each force of ``system`` to ``handle``.

    Every force is first placed in its own force group via ``forcegroupify``;
    the energy of each group is then queried from ``context``. If querying a
    group raises ``ValueError``, the message is logged at debug level and NaN
    is recorded for that group.

    Parameters:
        handle: Writable text stream (anything with ``write``).
        context: OpenMM context to evaluate energies in.
        system: OpenMM system whose forces are decomposed.

    Returns:
        dict: Maps the force-group index to its potential energy quantity.
    """
    group_names = self.forcegroupify(system)
    energies = {}
    for group_id in group_names:
        try:
            state = context.getState(getEnergy=True, groups={group_id})
        except ValueError as err:
            self.logger.debug(str(err))
            energies[group_id] = Quantity(np.nan, kilocalories/mole)
            continue
        energies[group_id] = state.getPotentialEnergy()

    handle.write(' Potential Energy:\n')
    for group_id, group_energy in energies.items():
        kcal_per_mol = group_energy.value_in_unit(kilocalories/mole)
        handle.write(' %s: %.4f kcal/mol\n'%(group_names[group_id], kcal_per_mol))
    return energies
|
|
2210
|
+
#######################################################################################################
|
|
2211
|
+
|
|
2212
|
+
#######################################################################################################
|
|
2213
|
+
def Call_Pulchra(self, rebult_pdb):
    """
    Run the bundled ``pulchra`` executable to rebuild an all-atom PDB.

    ``pulchra`` is invoked with ``-v -g -q`` on ``rebult_pdb``; its standard
    output is written to ``pulchra.log`` in the current directory. The output
    file ``<code>.rebuilt.pdb`` is then renamed to ``<code>_pulchra.pdb``, where
    ``<code>`` is the part of ``rebult_pdb`` before the first ``.pdb``.

    The executable is started directly (no shell), so paths containing spaces
    or shell metacharacters are handled correctly.

    Parameters:
        rebult_pdb (str): Path to the input PDB file.

    Returns:
        str: Path of the renamed all-atom PDB file.

    Raises:
        FileNotFoundError: If the pulchra executable cannot be started or the
            expected ``<code>.rebuilt.pdb`` output was not produced.
    """
    import subprocess

    self.logger.info("-> Calling pulchra to reconstruct all-atom PDB")
    self.pulchra = files('EntDetect.resources').joinpath('pulchra')
    self.logger.debug(f'pulchra: {self.pulchra}')
    pulchra_cmd = f'{self.pulchra} -v -g -q {rebult_pdb} > pulchra.log'
    self.logger.debug(f'CALL: {pulchra_cmd}')
    with open('pulchra.log', 'w') as log_file:
        completed = subprocess.run(
            [str(self.pulchra), '-v', '-g', '-q', rebult_pdb],
            stdout=log_file)
    if completed.returncode != 0:
        self.logger.warning(f'pulchra exited with status {completed.returncode}; see pulchra.log')

    pdb_code = rebult_pdb.split('.pdb')[0]
    old_name = pdb_code + ".rebuilt.pdb"
    new_name = pdb_code + "_pulchra.pdb"
    if not os.path.exists(old_name):
        # Fail here instead of returning the name of a file that does not exist.
        raise FileNotFoundError(f'pulchra did not produce {old_name}; see pulchra.log')
    os.replace(old_name, new_name)
    self.logger.info(" Reconstructed all-atom PDB "+new_name)

    return new_name
|
|
2228
|
+
#######################################################################################################
|
|
2229
|
+
|
|
2230
|
+
#######################################################################################################
|
|
2231
|
+
def OpenMM_vacuum_minimization(self, input_pdb, maxcyc):
    """
    Minimize an all-atom PDB structure in vacuum with OpenMM (CPU platform).

    Steps: load ``input_pdb``; add a terminal OXT atom to the last residue of
    every chain that lacks one; add hydrogens at pH 7.0 using the
    ``amber14-all.xml`` force field; restrain atoms named 'CA' to their
    starting positions with k = 500 kcal/mol/A^2; minimize for at most
    ``maxcyc`` iterations; save the result without hydrogens (ParmEd
    selection ``'!@/H'``) to ``<code>_OpenMM_min.pdb``, where ``<code>`` is the
    part of ``input_pdb`` before the first ``.pdb``. Energy decompositions
    before and after minimization are written to ``stdout``.

    Parameters:
        input_pdb (str): Path to the input PDB file.
        maxcyc (int): Maximum number of minimization iterations.

    Returns:
        str: Path of the written minimized PDB file.

    Raises:
        ValueError: If a chain's last residue lacks OXT and also lacks one of
            the C, CA or O atoms needed to place it.
    """
    pdb_code = input_pdb.split('.pdb')[0]

    self.logger.info("-> Running all-atom energy minimization for %d steps in vacuum via OpenMM"%maxcyc)

    platform = Platform.getPlatformByName('CPU')
    # OpenMM platform properties are string-valued.
    properties = {'Threads': str(self.nproc)}

    forcefield = ForceField('amber14-all.xml')
    self.logger.debug(f'input_pdb: {input_pdb}')
    pdb = pdbfile.PDBFile(input_pdb)
    self.logger.debug('FF made and PDB file loaded')

    # Check if the end residue has missing OXT atom and add if needed
    for chain in pdb.topology.chains():
        end_res = list(chain.residues())[-1]
        # Looked up per chain so atoms from a previous chain are never reused.
        atoms_by_name = {atom.name: atom for atom in end_res.atoms()}
        if 'OXT' in atoms_by_name:
            continue
        missing = [nm for nm in ('C', 'CA', 'O') if nm not in atoms_by_name]
        if missing:
            raise ValueError(
                f"Cannot add OXT to terminal residue {end_res.name}: missing atom(s) {', '.join(missing)}")
        C_atom = atoms_by_name['C']
        CA_atom = atoms_by_name['CA']
        O_atom = atoms_by_name['O']
        C_position = np.array(pdb.positions[C_atom.index].value_in_unit(nanometer))
        CA_position = np.array(pdb.positions[CA_atom.index].value_in_unit(nanometer))
        O_position = np.array(pdb.positions[O_atom.index].value_in_unit(nanometer))

        new_atom = pdb.topology.addAtom('OXT', element.oxygen, end_res)
        pdb.topology.addBond(C_atom, new_atom)
        # OXT is the O atom rotated by pi about the CA->C axis through C.
        new_position = np.dot(self.rotation_matrix(C_position-CA_position, np.pi), O_position-C_position) + C_position
        new_position = Quantity(value=Vec3(x=new_position[0], y=new_position[1], z=new_position[2]), unit=nanometer)
        # NOTE(review): the position is inserted right after O; confirm this
        # matches the atom order the topology reports for the new OXT atom.
        pdb.positions.insert(O_atom.index+1, new_position)
    self.logger.debug('QC for OXT complete')

    model = modeller.Modeller(pdb.topology, pdb.positions)
    self.logger.debug(f'model: {model}')
    model.addHydrogens(forcefield=forcefield, pH=7.0)
    self.logger.debug('Hydrogens added')

    top = model.topology
    structure = pmd.openmm.load_topology(top)
    cor = model.positions

    system = forcefield.createSystem(top, nonbondedMethod=NoCutoff, constraints=None)
    self.logger.debug('System created')

    # add position restraints
    force = CustomExternalForce("k*((x-x0)^2+(y-y0)^2+(z-z0)^2)")
    force.addPerParticleParameter("k")
    force.addPerParticleParameter("x0")
    force.addPerParticleParameter("y0")
    force.addPerParticleParameter("z0")
    system.addForce(force)
    self.logger.debug('Position restraints added')
    # END add position restraints

    # add position restraints for CA; the force is re-fetched from the system
    # so particles are added to the instance the system holds
    force = system.getForces()[-1]
    k = 500*kilocalorie/mole/angstrom**2
    for atm in top.atoms():
        if atm.name == 'CA':
            force.addParticle(atm.index, (k, cor[atm.index][0], cor[atm.index][1], cor[atm.index][2]))

    integrator = LangevinIntegrator(300*kelvin, 1/picosecond, 0.002*picoseconds)
    integrator.setConstraintTolerance(0.00001)
    self.logger.debug('Integrator set')

    simulation = Simulation(top, system, integrator, platform, properties)
    simulation.context.setPositions(cor)
    energy = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilocalorie/mole)
    self.getEnergyDecomposition(stdout, simulation.context, system)
    self.logger.info(' Potential energy before minimization: %.4f kcal/mol'%energy)

    simulation.minimizeEnergy(maxIterations=maxcyc)
    energy = simulation.context.getState(getEnergy=True).getPotentialEnergy().value_in_unit(kilocalorie/mole)
    self.getEnergyDecomposition(stdout, simulation.context, system)
    self.logger.info(' Potential energy after minimization: %.4f kcal/mol'%energy)
    current_cor = simulation.context.getState(getPositions=True).getPositions()

    structure.positions = current_cor
    outfile = pdb_code+'_OpenMM_min.pdb'
    # Save everything except hydrogens.
    structure['!@/H'].save(outfile, overwrite=True)
    self.logger.debug(f'SAVED: {outfile}')
    return outfile
|
|
2324
|
+
#######################################################################################################
|
|
2325
|
+
|
|
2326
|
+
#######################################################################################################
|
|
2327
|
+
def remove_sc_beads(self, input_pdb, output_pdb):
    """
    Copy a PDB file while dropping every atom record whose atom name is 'SC'.

    Only ``ATOM`` and ``HETATM`` records are inspected; the atom name is read
    from columns 13-16 of the record. All other lines are copied unchanged.

    Parameters:
        input_pdb (str): Path to the input PDB file.
        output_pdb (str): Path to save the cleaned PDB file.
    """
    atom_record_tags = ("ATOM", "HETATM")
    with open(input_pdb, 'r') as source, open(output_pdb, 'w') as cleaned:
        for record in source:
            is_sc_bead = (record.startswith(atom_record_tags)
                          and record[12:16].strip() == "SC")
            if not is_sc_bead:
                cleaned.write(record)

    self.logger.info(f"Cleaned PDB file saved to {output_pdb}")
|
|
2344
|
+
#######################################################################################################
|
|
2345
|
+
|
|
2346
|
+
#######################################################################################################
|
|
2347
|
+
def rotation_matrix(self, axis, theta):
    """
    Build the 3x3 matrix for a counterclockwise rotation of ``theta`` radians
    about ``axis``.

    The axis is normalized internally, so it does not need unit length. The
    matrix is assembled from the parameters a = cos(theta/2) and
    (b, c, d) = -axis * sin(theta/2).

    Parameters:
        axis: Sequence of three numbers giving the rotation axis.
        theta (float): Rotation angle in radians.

    Returns:
        numpy.ndarray: The 3x3 rotation matrix.
    """
    unit_axis = np.asarray(axis)
    unit_axis = unit_axis / math.sqrt(np.dot(unit_axis, unit_axis))
    half_angle = theta / 2.0
    a = math.cos(half_angle)
    b, c, d = -unit_axis * math.sin(half_angle)

    # Squares and pairwise products of the four parameters.
    aa, bb, cc, dd = a * a, b * b, c * c, d * d
    ab, ac, ad = a * b, a * c, a * d
    bc, bd, cd = b * c, b * d, c * d

    first_row = [aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)]
    second_row = [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)]
    third_row = [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]
    return np.array([first_row, second_row, third_row])
|
|
2361
|
+
#######################################################################################################
|