packmol-memgen-minimal 1.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. packmol_memgen/__init__.py +2 -0
  2. packmol_memgen/__version__.py +34 -0
  3. packmol_memgen/data/LICENSE.Apache-2.0 +201 -0
  4. packmol_memgen/data/extra_solvents.lib +789 -0
  5. packmol_memgen/data/frcmod.lipid_ext +97 -0
  6. packmol_memgen/data/frcmod.solvents +129 -0
  7. packmol_memgen/data/insane_lipids.txt +138 -0
  8. packmol_memgen/data/insane_solvents.txt +45 -0
  9. packmol_memgen/data/leaprc.extra_solvents +42 -0
  10. packmol_memgen/data/leaprc.lipid_ext +48 -0
  11. packmol_memgen/data/lipid_ext.lib +12312 -0
  12. packmol_memgen/data/martini_v3.0.0.itp +356605 -0
  13. packmol_memgen/data/memgen.parm +4082 -0
  14. packmol_memgen/data/pdbs.tar.gz +0 -0
  15. packmol_memgen/data/solvent.parm +14 -0
  16. packmol_memgen/example/example.sh +31 -0
  17. packmol_memgen/lib/__init__.py +0 -0
  18. packmol_memgen/lib/amber.py +77 -0
  19. packmol_memgen/lib/charmmlipid2amber/__init__.py +0 -0
  20. packmol_memgen/lib/charmmlipid2amber/charmmlipid2amber.csv +7164 -0
  21. packmol_memgen/lib/charmmlipid2amber/charmmlipid2amber.py +225 -0
  22. packmol_memgen/lib/pdbremix/LICENSE +21 -0
  23. packmol_memgen/lib/pdbremix/__init__.py +0 -0
  24. packmol_memgen/lib/pdbremix/_version.py +1 -0
  25. packmol_memgen/lib/pdbremix/amber.py +1103 -0
  26. packmol_memgen/lib/pdbremix/asa.py +227 -0
  27. packmol_memgen/lib/pdbremix/data/aminoacid.pdb +334 -0
  28. packmol_memgen/lib/pdbremix/data/binaries.json +26 -0
  29. packmol_memgen/lib/pdbremix/data/charmm22.parameter +2250 -0
  30. packmol_memgen/lib/pdbremix/data/charmm22.topology +1635 -0
  31. packmol_memgen/lib/pdbremix/data/color_b.py +682 -0
  32. packmol_memgen/lib/pdbremix/data/hin.lib +130 -0
  33. packmol_memgen/lib/pdbremix/data/hydroxide.lib +88 -0
  34. packmol_memgen/lib/pdbremix/data/make_chi.py +92 -0
  35. packmol_memgen/lib/pdbremix/data/opls.parameter +1108 -0
  36. packmol_memgen/lib/pdbremix/data/opls.topology +1869 -0
  37. packmol_memgen/lib/pdbremix/data/phd.frcmod +82 -0
  38. packmol_memgen/lib/pdbremix/data/phd.leaprc +4 -0
  39. packmol_memgen/lib/pdbremix/data/phd.prepin +35 -0
  40. packmol_memgen/lib/pdbremix/data/template.pdb +334 -0
  41. packmol_memgen/lib/pdbremix/data/znb.frcmod +24 -0
  42. packmol_memgen/lib/pdbremix/data/znb.leaprc +7 -0
  43. packmol_memgen/lib/pdbremix/data/znb.lib +69 -0
  44. packmol_memgen/lib/pdbremix/data.py +264 -0
  45. packmol_memgen/lib/pdbremix/fetch.py +102 -0
  46. packmol_memgen/lib/pdbremix/force.py +627 -0
  47. packmol_memgen/lib/pdbremix/gromacs.py +978 -0
  48. packmol_memgen/lib/pdbremix/lib/__init__.py +0 -0
  49. packmol_memgen/lib/pdbremix/lib/docopt.py +579 -0
  50. packmol_memgen/lib/pdbremix/lib/pyqcprot.py +305 -0
  51. packmol_memgen/lib/pdbremix/namd.py +1078 -0
  52. packmol_memgen/lib/pdbremix/pdbatoms.py +543 -0
  53. packmol_memgen/lib/pdbremix/pdbtext.py +120 -0
  54. packmol_memgen/lib/pdbremix/protein.py +311 -0
  55. packmol_memgen/lib/pdbremix/pymol.py +480 -0
  56. packmol_memgen/lib/pdbremix/rmsd.py +203 -0
  57. packmol_memgen/lib/pdbremix/simulate.py +420 -0
  58. packmol_memgen/lib/pdbremix/spacehash.py +73 -0
  59. packmol_memgen/lib/pdbremix/trajectory.py +286 -0
  60. packmol_memgen/lib/pdbremix/util.py +273 -0
  61. packmol_memgen/lib/pdbremix/v3.py +16 -0
  62. packmol_memgen/lib/pdbremix/v3array.py +482 -0
  63. packmol_memgen/lib/pdbremix/v3numpy.py +350 -0
  64. packmol_memgen/lib/pdbremix/volume.py +155 -0
  65. packmol_memgen/lib/utils.py +1017 -0
  66. packmol_memgen/main.py +2827 -0
  67. packmol_memgen_minimal-1.1.16.dist-info/METADATA +664 -0
  68. packmol_memgen_minimal-1.1.16.dist-info/RECORD +71 -0
  69. packmol_memgen_minimal-1.1.16.dist-info/WHEEL +4 -0
  70. packmol_memgen_minimal-1.1.16.dist-info/entry_points.txt +2 -0
  71. packmol_memgen_minimal-1.1.16.dist-info/licenses/LICENSE +338 -0
@@ -0,0 +1,1103 @@
1
+ # encoding: utf-8
2
+
3
+ __doc__ = """
4
+
5
+ Interface to the AMBER molecular-dynamics package.
6
+
7
+ The library is split into four sections:
8
+
9
+ 1. Read and write restart files
10
+ 2. Generate restart files from PDB
11
+ 3. Run simulations from restart files
12
+ 4. Read trajectories with some post-processing
13
+
14
+ Copyright (C) 2009, 2014, Bosco K. Ho
15
+ """
16
+
17
+ import os
18
+ import copy
19
+ import shutil
20
+ import re
21
+
22
+ from . import util
23
+ from . import v3
24
+ from . import pdbtext
25
+ from . import pdbatoms
26
+ from . import data
27
+ from . import protein
28
+
29
+ # ##########################################################
30
+
31
+ # 1. Reading and writing restart files
32
+
33
+ # In PDBREMIX, restart files for AMBER are assumed to
34
+ # have the naming scheme:
35
+
36
+ # 1. topology file: sim.top
37
+ # 2. coordinate/velocity file: sim.crd(coor) or sim.rst (coor/vel)
38
+
39
+ # Parsers have been written to read .top and .crd/.rst files into
40
+ # Python structures, and to write these back into .top and .crd/.rst
41
+ # files, and to convert them into .pdb files
42
+
43
+ # The units used in AMBER are:
44
+ # - positions: angs
45
+ # - velocities: angs/ps/20.455
46
+
47
+
48
def read_top(top):
    """
    Returns a topology dictionary containing all the fields in the
    AMBER .top file referenced by their FLAG name, and formatted into
    python data types. POINTER variables are given their own key-value
    fields.

    Each ``%FLAG <name>`` line opens a new section (stored as a list under
    ``<name>``), and the following ``%FORMAT(...)`` line gives the FORTRAN
    field width and value type ('a' -> str, 'I' -> int, 'E' -> float) used
    to split the data lines of that section into fixed-width fields.

    Args:
        top: path of the AMBER topology file.

    Returns:
        dict mapping section names to lists of parsed values, plus one
        entry per POINTERS name (NATOM, NTYPES, ...) for as many values as
        the POINTERS section provides.

    Raises:
        KeyError: if the file has no POINTERS section.
    """
    parse_map = {'a': str, 'I': int, 'E': float}
    topology = {}
    section = None
    len_field = None
    parse = None
    # Plain "r" already gives universal newlines; the old "rU" mode was
    # removed in Python 3.11 and raises ValueError there.
    with open(top, "r") as f:
        for line in f:
            # strip only the newline so that a final line without a
            # trailing newline does not lose its last character
            line = line.rstrip("\n")
            if line.startswith("%"):
                words = line.split()
                key = words[0][1:]
                if key == "FLAG":
                    section = words[1]
                    topology[section] = []
                elif key.startswith("FORMAT"):
                    # interprets FORTRAN string format to parse section,
                    # e.g. "FORMAT(5E16.8)" -> width 16, type 'E'
                    format_str = key[7:-1]
                    len_field = int(re.split(r'\D+', format_str)[1])
                    val_type = re.search('(a|I|E)', format_str).group(0)
                    parse = parse_map[val_type]
            else:
                # data line: cut into fixed-width fields and convert
                pieces = [
                    line[i:i+len_field]
                    for i in range(0, len(line), len_field)]
                topology[section].extend(map(parse, pieces))
    name_str = """
    NATOM NTYPES NBONH MBONA NTHETH
    MTHETA NPHIH MPHIA NHPARM NPARM
    NNB NRES NBONA NTHETA NPHIA
    NUMBND NUMANG NPTRA NATYP NPHB
    IFPERT NBPER NGPER NDPER MBPER
    MGPER MDPER IFBOX NMXRS IFCAP
    NUMEXTRA NCOPY """
    # expose each POINTERS entry under its own name
    for name, val in zip(name_str.split(), topology['POINTERS']):
        topology[name] = val
    return topology
89
+
90
+
91
def convert_to_pdb_atom_names(soup):
    """
    Adjusts the atoms of a soup, in place, towards PDB conventions.

    - every atom of a residue whose type is listed in
      data.solvent_res_types is flagged with is_hetatm = True
    - hydrogen names ending in a digit have that digit moved to the
      front (e.g. "HB2" becomes "2HB") via res.change_atom_type
    """
    for res in soup.residues():
        if res.type in data.solvent_res_types:
            for a in res.atoms():
                a.is_hetatm = True
        for atom in res.atoms():
            atom_type = atom.type
            # guard against empty names before indexing into them
            if atom_type and atom_type[0] == "H" and atom_type[-1].isdigit():
                new_atom_type = atom_type[-1] + atom_type[:-1]
                res.change_atom_type(atom_type, new_atom_type)
            # membership test: the original compared the residue type to
            # the whole collection with ==, which could never be true
            if atom.res_type in data.solvent_res_types:
                atom.is_hetatm = True
102
+
103
+
104
def soup_from_topology(topology):
    """
    Builds a Soup (without coordinates) from a topology dictionary.

    Residues are created from RESIDUE_LABEL ("WAT" is renamed "HOH") and
    filled with the atoms delimited by consecutive RESIDUE_POINTER
    entries. Atom charges are the CHARGE entries divided by sqrt_of_k.
    When IFBOX > 0 a placeholder box_dimension_str is attached so that
    the soup is recognized as periodic.
    """
    soup = pdbatoms.Soup()
    chain_id = ''
    n_res = topology['NRES']
    n_atom = topology['NATOM']
    last_res = n_res - 1
    for i_res in range(n_res):
        label = topology['RESIDUE_LABEL'][i_res].strip()
        res_type = "HOH" if label == "WAT" else label
        res_num = i_res + 1
        soup.append_residue(pdbatoms.Residue(res_type, chain_id, res_num))
        res = soup.residue(i_res)

        # RESIDUE_POINTER is 1-based; the last residue runs to the end
        first = topology['RESIDUE_POINTER'][i_res] - 1
        if i_res != last_res:
            stop = topology['RESIDUE_POINTER'][i_res + 1] - 1
        else:
            stop = n_atom

        for i_atom in range(first, stop):
            atom = pdbatoms.Atom()
            atom.vel = v3.vector()
            atom.num = i_atom + 1
            atom.res_num = res_num
            atom.res_type = res_type
            atom.type = topology['ATOM_NAME'][i_atom].strip()
            atom.mass = topology['MASS'][i_atom]
            atom.charge = topology['CHARGE'][i_atom] / sqrt_of_k
            atom.element = data.guess_element(atom.res_type, atom.type)
            soup.insert_atom(-1, atom)

    convert_to_pdb_atom_names(soup)

    if topology['IFBOX'] > 0:
        # create dummy dimension to ensure box dimension recognized
        soup.box_dimension_str = "1.000000 1.0000000 1.000000"
    return soup
141
+
142
+
143
def soup_from_top(top):
    """Returns a Soup (no coordinates) built from an AMBER .top file."""
    topology = read_top(top)
    return soup_from_topology(topology)
145
+
146
+
147
def load_crd_or_rst_into_soup(soup, crd_or_rst):
    """
    Loads the coordinates and velocities of .crd or .rst into the soup.

    The file is read as: one title line, one line whose first word is the
    atom count, then 3*n_atom coordinates in 12-character fields, six per
    line. If the file name ends in '.rst', an equally sized velocity
    section is read next and multiplied by 20.455 before being stored in
    atom.vel.

    Raises:
        ValueError: if a section does not hold exactly 3*n_atom values.
    """
    field_width = 12
    fields_per_line = 6

    def read_values(f, n_line):
        # concatenate n_line lines and cut them into fixed-width floats
        text = "".join(f.readline().rstrip("\n") for _ in range(n_line))
        return [
            float(text[i:i+field_width])
            for i in range(0, len(text), field_width)]

    with open(crd_or_rst, "r") as f:
        f.readline()  # skip first line
        n_atom = int(f.readline().split()[0])

        # number of lines per section; integer ceiling division, because
        # true division would give a float that range() rejects
        n_crd = n_atom * 3
        n_line = -(-n_crd // fields_per_line)

        atoms = sorted(soup.atoms(), key=pdbatoms.cmp_atom)

        # read all the numbers in the coordinate section
        vals = read_values(f, n_line)
        if len(vals) != n_crd:
            raise ValueError("Improper number of coordinates in rst file.")
        for i, atom in enumerate(atoms):
            v3.set_vector(atom.pos, vals[i*3], vals[i*3+1], vals[i*3+2])

        # if .rst file, then there will be velocity values
        if crd_or_rst.endswith('.rst'):
            vals = read_values(f, n_line)
            if len(vals) != n_crd:
                raise ValueError("Improper number of velocities in rst file.")

            # now convert amber velocities to angs/ps and load into soup
            convert_vel_to_angs_per_ps = 20.455
            for i, atom in enumerate(atoms):
                v3.set_vector(atom.vel, vals[i*3], vals[i*3+1], vals[i*3+2])
                atom.vel = v3.scale(atom.vel, convert_vel_to_angs_per_ps)
188
+
189
+
190
def soup_from_top_and_crd_or_rst(top, crd_or_rst):
    """
    Returns a Soup from AMBER .top and .crd/.rst files.

    After loading coordinates (and velocities for .rst), chains are
    assigned with protein.find_chains. If the topology has IFBOX > 0, the
    last non-blank line of the coordinate file is kept in
    soup.box_dimension_str so it can be written back out later.
    """
    topology = read_top(top)
    soup = soup_from_topology(topology)
    load_crd_or_rst_into_soup(soup, crd_or_rst)
    protein.find_chains(soup)
    if topology['IFBOX'] > 0:
        # if periodic cells are in .crd or .rst then save
        # for later, if we need to write modified .crd or .rst
        with open(crd_or_rst, "r") as f:
            non_blank_lines = [line for line in f if line.strip()]
        soup.box_dimension_str = non_blank_lines[-1].rstrip()
    return soup
205
+
206
+
207
def write_soup_to_rst(soup, rst):
    """
    Writes a .rst file mainly for pulsing simulations.

    Layout written: a blank 80-column title line, a line with the atom
    count, the coordinates of every atom (two atoms per line), the
    velocities divided by 20.455 in the same layout, and finally
    soup.box_dimension_str if the soup has that attribute.
    """
    atoms = sorted(soup.atoms(), key=pdbatoms.cmp_atom)
    convert_to_amber_vel = 1.0 / 20.455

    # the context manager closes the file even if an atom is malformed
    with open(rst, "w") as f:
        # header with number of atoms in first row
        f.write(" ".ljust(80) + "\n")
        f.write("%5d 0.0000000E+00\n" % len(soup.atoms()))

        # write coordinates
        _write_rst_triplets(f, (atom.pos for atom in atoms))

        # write velocities
        _write_rst_triplets(
            f,
            ([c * convert_to_amber_vel for c in atom.vel] for atom in atoms))

        # write box dimensions
        if hasattr(soup, 'box_dimension_str'):
            f.write(soup.box_dimension_str.rstrip() + "\n")


def _write_rst_triplets(f, triplets):
    """Writes (x, y, z) triplets in %12.7f fields, two triplets per line."""
    count = 0
    for x, y, z in triplets:
        f.write("%12.7f%12.7f%12.7f" % (x, y, z))
        count += 1
        if count % 2 == 0:
            f.write("\n")
    # terminate a half-filled last line
    if count % 2 != 0:
        f.write("\n")
249
+
250
+
251
+ # The following functions wrap the above functions into a
252
+ # standard API that does not explicitly reference AMBER
253
+
254
+
255
def expand_restart_files(basename):
    """
    Returns the absolute restart file names derived from basename.

    The result is (top, crds, vels): top is basename.top, crds is
    basename.rst when that file exists and basename.crd otherwise, and
    vels is always the empty string.
    """
    topology_path = os.path.abspath(basename + '.top')
    restart_path = os.path.abspath(basename + '.rst')
    if os.path.isfile(restart_path):
        coords_path = restart_path
    else:
        coords_path = os.path.abspath(basename + '.crd')
    return topology_path, coords_path, ''
263
+
264
+
265
def get_restart_files(basename):
    """
    Returns (top, crds, vels) for basename after passing the topology
    and coordinate file names through util.check_files.
    """
    restart_files = expand_restart_files(basename)
    top, crds, vels = restart_files
    util.check_files(top, crds)
    return top, crds, vels
270
+
271
+
272
def soup_from_restart_files(top, crds, vels, skip_solvent=False):
    """
    Reads a Soup from restart files.

    Only top and crds are used; vels and skip_solvent are accepted but
    ignored by this implementation.
    """
    soup = soup_from_top_and_crd_or_rst(top, crds)
    return soup
275
+
276
+
277
def write_soup_to_crds_and_vels(soup, basename):
    """
    Writes the soup's coordinates/velocities to basename.rst (used for
    pulsing) and returns (rst file name, '').
    """
    rst = basename + '.rst'
    write_soup_to_rst(soup, rst)
    return rst, ''
281
+
282
+
283
def convert_restart_to_pdb(basename, pdb):
    """Loads the restart files of basename and writes them out as pdb."""
    restart_files = get_restart_files(basename)
    soup_from_restart_files(*restart_files).write_pdb(pdb)
288
+
289
+
290
+ # ##########################################################
291
+
292
+ # # 2. Generate restart files from PDB
293
+
294
+ # The restart files used for PDBREMIX assumes a consistent file naming.
295
+ # For a given basename `sim`, the files are:
296
+ # 1. topology file: sim.top
297
+ # 2. coordinate/velocity file: sim.crd or sim.rst
298
+
299
+ # To generate a topology file from the PDB file:
300
+ # - handles multiple protein chains
301
+ # - hydrogens are removed and then regenerated by AMBER
302
+ # - disulfide bonds are identified by PDBREMIX and explicitly encoded
303
+ # - charged residue protonation states are auto-detected
304
+ # - explicit water in cubic box with 10.0 angstrom buffer
305
+ # - counterions to neutralize the system
306
+ # - AMBER8: ff96 force-field
307
+ # - AMBER11: ff99SB force-field
308
+
309
+ # Binaries used to generate restart files:
310
+ # 1. tleap
311
+
312
# for charges: 18.2223 ~= sqrt(332.05), where Coulomb's law is E=332*q*q.
# Values of the .top CHARGE section are divided by this factor when they
# are turned into atom charges.
sqrt_of_k = 18.2223


# tleap fragment: loads the force field named by %(amber_ff)s
force_field_script = """
# leaprc to generate AMBER topology and coordinate

# load in amber force field
source %(amber_ff)s

# use AMBER6 PB radii as we will use igb=1, gbparm=2
set default PBradii mbondi
"""

# tleap fragment: solvates the unit "pdb" with a %(solvent_buffer)f buffer
explicit_water_box_script = """
# add explicit waters
solvateBox pdb TIP3PBOX %(solvent_buffer)f iso
"""

# tleap fragment: writes %(top)s and %(crd)s, then exits tleap
save_and_quit_script = """
# save topology and coordinates
saveAmberParm pdb %(top)s %(crd)s
quit
"""
336
+
337
+
338
def disulfide_script_and_rename_cysteines(in_pdb, out_pdb):
    """
    Returns the tleap script for disulfide bonds in the in_pdb file.

    This function opens in_pdb in a Soup, and searches for
    CYS residues where the SG-SG distance < 3 angs. These residues
    are then renamed to CYX and written to out_pdb. The disulfide bonds
    are then returned in a .tleap script fragment.

    A residue takes part in at most one bond: once renamed to CYX it is
    no longer considered for further pairs.
    """
    soup = pdbatoms.Soup(in_pdb)
    script = " # disulfide bonds\n"
    n = len(soup.residues())
    # Exact match on the residue type; the original substring test
    # (type in 'CYS') also accepted '', 'C', 'CY', 'YS', ...
    cys_indices = [i for i in range(n) if soup.residue(i).type == 'CYS']
    for k, i in enumerate(cys_indices):
        res_i = soup.residue(i)
        for j in cys_indices[k+1:]:
            if res_i.type != 'CYS':
                # residue i has already been bonded and renamed
                break
            res_j = soup.residue(j)
            if res_j.type != 'CYS':
                continue
            p1 = res_i.atom('SG').pos
            p2 = res_j.atom('SG').pos
            if v3.distance(p1, p2) < 3.0:
                res_i.set_type('CYX')
                res_j.set_type('CYX')
                script += "bond pdb.%d.SG pdb.%d.SG\n" % (i+1, j+1)
    soup.write_pdb(out_pdb)
    util.check_output(out_pdb)
    return script
362
+
363
+
364
def run_tleap(
        force_field, pdb, name, solvent_buffer=0.0, excess_charge=0):
    """
    Generates AMBER topology and coordinate files from PDB.

    Depending on whether excess_charge is non-zero, will also generate
    counterions. Unless 'GBSA' is in force_field, explicit waters are
    added; no waters are generated for implicit solvent simulations.

    Args:
        force_field: string containing 'AMBER8', 'AMBER11' or 'AMBER14',
            and optionally 'GBSA' for implicit solvent.
        pdb: input PDB file.
        name: basename for all generated files (name.top, name.crd,
            name.clean.pdb, name.tleap.in, name.tleap.log).
        solvent_buffer: NOTE(review): for explicit solvent this value is
            overridden with 10 below, as in the original code.
        excess_charge: net charge to neutralize; > 0 adds Cl-, < 0 adds Na+.

    Returns:
        (top, crd) file names.

    Raises:
        Exception: if force_field names no known AMBER version.
    """

    util.check_output(pdb)

    # Remove all but protein heavy atoms in a single clean conformation
    tleap_pdb = name + '.clean.pdb'
    pdbtext.clean_pdb(pdb, tleap_pdb)

    # The restart files to be generated
    top = name + '.top'
    crd = name + '.crd'

    # Dictionary to substitute into tleap scripts
    params = {
        'top': top,
        'crd': crd,
        'pdb': tleap_pdb,
        'data_dir': data.data_dir,
        'solvent_buffer': solvent_buffer,
    }

    # use best force-field for the versions of AMBER author has tested
    if 'AMBER11' in force_field:
        params['amber_ff'] = "leaprc.ff99SB"
    elif 'AMBER14' in force_field:
        params['amber_ff'] = "leaprc.ff14SB"
    elif 'AMBER8' in force_field:
        params['amber_ff'] = "leaprc.ff96"
    else:
        raise Exception("Don't know which version of AMBER(8|11|14) to use.")

    # make the tleap input script
    script = force_field_script
    # check for a few non-standard residue that have been included
    residues = [r.type for r in pdbatoms.Soup(tleap_pdb).residues()]
    if 'PHD' in residues:
        with open("%s/phd.leaprc" % data.data_dir) as f:
            script += f.read()
    if 'ZNB' in residues:
        with open("%s/znb.leaprc" % data.data_dir) as f:
            script += f.read()
    script += "pdb = loadpdb %(pdb)s\n"
    script += disulfide_script_and_rename_cysteines(tleap_pdb, tleap_pdb)
    if 'GBSA' not in force_field:
        # Add explicit waters as not GBSA implicit solvent
        if excess_charge != 0:
            # Add script to add counterions, must specify + or -
            if excess_charge > 0:
                script += "addions pdb Cl- 0\n"
            else:
                script += "addions pdb Na+ 0\n"
        solvent_buffer = 10
        params['solvent_buffer'] = solvent_buffer
        script += explicit_water_box_script
    script += save_and_quit_script
    script = script % params

    # Now write script to input file; close it explicitly so the content
    # is on disk before tleap is started
    tleap_in = name + ".tleap.in"
    with open(tleap_in, "w") as f:
        f.write(script)

    # Now run tleap with tleap_in
    data.binary('tleap', "-f "+tleap_in, name+'.tleap')

    # Check output is okay
    if os.path.isfile('leap.log'):
        os.rename('leap.log', name + '.tleap.log')
    util.check_output(name+'.tleap.log', ['FATAL'])
    util.check_output(top)
    util.check_output(crd)

    return top, crd
445
+
446
+
447
def pdb_to_top_and_crds(
        force_field, pdb, name, solvent_buffer=0.0):
    """
    Converts a PDB file into the restart files of an AMBER simulation:
    a topology file, a coordinate file, and a fully converted reference
    PDB (name.pdb). Returns the (top, crd) file names.
    """
    # First pass: explicitly no counterions, so that the net charge of
    # the system can be read back from the generated topology.
    restart_files = run_tleap(force_field, pdb, name, solvent_buffer, 0)

    # Counterions are only generated for explicit solvent; implicit
    # solvent (GBSA) skips this step.
    if 'GBSA' not in force_field:
        scaled_charges = read_top(name+'.top')['CHARGE']
        net_charge = int(round(sum(scaled_charges)/sqrt_of_k))

        # A charged system is rebuilt with excess_charge set, which
        # forces tleap to generate counterions.
        if net_charge != 0:
            restart_files = run_tleap(
                force_field, pdb, name, solvent_buffer, net_charge)

    top, crd = restart_files

    # make a reference PDB for generating restraints and viewing
    convert_restart_to_pdb(name, name+'.pdb')
    return top, crd
475
+
476
+
477
+ # ##########################################################
478
+
479
+ # # 3. Run simulations from restart files
480
+
481
+ # Simulation approach for implicit solvent:
482
+ # - optional positional constraints: 100 kcal/mol/angs**2
483
+ # - Langevin thermostat for constant temperature
484
+ # - Nose-Hoover barometer with flexible periodic box size
485
+
486
+ # Simulation approach for explicit water:
487
+ # - cubic periodic box
488
+ # - optional positional restraints: 100 kcal/mol/angs**2
489
+ # - PME electrostatics on the periodic box
490
+ # - Langevin thermostat for constant temperature
491
+ # - Nose-Hoover barometer with flexible periodic box size
492
+
493
+ # Binaries used:
494
+ # 1. sander
495
+
496
+ # Files for trajectories:
497
+ # 1. coordinate trajectory: md.trj
498
+ # 2. velocity trajectory: md.vel.trj
499
+ # 3. restart coordinate/velocity: md.rst
500
+
501
+
502
# Template parameter dictionaries for run(). The presence of
# 'n_step_minimization' selects a minimization; otherwise
# 'n_step_dynamics' selects dynamics, and 'temperature_thermometer'
# additionally switches on the thermostat section of the sander input.
# A non-empty 'restraint_pdb' switches on positional restraints.

# minimization
minimization_parms = {
    'topology' : 'in.top',
    'input_crds' : 'in.crd',
    'output_basename' : 'min',
    'force_field': 'GBSA',
    'restraint_pdb': '',
    'restraint_force': 100.0,
    'n_step_minimization' : 100,
}

# dynamics without a thermostat
constant_energy_parms = {
    'topology' : 'in.top',
    'input_crds' : 'in.crd',
    'output_basename' : 'md',
    'force_field': 'GBSA',
    'restraint_pdb': '',
    'restraint_force': 100.0,
    'n_step_per_snapshot' : 5,
    'n_step_dynamics' : 1000,
}

# dynamics with the Langevin thermostat
langevin_thermometer_parms = {
    'topology' : 'in.top',
    'input_crds' : 'in.crd',
    'output_basename' : 'md',
    'force_field': 'GBSA',
    'restraint_pdb': '',
    'restraint_force': 100.0,
    'random_seed' : 2342,
    'temperature_thermometer' : 300.0,
    'temperature_initial_velocities': 0.0, # ignored if it is 0.0
    'n_step_per_snapshot' : 5,
    'n_step_dynamics' : 1000,
    'n_step_per_thermostat' : 100,
}
537
+
538
+
539
# Fragments of the sander input file; make_sander_input_file joins them
# and substitutes the %(...)s fields from the parms dictionary.

# title line and opening of the &cntrl namelist
sander_script = """
generated by amber.py
&cntrl
"""


# no periodicity, generalized born, and surface area terms
gbsa_script = " ntb = 0, igb = 2, gbsa = 1, cut = 12.0,"


# periodicity/constant pressure, isotropic position scaling, no gb/sa
explicit_water_script = " ntb = 2, ntp = 1, igb = 0, gbsa = 0, cut = 8.0,"


# 10 steps of steepest descent then conjugate gradient for rest of steps
minimization_script = """
imin = 1, ntmin = 1, maxcyc = %(n_step_minimization)s, ncyc = 10,
"""


# output frequencies, number of steps and time step for dynamics
dynamics_script = """
ntpr = %(n_step_per_snapshot)s, ntave = 0, ntwr = 500, iwrap = 0, ioutfm = 0,
ntwx = %(n_step_per_snapshot)s, ntwv = %(n_step_per_snapshot)s,
ntwe = %(n_step_per_snapshot)s,
nstlim = %(n_step_dynamics)s, nscm = 50, dt = 0.001,
nrespa = 1,
"""

# langevin thermometer
thermostat_script = """
ntt = 3, gamma_ln = 5, temp0 = %(temperature_thermometer)s, vlimit = 0.0,
ig = %(random_seed)s, tempi = %(temperature_initial_velocities)s,
"""
573
+
574
+
575
def make_sander_input_file(parms):
    """
    Assembles the text of a sander input file from the parms dictionary.

    The kind of run is chosen from the keys present: minimization if
    'n_step_minimization' is given, else dynamics if 'n_step_dynamics' is
    given (with a thermostat if 'temperature_thermometer' is given).

    Raises:
        Exception: if parms describes neither minimization nor dynamics.
    """
    fragments = [sander_script % parms]

    # all bonds to be simulated, no constraints
    fragments.append(" ntf = 1, ntc = 1,\n")

    # restraints are included
    if parms['restraint_pdb']:
        fragments.append(" ntr = 1,\n")

    # Explicit solvent is detected by loading the input files and
    # checking whether the soup carries periodic box information.
    soup = soup_from_top_and_crd_or_rst(
        parms['topology'], parms['input_crds'])
    if hasattr(soup, 'box_dimension_str'):
        fragments.append(explicit_water_script)
    else:
        fragments.append(gbsa_script)

    is_minimization = 'n_step_minimization' in parms
    if not is_minimization and 'n_step_dynamics' not in parms:
        raise Exception("Can't parse parameters to run")

    if is_minimization:
        fragments.append(minimization_script)
    else:
        # .rst input carries velocities, so the run is a restart
        if parms['input_crds'].endswith('.rst'):
            fragments.append(" ntx = 5, irest = 1,\n")
        else:
            fragments.append(" ntx = 1,\n")
        fragments.append(dynamics_script)
        if 'temperature_thermometer' in parms:
            fragments.append(thermostat_script)

    fragments.append("&end\n")

    return "".join(fragments) % parms
617
+
618
+
619
# Fixed text inserted by make_restraint_script between the restraint
# force line and the list of "ATOM i i" entries.
restraint_script = """FIND
* * S *
* * B *
* * 3 *
* * E *
SEARCH
"""
626
+
627
def make_restraint_script(pdb, force=100.0):
    """
    Builds the sander input fragment listing the restrained atoms.

    Every atom of the PDB file whose B-factor is > 0 is restrained; atoms
    are numbered by their 1-based position in the file. force is the
    spring constant in kcal/mol/angs**2.
    """
    util.check_output(pdb)
    parts = [
        "Restrained atoms from %s\n" % pdb,
        "%s\n" % force,
        restraint_script,
    ]
    for number, atom in enumerate(pdbatoms.read_pdb(pdb), 1):
        if atom.bfactor > 0.0:
            parts.append("ATOM %d %d\n" % (number, number))
    parts.extend(["END\n", "END\n"])
    return "".join(parts)
647
+
648
+
649
def run(in_parms):
    """
    Run a AMBER simulations using the PDBREMIX in_parms dictionary.

    The topology and input coordinates are copied next to the output
    files (basename.top, basename.in.crd or basename.in.rst), the sander
    input file basename.sander.in is written, sander is run, and the
    resulting output and restart files are checked.

    Args:
        in_parms: parameter dictionary (see minimization_parms,
            constant_energy_parms, langevin_thermometer_parms); it is
            deep-copied and not modified.
    """
    parms = copy.deepcopy(in_parms)
    basename = parms['output_basename']

    # Copies across topology file
    input_top = parms['topology']
    util.check_files(input_top)
    new_top = basename + '.top'
    shutil.copy(input_top, new_top)

    # Copies over coordinate/velocity files
    input_crd = parms['input_crds']
    util.check_files(input_crd)
    if input_crd.endswith('.crd'):
        new_crd = basename + '.in.crd'
    else:
        new_crd = basename + '.in.rst'
    shutil.copy(input_crd, new_crd)

    # Decide on type of output coordinate/velocity file
    if 'n_step_minimization' in parms:
        rst = basename + ".crd"
    else:
        rst = basename + ".rst"

    # Construct the long list of arguments for sander
    trj = basename + ".trj"
    vel_trj = basename + ".vel.trj"
    ene = basename + ".ene"
    inf = basename + ".inf"
    sander_out = basename + ".sander.out"
    sander_in = basename + ".sander.in"
    args = "-O -i %s -o %s -p %s -c %s -r %s -x %s -v %s -e %s -inf %s" \
        % (sander_in, sander_out, new_top, new_crd, rst, trj, vel_trj, ene, inf)

    # Make the input script
    script = make_sander_input_file(parms)

    # If positional restraints
    if parms['restraint_pdb']:
        # Generate the AMBER .crd file that stores the constrained coordinates
        pdb = parms['restraint_pdb']
        soup = pdbatoms.Soup(pdb)
        ref_crd = basename + '.restraint.crd'
        write_soup_to_rst(soup, ref_crd)
        util.check_output(ref_crd)
        # Add the restraints .crd to the SANDER arguments
        args += " -ref %s" % ref_crd
        # Add the restraint forces and atom indices to the SANDER input file
        script += make_restraint_script(pdb, parms['restraint_force'])

    # Close the input file explicitly so that it is fully written to
    # disk before sander reads it
    with open(sander_in, "w") as f:
        f.write(script)

    # Run the simulation
    data.binary('sander', args, basename)

    # Check if output is okay
    util.check_output(sander_out, ['FATAL'])
    top, crds, vels = get_restart_files(basename)
    util.check_output(top)
    util.check_output(crds)
713
+
714
+
715
+ # ##########################################################
716
+
717
+ # # 4. Read trajectories with some post-processing
718
+
719
+ # The units used in these files are:
720
+ # - positions: angstroms
721
+ # - velocities: angs/ps/20.455
722
+
723
+
724
+ class TrjReader:
725
+ """
726
+ Class to read AMBER .trj and .trj.gz files.
727
+
728
+ .trj files do not tell us how many atoms are in each frame, this
729
+ must be entered. box dimensions are auto-detected.
730
+
731
+ Attributes:
732
+ top (str) - name of .top file
733
+ trj (str) - name of .trj file
734
+ file (file) - file object to trajectory
735
+ pos_start_frame (int) - position of the beginning of frames
736
+ size_frame (int) - the size of the frame in bytes
737
+ n_atom (int) - number of atoms simulated
738
+ n_frame (int) - number of frames in trajectory
739
+ i_frame (int) - index of current frame
740
+ frame (array) - container of coordinates of current frame
741
+ is_box_dims (bool) - frame contains extra line for periodic box
742
+
743
+ Methods:
744
+ __init__ - initializes trajectory and loads 1st frame
745
+ load_frame(i) - loads the i frame
746
+ __getitem__ - returns the frame
747
+ save_to_crd - save current frame to a .crd file
748
+ __repr__ - string representation
749
+ """
750
+
751
+ def __init__(self, n_atom, trj):
752
+ self.n_atom = n_atom
753
+ self.trj = trj
754
+
755
+ # Since .trj is a text format, it can be readily gzip'd,
756
+ # so opening .trj.gz is a useful option to have.
757
+ if self.trj.split(".")[-1].strip().lower() == "gz":
758
+ self.file = gzip.GzipFile(self.trj, "r")
759
+ else:
760
+ self.file = open(self.trj, "r")
761
+
762
+ # only 1-line header, frames starts after this line
763
+ self.pos_start_frame = len(self.file.readline())
764
+
765
+ # get the size of all frames
766
+ self.file.seek(0, 2)
767
+ pos_eof = self.file.tell()
768
+ size_all_frames = pos_eof - self.pos_start_frame
769
+
770
+ # auto-detect n_frame
771
+ self.is_box_dims = False
772
+ self.size_frame = self.calc_size_frame(self.n_atom)
773
+ n_frame = size_all_frames / float(self.size_frame)
774
+ # check if n_frame is exact
775
+ if n_frame % 1.0 > 0.0:
776
+ # n_frame is not exact, check for box dimensions
777
+ self.size_frame = self.calc_size_frame(self.n_atom, True)
778
+ n_frame = size_all_frames / float(self.size_frame)
779
+ if n_frame % 1.0 != 0.0:
780
+ raise Exception('frames don\'t fit n_atom for ' + self.trj)
781
+ self.is_box_dims = True
782
+ self.n_frame = int(n_frame)
783
+
784
+ self.load_frame(0)
785
+
786
+ def calc_size_frame(self, n_crd, is_box_dims=False):
787
+ self.file.seek(self.pos_start_frame)
788
+ n_line = (3 * n_crd) / 10
789
+ if (3 * n_crd) % 10 > 0:
790
+ n_line += 1
791
+ if is_box_dims:
792
+ n_line += 1
793
+ size_frame = 0
794
+ for i in range(0, n_line):
795
+ size_frame += len(self.file.readline())
796
+ return size_frame
797
+
798
+ def load_frame(self, i_frame):
799
+ """
800
+ Loads the frame into self.frame, a list of 3*n_atom floats
801
+ """
802
+ # Check bounds
803
+ if i_frame < - 1*self.n_frame or i_frame >= self.n_frame:
804
+ raise IndexError
805
+ elif i_frame < 0:
806
+ i_frame = self.n_frame + i_frame
807
+
808
+ self.file.seek(self.pos_start_frame + i_frame*(self.size_frame))
809
+
810
+ # read frame as list of 3 floats
811
+ s = self.file.read(self.size_frame).replace('\n', '')
812
+ pieces = [s[i:i+8] for i in range(0, len(s), 8)]
813
+ vals = list(map(float, pieces))
814
+
815
+ # account for box dimensions
816
+ if self.is_box_dims:
817
+ # drop the box dimension values
818
+ vals = vals[:-3]
819
+ if len(vals) != 3*self.n_atom:
820
+ raise ValueError("Improper number of coordinates in frame.")
821
+
822
+ self.frame = vals
823
+ self.i_frame = i_frame
824
+
825
+ def __getitem__(self, i_frame):
826
+ """
827
+ Returns the container for coordinates of the i'th frame.
828
+ """
829
+ self.load_frame(i_frame)
830
+ return self.frame
831
+
832
def save_to_crd(self, crd):
    """
    Saves coordinates of the current frame (self.frame) to the file crd.

    The file starts with an 80-column "ACE" title line and a line
    holding len(self.frame) // 3, followed by the values formatted
    as %12.7f, six per line.
    """
    n_val_per_line = 6
    # The context manager closes the file even if formatting or a
    # write raises part-way through.
    with open(crd, "w") as f:
        coords = self.frame

        f.write("ACE".ljust(80) + "\n")
        f.write("%5d 0.0000000E+00\n" % (len(coords) // 3))

        fields = ["%12.7f" % x for x in coords]
        for i in range(0, len(fields), n_val_per_line):
            f.write("".join(fields[i:i + n_val_per_line]) + "\n")
854
+
855
def __repr__(self):
    """
    Returns a one-line summary naming the trajectory file, its frame
    count and its atom count.
    """
    summary = (self.trj, self.n_frame, self.n_atom)
    return "< Amber Coord file %s with %d frames of %d atoms >" % summary
858
+
859
+
860
class SoupTrajectory:
    """
    Couples a soup with TrjReader objects so that loading a frame
    writes that frame's values into the soup's atoms.
    """

    def __init__(self, soup, trj, vel_trj=''):
        """
        Opens a TrjReader on trj and, when vel_trj is non-empty, a second
        one on vel_trj; n_atom is taken from len(soup.atoms()).
        """
        self.trj = trj
        self.vel_trj = vel_trj
        self.soup = soup
        self.n_atom = len(soup.atoms())

        self.trj_reader = TrjReader(self.n_atom, self.trj)
        # An empty velocity file name means there is no velocity reader.
        self.vel_trj_reader = (
            TrjReader(self.n_atom, self.vel_trj) if self.vel_trj else None)

        self.n_frame = self.trj_reader.n_frame

    def load_frame(self, i_frame):
        """
        Reads frame i_frame and copies each atom's three values into
        atom.pos (and atom.vel when velocity values were read).
        """
        positions = self.trj_reader[i_frame]
        if self.vel_trj_reader:
            velocities = self.vel_trj_reader[i_frame]
        else:
            velocities = None

        atoms = self.soup.atoms()
        for i_atom in range(self.n_atom):
            atom = atoms[i_atom]
            x, y, z = 3*i_atom, 3*i_atom + 1, 3*i_atom + 2
            v3.set_vector(atom.pos, positions[x], positions[y], positions[z])
            if velocities:
                v3.set_vector(
                    atom.vel, velocities[x], velocities[y], velocities[z])

        # the reader has already normalised negative indices
        self.i_frame = self.trj_reader.i_frame
890
+
891
+
892
class Trajectory:
    """
    Class to interact with an AMBER trajectory using soup.

    Attributes:
      basename (str) - basename used to build the file names
                       (.top, .trj and .vel.trj)
      n_frame (int) - number of frames in trajectory
      i_frame (int) - index of current frame
      soup (Soup) - Soup object holding current coordinates/velocities
    Methods:
      load_frame - loads new frame into soup
    """

    def __init__(self, basename):
        """
        Builds the soup from basename.top, opens basename.trj (and
        basename.vel.trj if that file exists) and loads frame 0.
        """
        self.basename = basename
        self.top = basename + '.top'
        self.soup = soup_from_top(self.top)
        self.trj = basename + '.trj'

        # The velocity trajectory is optional; '' marks it as absent.
        vel_trj = basename + '.vel.trj'
        self.vel_trj = vel_trj if os.path.isfile(vel_trj) else ''

        self.soup_trj = SoupTrajectory(self.soup, self.trj, self.vel_trj)
        self.n_frame = self.soup_trj.n_frame
        self.i_frame = 0
        self.load_frame(0)

    def load_frame(self, i_frame):
        """
        Delegates to the SoupTrajectory and mirrors its i_frame.
        """
        soup_trj = self.soup_trj
        soup_trj.load_frame(i_frame)
        self.i_frame = soup_trj.i_frame
921
+
922
+
923
def merge_amber_trajs(top, trajs, out_traj):
    """
    Given a list of traj filenames (trajs), merges them into one complete
    trajectory (out_traj) using top to work out the number of atoms, and
    hence the size of the frame of the trajectory.

    The first trajectory is copied whole; every later trajectory is
    appended starting at the frame offset measured on the first one, so
    all inputs are assumed to share the same header and frame size.
    """
    # Get pos_start_frame and size_frame by opening one of the
    # trajectories via TrjReader
    topology = read_top(top)
    trj_reader = TrjReader(topology['NATOM'], trajs[0])
    pos_start_frame = trj_reader.pos_start_frame
    size_frame = trj_reader.size_frame
    del trj_reader

    # Start the merged file by copying the first trajectory
    shutil.copy(trajs[0], out_traj)

    # Append the remaining trajectories one frame at a time. The context
    # managers close both files even if a read or write fails.
    with open(out_traj, "ab+") as merge_traj_file:
        for traj in trajs[1:]:
            with open(traj, "rb") as traj_file:
                # whence=2 seeks relative to the end of the file, so
                # eof is the offset of the file's last byte
                traj_file.seek(-1, 2)
                eof = traj_file.tell()
                traj_file.seek(pos_start_frame)
                while traj_file.tell() < eof:
                    merge_traj_file.write(traj_file.read(size_frame))
952
+
953
+
954
def merge_trajectories(basename, traj_basenames):
    """
    Given a list of basenames of partial trajectories (traj_basenames),
    splices them together into one simulation stored under basename.

    The .sander.in, .top and .rst files are copied from the last partial
    run. The energy blocks of all runs are written to basename.energy
    with NSTEP and TIME(PS) shifted so that they continue from the end
    of the preceding run. The .trj and .vel.trj files are merged with
    merge_amber_trajs().
    """
    last_basename = traj_basenames[-1]
    for ext in ('.sander.in', '.top', '.rst'):
        shutil.copy(last_basename + ext, basename + ext)

    # merge energies of sims into one energy file
    n_step = 0
    time = 0.0
    with open(basename + '.energy', 'w') as f:
        f.write('[\n')
        for traj_basename in traj_basenames:
            energy_fname = traj_basename + '.energy'
            if os.path.isfile(energy_fname):
                with open(energy_fname) as energy_file:
                    # NOTE(review): eval() executes whatever the .energy
                    # file contains; only use on files this tool wrote.
                    blocks = eval(energy_file.read())
            else:
                sander_out = traj_basename + '.sander.out'
                blocks = read_dynamics_sander_out(sander_out)
            for block in blocks:
                block_n_step = int(block['NSTEP'])
                block_time = float(block['TIME(PS)'])
                block['NSTEP'] = str(block_n_step + n_step)
                block['TIME(PS)'] = str(block_time + time)
                f.write(str(block) + ',\n')
            # A run without any energy blocks leaves the running offsets
            # untouched instead of failing on blocks[-1].
            if blocks:
                n_step = int(blocks[-1]['NSTEP'])
                time = float(blocks[-1]['TIME(PS)'])
        f.write(']\n')

    trajs = [b + '.trj' for b in traj_basenames]
    merge_amber_trajs(basename + '.top', trajs, basename + '.trj')

    vels = [b + '.vel.trj' for b in traj_basenames]
    merge_amber_trajs(basename + '.top', vels, basename + '.vel.trj')
992
+
993
+
994
def convert_crd_to_trj_frame(crd):
    """
    Builds the text of one .trj frame from the .crd file crd.

    Every word of the file except the first is parsed as a float and
    formatted as %8.3f, ten values per line. The result always ends
    with a newline.
    """
    values = [float(word) for word in util.words_in_file(crd)[1:]]
    fields = ["%8.3f" % value for value in values]
    n_per_line = 10
    rows = [
        ''.join(fields[start:start + n_per_line])
        for start in range(0, len(fields), n_per_line)]
    return '\n'.join(rows) + '\n'
1010
+
1011
+
1012
def read_dynamics_sander_out(sander_out):
    """
    Returns a list of dictionaries containing energy values
    from sander out file for molecular dynamics.

    Lines are ignored until one containing both '4' and 'RESULTS' is
    seen, and parsing stops at the 'A V E R A G E S' line. In between,
    every "key = value" word triple is stored (NSTEP as int, everything
    else as float) and a line containing '----' appends a copy of the
    values collected so far. The working dictionary is not cleared
    between blocks, so a key missing from a later block keeps its
    previous value.
    """
    results = []
    block_dict = {}
    is_header = True
    # The context manager closes the file on every exit path,
    # including the early break.
    with open(sander_out) as f:
        for line in f:
            if is_header:
                if '4' in line and 'RESULTS' in line:
                    is_header = False
                continue
            if 'A V E R A G E S' in line:
                # End of time blocks
                break
            if line.startswith('|'):
                continue
            if '----' in line:
                # New block: save last block
                if block_dict:
                    results.append(block_dict.copy())
                continue
            words = line.split()
            for i, word in enumerate(words):
                if word != "=":
                    continue
                # Only the single word before "=" is the key, so a
                # label such as "1-4 NB" is stored as "NB".
                key = words[i-1].strip()
                val = words[i+1]
                block_dict[key] = int(val) if key == 'NSTEP' else float(val)
    return results
1045
+
1046
+
1047
def read_minimization_sander_out(sander_out):
    """
    Returns a list of dictionaries containing energy values
    from sander out file for minimization steps.

    Lines are ignored until one containing both '4' and 'RESULTS' is
    seen. After that, each line containing both 'NSTEP' and 'ENERGY' is
    treated as a column header: the next line supplies NSTEP and
    ENERGY, and the three lines starting three lines below the header
    are split at columns 25 and 50 into "key = value" pieces. A block
    is appended only when the following header is reached, so the block
    after the last header in the file is not part of the result.
    """
    # Read everything up front; the context manager closes the file.
    with open(sander_out) as f:
        lines = f.readlines()

    results = []
    block_dict = {}
    is_results = False
    for i, line in enumerate(lines):
        if not is_results:
            if '4' in line and 'RESULTS' in line:
                is_results = True
            continue
        if 'NSTEP' in line and 'ENERGY' in line:
            if block_dict:
                results.append(block_dict.copy())
            words = lines[i+1].split()
            block_dict['NSTEP'] = int(words[0])
            block_dict['ENERGY'] = float(words[1])
            for energy_line in lines[i+3:i+6]:
                pieces = energy_line[:25], energy_line[25:50], energy_line[50:]
                for piece in pieces:
                    key, value = piece.split('=')
                    block_dict[key.strip()] = float(value)
    return results
1073
+
1074
+
1075
def calculate_energy(top, crd):
    """
    Returns potential energy of top and crd by running sander
    and parsing the sander output.

    The run uses a copy of minimization_parms with zero minimization
    steps, executed from the 'energy-temp' directory; the ENERGY value
    of the first parsed block is returned.
    """
    # Resolve the paths before util.goto_dir is called.
    # NOTE(review): presumably goto_dir changes the working directory,
    # which would break relative paths - confirm in util.
    top = os.path.abspath(top)
    crd = os.path.abspath(crd)

    util.goto_dir('energy-temp')
    try:
        parms = minimization_parms.copy()
        # update(), not extend(): the overrides are key/value pairs and
        # dictionaries have no extend() method.
        # NOTE(review): assumes minimization_parms is a dict - confirm.
        parms.update({
            'topology': top,
            'input_crds': crd,
            'output_basename': 'energy',
            'n_step_minimization': 0,
            'n_step_steepest_descent': 0})
        run(parms)

        blocks = read_minimization_sander_out('energy.sander.out')
    finally:
        # Step back out even if sander or the parser failed.
        util.goto_dir('..')

    util.clean_fname('energy-temp')

    return blocks[0]['ENERGY']
1100
+
1101
+
1102
+
1103
+