packmol-memgen-minimal 1.1.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- packmol_memgen/__init__.py +2 -0
- packmol_memgen/__version__.py +34 -0
- packmol_memgen/data/LICENSE.Apache-2.0 +201 -0
- packmol_memgen/data/extra_solvents.lib +789 -0
- packmol_memgen/data/frcmod.lipid_ext +97 -0
- packmol_memgen/data/frcmod.solvents +129 -0
- packmol_memgen/data/insane_lipids.txt +138 -0
- packmol_memgen/data/insane_solvents.txt +45 -0
- packmol_memgen/data/leaprc.extra_solvents +42 -0
- packmol_memgen/data/leaprc.lipid_ext +48 -0
- packmol_memgen/data/lipid_ext.lib +12312 -0
- packmol_memgen/data/martini_v3.0.0.itp +356605 -0
- packmol_memgen/data/memgen.parm +4082 -0
- packmol_memgen/data/pdbs.tar.gz +0 -0
- packmol_memgen/data/solvent.parm +14 -0
- packmol_memgen/example/example.sh +31 -0
- packmol_memgen/lib/__init__.py +0 -0
- packmol_memgen/lib/amber.py +77 -0
- packmol_memgen/lib/charmmlipid2amber/__init__.py +0 -0
- packmol_memgen/lib/charmmlipid2amber/charmmlipid2amber.csv +7164 -0
- packmol_memgen/lib/charmmlipid2amber/charmmlipid2amber.py +225 -0
- packmol_memgen/lib/pdbremix/LICENSE +21 -0
- packmol_memgen/lib/pdbremix/__init__.py +0 -0
- packmol_memgen/lib/pdbremix/_version.py +1 -0
- packmol_memgen/lib/pdbremix/amber.py +1103 -0
- packmol_memgen/lib/pdbremix/asa.py +227 -0
- packmol_memgen/lib/pdbremix/data/aminoacid.pdb +334 -0
- packmol_memgen/lib/pdbremix/data/binaries.json +26 -0
- packmol_memgen/lib/pdbremix/data/charmm22.parameter +2250 -0
- packmol_memgen/lib/pdbremix/data/charmm22.topology +1635 -0
- packmol_memgen/lib/pdbremix/data/color_b.py +682 -0
- packmol_memgen/lib/pdbremix/data/hin.lib +130 -0
- packmol_memgen/lib/pdbremix/data/hydroxide.lib +88 -0
- packmol_memgen/lib/pdbremix/data/make_chi.py +92 -0
- packmol_memgen/lib/pdbremix/data/opls.parameter +1108 -0
- packmol_memgen/lib/pdbremix/data/opls.topology +1869 -0
- packmol_memgen/lib/pdbremix/data/phd.frcmod +82 -0
- packmol_memgen/lib/pdbremix/data/phd.leaprc +4 -0
- packmol_memgen/lib/pdbremix/data/phd.prepin +35 -0
- packmol_memgen/lib/pdbremix/data/template.pdb +334 -0
- packmol_memgen/lib/pdbremix/data/znb.frcmod +24 -0
- packmol_memgen/lib/pdbremix/data/znb.leaprc +7 -0
- packmol_memgen/lib/pdbremix/data/znb.lib +69 -0
- packmol_memgen/lib/pdbremix/data.py +264 -0
- packmol_memgen/lib/pdbremix/fetch.py +102 -0
- packmol_memgen/lib/pdbremix/force.py +627 -0
- packmol_memgen/lib/pdbremix/gromacs.py +978 -0
- packmol_memgen/lib/pdbremix/lib/__init__.py +0 -0
- packmol_memgen/lib/pdbremix/lib/docopt.py +579 -0
- packmol_memgen/lib/pdbremix/lib/pyqcprot.py +305 -0
- packmol_memgen/lib/pdbremix/namd.py +1078 -0
- packmol_memgen/lib/pdbremix/pdbatoms.py +543 -0
- packmol_memgen/lib/pdbremix/pdbtext.py +120 -0
- packmol_memgen/lib/pdbremix/protein.py +311 -0
- packmol_memgen/lib/pdbremix/pymol.py +480 -0
- packmol_memgen/lib/pdbremix/rmsd.py +203 -0
- packmol_memgen/lib/pdbremix/simulate.py +420 -0
- packmol_memgen/lib/pdbremix/spacehash.py +73 -0
- packmol_memgen/lib/pdbremix/trajectory.py +286 -0
- packmol_memgen/lib/pdbremix/util.py +273 -0
- packmol_memgen/lib/pdbremix/v3.py +16 -0
- packmol_memgen/lib/pdbremix/v3array.py +482 -0
- packmol_memgen/lib/pdbremix/v3numpy.py +350 -0
- packmol_memgen/lib/pdbremix/volume.py +155 -0
- packmol_memgen/lib/utils.py +1017 -0
- packmol_memgen/main.py +2827 -0
- packmol_memgen_minimal-1.1.16.dist-info/METADATA +664 -0
- packmol_memgen_minimal-1.1.16.dist-info/RECORD +71 -0
- packmol_memgen_minimal-1.1.16.dist-info/WHEEL +4 -0
- packmol_memgen_minimal-1.1.16.dist-info/entry_points.txt +2 -0
- packmol_memgen_minimal-1.1.16.dist-info/licenses/LICENSE +338 -0
|
@@ -0,0 +1,1017 @@
|
|
|
1
|
+
#!/usr/bin/python
|
|
2
|
+
|
|
3
|
+
from __future__ import print_function
|
|
4
|
+
import os, sys, math, string, copy, random, shlex, subprocess, time
|
|
5
|
+
import numpy as np
|
|
6
|
+
import logging
|
|
7
|
+
import contextlib
|
|
8
|
+
import warnings
|
|
9
|
+
from scipy import integrate
|
|
10
|
+
from .pdbremix import data
|
|
11
|
+
import tempfile
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger("pmmg_log")
|
|
14
|
+
|
|
15
|
+
# Locate a PDB2PQR executable, if one is installed.
# PDB2PQR is driven as an external command rather than through its Python API
# (pdb2pqr.main.main_driver / build_main_parser), see pdb2pqr_protonate().
# After this block ``pdb2pqr`` is either the command line to run (a string that
# is later split with shlex) or False when no executable could be found.
try:
    import shutil
    pdb2pqr = shutil.which("pdb2pqr30") or shutil.which("pdb2pqr")
    if not pdb2pqr and shutil.which("uv"):
        # No direct executable on PATH: fall back to launching it through uv.
        pdb2pqr = "uv run pdb2pqr"
except Exception:
    pdb2pqr = False
if not pdb2pqr:
    # shutil.which() returns None instead of raising, so the "missing" case
    # has to be reported here and not only in the except branch.
    logger.debug("PDB2PQR not available. Protonation with --pdb2pqr will not be available")
    pdb2pqr = False
|
|
26
|
+
|
|
27
|
+
VCH = 21.65 # A^3
VCH2 = 27.03 # A^3
avogadro = 6.02214086*10**23

# Residue names treated as amino acids.
residues = {
    "CYS", "CYX", "CYM", "MET", "HIS", "HSD", "HIE", "HID", "HIP", "HSE",
    "SER", "GLN", "ASP", "ASH", "GLU", "GLH", "TYR", "THR", "ALA", "LEU",
    "ILE", "PHE", "TRP", "ARG", "ASN", "LYS", "LYN", "VAL", "PRO", "GLY",
}
cgatoms = {"CA", "CB", "C", "N", "O"}

# Net charge contributed per residue name.
charged = {
    "ASP": -1, "GLU": -1, "LYS": 1, "ARG": 1, "HIP": 1,
    "Cl-": -1, "MG": 2, "Na+": 1, "CA": 2, "OHE": -0.308100,
    "A": -1, "A5": -0.3081, "A3": -0.6919, "DA": -1, "DA5": -0.3079, "DA3": -0.6921,
    "C": -1, "C5": -0.3081, "C3": -0.6919, "DC": -1, "DC5": -0.3079, "DC3": -0.6921,
    "G": -1, "G5": -0.3081, "G3": -0.6919, "DG": -1, "DG5": -0.3079, "DG3": -0.6921,
    "U": -1, "U5": -0.3081, "U3": -0.6919, "DT": -1, "DT5": -0.3079, "DT3": -0.6921,
    "PTR": -2, "SEP": -2, "TPO": -2, "Y1P": -1, "S1P": -1, "T1P": -1,
    "H1D": 0, "H2D": -1, "H1E": 0, "H2E": -1,
    # These two are actually neutral, but by being added, the opposite
    # terminal end charge is not neutralized. Should work as long as no custom
    # terminal ends or protein constructs are used.
    "NME": 1, "ACE": -1,
}

tails = {"LAL", "MY", "PA", "ST", "OL", "AR", "DHA", "SA"}

sterols_PI = {"CHL1", "ERG", "CAM", "SIT", "STI", "PI"}
# Atom names of each ring probed on a sterol (three 6-rings and one 5-ring).
sterol_ring_probes = [
    ["C1", "C2", "C3", "C4", "C5", "C10"],
    ["C5", "C6", "C7", "C8", "C9", "C10"],
    ["C8", "C9", "C11", "C12", "C13", "C14"],
    ["C13", "C14", "C15", "C16", "C17"],
]
PI_ring_probe = ["C31", "C32", "C33", "C34", "C35", "C36"]

#masses = {"C": 12, "S": 32, "O": 16, "H": 1, "N": 14}

# Digit alphabets used by the hybrid-36 encoder (upper- and lower-case ranges).
_HY36_DIGITS_UPPER = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
_HY36_DIGITS_LOWER = "0123456789abcdefghijklmnopqrstuvwxyz"
|
|
50
|
+
|
|
51
|
+
# Maps the ion residue names found in the PDB (keys) to the names written by
# apply_xponge_ion_names() (values). Keys are case-sensitive: e.g. "CU" and
# "CU1", "Ag" and "AG", "Cr" and "CR", "Sm" and "SM" map to different names.
_XSPONGE_ION_RENAMES = {
    "LI": "LI",
    "Na+": "NA",
    "K+": "K",
    "RB": "RB",
    "CS": "CS",
    "F": "F",
    "Cl-": "CL",
    "BR": "BR",
    "IOD": "I",
    "I-": "I",
    "AG": "AG",
    "CU1": "CU",
    "TL": "TL",
    "Be": "BE2",
    "CU": "CU2",
    "NI": "NI2",
    "PT": "PT2",
    "ZN": "ZN2",
    "CO": "CO2",
    "PD": "PD2",
    "Ag": "AG2",
    "Cr": "CR2",
    "FE2": "FE2",
    "MG": "MG2",
    "V2+": "V2",
    "MN": "MN2",
    "HG": "HG2",
    "CD": "CD2",
    "YB2": "YB2",
    "CA": "CA2",
    "Sn": "SN2",
    "PB": "PB2",
    "EU": "EU2",
    "SR": "SR2",
    "Sm": "SM2",
    "BA": "BA2",
    "Ra": "RA2",
    "AL": "AL3",
    "FE": "FE3",
    "CR": "CR3",
    "IN": "IN3",
    "Tl": "TL3",
    "Y": "Y3",
    "LA": "LA3",
    "CE": "CE3",
    "PR": "PR3",
    "Nd": "ND3",
    "SM": "SM3",
    "EU3": "EU3",
    "GD3": "GD3",
    "TB": "TB3",
    "Dy": "DY3",
    "Er": "ER3",
    "Tm": "TM3",
    "LU": "LU3",
    "Hf": "HF4",
    "Zr": "ZR4",
    "Ce": "CE4",
    "U4+": "U4",
    "Pu": "PU4",
    "Th": "TH4",
}
|
|
114
|
+
|
|
115
|
+
def _base36_encode(value, width, digits):
    """Return ``value`` written as exactly ``width`` base-36 characters.

    ``digits`` is the 36-character alphabet to draw from; the result is
    left-padded with ``digits[0]``.

    Raises:
        ValueError: if ``value`` does not fit in ``width`` digits. A negative
            value never reduces to zero under floor division, so it raises too.
    """
    encoded = [""] * width
    # Fill from the least significant (rightmost) position backwards.
    for pos in range(width - 1, -1, -1):
        value, rem = divmod(value, 36)
        encoded[pos] = digits[rem]
    if value:
        raise ValueError("Value exceeds width for base-36 encoding")
    return "".join(encoded)
|
|
123
|
+
|
|
124
|
+
def hy36encode(width, value):
    """Encode integer using PDB hybrid-36 for fixed-width fields.

    Values below ``10**width`` are written as right-justified decimals. Larger
    values continue in base 36, first with the upper-case alphabet (starting
    at "A" followed by zeros) and then with the lower-case one.

    Args:
        width: number of characters in the field.
        value: non-negative integer to encode.

    Returns:
        A string of exactly ``width`` characters.

    Raises:
        ValueError: if ``value`` is negative or beyond the lower-case range.
    """
    if value < 0:
        raise ValueError("Hybrid-36 encoding does not support negative values")
    decimal_limit = 10**width
    if value < decimal_limit:
        return f"{value:>{width}d}"
    # Each letter-led range holds 26 leading letters times 36**(width-1)
    # tails; the offset skips the encodings whose first digit is 0-9.
    range_per_case = 26 * (36**(width - 1))
    base_offset = 10 * (36**(width - 1))
    value -= decimal_limit
    for alphabet in (_HY36_DIGITS_UPPER, _HY36_DIGITS_LOWER):
        if value < range_per_case:
            return _base36_encode(base_offset + value, width, alphabet)
        value -= range_per_case
    raise ValueError("Value exceeds hybrid-36 representable range")
|
|
140
|
+
|
|
141
|
+
def _format_pdb_int(value, width, mode):
    """Format ``value`` right-justified in ``width`` characters.

    ``mode`` selects the representation: "hy36" (hybrid-36), "hex"
    (upper-case hexadecimal) or "decimal".

    Raises:
        ValueError: for any other ``mode``, or from hy36encode().
    """
    if mode == "hy36":
        return hy36encode(width, value)
    if mode == "hex":
        return f"{value:>{width}X}"
    if mode == "decimal":
        return f"{value:>{width}d}"
    raise ValueError("Unknown format mode: %s" % mode)
|
|
149
|
+
|
|
150
|
+
def convert_pdb_indices_to_hybrid36(
    pdbfile,
    outfile=None,
    atom_base=10,
    res_base=10,
):
    """Convert atom serials and residue numbers to hybrid-36 in a PDB file.

    Only ATOM, HETATM, ANISOU and TER records are touched; every other line
    is copied unchanged. The result is written to a temporary file next to
    ``outfile`` and then moved into place, so ``pdbfile`` can be converted
    in place (the default when ``outfile`` is None).

    Args:
        pdbfile: path of the PDB file to read.
        outfile: path to write; defaults to ``pdbfile``.
        atom_base: numeric base used to parse the existing serial field.
        res_base: numeric base used to parse the existing residue number.

    Fields that cannot be parsed or encoded are left as they are and a
    warning is logged.
    """
    if outfile is None:
        outfile = pdbfile
    dir_name = os.path.dirname(outfile) or "."
    fd, tmp_path = tempfile.mkstemp(prefix="pdb_hy36_", suffix=".tmp", dir=dir_name)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fout, \
                open(pdbfile, "r", encoding="utf-8") as fin:
            for line in fin:
                if not line.startswith(("ATOM", "HETATM", "ANISOU", "TER")):
                    fout.write(line)
                    continue
                # Pad short records (e.g. a bare "TER") so both fields exist.
                raw = line.rstrip("\n").ljust(26)
                serial_field = raw[6:11].strip()
                resseq_field = raw[22:26].strip()
                # "*****" marks a serial that overflowed; nothing to parse.
                if serial_field and serial_field != "*****":
                    try:
                        serial = hy36encode(5, int(serial_field, atom_base))
                        raw = raw[:6] + serial + raw[11:]
                    except ValueError:
                        logger.warning("Failed to parse atom serial '%s' in %s", serial_field, pdbfile)
                if resseq_field:
                    try:
                        resseq = hy36encode(4, int(resseq_field, res_base))
                        raw = raw[:22] + resseq + raw[26:]
                    except ValueError:
                        logger.warning("Failed to parse residue id '%s' in %s", resseq_field, pdbfile)
                fout.write(raw + "\n")
        os.replace(tmp_path, outfile)
    finally:
        # Only still present if something failed before the replace.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|
|
192
|
+
|
|
193
|
+
def apply_xponge_ion_names(pdbfile, outfile=None):
    """Rename ion records in a PDB file according to _XSPONGE_ION_RENAMES.

    For every ATOM/HETATM line whose residue name (columns 18-20) is a key of
    the rename table, the atom name, residue name and element columns are
    overwritten with the mapped name (the element is its alphabetic part,
    upper-cased and cut to two characters). All ATOM/HETATM lines are padded
    to 80 columns; other lines are copied unchanged.

    The output goes to a temporary file next to ``outfile`` that is then moved
    into place; ``outfile`` defaults to ``pdbfile`` (in-place rewrite).
    """
    if outfile is None:
        outfile = pdbfile
    dir_name = os.path.dirname(outfile) or "."
    fd, tmp_path = tempfile.mkstemp(prefix="pdb_xponge_", suffix=".tmp", dir=dir_name)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fout, \
                open(pdbfile, "r", encoding="utf-8") as fin:
            for line in fin:
                if not line.startswith(("ATOM", "HETATM")):
                    fout.write(line)
                    continue
                raw = line.rstrip("\n").ljust(80)
                new_name = _XSPONGE_ION_RENAMES.get(raw[17:20].strip())
                if new_name is not None:
                    elem = "".join(ch for ch in new_name if ch.isalpha()).upper()
                    raw = raw[:12] + f"{new_name:>4}" + raw[16:]
                    raw = raw[:17] + f"{new_name:>3}" + raw[20:]
                    raw = raw[:76] + f"{elem:>2}"[:2] + raw[78:]
                fout.write(raw + "\n")
        os.replace(tmp_path, outfile)
    finally:
        # Only still present if something failed before the replace.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
|
|
223
|
+
|
|
224
|
+
def pdb2pqr_protonate(pdb,overwrite=False,ffout='AMBER',pH=7.0, output_dir=None):
    """Protonate ``pdb`` with the external PDB2PQR command.

    Args:
        pdb: path of the input PDB file.
        overwrite: rerun even if both output files already exist.
        ffout: naming scheme passed to ``--ffout``.
        pH: pH passed to ``--with-ph`` (titration states come from propka).
        output_dir: directory for the outputs; defaults to the input's folder.

    Returns:
        Path of the protonated PDB, ``<stem>_H.pdb``. A ``<stem>_H.pqr`` and a
        ``pdb2pqr.log`` are written to the same directory.

    Exits the program with status 1 if PDB2PQR is missing, fails, or does not
    produce the output PDB.
    """
    if not pdb2pqr:
        logger.critical("PDB2PQR module was not found. Use a different method to protonate your system")
        sys.exit(1)
    stem = os.path.splitext(os.path.basename(pdb))[0]
    base_dir = output_dir if output_dir else (os.path.dirname(pdb) or ".")
    output_pqr = os.path.join(base_dir, stem + "_H.pqr")
    output_pdb = os.path.join(base_dir, stem + "_H.pdb")
    if os.path.exists(output_pdb) and os.path.exists(output_pqr) and not overwrite:
        return output_pdb
    # pdb2pqr calls logging.basicConfig in its main function, which disrupts
    # the logging setup here, so it is run as a separate process instead of
    # being called through its Python API.
    log_path = os.path.join(os.path.dirname(output_pdb) or ".", "pdb2pqr.log")
    cmd = shlex.split(pdb2pqr) + [
        '--pdb-output='+output_pdb,
        '--ff=PARSE',
        '--ffout='+ffout,
        '--titration-state-method=propka',
        '--with-ph='+str(pH),
        pdb,
        output_pqr,
    ]
    with open(log_path, "w") as log_handle:
        result = subprocess.run(cmd, stdout=log_handle, stderr=log_handle)
    if result.returncode != 0:
        logger.critical("CRITICAL:\n PDB2PQR failed! Check %s", log_path)
        sys.exit(1)
    # Give the output up to about a second to show up as a non-empty file.
    for _ in range(5):
        if os.path.exists(output_pdb) and os.stat(output_pdb).st_size > 0:
            break
        time.sleep(0.2)
    if not os.path.exists(output_pdb) or os.stat(output_pdb).st_size == 0:
        logger.critical("CRITICAL:\n PDB2PQR did not create %s. Check %s and input paths.", output_pdb, log_path)
        sys.exit(1)
    return output_pdb
|
|
262
|
+
|
|
263
|
+
def estimated_density(MW):
|
|
264
|
+
density = 1.41 + 0.145*math.exp(float(-MW)/13000) #Protein Sci. 2004 Oct; 13(10):2825-2828
|
|
265
|
+
return density
|
|
266
|
+
|
|
267
|
+
def distribute_integer(integer, fracs):
    """Split ``integer`` into whole parts proportional to the weights ``fracs``.

    Each part is the rounded share of what is still left to hand out, relative
    to the weight still unassigned, so the parts add up to ``integer`` whenever
    the weights do not sum to zero.

    Returns:
        A list of ints, one per entry of ``fracs``. Entries reached after the
        remaining weight has dropped to zero (e.g. trailing zero weights, or
        all-zero weights) receive 0.
    """
    dist = []
    tot = sum(fracs)
    for frac in fracs:
        # Nothing is left to weigh against once tot hits zero; dividing would
        # raise ZeroDivisionError.
        d = int(round(integer*frac/tot)) if tot else 0
        dist.append(d)
        tot -= frac
        integer -= d
    return dist
|
|
276
|
+
|
|
277
|
+
def rotation_matrix(axis, theta):
    """
    Return the rotation matrix associated with counterclockwise rotation about
    the given axis by theta radians.

    The matrix is counterclockwise when applied as ``np.dot(R, v)``; applying
    it from the right (``np.matmul(v, R)``) rotates the opposite way.

    A zero-length axis defines no rotation axis, so the identity matrix is
    returned for it instead of a matrix full of NaN.
    """
    axis = np.asarray(axis)
    norm = math.sqrt(np.dot(axis, axis))
    if norm == 0:
        return np.identity(3)
    axis = axis/norm
    # Euler-Rodrigues parameters of the rotation.
    a = math.cos(theta/2.0)
    b, c, d = -axis*math.sin(theta/2.0)
    aa, bb, cc, dd = a*a, b*b, c*c, d*d
    bc, ad, ac, ab, bd, cd = b*c, a*d, a*c, a*b, b*d, c*d
    return np.array([[aa+bb-cc-dd, 2*(bc+ad), 2*(bd-ac)],
                     [2*(bc-ad), aa+cc-bb-dd, 2*(cd+ab)],
                     [2*(bd+ac), 2*(cd-ab), aa+dd-bb-cc]])
|
|
291
|
+
|
|
292
|
+
def vector_angle(vec1,vec2, ref=(1,1,0)):
    """Return the unsigned angle between ``vec1`` and ``vec2`` in radians.

    The result lies in [0, pi]. ``ref`` is accepted for backward compatibility
    only; it never influenced the returned value and is ignored.
    """
    cosine = np.dot(vec1,vec2)/(np.linalg.norm(vec1)*np.linalg.norm(vec2))
    # Rounding can push the cosine of (anti)parallel vectors marginally outside
    # [-1, 1], where arccos returns NaN.
    return np.arccos(np.clip(cosine, -1.0, 1.0))
|
|
298
|
+
|
|
299
|
+
def align_vectors(vec1,vec2, R=None):
    """Rotate ``vec1`` with ``R`` and return the result, ``np.matmul(vec1, R)``.

    A (near-)zero ``vec1`` is returned as a zero vector without rotating.
    When ``R`` is not given it is built with rotation_matrix() from the cross
    product of ``vec1`` and ``vec2`` and the angle between them; ``vec2`` is
    not used otherwise.
    """
    if np.allclose(vec1,np.array([0,0,0])):
        return np.array([0,0,0])
    if R is None:
        # Rotation axis is perpendicular to the plane spanned by both vectors.
        cross = np.cross(vec1,vec2)
        angle = vector_angle(vec1,vec2)
        R = rotation_matrix(cross,angle)
    return np.matmul(vec1,R)
|
|
306
|
+
|
|
307
|
+
def rotate_pdb(pdb_ori,tip_num,pivot_num, ref=(0,0,1),randomize=False,randomize_vec=(0,0,1)):
    """Return a rotated copy of ``pdb_ori`` oriented relative to ``ref``.

    ``pdb_ori`` maps residue -> {atom key -> coordinate array}; element 1 of
    each atom key is what ``tip_num`` / ``pivot_num`` are compared against.
    Either selector may be a single value or a list; for a list the mean
    position of all matching atoms is used. The rotation is derived from the
    tip-minus-pivot vector and ``ref`` and applied about the pivot.

    Args:
        pdb_ori: structure to rotate; it is not modified.
        tip_num, pivot_num: atom selectors defining the molecule's direction.
        ref: reference direction.
        randomize: additionally compose a random rotation about
            ``randomize_vec`` for every residue processed (the rotations
            accumulate from residue to residue).
        randomize_vec: axis for the random rotation.

    Raises:
        ValueError: if a selector matches no atom.
    """
    pdb = copy.deepcopy(pdb_ori)

    def _locate(selector, label):
        # List selector: mean of all matches. Scalar selector: last match.
        many = isinstance(selector, list)
        hits = [pdb[res][atom] for res in pdb for atom in pdb[res]
                if (atom[1] in selector if many else atom[1] == selector)]
        if not hits:
            raise ValueError("No atom matches the %s selector %r" % (label, selector))
        return np.mean(hits,axis=0) if many else hits[-1]

    tip = _locate(tip_num, "tip")
    pivot = _locate(pivot_num, "pivot")
    ref_vec = np.array(ref)
    angle = vector_angle(tip-pivot,ref_vec)
    axis = np.cross(tip-pivot,ref_vec) # vector perpendicular to v1 v2 plane
    if not np.any(axis):
        # Direction already parallel or antiparallel to ref: the cross product
        # vanishes, so use any axis perpendicular to ref instead of producing
        # NaN coordinates.
        helper = np.zeros(3)
        helper[np.argmin(np.abs(ref_vec))] = 1.0
        axis = np.cross(ref_vec,helper)
    R = rotation_matrix(axis,angle)
    for res in pdb:
        if randomize:
            # 6.28 (not 2*pi) kept so seeded runs give the same draws.
            spin = random.uniform(0,6.28)
            R = np.matmul(R,rotation_matrix(np.array(randomize_vec),spin))
        for atom in pdb[res]:
            pdb[res][atom] = align_vectors(pdb[res][atom]-pivot,ref_vec,R)+pivot
    return pdb
|
|
340
|
+
|
|
341
|
+
def randomize_pdb(pdb_ori,tip_num,pivot_num, ref=(0,0,1)):
    """Return a copy of ``pdb_ori`` rotated by a random angle about ``ref``.

    The rotation is applied about the pivot position, selected by
    ``pivot_num`` (a single value compared with element 1 of each atom key, or
    a list of such values whose matching atoms are averaged). ``tip_num`` is
    accepted for signature compatibility and has no effect on the result.

    Nothing is rotated when the structure is empty or when its last residue
    holds a single atom.

    Raises:
        ValueError: if a rotation is due and ``pivot_num`` matches no atom.
    """
    pdb = copy.deepcopy(pdb_ori)
    angle = random.uniform(0,6.28)
    R = rotation_matrix(np.array(ref),angle)
    res_keys = list(pdb)
    # The atom count of the last residue decides whether to rotate at all.
    if not res_keys or len(pdb[res_keys[-1]]) <= 1:
        return pdb

    many = isinstance(pivot_num,list)
    hits = [pdb[res][atom] for res in pdb for atom in pdb[res]
            if (atom[1] in pivot_num if many else atom[1] == pivot_num)]
    if not hits:
        raise ValueError("No atom matches the pivot selector %r" % (pivot_num,))
    # List selector: mean of all matches. Scalar selector: last match.
    pivot = np.mean(hits,axis=0) if many else hits[-1]

    for res in pdb:
        for atom in pdb[res]:
            pdb[res][atom] = align_vectors(pdb[res][atom]-pivot,np.array(ref),R)+pivot
    return pdb
|
|
371
|
+
|
|
372
|
+
def translate_pdb(pdb_ori,target=None,ref_atm=None,vec=None):
    """Return a translated copy of ``pdb_ori`` (residue -> {atom key -> coords}).

    Either give ``target`` and ``ref_atm``, in which case the structure is
    shifted so that the atom whose key has ``ref_atm`` as element 1 lands on
    ``target`` (the last match wins), or give ``vec``, which is *subtracted*
    from every coordinate. ``target``/``ref_atm`` take precedence over ``vec``.

    Raises:
        ValueError: if ``ref_atm`` matches no atom.

    Prints a message and exits with status 1 when neither option is supplied.
    """
    pdb = copy.deepcopy(pdb_ori)
    if target is not None and ref_atm is not None:
        ref = None
        for res in pdb:
            for atom in pdb[res]:
                if atom[1] == ref_atm:
                    ref = pdb[res][atom]
        if ref is None:
            raise ValueError("Reference atom %r not found" % (ref_atm,))
        tran_vec = ref-target
    elif vec is not None:
        tran_vec = vec
    else:
        print("A target and reference atom, or a translation vector has to be provided")
        sys.exit(1)
    for res in pdb:
        for atom in pdb[res]:
            pdb[res][atom] = pdb[res][atom]-tran_vec
    return pdb
|
|
389
|
+
|
|
390
|
+
def superimpose_pdb(pdb1,pdb2):
    """Return a copy of ``pdb2`` moved onto ``pdb1`` using three marker atoms.

    Marker ``i`` (i = 0, 1, 2) is the i-th atom of the i-th residue of
    ``pdb1`` and the entry with the same keys in ``pdb2``. The copy is
    translated so marker 0 coincides, rotated so the marker 0 -> 1 directions
    agree, then rotated about that direction to bring marker 2 in line.

    Side effects: writes the intermediate structures to "trans.pdb" and
    "rot1.pdb" in the working directory via pdb_write() and may print
    "Flipping!".

    NOTE(review): this needs at least three residues, with at least i+1 atoms
    in residue i - confirm that indexing residue and atom by the same counter
    is intended.
    """
    res_keys = list(pdb1.keys())
    points = []
    for count in range(3):
        key = res_keys[count]
        key2 = list(pdb1[key].keys())[count]
        points.append([pdb1[key][key2],pdb2[key][key2]])

    # Translation that puts marker 0 of pdb2 on marker 0 of pdb1.
    trans_vec = points[0][1]-points[0][0]
    for pair in points:
        pair[1] = pair[1]-trans_vec

    origin = points[0][0]
    target_dir = points[1][0]-origin
    angle = vector_angle(points[1][1]-origin,target_dir)
    axis = np.cross(points[1][1]-origin,target_dir)
    R = rotation_matrix(axis,angle)
    # If rotating by +angle does not land on the target, use the opposite sense.
    if not np.allclose(align_vectors(points[1][1]-origin,target_dir,R),target_dir,atol=1e-01):
        print("Flipping!")
        R = rotation_matrix(axis,-angle)

    pdb3 = translate_pdb(pdb2,vec=trans_vec)
    pdb_write(pdb3,outfile="trans.pdb")

    for res in pdb3:
        for atom in pdb3[res]:
            pdb3[res][atom] = align_vectors(pdb3[res][atom]-origin,target_dir,R)+origin
    for pair in points:
        pair[1] = align_vectors(pair[1]-origin,target_dir,R)+origin
    pdb_write(pdb3,outfile="rot1.pdb")

    # Second rotation: about the marker 0 -> 1 axis, through the foot of the
    # perpendicular dropped from pdb2's marker 2 onto that axis.
    axis = target_dir
    axis_n = axis/np.linalg.norm(axis)
    line_point1 = origin + np.dot(points[2][1]-origin,axis_n)*axis_n
    angle = vector_angle(points[2][1]-line_point1,points[2][0]-line_point1)
    R = rotation_matrix(axis,angle)
    if not np.allclose(align_vectors(points[2][1]-line_point1,points[2][0]-line_point1,R),points[2][0]-line_point1,atol=1e-02):
        R = rotation_matrix(axis,-angle)

    for res in pdb3:
        for atom in pdb3[res]:
            pdb3[res][atom] = align_vectors(pdb3[res][atom]-line_point1,points[1][0]-line_point1,R)+line_point1

    return pdb3
|
|
452
|
+
|
|
453
|
+
def sphere_dist(rad, dx, dy=0, dz=0):
    """Return the arc length on a sphere of radius ``rad`` for a given chord.

    The chord is the straight-line distance sqrt(dx**2 + dy**2 + dz**2)
    between two points on the sphere.
    """
    chord = np.sqrt(dx**2+dy**2+dz**2)
    return rad*2*np.arcsin(chord/(2*rad))
|
|
455
|
+
|
|
456
|
+
def sphere_rectangle_area(rad, a, b):
    """Return the area of a rectangle-like patch on a sphere of radius ``rad``.

    ``a`` and ``b`` are given as chord lengths and are first converted to arc
    lengths with sphere_dist(); the area is then
    rad**2 * (2*pi - 4*arccos(tan(a/(2*rad)) * tan(b/(2*rad)))).
    """
    arc_a = sphere_dist(rad, a)
    arc_b = sphere_dist(rad, b)
    tangents = np.tan(arc_a/(2*rad))*np.tan(arc_b/(2*rad))
    return rad**2*(2*np.pi-4*np.arccos(tangents))
|
|
460
|
+
|
|
461
|
+
def sphere_integral(a,b,c,r):
    """Return the volume of the ball of radius ``r`` centred at (a, b, c).

    This is the integral of 1 over the whole ball, so it is independent of
    the centre and equals 4/3*pi*r**3. The closed form replaces a numerical
    triple integration of the same quantity; like that integration, it is
    negative for a negative ``r``.
    """
    return 4.0/3.0*math.pi*r**3
|
|
466
|
+
|
|
467
|
+
def sphere_integral_square(x_min,x_max,y_min,y_max,z_min=None, z_max=None,r1=None,r2=None, a=0,b=0,c=0):
    """Return the volume above a rectangle bounded in z by spherical surfaces.

    A spherical surface of radius r centred at (a, b, c) is
    z = sqrt(r**2 - (x-a)**2 - (y-b)**2) + c. The z bounds are chosen by the
    first matching combination of arguments:

    * ``r1`` and ``r2``: from the r1 surface up to the r2 surface;
    * ``z_min`` and ``r2``: from the plane z_min up to the r2 surface;
    * ``z_max`` and ``r1``: from the r1 surface up to the plane z_max.

    The integrand is 1, so the z integral is just upper minus lower and only
    a double integral over the rectangle is evaluated.

    Raises:
        ValueError: if no valid combination is given, or if a radius is too
            small for its surface to exist over the whole rectangle (every
            corner is checked, not only (x_max, y_max)).
    """
    def surface(radius):
        return lambda x, y: np.sqrt(radius**2-(x-a)**2-(y-b)**2)+c

    if r1 is not None and r2 is not None:
        radii = (r1, r2)
        lower, upper = surface(r1), surface(r2)
    elif z_min is not None and r2 is not None:
        radii = (r2,)
        lower, upper = (lambda x, y: z_min), surface(r2)
    elif z_max is not None and r1 is not None:
        radii = (r1,)
        lower, upper = surface(r1), (lambda x, y: z_max)
    else:
        raise ValueError("Provide r1 and r2, z_min and r2, or z_max and r1")

    # Squared in-plane distance from the sphere axis to the farthest corner.
    farthest = max((x_min-a)**2, (x_max-a)**2) + max((y_min-b)**2, (y_max-b)**2)
    if any(radius**2-farthest < 0 for radius in radii):
        raise ValueError("Radius is too small for given dimensions!")

    # dblquad calls the integrand as f(y, x).
    return integrate.dblquad(lambda y, x: upper(x, y)-lower(x, y), x_min, x_max,
                             lambda x: y_min, lambda x: y_max)[0]
|
|
492
|
+
|
|
493
|
+
def gauss_rectangle_area(x_min,x_max,y_min,y_max,b,d,h,a=0,c=0):
    """Return the area of a Gaussian-shaped surface over a rectangle.

    The surface is z = h*exp(-(x-a)**2/(2*b**2) - (y-c)**2/(2*d**2)); its area
    over [x_min, x_max] x [y_min, y_max] is the integral of
    sqrt(1 + (dz/dx)**2 + (dz/dy)**2).
    """
    def area_element(y, x):
        # dblquad calls the integrand as f(y, x).
        bump = np.exp(-(x-a)**2/(2*b**2)-(y-c)**2/(2*d**2))
        dz_dx = -h*(x-a)*bump/b**2
        dz_dy = -h*(y-c)*bump/d**2
        return np.sqrt(1 + dz_dx**2 + dz_dy**2)

    return integrate.dblquad(area_element, x_min, x_max,
                             lambda x: y_min, lambda x: y_max)[0]
|
|
497
|
+
|
|
498
|
+
def gauss_integral_square(x_min,x_max,y_min,y_max,b,d,h,a=0,c=0,z_min=None, z_max=None, g1=None, g2=None):
    """Return the volume above a rectangle bounded in z by Gaussian surfaces.

    A Gaussian surface with offset g is
    z = h*exp(-(x-a)**2/(2*b**2) - (y-c)**2/(2*d**2)) + g, i.e. ``b`` is the
    width along x and ``d`` the width along y. The z bounds are chosen by the
    first matching combination of arguments:

    * ``g1`` and ``g2``: from the g1 surface up to the g2 surface;
    * ``z_min`` and ``g2``: from the plane z_min up to the g2 surface;
    * ``z_max`` and ``g1``: from the g1 surface up to the plane z_max.

    The integrand is 1, so the z integral is just upper minus lower and only
    a double integral is evaluated. The surface is evaluated as a function of
    (x, y) in that order; the earlier z-bound callables took (y, x) while
    being called with (x, y), which swapped the two axes.

    Raises:
        ValueError: if no valid combination is given or the upper offset lies
            below the lower one.
    """
    def bump(x, y):
        return h*np.exp(-(x-a)**2/(2*b**2)-(y-c)**2/(2*d**2))

    if g1 is not None and g2 is not None:
        if g2-g1 < 0:
            raise ValueError("g2 must not be smaller than g1")
        # Same bump on both surfaces: the height is the constant offset gap.
        height = lambda y, x: g2-g1
    elif z_min is not None and g2 is not None:
        if g2-z_min < 0:
            raise ValueError("g2 must not be smaller than z_min")
        height = lambda y, x: bump(x, y)+g2-z_min
    elif z_max is not None and g1 is not None:
        if z_max-g1 < 0:
            raise ValueError("z_max must not be smaller than g1")
        height = lambda y, x: z_max-(bump(x, y)+g1)
    else:
        raise ValueError("Provide g1 and g2, z_min and g2, or z_max and g1")

    # dblquad calls the integrand as f(y, x).
    return integrate.dblquad(height, x_min, x_max,
                             lambda x: y_min, lambda x: y_max)[0]
|
|
522
|
+
|
|
523
|
+
class MembraneParams(object):
|
|
524
|
+
"""
|
|
525
|
+
A class to store membrane params corresponding to a PDB
|
|
526
|
+
"""
|
|
527
|
+
def __init__(self,pdb, leaflet_z, grid=None, move=False, move_vec=[0,0,0], xy_cen=False, z_cen = False, outpdb="PROT.pdb", chain=" ",renumber=False):
|
|
528
|
+
#Getting variables into class attributes
|
|
529
|
+
self.pdb = pdb
|
|
530
|
+
self.leaflet_z = leaflet_z
|
|
531
|
+
self.grid = grid
|
|
532
|
+
self.move = move
|
|
533
|
+
self.move_vec = move_vec
|
|
534
|
+
self.xy_cen = xy_cen
|
|
535
|
+
self.z_cen = z_cen
|
|
536
|
+
self.outpdb = outpdb
|
|
537
|
+
self.chain = chain
|
|
538
|
+
self.renumber = renumber
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
#Variables to store values after
|
|
542
|
+
self.density = None
|
|
543
|
+
self.chains = 1
|
|
544
|
+
self.charge = 0
|
|
545
|
+
self.mass = 0
|
|
546
|
+
self.hydrogens = 0
|
|
547
|
+
self.use_hex = False
|
|
548
|
+
self.mem_atoms_mass_up = 0
|
|
549
|
+
self.mem_atoms_mass_down = 0
|
|
550
|
+
self.solv_atoms_mass_up = 0
|
|
551
|
+
self.solv_atoms_mass_down = 0
|
|
552
|
+
self.new_pdb = []
|
|
553
|
+
self.x_mem = []
|
|
554
|
+
self.y_mem = []
|
|
555
|
+
self.pdblines = None
|
|
556
|
+
self.minmax = None
|
|
557
|
+
|
|
558
|
+
self.x = []
|
|
559
|
+
self.y = []
|
|
560
|
+
self.z = []
|
|
561
|
+
|
|
562
|
+
self._x_cen = 0
|
|
563
|
+
self._y_cen = 0
|
|
564
|
+
self._z_cen = 0
|
|
565
|
+
|
|
566
|
+
def read_pdb(self):
    "Read pdb lines into class"
    # The context manager closes the file even if reading fails.
    with open(self.pdb, "r") as handle:
        self.pdblines = handle.readlines()
|
|
571
|
+
|
|
572
|
+
def write_pdb(self):
    "Write the lines collected in new_pdb to the outpdb path"
    # The context manager closes (and flushes) the file even if writing fails.
    with open(self.outpdb,"w") as handle:
        handle.writelines(self.new_pdb)
|
|
576
|
+
|
|
577
|
+
def xyz_center(self):
    """
    Calculate "center" of pdb from coordinate max min average

    Every ATOM/HETATM line whose residue name is not "DUM" contributes; the
    move vector is always added to the coordinates first. The midpoints of
    the extremes are stored in _x_cen, _y_cen and _z_cen.
    """
    coords = [
        (float(line[30:38])+self.move_vec[0],
         float(line[38:46])+self.move_vec[1],
         float(line[46:54])+self.move_vec[2])
        for line in self.pdblines
        if (line[0:4] == "ATOM" or line[0:6] == "HETATM") and line[17:20].strip() != "DUM"
    ]
    x = [point[0] for point in coords]
    y = [point[1] for point in coords]
    z = [point[2] for point in coords]
    self._x_cen = (max(x)+min(x))/2
    self._y_cen = (max(y)+min(y))/2
    self._z_cen = (max(z)+min(z))/2
|
|
595
|
+
|
|
596
|
+
def pdb_reindex(self):
    """
    Go over pdblines, renumber, add chain ids and skip as needed

    For every ATOM/HETATM line (residue "DUM" excluded) a reformatted record
    is appended to new_pdb and the transformed coordinates to x, y and z.
    "TER" records are re-emitted, and one is inserted when the record type or
    the input chain id changes. Along the way the total charge, mass, hydrogen
    count, chain count and the per-region masses (solvent above / below
    leaflet_z, membrane upper / lower half) are accumulated; x_mem and y_mem
    collect the x/y of ATOM records inside the membrane slab.

    Other lines of the input are dropped.
    """
    last_chain = None
    last_resnum = None
    last_type = None
    track = None
    chain_list = list(string.ascii_uppercase)+list(string.ascii_lowercase)+list(map(str,range(0,10)))
    resnum_index = 1
    chain = self.chain

    def _close_segment():
        # Avoid a leading or doubled TER record.
        if self.new_pdb and not self.new_pdb[-1].startswith("TER"):
            self.new_pdb.append("TER\n")

    #Calculate PDB center to be used later
    self.xyz_center()

    if chain == " ":
        chain_index = 0
    else:
        try:
            chain_index = chain_list.index(chain)
        except ValueError:
            print("Chain ID not found in list")
            chain_index = 0
    for line in self.pdblines:
        if line[0:3] == "TER":
            self.new_pdb.append("TER\n")
            continue
        if not ((line[0:4] == "ATOM" or line[0:6] == "HETATM") and line[17:20].strip() != "DUM"):
            continue
        record_type = line[0:6].strip()
        if last_type is not None and record_type != last_type:
            _close_segment()
        last_type = record_type
        residue = line[17:21].strip()
        atomnum = int(line[6:11].strip())
        atomname = line[12:16].strip()
        resnum = int(line[22:26].strip())
        if self.renumber:
            if resnum != last_resnum:
                last_resnum = resnum
                resnum_new = resnum_index
                resnum_index += 1
                # The residue field holds four digits: wrap back to 1.
                if resnum_new > 9999:
                    resnum_new = ((resnum_new-1)%9999)+1
            resnum = resnum_new
        segid = line[72:76].strip()
        if residue == "ILE" and atomname == "CD":
            atomname = "CD1"
        if residue == "CYM" and atomname in ("HN1", "HB1"):
            continue
        if atomname == "OT1":
            atomname = "O"
        if atomname == "OT2":
            atomname = "OXT"
        ali = ">" if len(atomname) == 3 else "^"
        if last_chain is not None and last_chain != line[21:22]:
            _close_segment()
            self.chains += 1
            # Start over at the head of the list once the ids run out.
            chain_index = (chain_index+1)%len(chain_list)
            chain = chain_list[chain_index]
        last_chain = line[21:22]
        x_coord = float(line[30:38])
        y_coord = float(line[38:46])
        z_coord = float(line[46:54])
        if self.move:
            x_coord += self.move_vec[0]
            y_coord += self.move_vec[1]
            z_coord += self.move_vec[2]
        if self.xy_cen:
            x_coord -= self._x_cen
            y_coord -= self._y_cen
            # z is only centred together with x/y.
            if self.z_cen:
                z_coord -= self._z_cen
        self.x.append(float(x_coord))
        self.y.append(float(y_coord))
        self.z.append(float(z_coord))
        # Fixed PDB columns: coordinates start at index 30 and the segment id
        # at index 72, the same positions this method reads them from.
        line = line[0:6]+"{:>5d} {:{align}4} {:<4}{:1}{:>4}    {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f}      {:<4}{:>2}\n".format(atomnum,atomname,residue,chain,resnum,x_coord,y_coord,z_coord,1,0,segid,atomname[0],align=ali)
        self.new_pdb.append(line)

        # Count each charged residue once, keyed on the written residue number.
        if line[17:20].strip() in charged and track != line[22:26].strip():
            self.charge += charged[line[17:20].strip()]
            track = line[22:26].strip()
        #Add masses according to z location
        if line[0:4] != "ATOM":
            continue
        element = data.guess_element(residue,atomname)
        if element not in data.masses:
            print("Atom "+element+" mass will not be considered!\n")
            continue
        self.mass += data.masses[element]
        if element == "H":
            self.hydrogens += 1
        if self.z[-1] > self.leaflet_z:
            self.solv_atoms_mass_up += data.masses[element]
        elif self.z[-1] < -self.leaflet_z:
            self.solv_atoms_mass_down += data.masses[element]
        else:
            if self.z[-1] >= 0:
                self.mem_atoms_mass_up += data.masses[element]
            else:
                self.mem_atoms_mass_down += data.masses[element]
            self.x_mem.append(self.x[-1])
            self.y_mem.append(self.y[-1])
|
|
710
|
+
|
|
711
|
+
def read_grid(self):
    """Replace the estimated volumes with counts taken from the grid file.

    Every line of ``self.grid`` is parsed: the z coordinate is read from the
    fixed columns 46:54, shifted by ``self.move_vec[2]`` and classified
    against ``self.leaflet_z``:

    * ``z > leaflet_z``          -> upper solvent
    * ``0 < z <= leaflet_z``     -> upper membrane leaflet
    * ``-leaflet_z <= z <= 0``   -> lower membrane leaflet
    * ``z < -leaflet_z``         -> lower solvent

    The four counts are divided by 8 and stored in ``self.solv_vol_up``,
    ``self.mem_vol_up``, ``self.mem_vol_down`` and ``self.solv_vol_down``;
    ``self.volume`` is their sum.

    Raises:
        ValueError: if a line has no parsable float in columns 46:54.
    """
    solv_up = 0
    solv_down = 0
    mem_up = 0
    mem_down = 0
    z_shift = self.move_vec[2]
    leaflet_z = self.leaflet_z
    # The context manager closes the grid file even when a line fails to parse.
    with open(self.grid, "r") as grid_file:
        for line in grid_file:
            z_pos = float(line[46:54]) + z_shift
            if z_pos > leaflet_z:
                solv_up += 1
            elif z_pos > 0:
                mem_up += 1
            elif z_pos >= -leaflet_z:
                mem_down += 1
            elif z_pos < -leaflet_z:
                solv_down += 1
            else:
                # Only reachable when the coordinate is NaN.
                print("Coordinate not assigned?") #Shouldn't be called
    # NOTE(review): the factor 8 presumably converts grid points to volume
    # (e.g. a 0.5 spacing gives 8 points per unit cube) - confirm with the
    # tool that produces the grid.
    self.mem_vol_up = mem_up/8
    self.mem_vol_down = mem_down/8
    self.solv_vol_up = solv_up/8
    self.solv_vol_down = solv_down/8
    self.volume = self.mem_vol_up+self.mem_vol_down+self.solv_vol_up+self.solv_vol_down
|
|
734
|
+
|
|
735
|
+
def estimated_atoms(self):
    """Derive the density and the per-region volumes from the summed masses.

    ``self.density`` is ``estimated_density(self.mass)`` scaled by
    ``avogadro/10**24``. ``self.volume`` and the four region volumes
    (``mem_vol_up``, ``mem_vol_down``, ``solv_vol_up``, ``solv_vol_down``)
    are the matching masses divided by that density.
    """
    density = estimated_density(self.mass)*avogadro/10**24
    self.density = density
    self.volume = self.mass/density
    # (target attribute, mass accumulated for that region)
    region_masses = (
        ("mem_vol_up", self.mem_atoms_mass_up),
        ("mem_vol_down", self.mem_atoms_mass_down),
        ("solv_vol_up", self.solv_atoms_mass_up),
        ("solv_vol_down", self.solv_atoms_mass_down),
    )
    for volume_attr, region_mass in region_masses:
        setattr(self, volume_attr, region_mass/density)
|
|
743
|
+
|
|
744
|
+
def measure(self):
    """Process the PDB and return its geometric and physical parameters.

    Calls ``read_pdb()``, ``pdb_reindex()`` and ``write_pdb()`` in that
    order, computes the largest xy distance of any atom from the mean xy
    position, estimates the volumes with ``estimated_atoms()`` and, when
    ``self.grid`` is set, overrides them with ``read_grid()``.

    Side effects: ``self.x``, ``self.y``, ``self.z``, ``self.x_mem`` and
    ``self.y_mem`` are sorted in place; ``self.max_rad`` and
    ``self.minmax`` are set.

    Returns:
        tuple: ``([xmin, ymin, zmin, xmax, ymax, zmax], max_rad, charge,
        volume, mem_vol_up, mem_vol_down, solv_vol_up, solv_vol_down,
        density, mass, chains)``.

    Raises:
        ZeroDivisionError: if no coordinates were collected.
    """
    self.read_pdb()
    self.pdb_reindex()
    self.write_pdb()

    if self.hydrogens == 0:
        print("Protein doesn't look to be protonated! Please consider that this will cause a bad estimation of the volume and of the packing process!\n\n")

    #### MAXIMUM PROTEIN XY RADIUS ###

    mean_x = sum(self.x)/len(self.x)
    mean_y = sum(self.y)/len(self.y)
    self.max_rad = max(
        math.sqrt((x_val-mean_x)**2+(y_val-mean_y)**2)
        for x_val, y_val in zip(self.x, self.y)
    )

    #### IF GRID VOL CALCULATION, REPLACE ESTIMATION ####

    self.estimated_atoms()
    if self.grid is not None:
        self.read_grid()

    for coords in (self.x, self.y, self.z, self.x_mem, self.y_mem):
        coords.sort()
    self.minmax = [self.x[0], self.y[0], self.z[0], self.x[-1], self.y[-1], self.z[-1]]

    # The original computed an unused membrane-area estimate inside a bare
    # ``except`` only to detect empty membrane lists; check that directly
    # so unrelated errors are no longer swallowed.
    if not self.x_mem or not self.y_mem:
        print("WARNING! The protein doesn't have atoms sitting in the membrane! Make sure that it was correctly aligned and that the placement is as intended!")
    return list(self.minmax), self.max_rad, self.charge, self.volume, self.mem_vol_up, self.mem_vol_down, self.solv_vol_up, self.solv_vol_down, self.density, self.mass, self.chains
|
|
776
|
+
|
|
777
|
+
def __repr__(self):
    """Return a short debug label that names the PDB held by this object."""
    return "<MembraneParam PDB:{}>".format(self.pdb)
|
|
779
|
+
#
|
|
780
|
+
|
|
781
|
+
def is_number(num):
    """Return True when ``float(num)`` succeeds, False otherwise.

    Only the conversion errors raised by ``float()`` are treated as
    "not a number"; unrelated exceptions such as KeyboardInterrupt are no
    longer swallowed.
    """
    try:
        float(num)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type (e.g. None); ValueError: unparsable
        # text; OverflowError: integer too large for a float.
        return False
    return True
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
def pdb_parse(pdbfile, onlybb=True):
    """Read atom coordinates from a PDB file, grouped by residue.

    Only ``ATOM`` and ``HETATM`` records are used. Fields are taken from
    fixed columns: serial 6:11, atom name 12:16, residue name 17:21,
    chain 21:22, residue number 22:26 and x/y/z 30:38, 38:46, 46:54.

    Args:
        pdbfile: path of the PDB file.
        onlybb: when true, keep only atoms whose name is in the
            module-level ``cgatoms`` and whose residue is in the
            module-level ``residues``; when false, keep every atom.

    Returns:
        dict: ``{(residue, resnum, chain): {(atomname, atomnum): xyz}}``
        where ``xyz`` is a NumPy array of three floats.

    Raises:
        ValueError: if a serial, residue number or coordinate cannot be
            parsed.
    """
    CA_CB = {}
    # The context manager closes the file even when a record fails to parse.
    with open(pdbfile, "r") as handle:
        for line in handle:
            if not line.startswith(("ATOM", "HETATM")):
                continue
            residue = line[17:21].strip()
            atomnum = int(line[6:11].strip())
            atomname = line[12:16].strip()
            resnum = int(line[22:26].strip())
            chain = line[21:22]
            # Test the flag first so the lookup tables are only consulted
            # when filtering is requested.
            if onlybb and not (atomname in cgatoms and residue in residues):
                continue
            res_key = (residue, resnum, chain)
            xyz = np.array([float(line[30:38].strip()),float(line[38:46].strip()),float(line[46:54].strip())])
            CA_CB.setdefault(res_key, {})[(atomname, atomnum)] = xyz
    return CA_CB
|
|
809
|
+
|
|
810
|
+
def pdb_write(CA_CB, outfile="test.pdb"):
    """Write a residue-keyed coordinate dictionary as PDB ``ATOM`` lines.

    Args:
        CA_CB: ``{(residue, resnum, chain): {(atomname, atomnum): xyz}}``
            as produced by ``pdb_parse``; ``xyz`` must be indexable with
            0, 1 and 2.
        outfile: path of the file to create or overwrite.

    Residues are written ordered by (chain, residue number), and atoms
    inside each residue by atom number. The last field is the first
    character of the atom name.
    """
    # NOTE(review): PDB is a column-based format; confirm that the literal
    # spacing of this template matches the column slices used by the parsers.
    template = "ATOM {:>5d} {:>4} {:>3}{:>2}{:>4d} {:>8.3f}{:>8.3f}{:>8.3f} 1.00 0.00 {:1}\n"
    # The context manager closes the file even when formatting a record fails.
    with open(outfile,"w") as handle:
        for res in sorted(CA_CB,key=lambda x:(x[2],x[1])):
            atoms = CA_CB[res]
            for atom in sorted(atoms, key=lambda x:x[1]):
                xyz = atoms[atom]
                handle.write(template.format(atom[1],"{:<3}".format(atom[0]),res[0],res[2],res[1],xyz[0],xyz[1],xyz[2],atom[0][0]))
|
|
816
|
+
|
|
817
|
+
def pdb_parse_TER(pdbfile, onlybb=True, noH=True, filter_res=None, filter_atm=None, packmol_hex_after=99999, hexadecimal_indices=False):
    """Read atom coordinates from a PDB file, grouped by TER-delimited molecule.

    A molecule counter starts at 1 and is incremented at every ``TER``
    record. ``ATOM``/``HETATM`` records are stored under
    ``(molecule number, chain)``.

    Args:
        pdbfile: path of the PDB file.
        onlybb: when true, keep only atoms whose name is in the
            module-level ``cgatoms`` and whose residue is in the
            module-level ``residues``; the remaining filters are ignored.
        noH: with ``onlybb`` false, skip atoms whose name starts with "H".
        filter_res: with ``onlybb`` false, optional container of residue
            names to keep.
        filter_atm: with ``onlybb`` false, optional container of atom
            names to keep.
        packmol_hex_after: number of atom records after which serials are
            read as hexadecimal; ``None`` disables this rule.
        hexadecimal_indices: when true, serials and residue numbers are
            always read as hexadecimal.

    Serial parsing:
        * once a ``*****`` serial has been seen, serials are no longer
          parsed and are generated by adding 1 to the previous serial;
        * otherwise base 16 is used when ``hexadecimal_indices`` is set,
          when more than ``packmol_hex_after`` atom records have been
          read, or after the first non-numeric serial was met (this
          switch is permanent); base 10 is used in all other cases.

    Returns:
        dict: ``{(molnum, chain): {(residue, resnum, atomname, atomnum,
        tracker): xyz}}`` where ``xyz`` is a NumPy array of three floats
        and ``tracker`` is a running counter that preserves file order.

    Raises:
        ValueError: if a serial, residue number or coordinate cannot be
            parsed.
    """
    CA_CB = {}
    molnum = 1
    tracker = 1
    atom_index = 0
    # Start at 0 so that a file whose very first serial is already "*****"
    # numbers its atoms from 1 instead of raising UnboundLocalError.
    atomnum = 0
    hex_switch = False
    atomlimit = False
    # The context manager closes the file even when a record fails to parse.
    with open(pdbfile, "r") as handle:
        for line in handle:
            if line.startswith("TER"):
                molnum += 1
            if not line.startswith(("ATOM", "HETATM")):
                continue
            atom_index += 1
            serial_txt = line[6:11].strip()
            if serial_txt == "*****" and not atomlimit:
                logger.warning("Found atom number limit '*****'. Atom number parsing will be unreliable")
                atomlimit = True
            residue = line[17:21].strip()
            if not serial_txt.isnumeric():
                hex_switch = True
            if atomlimit:
                atomnum = atomnum + 1
            else:
                # Packmol switches to hex after serial 99999; use atom count to disambiguate numeric hex like "20000".
                use_hex = (
                    hexadecimal_indices
                    or (packmol_hex_after is not None and atom_index > packmol_hex_after)
                    or hex_switch  # assume base 16 if parsing packmol output
                )
                atomnum = int(serial_txt, 16) if use_hex else int(serial_txt)
            atomname = line[12:16].strip()
            resnum = int(line[22:26].strip(),16) if hexadecimal_indices else int(line[22:26].strip())
            chain = line[21:22]
            mol_key = (molnum, chain)
            if onlybb:
                keep = atomname in cgatoms and residue in residues
            else:
                # Filtered-out atoms do not advance the tracker.
                if noH and atomname.startswith("H"):
                    continue
                if filter_res is not None and residue not in filter_res:
                    continue
                if filter_atm is not None and atomname not in filter_atm:
                    continue
                keep = True
            if keep:
                xyz = np.array([float(line[30:38].strip()),float(line[38:46].strip()),float(line[46:54].strip())])
                CA_CB.setdefault(mol_key, {})[(residue, resnum, atomname, atomnum, tracker)] = xyz
            tracker += 1
    return CA_CB
|
|
870
|
+
|
|
871
|
+
|
|
872
|
+
|
|
873
|
+
def pdb_write_TER(CA_CB, outfile="test.pdb", serial_format="hy36", resseq_format="decimal"):
    """Write a molecule-keyed coordinate dictionary as a PDB file.

    Args:
        CA_CB: ``{(molnum, chain): {(residue, resnum, atomname, atomnum,
            tracker): xyz}}`` as produced by ``pdb_parse_TER``.
        outfile: path of the file to create or overwrite.
        serial_format: format name handed to ``_format_pdb_int`` for the
            5-character atom serial.
        resseq_format: format name handed to ``_format_pdb_int`` for the
            4-character residue number.

    Molecules are written in order of molecule number, each followed by a
    ``TER`` line; atoms are ordered by (tracker, residue number). The file
    ends with ``END``.

    Returns:
        str: ``outfile``.
    """
    # NOTE(review): PDB is a column-based format; confirm that the literal
    # spacing of this template matches the column slices used by the parsers.
    template = "ATOM {:>5} {:>4} {:>3}{:>2}{:>4} {:>8.3f}{:>8.3f}{:>8.3f} 1.00 0.00 {:1}\n"
    # The context manager closes the file even when formatting a record fails.
    with open(outfile,"w") as handle:
        for mol in sorted(CA_CB,key=lambda x:x[0]):
            atoms = CA_CB[mol]
            for atom in sorted(atoms, key=lambda x:(x[4],x[1])): # Packmol output comes serialized first and foremost by atomnumber
                serial = _format_pdb_int(atom[3], 5, serial_format)
                resseq = _format_pdb_int(atom[1], 4, resseq_format)
                xyz = atoms[atom]
                # The last field is derived from the atom name (atom[2]),
                # as in pdb_write; the original used the first letter of
                # the residue name (atom[0]).
                handle.write(template.format(serial,"{:<3}".format(atom[2]),atom[0],mol[1],resseq,xyz[0],xyz[1],xyz[2],atom[2][0]))
            handle.write("TER\n")
        handle.write("END\n")
    return outfile
|
|
884
|
+
|
|
885
|
+
def find_piercing_lipids(pdb, outfile="noclash.pdb", verbose=False, hexadecimal_indices=False):
    """Find lipid molecules whose tail bonds pass through a ring.

    The PDB is parsed twice with ``pdb_parse_TER``: once keeping the
    residues listed in the module-level ``tails`` and once keeping those in
    ``sterols_PI``. For every tail molecule the midpoint of each atom pair
    closer than 1.7 A is taken as a bond midpoint. For every sterol/PI
    molecule a ring centre is computed per entry of ``sterol_ring_probes``
    (non-"PI" residues) and one for ``PI_ring_probe`` ("PI" residues). A
    tail molecule is reported when any of its bond midpoints lies within
    2.5 A of any ring centre.

    Args:
        pdb: path of the PDB file to analyse.
        outfile: unused; kept for interface compatibility.
        verbose: unused; kept for interface compatibility.
        hexadecimal_indices: forwarded to ``pdb_parse_TER``.

    Returns:
        set: the ``(molnum, chain)`` keys of the piercing tail molecules.
    """
    tails_dict = pdb_parse_TER(pdb, onlybb=False, filter_res=tails, hexadecimal_indices=hexadecimal_indices)
    sterol_PI_dict = pdb_parse_TER(pdb, onlybb=False, filter_res=sterols_PI, hexadecimal_indices=hexadecimal_indices)

    # Bond midpoints per tail molecule. They are collected in lists first so
    # the array below can be sized from the data; the former fixed 50 slots
    # raised IndexError for molecules with more detected bonds.
    mol_keys = list(tails_dict)
    bond_midpoints = []
    for mol in mol_keys:
        coords = list(tails_dict[mol].values())
        found = []
        for first_idx, first in enumerate(coords):
            for second in coords[first_idx+1:]:
                if np.linalg.norm(first-second) < 1.7: # C-C bond length shouldn't be larger than 1.59A / 1.7 just in case
                    found.append(np.mean([first, second], axis=0))
        bond_midpoints.append(found)

    max_bonds = max((len(found) for found in bond_midpoints), default=0)
    # Unused slots stay at +inf so they can never fall within the cutoff.
    midpoints = np.full((len(mol_keys), max(max_bonds, 1), 3), np.inf)
    for row, found in enumerate(bond_midpoints):
        if found:
            midpoints[row, :len(found)] = found

    # Ring centres of every sterol / PI molecule. Only rings with at least
    # one matching atom produce a centre.
    ring_centers = []
    for atoms in sterol_PI_dict.values():
        # Have to check PI and sterols independently, as PI has same atomnames as sterol rings
        # structure of dict key ('PI', 2, 'P31', 63, 25) resname, resnum, atomname, atomnum, internal_idx
        for ring in sterol_ring_probes:
            ring_coords = [
                xyz
                for ring_atom in ring
                for key, xyz in atoms.items()
                if key[2].strip() == ring_atom and not key[0] == "PI"
            ]
            if ring_coords:
                ring_centers.append(np.mean(ring_coords, axis=0))
        # Now check for PI rings
        ring_coords = [
            xyz
            for ring_atom in PI_ring_probe
            for key, xyz in atoms.items()
            if key[2].strip() == ring_atom and key[0] == "PI"
        ]
        if ring_coords:
            ring_centers.append(np.mean(ring_coords, axis=0))

    to_remove = set()
    for ring_center in ring_centers:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            pierce_dist = np.linalg.norm(midpoints-ring_center,axis=2)
        # H to H benzene "ring diameter" should be about 4.963A (from quick Avogadro min).
        # Distance of the center of an aliphatic bond should be farther than this.
        for row in np.flatnonzero((pierce_dist < 2.5).any(axis=1)):
            to_remove.add(mol_keys[int(row)])

    if len(to_remove) > 0:
        logger.debug("The following lipids have clashing tails with sterols:")
    else:
        logger.debug("No piercing lipid found!")
    for clash in to_remove:
        residue_ids = {atom_key[:2] for atom_key in tails_dict[clash]}
        tr_names = [res_id[0] for res_id in residue_ids]
        tr_resids = [res_id[1] for res_id in residue_ids]
        logger.debug("Resnames:%s, Resids:%s" % (tr_names,tr_resids))
    return to_remove
|
|
949
|
+
|
|
950
|
+
|
|
951
|
+
def remove_piercing_lipids(pdb, to_remove, outfile="noclash.pdb", verbose=False, hexadecimal_indices=False):
    """Write a copy of ``pdb`` without the molecules listed in ``to_remove``.

    The file is parsed with ``pdb_parse_TER`` keeping every atom (hydrogens
    included), the given ``(molnum, chain)`` keys are dropped and the rest
    is written with ``pdb_write_TER``. Serial/residue numbers are written
    as "hex"/"hex" when ``hexadecimal_indices`` is true and as
    "hy36"/"decimal" otherwise.

    Returns:
        The value returned by ``pdb_write_TER`` (the output path).

    Raises:
        KeyError: if a key in ``to_remove`` is not present in the file.
    """
    molecules = pdb_parse_TER(pdb, onlybb=False, noH=False, hexadecimal_indices=hexadecimal_indices)

    if verbose:
        logger.info("Removing clashing lipids")
    for mol_key in to_remove:
        molecules.pop(mol_key)

    if hexadecimal_indices:
        serial_format, resseq_format = "hex", "hex"
    else:
        serial_format, resseq_format = "hy36", "decimal"
    return pdb_write_TER(
        molecules,
        outfile=outfile,
        serial_format=serial_format,
        resseq_format=resseq_format,
    )
|
|
962
|
+
|
|
963
|
+
|
|
964
|
+
def fix_illegal_chain_id (
    path: str,
    valid: str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    default: str = "Z",
    encoding: str = "utf-8",
    backup: bool = False,
) -> None:
    """Rewrite illegal chain IDs of a PDB file in place.

    - ATOM/HETATM/ANISOU/TER records: the character in column 22
      (index 21) is replaced by ``default`` when it is not in ``valid``.
    - SEQRES records: the same is done for column 12 (index 11).
    - Records too short to contain the chain column, and every other line,
      are copied unchanged. Line terminators are never treated as a chain
      ID.

    The result is written to a temporary file in the same directory, which
    then replaces ``path``; the permission bits of the original file are
    kept.

    Args:
        path: path of the PDB file.
        valid: characters accepted as chain ID.
        default: replacement for illegal chain IDs.
        encoding: text encoding used for reading and writing.
        backup: when true, the original content is first copied to
            ``path + ".bak"`` (an existing backup is overwritten).

    Raises:
        OSError: if the file cannot be read, written or replaced; the
            temporary file is removed before the error propagates.
    """
    import shutil  # local import: only needed by this function

    def _fix_column(record: str, column: int) -> str:
        """Return *record* with an illegal chain ID at *column* replaced."""
        if len(record) > column and record[column] not in valid:
            return record[:column] + default + record[column + 1:]
        return record

    target_prefixes = ("ATOM", "HETATM", "ANISOU", "TER")
    dir_name = os.path.dirname(path) or "."
    fd, tmp_path = tempfile.mkstemp(prefix="pdb_fix_", suffix=".tmp", dir=dir_name)
    # Only the reserved name is needed; reopening by path lets the ``with``
    # statement own the handle.
    os.close(fd)
    try:
        with open(tmp_path, "w", encoding=encoding) as fout, \
                open(path, "r", encoding=encoding) as fin:
            for line in fin:
                # Split off the terminator so it can never be taken for a
                # chain ID on a short record.
                has_newline = line.endswith("\n")
                record = line[:-1] if has_newline else line
                if record.startswith("SEQRES"):
                    record = _fix_column(record, 11)
                if record.startswith(target_prefixes):
                    record = _fix_column(record, 21)
                fout.write(record + "\n" if has_newline else record)
        # mkstemp creates the file accessible only by its owner; restore the
        # permission bits of the file being replaced.
        shutil.copymode(path, tmp_path)
        # Optional backup: copy instead of rename so ``path`` exists at all
        # times.
        if backup:
            shutil.copy2(path, path + ".bak")
        # Replace the original file with the temporary file (atomic).
        os.replace(tmp_path, path)
    except BaseException:
        # Clean up the temporary file on failure, then re-raise.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
|
|
1009
|
+
|
|
1010
|
+
# Command-line entry point: <script> PDB [-move x,y,z] [-cen]
if __name__ == "__main__":
    cli_args = sys.argv
    pdb = cli_args[1]
    # Always report the parameters of the unmodified structure first.
    print(measure_parms(pdb,23,None))
    if "-move" in cli_args:
        # The value following "-move" is a comma-separated translation vector.
        raw_vec = cli_args[cli_args.index("-move")+1]
        vec = [float(component) for component in raw_vec.split(",")]
        print(measure_parms(pdb,23,move=True, move_vec=vec))
    if "-cen" in cli_args:
        print(measure_parms(pdb,23,xy_cen=True))
|