packmol-memgen-minimal 1.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. packmol_memgen/__init__.py +2 -0
  2. packmol_memgen/__version__.py +34 -0
  3. packmol_memgen/data/LICENSE.Apache-2.0 +201 -0
  4. packmol_memgen/data/extra_solvents.lib +789 -0
  5. packmol_memgen/data/frcmod.lipid_ext +97 -0
  6. packmol_memgen/data/frcmod.solvents +129 -0
  7. packmol_memgen/data/insane_lipids.txt +138 -0
  8. packmol_memgen/data/insane_solvents.txt +45 -0
  9. packmol_memgen/data/leaprc.extra_solvents +42 -0
  10. packmol_memgen/data/leaprc.lipid_ext +48 -0
  11. packmol_memgen/data/lipid_ext.lib +12312 -0
  12. packmol_memgen/data/martini_v3.0.0.itp +356605 -0
  13. packmol_memgen/data/memgen.parm +4082 -0
  14. packmol_memgen/data/pdbs.tar.gz +0 -0
  15. packmol_memgen/data/solvent.parm +14 -0
  16. packmol_memgen/example/example.sh +31 -0
  17. packmol_memgen/lib/__init__.py +0 -0
  18. packmol_memgen/lib/amber.py +77 -0
  19. packmol_memgen/lib/charmmlipid2amber/__init__.py +0 -0
  20. packmol_memgen/lib/charmmlipid2amber/charmmlipid2amber.csv +7164 -0
  21. packmol_memgen/lib/charmmlipid2amber/charmmlipid2amber.py +225 -0
  22. packmol_memgen/lib/pdbremix/LICENSE +21 -0
  23. packmol_memgen/lib/pdbremix/__init__.py +0 -0
  24. packmol_memgen/lib/pdbremix/_version.py +1 -0
  25. packmol_memgen/lib/pdbremix/amber.py +1103 -0
  26. packmol_memgen/lib/pdbremix/asa.py +227 -0
  27. packmol_memgen/lib/pdbremix/data/aminoacid.pdb +334 -0
  28. packmol_memgen/lib/pdbremix/data/binaries.json +26 -0
  29. packmol_memgen/lib/pdbremix/data/charmm22.parameter +2250 -0
  30. packmol_memgen/lib/pdbremix/data/charmm22.topology +1635 -0
  31. packmol_memgen/lib/pdbremix/data/color_b.py +682 -0
  32. packmol_memgen/lib/pdbremix/data/hin.lib +130 -0
  33. packmol_memgen/lib/pdbremix/data/hydroxide.lib +88 -0
  34. packmol_memgen/lib/pdbremix/data/make_chi.py +92 -0
  35. packmol_memgen/lib/pdbremix/data/opls.parameter +1108 -0
  36. packmol_memgen/lib/pdbremix/data/opls.topology +1869 -0
  37. packmol_memgen/lib/pdbremix/data/phd.frcmod +82 -0
  38. packmol_memgen/lib/pdbremix/data/phd.leaprc +4 -0
  39. packmol_memgen/lib/pdbremix/data/phd.prepin +35 -0
  40. packmol_memgen/lib/pdbremix/data/template.pdb +334 -0
  41. packmol_memgen/lib/pdbremix/data/znb.frcmod +24 -0
  42. packmol_memgen/lib/pdbremix/data/znb.leaprc +7 -0
  43. packmol_memgen/lib/pdbremix/data/znb.lib +69 -0
  44. packmol_memgen/lib/pdbremix/data.py +264 -0
  45. packmol_memgen/lib/pdbremix/fetch.py +102 -0
  46. packmol_memgen/lib/pdbremix/force.py +627 -0
  47. packmol_memgen/lib/pdbremix/gromacs.py +978 -0
  48. packmol_memgen/lib/pdbremix/lib/__init__.py +0 -0
  49. packmol_memgen/lib/pdbremix/lib/docopt.py +579 -0
  50. packmol_memgen/lib/pdbremix/lib/pyqcprot.py +305 -0
  51. packmol_memgen/lib/pdbremix/namd.py +1078 -0
  52. packmol_memgen/lib/pdbremix/pdbatoms.py +543 -0
  53. packmol_memgen/lib/pdbremix/pdbtext.py +120 -0
  54. packmol_memgen/lib/pdbremix/protein.py +311 -0
  55. packmol_memgen/lib/pdbremix/pymol.py +480 -0
  56. packmol_memgen/lib/pdbremix/rmsd.py +203 -0
  57. packmol_memgen/lib/pdbremix/simulate.py +420 -0
  58. packmol_memgen/lib/pdbremix/spacehash.py +73 -0
  59. packmol_memgen/lib/pdbremix/trajectory.py +286 -0
  60. packmol_memgen/lib/pdbremix/util.py +273 -0
  61. packmol_memgen/lib/pdbremix/v3.py +16 -0
  62. packmol_memgen/lib/pdbremix/v3array.py +482 -0
  63. packmol_memgen/lib/pdbremix/v3numpy.py +350 -0
  64. packmol_memgen/lib/pdbremix/volume.py +155 -0
  65. packmol_memgen/lib/utils.py +1017 -0
  66. packmol_memgen/main.py +2827 -0
  67. packmol_memgen_minimal-1.1.16.dist-info/METADATA +664 -0
  68. packmol_memgen_minimal-1.1.16.dist-info/RECORD +71 -0
  69. packmol_memgen_minimal-1.1.16.dist-info/WHEEL +4 -0
  70. packmol_memgen_minimal-1.1.16.dist-info/entry_points.txt +2 -0
  71. packmol_memgen_minimal-1.1.16.dist-info/licenses/LICENSE +338 -0
@@ -0,0 +1,1017 @@
1
+ #!/usr/bin/python
2
+
3
+ from __future__ import print_function
4
+ import os, sys, math, string, copy, random, shlex, subprocess, time
5
+ import numpy as np
6
+ import logging
7
+ import contextlib
8
+ import warnings
9
+ from scipy import integrate
10
+ from .pdbremix import data
11
+ import tempfile
12
+
13
+ logger = logging.getLogger("pmmg_log")
14
+
15
# Locate a PDB2PQR command line (consumed by pdb2pqr_protonate).
# `pdb2pqr` ends up as a command string, or a falsy value when nothing usable
# was found. shutil.which() returns None instead of raising for a missing
# executable, so the "not available" message is emitted from an explicit check
# rather than only from the exception handler.
try:
    import shutil
    pdb2pqr = shutil.which("pdb2pqr30") or shutil.which("pdb2pqr")
    if not pdb2pqr and shutil.which("uv"):
        # Fall back to running pdb2pqr through uv when only uv is on PATH.
        pdb2pqr = "uv run pdb2pqr"
except Exception:
    pdb2pqr = False
if not pdb2pqr:
    logger.debug("PDB2PQR not available. Protonation with --pdb2pqr will not be available")
26
+
27
+ VCH = 21.65 # A^3
28
+ VCH2 = 27.03 # A^3
29
+ avogadro = 6.02214086*10**23
30
+ residues = {"CYS","CYX","CYM","MET","HIS","HSD","HIE","HID","HIP","HSE","SER","GLN","ASP","ASH","GLU","GLH","TYR","THR","ALA","LEU","ILE","PHE","TRP","ARG","ASN","LYS","LYN","VAL","PRO","GLY"}
31
+ cgatoms = {"CA","CB","C","N","O"}
32
+ charged = {"ASP":-1,"GLU":-1,"LYS":1,"ARG":1,"HIP":1,"Cl-":-1,"MG":2,"Na+":1,"CA":2,"OHE":-0.308100,
33
+ "A":-1,"A5":-0.3081,"A3":-0.6919,"DA":-1,"DA5":-0.3079,"DA3":-0.6921,
34
+ "C":-1,"C5":-0.3081,"C3":-0.6919,"DC":-1,"DC5":-0.3079,"DC3":-0.6921,
35
+ "G":-1,"G5":-0.3081,"G3":-0.6919,"DG":-1,"DG5":-0.3079,"DG3":-0.6921,
36
+ "U":-1,"U5":-0.3081,"U3":-0.6919,"DT":-1,"DT5":-0.3079,"DT3":-0.6921,
37
+ "PTR":-2,"SEP":-2,"TPO":-2,"Y1P":-1,"S1P":-1,"T1P":-1,"H1D": 0,"H2D":-1,"H1E": 0,"H2E":-1,
38
+ "NME":1,"ACE":-1} #These two are actually neutral, but by being added, the opposite terminal end charge is not neutralized. Should work as long as no custom terminal ends or protein constructs are used.
39
+
40
+ tails = {"LAL","MY","PA","ST","OL","AR","DHA","SA"}
41
+
42
+ sterols_PI = {"CHL1","ERG","CAM","SIT","STI","PI"}
43
+ sterol_ring_probes = [["C1","C2","C3","C4","C5","C10"],["C5","C6","C7","C8","C9","C10"],["C8","C9","C11","C12","C13","C14"],["C13","C14","C15","C16","C17"]]
44
+ PI_ring_probe = ["C31","C32","C33","C34","C35","C36"]
45
+
46
+ #masses = {"C": 12, "S": 32, "O": 16, "H": 1, "N": 14}
47
+
48
+ _HY36_DIGITS_UPPER = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
49
+ _HY36_DIGITS_LOWER = "0123456789abcdefghijklmnopqrstuvwxyz"
50
+
51
+ _XSPONGE_ION_RENAMES = {
52
+ "LI": "LI",
53
+ "Na+": "NA",
54
+ "K+": "K",
55
+ "RB": "RB",
56
+ "CS": "CS",
57
+ "F": "F",
58
+ "Cl-": "CL",
59
+ "BR": "BR",
60
+ "IOD": "I",
61
+ "I-": "I",
62
+ "AG": "AG",
63
+ "CU1": "CU",
64
+ "TL": "TL",
65
+ "Be": "BE2",
66
+ "CU": "CU2",
67
+ "NI": "NI2",
68
+ "PT": "PT2",
69
+ "ZN": "ZN2",
70
+ "CO": "CO2",
71
+ "PD": "PD2",
72
+ "Ag": "AG2",
73
+ "Cr": "CR2",
74
+ "FE2": "FE2",
75
+ "MG": "MG2",
76
+ "V2+": "V2",
77
+ "MN": "MN2",
78
+ "HG": "HG2",
79
+ "CD": "CD2",
80
+ "YB2": "YB2",
81
+ "CA": "CA2",
82
+ "Sn": "SN2",
83
+ "PB": "PB2",
84
+ "EU": "EU2",
85
+ "SR": "SR2",
86
+ "Sm": "SM2",
87
+ "BA": "BA2",
88
+ "Ra": "RA2",
89
+ "AL": "AL3",
90
+ "FE": "FE3",
91
+ "CR": "CR3",
92
+ "IN": "IN3",
93
+ "Tl": "TL3",
94
+ "Y": "Y3",
95
+ "LA": "LA3",
96
+ "CE": "CE3",
97
+ "PR": "PR3",
98
+ "Nd": "ND3",
99
+ "SM": "SM3",
100
+ "EU3": "EU3",
101
+ "GD3": "GD3",
102
+ "TB": "TB3",
103
+ "Dy": "DY3",
104
+ "Er": "ER3",
105
+ "Tm": "TM3",
106
+ "LU": "LU3",
107
+ "Hf": "HF4",
108
+ "Zr": "ZR4",
109
+ "Ce": "CE4",
110
+ "U4+": "U4",
111
+ "Pu": "PU4",
112
+ "Th": "TH4",
113
+ }
114
+
115
+ def _base36_encode(value, width, digits):
116
+ out = []
117
+ for _ in range(width):
118
+ value, rem = divmod(value, 36)
119
+ out.append(digits[rem])
120
+ if value:
121
+ raise ValueError("Value exceeds width for base-36 encoding")
122
+ return "".join(reversed(out))
123
+
124
def hy36encode(width, value):
    """Encode integer using PDB hybrid-36 for fixed-width fields.

    Values below 10**width are written as right-aligned decimals; larger
    values are written in base 36, first with the upper-case alphabet and
    then with the lower-case one. Raises ValueError for negative values and
    for values beyond the lower-case range.
    """
    if value < 0:
        raise ValueError("Hybrid-36 encoding does not support negative values")
    decimal_span = 10**width
    if value < decimal_span:
        return f"{value:>{width}d}"
    alpha_span = 26 * (36**(width - 1))
    first_alpha = 10 * (36**(width - 1))
    overflow = value - decimal_span
    for alphabet in (_HY36_DIGITS_UPPER, _HY36_DIGITS_LOWER):
        if overflow < alpha_span:
            return _base36_encode(first_alpha + overflow, width, alphabet)
        overflow -= alpha_span
    raise ValueError("Value exceeds hybrid-36 representable range")
140
+
141
+ def _format_pdb_int(value, width, mode):
142
+ if mode == "hy36":
143
+ return hy36encode(width, value)
144
+ if mode == "hex":
145
+ return f"{value:>{width}X}"
146
+ if mode == "decimal":
147
+ return f"{value:>{width}d}"
148
+ raise ValueError("Unknown format mode: %s" % mode)
149
+
150
def convert_pdb_indices_to_hybrid36(
    pdbfile,
    outfile=None,
    atom_base=10,
    res_base=10,
):
    """Convert atom serials and residue numbers to hybrid-36 in a PDB file.

    ATOM, HETATM, ANISOU and TER records have columns 7-11 (serial) and
    23-26 (residue number) parsed with `atom_base` / `res_base` and rewritten
    through hy36encode; all other lines are copied unchanged. The result is
    written to a temporary file next to `outfile` and then moved into place,
    so `outfile` defaults to overwriting `pdbfile`. Fields that cannot be
    parsed or encoded are left as they are and a warning is logged.
    """
    target = pdbfile if outfile is None else outfile
    fd, tmp_path = tempfile.mkstemp(
        prefix="pdb_hy36_", suffix=".tmp", dir=os.path.dirname(target) or "."
    )
    indexed_records = ("ATOM", "HETATM", "ANISOU", "TER")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fout, \
                open(pdbfile, "r", encoding="utf-8") as fin:
            for line in fin:
                if not line.startswith(indexed_records):
                    fout.write(line)
                    continue
                # Pad short records so both index fields can be sliced.
                raw = line.rstrip("\n").ljust(26)
                serial_field = raw[6:11].strip()
                resseq_field = raw[22:26].strip()
                if serial_field not in ("", "*****"):
                    try:
                        serial = hy36encode(5, int(serial_field, atom_base))
                        raw = raw[:6] + serial + raw[11:]
                    except ValueError:
                        logger.warning("Failed to parse atom serial '%s' in %s", serial_field, pdbfile)
                if resseq_field:
                    try:
                        resseq = hy36encode(4, int(resseq_field, res_base))
                        raw = raw[:22] + resseq + raw[26:]
                    except ValueError:
                        logger.warning("Failed to parse residue id '%s' in %s", resseq_field, pdbfile)
                fout.write(raw + "\n")
        os.replace(tmp_path, target)
    finally:
        # Only still present when something failed before the replace.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
192
+
193
def apply_xponge_ion_names(pdbfile, outfile=None):
    """Rename ion residues in a PDB file according to _XSPONGE_ION_RENAMES.

    Every ATOM/HETATM record is padded to 80 columns. When its residue name
    (columns 18-20) is a key of the table, the atom name, residue name and
    element columns are overwritten with the replacement name (the element is
    the upper-cased letters of that name, at most two characters). The result
    is written to a temporary file next to `outfile` and moved into place;
    `outfile` defaults to overwriting `pdbfile`.
    """
    target = pdbfile if outfile is None else outfile
    fd, tmp_path = tempfile.mkstemp(
        prefix="pdb_xponge_", suffix=".tmp", dir=os.path.dirname(target) or "."
    )
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as fout, \
                open(pdbfile, "r", encoding="utf-8") as fin:
            for line in fin:
                if not line.startswith(("ATOM", "HETATM")):
                    fout.write(line)
                    continue
                raw = line.rstrip("\n").ljust(80)
                new_name = _XSPONGE_ION_RENAMES.get(raw[17:20].strip())
                if new_name is not None:
                    letters = "".join(ch for ch in new_name if ch.isalpha()).upper()
                    atom_field = f"{new_name:>4}"
                    res_field = f"{new_name:>3}"
                    elem_field = f"{letters:>2}"[:2]
                    # Each field keeps its column width, so the three
                    # overwrites can be spliced in a single pass.
                    raw = (raw[:12] + atom_field + raw[16:17] + res_field
                           + raw[20:76] + elem_field + raw[78:])
                fout.write(raw + "\n")
        os.replace(tmp_path, target)
    finally:
        # Only still present when something failed before the replace.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
223
+
224
def pdb2pqr_protonate(pdb,overwrite=False,ffout='AMBER',pH=7.0, output_dir=None):
    """Protonate `pdb` by running the external PDB2PQR command.

    Parameters:
        pdb: path of the input PDB file.
        overwrite: when False and both "<stem>_H.pdb" and "<stem>_H.pqr"
            already exist, the existing PDB path is returned without running
            anything.
        ffout: naming scheme passed to PDB2PQR as --ffout.
        pH: value passed as --with-ph.
        output_dir: directory for the outputs; defaults to the directory of
            `pdb`.

    Returns the path of the protonated PDB file. PDB2PQR output is captured in
    "pdb2pqr.log" next to that file. The process exits with status 1 when
    PDB2PQR is unavailable, fails, or does not produce the output file.
    """
    if not pdb2pqr:
        logger.critical("PDB2PQR module was not found. Use a different method to protonate your system")
        sys.exit(1)
    stem = os.path.splitext(os.path.basename(pdb))[0]
    base_dir = output_dir if output_dir else (os.path.dirname(pdb) or ".")
    output_pqr = os.path.join(base_dir, stem + "_H.pqr")
    output_pdb = os.path.join(base_dir, stem + "_H.pdb")
    if os.path.exists(output_pdb) and os.path.exists(output_pqr) and not overwrite:
        return output_pdb
    # PDB2PQR is run as a subprocess rather than imported: its main function
    # calls logging.basicConfig, which disrupts this package's logging setup.
    log_path = os.path.join(os.path.dirname(output_pdb) or ".", "pdb2pqr.log")
    cmd = shlex.split(pdb2pqr) + [
        '--pdb-output='+output_pdb,
        '--ff=PARSE',
        '--ffout='+str(ffout),
        '--titration-state-method=propka',
        '--with-ph='+str(pH),
        pdb,
        output_pqr,
    ]
    with open(log_path, "w") as log_handle:
        result = subprocess.run(cmd, stdout=log_handle, stderr=log_handle)
    if result.returncode != 0:
        logger.critical("CRITICAL:\n PDB2PQR failed! Check %s", log_path)
        sys.exit(1)
    # Give the filesystem a moment (up to ~1 s) to expose the output file.
    for _ in range(5):
        if os.path.exists(output_pdb) and os.stat(output_pdb).st_size > 0:
            break
        time.sleep(0.2)
    if not os.path.exists(output_pdb) or os.stat(output_pdb).st_size == 0:
        logger.critical("CRITICAL:\n PDB2PQR did not create %s. Check %s and input paths.", output_pdb, log_path)
        sys.exit(1)
    return output_pdb
262
+
263
+ def estimated_density(MW):
264
+ density = 1.41 + 0.145*math.exp(float(-MW)/13000) #Protein Sci. 2004 Oct; 13(10):2825-2828
265
+ return density
266
+
267
def distribute_integer(integer, fracs):
    """Split `integer` into whole-number parts proportional to `fracs`.

    Each part is rounded against what is still left to distribute, so the
    parts add up to `integer` whenever the fractions have a non-zero total.
    Returns a list with one entry per fraction (empty for empty `fracs`).
    """
    dist = []
    remaining_total = sum(fracs)
    remaining = integer
    for frac in fracs:
        # Once the remaining weight is exhausted (e.g. trailing zero
        # fractions) nothing is left to hand out; avoid dividing by zero.
        share = int(round(remaining*frac/remaining_total)) if remaining_total else 0
        dist.append(share)
        remaining_total -= frac
        remaining -= share
    return dist
276
+
277
def rotation_matrix(axis, theta):
    """
    Return the rotation matrix associated with counterclockwise rotation about
    the given axis by theta radians.

    The axis is normalised first; the matrix is built from the Euler-Rodrigues
    parameters (a, b, c, d).
    """
    unit = np.asarray(axis)
    unit = unit/math.sqrt(np.dot(unit, unit))
    half = theta/2.0
    a = math.cos(half)
    b, c, d = -unit*math.sin(half)
    aa, bb, cc, dd = a*a, b*b, c*c, d*d
    bc, ad, ac, ab, bd, cd = b*c, a*d, a*c, a*b, b*d, c*d
    row_x = [aa+bb-cc-dd, 2*(bc+ad), 2*(bd-ac)]
    row_y = [2*(bc-ad), aa+cc-bb-dd, 2*(cd+ab)]
    row_z = [2*(bd+ac), 2*(cd-ab), aa+dd-bb-cc]
    return np.array([row_x, row_y, row_z])
291
+
292
def vector_angle(vec1,vec2, ref=(1,1,0)):
    """Return the unsigned angle in radians between `vec1` and `vec2`.

    `ref` is accepted for backward compatibility and is not used.
    The cosine is clipped to [-1, 1] before arccos: rounding can push it
    marginally outside that interval for (anti)parallel vectors, which would
    otherwise yield NaN.
    """
    cosine = np.dot(vec1,vec2)
    cosine /= np.linalg.norm(vec1)*np.linalg.norm(vec2)
    return np.arccos(np.clip(cosine, -1.0, 1.0))
298
+
299
def align_vectors(vec1,vec2, R=None):
    """Rotate `vec1` with `R`, or onto the direction of `vec2` when R is None.

    A (near) zero `vec1` returns a zero vector. When `R` is given the result
    is simply np.matmul(vec1, R) and `vec2` is not consulted. When `R` is None
    a rotation is derived from the two vectors so that the result has the
    length of `vec1` and points along `vec2`.
    """
    if np.allclose(vec1,np.array([0,0,0])):
        return np.array([0,0,0])
    if R is None:
        angle = vector_angle(vec1,vec2)
        # The result is computed as vec1 @ R (i.e. R transposed applied to
        # vec1), so the axis is taken as vec2 x vec1 to rotate towards vec2.
        axis = np.cross(vec2,vec1)
        if np.allclose(axis, 0):
            # Parallel or anti-parallel input: no unique rotation axis exists.
            aligned = np.array(vec1, dtype=float)
            return aligned if angle < np.pi/2 else -aligned
        R = rotation_matrix(axis,angle)
    return np.matmul(vec1,R)
306
+
307
def rotate_pdb(pdb_ori,tip_num,pivot_num, ref=[0,0,1],randomize=False,randomize_vec=[0,0,1]):
    """Return a rotated deep copy of `pdb_ori`, pivoting about a reference atom.

    `pdb_ori` maps residue keys to {atom key: coordinate array}; atoms are
    selected by the second element of the atom key. `tip_num` and `pivot_num`
    are either a single value or a list of values (a list selects several
    atoms whose coordinates are averaged). The rotation is built from the
    tip-pivot vector and `ref`, and every atom is rotated about the pivot.

    With `randomize`, an additional rotation by a random angle about
    `randomize_vec` is composed into the matrix before each residue is
    processed (so it accumulates from residue to residue).
    """
    pdb = copy.deepcopy(pdb_ori)
    tip_is_list = isinstance(tip_num, list)
    pivot_is_list = isinstance(pivot_num, list)
    tip = []
    pivot = []
    for res in pdb:
        for atom in pdb[res]:
            if tip_is_list:
                if atom[1] in tip_num:
                    tip.append(pdb[res][atom])
            elif atom[1] == tip_num:
                tip = pdb[res][atom]
            if pivot_is_list:
                if atom[1] in pivot_num:
                    pivot.append(pdb[res][atom])
            elif atom[1] == pivot_num:
                pivot = pdb[res][atom]
    if tip_is_list:
        tip = np.mean(tip,axis=0)
    if pivot_is_list:
        pivot = np.mean(pivot,axis=0)
    ref_vec = np.array(ref)
    angle = vector_angle(tip-pivot,ref_vec)
    axis = np.cross(tip-pivot,ref_vec) # vector perpendicular to v1 v2 plane
    # NOTE(review): align_vectors applies this matrix as vec @ R; confirm the
    # resulting sense of rotation is the intended one.
    R = rotation_matrix(axis,angle)
    # Honour the caller-supplied axis (its default matches the previously
    # hard-coded [0,0,1]).
    spin_axis = np.array(randomize_vec)
    for res in pdb:
        if randomize:
            spin = random.uniform(0,6.28)
            R = np.matmul(R,rotation_matrix(spin_axis,spin))
        for atom in pdb[res]:
            pdb[res][atom] = align_vectors(pdb[res][atom]-pivot,ref_vec,R)+pivot
    return pdb
340
+
341
def randomize_pdb(pdb_ori,tip_num,pivot_num, ref=[0,0,1]):
    """Return a deep copy of `pdb_ori` spun by a random angle about `ref`.

    `pdb_ori` maps residue keys to {atom key: coordinate array}; the pivot is
    the atom whose key's second element equals `pivot_num`, or the mean of
    the atoms selected by a list of such values. All atoms are rotated about
    the pivot, but only when the last residue holds more than one atom.
    `tip_num` is accepted for signature parity with rotate_pdb and is not
    used. An empty structure is returned unchanged.
    """
    pdb = copy.deepcopy(pdb_ori)
    if not pdb:
        # Nothing to rotate (previously this hit an unbound loop variable).
        return pdb
    pivot_is_list = isinstance(pivot_num, list)
    pivot = []
    last_res = None
    for res in pdb:
        last_res = res
        for atom in pdb[res]:
            if pivot_is_list:
                if atom[1] in pivot_num:
                    pivot.append(pdb[res][atom])
            elif atom[1] == pivot_num:
                pivot = pdb[res][atom]
    if pivot_is_list:
        pivot = np.mean(pivot,axis=0)
    angle = random.uniform(0,6.28)
    axis = np.array(ref)
    R = rotation_matrix(axis,angle)
    # Explicitly the last residue: the original read it from the loop
    # variable left over by the scan above.
    if len(pdb[last_res]) > 1:
        for res in pdb:
            for atom in pdb[res]:
                pdb[res][atom] = align_vectors(pdb[res][atom]-pivot,axis,R)+pivot
    return pdb
371
+
372
def translate_pdb(pdb_ori,target=None,ref_atm=None,vec=None):
    """Return a translated deep copy of `pdb_ori`.

    `pdb_ori` maps residue keys to {atom key: coordinate array}. Either
    `target` and `ref_atm` are given, in which case the structure is shifted
    so that the atom whose key's second element equals `ref_atm` lands on
    `target` (the last match wins), or `vec` is given and subtracted from
    every coordinate.

    Raises ValueError when `ref_atm` matches no atom. Exits with status 1
    when neither argument combination is provided.
    """
    pdb = copy.deepcopy(pdb_ori)
    if target is not None and ref_atm is not None:
        ref = None
        for res in pdb:
            for atom in pdb[res]:
                if atom[1] == ref_atm:
                    ref = pdb[res][atom]
        if ref is None:
            raise ValueError("Reference atom %r was not found in the structure" % (ref_atm,))
        tran_vec = ref-target
    elif vec is not None:
        tran_vec = vec
    else:
        print("A target and reference atom, or a translation vector has to be provided")
        sys.exit(1)
    for res in pdb:
        for atom in pdb[res]:
            pdb[res][atom] = pdb[res][atom]-tran_vec
    return pdb
389
+
390
def superimpose_pdb(pdb1,pdb2):
    """Return a copy of `pdb2` moved onto `pdb1` using three reference points.

    Both arguments map residue keys to {atom key: coordinate array}. Three
    point pairs are taken from matching keys, then `pdb2` is translated so
    the first pair coincides, rotated so the second pair lines up, and rotated
    about that direction to bring the third pair together.

    Side effects: writes the intermediate structures to "trans.pdb" and
    "rot1.pdb" in the current directory and may print "Flipping!".
    """

    points = []

    # Reference pair i is the i-th atom of the i-th residue of pdb1 and the
    # atom stored under the same keys in pdb2.
    count = 0
    while count < 3:
        key = list(pdb1.keys())[count]
        key2 = list(pdb1[key].keys())[count]
        points.append([pdb1[key][key2],pdb2[key][key2]])
        count += 1

    # Translation that puts the first pdb2 point on the first pdb1 point.
    trans_vec = points[0][1]-points[0][0]
    # print(trans_vec)

    for n, point in enumerate(points):
        points[n][1] = points[n][1]-trans_vec

    # First rotation: bring the second pdb2 point onto the second pdb1 point.
    angle = vector_angle(points[1][1]-points[0][0],points[1][0]-points[0][0])
    axis = np.cross(points[1][1]-points[0][0],points[1][0]-points[0][0])

    R = rotation_matrix(axis,angle)


    # If the rotation did not land on the target, use the opposite sense.
    if not np.allclose(align_vectors(points[1][1]-points[0][0],points[1][0]-points[0][0],R),points[1][0]-points[0][0],atol=1e-01):
        print("Flipping!")
        R = rotation_matrix(axis,-angle)

    pdb3 = translate_pdb(pdb2,vec=trans_vec)

    pdb_write(pdb3,outfile="trans.pdb")

    for res in pdb3:
        for atom in pdb3[res]:
            pdb3[res][atom] = align_vectors(pdb3[res][atom]-points[0][0],points[1][0]-points[0][0],R)+points[0][0]

    # Keep the tracked pdb2 reference points in step with the structure.
    for n, point in enumerate(points):
        points[n][1] = align_vectors(points[n][1]-points[0][0],points[1][0]-points[0][0],R)+points[0][0]

    pdb_write(pdb3,outfile="rot1.pdb")

    # Second rotation: about the line through the first two pdb1 points.
    axis = points[1][0]-points[0][0]

    axis_n = axis/np.linalg.norm(axis)
    # Project the third points onto that line (line_point2 is not used below).
    vec_proj1 = np.dot((points[2][1]-points[0][0]),axis_n)
    vec_proj2 = np.dot((points[2][0]-points[0][0]),axis_n)
    line_point1 = points[0][0] + vec_proj1*axis_n
    line_point2 = points[0][0] + vec_proj2*axis_n

    angle = vector_angle(points[2][1]-line_point1,points[2][0]-line_point1)

    R = rotation_matrix(axis,angle)


    # Same sense check as for the first rotation, with a tighter tolerance.
    if not np.allclose(align_vectors(points[2][1]-line_point1,points[2][0]-line_point1,R),points[2][0]-line_point1,atol=1e-02):
        # print("Flipping2!")
        R = rotation_matrix(axis,-angle)

    for res in pdb3:
        for atom in pdb3[res]:
            pdb3[res][atom] = align_vectors(pdb3[res][atom]-line_point1,points[1][0]-line_point1,R)+line_point1

    return pdb3
452
+
453
def sphere_dist(rad, dx, dy=0, dz=0):
    """Return the arc length on a sphere of radius `rad` spanned by the chord (dx, dy, dz)."""
    chord = np.sqrt(dx**2+dy**2+dz**2)
    half_angle = np.arcsin(chord/(2*rad))
    return rad*2*half_angle
455
+
456
def sphere_rectangle_area(rad, a, b):
    """Return the area of a rectangle-like patch on a sphere of radius `rad`.

    `a` and `b` are first converted to arc lengths with sphere_dist.
    """
    arc_a = sphere_dist(rad, a)
    arc_b = sphere_dist(rad, b)
    tangent_product = np.tan(arc_a/(2*rad))*np.tan(arc_b/(2*rad))
    return rad**2*(2*np.pi-4*np.arccos(tangent_product))
460
+
461
def sphere_integral(a,b,c,r):
    """Numerically integrate 1 over the sphere of radius `r` centred at (a, b, c)."""
    def unit(z, y, x):
        return 1

    def y_half_width(x):
        return np.sqrt(r**2-(x-a)**2)

    def z_half_width(x, y):
        return np.sqrt(r**2-(x-a)**2-(y-b)**2)

    result = integrate.tplquad(
        unit, a-r, a+r,
        lambda x: -y_half_width(x)+b, lambda x: y_half_width(x)+b,
        lambda x, y: -z_half_width(x, y)+c, lambda x, y: z_half_width(x, y)+c,
    )
    return result[0]
466
+
467
def sphere_integral_square(x_min,x_max,y_min,y_max,z_min=None, z_max=None,r1=None,r2=None, a=0,b=0,c=0):
    """Numerically integrate 1 over a rectangular footprint between two z bounds.

    The bounds are chosen from the arguments, in this order of precedence:
    `r1` and `r2` (between two upper sphere caps), `z_min` and `r2` (plane to
    cap), `z_max` and `r1` (cap to plane). Caps are centred at (a, b, c).
    Raises ValueError when no combination is given, or (after printing a
    message) when the relevant radius does not reach the (x_max, y_max) corner.
    """
    def cap(radius):
        return lambda x, y: np.sqrt(radius**2-(x-a)**2-(y-b)**2)+c

    def plane(level):
        return lambda x, y: level

    if r1 is not None and r2 is not None:
        limiting, lower, upper = r1, cap(r1), cap(r2)
    elif z_min is not None and r2 is not None:
        limiting, lower, upper = r2, plane(z_min), cap(r2)
    elif z_max is not None and r1 is not None:
        limiting, lower, upper = r1, cap(r1), plane(z_max)
    else:
        raise ValueError
    if limiting**2-(x_max-a)**2-(y_max-b)**2 < 0:
        print("Radius is too small for given dimensions!")
        raise ValueError
    volume = integrate.tplquad(lambda z, y, x: 1, x_min, x_max,
                               lambda x: y_min, lambda x: y_max,
                               lower, upper)
    return volume[0]
492
+
493
def gauss_rectangle_area(x_min,x_max,y_min,y_max,b,d,h,a=0,c=0):
    """Numerically integrate the surface area of a 2D Gaussian over a rectangle.

    The surface is h*exp(-(x-a)^2/(2 b^2) - (y-c)^2/(2 d^2)); the integrand is
    sqrt(1 + (dz/dx)^2 + (dz/dy)^2).
    """
    def area_element(y, x):
        bump = np.exp(-(x-a)**2/(2*b**2)-(y-c)**2/(2*d**2))
        slope_x = -h*(x-a)*bump/b**2
        slope_y = -h*(y-c)*bump/d**2
        return np.sqrt(1 + slope_x**2 + slope_y**2)

    area = integrate.dblquad(area_element, x_min, x_max,
                             lambda x: y_min, lambda x: y_max)
    return area[0]
497
+
498
def gauss_integral_square(x_min,x_max,y_min,y_max,b,d,h,a=0,c=0,z_min=None, z_max=None, g1=None, g2=None):
    """Numerically integrate 1 over a rectangular footprint between two z bounds.

    The curved bounds are the Gaussian surface
    h*exp(-(x-a)^2/(2 b^2) - (y-c)^2/(2 d^2)) shifted by `g1` (lower) or `g2`
    (upper). Bounds are chosen from the arguments, in this order of
    precedence: `g1` and `g2`, `z_min` and `g2`, `z_max` and `g1`.
    Raises ValueError when no combination is given or when the upper offset
    lies below the lower one.
    """
    def surface(offset):
        # scipy's tplquad calls the z-boundary functions as f(x, y); the
        # argument order therefore has to be (x, y), otherwise x and y are
        # swapped for any Gaussian with a != c or b != d.
        return lambda x, y: h*np.exp(-(x-a)**2/(2*b**2)-(y-c)**2/(2*d**2))+offset

    if g1 is not None and g2 is not None:
        if g2-g1 < 0:
            raise ValueError
        lower, upper = surface(g1), surface(g2)
    elif z_min is not None and g2 is not None:
        if g2-z_min < 0:
            raise ValueError
        lower, upper = (lambda x, y: z_min), surface(g2)
    elif z_max is not None and g1 is not None:
        if z_max-g1 < 0:
            raise ValueError
        lower, upper = surface(g1), (lambda x, y: z_max)
    else:
        raise ValueError
    return integrate.tplquad(lambda z, y, x: 1, x_min, x_max,
                             lambda x: y_min, lambda x: y_max,
                             lower, upper)[0]
522
+
523
+ class MembraneParams(object):
524
+ """
525
+ A class to store membrane params corresponding to a PDB
526
+ """
527
+ def __init__(self,pdb, leaflet_z, grid=None, move=False, move_vec=[0,0,0], xy_cen=False, z_cen = False, outpdb="PROT.pdb", chain=" ",renumber=False):
528
+ #Getting variables into class attributes
529
+ self.pdb = pdb
530
+ self.leaflet_z = leaflet_z
531
+ self.grid = grid
532
+ self.move = move
533
+ self.move_vec = move_vec
534
+ self.xy_cen = xy_cen
535
+ self.z_cen = z_cen
536
+ self.outpdb = outpdb
537
+ self.chain = chain
538
+ self.renumber = renumber
539
+
540
+
541
+ #Variables to store values after
542
+ self.density = None
543
+ self.chains = 1
544
+ self.charge = 0
545
+ self.mass = 0
546
+ self.hydrogens = 0
547
+ self.use_hex = False
548
+ self.mem_atoms_mass_up = 0
549
+ self.mem_atoms_mass_down = 0
550
+ self.solv_atoms_mass_up = 0
551
+ self.solv_atoms_mass_down = 0
552
+ self.new_pdb = []
553
+ self.x_mem = []
554
+ self.y_mem = []
555
+ self.pdblines = None
556
+ self.minmax = None
557
+
558
+ self.x = []
559
+ self.y = []
560
+ self.z = []
561
+
562
+ self._x_cen = 0
563
+ self._y_cen = 0
564
+ self._z_cen = 0
565
+
566
+ def read_pdb(self):
567
+ "Read pdb lines into class"
568
+ file = open(self.pdb, "r")
569
+ self.pdblines = file.readlines()
570
+ file.close()
571
+
572
+ def write_pdb(self):
573
+ new_file = open(self.outpdb,"w")
574
+ new_file.writelines(self.new_pdb)
575
+ new_file.close()
576
+
577
+ def xyz_center(self):
578
+ """
579
+ Calculate "center" of pdb from coordinate max min average
580
+ """
581
+ x = []
582
+ y = []
583
+ z = []
584
+ for line in self.pdblines:
585
+ if (line[0:4] == "ATOM" or line[0:6] == "HETATM") and line[17:20].strip() != "DUM":
586
+ x_coord = float(line[30:38])+self.move_vec[0]
587
+ y_coord = float(line[38:46])+self.move_vec[1]
588
+ z_coord = float(line[46:54])+self.move_vec[2]
589
+ x.append(float(x_coord))
590
+ y.append(float(y_coord))
591
+ z.append(float(z_coord))
592
+ self._x_cen = (max(x)+min(x))/2
593
+ self._y_cen = (max(y)+min(y))/2
594
+ self._z_cen = (max(z)+min(z))/2
595
+
596
+ def pdb_reindex(self):
597
+ """
598
+ Go over pdblines, renumber, add chain ids and skip as needed
599
+ """
600
+ last_chain = None
601
+ last_resnum = None
602
+ last_type = None
603
+ track = None
604
+ chain_list = list(string.ascii_uppercase)+list(string.ascii_lowercase)+list(map(str,range(0,10)))
605
+ resnum_index = 1
606
+ chain = self.chain
607
+
608
+ #Calcualte PDB center to be used later
609
+ self.xyz_center()
610
+
611
+ if chain == " ":
612
+ chain_index = 0
613
+ else:
614
+ try:
615
+ chain_index = chain_list.index(chain)
616
+ except:
617
+ print("Chain ID not found in list")
618
+ chain_index = 0
619
+ for line in self.pdblines:
620
+ if line[0:3] == "TER":
621
+ self.new_pdb.append("TER\n")
622
+ continue
623
+ if (line[0:4] == "ATOM" or line[0:6] == "HETATM") and line[17:20].strip() != "DUM":
624
+ if last_type is not None and line[0:6].strip() != last_type:
625
+ if not line.startswith("TER") and not self.new_pdb[-1].startswith("TER"):
626
+ self.new_pdb.append("TER\n")
627
+ last_type = line[0:6].strip()
628
+ residue = line[17:21].strip()
629
+ atomnum = int(line[6:11].strip())
630
+ atomname = line[12:16].strip()
631
+ resnum = int(line[22:26].strip())
632
+ if self.renumber and resnum != last_resnum:
633
+ last_resnum = resnum
634
+ resnum_new = resnum_index
635
+ resnum_index += 1
636
+ if resnum_new > 9999:
637
+ resnum_new = ((resnum_new-1)%9999)+1
638
+ if self.renumber:
639
+ resnum = resnum_new
640
+ segid = line[72:76].strip()
641
+ if residue == "ILE" and atomname == "CD":
642
+ atomname = "CD1"
643
+ if residue == "CYM":
644
+ if atomname == "HN1" or atomname == "HB1":
645
+ continue
646
+ if atomname == "OT1":
647
+ atomname = "O"
648
+ if atomname == "OT2":
649
+ atomname = "OXT"
650
+ if len(atomname) == 3:
651
+ ali = ">"
652
+ else:
653
+ ali = "^"
654
+ if last_chain is not None and last_chain != line[21:22]:
655
+ if not line.startswith("TER") and not self.new_pdb[-1].startswith("TER"):
656
+ self.new_pdb.append("TER\n")
657
+ self.chains += 1
658
+ chain_index += 1
659
+ chain = chain_list[chain_index]
660
+ last_chain = line[21:22]
661
+ if not self.move and not self.xy_cen:
662
+ x_coord = float(line[30:38])
663
+ y_coord = float(line[38:46])
664
+ z_coord = float(line[46:54])
665
+ elif not self.move:
666
+ x_coord = float(line[30:38])-self._x_cen
667
+ y_coord = float(line[38:46])-self._y_cen
668
+ z_coord = float(line[46:54])
669
+ if self.z_cen:
670
+ z_coord = float(line[46:54])-self._z_cen
671
+ elif not self.xy_cen:
672
+ x_coord = float(line[30:38])+self.move_vec[0]
673
+ y_coord = float(line[38:46])+self.move_vec[1]
674
+ z_coord = float(line[46:54])+self.move_vec[2]
675
+ else:
676
+ x_coord = float(line[30:38])+self.move_vec[0]-self._x_cen
677
+ y_coord = float(line[38:46])+self.move_vec[1]-self._y_cen
678
+ z_coord = float(line[46:54])+self.move_vec[2]
679
+ if self.z_cen:
680
+ z_coord = float(line[46:54])+self.move_vec[2]-self._z_cen
681
+ self.x.append(float(x_coord))
682
+ self.y.append(float(y_coord))
683
+ self.z.append(float(z_coord))
684
+ line = line[0:6]+"{:>5d} {:{align}4} {:<4}{:1}{:>4} {:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f} {:<4}{:>2}\n".format( atomnum,atomname,residue,chain,resnum,x_coord,y_coord,z_coord,1,0,segid,atomname[0],align=ali)
685
+ self.new_pdb.append(line)
686
+
687
+ if line[17:20].strip() in charged and track != line[22:26].strip():
688
+ self.charge += charged[line[17:20].strip()]
689
+ track = line[22:26].strip()
690
+ #Add masses according to z location
691
+ if line[0:4] == "ATOM":
692
+ element = data.guess_element(residue,atomname)
693
+ if element in data.masses:
694
+ self.mass += data.masses[element]
695
+ if element == "H":
696
+ self.hydrogens += 1
697
+ if self.z[-1] > self.leaflet_z:
698
+ self.solv_atoms_mass_up += data.masses[element]
699
+ elif self.z[-1] < -self.leaflet_z:
700
+ self.solv_atoms_mass_down += data.masses[element]
701
+ else:
702
+ if self.z[-1] >= 0:
703
+ self.mem_atoms_mass_up += data.masses[element]
704
+ else:
705
+ self.mem_atoms_mass_down += data.masses[element]
706
+ self.x_mem.append(self.x[-1])
707
+ self.y_mem.append(self.y[-1])
708
+ else:
709
+ print("Atom "+element+" mass will not be considered!\n")
710
+
711
+ def read_grid(self):
712
+ file = open(self.grid,"r").readlines()
713
+ solv_up = 0
714
+ solv_down = 0
715
+ mem_up = 0
716
+ mem_down = 0
717
+ for line in file:
718
+ coord = float(line[46:54])
719
+ if coord+self.move_vec[2] > self.leaflet_z:
720
+ solv_up += 1
721
+ elif coord+self.move_vec[2] > 0:
722
+ mem_up += 1
723
+ elif coord+self.move_vec[2] >= -self.leaflet_z:
724
+ mem_down += 1
725
+ elif coord+self.move_vec[2] < -self.leaflet_z:
726
+ solv_down += 1
727
+ else:
728
+ print("Coordinate not assigned?") #Shouldn't be called
729
+ self.mem_vol_up = mem_up/8
730
+ self.mem_vol_down = mem_down/8
731
+ self.solv_vol_up = solv_up/8
732
+ self.solv_vol_down = solv_down/8
733
+ self.volume = self.mem_vol_up+self.mem_vol_down+self.solv_vol_up+self.solv_vol_down
734
+
735
+ def estimated_atoms(self):
736
+ est_density = estimated_density(self.mass)
737
+ self.density = est_density*avogadro/10**24
738
+ self.volume = self.mass/self.density
739
+ self.mem_vol_up = self.mem_atoms_mass_up/self.density
740
+ self.mem_vol_down = self.mem_atoms_mass_down/self.density
741
+ self.solv_vol_up = self.solv_atoms_mass_up/self.density
742
+ self.solv_vol_down = self.solv_atoms_mass_down/self.density
743
+
744
+ def measure(self):
745
+
746
+ self.read_pdb()
747
+ self.pdb_reindex()
748
+ self.write_pdb()
749
+
750
+ if self.hydrogens == 0:
751
+ print("Protein doesn't look to be protonated! Please consider that this will cause a bad estimation of the volume and of the packing process!\n\n")
752
+
753
+ #### MAXIMUM PROTEIN XY RADIUS ###
754
+
755
+ mean_x = sum(self.x)/len(self.x)
756
+ mean_y = sum(self.y)/len(self.y)
757
+ self.max_rad = max([math.sqrt((self.x[n]-mean_x)**2+(self.y[n]-mean_y)**2) for n, _ in enumerate(self.x)])
758
+
759
+ #### IF GRID VOL CALCULATION, REPLACE ESTIMATION ####
760
+
761
+ self.estimated_atoms()
762
+ if self.grid != None:
763
+ self.read_grid()
764
+
765
+ self.x.sort()
766
+ self.y.sort()
767
+ self.z.sort()
768
+ self.x_mem.sort()
769
+ self.y_mem.sort()
770
+ self.minmax = [self.x[0], self.y[0], self.z[0], self.x[-1], self.y[-1], self.z[-1]]
771
+ try:
772
+ area_est = (((self.x_mem[-1]-self.x_mem[0])+(self.y_mem[-1]-self.y_mem[0]))/4)**2*math.pi # Maybe estimate the protein area in the membrane...(Not used ATM)
773
+ except:
774
+ print("WARNING! The protein doesn't have atoms sitting in the membrane! Make sure that it was correctly aligned and that the placement is as intended!")
775
+ return [self.x[0], self.y[0], self.z[0], self.x[-1], self.y[-1], self.z[-1]], self.max_rad, self.charge, self.volume, self.mem_vol_up, self.mem_vol_down, self.solv_vol_up, self.solv_vol_down, self.density, self.mass, self.chains
776
+
777
def __repr__(self):
    """Return a short debugging label that names the source PDB."""
    source_pdb = self.pdb
    return f"<MembraneParam PDB:{source_pdb}>"
779
+ #
780
+
781
def is_number(num):
    """Return True when ``float(num)`` succeeds, False otherwise.

    Only conversion failures are treated as "not a number": ``TypeError``
    (e.g. ``None``), ``ValueError`` (e.g. ``"abc"``) and ``OverflowError``
    (an int too large for a float).  Other exceptions, such as
    ``KeyboardInterrupt``, are no longer swallowed.
    """
    try:
        float(num)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
787
+
788
+
789
def pdb_parse(pdbfile, onlybb=True):
    """Parse the ATOM/HETATM records of a PDB file into a per-residue mapping.

    Args:
        pdbfile: Path of the PDB file to read.
        onlybb: When true, keep only atoms whose name is in ``cgatoms`` and
            whose residue name is in ``residues``; when false, keep every
            atom.

    Returns:
        ``{(residue, resnum, chain): {(atomname, atomnum): xyz}}`` where
        ``xyz`` is a NumPy array holding the three coordinates.

    Raises:
        ValueError: If a serial, residue number or coordinate field of an
            atom record cannot be converted to a number.
    """
    CA_CB = {}
    # The context manager closes the file even when a record fails to parse.
    with open(pdbfile, "r") as handle:
        for line in handle:
            if not line.startswith(("ATOM", "HETATM")):
                continue
            residue = line[17:21].strip()
            atomnum = int(line[6:11].strip())
            atomname = line[12:16].strip()
            resnum = int(line[22:26].strip())
            chain = line[21:22]
            # Test ``onlybb`` first so the all-atom mode never needs the
            # backbone lookup tables.
            if onlybb and not (atomname in cgatoms and residue in residues):
                continue
            res_key = (residue, resnum, chain)
            CA_CB.setdefault(res_key, {})[(atomname, atomnum)] = np.array([
                float(line[30:38].strip()),
                float(line[38:46].strip()),
                float(line[46:54].strip()),
            ])
    return CA_CB
809
+
810
def pdb_write(CA_CB, outfile="test.pdb"):
    """Write a per-residue atom mapping to *outfile* as PDB ATOM records.

    Args:
        CA_CB: ``{(residue, resnum, chain): {(atomname, atomnum): xyz}}``;
            ``xyz`` is indexed with ``[0]``, ``[1]`` and ``[2]``.
        outfile: Path of the file to create or overwrite.

    Residues are written ordered by ``(chain, resnum)`` and atoms by their
    serial number.  The element column holds the first character of the atom
    name.
    """
    # Fixed-width PDB layout: serial in columns 7-11, atom name 13-16,
    # residue name 18-20, chain 22, residue number 23-26, coordinates 31-54,
    # occupancy / B-factor 55-66 and element 77-78.  These are the same
    # slices the parsers in this module read back.
    record = (
        "ATOM  {:>5d} {:>4} {:>3}{:>2}{:>4d}    "
        "{:>8.3f}{:>8.3f}{:>8.3f}  1.00  0.00          {:>2}\n"
    )
    # The context manager closes the file even if formatting a record fails.
    with open(outfile, "w") as handle:
        for res in sorted(CA_CB, key=lambda key: (key[2], key[1])):
            atoms = CA_CB[res]
            for atom in sorted(atoms, key=lambda key: key[1]):
                xyz = atoms[atom]
                handle.write(record.format(
                    atom[1],
                    "{:<3}".format(atom[0]),
                    res[0],
                    res[2],
                    res[1],
                    xyz[0],
                    xyz[1],
                    xyz[2],
                    atom[0][0],
                ))
816
+
817
def pdb_parse_TER(pdbfile, onlybb=True, noH=True, filter_res=None, filter_atm=None, packmol_hex_after=99999, hexadecimal_indices=False):
    """Parse ATOM/HETATM records into molecules delimited by TER records.

    Args:
        pdbfile: Path of the PDB file to read.
        onlybb: When true, keep only atoms whose name is in ``cgatoms`` and
            whose residue name is in ``residues``; the filters below are
            ignored in this mode.
        noH: With ``onlybb`` false, skip atoms whose name starts with ``"H"``.
        filter_res: With ``onlybb`` false, optional container of residue
            names to keep.
        filter_atm: With ``onlybb`` false, optional container of atom names
            to keep.
        packmol_hex_after: Number of atom records after which serials are
            read as hexadecimal even if they look decimal; ``None`` disables
            this rule.
        hexadecimal_indices: Read every serial and residue number as
            hexadecimal.

    Returns:
        ``{(molnum, chain): {(residue, resnum, atomname, atomnum, tracker):
        xyz}}``.  ``molnum`` starts at 1 and grows with every TER record,
        ``tracker`` is a running counter that preserves file order and
        ``xyz`` is a NumPy array holding the three coordinates.
    """
    CA_CB = {}
    molnum = 1
    tracker = 1
    atom_index = 0
    hex_switch = False
    atomlimit = False
    # Start value for the sequential fallback; avoids an UnboundLocalError
    # when the very first serial is already '*****'.
    atomnum = 0
    # The context manager closes the file even when a record fails to parse.
    with open(pdbfile, "r") as handle:
        for line in handle:
            if line.startswith("TER"):
                molnum += 1
                continue
            if not line.startswith(("ATOM", "HETATM")):
                continue

            atom_index += 1
            serial = line[6:11].strip()
            if serial == "*****" and not atomlimit:
                logger.warning("Found atom number limit '*****'. Atom number parsing will be unreliable")
                atomlimit = True
            residue = line[17:21].strip()
            if not serial.isnumeric():
                # Once a non-decimal serial shows up, the rest is read as hex.
                hex_switch = True

            if atomlimit:
                # Serials are unusable from here on: keep counting instead.
                atomnum += 1
            elif (
                hexadecimal_indices
                # Packmol switches to hex after serial 99999; the atom count
                # disambiguates hex serials that look decimal, like "20000".
                or (packmol_hex_after is not None and atom_index > packmol_hex_after)
                or hex_switch
            ):
                atomnum = int(serial, 16)
            else:
                atomnum = int(serial)

            atomname = line[12:16].strip()
            resnum = int(line[22:26].strip(), 16) if hexadecimal_indices else int(line[22:26].strip())
            chain = line[21:22]
            mol_key = (molnum, chain)

            if onlybb:
                keep = atomname in cgatoms and residue in residues
            else:
                rejected = (
                    (noH and atomname.startswith("H"))
                    or (filter_res is not None and residue not in filter_res)
                    or (filter_atm is not None and atomname not in filter_atm)
                )
                if rejected:
                    # Atoms filtered out in all-atom mode do not advance the
                    # tracker.
                    continue
                keep = True

            if keep:
                CA_CB.setdefault(mol_key, {})[(residue, resnum, atomname, atomnum, tracker)] = np.array([
                    float(line[30:38].strip()),
                    float(line[38:46].strip()),
                    float(line[46:54].strip()),
                ])
            tracker += 1
    return CA_CB
870
+
871
+
872
+
873
def pdb_write_TER(CA_CB, outfile="test.pdb", serial_format="hy36", resseq_format="decimal"):
    """Write a per-molecule atom mapping to *outfile*, one TER per molecule.

    Args:
        CA_CB: ``{(molnum, chain): {(residue, resnum, atomname, atomnum,
            tracker): xyz}}``; ``xyz`` is indexed with ``[0]``, ``[1]`` and
            ``[2]``.
        outfile: Path of the file to create or overwrite.
        serial_format: Format name handed to ``_format_pdb_int`` for the
            5-character atom serial.
        resseq_format: Format name handed to ``_format_pdb_int`` for the
            4-character residue number.

    Returns:
        ``outfile``, unchanged.
    """
    # Fixed-width PDB layout: serial in columns 7-11, atom name 13-16,
    # residue name 18-20, chain 22, residue number 23-26, coordinates 31-54,
    # occupancy / B-factor 55-66 and element 77-78.
    record = (
        "ATOM  {:>5} {:>4} {:>3}{:>2}{:>4}    "
        "{:>8.3f}{:>8.3f}{:>8.3f}  1.00  0.00          {:>2}\n"
    )
    # The context manager closes the file even if formatting a record fails.
    with open(outfile, "w") as handle:
        for mol in sorted(CA_CB, key=lambda key: key[0]):
            atoms = CA_CB[mol]
            # Packmol output comes serialized first and foremost by atom
            # number, so the file-order tracker is the primary sort key.
            for atom in sorted(atoms, key=lambda key: (key[4], key[1])):
                serial = _format_pdb_int(atom[3], 5, serial_format)
                resseq = _format_pdb_int(atom[1], 4, resseq_format)
                xyz = atoms[atom]
                handle.write(record.format(
                    serial,
                    "{:<3}".format(atom[2]),
                    atom[0],
                    mol[1],
                    resseq,
                    xyz[0],
                    xyz[1],
                    xyz[2],
                    # Element comes from the atom name; the original used
                    # the first letter of the residue name here.
                    atom[2][0],
                ))
            handle.write("TER\n")
        handle.write("END\n")
    return outfile
884
+
885
def find_piercing_lipids(pdb, outfile="noclash.pdb", verbose=False, hexadecimal_indices=False):
    """Find tail molecules with a bond midpoint close to a ring centre.

    Tail atoms (residues in ``tails``) and ring atoms (residues in
    ``sterols_PI``) are read with ``pdb_parse_TER``.  For every tail molecule
    the midpoints of all atom pairs closer than 1.7 A are collected.  A
    molecule is reported when one of its midpoints lies within 2.5 A of a
    ring centre built from ``sterol_ring_probes`` (non-"PI" residues) or
    ``PI_ring_probe`` ("PI" residues).

    Args:
        pdb: Path of the PDB file to analyse.
        outfile: Accepted for interface compatibility; not used here.
        verbose: Accepted for interface compatibility; not used here.
        hexadecimal_indices: Forwarded to ``pdb_parse_TER``.

    Returns:
        A set of ``(molnum, chain)`` keys identifying the flagged molecules.
    """
    tails_dict = pdb_parse_TER(pdb, onlybb=False, filter_res=tails, hexadecimal_indices=hexadecimal_indices)
    sterol_PI_dict = pdb_parse_TER(pdb, onlybb=False, filter_res=sterols_PI, hexadecimal_indices=hexadecimal_indices)

    # Collect the bond midpoints of every tail molecule.
    tail_mols = list(tails_dict)
    bonds_per_mol = []
    for mol in tail_mols:
        coords = list(tails_dict[mol].values())
        bonds = []
        for pos, first in enumerate(coords):
            for second in coords[pos + 1:]:
                # C-C bond length shouldn't be larger than 1.59 A; 1.7 just
                # in case.
                if np.linalg.norm(first - second) < 1.7:
                    bonds.append(np.mean([first, second], axis=0))
        bonds_per_mol.append(bonds)

    # Size the array from the data instead of a fixed 50 slots per molecule,
    # which raised IndexError for molecules with more bonded pairs.  Unused
    # slots stay at infinity and can never satisfy the distance cut-off.
    max_bonds = max((len(bonds) for bonds in bonds_per_mol), default=0)
    midpoints = np.full((len(tail_mols), max_bonds, 3), np.inf)
    for row, bonds in enumerate(bonds_per_mol):
        if bonds:
            midpoints[row, :len(bonds)] = bonds

    # Ring centres: slots 0..3 are filled from sterol_ring_probes, slot 4 is
    # reserved for the PI ring.  Missing rings stay at infinity.
    ringpoints = np.full((len(sterol_PI_dict), 5, 3), np.inf)
    for row, atoms in enumerate(sterol_PI_dict.values()):
        # PI and sterols are checked independently, as PI has the same atom
        # names as the sterol rings.  Key layout:
        # (resname, resnum, atomname, atomnum, internal_idx).
        for ring_idx, ring in enumerate(sterol_ring_probes):
            ring_coords = [
                xyz
                for ring_atom in ring
                for key, xyz in atoms.items()
                if key[2].strip() == ring_atom and key[0] != "PI"
            ]
            if ring_coords:
                ringpoints[row, ring_idx] = np.mean(ring_coords, axis=0)
        pi_coords = [
            xyz
            for ring_atom in PI_ring_probe
            for key, xyz in atoms.items()
            if key[2].strip() == ring_atom and key[0] == "PI"
        ]
        if pi_coords:
            ringpoints[row, 4] = np.mean(pi_coords, axis=0)

    to_remove = set()
    for sterol in ringpoints:
        for ring_center in sterol:
            # Infinite placeholders yield inf/nan distances; silence the
            # resulting RuntimeWarnings, the comparison below ignores them.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", RuntimeWarning)
                pierce_dist = np.linalg.norm(midpoints - ring_center, axis=2)
            # H to H benzene "ring diameter" should be about 4.963 A (from a
            # quick Avogadro minimisation).  The centre of an aliphatic bond
            # should be farther away than this.
            for pierce in np.argwhere(pierce_dist < 2.5):
                to_remove.add(tail_mols[int(pierce[0])])

    if to_remove:
        logger.debug("The following lipids have clashing tails with sterols:")
    else:
        logger.debug("No piercing lipid found!")
    for clash in to_remove:
        res_pairs = {clash_res[:2] for clash_res in tails_dict[clash]}
        tr_names = [pair[0] for pair in res_pairs]
        tr_resids = [pair[1] for pair in res_pairs]
        logger.debug("Resnames:%s, Resids:%s" % (tr_names, tr_resids))
    return to_remove
949
+
950
+
951
def remove_piercing_lipids(pdb, to_remove, outfile="noclash.pdb", verbose=False, hexadecimal_indices=False):
    """Write a copy of *pdb* without the molecules listed in *to_remove*.

    The file is parsed with ``pdb_parse_TER`` (all atoms, hydrogens
    included), every key in *to_remove* is dropped and the rest is written
    with ``pdb_write_TER``.  Hexadecimal formats are used for serials and
    residue numbers when *hexadecimal_indices* is true, "hy36" / "decimal"
    otherwise.

    Returns:
        Whatever ``pdb_write_TER`` returns for *outfile*.

    Raises:
        KeyError: If a key in *to_remove* is not present in the parsed file.
    """
    molecules = pdb_parse_TER(pdb, onlybb=False, noH=False, hexadecimal_indices=hexadecimal_indices)

    if verbose:
        logger.info("Removing clashing lipids")
    for clash in to_remove:
        molecules.pop(clash)

    if hexadecimal_indices:
        serial_format = resseq_format = "hex"
    else:
        serial_format, resseq_format = "hy36", "decimal"
    return pdb_write_TER(
        molecules,
        outfile=outfile,
        serial_format=serial_format,
        resseq_format=resseq_format,
    )
962
+
963
+
964
def fix_illegal_chain_id(
    path: str,
    valid: str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
    default: str = "Z",
    encoding: str = "utf-8",
    backup: bool = False,
) -> None:
    """Replace illegal chain IDs in a PDB file, rewriting the file in place.

    - Lines starting with ATOM/HETATM/ANISOU/TER are checked at column 22
      (index 21); SEQRES lines are checked at column 12 (index 11).
    - A chain character that is not in ``valid`` is replaced by ``default``.
    - Lines too short to contain the chain column are copied unchanged; the
      line terminator is never treated as a chain ID.

    Args:
        path: Path of the PDB file to fix.
        valid: Characters accepted as chain IDs.
        default: Replacement for an illegal chain ID.
        encoding: Text encoding used for reading and writing.
        backup: If true, the original file is kept as ``path + ".bak"``
            (overwriting an existing backup).

    Raises:
        OSError: If the file cannot be read, written or replaced.  The
            temporary file is removed before the error propagates.
    """
    dir_name = os.path.dirname(path) or "."
    # Create the temporary file next to the target so the final replace stays
    # on one filesystem.
    fd, tmp_path = tempfile.mkstemp(prefix="pdb_fix_", suffix=".tmp", dir=dir_name)
    try:
        with os.fdopen(fd, "w", encoding=encoding) as fout, \
                open(path, "r", encoding=encoding) as fin:
            target_prefixes = ("ATOM", "HETATM", "ANISOU", "TER")
            for line in fin:
                if line.startswith("SEQRES"):
                    column = 11
                elif line.startswith(target_prefixes):
                    column = 21
                else:
                    column = None
                # Measure the line without its terminator: replacing a
                # newline that sits in the chain column would merge this
                # record with the next one.
                width = len(line.rstrip("\r\n"))
                if column is not None and width > column and line[column] not in valid:
                    line = line[:column] + default + line[column + 1:]
                fout.write(line)
        # Optional backup of the original file.
        if backup:
            os.replace(path, path + ".bak")
        # Swap the temporary file in for the original.
        os.replace(tmp_path, path)
    except BaseException:
        # On failure remove the temporary file, then re-raise the original
        # error; a failing cleanup must not mask it.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
1009
+
1010
if __name__ == "__main__":
    # Ad-hoc command line use: the first argument is the PDB file; "-move"
    # (followed by a comma-separated vector) and "-cen" trigger additional
    # measure_parms runs.
    pdb = sys.argv[1]
    print(measure_parms(pdb, 23, None))
    if "-move" in sys.argv:
        move_arg = sys.argv[sys.argv.index("-move") + 1]
        vec = [float(component) for component in move_arg.split(",")]
        print(measure_parms(pdb, 23, move=True, move_vec=vec))
    if "-cen" in sys.argv:
        print(measure_parms(pdb, 23, xy_cen=True))