mimicpy 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mimicpy/__init__.py +1 -1
- mimicpy/__main__.py +726 -2
- mimicpy/_authors.py +2 -2
- mimicpy/_version.py +2 -2
- mimicpy/coords/__init__.py +1 -1
- mimicpy/coords/base.py +1 -1
- mimicpy/coords/cpmdgeo.py +1 -1
- mimicpy/coords/gro.py +1 -1
- mimicpy/coords/pdb.py +1 -1
- mimicpy/core/__init__.py +1 -1
- mimicpy/core/prepare.py +3 -3
- mimicpy/core/selector.py +1 -1
- mimicpy/force_matching/__init__.py +34 -0
- mimicpy/force_matching/bonded_forces.py +628 -0
- mimicpy/force_matching/compare_top.py +809 -0
- mimicpy/force_matching/dresp.py +435 -0
- mimicpy/force_matching/nonbonded_forces.py +32 -0
- mimicpy/force_matching/opt_ff.py +2114 -0
- mimicpy/force_matching/qm_region.py +1960 -0
- mimicpy/plugins/__main_installer__.py +76 -0
- mimicpy/{__main_vmd__.py → plugins/__main_vmd__.py} +2 -2
- mimicpy/plugins/pymol.py +56 -0
- mimicpy/plugins/vmd.tcl +78 -0
- mimicpy/scripts/__init__.py +1 -1
- mimicpy/scripts/cpmd.py +1 -1
- mimicpy/scripts/fm_input.py +265 -0
- mimicpy/scripts/fmdata.py +120 -0
- mimicpy/scripts/mdp.py +1 -1
- mimicpy/scripts/ndx.py +1 -1
- mimicpy/scripts/script.py +1 -1
- mimicpy/topology/__init__.py +1 -1
- mimicpy/topology/itp.py +603 -35
- mimicpy/topology/mpt.py +1 -1
- mimicpy/topology/top.py +254 -15
- mimicpy/topology/topol_dict.py +233 -4
- mimicpy/utils/__init__.py +1 -1
- mimicpy/utils/atomic_numbers.py +1 -1
- mimicpy/utils/constants.py +17 -3
- mimicpy/utils/elements.py +1 -1
- mimicpy/utils/errors.py +1 -1
- mimicpy/utils/file_handler.py +1 -1
- mimicpy/utils/strings.py +1 -1
- mimicpy-0.3.0.dist-info/METADATA +156 -0
- mimicpy-0.3.0.dist-info/RECORD +50 -0
- {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/WHEEL +1 -1
- mimicpy-0.3.0.dist-info/entry_points.txt +4 -0
- mimicpy-0.2.0.dist-info/METADATA +0 -86
- mimicpy-0.2.0.dist-info/RECORD +0 -38
- mimicpy-0.2.0.dist-info/entry_points.txt +0 -3
- {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info/licenses}/COPYING +0 -0
- {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info/licenses}/COPYING.LESSER +0 -0
- {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/top_level.txt +0 -0
- {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,1960 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from ..topology.top import Top
|
|
6
|
+
from ..topology.mpt import Mpt
|
|
7
|
+
from ..core.prepare import Preparation
|
|
8
|
+
from ..utils.errors import MiMiCPyError, SelectionError
|
|
9
|
+
from ..utils.file_handler import read
|
|
10
|
+
from ..utils.constants import nm_to_au, kb_gmx2au, au_to_nm, kjm_au, au_kjm, kb_au2gmx
|
|
11
|
+
import copy
|
|
12
|
+
import numpy as np
|
|
13
|
+
from math import isclose
|
|
14
|
+
|
|
15
|
+
def read_qm_selection(selection_file: Path, prep: Preparation):
    """Read QM atom selections from a file and update the Preparation object

    Every non-blank line holds a command (case-insensitive) followed by an
    optional selection string; runs of whitespace in the selection are
    collapsed to single spaces. Recognised commands:

        add <selection>        -> prep.add(selection, False)
        add-bound <selection>  -> prep.add(selection, True)
        delete <selection>     -> prep.delete(selection)
        clear                  -> prep.clear()

    A line that fails (unknown command, or any exception raised by ``prep``)
    is logged as an error and skipped; the remaining lines are still
    processed.

    Args:
        selection_file (Path): Path to selection file (a ``str`` is accepted too)
        prep (Preparation): Preparation object to update

    Returns:
        Preparation: Updated Preparation object with QM atoms selected

    Raises:
        FileNotFoundError: If the selection file does not exist
    """
    # Accept plain strings as well as Path objects.
    selection_file = Path(selection_file)
    if not selection_file.exists():
        raise FileNotFoundError(f'Selection file not found: {selection_file}')

    selection_text = read(selection_file)
    logging.info(f'Reading selections from {selection_file}')

    for line in selection_text.splitlines():
        parts = line.split()
        if not parts:
            # Blank or whitespace-only line
            continue

        command = parts[0].lower()
        selection = ' '.join(parts[1:]) if len(parts) > 1 else None
        try:
            if command == 'add':
                prep.add(selection, False)
                logging.info(f'Added atoms to QM region using selection: {selection}')
            elif command == 'add-bound':
                prep.add(selection, True)
                logging.info(f'Added boundary atoms to QM region using selection: {selection}')
            elif command == 'delete':
                prep.delete(selection)
                logging.info(f'Deleted atoms from QM region using selection: {selection}')
            elif command == 'clear':
                prep.clear()
                logging.info('Cleared all atoms from QM region')
            else:
                raise MiMiCPyError(f'Invalid command: {command}')
        except Exception as e:
            # Deliberate best effort: one bad line must not abort the whole file.
            logging.error(f'Failed to process selection command: {str(e)}')

    return prep
|
|
62
|
+
|
|
63
|
+
class QMRegion:
|
|
64
|
+
"""Class to handle QM region topology formation and extraction"""
|
|
65
|
+
|
|
66
|
+
def __init__(self, top_file, coords_file, gmxdata=None, buffer=1000,
             guess_elements=True, nonstandard_atomtypes=None):
    """Store the input settings, reset all derived state and load the topology

    Args:
        top_file (str): Path to topology file
        coords_file (str): Path to coordinate file
        gmxdata (str, optional): Path to GROMACS data directory
        buffer (int, optional): Buffer size for reading files
        guess_elements (bool, optional): Whether to guess atomic elements
        nonstandard_atomtypes (dict, optional): Dictionary of non-standard atom types
    """
    # Settings forwarded to Top / DefaultSelector by load_topology()
    self.top_file, self.coords_file = top_file, coords_file
    self.gmxdata = gmxdata
    self.buffer = buffer
    self.guess_elements = guess_elements
    self.nonstandard_atomtypes = nonstandard_atomtypes

    # Objects created by load_topology()
    self.top = self.mpt = self.prep = self.selector = None

    # QM-region state filled in by setup_qm_region()
    self.qm_atoms = None
    self.boundary_atoms = None
    self.gmx_to_cpmd_map = None
    self.cpmd_to_gmx_map = None
    self.qm_interactions = None
    self.qm_total_charge = None
    self.solvent_atom_indices = None

    # State filled in by extract_qm_interactions()
    self.extended_qm_atoms = None
    self.gmx_to_seq_map = None

    self.load_topology()
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def load_topology(self):
    """Read the topology and build the Mpt, selector and Preparation objects"""
    top_options = {
        'mode': 'r',
        'buffer': self.buffer,
        'gmxdata': self.gmxdata,
        'guess_elements': self.guess_elements,
        'nonstandard_atomtypes': self.nonstandard_atomtypes,
    }
    self.top = Top(self.top_file, **top_options)
    logging.info(f'Successfully loaded topology from {self.top_file}')

    # The Mpt object is derived from the parsed Top
    self.mpt = Mpt.from_top(self.top)

    # The selector combines the Mpt with the coordinate file; it is imported
    # here, at call time, rather than at module level.
    from ..core.selector import DefaultSelector
    selector = DefaultSelector(self.mpt, self.coords_file, buffer=self.buffer)
    self.selector = selector
    self.prep = Preparation(selector)
|
|
119
|
+
|
|
120
|
+
def setup_qm_region(self, selection_file: Path, solvent_names: dict = None):
    """Set up QM region using selection file

    Applies the selection commands to ``self.prep`` and then populates
    ``qm_atoms``, ``gmx_to_cpmd_map``, ``cpmd_to_gmx_map``,
    ``boundary_atoms``, ``qm_interactions``, ``solvent_atom_indices`` and
    ``qm_total_charge``.

    Args:
        selection_file (Path): Path to selection file
        solvent_names (dict, optional): Passed unchanged to
            ``identify_solvent_atoms()``

    Raises:
        MiMiCPyError: If the topology is not loaded, or if any step of the
            setup fails (the original exception is chained as ``__cause__``)
    """
    if not self.top:
        raise MiMiCPyError('Topology not loaded. Call load_topology() first.')

    try:
        self.prep = read_qm_selection(selection_file, self.prep)
        self.qm_atoms = self.prep.qm_atoms
        self.gmx_to_cpmd_map = self.gmx_to_cpmd_idx()
        self.cpmd_to_gmx_map = {v: k for k, v in self.gmx_to_cpmd_map.items()}
        # boundary_atoms must be set before extract_qm_interactions(), which reads it
        self.boundary_atoms = self.qm_atoms[self.qm_atoms['is_bound'] == 1]
        self.qm_interactions = self.extract_qm_interactions()
        self.solvent_atom_indices = self.identify_solvent_atoms(solvent_names)
        self.qm_total_charge = self.qm_atoms['charge'].sum()
    except Exception as e:
        logging.error(f'Failed to setup QM region: {str(e)}')
        # Chain explicitly so the root cause stays attached to the wrapper
        raise MiMiCPyError(f'Failed to setup QM region: {str(e)}') from e
|
|
141
|
+
|
|
142
|
+
@property
|
|
143
|
+
def qm_charges(self):
|
|
144
|
+
"""Get QM atom charges"""
|
|
145
|
+
if not self.qm_atoms is not None:
|
|
146
|
+
raise MiMiCPyError('No QM atoms defined')
|
|
147
|
+
|
|
148
|
+
return self.qm_atoms['charge']
|
|
149
|
+
|
|
150
|
+
def find_boundary_atoms(self):
|
|
151
|
+
"""Find and mark boundary atoms in QM region"""
|
|
152
|
+
if not self.prep:
|
|
153
|
+
raise MiMiCPyError('QM region not setup. Call setup_qm_region() first.')
|
|
154
|
+
|
|
155
|
+
try:
|
|
156
|
+
self.prep.find_bound_atoms()
|
|
157
|
+
self.qm_atoms = self.prep.qm_atoms
|
|
158
|
+
logging.info('Successfully identified boundary atoms')
|
|
159
|
+
return True
|
|
160
|
+
except Exception as e:
|
|
161
|
+
logging.error(f'Failed to find boundary atoms: {str(e)}')
|
|
162
|
+
return False
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def write_topology(self, topol_dict=None, directory='.', prefix=''):
    """Write .itp files only for molecules containing QM atoms

    For every entry of ``self.top.molecules`` whose global atom range
    contains at least one QM atom index, a ``[ moleculetype ]`` block with
    its atoms, bonds, pairs, angles and dihedrals is written. Molecule types
    that resolve to the same output file are appended to it. Each molecule
    type is written at most once, even if its name occurs several times in
    ``self.top.molecules``. The output paths are stored in
    ``self.written_files``.

    Args:
        topol_dict (dict): Dictionary of topology data; defaults to
            ``self.top.topol_dict``
        directory (str): Path to output directory for .itp files (created
            if it does not exist)
        prefix (str): Prefix for the output .itp files

    Raises:
        MiMiCPyError: If no QM atoms are defined
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    if topol_dict is None:
        topol_dict = self.top.topol_dict

    # Get QM atom indices (1-based)
    qm_indices = set(self.qm_atoms.index)

    # Create output directory if it doesn't exist
    output_dir = Path(directory)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Dihedral function type -> (header, parameter columns of the dihedral
    # table, format spec per parameter)
    periodic = ("; ai aj ak al funct phi0 cp mult\n", (5, 6, 7), ('8.1f', '8.5f', '6d'))
    dihedral_layouts = {
        1: periodic,
        4: periodic,
        9: periodic,
        2: ("; ai aj ak al funct param1 param2\n", (8, 9), ('8.3f', '10.5E')),
        3: ("; ai aj ak al funct C0 C1 C2 C3 C4 C5\n", tuple(range(10, 16)), ('8.5f',) * 6),
    }

    def interaction_line(indices, params=(), specs=()):
        """Format one row; parameters are written only if none of them is None."""
        fields = [f"{value:6d}" for value in indices]
        if params and all(p is not None for p in params):
            fields.extend(format(p, spec) for p, spec in zip(params, specs))
        return ' '.join(fields) + "\n"

    self.written_files = []
    written_mols = set()
    # Atoms contained in all preceding [ molecules ] entries. Kept as a
    # running counter (not keyed by name) so a molecule name that occurs
    # more than once still gets the correct range for every occurrence.
    offset = 0

    for mol, n_mols in self.top.molecules:
        atoms_df = topol_dict[mol]
        first_atom = offset + 1
        offset += len(atoms_df) * n_mols
        last_atom = offset

        if mol in written_mols:
            continue
        # Any QM atom inside the range covered by all instances of this entry?
        if not any(first_atom <= idx <= last_atom for idx in qm_indices):
            continue
        written_mols.add(mol)

        # Use the original file name when the source file is known,
        # otherwise fall back to the molecule name.
        source_file = self.top.topol_dict.get_source_file(mol)
        file_name = Path(source_file).name if source_file else f"{mol}.itp"
        itp_file = output_dir / (prefix + file_name)

        # Get nrexcl value for this molecule
        nrexcl = self.top.topol_dict.get_nrexcl_value(mol)

        lines = [
            f"; Created by mimicpy {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n"
            f"[ moleculetype ]\n; Name nrexcl\n{mol:16s} {nrexcl}\n\n"
        ]

        # Atoms section
        lines.append("[ atoms ]\n")
        lines.append("; nr type resnr residu atom cgnr charge mass\n")
        for idx, row in atoms_df.iterrows():
            lines.append(
                f"{idx:6d} {row['type']:8s} {row['resid']:6d} {row['resname']:8s} "
                f"{row['name']:8s} {row['cgnr']:6d} {row['charge']:8.6f} {row['mass']:12.5f}\n"
            )
        lines.append("\n")

        # Interaction tables are column-oriented: table[column][row]
        mol_bonds = topol_dict.get_bonds(mol)
        mol_angles = topol_dict.get_angles(mol)
        mol_dihedrals = topol_dict.get_dihedrals(mol)
        mol_pairs = topol_dict.get_pairs(mol)

        if mol_bonds:
            lines.append("[ bonds ]\n")
            lines.append("; ai aj funct r k\n")
            for i_idx, j_idx, func, p1, p2 in zip(
                    mol_bonds[0], mol_bonds[1], mol_bonds[2], mol_bonds[3], mol_bonds[4]):
                lines.append(interaction_line((i_idx, j_idx, func), (p1, p2), ('10.5E', '10.5E')))
            lines.append("\n")

        if mol_pairs:
            lines.append("[ pairs ]\n")
            lines.append("; ai aj funct\n")
            for i_idx, j_idx, func in zip(mol_pairs[0], mol_pairs[1], mol_pairs[2]):
                lines.append(interaction_line((i_idx, j_idx, func)))
            lines.append("\n")

        if mol_angles:
            lines.append("[ angles ]\n")
            lines.append("; ai aj ak funct theta cth\n")
            for i_idx, j_idx, k_idx, func, p1, p2 in zip(
                    mol_angles[0], mol_angles[1], mol_angles[2],
                    mol_angles[3], mol_angles[4], mol_angles[5]):
                lines.append(interaction_line((i_idx, j_idx, k_idx, func), (p1, p2), ('8.3f', '10.5E')))
            lines.append("\n")

        if mol_dihedrals:
            # Group row numbers by function type (first-seen order); each
            # function type is written in its own [ dihedrals ] section.
            rows_by_func = {}
            for j, func in enumerate(mol_dihedrals[4]):
                rows_by_func.setdefault(func, []).append(j)

            for func, rows in rows_by_func.items():
                lines.append("[ dihedrals ]\n")
                header, columns, specs = dihedral_layouts.get(func, (None, (), ()))
                if header is None:
                    # Keep the interaction instead of dropping it silently;
                    # without explicit parameters the row is written bare.
                    logging.warning(
                        f'Dihedral function type {func} in molecule {mol} is not supported; '
                        'writing atoms and function type without parameters'
                    )
                else:
                    lines.append(header)
                for j in rows:
                    atom_ids = [mol_dihedrals[col][j] for col in range(4)]
                    params = [mol_dihedrals[col][j] for col in columns]
                    lines.append(interaction_line((*atom_ids, func), params, specs))
                lines.append("\n")

        # Append when another molecule type already created this file
        already_written = itp_file in self.written_files
        with open(itp_file, 'a' if already_written else 'w') as f:
            f.write(''.join(lines))
        if not already_written:
            self.written_files.append(itp_file)

    if self.written_files:
        logging.info(f'Successfully wrote .itp files for molecules containing QM atoms: {", ".join(str(f) for f in self.written_files)}')
    else:
        logging.warning('No molecules containing QM atoms found')
|
|
375
|
+
|
|
376
|
+
def _fill_missing_parameters_from_force_field(self):
    """Fill missing parameters in QM interactions using force field parameters

    Every bond, angle and dihedral in ``self.qm_interactions`` whose
    ``'parameters'`` list contains ``None`` is looked up by the atom types
    of its atoms (forward and reversed order) in the force fields reported
    by ``self.top.topol_dict.list_force_fields()``. Dihedrals additionally
    fall back to wildcard matching and, if still unresolved, to named
    parameter definitions. Values found are converted with the module's
    ``nm_to_au`` / ``kb_gmx2au`` / ``kjm_au`` factors (angles via
    ``np.deg2rad``) and stored in place together with a ``'source'`` tag.

    Atom indices in the interactions are treated as sequential positions in
    ``self.qm_atoms`` (no boundary atoms) or ``self.extended_qm_atoms``
    (boundary atoms present).
    """
    if not self.qm_interactions:
        return

    topol_dict = self.top.topol_dict
    available_force_fields = topol_dict.list_force_fields()
    if not available_force_fields:
        logging.debug('No force field parameters available for filling missing parameters')
        return

    # Sequential index -> atom type, taken from the frame the indices refer to
    if self.boundary_atoms.empty:
        qm_atoms = self.qm_atoms
    else:
        qm_atoms = self.extended_qm_atoms
    qm_atom_types = dict(enumerate(qm_atoms['type']))

    def table_for(cache, getter, ff_name):
        """Fetch a force-field table once and reuse it for later interactions."""
        if ff_name not in cache:
            cache[ff_name] = getter(ff_name)
        return cache[ff_name]

    def first_match(cache, getter, keys):
        """Return (params, ff_name) from the first force field defining one of keys."""
        for ff_name in available_force_fields:
            table = table_for(cache, getter, ff_name)
            for key in keys:
                if key in table and table[key]:
                    return table[key], ff_name
        return None, None

    bond_tables, angle_tables, dihedral_tables = {}, {}, {}

    # Fill missing bond parameters
    for bond in self.qm_interactions['bonds']:
        if None not in bond['parameters']:
            continue
        atom1, atom2 = bond['atoms']
        type1 = qm_atom_types.get(atom1)
        type2 = qm_atom_types.get(atom2)
        if not (type1 and type2):
            continue

        bond_params, ff_name = first_match(
            bond_tables, topol_dict.get_force_field_bondtypes,
            (f"{type1}-{type2}", f"{type2}-{type1}"),
        )
        if bond_params:
            bond['parameters'] = [bond_params['length'] * nm_to_au, bond_params['force_constant'] * kb_gmx2au]
            bond['source'] = f'force_field_{ff_name}'
            logging.debug(f'Filled missing bond parameters for atoms {atom1}-{atom2} from force field')

    # Fill missing angle parameters
    for angle in self.qm_interactions['angles']:
        if None not in angle['parameters']:
            continue
        atom1, atom2, atom3 = angle['atoms']
        type1 = qm_atom_types.get(atom1)
        type2 = qm_atom_types.get(atom2)
        type3 = qm_atom_types.get(atom3)
        if not (type1 and type2 and type3):
            continue

        angle_params, ff_name = first_match(
            angle_tables, topol_dict.get_force_field_angletypes,
            (f"{type1}-{type2}-{type3}", f"{type3}-{type2}-{type1}"),
        )
        if angle_params:
            angle['parameters'] = [np.deg2rad(angle_params['angle']), angle_params['force_constant'] * kjm_au]
            angle['source'] = f'force_field_{ff_name}'
            logging.debug(f'Filled missing angle parameters for atoms {atom1}-{atom2}-{atom3} from force field')

    # Fill missing dihedral parameters
    for dihedral in self.qm_interactions['dihedrals']:
        if None not in dihedral['parameters']:
            continue
        atom1, atom2, atom3, atom4 = dihedral['atoms']
        type1 = qm_atom_types.get(atom1)
        type2 = qm_atom_types.get(atom2)
        type3 = qm_atom_types.get(atom3)
        type4 = qm_atom_types.get(atom4)
        if not (type1 and type2 and type3 and type4):
            continue

        keys = (
            f"{type1}-{type2}-{type3}-{type4}",
            f"{type4}-{type3}-{type2}-{type1}",
        )

        dihedral_params = None
        ff_name = None
        for ff_name in available_force_fields:
            ff_dihedraltypes = table_for(
                dihedral_tables, topol_dict.get_force_field_dihedraltypes, ff_name)

            # Exact matches first, then wildcard matches within the same force field
            dihedral_params = next(
                (ff_dihedraltypes[key] for key in keys if key in ff_dihedraltypes), None)
            if not dihedral_params:
                dihedral_params = self._find_wildcard_dihedral_params(
                    type1, type2, type3, type4, ff_dihedraltypes
                )
            if dihedral_params:
                break

        if dihedral_params:
            func = dihedral['function']
            if func in (1, 4, 9):  # Format 1
                new_parameters = [
                    np.deg2rad(dihedral_params['phi0']),
                    dihedral_params['cp'] * kjm_au,
                    dihedral_params['mult'],
                ]
            elif func == 2:  # Format 2
                new_parameters = [
                    dihedral_params['param1'],
                    dihedral_params['param2'] * kjm_au,
                ]
            elif func == 3:  # Format 3: parameters are stored as a list
                new_parameters = [p * kjm_au for p in dihedral_params['params']]
            else:
                # Nothing can be assigned, so do not tag the dihedral as filled
                logging.debug(f'Dihedral function type {func} not supported for atoms {atom1}-{atom2}-{atom3}-{atom4}; parameters left unchanged')
                continue

            dihedral['parameters'] = new_parameters
            dihedral['source'] = f'force_field_{ff_name}'
            logging.debug(f'Filled missing dihedral parameters for atoms {atom1}-{atom2}-{atom3}-{atom4} (types: {type1}-{type2}-{type3}-{type4}) from force field')
            continue

        logging.debug(f'Could not find dihedral parameters for atoms {atom1}-{atom2}-{atom3}-{atom4} (types: {type1}-{type2}-{type3}-{type4}) in any force field')

        # Not found in dihedraltypes: try named parameter definitions, matched
        # by residue and atom names of the four atoms
        parameter_definitions = topol_dict.get_parameter_definitions()
        if not parameter_definitions:
            continue
        resolved_params = self._resolve_named_dihedral_parameters(
            atom1, atom2, atom3, atom4,
            dihedral['function'],
            parameter_definitions
        )
        if resolved_params:
            dihedral['parameters'] = resolved_params
            dihedral['source'] = 'named_parameters'
            logging.debug(f'Filled missing dihedral parameters for atoms {atom1}-{atom2}-{atom3}-{atom4} from named parameters')
|
|
530
|
+
|
|
531
|
+
def _resolve_named_dihedral_parameters(self, atom1_idx, atom2_idx, atom3_idx, atom4_idx, func, parameter_definitions):
    """Resolve named dihedral parameters by matching residue and atom names

    The four sequential indices are mapped to global GROMACS indices, each
    atom is located in its molecule type to obtain residue and atom name,
    and ``parameter_definitions`` is searched for
    ``torsion_<RES>_<A1>_<A2>_<A3>_<A4>_mult<N>`` (forward and reversed atom
    order, N = 1..6). ``<RES>`` is the residue name of the first atom. The
    first definition that parses is returned.

    Args:
        atom1_idx, atom2_idx, atom3_idx, atom4_idx: Sequential QM atom indices
        func: Dihedral function type
        parameter_definitions: Dictionary of parameter definitions

    Returns:
        list: Resolved parameters or None if not found
    """
    # Use the same frame the caller uses for these sequential indices: the
    # extended frame when boundary atoms exist, otherwise the QM atoms.
    atoms = self.qm_atoms
    boundary = self.boundary_atoms
    if boundary is not None and not boundary.empty and self.extended_qm_atoms is not None:
        atoms = self.extended_qm_atoms

    # Convert sequential indices back to GROMACS indices
    try:
        gmx_indices = [atoms.index[i] for i in (atom1_idx, atom2_idx, atom3_idx, atom4_idx)]
    except IndexError:
        return None

    # (atoms before this entry, atoms per molecule, number of molecules, atom table)
    # The offset accumulates across all [ molecules ] entries.
    spans = []
    offset = 0
    for mol, n_mols in self.top.molecules:
        mol_atoms = self.top.topol_dict[mol]
        spans.append((offset, len(mol_atoms), n_mols, mol_atoms))
        offset += len(mol_atoms) * n_mols

    def locate(gmx_idx):
        """Return the atom row for a global 1-based index, or None."""
        for start, mol_size, n_mols, mol_atoms in spans:
            if start < gmx_idx <= start + mol_size * n_mols:
                # Position inside one molecule instance, 1-based
                local_idx = (gmx_idx - start - 1) % mol_size + 1
                return mol_atoms.loc[local_idx]
        return None

    # Exactly one entry per atom, in dihedral order
    atom_info = []
    for gmx_idx in gmx_indices:
        atom_row = locate(gmx_idx)
        if atom_row is None:
            return None
        atom_info.append({
            'resname': atom_row['resname'],
            'name': atom_row['name']
        })

    resname = atom_info[0]['resname']  # Use first atom's residue
    atom_names = [info['name'] for info in atom_info]

    # Forward and reverse orderings of the atom names
    name_combinations = [
        f"torsion_{resname}_{atom_names[0]}_{atom_names[1]}_{atom_names[2]}_{atom_names[3]}",
        f"torsion_{resname}_{atom_names[3]}_{atom_names[2]}_{atom_names[1]}_{atom_names[0]}"
    ]

    for base_name in name_combinations:
        # Try different multiplicities (mult1, mult2, mult3, etc.)
        for mult in range(1, 7):
            param_name = f"{base_name}_mult{mult}"
            if param_name not in parameter_definitions:
                continue
            param_values = parameter_definitions[param_name]
            if len(param_values) < 3:
                continue

            try:
                phi0 = float(param_values[0])
                force_constant = float(param_values[1])
                multiplicity = int(param_values[2])
            except (ValueError, IndexError) as e:
                logging.debug(f'Error parsing parameter values for {param_name}: {e}')
                continue

            if func in (1, 4, 9):  # Format 1
                return [
                    np.deg2rad(phi0),
                    force_constant * kjm_au,
                    multiplicity
                ]
            if func == 2:  # Format 2: first value is passed through unchanged
                return [
                    phi0,
                    force_constant * kjm_au
                ]
            if func == 3:  # Format 3
                logging.debug(f'Format 3 dihedral with named parameters not fully implemented for {param_name}')
                return None

    return None
|
|
624
|
+
|
|
625
|
+
def _find_wildcard_dihedral_params(self, type1, type2, type3, type4, ff_dihedraltypes):
|
|
626
|
+
"""Find dihedral parameters using hierarchical wildcard matching
|
|
627
|
+
|
|
628
|
+
This function implements a hierarchy of specificity for dihedral parameter matching:
|
|
629
|
+
1. Exact match (most specific)
|
|
630
|
+
2. Single wildcard replacements (X or *)
|
|
631
|
+
3. Double wildcard replacements
|
|
632
|
+
4. Triple wildcard replacements
|
|
633
|
+
5. All wildcards (least specific)
|
|
634
|
+
|
|
635
|
+
For peptide bonds and similar cases, this ensures that more specific parameters
|
|
636
|
+
(like X-C-N-X for peptide bonds) are used instead of generic ones.
|
|
637
|
+
|
|
638
|
+
Args:
|
|
639
|
+
type1, type2, type3, type4: Atom types
|
|
640
|
+
ff_dihedraltypes: Dictionary of dihedral types from force field
|
|
641
|
+
|
|
642
|
+
Returns:
|
|
643
|
+
dict: Dihedral parameters or None if not found
|
|
644
|
+
"""
|
|
645
|
+
# Try forward and reverse orderings
|
|
646
|
+
type_combinations = [
|
|
647
|
+
[type1, type2, type3, type4],
|
|
648
|
+
[type4, type3, type2, type1]
|
|
649
|
+
]
|
|
650
|
+
|
|
651
|
+
best_match = None
|
|
652
|
+
best_specificity = -1 # Higher number = more specific
|
|
653
|
+
|
|
654
|
+
for types in type_combinations:
|
|
655
|
+
# 1. Try exact match (specificity = 4)
|
|
656
|
+
key = f"{types[0]}-{types[1]}-{types[2]}-{types[3]}"
|
|
657
|
+
if key in ff_dihedraltypes:
|
|
658
|
+
logging.debug(f'Found exact match for dihedral {key}')
|
|
659
|
+
return ff_dihedraltypes[key] # Most specific, return immediately
|
|
660
|
+
|
|
661
|
+
# 2. Try single wildcard replacements (specificity = 3)
|
|
662
|
+
for i in range(4):
|
|
663
|
+
for wildcard in ['X', '*']:
|
|
664
|
+
wildcard_types = types.copy()
|
|
665
|
+
wildcard_types[i] = wildcard
|
|
666
|
+
key = f"{wildcard_types[0]}-{wildcard_types[1]}-{wildcard_types[2]}-{wildcard_types[3]}"
|
|
667
|
+
if key in ff_dihedraltypes:
|
|
668
|
+
if best_specificity < 3:
|
|
669
|
+
best_match = ff_dihedraltypes[key]
|
|
670
|
+
best_specificity = 3
|
|
671
|
+
logging.debug(f'Found single wildcard match for dihedral {key} (specificity=3)')
|
|
672
|
+
|
|
673
|
+
# 3. Try double wildcard replacements (specificity = 2)
|
|
674
|
+
for i in range(4):
|
|
675
|
+
for j in range(i+1, 4):
|
|
676
|
+
for wildcard in ['X', '*']:
|
|
677
|
+
wildcard_types = types.copy()
|
|
678
|
+
wildcard_types[i] = wildcard
|
|
679
|
+
wildcard_types[j] = wildcard
|
|
680
|
+
key = f"{wildcard_types[0]}-{wildcard_types[1]}-{wildcard_types[2]}-{wildcard_types[3]}"
|
|
681
|
+
if key in ff_dihedraltypes:
|
|
682
|
+
if best_specificity < 2:
|
|
683
|
+
best_match = ff_dihedraltypes[key]
|
|
684
|
+
best_specificity = 2
|
|
685
|
+
logging.debug(f'Found double wildcard match for dihedral {key} (specificity=2)')
|
|
686
|
+
|
|
687
|
+
# 4. Try triple wildcard replacements (specificity = 1)
|
|
688
|
+
for i in range(4):
|
|
689
|
+
for j in range(i+1, 4):
|
|
690
|
+
for k in range(j+1, 4):
|
|
691
|
+
for wildcard in ['X', '*']:
|
|
692
|
+
wildcard_types = types.copy()
|
|
693
|
+
wildcard_types[i] = wildcard
|
|
694
|
+
wildcard_types[j] = wildcard
|
|
695
|
+
wildcard_types[k] = wildcard
|
|
696
|
+
key = f"{wildcard_types[0]}-{wildcard_types[1]}-{wildcard_types[2]}-{wildcard_types[3]}"
|
|
697
|
+
if key in ff_dihedraltypes:
|
|
698
|
+
if best_specificity < 1:
|
|
699
|
+
best_match = ff_dihedraltypes[key]
|
|
700
|
+
best_specificity = 1
|
|
701
|
+
logging.debug(f'Found triple wildcard match for dihedral {key} (specificity=1)')
|
|
702
|
+
|
|
703
|
+
# 5. Try all wildcards (specificity = 0)
|
|
704
|
+
for wildcard in ['X', '*']:
|
|
705
|
+
key = f"{wildcard}-{wildcard}-{wildcard}-{wildcard}"
|
|
706
|
+
if key in ff_dihedraltypes:
|
|
707
|
+
if best_specificity < 0:
|
|
708
|
+
best_match = ff_dihedraltypes[key]
|
|
709
|
+
best_specificity = 0
|
|
710
|
+
logging.debug(f'Found all wildcard match for dihedral {key} (specificity=0)')
|
|
711
|
+
|
|
712
|
+
return best_match
|
|
713
|
+
|
|
714
|
+
|
|
715
|
+
|
|
716
|
+
def extract_qm_interactions(self):
    """Extract all bonded interactions involving QM atoms from the topology.

    Every molecule type in ``self.top.molecules`` is visited and its bonds,
    angles and dihedrals (as returned by ``self.top.topol_dict.get_bonds`` /
    ``get_angles`` / ``get_dihedrals``) are scanned.  An interaction is kept
    when at least one of its atoms is in the QM region.  Explicit parameters
    are converted with the module-level factors ``nm_to_au``, ``kb_gmx2au``
    and ``kjm_au`` (angles/phases via ``np.deg2rad``); rows with a missing
    parameter get ``None`` placeholders that
    ``self._fill_missing_parameters_from_force_field()`` is called to fill.

    Side effects:
        Sets ``self.gmx_to_seq_map`` (GROMACS index -> 0-based position),
        ``self.extended_qm_atoms`` (only when boundary atoms exist) and
        ``self.qm_interactions``.

    Returns:
        dict: ``{'bonds': [...], 'angles': [...], 'dihedrals': [...]}`` where
        each entry is a dict with the keys ``atoms`` (sequential indices),
        ``function``, ``parameters``, ``index``, ``optimize``,
        ``involves_boundary``, ``involves_mm`` and ``molecule``.

    Raises:
        MiMiCPyError: If no QM atoms are defined.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    qm_indices = set(self.qm_atoms.index)
    has_boundary = not self.boundary_atoms.empty

    # With boundary atoms the sequential numbering follows the extended
    # (QM + MM) frame; otherwise it follows the QM atoms alone.
    if has_boundary:
        boundary_indices = set(self.boundary_atoms.index)
        self.extended_qm_atoms = self.get_extended_qm_atoms_dataframe()
        seq_order = self.extended_qm_atoms.index
    else:
        boundary_indices = set()
        seq_order = self.qm_atoms.index
    self.gmx_to_seq_map = {gmx_idx: seq_idx for seq_idx, gmx_idx in enumerate(seq_order)}

    def length_to_au(value):
        return value * nm_to_au

    def bond_constant_to_au(value):
        return value * kb_gmx2au

    def energy_to_au(value):
        return value * kjm_au

    def unchanged(value):
        return value

    # Dihedral function type -> (first parameter column, per-parameter transform).
    format_1 = (5, (np.deg2rad, energy_to_au, unchanged))
    dihedral_layouts = {
        1: format_1,
        4: format_1,
        9: format_1,
        2: (8, (unchanged, energy_to_au)),
        3: (10, (energy_to_au,) * 6),
    }

    def convert(raw, transforms):
        """Transform each raw value, or return all-None when any value is missing."""
        if any(value is None for value in raw):
            return [None] * len(raw)
        return [transform(value) for transform, value in zip(transforms, raw)]

    def gmx_atoms_of(table, n_atoms, row, offset):
        """Return the GROMACS indices of the atoms in one interaction row."""
        return [table[col][row] + offset for col in range(n_atoms)]

    def touches_qm(gmx_atoms):
        return any(idx in qm_indices for idx in gmx_atoms)

    def to_sequential(gmx_atoms):
        return [self.gmx_to_seq_map[idx] for idx in gmx_atoms]

    def make_record(mol, gmx_atoms, seq_atoms, function, parameters, index):
        """Build the dictionary stored for a single interaction."""
        # boundary_indices is empty when there are no boundary atoms, so the
        # membership test alone yields False in that case.
        involves_boundary = any(idx in boundary_indices for idx in gmx_atoms)
        return {
            'atoms': seq_atoms,  # Use sequential indices
            'function': function,
            'parameters': parameters,
            'index': index,
            'optimize': not involves_boundary,  # Don't optimize if involves boundary
            'involves_boundary': involves_boundary,
            'involves_mm': not all(idx in qm_indices for idx in gmx_atoms),
            'molecule': mol
        }

    qm_interactions = {
        'bonds': [],
        'angles': [],
        'dihedrals': []
    }

    bonded_count = 0
    prev_natoms = 0
    for mol, n_mols in self.top.molecules:
        mol_bonds = self.top.topol_dict.get_bonds(mol)
        mol_angles = self.top.topol_dict.get_angles(mol)
        mol_dihedrals = self.top.topol_dict.get_dihedrals(mol)

        # Bonds: columns 0-1 atoms, 2 function, 3-4 parameters
        if mol_bonds:
            for row in range(len(mol_bonds[0])):
                gmx_atoms = gmx_atoms_of(mol_bonds, 2, row, prev_natoms)
                if not touches_qm(gmx_atoms):
                    continue
                seq_atoms = to_sequential(gmx_atoms)
                parameters = convert([mol_bonds[3][row], mol_bonds[4][row]],
                                     (length_to_au, bond_constant_to_au))
                qm_interactions['bonds'].append(
                    make_record(mol, gmx_atoms, seq_atoms, mol_bonds[2][row],
                                parameters, bonded_count))
                bonded_count += 1

        # Angles: columns 0-2 atoms, 3 function, 4-5 parameters
        if mol_angles:
            for row in range(len(mol_angles[0])):
                gmx_atoms = gmx_atoms_of(mol_angles, 3, row, prev_natoms)
                if not touches_qm(gmx_atoms):
                    continue
                seq_atoms = to_sequential(gmx_atoms)
                parameters = convert([mol_angles[4][row], mol_angles[5][row]],
                                     (np.deg2rad, energy_to_au))
                qm_interactions['angles'].append(
                    make_record(mol, gmx_atoms, seq_atoms, mol_angles[3][row],
                                parameters, bonded_count))
                bonded_count += 1

        # Dihedrals: columns 0-3 atoms, 4 function, 5+ parameters (layout by function)
        if mol_dihedrals:
            for row in range(len(mol_dihedrals[0])):
                gmx_atoms = gmx_atoms_of(mol_dihedrals, 4, row, prev_natoms)
                if not touches_qm(gmx_atoms):
                    continue
                seq_atoms = to_sequential(gmx_atoms)
                func = mol_dihedrals[4][row]
                layout = dihedral_layouts.get(func)
                if layout is not None:
                    first_col, transforms = layout
                    raw = [mol_dihedrals[first_col + k][row] for k in range(len(transforms))]
                    qm_interactions['dihedrals'].append(
                        make_record(mol, gmx_atoms, seq_atoms, func,
                                    convert(raw, transforms), bonded_count))
                # NOTE(review): the counter advances for every QM dihedral, also
                # for function types that produce no record - confirm intended.
                bonded_count += 1

        # NOTE(review): only the first instance of each molecule type is
        # scanned; the offset then skips all n_mols instances - confirm.
        prev_natoms += len(self.top.topol_dict[mol]) * n_mols

    self.qm_interactions = qm_interactions

    # Log statistics
    total_bonds = len(qm_interactions["bonds"])
    total_angles = len(qm_interactions["angles"])
    total_dihedrals = len(qm_interactions["dihedrals"])

    if has_boundary:
        boundary_bonds = sum(1 for bond in qm_interactions["bonds"] if bond['involves_boundary'])
        boundary_angles = sum(1 for angle in qm_interactions["angles"] if angle['involves_boundary'])
        boundary_dihedrals = sum(1 for dihedral in qm_interactions["dihedrals"] if dihedral['involves_boundary'])

        logging.info(f'Extracted QM interactions: {total_bonds} bonds ({boundary_bonds} boundary), '
                     f'{total_angles} angles ({boundary_angles} boundary), '
                     f'{total_dihedrals} dihedrals ({boundary_dihedrals} boundary)')
    else:
        logging.info(f'Extracted QM interactions: {total_bonds} bonds, '
                     f'{total_angles} angles, '
                     f'{total_dihedrals} dihedrals')

    # Fill missing parameters from force field
    self._fill_missing_parameters_from_force_field()

    return self.qm_interactions
|
|
949
|
+
|
|
950
|
+
def gmx_to_cpmd_idx(self):
    """Convert GROMACS atom indices to CPMD indices for all atoms (QM and MM),
    using the same logic as CpmdScript.gmx_to_cpmd_idx.

    Atoms are ordered by a per-atom ``type_id`` and then by GROMACS id; the
    position in that ordering (starting at 1) is the CPMD index.  Atom types
    sharing the same ``X`` value and (within ``isclose`` tolerance) the same
    ``sigma`` and ``epsilon`` receive the same ``type_id``.  When ``self.prep``
    is set, QM atoms get negative ``type_id`` values so that they sort first,
    ordered by ``is_bound`` and ``element``.

    Note:
        The frame returned by ``self.mpt.select('all')`` is modified in place
        (a ``type_id`` column is inserted).

    Returns:
        dict: Dictionary mapping GROMACS atom indices to CPMD indices

    Raises:
        MiMiCPyError: If no QM atoms are defined.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Get topology data from Mpt object
    topol = self.mpt.select('all')
    typelist = topol['type'].unique()  # get list of types used in order

    # Get atomtypes section from GROMACS topology
    atomtypes = self.top.atom_types_df.set_index(['type']).loc[typelist]

    # Materialise the rows once; they are scanned once per atom type below.
    type_rows = list(atomtypes.iterrows())

    # Convert atomtypes to type ID.  Each type is compared against the types
    # listed before it: the first one with matching parameters donates its
    # ID, otherwise (on reaching its own label) the type gets a fresh ID.
    type_id = {}
    next_id = 0
    for label, row in type_rows:
        for other_label, other in type_rows:
            if other_label == label:
                type_id[label] = next_id
                next_id += 1
                break
            if ((row['X'] == other['X'])
                    and isclose(row['sigma'], other['sigma'])
                    and isclose(row['epsilon'], other['epsilon'])):
                type_id[label] = type_id[other_label]
                break

    topol.insert(2, "type_id", [type_id[atom_type] for atom_type in topol['type']], True)

    # get Gromacs IDs to QM atoms & push their type_ids first
    # NOTE(review): the extent of this ``if`` body was reconstructed from a
    # rendering without indentation - confirm against the original file.
    if hasattr(self, 'prep') and self.prep is not None:
        sorted_qm_atoms = self.qm_atoms.sort_values(by=['is_bound', 'element']).reset_index()
        natms = len(sorted_qm_atoms)
        for i, row in sorted_qm_atoms.iterrows():
            # Negative IDs sort ahead of every regular (>= 0) type ID.
            topol.at[row['id'], 'type_id'] = -natms + i

    # sort topol by type_id and gromacs id and get a dict of Gromacs IDs to CPMD IDs
    topol = topol.reset_index()
    topol.sort_values(by=['type_id', 'id'], inplace=True)
    cpmd_idx = list(range(1, self.mpt.number_of_atoms + 1))
    gmx_idx = topol['id'].to_list()
    return dict(zip(gmx_idx, cpmd_idx))
|
|
998
|
+
|
|
999
|
+
def update_topology(self, ff_optimize, bond2params):
    """Update the system's topology with optimized parameters from force matching.

    The method works in two passes:

    1. Every entry of ``self.qm_interactions`` is converted from atomic units
       back to GROMACS units, taking the optimized value from ``ff_optimize``
       where ``bond2params`` provides a parameter index and the stored value
       otherwise.  ``None`` placeholders are left untouched.
    2. The converted parameters are written into the per-molecule tables
       returned by ``self.top.topol_dict.get_bonds`` / ``get_angles`` /
       ``get_dihedrals``.

    Compared with a plain first-match scan this implementation

    * converts function-2 dihedral force constants back with ``au_kjm``
      (they are scaled by ``kjm_au`` on extraction),
    * also converts interactions that have no entry in ``bond2params`` so
      that no atomic-unit value is written into the topology,
    * pairs topology rows and interactions by atoms *and* function type in
      order of appearance, so several dihedral terms on the same four atoms
      each receive their own parameters.

    Note:
        ``self.qm_interactions`` is modified in place and holds GROMACS units
        afterwards; calling this method twice on the same interactions would
        convert them twice.

    Args:
        ff_optimize (numpy.ndarray): Array of optimized force field parameters
        bond2params (dict): Mapping of interaction indices to parameter indices in ff_optimize

    Returns:
        bool: True if update was successful, False otherwise

    Raises:
        MiMiCPyError: If ``extract_qm_interactions()`` has not been called.
    """
    if not hasattr(self, 'qm_interactions'):
        raise MiMiCPyError('No QM interactions defined. Call extract_qm_interactions() first.')

    def length_to_gmx(value):
        return value * au_to_nm

    def bond_constant_to_gmx(value):
        return value * kb_au2gmx

    def energy_to_gmx(value):
        return value * au_kjm

    def angle_to_gmx(value):
        # arccos(cos(x)) folds the optimized angle back into [0, pi].
        return np.rad2deg(np.arccos(np.cos(value)))

    def unchanged(value):
        return value

    bond_converters = (length_to_gmx, bond_constant_to_gmx)
    angle_converters = (angle_to_gmx, energy_to_gmx)
    # Dihedral function type -> (first parameter column, per-parameter converter).
    format_1 = (5, (np.rad2deg, energy_to_gmx, unchanged))
    dihedral_layouts = {
        1: format_1,
        4: format_1,
        9: format_1,
        2: (8, (unchanged, energy_to_gmx)),
        3: (10, (energy_to_gmx,) * 6),
    }

    def convert_back(interaction, converters):
        """Replace the stored a.u. parameters by GROMACS-unit values in place."""
        entry = bond2params.get(interaction['index'])
        values = interaction['parameters']
        for position, convert in enumerate(converters):
            optimized_idx = entry[position] if entry is not None else None
            if optimized_idx is not None:
                values[position] = convert(ff_optimize[optimized_idx])
            elif values[position] is not None:
                # Convert non-optimized parameter
                values[position] = convert(values[position])

    try:
        qm_interactions = self.qm_interactions

        # Pass 1: atomic units -> GROMACS units
        for bond in qm_interactions['bonds']:
            convert_back(bond, bond_converters)
        for angle in qm_interactions['angles']:
            convert_back(angle, angle_converters)
        for dihedral in qm_interactions['dihedrals']:
            layout = dihedral_layouts.get(dihedral['function'])
            if layout is not None:
                convert_back(dihedral, layout[1])

        # Create reverse mapping from sequential indices back to GROMACS indices
        if self.boundary_atoms.empty:
            seq_order = self.qm_atoms.index
        else:
            seq_order = self.extended_qm_atoms.index
        seq_to_gmx_map = dict(enumerate(seq_order))

        def build_lookup(interactions, canonical):
            """Group interactions by (GROMACS atoms, function), keeping order."""
            table = {}
            for interaction in interactions:
                gmx_atoms = tuple(seq_to_gmx_map.get(atom, atom + 1)
                                  for atom in interaction['atoms'])
                key = (canonical(gmx_atoms), interaction['function'])
                table.setdefault(key, []).append(interaction)
            return table

        def take(table, used, key):
            """Return the next interaction stored under key (the last one repeats)."""
            candidates = table.get(key)
            if not candidates:
                return None
            position = used.get(key, 0)
            used[key] = position + 1
            return candidates[min(position, len(candidates) - 1)]

        # Bonds match in either atom order; angles and dihedrals only as written.
        bond_lookup = build_lookup(qm_interactions['bonds'], frozenset)
        angle_lookup = build_lookup(qm_interactions['angles'], tuple)
        dihedral_lookup = build_lookup(qm_interactions['dihedrals'], tuple)
        used_bonds, used_angles, used_dihedrals = {}, {}, {}

        # Pass 2: write the converted parameters into the topology tables
        prev_natoms = 0
        for mol, n_mols in self.top.molecules:
            mol_bonds = self.top.topol_dict.get_bonds(mol)
            if mol_bonds:
                for row in range(len(mol_bonds[0])):
                    atoms = frozenset(mol_bonds[col][row] + prev_natoms for col in range(2))
                    bond = take(bond_lookup, used_bonds, (atoms, mol_bonds[2][row]))
                    if bond is not None:
                        mol_bonds[3][row] = bond['parameters'][0]  # param1
                        mol_bonds[4][row] = bond['parameters'][1]  # param2

            mol_angles = self.top.topol_dict.get_angles(mol)
            if mol_angles:
                for row in range(len(mol_angles[0])):
                    atoms = tuple(mol_angles[col][row] + prev_natoms for col in range(3))
                    angle = take(angle_lookup, used_angles, (atoms, mol_angles[3][row]))
                    if angle is not None:
                        mol_angles[4][row] = angle['parameters'][0]  # param1
                        mol_angles[5][row] = angle['parameters'][1]  # param2

            mol_dihedrals = self.top.topol_dict.get_dihedrals(mol)
            if mol_dihedrals:
                for row in range(len(mol_dihedrals[0])):
                    atoms = tuple(mol_dihedrals[col][row] + prev_natoms for col in range(4))
                    dihedral = take(dihedral_lookup, used_dihedrals,
                                    (atoms, mol_dihedrals[4][row]))
                    if dihedral is None:
                        continue
                    layout = dihedral_layouts.get(dihedral['function'])
                    if layout is None:
                        continue
                    first_col, converters = layout
                    for k in range(len(converters)):
                        mol_dihedrals[first_col + k][row] = dihedral['parameters'][k]

            prev_natoms += len(self.top.topol_dict[mol]) * n_mols

        logging.info('Successfully updated topology with optimized QM interaction parameters')
        return True

    except Exception as e:
        # Best effort by contract: report the failure through the return value.
        logging.error(f'Failed to update topology: {str(e)}')
        return False
|
|
1162
|
+
|
|
1163
|
+
|
|
1164
|
+
def update_qm_charges(self, new_charges):
    """Update the topology with new charges for QM atoms.

    Each QM atom is located in the molecule whose index range (taken from
    ``self.top.mol_offsets``) contains it, and the ``charge`` column of that
    molecule's frame in ``self.top.topol_dict`` is overwritten.

    Args:
        new_charges (dict or pandas.Series or np.ndarray or list or tuple): New charges for QM atoms.
            If dict, keys should be QM atom indices and values should be charges.
            If Series, index should be QM atom indices and values should be charges.
            If array, list or tuple, it should be in the same order as self.qm_atoms.index.

    Returns:
        bool: True if every QM atom was found in a molecule and updated,
        False if at least one QM atom could not be located (a warning is
        logged; the atoms that were found are still updated).

    Raises:
        MiMiCPyError: If no QM atoms are defined or charges are missing for
            some QM atoms.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Convert input to pandas Series if it's a dict or a plain sequence
    if isinstance(new_charges, dict):
        new_charges = pd.Series(new_charges)
    elif isinstance(new_charges, (list, tuple, np.ndarray)):
        new_charges = pd.Series(new_charges, index=self.qm_atoms.index)

    # Verify all QM atoms have new charges
    missing_atoms = set(self.qm_atoms.index) - set(new_charges.index)
    if missing_atoms:
        raise MiMiCPyError(f'Missing charges for QM atoms: {missing_atoms}')

    unplaced = []
    for idx in self.qm_atoms.index:
        # Find which molecule instance this atom belongs to
        for mol, mol_offset in self.top.mol_offsets:
            mol_frame = self.top.topol_dict[mol]
            if mol_offset + 1 <= idx <= mol_offset + len(mol_frame):
                # DataFrame is indexed by GROMACS atom numbers (1-based)
                mol_frame.loc[idx - mol_offset, 'charge'] = new_charges[idx]
                break
        else:
            unplaced.append(idx)

    if unplaced:
        logging.warning(f'Could not locate QM atoms {unplaced} in any molecule; their charges were not updated')
        return False
    return True
|
|
1201
|
+
|
|
1202
|
+
|
|
1203
|
+
def write_non_bonded_itp(self, directory='.', prefix='non_bonded'):
    """Write a topology in which bonded terms touching QM atoms are switched off.

    A deep copy of ``self.top.topol_dict`` is made; in the copy, the force
    constant of every bond, angle and dihedral that involves at least one QM
    atom is set to ``0.0``.  The copy is then passed to
    ``self.write_topology``; ``self.top.topol_dict`` itself is not modified.

    The force-constant columns zeroed per dihedral function type follow the
    layout used by ``extract_qm_interactions``/``update_topology``: column 6
    for types 1, 4 and 9, column 9 for type 2, and columns 10-15 (C0-C5) for
    type 3.

    Args:
        directory (str): Directory to write the output .itp file
        prefix (str): Prefix for the output .itp file

    Returns:
        bool: True once the modified topology has been handed to
        ``write_topology``.

    Raises:
        MiMiCPyError: If no QM atoms are defined.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Get QM atom indices
    qm_indices = set(self.qm_atoms.index)

    # Work on a copy so the live topology keeps its bonded parameters
    nonbonded_top = copy.deepcopy(self.top.topol_dict)

    # Dihedral function type -> columns holding its force constant(s)
    dihedral_force_columns = {
        1: (6,),
        4: (6,),
        9: (6,),
        2: (9,),
        3: tuple(range(10, 16)),
    }

    def touches_qm(table, n_atoms, row, offset):
        """Return True when any atom of the given row is a QM atom."""
        return any(table[col][row] + offset in qm_indices for col in range(n_atoms))

    prev_natoms = 0
    for mol, n_mols in self.top.molecules:
        mol_bonds = nonbonded_top.get_bonds(mol)
        mol_angles = nonbonded_top.get_angles(mol)
        mol_dihedrals = nonbonded_top.get_dihedrals(mol)

        if mol_bonds:
            for row in range(len(mol_bonds[0])):
                if touches_qm(mol_bonds, 2, row, prev_natoms):
                    mol_bonds[4][row] = 0.0  # Set force constant to 0

        if mol_angles:
            for row in range(len(mol_angles[0])):
                if touches_qm(mol_angles, 3, row, prev_natoms):
                    mol_angles[5][row] = 0.0  # Set force constant to 0

        if mol_dihedrals:
            for row in range(len(mol_dihedrals[0])):
                if touches_qm(mol_dihedrals, 4, row, prev_natoms):
                    # Unknown function types are left unchanged.
                    for col in dihedral_force_columns.get(mol_dihedrals[4][row], ()):
                        mol_dihedrals[col][row] = 0.0

        prev_natoms += len(nonbonded_top[mol]) * n_mols

    # Write the modified topology to file
    self.write_topology(directory=directory, prefix=prefix, topol_dict=nonbonded_top)
    return True
|
|
1270
|
+
|
|
1271
|
+
|
|
1272
|
+
def get_equivalent_map(self, equivalent_atoms=None, use_atomtypes=False):
    """Create a mapping of equivalent atoms for the QM region.

    The result maps every contiguous QM index (position of the atom in
    ``self.qm_atoms``) to the index of the atom it is equivalent to; atoms
    without an equivalence map to themselves.

    Args:
        equivalent_atoms (dict, optional): Dictionary containing equivalent atoms in either global or local format.
            For global format: {'global': [(1,2), (3,4), ...]}
            For local format: {'local': {'mol1': [(1,2), (3,4)], 'mol2': [(1,2)]}}
            In each pair the second atom is mapped onto the first.  When both
            keys are present only 'global' is used.
        use_atomtypes (bool, optional): If True, automatically generate equivalent atom mappings based on atom types.
            Every atom is mapped to the first QM atom with the same ``type``;
            ``equivalent_atoms`` is ignored in that case.

    Returns:
        dict: Mapping of equivalent atoms in contiguous QM indices (0-based)

    Raises:
        MiMiCPyError: If no QM atoms are defined.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Create a mapping from global GROMACS indices to contiguous QM indices
    global_to_qm_idx = {global_idx: qm_idx for qm_idx, global_idx in enumerate(self.qm_atoms.index)}

    # Start from the identity mapping (using contiguous indices)
    eq_mapping = {qm_idx: qm_idx for qm_idx in range(len(self.qm_atoms))}

    if use_atomtypes:
        # Group atoms by their atom type using the type column from qm_atoms
        atomtype_groups = {}
        for qm_idx, atom_type in enumerate(self.qm_atoms['type']):
            atomtype_groups.setdefault(atom_type, []).append(qm_idx)

        # Within each group the first atom is the reference for all others
        for atoms in atomtype_groups.values():
            reference_atom = atoms[0]
            for atom in atoms[1:]:
                eq_mapping[atom] = reference_atom

        logging.info(f'Generated equivalent atom mappings for {len(atomtype_groups)} atom types')
        return eq_mapping

    if not equivalent_atoms:
        return eq_mapping

    # Handle global format
    if 'global' in equivalent_atoms:
        for atom1, atom2 in equivalent_atoms['global']:
            if atom1 in global_to_qm_idx and atom2 in global_to_qm_idx:
                eq_mapping[global_to_qm_idx[atom2]] = global_to_qm_idx[atom1]
            else:
                # Mirror the local branch, which also reports skipped pairs.
                logging.warning(f"Atom indices {atom1} or {atom2} not found in QM region")

    # Handle local format
    elif 'local' in equivalent_atoms:
        qm_indices = list(self.qm_atoms.index)

        for mol_name, mol_eq_atoms in equivalent_atoms['local'].items():
            # Molecule-local (1-based) index -> global index of the QM atoms
            # in this molecule type.  With several instances, a later
            # instance overrides an earlier one for the same local index.
            local_to_global = {}

            prev_natoms = 0
            for mol, n_mols in self.top.molecules:
                mol_len = len(self.top.topol_dict[mol])
                if mol == mol_name:
                    for instance in range(n_mols):
                        mol_start = prev_natoms + (instance * mol_len) + 1  # 1-based indexing
                        mol_end = mol_start + mol_len - 1
                        for idx in qm_indices:
                            if mol_start <= idx <= mol_end:
                                local_to_global[idx - mol_start + 1] = idx
                    break  # Found the molecule, no need to continue

                # Update prev_natoms for next molecule type
                prev_natoms += mol_len * n_mols

            if not local_to_global:
                logging.warning(f"No QM atoms found in molecule {mol_name}")
                continue

            for atom1, atom2 in mol_eq_atoms:
                if atom1 not in local_to_global or atom2 not in local_to_global:
                    logging.warning(f"Atom indices {atom1} or {atom2} not found in QM region for molecule {mol_name}")
                    continue

                global_atom1 = local_to_global[atom1]
                global_atom2 = local_to_global[atom2]

                # Convert to contiguous QM indices
                if global_atom1 in global_to_qm_idx and global_atom2 in global_to_qm_idx:
                    eq_mapping[global_to_qm_idx[global_atom2]] = global_to_qm_idx[global_atom1]

    logging.info(f'Successfully created equivalent atom mapping for {len(eq_mapping)} QM atoms')
    return eq_mapping
|
|
1375
|
+
|
|
1376
|
+
def redistribute_charges_after_dresp(self, optimized_charges, num_bonds_away=2,
                                     fixed_charge_indices=None, charge_group_constraints=None):
    """Restore original charges near the QM/MM boundary and redistribute the difference.

    Steps performed:
    1. Collect the boundary atoms, every atom reachable from them through at
       most ``num_bonds_away`` QM-only bonds, and ``fixed_charge_indices``.
    2. Reset the charges of those atoms to the values stored in
       ``self.qm_atoms['charge']``.
    3. Spread the charge removed/added by that reset over the remaining atoms,
       weighted by the absolute value of their optimized charge, either per
       charge group (when ``charge_group_constraints`` is given) or globally.
    4. Log a summary and warn if the total charge differs from the optimized total.

    All atom indices used here (``fixed_charge_indices``, the members of each
    charge group) are positions in ``optimized_charges`` / ``self.qm_atoms``,
    not topology atom numbers.
    NOTE(review): ``bond['atoms']`` in ``self.qm_interactions['bonds']`` is
    assumed to use the same positional indexing - confirm against the producer.

    Args:
        optimized_charges (numpy.ndarray or list): Optimized charges from DRESP,
            one per row of ``self.qm_atoms``. The input is never modified.
        num_bonds_away (int): Maximum bond distance from a boundary atom within
            which charges are restored (boundary atoms themselves are always restored).
        fixed_charge_indices (set, optional): Positional indices whose charges are
            restored to their original values.
        charge_group_constraints (dict, optional): Mapping
            ``group_key -> (atom_indices, target_charge)``. When given, atoms are
            only restored/rebalanced inside the groups they belong to.

    Returns:
        numpy.ndarray: Float array of redistributed charges.

    Raises:
        MiMiCPyError: If no QM atoms are defined or the number of charges does
            not match the number of QM atoms.
    """
    # Local import keeps this method self-contained (no file-level import change).
    from collections import deque

    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    if len(optimized_charges) != len(self.qm_atoms):
        raise MiMiCPyError(f'Optimized charges length ({len(optimized_charges)}) does not match number of QM atoms ({len(self.qm_atoms)})')

    # Work on a private float ndarray: a plain list would be *extended* by the
    # in-place additions below, and an integer array would truncate charges.
    optimized_charges = np.array(optimized_charges, dtype=float)

    # Without boundary atoms there is nothing to restore.
    if self.boundary_atoms.empty:
        return optimized_charges

    original_charges = np.array(self.qm_atoms['charge'].values)

    # Position of every QM atom label inside the charge arrays.
    qm_index = list(self.qm_atoms.index)
    qm_atom_to_array_idx = {atom_idx: array_idx for array_idx, atom_idx in enumerate(qm_index)}
    n_atoms = len(qm_index)

    # Undirected adjacency list built from QM-only bonds.
    adjacency_list = {}
    for bond in self.qm_interactions['bonds']:
        if bond['involves_mm']:
            continue
        atom1, atom2 = bond['atoms']
        adjacency_list.setdefault(atom1, []).append(atom2)
        adjacency_list.setdefault(atom2, []).append(atom1)

    # Boundary atoms expressed as positions in the charge arrays.
    boundary_indices = {qm_atom_to_array_idx[boundary_idx]
                        for boundary_idx in self.boundary_atoms.index}

    atoms_to_restore = set(boundary_indices)
    if fixed_charge_indices:
        atoms_to_restore.update(fixed_charge_indices)

    # Multi-source breadth-first search: an atom is restored when its distance
    # to the nearest boundary atom is at most num_bonds_away.
    distance = {idx: 0 for idx in boundary_indices}
    queue = deque(boundary_indices)
    while queue:
        current_atom = queue.popleft()
        if distance[current_atom] >= num_bonds_away:
            continue  # neighbours would be too far away
        for neighbor in adjacency_list.get(current_atom, ()):
            if neighbor not in distance:
                distance[neighbor] = distance[current_atom] + 1
                queue.append(neighbor)
    atoms_to_restore.update(distance)

    if len(atoms_to_restore) == 0:
        logging.warning(f'No atoms found within {num_bonds_away} bond(s) from boundary atoms. Returning optimized charges unchanged.')
        return optimized_charges

    redistributed_charges = optimized_charges.copy()

    if charge_group_constraints:
        for group_key, (atom_indices, target_charge) in charge_group_constraints.items():
            group_members = set(atom_indices)
            group_atoms_to_restore = atoms_to_restore & group_members
            group_atoms_to_optimize = group_members - atoms_to_restore

            if len(group_atoms_to_restore) == 0:
                # No atoms in this group need restoration, skip
                continue

            # Restore the original charges and accumulate what was taken away.
            group_charge_diff = 0.0
            for atom_idx in sorted(group_atoms_to_restore):
                original_charge = original_charges[atom_idx]
                optimized_charge = optimized_charges[atom_idx]

                redistributed_charges[atom_idx] = original_charge

                charge_diff = optimized_charge - original_charge
                group_charge_diff += charge_diff

                actual_atom_idx = qm_index[atom_idx]
                logging.debug(f'Group {group_key}: Restored charge for atom {actual_atom_idx}: '
                              f'{optimized_charge:.6f} -> {original_charge:.6f} (diff: {charge_diff:.6f})')

            # Hand the difference to the group's remaining atoms.
            if len(group_atoms_to_optimize) > 0 and abs(group_charge_diff) > 1e-10:
                free_atoms = sorted(group_atoms_to_optimize)
                weights = np.zeros(n_atoms)
                weights[free_atoms] = np.abs(optimized_charges[free_atoms])

                total_weight = np.sum(weights)
                if total_weight > 0:
                    weights = weights / total_weight
                else:
                    # All candidate charges are zero: distribute equally.
                    weights[free_atoms] = 1.0 / len(free_atoms)

                charge_per_atom = group_charge_diff * weights
                redistributed_charges += charge_per_atom

                logging.debug(f'Group {group_key}: Redistributed {group_charge_diff:.6f} charge among {len(group_atoms_to_optimize)} atoms')

            # Verify group constraint is maintained
            group_total = sum(redistributed_charges[atom_idx] for atom_idx in atom_indices)
            if abs(group_total - target_charge) > 1e-6:
                logging.warning(f'Group {group_key} constraint violated: target={target_charge:.6f}, actual={group_total:.6f}')

    else:
        # Global redistribution without group constraints.
        total_charge_diff = 0.0

        for atom_idx in sorted(atoms_to_restore):
            original_charge = original_charges[atom_idx]
            optimized_charge = optimized_charges[atom_idx]

            redistributed_charges[atom_idx] = original_charge

            charge_diff = optimized_charge - original_charge
            total_charge_diff += charge_diff

            actual_atom_idx = qm_index[atom_idx]
            if atom_idx in boundary_indices:
                logging.debug(f'Restored charge for boundary atom {actual_atom_idx}: '
                              f'{optimized_charge:.6f} -> {original_charge:.6f} (diff: {charge_diff:.6f})')
            else:
                logging.debug(f'Restored charge for atom {actual_atom_idx} (within {num_bonds_away} bond(s) from boundary): '
                              f'{optimized_charge:.6f} -> {original_charge:.6f} (diff: {charge_diff:.6f})')

        # If no charge difference, return as is
        if abs(total_charge_diff) < 1e-10:
            logging.info('No charge difference to redistribute')
            return redistributed_charges

        # Restored atoms get zero weight; the others are weighted by |charge|.
        free_atoms = [i for i in range(n_atoms) if i not in atoms_to_restore]
        weights = np.zeros(n_atoms)
        if free_atoms:
            weights[free_atoms] = np.abs(optimized_charges[free_atoms])

        total_weight = np.sum(weights)
        if total_weight > 0:
            weights = weights / total_weight
        elif free_atoms:
            # All free atoms carry zero charge: share equally among them only,
            # so that restored atoms really keep their original charges.
            weights[free_atoms] = 1.0 / len(free_atoms)
        else:
            # Every atom was restored. To conserve the total charge, fall back
            # to the non-boundary atoms, or to all atoms if there are none.
            non_boundary = [i for i in range(n_atoms) if i not in boundary_indices]
            targets = non_boundary if non_boundary else list(range(n_atoms))
            weights[targets] = 1.0 / len(targets)

        charge_per_atom = total_charge_diff * weights
        redistributed_charges += charge_per_atom

    # Summary and total-charge conservation check.
    original_total = np.sum(original_charges)
    optimized_total = np.sum(optimized_charges)
    redistributed_total = np.sum(redistributed_charges)

    logging.info('Charge redistribution summary:')
    logging.info(f'  Original total charge: {original_total:.6f}')
    logging.info(f'  Optimized total charge: {optimized_total:.6f}')
    logging.info(f'  Redistributed total charge: {redistributed_total:.6f}')
    logging.info(f'  Number of boundary atoms: {len(self.boundary_atoms)}')
    logging.info(f'  Bonds away from boundary (inclusive): {num_bonds_away}')
    logging.info(f'  Number of atoms restored: {len(atoms_to_restore)}')

    if charge_group_constraints:
        logging.info(f'  Number of charge groups: {len(charge_group_constraints)}')
        for group_key, (atom_indices, target_charge) in charge_group_constraints.items():
            group_total = sum(redistributed_charges[atom_idx] for atom_idx in atom_indices)
            logging.info(f'    Group {group_key}: target={target_charge:.6f}, actual={group_total:.6f}')

    if abs(redistributed_total - optimized_total) > 1e-6:
        logging.warning(f'Total charge not conserved: difference = {redistributed_total - optimized_total:.6f}')

    return redistributed_charges
|
|
1624
|
+
|
|
1625
|
+
def find_mm_atoms_bonded_to_qm(self):
    """Find MM atoms that share a bonded interaction with at least one QM atom.

    Every bond, angle and dihedral of the topology that mixes QM and non-QM
    atoms is inspected; the non-QM members (excluding boundary atoms) are
    collected together with the molecule type they belong to.

    Interaction lists are stored once per molecule type with molecule-local
    atom numbers, so they are shifted to global numbers for every molecule
    *instance* that contains QM atoms (not only for the first instance).
    Instances without QM atoms cannot contain a mixed interaction and are skipped.

    Returns:
        pandas.DataFrame: One row per MM atom, indexed by global atom number
        ('id'), with columns type, resid, resname, name, cgnr, charge, element,
        mass, mol, is_bound (always 0) and is_qm (always 0). Empty if no such
        atom exists.

    Raises:
        MiMiCPyError: If no QM atoms are defined.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Global (1-based) atom numbers of the QM atoms
    qm_indices = set(self.qm_atoms.index)

    # Boundary atoms are never reported as MM atoms
    boundary_indices = set()
    if not self.boundary_atoms.empty:
        boundary_indices = set(self.boundary_atoms.index)

    mm_atoms_info = {}  # gmx_idx -> (mol, local_idx)

    prev_natoms = 0  # atoms belonging to all previously visited molecule types
    for mol, n_mols in self.top.molecules:
        mol_size = len(self.top.topol_dict[mol])
        block_end = prev_natoms + mol_size * n_mols

        # 0-based instance numbers of this molecule type that hold QM atoms
        qm_instances = sorted({(idx - prev_natoms - 1) // mol_size
                               for idx in qm_indices
                               if prev_natoms < idx <= block_end})

        if qm_instances:
            # (column-wise interaction table, number of atoms per interaction)
            interaction_tables = (
                (self.top.topol_dict.get_bonds(mol), 2),
                (self.top.topol_dict.get_angles(mol), 3),
                (self.top.topol_dict.get_dihedrals(mol), 4),
            )

            for columns, n_members in interaction_tables:
                if not columns:
                    continue

                for j in range(len(columns[0])):
                    local_members = [columns[k][j] for k in range(n_members)]

                    for instance in qm_instances:
                        offset = prev_natoms + instance * mol_size
                        members = [local + offset for local in local_members]

                        qm_count = sum(1 for idx in members if idx in qm_indices)
                        if 0 < qm_count < n_members:  # Mixed QM-MM interaction
                            for idx in members:
                                if idx not in qm_indices and idx not in boundary_indices:
                                    mm_atoms_info[idx] = (mol, idx - offset)

        prev_natoms = block_end

    # Create DataFrame similar to qm_atoms
    mm_atoms_data = []

    for gmx_idx in sorted(mm_atoms_info.keys()):
        mol, local_idx = mm_atoms_info[gmx_idx]
        atom_row = self.top.topol_dict[mol].loc[local_idx]

        mm_atoms_data.append({
            'id': gmx_idx,
            'type': atom_row['type'],
            'resid': atom_row['resid'],
            'resname': atom_row['resname'],
            'name': atom_row['name'],
            'cgnr': atom_row['cgnr'],
            'charge': atom_row['charge'],
            'element': atom_row.get('element', ''),
            'mass': atom_row['mass'],
            'mol': mol,
            'is_bound': 0,  # MM atoms are not boundary atoms
            'is_qm': 0  # MM atoms are not QM atoms
        })

    mm_atoms_df = pd.DataFrame(mm_atoms_data)
    if not mm_atoms_df.empty:
        mm_atoms_df.set_index('id', inplace=True)

    logging.info(f'Found {len(mm_atoms_df)} MM atoms in QM interactions')
    return mm_atoms_df
|
|
1766
|
+
|
|
1767
|
+
def get_extended_qm_atoms_dataframe(self):
    """Return the QM atoms together with the MM atoms that interact with them.

    The QM atom table is copied and tagged with ``is_qm = 1``; the table
    returned by ``find_mm_atoms_bonded_to_qm`` is tagged with ``is_qm = 0``
    and appended below it. When no MM atoms are found, only the tagged copy of
    the QM atoms is returned.

    Returns:
        pandas.DataFrame: QM rows followed by MM rows, carrying the columns of
        both inputs plus the ``is_qm`` flag (1 for QM atoms, 0 for MM atoms).

    Raises:
        MiMiCPyError: If no QM atoms are defined.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # MM neighbours first, then a tagged private copy of the QM table
    mm_atoms_df = self.find_mm_atoms_bonded_to_qm()

    qm_atoms_copy = self.qm_atoms.copy()
    qm_atoms_copy['is_qm'] = 1

    if mm_atoms_df.empty:
        # Nothing to append
        extended_df = qm_atoms_copy
    else:
        mm_atoms_df['is_qm'] = 0
        extended_df = pd.concat([qm_atoms_copy, mm_atoms_df])

    logging.info(f'Extended QM atom DataFrame: {len(qm_atoms_copy)} QM atoms + {len(mm_atoms_df)} MM atoms = {len(extended_df)} total')
    return extended_df
|
|
1803
|
+
|
|
1804
|
+
def identify_solvent_atoms(self, solvent_names: dict = None):
    """Identify which QM atoms belong to solvent molecules.

    An atom counts as solvent when its stripped 'resname' value is listed in
    ``solvent_names['resnames']`` or, failing that, when its stripped 'mol'
    value is listed in ``solvent_names['molecules']``.

    Args:
        solvent_names (dict, optional): Dictionary with the keys 'resnames'
            and/or 'molecules'. A missing key (or a value of None) disables
            that criterion.
            Default: {'resnames': ['SOL', 'WAT', 'HOH', 'TIP3', 'TIP3P', 'TP3', 'H2O'],
                      'molecules': ['tip3p', 'spc', 'spce', 'spc/e', 'SOL', 'water', 'tip4p']}

    Returns:
        set: Sequential (0-based) positions in ``self.qm_atoms`` of the solvent atoms.

    Raises:
        MiMiCPyError: If no QM atoms are defined.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Set default solvent detection parameters
    if solvent_names is None:
        solvent_names = {'resnames': ['SOL', 'WAT', 'HOH', 'TIP3', 'TIP3P', 'TP3', 'H2O'],
                         'molecules': ['tip3p', 'spc', 'spce', 'spc/e', 'SOL', 'water', 'tip4p']}

    # .get() so that a dictionary providing only one criterion is accepted
    resnames = solvent_names.get('resnames')
    molecules = solvent_names.get('molecules')

    qm_atoms_df = self.qm_atoms
    n_atoms = len(qm_atoms_df)
    solvent_atom_indices = set()  # Sequential indices (0-based)
    if n_atoms == 0:
        return solvent_atom_indices

    # Columns are read positionally: this stays correct even if the index
    # contains duplicate labels and avoids one .loc lookup per atom.
    resname_values = qm_atoms_df['resname'].tolist() if resnames is not None else None
    mol_values = None  # loaded only if an atom is not matched by residue name

    for seq_idx in range(n_atoms):
        # Check by residue name
        if resnames is not None and str(resname_values[seq_idx]).strip() in resnames:
            solvent_atom_indices.add(seq_idx)
            continue

        # Check by molecule name if not already identified as solvent
        if molecules is not None:
            if mol_values is None:
                mol_values = qm_atoms_df['mol'].tolist()
            if str(mol_values[seq_idx]).strip() in molecules:
                solvent_atom_indices.add(seq_idx)

    return solvent_atom_indices
|
|
1863
|
+
|
|
1864
|
+
def is_solvent_interaction(self, interaction):
    """Tell whether an interaction touches at least one solvent atom.

    The atom indices stored under the 'atoms' key of ``interaction`` are
    looked up in ``self.solvent_atom_indices``.

    Args:
        interaction (dict): Interaction dictionary; its 'atoms' entry holds the
            atom indices. A missing 'atoms' key is treated as an empty list.

    Returns:
        bool: True if any listed atom is a solvent atom, False otherwise.

    Raises:
        MiMiCPyError: If no QM atoms are defined.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Stop at the first atom found among the solvent atoms
    for member in interaction.get('atoms', []):
        if member in self.solvent_atom_indices:
            return True
    return False
|
|
1883
|
+
|
|
1884
|
+
def create_charge_group_constraints(self, group_by='mol', target_charges=None,
                                    exclude_solvent=True, solvent_names=None):
    """Create charge group constraints from the qm_atoms dataframe.

    QM atoms are grouped by the value of one column and every group with more
    than one atom yields a constraint holding its member atoms and its target
    charge. Groups consisting of a single atom are dropped.

    Args:
        group_by (str): Column used to group atoms. Options:
            - 'resname': Group by residue name
            - 'mol': Group by molecule name
            - 'type': Group by atom type
            - 'resid': Group by residue ID
            Group keys are the string form of the column value (whitespace is
            stripped for all options except 'resid').
        target_charges (dict, optional): Mapping of group key to target charge.
            Groups that are not listed (or all groups, if None) use the sum of
            their original charges from qm_atoms.
            Example: {'ALA': 0.0, 'GLY': 0.0, 'SOL': 0.0}
        exclude_solvent (bool): Whether to leave out the atoms reported by
            ``identify_solvent_atoms``.
        solvent_names (dict, optional): Passed on to ``identify_solvent_atoms``.

    Returns:
        dict: Mapping ``group_key -> (atom_indices, target_charge)`` where
        ``atom_indices`` is a set of sequential (0-based) positions in qm_atoms.

    Raises:
        MiMiCPyError: If no QM atoms are defined.
        ValueError: If ``group_by`` is not one of the supported options.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Validate up front so that a wrong option is reported even when no atom
    # ends up being grouped (empty or all-solvent QM region).
    if group_by not in ('resname', 'mol', 'type', 'resid'):
        raise ValueError(f"Invalid group_by parameter: {group_by}. "
                         f"Must be one of: 'resname', 'mol', 'type', 'resid'")

    # Identify solvent atoms if needed
    solvent_atom_indices = set()
    if exclude_solvent:
        solvent_atom_indices = self.identify_solvent_atoms(solvent_names)

    groups = {}         # group key -> set of sequential atom positions
    group_charges = {}  # group key -> sum of original charges

    for seq_idx, (_, row) in enumerate(self.qm_atoms.iterrows()):
        if exclude_solvent and seq_idx in solvent_atom_indices:
            continue

        group_key = str(row[group_by])
        if group_by != 'resid':
            group_key = group_key.strip()

        groups.setdefault(group_key, set()).add(seq_idx)
        group_charges[group_key] = group_charges.get(group_key, 0.0) + row['charge']

    constraints = {}
    for group_key, atom_indices in groups.items():
        # A single atom cannot exchange charge within its group: no constraint
        if len(atom_indices) <= 1:
            continue

        if target_charges is not None and group_key in target_charges:
            target_charge = target_charges[group_key]
        else:
            # Use sum of original charges if no specific target is provided
            target_charge = group_charges[group_key]

        constraints[group_key] = (atom_indices, target_charge)

    return constraints
|