mimicpy 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. mimicpy/__init__.py +1 -1
  2. mimicpy/__main__.py +726 -2
  3. mimicpy/_authors.py +2 -2
  4. mimicpy/_version.py +2 -2
  5. mimicpy/coords/__init__.py +1 -1
  6. mimicpy/coords/base.py +1 -1
  7. mimicpy/coords/cpmdgeo.py +1 -1
  8. mimicpy/coords/gro.py +1 -1
  9. mimicpy/coords/pdb.py +1 -1
  10. mimicpy/core/__init__.py +1 -1
  11. mimicpy/core/prepare.py +3 -3
  12. mimicpy/core/selector.py +1 -1
  13. mimicpy/force_matching/__init__.py +34 -0
  14. mimicpy/force_matching/bonded_forces.py +628 -0
  15. mimicpy/force_matching/compare_top.py +809 -0
  16. mimicpy/force_matching/dresp.py +435 -0
  17. mimicpy/force_matching/nonbonded_forces.py +32 -0
  18. mimicpy/force_matching/opt_ff.py +2114 -0
  19. mimicpy/force_matching/qm_region.py +1960 -0
  20. mimicpy/plugins/__main_installer__.py +76 -0
  21. mimicpy/{__main_vmd__.py → plugins/__main_vmd__.py} +2 -2
  22. mimicpy/plugins/pymol.py +56 -0
  23. mimicpy/plugins/vmd.tcl +78 -0
  24. mimicpy/scripts/__init__.py +1 -1
  25. mimicpy/scripts/cpmd.py +1 -1
  26. mimicpy/scripts/fm_input.py +265 -0
  27. mimicpy/scripts/fmdata.py +120 -0
  28. mimicpy/scripts/mdp.py +1 -1
  29. mimicpy/scripts/ndx.py +1 -1
  30. mimicpy/scripts/script.py +1 -1
  31. mimicpy/topology/__init__.py +1 -1
  32. mimicpy/topology/itp.py +603 -35
  33. mimicpy/topology/mpt.py +1 -1
  34. mimicpy/topology/top.py +254 -15
  35. mimicpy/topology/topol_dict.py +233 -4
  36. mimicpy/utils/__init__.py +1 -1
  37. mimicpy/utils/atomic_numbers.py +1 -1
  38. mimicpy/utils/constants.py +17 -3
  39. mimicpy/utils/elements.py +1 -1
  40. mimicpy/utils/errors.py +1 -1
  41. mimicpy/utils/file_handler.py +1 -1
  42. mimicpy/utils/strings.py +1 -1
  43. mimicpy-0.3.0.dist-info/METADATA +156 -0
  44. mimicpy-0.3.0.dist-info/RECORD +50 -0
  45. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/WHEEL +1 -1
  46. mimicpy-0.3.0.dist-info/entry_points.txt +4 -0
  47. mimicpy-0.2.0.dist-info/METADATA +0 -86
  48. mimicpy-0.2.0.dist-info/RECORD +0 -38
  49. mimicpy-0.2.0.dist-info/entry_points.txt +0 -3
  50. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info/licenses}/COPYING +0 -0
  51. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info/licenses}/COPYING.LESSER +0 -0
  52. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/top_level.txt +0 -0
  53. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/zip-safe +0 -0
@@ -0,0 +1,1960 @@
1
+ import logging
2
+ import pandas as pd
3
+ from pathlib import Path
4
+ from datetime import datetime
5
+ from ..topology.top import Top
6
+ from ..topology.mpt import Mpt
7
+ from ..core.prepare import Preparation
8
+ from ..utils.errors import MiMiCPyError, SelectionError
9
+ from ..utils.file_handler import read
10
+ from ..utils.constants import nm_to_au, kb_gmx2au, au_to_nm, kjm_au, au_kjm, kb_au2gmx
11
+ import copy
12
+ import numpy as np
13
+ from math import isclose
14
+
15
def read_qm_selection(selection_file: Path, prep: Preparation):
    """Apply the QM selection commands listed in a file to ``prep``.

    Every non-blank line is split on whitespace. The first token is the
    command (matched case-insensitively); the remaining tokens, re-joined
    with single spaces, form the selection string, which is ``None`` when
    nothing follows the command. Supported commands are ``add``,
    ``add-bound``, ``delete`` and ``clear``.

    Processing is best-effort: a line that fails for any reason, including
    an unknown command, is logged as an error and skipped.

    Args:
        selection_file (Path): Path to selection file
        prep (Preparation): Preparation object to update

    Returns:
        Preparation: The same ``prep`` object after all commands were applied

    Raises:
        FileNotFoundError: If ``selection_file`` does not exist
    """
    if not selection_file.exists():
        raise FileNotFoundError(f'Selection file not found: {selection_file}')

    selection_text = read(selection_file)
    logging.info(f'Reading selections from {selection_file}')

    for entry in selection_text.splitlines():
        tokens = entry.split()
        if not tokens:
            # Blank or whitespace-only line
            continue

        keyword, *remainder = tokens
        keyword = keyword.lower()
        selection = ' '.join(remainder) if remainder else None

        try:
            if keyword in ('add', 'add-bound'):
                as_boundary = keyword == 'add-bound'
                prep.add(selection, as_boundary)
                if as_boundary:
                    logging.info(f'Added boundary atoms to QM region using selection: {selection}')
                else:
                    logging.info(f'Added atoms to QM region using selection: {selection}')
            elif keyword == 'delete':
                prep.delete(selection)
                logging.info(f'Deleted atoms from QM region using selection: {selection}')
            elif keyword == 'clear':
                prep.clear()
                logging.info('Cleared all atoms from QM region')
            else:
                raise MiMiCPyError(f'Invalid command: {keyword}')
        except Exception as exc:
            # Keep going: one bad line must not abort the remaining commands
            logging.error(f'Failed to process selection command: {str(exc)}')

    return prep
62
+
63
+ class QMRegion:
64
+ """Class to handle QM region topology formation and extraction"""
65
+
66
def __init__(self, top_file, coords_file, gmxdata=None, buffer=1000,
             guess_elements=True, nonstandard_atomtypes=None):
    """Store the input settings and immediately load the topology.

    Args:
        top_file (str): Path to topology file
        coords_file (str): Path to coordinate file
        gmxdata (str, optional): Path to GROMACS data directory
        buffer (int, optional): Buffer size for reading files
        guess_elements (bool, optional): Whether to guess atomic elements
        nonstandard_atomtypes (dict, optional): Dictionary of non-standard atom types
    """
    # Input settings, forwarded to the topology/selector in load_topology()
    self.top_file = top_file
    self.coords_file = coords_file
    self.gmxdata = gmxdata
    self.buffer = buffer
    self.guess_elements = guess_elements
    self.nonstandard_atomtypes = nonstandard_atomtypes

    # Topology, selector and preparation objects (set by load_topology())
    self.top = None
    self.mpt = None
    self.selector = None
    self.prep = None

    # QM-region state, populated once a selection has been applied
    self.qm_atoms = None
    self.boundary_atoms = None
    self.extended_qm_atoms = None
    self.qm_interactions = None
    self.qm_total_charge = None
    self.solvent_atom_indices = None

    # Index translation tables
    self.gmx_to_cpmd_map = None
    self.cpmd_to_gmx_map = None
    self.gmx_to_seq_map = None

    # Files produced by write_topology(); defined here so the attribute
    # exists even before write_topology() has been called.
    self.written_files = []

    self.load_topology()
101
+
102
+
103
def load_topology(self):
    """Read the topology file and build the objects derived from it.

    Populates ``self.top``, ``self.mpt``, ``self.selector`` and
    ``self.prep`` from the settings stored on the instance.
    """
    reader_options = {
        'mode': 'r',
        'buffer': self.buffer,
        'gmxdata': self.gmxdata,
        'guess_elements': self.guess_elements,
        'nonstandard_atomtypes': self.nonstandard_atomtypes,
    }
    self.top = Top(self.top_file, **reader_options)
    logging.info(f'Successfully loaded topology from {self.top_file}')

    # The Mpt object is derived from the parsed Top
    self.mpt = Mpt.from_top(self.top)

    # Imported at call time, as in the original (presumably to avoid an
    # import cycle - not verifiable from this file alone)
    from ..core.selector import DefaultSelector
    selector = DefaultSelector(self.mpt, self.coords_file, buffer=self.buffer)
    self.selector = selector
    self.prep = Preparation(selector)
119
+
120
def setup_qm_region(self, selection_file: Path, solvent_names:dict=None):
    """Set up QM region using selection file

    Applies the selection commands to the preparation object and then
    derives the dependent state: QM atoms, GROMACS<->CPMD index maps,
    boundary atoms, bonded interactions, solvent atom indices and the
    total QM charge.

    Args:
        selection_file (Path): Path to selection file
        solvent_names (dict, optional): Passed unchanged to
            ``identify_solvent_atoms``; its expected layout is defined there.

    Raises:
        MiMiCPyError: If the topology is not loaded, or if any step of the
            setup fails (the original exception is chained as ``__cause__``).
    """
    if not self.top:
        raise MiMiCPyError('Topology not loaded. Call load_topology() first.')

    try:
        self.prep = read_qm_selection(selection_file, self.prep)
        self.qm_atoms = self.prep.qm_atoms
        self.gmx_to_cpmd_map = self.gmx_to_cpmd_idx()
        self.cpmd_to_gmx_map = {v: k for k, v in self.gmx_to_cpmd_map.items()}
        self.boundary_atoms = self.qm_atoms[self.qm_atoms['is_bound'] == 1]
        self.qm_interactions = self.extract_qm_interactions()
        self.solvent_atom_indices = self.identify_solvent_atoms(solvent_names)
        self.qm_total_charge = self.qm_atoms['charge'].sum()
    except Exception as e:
        logging.error(f'Failed to setup QM region: {str(e)}')
        # Chain explicitly so the root cause stays attached to the wrapper
        raise MiMiCPyError(f'Failed to setup QM region: {str(e)}') from e
141
+
142
@property
def qm_charges(self):
    """Return the ``'charge'`` entry of the QM atom table.

    Raises:
        MiMiCPyError: If no QM atoms have been defined yet
            (``self.qm_atoms`` is ``None``).
    """
    # Plain identity check instead of the original double negative
    # ``not ... is not None``; the behaviour is the same.
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    return self.qm_atoms['charge']
149
+
150
def find_boundary_atoms(self):
    """Ask the preparation object to detect boundary atoms.

    On success ``self.qm_atoms`` is refreshed from the preparation object.

    Returns:
        bool: ``True`` if detection succeeded, ``False`` if it raised
        (the error is logged, not propagated).

    Raises:
        MiMiCPyError: If no preparation object is available.
    """
    if not self.prep:
        raise MiMiCPyError('QM region not setup. Call setup_qm_region() first.')

    succeeded = False
    try:
        preparation = self.prep
        preparation.find_bound_atoms()
        self.qm_atoms = preparation.qm_atoms
        logging.info('Successfully identified boundary atoms')
        succeeded = True
    except Exception as exc:
        logging.error(f'Failed to find boundary atoms: {str(exc)}')
    return succeeded
163
+
164
+
165
def write_topology(self, topol_dict=None, directory='.', prefix=''):
    """Write .itp files only for molecules containing QM atoms

    For every molecule type listed in ``self.top.molecules`` whose global
    (1-based) atom range contains at least one QM atom, a ``moleculetype``
    with its atoms, bonds, pairs, angles and dihedrals is written. The
    output file is named after the molecule's source file when
    ``get_source_file`` reports one, otherwise ``<prefix><mol>.itp``.
    Molecule types that share an output file are appended to it. The paths
    written are stored in ``self.written_files``.

    Args:
        topol_dict (dict): Dictionary of topology data; defaults to
            ``self.top.topol_dict``
        directory (str): Path to output directory for .itp files
        prefix (str): Prefix for the output .itp files

    Raises:
        MiMiCPyError: If no QM atoms are defined
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    if topol_dict is None:
        topol_dict = self.top.topol_dict

    def format_params(values, specs):
        # Parameters are written only when every one of them is available
        if any(value is None for value in values):
            return []
        return [format(value, spec) for value, spec in zip(values, specs)]

    def format_row(indices, params=()):
        # Atom indices and the function type are 6-wide integers; the
        # pre-formatted parameter strings (possibly none) follow.
        fields = [f"{value:6d}" for value in indices]
        fields.extend(params)
        return ' '.join(fields) + '\n'

    # Dihedral function type -> (header, parameter columns, format specs)
    phase_layout = ("; ai aj ak al funct phi0 cp mult\n", (5, 6, 7), ('8.1f', '8.5f', '6d'))
    dihedral_layouts = {
        1: phase_layout,
        4: phase_layout,
        9: phase_layout,
        2: ("; ai aj ak al funct param1 param2\n", (8, 9), ('8.3f', '10.5E')),
        3: ("; ai aj ak al funct C0 C1 C2 C3 C4 C5\n", tuple(range(10, 16)), ('8.5f',) * 6),
    }

    # Get QM atom indices (1-based)
    qm_indices = set(self.qm_atoms.index)

    # Create output directory if it doesn't exist
    output_dir = Path(directory)
    output_dir.mkdir(parents=True, exist_ok=True)

    self.written_files = []
    written_molecules = set()

    # Running count of atoms in all [ molecules ] entries seen so far. A
    # running total (rather than a dict keyed by molecule name) keeps the
    # offsets right when the same molecule name is listed more than once.
    atoms_before = 0
    for mol, n_mols in self.top.molecules:
        mol_size = len(topol_dict[mol])
        first_atom = atoms_before + 1
        last_atom = atoms_before + mol_size * n_mols
        atoms_before = last_atom

        if mol in written_molecules:
            # Already emitted; a second [ moleculetype ] would be a duplicate
            continue
        if not any(first_atom <= idx <= last_atom for idx in qm_indices):
            continue
        written_molecules.add(mol)

        source_file = self.top.topol_dict.get_source_file(mol)
        if source_file:
            # Use the original filename from the source file
            itp_file = output_dir / (prefix + Path(source_file).name)
        else:
            # Fallback to molecule name. Build the file name first:
            # ``output_dir / prefix + name`` would evaluate as
            # ``(output_dir / prefix) + name`` and fail with a TypeError.
            itp_file = output_dir / f"{prefix}{mol}.itp"

        nrexcl = self.top.topol_dict.get_nrexcl_value(mol)

        chunks = [
            f"; Created by mimicpy {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n[ moleculetype ]\n; Name nrexcl\n{mol:16s} {nrexcl}\n\n"
        ]

        # Atoms section
        chunks.append("[ atoms ]\n")
        chunks.append("; nr type resnr residu atom cgnr charge mass\n")
        for idx, row in topol_dict[mol].iterrows():
            chunks.append(
                f"{idx:6d} {row['type']:8s} {row['resid']:6d} {row['resname']:8s} {row['name']:8s} {row['cgnr']:6d} {row['charge']:8.6f} {row['mass']:12.5f}\n"
            )
        chunks.append("\n")

        mol_bonds = topol_dict.get_bonds(mol)
        mol_angles = topol_dict.get_angles(mol)
        mol_dihedrals = topol_dict.get_dihedrals(mol)
        mol_pairs = topol_dict.get_pairs(mol)

        # Bonds: columns are ai, aj, funct, r, k
        if mol_bonds:
            chunks.append("[ bonds ]\n")
            chunks.append("; ai aj funct r k\n")
            for j in range(len(mol_bonds[0])):
                params = format_params(
                    (mol_bonds[3][j], mol_bonds[4][j]), ('10.5E', '10.5E')
                )
                chunks.append(format_row(
                    (mol_bonds[0][j], mol_bonds[1][j], mol_bonds[2][j]), params
                ))
            chunks.append("\n")

        # Pairs: columns are ai, aj, funct
        if mol_pairs:
            chunks.append("[ pairs ]\n")
            chunks.append("; ai aj funct\n")
            for j in range(len(mol_pairs[0])):
                chunks.append(format_row(
                    (mol_pairs[0][j], mol_pairs[1][j], mol_pairs[2][j])
                ))
            chunks.append("\n")

        # Angles: columns are ai, aj, ak, funct, theta, cth
        if mol_angles:
            chunks.append("[ angles ]\n")
            chunks.append("; ai aj ak funct theta cth\n")
            for j in range(len(mol_angles[0])):
                params = format_params(
                    (mol_angles[4][j], mol_angles[5][j]), ('8.3f', '10.5E')
                )
                chunks.append(format_row(
                    (mol_angles[0][j], mol_angles[1][j], mol_angles[2][j], mol_angles[3][j]),
                    params,
                ))
            chunks.append("\n")

        # Dihedrals: one [ dihedrals ] section per function type
        if mol_dihedrals:
            dihedrals_by_func = {}
            for j in range(len(mol_dihedrals[0])):
                dihedrals_by_func.setdefault(mol_dihedrals[4][j], []).append(j)

            for func, rows in dihedrals_by_func.items():
                chunks.append("[ dihedrals ]\n")
                layout = dihedral_layouts.get(func)
                if layout is None:
                    # As before, no rows are written for function types
                    # other than 1, 2, 3, 4 and 9; now this is at least
                    # reported instead of happening silently.
                    logging.warning(
                        f'Dihedral function type {func} of molecule {mol} is not supported; '
                        f'{len(rows)} dihedral(s) were not written'
                    )
                else:
                    header, columns, specs = layout
                    chunks.append(header)
                    for j in rows:
                        params = format_params(
                            [mol_dihedrals[col][j] for col in columns], specs
                        )
                        chunks.append(format_row(
                            (mol_dihedrals[0][j], mol_dihedrals[1][j],
                             mol_dihedrals[2][j], mol_dihedrals[3][j], func),
                            params,
                        ))
                chunks.append("\n")

        # Append when another molecule type already went to this file in
        # the current call, otherwise start the file afresh.
        already_written = itp_file in self.written_files
        with open(itp_file, 'a' if already_written else 'w') as f:
            f.write(''.join(chunks))
        if not already_written:
            self.written_files.append(itp_file)

    if self.written_files:
        logging.info(f'Successfully wrote .itp files for molecules containing QM atoms: {", ".join(str(f) for f in self.written_files)}')
    else:
        logging.warning('No molecules containing QM atoms found')
375
+
376
def _fill_missing_parameters_from_force_field(self):
    """Fill missing parameters in QM interactions using force field parameters

    Bonds, angles and dihedrals in ``self.qm_interactions`` whose
    ``'parameters'`` list contains ``None`` are looked up by atom type
    (forward and reversed order) in the available force fields. Dihedrals
    additionally fall back to wildcard matching and, if nothing is found in
    any force field, to named parameter definitions. Values are converted
    with the module's unit constants before being stored, and the entry
    gets a ``'source'`` key. Entries are modified in place.
    """
    if not self.qm_interactions:
        return

    topol_dict = self.top.topol_dict
    available_force_fields = topol_dict.list_force_fields()
    if not available_force_fields:
        logging.debug('No force field parameters available for filling missing parameters')
        return

    # Sequential interaction indices refer to the extended table when
    # boundary atoms exist, otherwise to the plain QM table.
    if self.boundary_atoms.empty:
        atom_table = self.qm_atoms
    else:
        atom_table = self.extended_qm_atoms
    qm_atom_types = dict(enumerate(atom_table['type']))

    # Each force-field table is fetched at most once per call instead of
    # once per interaction.
    table_cache = {}

    def force_field_table(kind, getter, ff_name):
        cache_key = (kind, ff_name)
        if cache_key not in table_cache:
            table_cache[cache_key] = getter(ff_name)
        return table_cache[cache_key]

    def lookup(kind, getter, keys):
        # First non-empty entry wins, force fields in listed order
        for ff_name in available_force_fields:
            table = force_field_table(kind, getter, ff_name)
            for key in keys:
                if key in table and table[key]:
                    return table[key], ff_name
        return None, None

    # Fill missing bond parameters
    for bond in self.qm_interactions['bonds']:
        if None not in bond['parameters']:
            continue
        atom1, atom2 = bond['atoms']
        type1 = qm_atom_types.get(atom1)
        type2 = qm_atom_types.get(atom2)
        if not (type1 and type2):
            continue

        bond_params, ff_name = lookup(
            'bonds', topol_dict.get_force_field_bondtypes,
            (f"{type1}-{type2}", f"{type2}-{type1}"),
        )
        if bond_params:
            bond['parameters'] = [bond_params['length'] * nm_to_au, bond_params['force_constant'] * kb_gmx2au]
            bond['source'] = f'force_field_{ff_name}'
            logging.debug(f'Filled missing bond parameters for atoms {atom1}-{atom2} from force field')

    # Fill missing angle parameters
    for angle in self.qm_interactions['angles']:
        if None not in angle['parameters']:
            continue
        atom1, atom2, atom3 = angle['atoms']
        type1 = qm_atom_types.get(atom1)
        type2 = qm_atom_types.get(atom2)
        type3 = qm_atom_types.get(atom3)
        if not (type1 and type2 and type3):
            continue

        angle_params, ff_name = lookup(
            'angles', topol_dict.get_force_field_angletypes,
            (f"{type1}-{type2}-{type3}", f"{type3}-{type2}-{type1}"),
        )
        if angle_params:
            angle['parameters'] = [np.deg2rad(angle_params['angle']), angle_params['force_constant'] * kjm_au]
            angle['source'] = f'force_field_{ff_name}'
            logging.debug(f'Filled missing angle parameters for atoms {atom1}-{atom2}-{atom3} from force field')

    # Fill missing dihedral parameters
    for dihedral in self.qm_interactions['dihedrals']:
        if None not in dihedral['parameters']:
            continue
        atom1, atom2, atom3, atom4 = dihedral['atoms']
        type1 = qm_atom_types.get(atom1)
        type2 = qm_atom_types.get(atom2)
        type3 = qm_atom_types.get(atom3)
        type4 = qm_atom_types.get(atom4)
        if not (type1 and type2 and type3 and type4):
            continue

        keys = (
            f"{type1}-{type2}-{type3}-{type4}",
            f"{type4}-{type3}-{type2}-{type1}",
        )

        dihedral_params = None
        ff_name = None
        for candidate_ff in available_force_fields:
            table = force_field_table(
                'dihedrals', topol_dict.get_force_field_dihedraltypes, candidate_ff
            )
            # First try exact matches, then wildcard matches
            match = next((table[key] for key in keys if key in table), None)
            if not match:
                match = self._find_wildcard_dihedral_params(
                    type1, type2, type3, type4, table
                )
            if match:
                dihedral_params, ff_name = match, candidate_ff
                break

        if dihedral_params:
            func = dihedral['function']
            if func in [1, 4, 9]:  # Format 1
                new_parameters = [
                    np.deg2rad(dihedral_params['phi0']),
                    dihedral_params['cp'] * kjm_au,
                    dihedral_params['mult']
                ]
            elif func == 2:  # Format 2
                new_parameters = [
                    dihedral_params['param1'],
                    dihedral_params['param2'] * kjm_au
                ]
            elif func == 3:  # Format 3
                # For format 3, the parameters are stored as a list
                new_parameters = [p * kjm_au for p in dihedral_params['params']]
            else:
                new_parameters = None

            if new_parameters is not None:
                dihedral['parameters'] = new_parameters
                dihedral['source'] = f'force_field_{ff_name}'
                logging.debug(f'Filled missing dihedral parameters for atoms {atom1}-{atom2}-{atom3}-{atom4} (types: {type1}-{type2}-{type3}-{type4}) from force field')
            else:
                # Nothing was assigned, so do not claim a source for it
                logging.debug(f'Dihedral function type {func} is not supported; parameters for atoms {atom1}-{atom2}-{atom3}-{atom4} were left unfilled')
        else:
            logging.debug(f'Could not find dihedral parameters for atoms {atom1}-{atom2}-{atom3}-{atom4} (types: {type1}-{type2}-{type3}-{type4}) in any force field')

            # Not found in dihedraltypes: try to resolve named parameters
            # by matching residue and atom names
            parameter_definitions = topol_dict.get_parameter_definitions()
            if parameter_definitions:
                resolved_params = self._resolve_named_dihedral_parameters(
                    atom1, atom2, atom3, atom4,
                    dihedral['function'],
                    parameter_definitions
                )
                if resolved_params:
                    dihedral['parameters'] = resolved_params
                    dihedral['source'] = 'named_parameters'
                    logging.debug(f'Filled missing dihedral parameters for atoms {atom1}-{atom2}-{atom3}-{atom4} from named parameters')
530
+
531
def _resolve_named_dihedral_parameters(self, atom1_idx, atom2_idx, atom3_idx, atom4_idx, func, parameter_definitions):
    """Resolve named dihedral parameters by matching residue and atom names

    The four sequential indices are mapped back to global atom indices,
    each atom is located in its molecule type to obtain its residue and
    atom name, and definitions named
    ``torsion_<RES>_<A1>_<A2>_<A3>_<A4>_mult<N>`` (forward or reversed atom
    order, N = 1..6) are looked up in ``parameter_definitions``.

    Args:
        atom1_idx, atom2_idx, atom3_idx, atom4_idx: Sequential QM atom indices
        func: Dihedral function type
        parameter_definitions: Dictionary of parameter definitions

    Returns:
        list: Resolved parameters or None if not found
    """
    # Sequential indices refer to the extended atom table when boundary
    # atoms exist, so invert them against that same table.
    atom_table = self.qm_atoms
    boundary_atoms = getattr(self, 'boundary_atoms', None)
    extended_atoms = getattr(self, 'extended_qm_atoms', None)
    if boundary_atoms is not None and not boundary_atoms.empty and extended_atoms is not None:
        atom_table = extended_atoms

    def locate(gmx_idx):
        # Return the topology row of a global 1-based atom index. The
        # offset accumulates over all [ molecules ] entries, so entries
        # after the first are addressed correctly.
        atoms_before = 0
        for mol, n_mols in self.top.molecules:
            mol_atoms = self.top.topol_dict[mol]
            mol_size = len(mol_atoms)
            span = mol_size * n_mols
            if mol_size and atoms_before < gmx_idx <= atoms_before + span:
                # 1-based position inside one instance of the molecule
                local_idx = (gmx_idx - atoms_before - 1) % mol_size + 1
                return mol_atoms.loc[local_idx]
            atoms_before += span
        return None

    atom_info = []
    for seq_idx in (atom1_idx, atom2_idx, atom3_idx, atom4_idx):
        atom_row = locate(atom_table.index[seq_idx])
        if atom_row is None:
            return None
        atom_info.append({
            'resname': atom_row['resname'],
            'name': atom_row['name']
        })

    # Format: torsion_RES_ATOM1_ATOM2_ATOM3_ATOM4_multN
    resname = atom_info[0]['resname']  # Use first atom's residue
    atom_names = [info['name'] for info in atom_info]

    # Dihedrals may be defined in either direction
    name_combinations = [
        f"torsion_{resname}_{atom_names[0]}_{atom_names[1]}_{atom_names[2]}_{atom_names[3]}",
        f"torsion_{resname}_{atom_names[3]}_{atom_names[2]}_{atom_names[1]}_{atom_names[0]}"
    ]

    for base_name in name_combinations:
        # Try different multiplicities (mult1, mult2, mult3, etc.)
        for mult in range(1, 7):  # Usually up to mult6
            param_name = f"{base_name}_mult{mult}"
            if param_name not in parameter_definitions:
                continue
            param_values = parameter_definitions[param_name]
            if len(param_values) < 3:
                continue
            try:
                phi0 = float(param_values[0])
                force_constant = float(param_values[1])
                multiplicity = int(param_values[2])
            except (ValueError, IndexError) as e:
                logging.debug(f'Error parsing parameter values for {param_name}: {e}')
                continue

            # Convert to appropriate units based on function type
            if func in [1, 4, 9]:  # Format 1
                return [
                    np.deg2rad(phi0),  # Convert to radians
                    force_constant * kjm_au,  # Convert to atomic units
                    multiplicity
                ]
            elif func == 2:  # Format 2
                return [
                    phi0,  # Keep as is
                    force_constant * kjm_au  # Convert to atomic units
                ]
            elif func == 3:  # Format 3
                # Expanding named parameters for format 3 is not implemented
                logging.debug(f'Format 3 dihedral with named parameters not fully implemented for {param_name}')
                return None

    return None
624
+
625
def _find_wildcard_dihedral_params(self, type1, type2, type3, type4, ff_dihedraltypes):
    """Look up dihedral parameters, preferring the least-wildcarded key.

    The atom types are tried in forward and then reversed order. For each
    ordering an exact key returns immediately. Otherwise keys with one,
    two, three and finally all four positions replaced by a wildcard
    (``X`` first, then ``*``) are probed, and the first hit of the highest
    specificity seen across both orderings is kept.

    Args:
        type1, type2, type3, type4: Atom types
        ff_dihedraltypes: Dictionary of dihedral types from force field

    Returns:
        dict: Dihedral parameters or None if not found
    """
    wildcard_symbols = ('X', '*')
    # (specificity, label used in the log message, positions to wildcard)
    tiers = (
        (3, 'single', ((0,), (1,), (2,), (3,))),
        (2, 'double', ((0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3))),
        (1, 'triple', ((0, 1, 2), (0, 1, 3), (0, 2, 3), (1, 2, 3))),
        (0, 'all', ((0, 1, 2, 3),)),
    )

    forward = (type1, type2, type3, type4)
    winner = None
    winner_rank = -1  # Higher number = more specific

    for ordering in (forward, forward[::-1]):
        exact_key = '-'.join(map(str, ordering))
        if exact_key in ff_dihedraltypes:
            logging.debug(f'Found exact match for dihedral {exact_key}')
            return ff_dihedraltypes[exact_key]  # Most specific, return immediately

        for rank, label, position_sets in tiers:
            for positions in position_sets:
                for symbol in wildcard_symbols:
                    candidate = [str(atom_type) for atom_type in ordering]
                    for position in positions:
                        candidate[position] = symbol
                    key = '-'.join(candidate)
                    # Only a strictly more specific hit replaces the current one
                    if key in ff_dihedraltypes and winner_rank < rank:
                        winner = ff_dihedraltypes[key]
                        winner_rank = rank
                        logging.debug(f'Found {label} wildcard match for dihedral {key} (specificity={rank})')

    return winner
713
+
714
+
715
+
716
+ def extract_qm_interactions(self):
717
+ """Extract all bonded interactions involving QM atoms from the topology
718
+
719
+ This function handles both molecule-specific interactions (defined in .itp files)
720
+ and force field interactions (defined in force field files and stored in topol_dict).
721
+ It includes interactions where at least one atom is in the QM region.
722
+
723
+ Returns:
724
+ dict: Dictionary containing bonds, angles, and dihedrals involving QM atoms
725
+ """
726
+ if not self.qm_atoms is not None:
727
+ raise MiMiCPyError('No QM atoms defined')
728
+
729
+
730
+
731
+ # Get QM atom indices for checking if atoms are QM
732
+ qm_indices = set(self.qm_atoms.index)
733
+
734
+ # Get boundary atoms if needed
735
+ boundary_indices = set()
736
+ if not self.boundary_atoms.empty:
737
+ boundary_indices = set(self.boundary_atoms.index)
738
+
739
+ # Get extended QM atoms DataFrame (includes both QM and MM atoms)
740
+ self.extended_qm_atoms = self.get_extended_qm_atoms_dataframe()
741
+
742
+ # Create mapping from GROMACS atom indices to sequential indices in extended DataFrame
743
+ # This maps GROMACS indices to sequential indices (0, 1, 2, ...) in the extended array
744
+ self.gmx_to_seq_map = {}
745
+ for seq_idx, gmx_idx in enumerate(self.extended_qm_atoms.index):
746
+ self.gmx_to_seq_map[gmx_idx] = seq_idx
747
+
748
+ else:
749
+ self.gmx_to_seq_map = {gmx_idx: idx for idx, gmx_idx in enumerate(self.qm_atoms.index)}
750
+
751
+ qm_interactions = {
752
+ 'bonds': [],
753
+ 'angles': [],
754
+ 'dihedrals': []
755
+ }
756
+
757
+ bonded_count = 0
758
+ prev_natoms = 0
759
+ for mol, n_mols in self.top.molecules:
760
+ # Get interactions for this molecule
761
+ mol_bonds = self.top.topol_dict.get_bonds(mol)
762
+ mol_angles = self.top.topol_dict.get_angles(mol)
763
+ mol_dihedrals = self.top.topol_dict.get_dihedrals(mol)
764
+
765
+ # Get atom types for this molecule
766
+ mol_atoms = self.top.topol_dict[mol]
767
+
768
+ # Process bonds
769
+ if mol_bonds:
770
+ for j in range(len(mol_bonds[0])):
771
+ i_idx = mol_bonds[0][j] + prev_natoms
772
+ j_idx = mol_bonds[1][j] + prev_natoms
773
+ # Include bonds where at least one atom is in QM region
774
+ if i_idx in qm_indices or j_idx in qm_indices:
775
+ # Convert GROMACS indices to sequential indices in extended array
776
+ seq_i = self.gmx_to_seq_map[i_idx]
777
+ seq_j = self.gmx_to_seq_map[j_idx]
778
+
779
+ # Check if bond involves boundary atoms
780
+ involves_boundary = (i_idx in boundary_indices or j_idx in boundary_indices) if not self.boundary_atoms.empty else False
781
+
782
+ # Handle None parameters (bonds without explicit parameters)
783
+ param1 = mol_bonds[3][j]
784
+ param2 = mol_bonds[4][j]
785
+
786
+ if param1 is not None and param2 is not None:
787
+ parameters = [param1 * nm_to_au, param2 * kb_gmx2au]
788
+ else:
789
+ # Parameters not available, will be filled from force field later
790
+ parameters = [None, None]
791
+
792
+
793
+ qm_interactions['bonds'].append({
794
+ 'atoms': [seq_i, seq_j], # Use sequential indices
795
+ 'function': mol_bonds[2][j],
796
+ 'parameters': parameters,
797
+ 'index': bonded_count,
798
+ 'optimize': not involves_boundary, # Don't optimize if involves boundary
799
+ 'involves_boundary': involves_boundary,
800
+ 'involves_mm': (i_idx not in qm_indices or j_idx not in qm_indices),
801
+ 'molecule': mol
802
+ })
803
+ bonded_count += 1
804
+
805
+ # Process angles
806
+ if mol_angles:
807
+ for j in range(len(mol_angles[0])):
808
+ i_idx = mol_angles[0][j] + prev_natoms
809
+ j_idx = mol_angles[1][j] + prev_natoms
810
+ k_idx = mol_angles[2][j] + prev_natoms
811
+ # Include angles where at least one atom is in QM region
812
+ if i_idx in qm_indices or j_idx in qm_indices or k_idx in qm_indices:
813
+ # Convert GROMACS indices to sequential indices in extended array
814
+ seq_i = self.gmx_to_seq_map[i_idx]
815
+ seq_j = self.gmx_to_seq_map[j_idx]
816
+ seq_k = self.gmx_to_seq_map[k_idx]
817
+
818
+ # Check if angle involves boundary atoms
819
+ involves_boundary = (i_idx in boundary_indices or j_idx in boundary_indices or k_idx in boundary_indices) if not self.boundary_atoms.empty else False
820
+
821
+ # Handle None parameters (angles without explicit parameters)
822
+ param1 = mol_angles[4][j]
823
+ param2 = mol_angles[5][j]
824
+
825
+ if param1 is not None and param2 is not None:
826
+ parameters = [np.deg2rad(param1), param2 * kjm_au]
827
+ else:
828
+ # Parameters not available, will be filled from force field later
829
+ parameters = [None, None]
830
+
831
+ qm_interactions['angles'].append({
832
+ 'atoms': [seq_i, seq_j, seq_k], # Use sequential indices
833
+ 'function': mol_angles[3][j],
834
+ 'parameters': parameters,
835
+ 'index': bonded_count,
836
+ 'optimize': not involves_boundary, # Don't optimize if involves boundary
837
+ 'involves_boundary': involves_boundary,
838
+ 'involves_mm': (i_idx not in qm_indices or j_idx not in qm_indices or k_idx not in qm_indices),
839
+ 'molecule': mol
840
+ })
841
+ bonded_count += 1
842
+
843
+ # Process dihedrals
844
+ if mol_dihedrals:
845
+ for j in range(len(mol_dihedrals[0])):
846
+ i_idx = mol_dihedrals[0][j] + prev_natoms
847
+ j_idx = mol_dihedrals[1][j] + prev_natoms
848
+ k_idx = mol_dihedrals[2][j] + prev_natoms
849
+ l_idx = mol_dihedrals[3][j] + prev_natoms
850
+ # Include dihedrals where at least one atom is in QM region
851
+ if i_idx in qm_indices or j_idx in qm_indices or k_idx in qm_indices or l_idx in qm_indices:
852
+ # Convert GROMACS indices to sequential indices in extended array
853
+ seq_i = self.gmx_to_seq_map[i_idx]
854
+ seq_j = self.gmx_to_seq_map[j_idx]
855
+ seq_k = self.gmx_to_seq_map[k_idx]
856
+ seq_l = self.gmx_to_seq_map[l_idx]
857
+
858
+ # Check if dihedral involves boundary atoms
859
+ involves_boundary = (i_idx in boundary_indices or j_idx in boundary_indices or k_idx in boundary_indices or l_idx in boundary_indices) if not self.boundary_atoms.empty else False
860
+
861
+ func = mol_dihedrals[4][j]
862
+ if func in [1, 4, 9]: # Format 1
863
+ # Handle None parameters for dihedrals
864
+ phi0 = mol_dihedrals[5][j]
865
+ cp = mol_dihedrals[6][j]
866
+ mult = mol_dihedrals[7][j]
867
+
868
+ if phi0 is not None and cp is not None and mult is not None:
869
+ parameters = [np.deg2rad(phi0), cp * kjm_au, mult]
870
+ else:
871
+ parameters = [None, None, None]
872
+
873
+ qm_interactions['dihedrals'].append({
874
+ 'atoms': [seq_i, seq_j, seq_k, seq_l], # Use sequential indices
875
+ 'function': func,
876
+ 'parameters': parameters,
877
+ 'index': bonded_count,
878
+ 'optimize': not involves_boundary, # Don't optimize if involves boundary
879
+ 'involves_boundary': involves_boundary,
880
+ 'involves_mm': (i_idx not in qm_indices or j_idx not in qm_indices or k_idx not in qm_indices or l_idx not in qm_indices),
881
+ 'molecule': mol
882
+ })
883
+ elif func == 2: # Format 2
884
+ # Handle None parameters for dihedrals
885
+ p1 = mol_dihedrals[8][j]
886
+ p2 = mol_dihedrals[9][j]
887
+
888
+ if p1 is not None and p2 is not None:
889
+ parameters = [p1, p2 * kjm_au]
890
+ else:
891
+ parameters = [None, None]
892
+
893
+ qm_interactions['dihedrals'].append({
894
+ 'atoms': [seq_i, seq_j, seq_k, seq_l], # Use sequential indices
895
+ 'function': func,
896
+ 'parameters': parameters,
897
+ 'index': bonded_count,
898
+ 'optimize': not involves_boundary, # Don't optimize if involves boundary
899
+ 'involves_boundary': involves_boundary,
900
+ 'involves_mm': (i_idx not in qm_indices or j_idx not in qm_indices or k_idx not in qm_indices or l_idx not in qm_indices),
901
+ 'molecule': mol
902
+ })
903
+ elif func == 3: # Format 3
904
+ # Handle None parameters for dihedrals
905
+ c_params = [mol_dihedrals[10+k][j] for k in range(6)]
906
+ if all(p is not None for p in c_params):
907
+ parameters = [p * kjm_au for p in c_params]
908
+ else:
909
+ parameters = [None] * 6
910
+
911
+ qm_interactions['dihedrals'].append({
912
+ 'atoms': [seq_i, seq_j, seq_k, seq_l], # Use sequential indices
913
+ 'function': func,
914
+ 'parameters': parameters,
915
+ 'index': bonded_count,
916
+ 'optimize': not involves_boundary, # Don't optimize if involves boundary
917
+ 'involves_boundary': involves_boundary,
918
+ 'involves_mm': (i_idx not in qm_indices or j_idx not in qm_indices or k_idx not in qm_indices or l_idx not in qm_indices),
919
+ 'molecule': mol
920
+ })
921
+ bonded_count += 1
922
+
923
+ prev_natoms += len(self.top.topol_dict[mol]) * n_mols
924
+
925
+ self.qm_interactions = qm_interactions
926
+
927
+ # Log statistics
928
+ total_bonds = len(qm_interactions["bonds"])
929
+ total_angles = len(qm_interactions["angles"])
930
+ total_dihedrals = len(qm_interactions["dihedrals"])
931
+
932
+ if not self.boundary_atoms.empty:
933
+ boundary_bonds = sum(1 for bond in qm_interactions["bonds"] if bond['involves_boundary'])
934
+ boundary_angles = sum(1 for angle in qm_interactions["angles"] if angle['involves_boundary'])
935
+ boundary_dihedrals = sum(1 for dihedral in qm_interactions["dihedrals"] if dihedral['involves_boundary'])
936
+
937
+ logging.info(f'Extracted QM interactions: {total_bonds} bonds ({boundary_bonds} boundary), '
938
+ f'{total_angles} angles ({boundary_angles} boundary), '
939
+ f'{total_dihedrals} dihedrals ({boundary_dihedrals} boundary)')
940
+ else:
941
+ logging.info(f'Extracted QM interactions: {total_bonds} bonds, '
942
+ f'{total_angles} angles, '
943
+ f'{total_dihedrals} dihedrals')
944
+
945
+ # Fill missing parameters from force field
946
+ self._fill_missing_parameters_from_force_field()
947
+
948
+ return self.qm_interactions
949
+
950
def gmx_to_cpmd_idx(self):
    """Map GROMACS atom indices to CPMD atom indices for all atoms (QM and MM).

    Follows the same ordering logic as CpmdScript.gmx_to_cpmd_idx: atoms are
    sorted by a per-atom ``type_id`` and then by GROMACS id, and numbered
    consecutively from 1.

    Returns:
        dict: Dictionary mapping GROMACS atom indices to CPMD indices

    Raises:
        MiMiCPyError: If no QM atoms are defined
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Full atom table from the Mpt object and the atom types in order of first use
    topol = self.mpt.select('all')
    typelist = topol['type'].unique()

    # Rows of the GROMACS [ atomtypes ] section, restricted to the types in use
    atomtypes = self.top.atom_types_df.set_index(['type']).loc[typelist]

    # Give every atom type an integer id; a type that shares element and
    # (approximately) sigma/epsilon with an earlier type reuses that type's id.
    type_id = {}
    count = 0
    for i, rowi in atomtypes.iterrows():
        for j, rowj in atomtypes.iterrows():
            if i == j:
                # Reached the row itself without finding an earlier twin
                type_id[i] = count
                count += 1
                break
            if (rowi['X'] == rowj['X']
                    and isclose(rowi['sigma'], rowj['sigma'])
                    and isclose(rowi['epsilon'], rowj['epsilon'])):
                type_id[i] = type_id[j]
                break

    topol.insert(2, "type_id", [type_id[i] for i in topol['type']], True)

    # Push QM atoms to the front by giving them negative type ids, ordered by
    # (is_bound, element).
    # NOTE(review): assumes the whole reordering is conditional on `self.prep`
    # being set, as in the original statement order -- confirm.
    if getattr(self, 'prep', None) is not None:
        sorted_qm_atoms = self.qm_atoms.sort_values(by=['is_bound', 'element']).reset_index()
        natms = len(sorted_qm_atoms)
        for i, row in sorted_qm_atoms.iterrows():
            topol.at[row['id'], 'type_id'] = -natms + i

    # Sort by type_id, then GROMACS id; the position in that order is the CPMD id
    topol = topol.reset_index()
    topol.sort_values(by=['type_id', 'id'], inplace=True)
    cpmd_idx = list(range(1, self.mpt.number_of_atoms + 1))
    gmx_idx = topol['id'].to_list()
    return dict(zip(gmx_idx, cpmd_idx))
998
+
999
def update_topology(self, ff_optimize, bond2params):
    """Update the system's topology with optimized parameters from force matching

    Every entry of ``self.qm_interactions`` is converted in place from atomic
    units back to GROMACS units (optimized slots are taken from ``ff_optimize``,
    all other slots keep their stored value) and then copied into the matching
    row of ``self.top.topol_dict``. Because the conversion is in place, this
    method should be called once per extraction of ``qm_interactions``.

    Args:
        ff_optimize (numpy.ndarray): Array of optimized force field parameters
        bond2params (dict): Mapping of interaction indices to parameter indices in ff_optimize.
            A ``None`` slot, or a missing interaction, means "not optimized".

    Returns:
        bool: True if update was successful, False otherwise

    Raises:
        MiMiCPyError: If ``qm_interactions`` has not been extracted yet
    """
    if not hasattr(self, 'qm_interactions'):
        raise MiMiCPyError('No QM interactions defined. Call extract_qm_interactions() first.')

    def pick(stored, slots, pos):
        """Return the optimized value for slot ``pos``, or ``stored`` if that slot is not optimized."""
        opt_idx = None if slots is None else slots[pos]
        return stored if opt_idx is None else ff_optimize[opt_idx]

    def scaled(value, factor):
        """Multiply by a unit factor, passing through parameters that are still unset (None)."""
        return None if value is None else value * factor

    def angle_deg(value):
        """Convert an angle in radians to degrees, folded into [0, 180]."""
        return None if value is None else np.rad2deg(np.arccos(np.cos(value)))

    def dihedral_deg(value):
        """Convert a dihedral phase in radians to degrees."""
        return None if value is None else np.rad2deg(value)

    try:
        qm_interactions = self.qm_interactions

        # --- 1. Convert every interaction back to GROMACS units -----------------
        for bond in qm_interactions['bonds']:
            slots = bond2params.get(bond['index'])
            params = bond['parameters']
            params[0] = scaled(pick(params[0], slots, 0), au_to_nm)    # length
            params[1] = scaled(pick(params[1], slots, 1), kb_au2gmx)   # force constant

        for angle in qm_interactions['angles']:
            slots = bond2params.get(angle['index'])
            params = angle['parameters']
            params[0] = angle_deg(pick(params[0], slots, 0))           # equilibrium angle
            params[1] = scaled(pick(params[1], slots, 1), au_kjm)      # force constant

        for dihedral in qm_interactions['dihedrals']:
            slots = bond2params.get(dihedral['index'])
            params = dihedral['parameters']
            func = dihedral['function']
            if func in (1, 4, 9):  # Format 1: phi0, cp, mult
                params[0] = dihedral_deg(pick(params[0], slots, 0))
                params[1] = scaled(pick(params[1], slots, 1), au_kjm)
                params[2] = pick(params[2], slots, 2)                  # multiplicity has no unit
            elif func == 2:  # Format 2: param1, param2 (force constant)
                params[0] = pick(params[0], slots, 0)                  # stored unconverted
                # The force constant is stored in atomic units like all others,
                # so it has to be converted back as well.
                params[1] = scaled(pick(params[1], slots, 1), au_kjm)
            elif func == 3:  # Format 3: C0-C5
                for k in range(6):
                    params[k] = scaled(pick(params[k], slots, k), au_kjm)

        # --- 2. Index the interactions by their GROMACS atom numbers -----------
        # Sequential indices refer to the extended QM table when boundary atoms exist
        if self.boundary_atoms.empty:
            seq_index = self.qm_atoms.index
        else:
            seq_index = self.extended_qm_atoms.index
        seq_to_gmx_map = dict(enumerate(seq_index))

        def gmx_atoms(entry):
            """GROMACS atom numbers of an interaction (falls back to seq + 1 if unmapped)."""
            return tuple(seq_to_gmx_map.get(a, a + 1) for a in entry['atoms'])

        # Each key holds a FIFO list so that several terms on the same atoms
        # (e.g. multiple function-9 dihedral rows) are handed out one per
        # topology row, in extraction order, instead of all receiving the first.
        pending_bonds = {}
        for bond in qm_interactions['bonds']:
            pending_bonds.setdefault(tuple(sorted(gmx_atoms(bond))), []).append(bond)

        pending_angles = {}
        for angle in qm_interactions['angles']:
            pending_angles.setdefault(gmx_atoms(angle), []).append(angle)

        pending_dihedrals = {}
        for dihedral in qm_interactions['dihedrals']:
            key = gmx_atoms(dihedral) + (dihedral['function'],)
            pending_dihedrals.setdefault(key, []).append(dihedral)

        # --- 3. Write the converted parameters into the topology ---------------
        prev_natoms = 0
        for mol, n_mols in self.top.molecules:
            mol_bonds = self.top.topol_dict.get_bonds(mol)
            if mol_bonds:
                for j in range(len(mol_bonds[0])):
                    # Bonds match in either atom order
                    key = tuple(sorted((mol_bonds[0][j] + prev_natoms,
                                        mol_bonds[1][j] + prev_natoms)))
                    matches = pending_bonds.get(key)
                    if matches:
                        bond = matches.pop(0)
                        mol_bonds[3][j] = bond['parameters'][0]  # param1
                        mol_bonds[4][j] = bond['parameters'][1]  # param2

            mol_angles = self.top.topol_dict.get_angles(mol)
            if mol_angles:
                for j in range(len(mol_angles[0])):
                    key = (mol_angles[0][j] + prev_natoms,
                           mol_angles[1][j] + prev_natoms,
                           mol_angles[2][j] + prev_natoms)
                    matches = pending_angles.get(key)
                    if matches:
                        angle = matches.pop(0)
                        mol_angles[4][j] = angle['parameters'][0]  # param1
                        mol_angles[5][j] = angle['parameters'][1]  # param2

            mol_dihedrals = self.top.topol_dict.get_dihedrals(mol)
            if mol_dihedrals:
                for j in range(len(mol_dihedrals[0])):
                    key = (mol_dihedrals[0][j] + prev_natoms,
                           mol_dihedrals[1][j] + prev_natoms,
                           mol_dihedrals[2][j] + prev_natoms,
                           mol_dihedrals[3][j] + prev_natoms,
                           mol_dihedrals[4][j])
                    matches = pending_dihedrals.get(key)
                    if not matches:
                        continue
                    dihedral = matches.pop(0)
                    func = dihedral['function']
                    if func in (1, 4, 9):  # Format 1
                        mol_dihedrals[5][j] = dihedral['parameters'][0]  # phi0
                        mol_dihedrals[6][j] = dihedral['parameters'][1]  # cp
                        mol_dihedrals[7][j] = dihedral['parameters'][2]  # mult
                    elif func == 2:  # Format 2
                        mol_dihedrals[8][j] = dihedral['parameters'][0]  # param1
                        mol_dihedrals[9][j] = dihedral['parameters'][1]  # param2
                    elif func == 3:  # Format 3
                        for k in range(6):
                            mol_dihedrals[10 + k][j] = dihedral['parameters'][k]  # C0-C5

            prev_natoms += len(self.top.topol_dict[mol]) * n_mols

        logging.info('Successfully updated topology with optimized QM interaction parameters')
        return True

    except Exception as e:
        # Best-effort contract: report the failure to the caller via the return value
        logging.error(f'Failed to update topology: {str(e)}')
        return False
1162
+
1163
+
1164
def update_qm_charges(self, new_charges):
    """Update the topology with new charges for QM atoms

    Args:
        new_charges (dict or pandas.Series or np.ndarray or list): New charges for QM atoms.
            If dict, keys should be QM atom indices and values should be charges.
            If Series, index should be QM atom indices and values should be charges.
            If array or list, it should be in the same order as self.qm_atoms.index.

    Returns:
        bool: True if every QM atom was located in a molecule and updated,
            False if at least one QM atom could not be located

    Raises:
        MiMiCPyError: If no QM atoms are defined or charges are missing for some QM atoms
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Normalize the input to a Series indexed by QM atom index
    if isinstance(new_charges, dict):
        new_charges = pd.Series(new_charges)
    elif isinstance(new_charges, (list, np.ndarray)):
        new_charges = pd.Series(new_charges, index=self.qm_atoms.index)

    # Verify all QM atoms have new charges
    missing_atoms = set(self.qm_atoms.index) - set(new_charges.index)
    if missing_atoms:
        raise MiMiCPyError(f'Missing charges for QM atoms: {missing_atoms}')

    # For each QM atom, find the molecule instance whose atom range contains it
    unplaced = []
    for idx in self.qm_atoms.index:
        for mol, mol_offset in self.top.mol_offsets:
            mol_atoms = self.top.topol_dict[mol]
            if mol_offset + 1 <= idx <= mol_offset + len(mol_atoms):
                # The per-molecule table is addressed by molecule-local, 1-based atom number
                mol_atoms.loc[idx - mol_offset, 'charge'] = new_charges[idx]
                break
        else:
            # No molecule instance covers this atom index
            unplaced.append(idx)

    if unplaced:
        logging.warning(f'Could not locate QM atoms in topology, charges not updated: {unplaced}')
        return False
    return True
1201
+
1202
+
1203
def write_non_bonded_itp(self, directory='.', prefix='non_bonded'):
    """Write a topology in which bonded terms involving QM atoms are switched off.

    Works on a deep copy of the topology dictionary: the force constants of all
    bonds, angles and dihedrals that contain at least one QM atom are set to
    zero, and the copy is passed to ``self.write_topology``. The topology held
    by ``self.top`` is not modified.

    Args:
        directory (str): Directory to write the output .itp file
        prefix (str): Prefix for the output .itp file

    Returns:
        bool: True once the modified topology has been handed to ``write_topology``

    Raises:
        MiMiCPyError: If no QM atoms are defined
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Get QM atom indices
    qm_indices = set(self.qm_atoms.index)

    # Work on a copy so the live topology keeps its bonded parameters
    nonbonded_top = copy.deepcopy(self.top.topol_dict)

    prev_natoms = 0
    for mol, n_mols in self.top.molecules:
        mol_bonds = nonbonded_top.get_bonds(mol)
        mol_angles = nonbonded_top.get_angles(mol)
        mol_dihedrals = nonbonded_top.get_dihedrals(mol)

        # Bonds: columns 0-1 are the atoms, column 4 the force constant
        if mol_bonds:
            for j in range(len(mol_bonds[0])):
                atoms = (mol_bonds[0][j] + prev_natoms, mol_bonds[1][j] + prev_natoms)
                if any(idx in qm_indices for idx in atoms):
                    mol_bonds[4][j] = 0.0

        # Angles: columns 0-2 are the atoms, column 5 the force constant
        if mol_angles:
            for j in range(len(mol_angles[0])):
                atoms = tuple(mol_angles[c][j] + prev_natoms for c in range(3))
                if any(idx in qm_indices for idx in atoms):
                    mol_angles[5][j] = 0.0

        # Dihedrals: columns 0-3 are the atoms, column 4 the function type
        if mol_dihedrals:
            for j in range(len(mol_dihedrals[0])):
                atoms = tuple(mol_dihedrals[c][j] + prev_natoms for c in range(4))
                if not any(idx in qm_indices for idx in atoms):
                    continue
                func = mol_dihedrals[4][j]
                if func == 3:  # Format 3: coefficients C0-C5 in columns 10-15
                    for k in range(10, 16):
                        mol_dihedrals[k][j] = 0.0
                elif func in (1, 4, 9):  # Format 1: force constant (cp) in column 6
                    mol_dihedrals[6][j] = 0.0
                elif func == 2:  # Format 2: force constant (param2) in column 9
                    mol_dihedrals[9][j] = 0.0

        prev_natoms += len(nonbonded_top[mol]) * n_mols

    # Write the modified topology to file
    self.write_topology(directory=directory, prefix=prefix, topol_dict=nonbonded_top)
    return True
1270
+
1271
+
1272
def get_equivalent_map(self, equivalent_atoms=None, use_atomtypes=False):
    """Create a mapping of equivalent atoms for QM region

    Args:
        equivalent_atoms (dict, optional): Dictionary containing equivalent atoms in either global or local format.
            For global format: {'global': [(1,2), (3,4), ...]}
            For local format: {'local': {'mol1': [(1,2), (3,4)], 'mol2': [(1,2)]}}
            Local pairs are molecule-local, 1-based atom numbers and are applied to
            every instance of the molecule that has both atoms in the QM region.
        use_atomtypes (bool, optional): If True, automatically generate equivalent atom mappings based on atom types.
            This will map atoms with the same atom type to be equivalent.
            ``equivalent_atoms`` is ignored in that case.

    Returns:
        dict: Mapping of equivalent atoms in contiguous QM indices (0-based).
            Each QM atom maps to itself unless it was declared equivalent to another atom.

    Raises:
        MiMiCPyError: If no QM atoms are defined
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Global GROMACS index -> contiguous (0-based) QM index
    global_to_qm_idx = {global_idx: qm_idx for qm_idx, global_idx in enumerate(self.qm_atoms.index)}

    # Start from the identity mapping
    eq_mapping = {qm_idx: qm_idx for qm_idx in range(len(self.qm_atoms))}

    if use_atomtypes:
        # Group QM atoms by atom type; the first atom of each type is the reference
        atomtype_groups = {}
        for qm_idx, atom_type in enumerate(self.qm_atoms['type']):
            atomtype_groups.setdefault(atom_type, []).append(qm_idx)

        for members in atomtype_groups.values():
            reference_atom = members[0]
            for atom in members[1:]:
                eq_mapping[atom] = reference_atom

        logging.info(f'Generated equivalent atom mappings for {len(atomtype_groups)} atom types')
        return eq_mapping

    if not equivalent_atoms:
        return eq_mapping

    # Handle global format
    if 'global' in equivalent_atoms:
        for atom1, atom2 in equivalent_atoms['global']:
            # Pairs that are not fully inside the QM region are ignored
            if atom1 in global_to_qm_idx and atom2 in global_to_qm_idx:
                eq_mapping[global_to_qm_idx[atom2]] = global_to_qm_idx[atom1]

    # Handle local format
    elif 'local' in equivalent_atoms:
        qm_indices = list(self.qm_atoms.index)

        for mol_name, mol_eq_atoms in equivalent_atoms['local'].items():
            # instance number -> {molecule-local atom number: global atom index}
            instances = {}

            prev_natoms = 0
            for mol, n_mols in self.top.molecules:
                mol_len = len(self.top.topol_dict[mol])
                if mol == mol_name:
                    first_atom = prev_natoms + 1  # 1-based indexing
                    for idx in qm_indices:
                        offset = idx - first_atom
                        if 0 <= offset < mol_len * n_mols:
                            instance, local0 = divmod(offset, mol_len)
                            instances.setdefault(instance, {})[local0 + 1] = idx
                    break  # Only the first entry of this molecule type is considered
                prev_natoms += mol_len * n_mols

            if not instances:
                logging.warning(f"No QM atoms found in molecule {mol_name}")
                continue

            for atom1, atom2 in mol_eq_atoms:
                applied = False
                # Keep instances separate so a later copy of the molecule
                # cannot overwrite the local->global lookup of an earlier one.
                for instance in sorted(instances):
                    local_to_global = instances[instance]
                    if atom1 in local_to_global and atom2 in local_to_global:
                        qm_idx1 = global_to_qm_idx[local_to_global[atom1]]
                        qm_idx2 = global_to_qm_idx[local_to_global[atom2]]
                        eq_mapping[qm_idx2] = qm_idx1
                        applied = True
                if not applied:
                    logging.warning(f"Atom indices {atom1} or {atom2} not found in QM region for molecule {mol_name}")

    logging.info(f'Successfully created equivalent atom mapping for {len(eq_mapping)} QM atoms')
    return eq_mapping
1375
+
1376
+ def redistribute_charges_after_dresp(self, optimized_charges, num_bonds_away=2,
1377
+ fixed_charge_indices=None, charge_group_constraints=None):
1378
+ """Redistribute charges after DRESP optimization to maintain QM region neutrality and charge group constraints.
1379
+
1380
+ This function:
1381
+ 1. Identifies QM atoms that are a specified number of bonds away from boundary atoms
1382
+ 2. Replaces their optimized charges with original values
1383
+ 3. Calculates the charge difference for each charge group
1384
+ 4. Redistributes charge differences within each group to maintain group constraints
1385
+ 5. Maintains total system charge conservation
1386
+
1387
+ Args:
1388
+ optimized_charges (numpy.ndarray or list): Optimized charges from DRESP
1389
+ num_bonds_away (int): Number of bonds away from boundary atoms to restore charges
1390
+ fixed_charge_indices (set, optional): Set of atom indices whose charges should be kept at original values
1391
+ charge_group_constraints (list, optional): List of tuples (atom_indices, target_charge) for charge groups
1392
+
1393
+ Returns:
1394
+ numpy.ndarray: Redistributed charges that maintain QM region charge conservation and group constraints
1395
+ """
1396
+ if not self.qm_atoms is not None:
1397
+ raise MiMiCPyError('No QM atoms defined')
1398
+
1399
+ if len(optimized_charges) != len(self.qm_atoms):
1400
+ raise MiMiCPyError(f'Optimized charges length ({len(optimized_charges)}) does not match number of QM atoms ({len(self.qm_atoms)})')
1401
+
1402
+ # Get boundary atoms
1403
+ if self.boundary_atoms.empty:
1404
+ return np.array(optimized_charges)
1405
+
1406
+ # Get original charges
1407
+ original_charges = self.qm_atoms['charge'].values
1408
+ original_charges = np.array(original_charges)
1409
+
1410
+ # Create mapping from QM atom index to position in optimized_charges array
1411
+ qm_atom_to_array_idx = {atom_idx: array_idx for array_idx, atom_idx in enumerate(self.qm_atoms.index)}
1412
+
1413
+ bonds = self.qm_interactions['bonds']
1414
+
1415
+ # Create adjacency list for bond connectivity
1416
+ adjacency_list = {}
1417
+ for bond in bonds:
1418
+ if bond['involves_mm']:
1419
+ continue
1420
+ atom1, atom2 = bond['atoms']
1421
+ if atom1 not in adjacency_list:
1422
+ adjacency_list[atom1] = []
1423
+ if atom2 not in adjacency_list:
1424
+ adjacency_list[atom2] = []
1425
+ adjacency_list[atom1].append(atom2)
1426
+ adjacency_list[atom2].append(atom1)
1427
+
1428
+ # Find boundary atom indices in the sequential QM indexing
1429
+ boundary_indices = set()
1430
+ for boundary_idx in self.boundary_atoms.index:
1431
+ array_idx = qm_atom_to_array_idx[boundary_idx]
1432
+ boundary_indices.add(array_idx)
1433
+
1434
+ # Find atoms that are within num_bonds_away from boundary atoms (inclusive)
1435
+ atoms_to_restore = set()
1436
+
1437
+ # First, add boundary atoms themselves to be restored
1438
+ atoms_to_restore.update(boundary_indices)
1439
+ if fixed_charge_indices:
1440
+ atoms_to_restore.update(fixed_charge_indices)
1441
+
1442
+ for boundary_idx in boundary_indices:
1443
+ # Use breadth-first search to find atoms within the specified bond distance
1444
+ visited = set()
1445
+ queue = [(boundary_idx, 0)] # (atom_index, bond_distance)
1446
+
1447
+ while queue:
1448
+ current_atom, bond_distance = queue.pop(0)
1449
+
1450
+ if current_atom in visited:
1451
+ continue
1452
+
1453
+ visited.add(current_atom)
1454
+
1455
+ if bond_distance <= num_bonds_away and bond_distance > 0:
1456
+ # This atom is within num_bonds_away from boundary (but not the boundary itself)
1457
+ atoms_to_restore.add(current_atom)
1458
+
1459
+ if bond_distance < num_bonds_away:
1460
+ # Continue searching for more distant atoms
1461
+ if current_atom in adjacency_list:
1462
+ for neighbor in adjacency_list[current_atom]:
1463
+ if neighbor not in visited:
1464
+ queue.append((neighbor, bond_distance + 1))
1465
+
1466
+ if len(atoms_to_restore) == 0:
1467
+ logging.warning(f'No atoms found within {num_bonds_away} bond(s) from boundary atoms. Returning optimized charges unchanged.')
1468
+ return optimized_charges
1469
+
1470
+ # Initialize result charges with optimized charges
1471
+ redistributed_charges = optimized_charges.copy()
1472
+
1473
+ # Process charge group constraints if provided
1474
+ if charge_group_constraints:
1475
+ # Create a mapping from atom index to group info
1476
+ atom_to_group = {}
1477
+ for group_key, (atom_indices, target_charge) in charge_group_constraints.items():
1478
+ for atom_idx in atom_indices:
1479
+ atom_to_group[atom_idx] = (group_key, target_charge)
1480
+
1481
+ # Process each charge group separately
1482
+ for group_key, (atom_indices, target_charge) in charge_group_constraints.items():
1483
+ # Find atoms in this group that need to be restored
1484
+ group_atoms_to_restore = atoms_to_restore.intersection(atom_indices)
1485
+ group_atoms_to_optimize = atom_indices - atoms_to_restore
1486
+
1487
+ if len(group_atoms_to_restore) == 0:
1488
+ # No atoms in this group need restoration, skip
1489
+ continue
1490
+
1491
+ # Calculate charge difference for this group
1492
+ group_charge_diff = 0.0
1493
+ for atom_idx in group_atoms_to_restore:
1494
+ original_charge = original_charges[atom_idx]
1495
+ optimized_charge = optimized_charges[atom_idx]
1496
+
1497
+ # Restore original charge
1498
+ redistributed_charges[atom_idx] = original_charge
1499
+
1500
+ # Add to group charge difference
1501
+ charge_diff = optimized_charge - original_charge
1502
+ group_charge_diff += charge_diff
1503
+
1504
+ # Log the restoration
1505
+ actual_atom_idx = list(self.qm_atoms.index)[atom_idx]
1506
+ logging.debug(f'Group {group_key}: Restored charge for atom {actual_atom_idx}: '
1507
+ f'{optimized_charge:.6f} -> {original_charge:.6f} (diff: {charge_diff:.6f})')
1508
+
1509
+ # If there are atoms to optimize in this group, redistribute the charge difference
1510
+ if len(group_atoms_to_optimize) > 0 and abs(group_charge_diff) > 1e-10:
1511
+ # Calculate weights for redistribution within the group
1512
+ weights = np.zeros(len(self.qm_atoms))
1513
+ for atom_idx in group_atoms_to_optimize:
1514
+ weights[atom_idx] = abs(optimized_charges[atom_idx])
1515
+
1516
+ # Normalize weights
1517
+ total_weight = np.sum(weights)
1518
+ if total_weight > 0:
1519
+ weights = weights / total_weight
1520
+ else:
1521
+ # If all weights are zero, distribute equally
1522
+ weights = np.ones(len(self.qm_atoms)) / len(group_atoms_to_optimize)
1523
+ # Set weights to zero for atoms not in this group
1524
+ for i in range(len(self.qm_atoms)):
1525
+ if i not in group_atoms_to_optimize:
1526
+ weights[i] = 0.0
1527
+
1528
+ # Redistribute charge difference within the group
1529
+ charge_per_atom = group_charge_diff * weights
1530
+ redistributed_charges += charge_per_atom
1531
+
1532
+ logging.debug(f'Group {group_key}: Redistributed {group_charge_diff:.6f} charge among {len(group_atoms_to_optimize)} atoms')
1533
+
1534
+ # Verify group constraint is maintained
1535
+ group_total = sum(redistributed_charges[atom_idx] for atom_idx in atom_indices)
1536
+ if abs(group_total - target_charge) > 1e-6:
1537
+ logging.warning(f'Group {group_key} constraint violated: target={target_charge:.6f}, actual={group_total:.6f}')
1538
+
1539
+ else:
1540
+ # Original behavior: global redistribution without group constraints
1541
+ total_charge_diff = 0.0
1542
+
1543
+ # Restore original charges for atoms at specified bond distance
1544
+ for atom_idx in atoms_to_restore:
1545
+ original_charge = original_charges[atom_idx]
1546
+ optimized_charge = optimized_charges[atom_idx]
1547
+
1548
+ # Restore original charge
1549
+ redistributed_charges[atom_idx] = original_charge
1550
+
1551
+ # Add to total charge difference
1552
+ charge_diff = optimized_charge - original_charge
1553
+ total_charge_diff += charge_diff
1554
+
1555
+ # Get the actual atom index for logging
1556
+ actual_atom_idx = list(self.qm_atoms.index)[atom_idx]
1557
+ if atom_idx in boundary_indices:
1558
+ logging.debug(f'Restored charge for boundary atom {actual_atom_idx}: '
1559
+ f'{optimized_charge:.6f} -> {original_charge:.6f} (diff: {charge_diff:.6f})')
1560
+ else:
1561
+ logging.debug(f'Restored charge for atom {actual_atom_idx} (within {num_bonds_away} bond(s) from boundary): '
1562
+ f'{optimized_charge:.6f} -> {original_charge:.6f} (diff: {charge_diff:.6f})')
1563
+
1564
+ # If no charge difference, return as is
1565
+ if abs(total_charge_diff) < 1e-10:
1566
+ logging.info('No charge difference to redistribute')
1567
+ return redistributed_charges
1568
+
1569
+ # Calculate weightage for redistribution based on atom charges
1570
+ weights = np.zeros(len(self.qm_atoms))
1571
+
1572
+ for i in range(len(self.qm_atoms)):
1573
+ if i in atoms_to_restore:
1574
+ # Atoms that are being restored to original charges get zero weight
1575
+ weights[i] = 0.0
1576
+ continue
1577
+
1578
+ # Use the absolute value of the atom's charge as weightage
1579
+ weights[i] = abs(optimized_charges[i])
1580
+
1581
+ # Normalize weights
1582
+ total_weight = np.sum(weights)
1583
+ if total_weight > 0:
1584
+ weights = weights / total_weight
1585
+ else:
1586
+ # If all weights are zero, distribute equally among non-boundary atoms
1587
+ non_boundary_count = len(self.qm_atoms) - len(boundary_indices)
1588
+ if non_boundary_count > 0:
1589
+ weights = np.ones(len(self.qm_atoms)) / non_boundary_count
1590
+ # Set boundary atom weights to zero
1591
+ for boundary_idx in boundary_indices:
1592
+ weights[boundary_idx] = 0.0
1593
+ else:
1594
+ weights = np.ones(len(self.qm_atoms)) / len(self.qm_atoms)
1595
+
1596
+ # Redistribute charge difference based on weights
1597
+ charge_per_atom = total_charge_diff * weights
1598
+ redistributed_charges += charge_per_atom
1599
+
1600
+ # Verify total charge conservation
1601
+ original_total = np.sum(original_charges)
1602
+ optimized_total = np.sum(optimized_charges)
1603
+ redistributed_total = np.sum(redistributed_charges)
1604
+
1605
+ logging.info(f'Charge redistribution summary:')
1606
+ logging.info(f' Original total charge: {original_total:.6f}')
1607
+ logging.info(f' Optimized total charge: {optimized_total:.6f}')
1608
+ logging.info(f' Redistributed total charge: {redistributed_total:.6f}')
1609
+ logging.info(f' Number of boundary atoms: {len(self.boundary_atoms)}')
1610
+ logging.info(f' Bonds away from boundary (inclusive): {num_bonds_away}')
1611
+ logging.info(f' Number of atoms restored: {len(atoms_to_restore)}')
1612
+
1613
+ if charge_group_constraints:
1614
+ logging.info(f' Number of charge groups: {len(charge_group_constraints)}')
1615
+ for group_key, (atom_indices, target_charge) in charge_group_constraints.items():
1616
+ group_total = sum(redistributed_charges[atom_idx] for atom_idx in atom_indices)
1617
+ logging.info(f' Group {group_key}: target={target_charge:.6f}, actual={group_total:.6f}')
1618
+
1619
+ # Check if total charge is conserved
1620
+ if abs(redistributed_total - optimized_total) > 1e-6:
1621
+ logging.warning(f'Total charge not conserved: difference = {redistributed_total - optimized_total:.6f}')
1622
+
1623
+ return redistributed_charges
1624
+
1625
def find_mm_atoms_bonded_to_qm(self):
    """Find MM atoms that share a bonded interaction with a QM atom.

    A bonded term (bond, angle or dihedral) is considered whenever at least
    one of its atoms is in ``self.qm_atoms``.  Every atom of such a term that
    is neither a QM atom nor listed in ``self.boundary_atoms`` is reported.

    Every copy of a molecule type that contains a QM atom is scanned, not
    only the first copy, so QM atoms located in a later copy of a repeated
    molecule also get their MM partners reported.

    Returns:
        pandas.DataFrame: One row per MM atom, indexed by the global atom
            index ('id'), with columns type, resid, resname, name, cgnr,
            charge, element, mass, mol, is_bound and is_qm (the last two are
            always 0).  An empty DataFrame is returned when nothing is found.

    Raises:
        MiMiCPyError: If ``self.qm_atoms`` is None.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Global indices of the QM atoms (same numbering as the qm_atoms index)
    qm_indices = set(self.qm_atoms.index)

    # Boundary atoms are never reported as MM atoms
    boundary_indices = set()
    if not self.boundary_atoms.empty:
        boundary_indices = set(self.boundary_atoms.index)
    excluded = qm_indices | boundary_indices

    topol_dict = self.top.topol_dict
    term_getters = (
        (topol_dict.get_bonds, 2),
        (topol_dict.get_angles, 3),
        (topol_dict.get_dihedrals, 4),
    )

    mm_atoms_info = {}  # gmx_idx -> (mol, local_idx)

    prev_natoms = 0
    for mol, n_mols in self.top.molecules:
        mol_size = len(topol_dict[mol])
        block_end = prev_natoms + mol_size * n_mols

        # Copies of this molecule type that hold at least one QM atom.
        # A bonded term never leaves its own copy, so all other copies
        # cannot contribute and are not scanned.
        qm_instances = sorted({
            (idx - prev_natoms - 1) // mol_size
            for idx in qm_indices
            if prev_natoms < idx <= block_end
        })

        if qm_instances:
            # Collect the molecule-local atom tuples of all bonded terms once
            local_terms = []
            for getter, arity in term_getters:
                columns = getter(mol)
                if columns:
                    for j in range(len(columns[0])):
                        local_terms.append(
                            tuple(columns[k][j] for k in range(arity))
                        )

            for instance in qm_instances:
                offset = prev_natoms + instance * mol_size
                for term in local_terms:
                    atoms = [local + offset for local in term]
                    if not any(atom in qm_indices for atom in atoms):
                        continue
                    for atom in atoms:
                        if atom not in excluded:
                            # atom - offset is the index inside the molecule
                            mm_atoms_info[atom] = (mol, atom - offset)

        prev_natoms = block_end

    # Build rows with the same layout as qm_atoms
    mm_atoms_data = []
    for gmx_idx in sorted(mm_atoms_info):
        mol, local_idx = mm_atoms_info[gmx_idx]
        atom_row = topol_dict[mol].loc[local_idx]

        mm_atoms_data.append({
            'id': gmx_idx,
            'type': atom_row['type'],
            'resid': atom_row['resid'],
            'resname': atom_row['resname'],
            'name': atom_row['name'],
            'cgnr': atom_row['cgnr'],
            'charge': atom_row['charge'],
            'element': atom_row.get('element', ''),
            'mass': atom_row['mass'],
            'mol': mol,
            'is_bound': 0,  # MM atoms are not boundary atoms
            'is_qm': 0,  # MM atoms are not QM atoms
        })

    mm_atoms_df = pd.DataFrame(mm_atoms_data)
    if not mm_atoms_df.empty:
        mm_atoms_df.set_index('id', inplace=True)

    logging.info(f'Found {len(mm_atoms_df)} MM atoms in QM interactions')
    return mm_atoms_df
1766
+
1767
def get_extended_qm_atoms_dataframe(self):
    """Return the QM atoms together with the MM atoms found next to them.

    The QM rows come from a copy of ``self.qm_atoms`` (the stored frame is
    not modified) and the MM rows from ``find_mm_atoms_bonded_to_qm()``.
    Both parts get an ``is_qm`` column so they can be told apart.

    Returns:
        pandas.DataFrame: QM rows followed by MM rows, with
            - is_qm: 1 for rows taken from qm_atoms, 0 for the MM rows

    Raises:
        MiMiCPyError: If ``self.qm_atoms`` is None.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # MM partners first, then a private copy of the QM table
    mm_part = self.find_mm_atoms_bonded_to_qm()
    qm_part = self.qm_atoms.copy()
    qm_part['is_qm'] = 1

    if mm_part.empty:
        # Nothing to append: the tagged QM copy is the whole result
        combined = qm_part
    else:
        mm_part['is_qm'] = 0
        combined = pd.concat([qm_part, mm_part])

    logging.info(f'Extended QM atom DataFrame: {len(qm_part)} QM atoms + {len(mm_part)} MM atoms = {len(combined)} total')
    return combined
1803
+
1804
def identify_solvent_atoms(self, solvent_names: dict = None):
    """Identify solvent atoms in the QM region by residue and molecule name.

    An atom counts as solvent when its stripped ``resname`` is listed under
    ``solvent_names['resnames']`` or its stripped ``mol`` is listed under
    ``solvent_names['molecules']``.  A key that is missing or set to None
    disables the corresponding check.

    Args:
        solvent_names (dict, optional): Dictionary with optional 'resnames'
            and 'molecules' entries.
            Default: {'resnames': ['SOL', 'WAT', 'HOH', 'TIP3', 'TIP3P', 'TP3', 'H2O'],
                      'molecules': ['tip3p', 'spc', 'spce', 'spc/e', 'SOL', 'water', 'tip4p']}

    Returns:
        set: Positions (0-based row numbers in ``self.qm_atoms``) of the
            solvent atoms.

    Raises:
        MiMiCPyError: If ``self.qm_atoms`` is None.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Set default solvent detection parameters
    if solvent_names is None:
        solvent_names = {'resnames': ['SOL', 'WAT', 'HOH', 'TIP3', 'TIP3P', 'TP3', 'H2O'],
                         'molecules': ['tip3p', 'spc', 'spce', 'spc/e', 'SOL', 'water', 'tip4p']}

    # .get() so that a dict carrying only one of the two keys is accepted
    resnames = solvent_names.get('resnames')
    molecules = solvent_names.get('molecules')

    qm_atoms_df = self.qm_atoms
    solvent_atom_indices = set()  # row positions (0-based)

    # Match by residue name
    if resnames is not None:
        for seq_idx, resname in enumerate(qm_atoms_df['resname']):
            if str(resname).strip() in resnames:
                solvent_atom_indices.add(seq_idx)

    # Match by molecule name
    if molecules is not None:
        for seq_idx, mol_name in enumerate(qm_atoms_df['mol']):
            if str(mol_name).strip() in molecules:
                solvent_atom_indices.add(seq_idx)

    return solvent_atom_indices
1863
+
1864
def is_solvent_interaction(self, interaction):
    """Tell whether an interaction touches at least one solvent atom.

    The atom indices are read from ``interaction['atoms']`` (an absent key
    is treated as an empty list) and looked up in
    ``self.solvent_atom_indices``.

    Args:
        interaction (dict): Interaction dictionary containing 'atoms' key with atom indices

    Returns:
        bool: True if the interaction involves solvent atoms, False otherwise

    Raises:
        MiMiCPyError: If ``self.qm_atoms`` is None.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Stop at the first atom that is a known solvent atom
    for member in interaction.get('atoms', []):
        if member in self.solvent_atom_indices:
            return True
    return False
1883
+
1884
def create_charge_group_constraints(self, group_by='mol', target_charges=None,
                                    exclude_solvent=True, solvent_names=None):
    """
    Create charge group constraints from the qm_atoms dataframe.

    QM atoms are grouped by the value of one column and each group with more
    than one atom yields a constraint on its total charge.

    Args:
        group_by (str): Criteria to group atoms by. Options:
            - 'resname': Group by residue name
            - 'mol': Group by molecule name
            - 'type': Group by atom type
            - 'resid': Group by residue ID
        target_charges (dict, optional): Dictionary mapping group identifiers to target charges.
            Groups without an entry (or all groups when None) use the sum of
            the 'charge' column of qm_atoms over the group.
            Example: {'ALA': 0.0, 'GLY': 0.0, 'SOL': 0.0}
        exclude_solvent (bool): Whether to exclude solvent atoms from grouping
        solvent_names (dict, optional): Passed to identify_solvent_atoms()
            when exclude_solvent is True.

    Returns:
        dict: Maps each group key (str) to a tuple
            (atom_indices, target_charge), where atom_indices is a set of
            0-based row positions in qm_atoms.

    Raises:
        MiMiCPyError: If ``self.qm_atoms`` is None.
        ValueError: If group_by is not one of the supported options.
    """
    if self.qm_atoms is None:
        raise MiMiCPyError('No QM atoms defined')

    # Validate up front so a bad value is reported even when no atom is grouped
    if group_by not in ('resname', 'mol', 'type', 'resid'):
        raise ValueError(f"Invalid group_by parameter: {group_by}. "
                         f"Must be one of: 'resname', 'mol', 'type', 'resid'")

    # Identify solvent atoms if needed
    solvent_atom_indices = set()
    if exclude_solvent:
        solvent_atom_indices = self.identify_solvent_atoms(solvent_names)

    # Group atoms by the specified criteria
    groups = {}
    group_charges = {}

    key_column = self.qm_atoms[group_by]
    charge_column = self.qm_atoms['charge']
    for seq_idx, (raw_key, charge) in enumerate(zip(key_column, charge_column)):
        # Solvent atoms (empty set when exclude_solvent is False) are skipped
        if seq_idx in solvent_atom_indices:
            continue

        # Name-like keys are stripped; residue IDs are only stringified
        if group_by == 'resid':
            group_key = str(raw_key)
        else:
            group_key = str(raw_key).strip()

        groups.setdefault(group_key, set()).add(seq_idx)
        group_charges[group_key] = group_charges.get(group_key, 0.0) + charge

    # Create constraints
    constraints = {}

    for group_key, atom_indices in groups.items():
        # NOTE(review): single-atom groups are dropped even when a target
        # charge is given for them - confirm this is the intended behaviour.
        if len(atom_indices) <= 1:
            continue

        if target_charges is not None and group_key in target_charges:
            target_charge = target_charges[group_key]
        else:
            # Use sum of original charges if no specific target is provided
            target_charge = group_charges[group_key]

        constraints[group_key] = (atom_indices, target_charge)

    return constraints