mimicpy 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. mimicpy/__init__.py +1 -1
  2. mimicpy/__main__.py +726 -2
  3. mimicpy/_authors.py +2 -2
  4. mimicpy/_version.py +2 -2
  5. mimicpy/coords/__init__.py +1 -1
  6. mimicpy/coords/base.py +1 -1
  7. mimicpy/coords/cpmdgeo.py +1 -1
  8. mimicpy/coords/gro.py +1 -1
  9. mimicpy/coords/pdb.py +1 -1
  10. mimicpy/core/__init__.py +1 -1
  11. mimicpy/core/prepare.py +3 -3
  12. mimicpy/core/selector.py +1 -1
  13. mimicpy/force_matching/__init__.py +34 -0
  14. mimicpy/force_matching/bonded_forces.py +628 -0
  15. mimicpy/force_matching/compare_top.py +809 -0
  16. mimicpy/force_matching/dresp.py +435 -0
  17. mimicpy/force_matching/nonbonded_forces.py +32 -0
  18. mimicpy/force_matching/opt_ff.py +2114 -0
  19. mimicpy/force_matching/qm_region.py +1960 -0
  20. mimicpy/plugins/__main_installer__.py +76 -0
  21. mimicpy/{__main_vmd__.py → plugins/__main_vmd__.py} +2 -2
  22. mimicpy/plugins/pymol.py +56 -0
  23. mimicpy/plugins/vmd.tcl +78 -0
  24. mimicpy/scripts/__init__.py +1 -1
  25. mimicpy/scripts/cpmd.py +1 -1
  26. mimicpy/scripts/fm_input.py +265 -0
  27. mimicpy/scripts/fmdata.py +120 -0
  28. mimicpy/scripts/mdp.py +1 -1
  29. mimicpy/scripts/ndx.py +1 -1
  30. mimicpy/scripts/script.py +1 -1
  31. mimicpy/topology/__init__.py +1 -1
  32. mimicpy/topology/itp.py +603 -35
  33. mimicpy/topology/mpt.py +1 -1
  34. mimicpy/topology/top.py +254 -15
  35. mimicpy/topology/topol_dict.py +233 -4
  36. mimicpy/utils/__init__.py +1 -1
  37. mimicpy/utils/atomic_numbers.py +1 -1
  38. mimicpy/utils/constants.py +17 -3
  39. mimicpy/utils/elements.py +1 -1
  40. mimicpy/utils/errors.py +1 -1
  41. mimicpy/utils/file_handler.py +1 -1
  42. mimicpy/utils/strings.py +1 -1
  43. mimicpy-0.3.0.dist-info/METADATA +156 -0
  44. mimicpy-0.3.0.dist-info/RECORD +50 -0
  45. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/WHEEL +1 -1
  46. mimicpy-0.3.0.dist-info/entry_points.txt +4 -0
  47. mimicpy-0.2.0.dist-info/METADATA +0 -86
  48. mimicpy-0.2.0.dist-info/RECORD +0 -38
  49. mimicpy-0.2.0.dist-info/entry_points.txt +0 -3
  50. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info/licenses}/COPYING +0 -0
  51. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info/licenses}/COPYING.LESSER +0 -0
  52. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/top_level.txt +0 -0
  53. {mimicpy-0.2.0.dist-info → mimicpy-0.3.0.dist-info}/zip-safe +0 -0
mimicpy/topology/itp.py CHANGED
@@ -1,6 +1,6 @@
1
1
  #
2
2
  # MiMiCPy: Python Based Tools for MiMiC
3
- # Copyright (C) 2020-2021 Bharath Raghavan,
3
+ # Copyright (C) 2020-2023 Bharath Raghavan,
4
4
  # Florian Schackert
5
5
  #
6
6
  # This file is part of MiMiCPy.
@@ -33,7 +33,7 @@ from ..utils.errors import MiMiCPyError, ParserError
33
33
  class Itp:
34
34
  """reads itp files"""
35
35
 
36
- columns = ['number', 'type', 'resid', 'resname', 'name', 'charge', 'element', 'mass']
36
+ columns = ['number', 'type', 'resid', 'resname', 'name', 'cgnr', 'charge', 'element', 'mass']
37
37
 
38
38
  def __init__(self,
39
39
  file,
@@ -42,7 +42,8 @@ class Itp:
42
42
  buffer=1000,
43
43
  mode='r',
44
44
  guess_elements=True,
45
- gmxdata=''):
45
+ gmxdata='',
46
+ parameter_definitions=None):
46
47
 
47
48
  self.file = file
48
49
  self.requested_molecules = requested_molecules
@@ -51,8 +52,12 @@ class Itp:
51
52
  self.mode = mode
52
53
  self.guess_elements = guess_elements
53
54
  self.gmxdata = gmxdata
55
+ self.external_parameter_definitions = parameter_definitions
54
56
  self._topol = None
55
57
  self._bonds = None
58
+ self._angles = None
59
+ self._dihedrals = None
60
+ self._pairs = None
56
61
  self._topology_files = None
57
62
  self._molecules = None
58
63
  self._molecule_types = None
@@ -111,6 +116,74 @@ class Itp:
111
116
  self.__read()
112
117
  return self._bonds
113
118
 
119
@property
def angles(self):
    """Return the cached angle data (``self._angles``).

    If the object is not in read mode yet, it is switched to ``'r'`` and the
    file is parsed before the cached value is returned.
    """
    if self.mode != 'r':
        self.mode = 'r'
        self.__read()
    return self._angles
126
+
127
@property
def dihedrals(self):
    """Return the cached dihedral data (``self._dihedrals``).

    If the object is not in read mode yet, it is switched to ``'r'`` and the
    file is parsed before the cached value is returned.
    """
    if self.mode != 'r':
        self.mode = 'r'
        self.__read()
    return self._dihedrals
134
+
135
@property
def pairs(self):
    """Return the cached pair data (``self._pairs``).

    If the object is not in read mode yet, it is switched to ``'r'`` and the
    file is parsed first. The parsed value is returned in both cases, the
    same way the sibling ``angles`` and ``dihedrals`` properties behave.
    """
    if self.mode != 'r':
        self.mode = 'r'
        self.__read()
    # Returned on the lazy-read path as well; without this the first access
    # in a non-read mode would yield None.
    return self._pairs
141
+
142
@property
def nrexcl_values(self):
    """Get nrexcl values for molecules in this ITP file.

    Parses the file first when the object is not in read mode. Falls back to
    an empty dict if ``_nrexcl_values`` has not been set on the instance.
    """
    if self.mode != 'r':
        self.mode = 'r'
        self.__read()
    return getattr(self, '_nrexcl_values', {})
150
+
151
@property
def bondtypes(self):
    """Get bond types from force field sections.

    Parses the file first when the object is not in read mode. Falls back to
    an empty dict if ``_bondtypes`` has not been set on the instance.
    """
    if self.mode != 'r':
        self.mode = 'r'
        self.__read()
    return getattr(self, '_bondtypes', {})
159
+
160
@property
def angletypes(self):
    """Get angle types from force field sections.

    Parses the file first when the object is not in read mode. Falls back to
    an empty dict if ``_angletypes`` has not been set on the instance.
    """
    if self.mode != 'r':
        self.mode = 'r'
        self.__read()
    return getattr(self, '_angletypes', {})
168
+
169
@property
def dihedraltypes(self):
    """Get dihedral types from force field sections.

    Parses the file first when the object is not in read mode. Falls back to
    an empty dict if ``_dihedraltypes`` has not been set on the instance.
    """
    if self.mode != 'r':
        self.mode = 'r'
        self.__read()
    return getattr(self, '_dihedraltypes', {})
177
+
178
@property
def parameter_definitions(self):
    """Get parameter definitions (#define statements) from force field sections.

    Parses the file first when the object is not in read mode. Falls back to
    an empty dict if ``_parameter_definitions`` has not been set on the
    instance.
    """
    if self.mode != 'r':
        self.mode = 'r'
        self.__read()
    return getattr(self, '_parameter_definitions', {})
186
+
114
187
  @staticmethod
115
188
  def __get_molecules(topology):
116
189
  molecules = []
@@ -129,13 +202,33 @@ class Itp:
129
202
 
130
203
@staticmethod
def __get_section(section, string):
    """Return the cleaned body of every ``[ section ]`` block in ``string``.

    Args:
        section: Name of the directive to look for (e.g. ``'atoms'``). It is
            escaped before being placed in the regex, so characters such as
            ``+`` are matched literally.
        string (str): Topology text to search.

    Returns:
        list of str: One entry per matching block that still has content
        after blank lines and lines starting with ``;`` or ``#`` are dropped.
        Each entry is the remaining lines, stripped and joined with newlines.
        Blocks without any content are omitted.
    """
    # Only horizontal whitespace may follow the closing bracket, and the body
    # ends at the next line that opens a directive (or at end of text). This
    # keeps an empty block from swallowing the section right after it.
    section_regex = re.compile(
        r"\[\s*" + re.escape(str(section)) + r"\s*\][^\S\n]*\n(.*?)(?=^[^\S\n]*\[|\Z)",
        re.DOTALL | re.MULTILINE
    )

    filtered_sections = []
    for section_content in section_regex.findall(string):
        stripped_lines = (line.strip() for line in section_content.split('\n'))
        # The '#' prefix already covers '#include' and other directives.
        filtered_lines = [line for line in stripped_lines
                          if line and not line.startswith((';', '#'))]
        if filtered_lines:
            filtered_sections.append('\n'.join(filtered_lines))

    return filtered_sections
139
232
 
140
233
  @staticmethod
141
234
  def __parse_block_till_section(itp, *sections):
@@ -193,15 +286,20 @@ class Itp:
193
286
  atm_types_section_dct = {k:[] for k in cols}
194
287
  for line in atomtypes_section.splitlines():
195
288
  line_split = line.split()
196
- if len(line_split) not in [6, 7]:
289
+ if len(line_split) not in [6, 7, 8]:
197
290
  raise ParserError(self.file, 'GROMACS topology',
198
291
  details='following line in [ atomtypes ] section not formatted properly: {}'.format(line))
292
+
293
+ if len(line_split) == 8:
294
+ # assume line is of format: name bond_type atom_no mass charge ptype sigma epsilon
295
+ line_split = line_split[:1] + line_split[2:] # second value is bond type, ignore it
296
+
199
297
  if len(line_split) == 6:
200
298
  line_split.insert(1, 'X')
201
299
 
202
300
  for i, col in enumerate(cols):
203
301
  atm_types_section_dct[col].append(float(line_split[i]) if col in float_cols else line_split[i])
204
-
302
+
205
303
  self.atom_types_df = pd.DataFrame(atm_types_section_dct)
206
304
  df = self.atom_types_df.copy()
207
305
 
@@ -227,9 +325,10 @@ class Itp:
227
325
  itp_file = Parser(self.file, self.buffer)
228
326
  itp_text = ''
229
327
  while not itp_file.is_closed:
230
- block = Itp.__parse_block_till_section(itp_file, 'moleculetype', 'atoms', 'bonds')
231
- if any([Itp.__section_is_in_string(section, block) for section in ['moleculetype', 'atoms', 'bonds']]):
232
- itp_text += block
328
+ chunk = next(itp_file, '')
329
+ if chunk == '':
330
+ break
331
+ itp_text += chunk
233
332
  return itp_text
234
333
 
235
334
  def __guess_element_from(self, mass, name, atom_type):
@@ -259,12 +358,24 @@ class Itp:
259
358
  atom_info = {k:[] for k in cols}
260
359
  number_of_bad_lines = 0
261
360
  for i, line in enumerate(atom_section.splitlines()):
361
+ line = line.split(';', 1)[0].strip() # Remove comments
362
+ if not line:
363
+ continue
262
364
  line = line.split()
263
- if len(line) == 8:
264
- number, atom_type, resid, resname, name, _, charge, mass = line[:8]
365
+ if len(line) == 8 or len(line) == 11: # Full format with cgnr; len 11 for perturbed molecule in a free energy perturbation calculation (see issue 57)
366
+ number, atom_type, resid, resname, name, cgnr, charge, mass = line[:8]
265
367
  elif len(line) == 7:
266
- number, atom_type, resid, resname, name, _, charge = line[:7]
267
- mass = 0
368
+ # Check if the last field looks like a mass (numeric) or charge (numeric)
369
+ # If both charge and mass are numeric, assume mass is included
370
+ try:
371
+ # If both are numeric, assume format: number, type, resid, resname, name, charge, mass
372
+ number, atom_type, resid, resname, name, charge, mass = line[:7]
373
+ cgnr = number # Use atom number as cgnr if not provided
374
+ except ValueError:
375
+ # If mass is not numeric, assume format: number, type, resid, resname, name, charge, _
376
+ number, atom_type, resid, resname, name, charge, _ = line[:7]
377
+ mass = 0.0
378
+ cgnr = number # Use atom number as cgnr if not provided
268
379
  else:
269
380
  if number_of_bad_lines > 5:
270
381
  raise ParserError(self.file, 'GROMACS topology',
@@ -274,6 +385,7 @@ class Itp:
274
385
  continue
275
386
  number = int(number)
276
387
  resid = int(resid)
388
+ cgnr = int(cgnr)
277
389
  charge = float(charge)
278
390
  mass = float(mass)
279
391
  if self.atom_types_dict is not None and atom_type in self.atom_types_dict:
@@ -288,59 +400,515 @@ class Itp:
288
400
  atom_info[cols[2]].append(resid)
289
401
  atom_info[cols[3]].append(resname)
290
402
  atom_info[cols[4]].append(name)
291
- atom_info[cols[5]].append(charge)
292
- atom_info[cols[6]].append(element)
293
- atom_info[cols[7]].append(mass)
403
+ atom_info[cols[5]].append(cgnr)
404
+ atom_info[cols[6]].append(charge)
405
+ atom_info[cols[7]].append(element)
406
+ atom_info[cols[8]].append(mass)
294
407
  atoms = pd.DataFrame(atom_info).set_index(cols[0])
295
408
  return atoms
296
409
 
297
410
@staticmethod
def __read_bonds(bonds_section):
    """Parse the body of a ``[ bonds ]`` section.

    Each non-blank line contributes the two atom indices and the function
    type as integers, followed by up to two parameters as floats.

    Args:
        bonds_section (str): Section text, one bond per line.

    Returns:
        tuple: Five equally long lists ``(atom_i, atom_j, func, param1,
        param2)``. A parameter that is missing from the line, or that is not
        a number (for example a symbolic name), is stored as None.

    Raises:
        IndexError: If a non-blank line has fewer than three columns.
        ValueError: If an index or the function type is not an integer.
    """
    def to_float(token):
        # A symbolic parameter cannot be resolved here; report it as
        # "not given" rather than aborting the whole parse.
        try:
            return float(token)
        except ValueError:
            return None

    atom_i, atom_j, func, param1, param2 = [], [], [], [], []
    for line in bonds_section.splitlines():
        fields = line.split()
        if not fields:
            continue
        # The first three columns are mandatory (two indices + func).
        atom_i.append(int(fields[0]))
        atom_j.append(int(fields[1]))
        func.append(int(fields[2]))
        # Parameters are optional.
        param1.append(to_float(fields[3]) if len(fields) > 3 else None)
        param2.append(to_float(fields[4]) if len(fields) > 4 else None)

    return (atom_i, atom_j, func, param1, param2)
307
427
 
428
+
308
429
@staticmethod
def __read_angles(angles_section):
    """Parse the body of an ``[ angles ]`` section.

    Each non-blank line contributes the three atom indices and the function
    type as integers, followed by up to two parameters as floats.

    Args:
        angles_section (str): Section text, one angle per line.

    Returns:
        tuple: Six equally long lists ``(atom_i, atom_j, atom_k, func,
        param1, param2)``. A parameter that is missing from the line, or that
        is not a number (for example a symbolic name), is stored as None.

    Raises:
        IndexError: If a non-blank line has fewer than four columns.
        ValueError: If an index or the function type is not an integer.
    """
    def to_float(token):
        # A symbolic parameter cannot be resolved here; report it as
        # "not given" rather than aborting the whole parse.
        try:
            return float(token)
        except ValueError:
            return None

    atom_i, atom_j, atom_k, func, param1, param2 = [], [], [], [], [], []
    for line in angles_section.splitlines():
        fields = line.split()
        if not fields:
            continue
        # The first four columns are mandatory (three indices + func).
        atom_i.append(int(fields[0]))
        atom_j.append(int(fields[1]))
        atom_k.append(int(fields[2]))
        func.append(int(fields[3]))
        # Parameters are optional.
        param1.append(to_float(fields[4]) if len(fields) > 4 else None)
        param2.append(to_float(fields[5]) if len(fields) > 5 else None)

    return (atom_i, atom_j, atom_k, func, param1, param2)
446
+
447
@staticmethod
def __read_dihedrals(dihedrals_section, parameter_definitions=None):
    """Read dihedrals from a section, handling parameter references.

    Blank lines, ``;`` comments and lines with fewer than five columns are
    ignored. For every remaining line the four atom indices and the function
    type are read as integers, and the trailing parameters are stored
    according to the function type:

    * 1, 4, 9: ``phi0 cp mult``
    * 2:       ``param1 param2``
    * 3:       ``C0 C1 C2 C3 C4 C5``

    If the first parameter is not a number it is treated as a reference into
    ``parameter_definitions``; the referenced text replaces the parameters
    before they are interpreted. Unknown references, and function types
    without a layout listed above, leave every parameter as None.

    Args:
        dihedrals_section (str): Content of [ dihedrals ] section
        parameter_definitions (dict, optional): Dictionary of #define
            statements, mapping a name to its whitespace-separated values.

    Returns:
        tuple: Sixteen lists of equal length, one entry per dihedral:
        ``(atom_i, atom_j, atom_k, atom_l, func, phi0, cp, mult, param1,
        param2, C0, C1, C2, C3, C4, C5)``. Parameters that do not apply to a
        dihedral, or that are absent, are None.
    """
    definitions = parameter_definitions or {}

    # Parameter names per function type, each paired with its converter.
    layouts = {
        1: (('phi0', float), ('cp', float), ('mult', int)),
        2: (('param1', float), ('param2', float)),
        3: tuple(('C%d' % i, float) for i in range(6)),
    }
    layouts[4] = layouts[9] = layouts[1]

    index_cols = ('atom_i', 'atom_j', 'atom_k', 'atom_l', 'func')
    param_cols = ('phi0', 'cp', 'mult', 'param1', 'param2',
                  'C0', 'C1', 'C2', 'C3', 'C4', 'C5')
    columns = {name: [] for name in index_cols + param_cols}

    def is_number(token):
        # Decide by conversion, so values such as '1.8e2' are not mistaken
        # for symbolic names just because they contain a letter.
        try:
            float(token)
        except ValueError:
            return False
        return True

    for line in dihedrals_section.splitlines():
        line = line.split(';', 1)[0].strip()  # drop comments
        if not line:
            continue
        parts = line.split()
        if len(parts) < 5:
            continue

        indices = [int(token) for token in parts[:5]]
        values = parts[5:]
        if values and not is_number(values[0]):
            # Symbolic reference: substitute the defined values, dropping any
            # trailing ';' comment. An unknown name yields no parameters.
            values = definitions.get(values[0], '').split(';', 1)[0].split()

        # Exactly one entry per column per dihedral keeps all lists aligned.
        row = dict.fromkeys(param_cols)
        for (name, convert), token in zip(layouts.get(indices[4], ()), values):
            row[name] = convert(token)

        for name, value in zip(index_cols, indices):
            columns[name].append(value)
        for name in param_cols:
            columns[name].append(row[name])

    return tuple(columns[name] for name in index_cols + param_cols)
590
+
591
@staticmethod
def __get_molecules_with_interaction_section(string, interaction_type):
    """Find molecule blocks that are followed by an ``[ interaction_type ]`` header.

    Args:
        string (str): Topology text to search.
        interaction_type (str): Directive name such as ``'bonds'``; it is
            matched literally.

    Returns:
        list of str: For every ``[ moleculetype ]`` header, the shortest run
        of consecutive non-empty lines that starts right after the header and
        ends at an ``[ interaction_type ]`` header. If no such pair exists,
        the list holds the matched text of each bare ``[ interaction_type ]``
        header instead.
    """
    # Escape the name so regex metacharacters in it are not interpreted.
    header = r"\[\s*" + re.escape(str(interaction_type)) + r"\s*\]"

    section_regex = re.compile(
        r"\[\s*moleculetype\s*\]\s*\n((?:.+\n)+?)\s*" + header, re.MULTILINE)
    section_list = section_regex.findall(string)

    if not section_list:
        # NOTE(review): this fallback yields header strings, not molecule
        # text - confirm that callers expect that.
        section_list = re.compile(header, re.MULTILINE).findall(string)

    return section_list
313
603
 
604
@staticmethod
def __read_pairs(pairs_section):
    """Parse the body of a ``[ pairs ]`` section.

    Every non-blank line must start with three integers: the two atom
    indices and the function type. Anything after them is ignored.

    Args:
        pairs_section (str): Section text, one pair per line.

    Returns:
        tuple: Three lists ``(atom_i, atom_j, func)``, one entry per line.
    """
    first, second, kinds = [], [], []
    targets = (first, second, kinds)
    for raw in pairs_section.splitlines():
        tokens = raw.split()
        if not tokens:
            continue
        # Fill the three columns left to right.
        for position, target in enumerate(targets):
            target.append(int(tokens[position]))
    return (first, second, kinds)
617
+
618
@staticmethod
def __read_bondtypes(bondtypes_section):
    """Read bond types from force field section.

    Blank lines, lines starting with ``;`` and lines with fewer than five
    columns are skipped. A later line with the same key replaces an earlier
    one.

    Args:
        bondtypes_section (str): Content of [ bondtypes ] section

    Returns:
        dict: Maps ``"type1-type2"`` to a dict with ``'function'`` (int),
        ``'length'`` (float, 4th column) and ``'force_constant'`` (float,
        5th column).
    """
    parsed = {}
    for raw in bondtypes_section.splitlines():
        entry = raw.strip()
        if not entry or entry.startswith(';'):
            continue
        fields = entry.split()
        if len(fields) < 5:
            continue
        type_a, type_b, func, length, force_constant = fields[:5]
        parsed[f"{type_a}-{type_b}"] = {
            'function': int(func),
            'length': float(length),
            'force_constant': float(force_constant),
        }
    return parsed
645
+
646
@staticmethod
def __read_angletypes(angletypes_section):
    """Read angle types from force field section.

    Blank lines, lines starting with ``;`` and lines with fewer than six
    columns are skipped. A later line with the same key replaces an earlier
    one.

    Args:
        angletypes_section (str): Content of [ angletypes ] section

    Returns:
        dict: Maps ``"type1-type2-type3"`` to a dict with ``'function'``
        (int), ``'angle'`` (float, 5th column) and ``'force_constant'``
        (float, 6th column).
    """
    parsed = {}
    for raw in angletypes_section.splitlines():
        entry = raw.strip()
        if not entry or entry.startswith(';'):
            continue
        fields = entry.split()
        if len(fields) < 6:
            continue
        type_a, type_b, type_c, func, angle, force_constant = fields[:6]
        parsed[f"{type_a}-{type_b}-{type_c}"] = {
            'function': int(func),
            'angle': float(angle),
            'force_constant': float(force_constant),
        }
    return parsed
673
+
674
@staticmethod
def __read_dihedraltypes(dihedraltypes_section):
    """Read dihedral types from force field section.

    Blank lines, lines starting with ``;`` and lines with fewer than five
    columns are skipped. An entry is stored only when the line carries enough
    parameters for its function type; other function types are ignored. A
    later line with the same key replaces an earlier one.

    Args:
        dihedraltypes_section (str): Content of [ dihedraltypes ] section

    Returns:
        dict: Maps ``"type1-type2-type3-type4"`` to a dict holding
        ``'function'`` plus, depending on the function type:
        ``'phi0'``, ``'cp'``, ``'mult'`` (1, 4, 9); ``'param1'``,
        ``'param2'`` (2); or ``'params'``, a list of six floats (3).
    """
    parsed = {}
    for raw in dihedraltypes_section.splitlines():
        entry = raw.strip()
        if not entry or entry.startswith(';'):
            continue
        fields = entry.split()
        if len(fields) < 5:
            continue

        func = int(fields[4])
        key = '-'.join(fields[:4])
        extras = fields[5:]  # everything after the function type

        if func in (1, 4, 9):
            if len(extras) >= 3:
                parsed[key] = {
                    'function': func,
                    'phi0': float(extras[0]),
                    'cp': float(extras[1]),
                    'mult': int(extras[2]),
                }
        elif func == 2:
            if len(extras) >= 2:
                parsed[key] = {
                    'function': func,
                    'param1': float(extras[0]),
                    'param2': float(extras[1]),
                }
        elif func == 3:
            if len(extras) >= 6:
                parsed[key] = {
                    'function': func,
                    'params': [float(token) for token in extras[:6]],
                }
    return parsed
722
+
723
@staticmethod
def __read_parameter_definitions(string):
    """Read #define statements from a string.

    Only definitions that start a line (optionally indented) and carry a
    value on that same line are collected; a bare flag such as
    ``#define NAME`` is skipped. A trailing ``;`` comment and surrounding
    whitespace are removed from the value. When a name is defined more than
    once, the last definition wins.

    Args:
        string (str): The string to search for #define statements.

    Returns:
        dict: A dictionary of #define statements keyed by their name.
    """
    # Horizontal whitespace only: the separator after the name must not run
    # onto the next line, or a valueless flag would take that line as value.
    define_regex = re.compile(
        r"^[ \t]*#define[ \t]+([a-zA-Z_][a-zA-Z0-9_]*)[ \t]+([^\n;]*)",
        re.MULTILINE)
    definitions = {}
    for match in define_regex.finditer(string):
        value = match.group(2).strip()
        if value:
            definitions[match.group(1)] = value
    return definitions
740
+
314
741
  def __read(self):
315
742
  itp_text = self.__load_molecules_and_atoms()
316
743
  clean_itp_text = clean(itp_text, comments=[';', '#'])
317
744
  molecule_section = Itp.__get_section('moleculetype', clean_itp_text)
318
745
  atom_section = Itp.__get_section('atoms', clean_itp_text)
319
746
 
747
+ # Read force field sections from current file
748
+ bondtypes_section = Itp.__get_section('bondtypes', clean_itp_text)
749
+ angletypes_section = Itp.__get_section('angletypes', clean_itp_text)
750
+ dihedraltypes_section = Itp.__get_section('dihedraltypes', clean_itp_text)
751
+
752
+ # Store force field data
753
+ self._bondtypes = {}
754
+ self._angletypes = {}
755
+ self._dihedraltypes = {}
756
+ self._parameter_definitions = {} # Store #define statements
757
+
758
+ # Read force field sections from current file
759
+ if bondtypes_section:
760
+ for section in bondtypes_section:
761
+ self._bondtypes.update(Itp.__read_bondtypes(section))
762
+
763
+ if angletypes_section:
764
+ for section in angletypes_section:
765
+ self._angletypes.update(Itp.__read_angletypes(section))
766
+
767
+ if dihedraltypes_section:
768
+ for section in dihedraltypes_section:
769
+ self._dihedraltypes.update(Itp.__read_dihedraltypes(section))
770
+
771
+ # Read parameter definitions (#define statements) from current file and included files
772
+ # First, read from the original file content before cleaning
773
+ original_content = self.__load_molecules_and_atoms()
774
+ self._parameter_definitions = Itp.__read_parameter_definitions(original_content)
775
+
776
+ # Also read force field sections from included files
777
+ included_itps = self.__get_included_topology_files(original_content)
778
+ for included_itp in included_itps:
779
+ try:
780
+ # Read the included file content
781
+ with open(included_itp, 'r') as f:
782
+ included_content = f.read()
783
+
784
+ # Clean the included file content
785
+ clean_included_content = clean(included_content, comments=[';', '#'])
786
+
787
+ # Read force field sections from included file
788
+ included_bondtypes = Itp.__get_section('bondtypes', clean_included_content)
789
+ included_angletypes = Itp.__get_section('angletypes', clean_included_content)
790
+ included_dihedraltypes = Itp.__get_section('dihedraltypes', clean_included_content)
791
+
792
+ # Add force field data from included file
793
+ if included_bondtypes:
794
+ for section in included_bondtypes:
795
+ self._bondtypes.update(Itp.__read_bondtypes(section))
796
+
797
+ if included_angletypes:
798
+ for section in included_angletypes:
799
+ self._angletypes.update(Itp.__read_angletypes(section))
800
+
801
+ if included_dihedraltypes:
802
+ for section in included_dihedraltypes:
803
+ self._dihedraltypes.update(Itp.__read_dihedraltypes(section))
804
+
805
+ except OSError:
806
+ pass # Skip if file not found
807
+
808
+ # Also read parameter definitions from included files
809
+ included_itps = self.__get_included_topology_files(original_content)
810
+ for included_itp in included_itps:
811
+ try:
812
+ # Read the included file content
813
+ with open(included_itp, 'r') as f:
814
+ included_content = f.read()
815
+ # Extract parameter definitions from included file
816
+ included_definitions = Itp.__read_parameter_definitions(included_content)
817
+ self._parameter_definitions.update(included_definitions)
818
+ except OSError:
819
+ pass # Skip if file not found
820
+
821
+ # If external parameter definitions are provided, merge them
822
+ if self.external_parameter_definitions:
823
+ self._parameter_definitions.update(self.external_parameter_definitions)
824
+
320
825
  if molecule_section == [] and atom_section == []:
321
826
  return None
322
827
  molecules = []
323
828
  atom_infos = []
324
-
829
+ nrexcl_values = {} # Store nrexcl values for each molecule
325
830
  for molecule, atoms in zip(molecule_section, atom_section):
326
831
  mol = molecule.split()[0]
327
832
  if self.requested_molecules is not None and mol not in self.requested_molecules:
328
833
  continue
329
834
  molecules.append(mol)
330
835
  atom_infos.append(self.__read_atoms(atoms))
836
+
837
+ # Extract nrexcl value from moleculetype section
838
+ parts = molecule.split()
839
+ if len(parts) >= 2:
840
+ try:
841
+ nrexcl_values[mol] = int(parts[1])
842
+ except ValueError:
843
+ nrexcl_values[mol] = 3 # Default value if parsing fails
844
+ logging.warning(f'Could not parse nrexcl value for molecule {mol}, using default value 3')
331
845
  self._topol = dict(zip(molecules, atom_infos))
846
+ self._nrexcl_values = nrexcl_values # Store nrexcl values
332
847
 
333
- molecule_section = Itp.__get_molecules_with_bond_section(clean_itp_text) # get molecules with bonds section
848
+ # Read bonds
849
+ molecule_section = Itp.__get_molecules_with_interaction_section(clean_itp_text, 'bonds')
334
850
  bond_section = Itp.__get_section('bonds', clean_itp_text)
335
851
  bond_infos = []
336
-
337
852
  for molecule, bonds in zip(molecule_section, bond_section):
338
853
  mol = molecule.split()[0]
339
854
  if self.requested_molecules is not None and mol not in self.requested_molecules:
340
855
  continue
341
- molecules.append(mol)
342
856
  bond_infos.append(Itp.__read_bonds(bonds))
343
-
344
857
  self._bonds = dict(zip(molecules, bond_infos))
345
- #
346
- ###
858
+
859
+ # Read angles
860
+ molecule_section = Itp.__get_molecules_with_interaction_section(clean_itp_text, 'angles')
861
+ angle_section = Itp.__get_section('angles', clean_itp_text)
862
+ angle_infos = []
863
+ for molecule, angles in zip(molecule_section, angle_section):
864
+ mol = molecule.split()[0]
865
+ if self.requested_molecules is not None and mol not in self.requested_molecules:
866
+ continue
867
+ angle_infos.append(Itp.__read_angles(angles))
868
+ self._angles = dict(zip(molecules, angle_infos))
869
+
870
+ # Read dihedrals - correctly handle multiple molecules with multiple dihedral sections
871
+ dihedral_infos = []
872
+ for mol in molecules:
873
+ if self.requested_molecules is not None and mol not in self.requested_molecules:
874
+ continue
875
+
876
+ # Get all dihedral sections from the file
877
+ all_dihedral_sections = Itp.__get_section('dihedrals', clean_itp_text)
878
+
879
+ # For now, use a simple approach: assign dihedral sections based on their order
880
+ # This assumes that dihedral sections appear in the same order as molecules
881
+ mol_index = molecules.index(mol)
882
+
883
+ # Each molecule typically has 2 dihedral sections (propers and impropers)
884
+ # So we need to get sections 2*mol_index and 2*mol_index + 1
885
+ mol_dihedral_sections = []
886
+ start_idx = 2 * mol_index
887
+ end_idx = start_idx + 2
888
+
889
+ for i in range(start_idx, min(end_idx, len(all_dihedral_sections))):
890
+ mol_dihedral_sections.append(all_dihedral_sections[i])
891
+
892
+ # Combine all dihedral sections for this molecule
893
+ if mol_dihedral_sections:
894
+ combined_dihedrals = '\n'.join(mol_dihedral_sections)
895
+ dihedral_infos.append(Itp.__read_dihedrals(combined_dihedrals, self._parameter_definitions))
896
+ else:
897
+ dihedral_infos.append(None)
898
+
899
+ self._dihedrals = dict(zip(molecules, dihedral_infos))
900
+ # Read pairs
901
+ molecule_section = Itp.__get_molecules_with_interaction_section(clean_itp_text, 'pairs')
902
+ pairs_section = Itp.__get_section('pairs', clean_itp_text)
903
+ pairs_infos = []
904
+ for molecule, pairs in zip(molecule_section, pairs_section):
905
+ mol = molecule.split()[0]
906
+ if self.requested_molecules is not None and mol not in self.requested_molecules:
907
+ continue
908
+ pairs_infos.append(Itp.__read_pairs(pairs))
909
+ self._pairs = dict(zip(molecules, pairs_infos))
910
+
911
+ # Store source file information for each molecule
912
+ if hasattr(self, '_topol') and self._topol is not None:
913
+ for mol in molecules:
914
+ self._topol[mol].source_file = self.file