mimicpy 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mimicpy/__main__.py +725 -1
- mimicpy/_authors.py +1 -1
- mimicpy/_version.py +1 -1
- mimicpy/core/prepare.py +2 -2
- mimicpy/force_matching/__init__.py +34 -0
- mimicpy/force_matching/bonded_forces.py +628 -0
- mimicpy/force_matching/compare_top.py +809 -0
- mimicpy/force_matching/dresp.py +435 -0
- mimicpy/force_matching/nonbonded_forces.py +32 -0
- mimicpy/force_matching/opt_ff.py +2114 -0
- mimicpy/force_matching/qm_region.py +1960 -0
- mimicpy/scripts/fm_input.py +265 -0
- mimicpy/scripts/fmdata.py +120 -0
- mimicpy/topology/itp.py +602 -34
- mimicpy/topology/top.py +253 -14
- mimicpy/topology/topol_dict.py +232 -3
- mimicpy/utils/constants.py +16 -2
- mimicpy-0.3.0.dist-info/METADATA +156 -0
- {mimicpy-0.2.1.dist-info → mimicpy-0.3.0.dist-info}/RECORD +25 -16
- {mimicpy-0.2.1.dist-info → mimicpy-0.3.0.dist-info}/WHEEL +1 -1
- mimicpy-0.2.1.dist-info/METADATA +0 -90
- {mimicpy-0.2.1.dist-info → mimicpy-0.3.0.dist-info}/entry_points.txt +0 -0
- {mimicpy-0.2.1.dist-info → mimicpy-0.3.0.dist-info/licenses}/COPYING +0 -0
- {mimicpy-0.2.1.dist-info → mimicpy-0.3.0.dist-info/licenses}/COPYING.LESSER +0 -0
- {mimicpy-0.2.1.dist-info → mimicpy-0.3.0.dist-info}/top_level.txt +0 -0
- {mimicpy-0.2.1.dist-info → mimicpy-0.3.0.dist-info}/zip-safe +0 -0
mimicpy/__main__.py
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
# MiMiCPy: Python Based Tools for MiMiC
|
|
5
5
|
# Copyright (C) 2020-2023 Bharath Raghavan,
|
|
6
6
|
# Florian Schackert
|
|
7
|
+
# Sachin Shivakumar
|
|
7
8
|
#
|
|
8
9
|
# This file is part of MiMiCPy.
|
|
9
10
|
#
|
|
@@ -31,6 +32,29 @@ import itertools
|
|
|
31
32
|
import threading
|
|
32
33
|
import pandas as pd
|
|
33
34
|
import mimicpy
|
|
35
|
+
from mimicpy.force_matching import (
|
|
36
|
+
get_configurations,
|
|
37
|
+
get_optimize_ff_parameters,
|
|
38
|
+
get_configurations_optff,
|
|
39
|
+
opt_dresp,
|
|
40
|
+
compute_sd,
|
|
41
|
+
QMRegion
|
|
42
|
+
)
|
|
43
|
+
from mimicpy.scripts.fmdata import FMDataset
|
|
44
|
+
from mimicpy.scripts.fm_input import FMInput
|
|
45
|
+
from mimicpy.force_matching.opt_ff import (
|
|
46
|
+
unified_optimization_ff,
|
|
47
|
+
compare_optimization_methods
|
|
48
|
+
)
|
|
49
|
+
from mimicpy.force_matching.compare_top import compare_qm_parameters
|
|
50
|
+
from pathlib import Path
|
|
51
|
+
import logging
|
|
52
|
+
from datetime import datetime
|
|
53
|
+
import subprocess
|
|
54
|
+
import shutil
|
|
55
|
+
from os import environ
|
|
56
|
+
import copy
|
|
57
|
+
import numpy as np
|
|
34
58
|
|
|
35
59
|
warnings.simplefilter(action='ignore', category=FutureWarning) # Suppress pandas warnings
|
|
36
60
|
|
|
@@ -460,6 +484,126 @@ def geom2coords(args):
|
|
|
460
484
|
|
|
461
485
|
loader.close()
|
|
462
486
|
|
|
487
|
+
def json2h5(args):
    """
    Convert a JSON Force Matching data file to HDF5 through FMDataset.

    Args:
        args: Command line arguments; ``args.i`` is the input JSON path and
            ``args.o`` the optional output path (when empty, the input path
            with its suffix replaced by ``.h5`` is used).

    Exits with status 1 when the input file does not exist or when the
    conversion raises.
    """
    src = Path(args.i)
    if args.o:
        dst = Path(args.o)
    else:
        dst = src.with_suffix('.h5')

    if not src.exists():
        print(f'\n\nError: Cannot find file {src}! Exiting..\n')
        sys.exit(1)

    print('')
    loader = Loader('**Converting JSON to HDF5**')

    try:
        # FMDataset is handed both paths and performs the conversion
        fm_dataset = FMDataset(src, dst)
        loader.close()
    except Exception as e:
        loader.close(halt=True)
        print(f'\n\nError during conversion: {e}')
        sys.exit(1)

    # File sizes in MB, used for the size comparison printed below
    bytes_per_mb = 1024 * 1024
    input_size = src.stat().st_size / bytes_per_mb
    output_size = dst.stat().st_size / bytes_per_mb
    if input_size > 0:
        compression_ratio = (1 - output_size / input_size) * 100
    else:
        compression_ratio = 0

    print('\nConversion completed successfully!')
    print(f'Input file: {src} ({input_size:.2f} MB)')
    print(f'Output file: {dst} ({output_size:.2f} MB)')
    print(f'Compression: {compression_ratio:.1f}% reduction')
    print(f'Configurations: {len(fm_dataset)}')
|
|
528
|
+
|
|
529
|
+
def h5info(args):
    """
    Display information about an HDF5 Force Matching data file.

    Prints the file size, the number of ``config_*`` groups, the atom counts
    of the first five configurations (atoms whose ``region`` value is 1 are
    reported as QM, all others as MM), the fields of the first
    configuration's ``atoms`` dataset and its compression filter.

    Args:
        args: Command line arguments containing input file path (``args.i``)

    Exits with status 1 when the file is missing, holds no ``config_*``
    group, or cannot be read.
    """
    input_file = Path(args.i)

    if not input_file.exists():
        print(f'\n\nError: Cannot find file {input_file}! Exiting..\n')
        sys.exit(1)

    # Imported only once it is needed, so a missing input file is reported
    # before h5py is required
    import h5py

    print('')
    loader = Loader('**Reading HDF5 file**')

    try:
        with h5py.File(input_file, 'r') as f:
            # Order the groups by the number that follows 'config_'
            config_keys = sorted(
                (key for key in f.keys() if key.startswith('config_')),
                key=lambda name: int(name.split('_')[1]),
            )

            if not config_keys:
                # SystemExit is not caught by the handler below, so the
                # loader has to be stopped here like on the other error paths
                loader.close(halt=True)
                print('\n\nError: No configuration data found in HDF5 file! Exiting..\n')
                sys.exit(1)

            loader.close()

            file_size = input_file.stat().st_size / (1024 * 1024)  # MB

            print('\nHDF5 File Information:')
            print(f'File: {input_file}')
            print(f'Size: {file_size:.2f} MB')
            print(f'Configurations: {len(config_keys)}')

            # Only the first five configurations are listed in detail
            print('\nConfiguration Details:')
            for config_key in config_keys[:5]:
                atoms_data = f[config_key]['atoms']
                n_atoms = len(atoms_data)

                regions = atoms_data['region'][:]
                qm_atoms = np.sum(regions == 1)
                mm_atoms = np.sum(regions != 1)

                print(f' {config_key}: {n_atoms} atoms ({qm_atoms} QM, {mm_atoms} MM)')

            if len(config_keys) > 5:
                print(f' ... and {len(config_keys) - 5} more configurations')

            # The first configuration stands in for the layout of all of them
            print('\nDataset Structure:')
            atoms_data = f[config_keys[0]]['atoms']

            print(' Atom data fields:')
            for field in atoms_data.dtype.names:
                print(f' - {field}: {atoms_data.dtype[field]}')

            compression = getattr(atoms_data, 'compression', None)
            if compression:
                print(f'\nCompression: {compression}')
            else:
                print('\nCompression: None')

    except Exception as e:
        loader.close(halt=True)
        print(f'\n\nError reading HDF5 file: {e}')
        sys.exit(1)
|
|
606
|
+
|
|
463
607
|
def fixtop(args):
|
|
464
608
|
nsa_dct = get_nsa_mpt(args, True)
|
|
465
609
|
print("\n**Reading topology**\n")
|
|
@@ -484,6 +628,425 @@ def fixtop(args):
|
|
|
484
628
|
sys.exit(1)
|
|
485
629
|
return top
|
|
486
630
|
|
|
631
|
+
def compare_top(args):
    """Entry point of the ``compare-top`` subcommand.

    Forwards the three topology paths, the coordinate file and the selection
    file (wrapped in a ``Path``) from ``args`` to ``compare_qm_parameters``.
    """
    call_args = (args.top1, args.top2, args.top3, args.coords, Path(args.sele))
    compare_qm_parameters(*call_args)
|
|
634
|
+
|
|
635
|
+
def qminfo(args):
    """Report the atoms that make up the QM region.

    Builds a QMRegion from the topology and coordinates named in ``args``,
    applies the selection file ``args.sele`` and assembles a text report:
    summary counts, the QM atom table, the boundary atoms (if any) and the
    number of bonds/angles/dihedrals (if available). The report is written
    to ``args.out`` when that attribute is set; if writing fails, or no
    output file was given, it is printed instead.

    Exits with status 1 on a missing file, a MiMiCPy parser/selection error
    or a keyboard interrupt.
    """
    rule = "=" * 80

    print('')
    loader = Loader('**Reading topology and coordinates**')

    try:
        qm_region = QMRegion(
            args.top,
            args.coords,
            gmxdata=args.ff,
            buffer=args.buf,
            guess_elements=args.guess,
            nonstandard_atomtypes=get_nsa_mpt(args, only_nsa=True) if args.nsa else None,
        )
        qm_region.setup_qm_region(Path(args.sele))

        loader.close()

        # 'out' may be absent from args, in which case the report is printed
        output_file = getattr(args, 'out', None)

        # --- summary header ---
        atoms = qm_region.qm_atoms
        report = [
            rule,
            "QM Region Information",
            rule,
            f"\nTotal number of QM atoms: {len(atoms)}",
            f"QM total charge: {qm_region.qm_total_charge:.6f}",
        ]

        boundary = qm_region.boundary_atoms
        has_boundary = not boundary.empty
        if has_boundary:
            report.append(f"Number of boundary atoms: {len(boundary)}")
            report.append(f"Number of core QM atoms: {len(atoms) - len(boundary)}")

        if 'mol' in atoms.columns:
            per_mol = atoms.groupby('mol').size()
            report.append(f"Number of molecule types: {len(per_mol)}")
            report.append("\nAtoms per molecule type:")
            report.extend(f" {mol}: {count} atoms" for mol, count in per_mol.items())

        # --- full atom table ---
        report += ["\n" + rule, "Detailed QM Atom Information", rule]

        available_cols = atoms.columns.tolist()
        wanted = ['type', 'resid', 'resname', 'name', 'charge', 'mass']
        if 'element' in available_cols:
            wanted.insert(1, 'element')
        for optional in ('mol', 'is_bound'):
            if optional in available_cols:
                wanted.append(optional)
        # Keep only the columns the dataframe really has
        display_cols = [col for col in wanted if col in available_cols]

        display_df = atoms[display_cols].copy()
        display_df.insert(0, 'atom_id', display_df.index)
        # Lift the pandas display limits so the whole table is rendered
        with pd.option_context('display.max_rows', None, 'display.max_columns', None,
                               'display.width', None, 'display.max_colwidth', None):
            report.append("\n" + str(display_df))

        # --- boundary atoms ---
        if has_boundary:
            report += ["\n" + rule, "Boundary Atoms", rule]
            boundary_cols = [col for col in display_cols if col in boundary.columns]
            boundary_display = boundary[boundary_cols].copy()
            boundary_display.insert(0, 'atom_id', boundary_display.index)
            with pd.option_context('display.max_rows', None, 'display.max_columns', None):
                report.append("\n" + str(boundary_display))

        # --- interaction counts ---
        interactions = getattr(qm_region, 'qm_interactions', None)
        if interactions:
            report += ["\n" + rule, "QM Interactions Summary", rule]
            report.append(f"Bonds: {len(interactions.get('bonds', []))}")
            report.append(f"Angles: {len(interactions.get('angles', []))}")
            report.append(f"Dihedrals: {len(interactions.get('dihedrals', []))}")

        output_text = '\n'.join(report)

        if not output_file:
            print(output_text)
        else:
            try:
                with open(output_file, 'w') as f:
                    f.write(output_text)
                print(f"\nQM region information written to {output_file}")
            except Exception as e:
                # Fall back to the terminal so the report is not lost
                print(f"\nError writing to file {output_file}: {e}")
                print(output_text)

    except FileNotFoundError as e:
        print(f'\n\nError: Cannot find file {e.filename}! Exiting..\n')
        loader.close(halt=True)
        sys.exit(1)
    except (mimicpy.utils.errors.ParserError, mimicpy.utils.errors.MiMiCPyError, mimicpy.utils.errors.SelectionError) as e:
        print(f'\n\nError: {e}\n')
        loader.close(halt=True)
        sys.exit(1)
    except KeyboardInterrupt:
        print("\nReading halted\n")
        loader.close(halt=True)
        sys.exit(1)
|
|
751
|
+
|
|
752
|
+
|
|
753
|
+
def fm(args):
    """
    Execute the force matching workflow.

    The mode is chosen from the command line flags: ``args.dresp`` runs only
    the DRESP charge fit and writes the ``resp_`` topology, ``args.opt_ff``
    runs only the force field parameter optimization, and with neither flag
    both stages run in sequence. Progress is logged through the root logger,
    which additionally gets a handler writing to ``fm.log``.

    Args:
        args: Command line arguments containing force matching parameters

    Raises:
        FileNotFoundError: If the force matching data file ``args.fmdata``
            does not exist.
    """
    import csv
    import itertools

    log_file = 'fm.log'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.DEBUG)
    logging.getLogger().addHandler(file_handler)

    start_time = datetime.now()
    logging.info(f'Running Force matching in MiMiCPy {datetime.now()}')

    # Read the FM data
    fm_file = Path(args.fmdata)
    if not fm_file.exists():
        raise FileNotFoundError(f'FM file {fm_file} not found.')

    flag = 'dresp' if args.dresp else 'opt_ff' if args.opt_ff else 'fm'

    logging.info(f'Loading FM data from {fm_file}')
    fmdata = FMDataset(fm_file)
    logging.debug('FM data loaded successfully.')

    # Read the fm input using the FMInput class
    fm_input = FMInput.from_file(args.fi)

    # Get solvent names from fm_input
    solvent_names = {'resnames': fm_input.solvent_resnames, 'molecules': fm_input.solvent_molecules}

    # Initialize QMRegion
    qm_region = QMRegion(args.top, args.coords, gmxdata=args.ff)
    qm_region.setup_qm_region(Path(args.sele), solvent_names)

    # Equivalent-atom mapping, either derived from atom types or user supplied
    if fm_input.eq_atoms == 'use_atomtypes':
        eq_map = qm_region.get_equivalent_map(use_atomtypes=True)
    else:
        eq_map = qm_region.get_equivalent_map(fm_input.eq_atoms)

    snapshot_range = range(fm_input.stride[0], fm_input.stride[1], fm_input.stride[2])
    logging.info(f'Stride: {fm_input.stride}')
    logging.info(f'Equivalent atoms in gmx index: {fm_input.eq_atoms}')
    logging.info(f'Reference charges: {fm_input.reference_charges}')
    logging.info(f'Number of snapshots used for fitting: {len(snapshot_range)}')

    if fm_input.skip_solvent_optimization:
        logging.info('Skipping solvent optimization')
        fixed_charge_indices = set(qm_region.solvent_atom_indices)
    else:
        fixed_charge_indices = None

    charge_group_constraints = None
    if fm_input.charge_group_constraints:
        logging.info('Creating charge group constraints')
        charge_group_constraints = qm_region.create_charge_group_constraints(group_by='mol')
        for group_key, (atom_indices, target_charge) in charge_group_constraints.items():
            logging.info(f'Charge group {group_key} with {len(atom_indices)} atoms: target charge = {target_charge:.6f}')

    # DRESP fitting
    if flag in ['fm', 'dresp']:
        if flag == 'dresp':
            logging.info('Selected only DRESP optimization')
        configurations = get_configurations(fmdata, fm_input.stride[0], fm_input.stride[1], fm_input.stride[2], qm_region)
        ff_charges = qm_region.qm_charges
        if fm_input.qm_total_charge is not None and fm_input.qm_total_charge != qm_region.qm_total_charge:
            logging.warning(f'QM total charge mismatch: {qm_region.qm_total_charge} != {fm_input.qm_total_charge}')
            logging.warning(f'Using user-defined total charge: {fm_input.qm_total_charge}')
            qm_region.qm_total_charge = fm_input.qm_total_charge
        logging.info(f'QM total charge: {qm_region.qm_total_charge}')
        ff_charges = np.array(list(map(float, ff_charges)))

        if fm_input.reference_charges == 'ff_charges':
            # zip() stops at the shorter input, so at most one configuration
            # per stride step receives the force field charges as reference
            for config, _ in zip(configurations, snapshot_range):
                config['reference_charge'] = ff_charges

        # DRESP optimization
        logging.info('Running DRESP optimization')
        wv = fm_input.wv
        we = fm_input.we
        wh = fm_input.wh
        wq = fm_input.wq

        def run_dresp(wv_val, we_val, wh_val):
            """Fit, redistribute and score the charges for one set of weights."""
            # Without charge-group constraints the dict is None and has no
            # .values(); an empty list is passed instead.
            # NOTE(review): assumes opt_dresp treats an empty list as
            # "no constraints" - confirm against its signature.
            if charge_group_constraints is None:
                constraint_groups = []
            else:
                constraint_groups = list(charge_group_constraints.values())
            fitted = opt_dresp(configurations, wv_val, we_val, wh_val, wq,
                               qm_region.qm_total_charge, eq_map, args.n_processes,
                               fixed_charge_indices, constraint_groups, fm_input.weights_to_fix_charges)
            fitted = qm_region.redistribute_charges_after_dresp(fitted, fm_input.num_bonds_away,
                                                                fixed_charge_indices, charge_group_constraints)
            return fitted, compute_sd(fitted, configurations, args.n_processes)

        if args.grid_search:
            # Grid search mode: every combination of the given weights is tried
            wv_list = wv if isinstance(wv, list) else [wv]
            we_list = we if isinstance(we, list) else [we]
            wh_list = wh if isinstance(wh, list) else [wh]

            best_sdv = float('inf')
            best_params = None
            best_charges = None
            best_sd = None
            grid_results = []  # Store all results for output

            for wv_val, we_val, wh_val in itertools.product(wv_list, we_list, wh_list):
                logging.info(f"Trying DRESP with wv={wv_val}, we={we_val}, wh={wh_val}")
                charges, sd = run_dresp(wv_val, we_val, wh_val)
                # Selection criterion: sum of the two deviations from compute_sd
                sdv = sd[0] + sd[1]
                logging.info(f"SDV for wv={wv_val}, we={we_val}, wh={wh_val}: {sdv}")
                grid_results.append({'wv': wv_val, 'we': we_val, 'wh': wh_val, 'sd_potential': sd[0], 'sd_field': sd[1], 'sdv': sdv, 'charges': charges})
                if sdv < best_sdv:
                    best_sdv = sdv
                    best_params = (wv_val, we_val, wh_val)
                    best_charges = charges
                    best_sd = sd

            # Output all grid search results to CSV for plotting
            with open('dresp_grid_search_results.csv', 'w', newline='') as csvfile:
                fieldnames = ['wv', 'we', 'wh', 'sd_potential', 'sd_field', 'sdv', 'charges']
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(grid_results)

            # An empty grid, or deviations that never compare below infinity
            # (e.g. NaN), leave no candidate to continue with
            if best_params is None:
                logging.error('DRESP grid search produced no usable result')
                sys.exit(1)

            # Use best parameters and charges for updating and writing topology
            wv, we, wh = best_params
            full_optimize_charges = best_charges
            sd = best_sd
            logging.info(f"Best DRESP parameters: wv={wv}, we={we}, wh={wh} with SDV={best_sdv}")
        else:
            # Single DRESP calculation: the first value of each weight is used
            wv = wv[0] if isinstance(wv, list) else wv
            we = we[0] if isinstance(we, list) else we
            wh = wh[0] if isinstance(wh, list) else wh
            full_optimize_charges, sd = run_dresp(wv, we, wh)
            logging.info(f"Single DRESP run: wv={wv}, we={we}, wh={wh}")

        logging.info('DRESP optimized charges:')
        for charge in full_optimize_charges:
            logging.info(f'{charge:10.6f}')
        logging.info(f'# Sum of the charges: {np.sum(full_optimize_charges):10.6f}')
        logging.info(f'# Standard deviations of potential: {sd[0]}')
        logging.info(f'# Standard deviations of electric field: {sd[1]}')

        qm_region.update_qm_charges(full_optimize_charges)

        logging.info('DRESP optimization completed.')

        if flag == 'dresp':
            # DRESP-only mode ends here; a failed write is logged, not raised
            try:
                logging.info('Writing DRESP topology.')
                qm_region.write_topology(prefix='resp_')
                logging.info(f'MiMiCPy Force matching workflow completed {datetime.now()}.')
                logging.info(f'wall time: {datetime.now() - start_time}')
            except Exception as e:
                logging.error(f'Failed to write DRESP topology: {str(e)}')
            return

    # Non-bonded topology, written once for every mode that reaches this point
    logging.info('Starting Non-bonded force calculation.\n')
    try:
        logging.info('Writing non-bonded topology.')
        qm_region.write_non_bonded_itp(prefix='non_bonded_')
        logging.info('Non-bonded topology written.')
    except Exception as e:
        logging.error(f'Failed to write non-bonded topology: {str(e)}')

    # Copy the topology next to the original under a 'rerun_' name
    rerun_top_file = Path(args.top).with_name('rerun_' + Path(args.top).name)
    shutil.copy(args.top, rerun_top_file)

    # Point every #include of a rewritten .itp file at its non-bonded version
    with open(rerun_top_file, 'r') as f:
        content = f.read()

    for itp_file in qm_region.written_files:
        non_bonded_name = Path(itp_file).name
        # Original file name = written name without the 'non_bonded_' prefix
        original_name = non_bonded_name.replace('non_bonded_', '')
        # Replace double-quoted, single-quoted and unquoted references
        content = content.replace(f'#include "{original_name}"', f'#include "{non_bonded_name}"')
        content = content.replace(f"#include '{original_name}'", f"#include '{non_bonded_name}'")
        content = content.replace(f"#include {original_name}", f"#include {non_bonded_name}")

    with open(rerun_top_file, 'w') as f:
        f.write(content)
    logging.info('Non-bonded topology written.')

    tpr_file = 'rerun_nonb.tpr'
    trr_file = 'rerun_nonb.trr'

    logging.info('Running Non-bonded force calculation.\n')
    try:
        # Generate tpr file
        cmd = f"{args.gmx} grompp -f {args.mdp} -c {args.coords} -p {rerun_top_file} -n {args.ndx} -o {tpr_file} -maxwarn 1 -quiet"
        logging.info(f"Running command: {cmd}")
        subprocess.run(cmd, shell=True, check=True)

        # Rerun the GROMACS trajectory
        nthreads = environ.get('OMP_NUM_THREADS', 1)

        cmd = f"{args.gmx} mdrun -s {tpr_file} -rerun {args.trr} -o {trr_file} -e rerun_nonb.edr -g rerun_nonb.log -ntomp {nthreads} -quiet"
        logging.info(f"Running command: {cmd}")
        subprocess.run(cmd, shell=True, check=True)

    except subprocess.CalledProcessError as e:
        print(f"Error running GROMACS command: {e}")
        sys.exit(1)

    logging.info('Non-bonded force calculation Completed.\n')
    logging.info('Running force field parameter optimization.')

    ff_optimize, bond2params, regularization = get_optimize_ff_parameters(
        qm_region,
        eq_map,
        fm_input
    )

    configurations = get_configurations_optff(fmdata, tpr_file, trr_file, fm_input.stride[0], fm_input.stride[1],
                                              fm_input.stride[2], qm_region)

    if hasattr(args, 'compare_methods') and args.compare_methods:
        logging.info('Comparing all optimization methods...')
        results = compare_optimization_methods(
            qm_region, ff_optimize, configurations, bond2params,
            'optimization_comparison', regularization, fm_input, args.n_processes
        )

        # Keep the parameters of the method with the lowest final cost,
        # ignoring methods that reported an error
        best_method = None
        best_cost = float('inf')
        for method, result in results.items():
            if 'error' not in result and result['final_cost'] < best_cost:
                best_cost = result['final_cost']
                best_method = method
                ff_optimize = result['optimized_params']

        if best_method is None:
            logging.error('All optimization methods failed')
            sys.exit(1)
        logging.info(f'Best method: {best_method} with cost: {best_cost:.6f}')

    else:
        # Use the optimization method specified in fm_input
        logging.info(f'Using optimization method: {getattr(fm_input, "optimization_method", "hierarchical")}')
        logging.info(f'Using {args.n_processes} processes for parallel computation')

        # Optimize force field parameters using unified interface
        ff_optimize, res = unified_optimization_ff(
            qm_region, ff_optimize, configurations, bond2params,
            'optimization_results.txt', regularization, fm_input, args.n_processes
        )

        if res:
            logging.info('Output from optimization: ')
            logging.info(f' status: {getattr(res, "status", "N/A")} ')
            logging.info(f' message: {getattr(res, "message", "N/A")} ')
            logging.info(f' success: {getattr(res, "success", True)} ')
        else:
            logging.info('Optimization completed successfully')

    logging.info('Force field parameter optimization completed.')

    logging.info('Writing optimized force parameters to file.')
    qm_region.update_topology(ff_optimize, bond2params)
    qm_region.write_topology(prefix='opt_')
    logging.info(f'MiMiCPy Force matching workflow completed {datetime.now()}.')
    logging.info(f'wall time: {datetime.now() - start_time}')
|
|
1049
|
+
|
|
487
1050
|
def main():
|
|
488
1051
|
print('\n \t ***** MiMiCPy ***** ')
|
|
489
1052
|
print('\n \t Running version {}\n\tFor more information type mimicpy [subcommand] --help \n'.format(mimicpy.__version__))
|
|
@@ -620,6 +1183,7 @@ def main():
|
|
|
620
1183
|
cpmd2coords_others = parser_cpmd2coords.add_argument_group('other options')
|
|
621
1184
|
cpmd2coords_others.add_argument('-guess',
|
|
622
1185
|
required=False,
|
|
1186
|
+
default=True,
|
|
623
1187
|
type=__str2bool,
|
|
624
1188
|
help='toggle guessing atomic elements',
|
|
625
1189
|
metavar='(True)')
|
|
@@ -668,7 +1232,7 @@ def main():
|
|
|
668
1232
|
metavar='')
|
|
669
1233
|
fixtop_others.add_argument('-cls',
|
|
670
1234
|
required=False,
|
|
671
|
-
default=
|
|
1235
|
+
default=None,
|
|
672
1236
|
type=__str2bool,
|
|
673
1237
|
help='toggle clear [ atomtypes ] sections from files',
|
|
674
1238
|
metavar='(False)')
|
|
@@ -753,6 +1317,166 @@ def main():
|
|
|
753
1317
|
metavar='[.gro/.pdb] (GEOMETRY.gro)')
|
|
754
1318
|
parser_geom2coords.set_defaults(func=geom2coords)
|
|
755
1319
|
##
|
|
1320
|
+
#####
|
|
1321
|
+
parser_json2h5 = subparsers.add_parser('json2h5',
|
|
1322
|
+
help='convert large JSON Force Matching data files to HDF5 format')
|
|
1323
|
+
json2h5_input = parser_json2h5.add_argument_group('options to specify input files')
|
|
1324
|
+
json2h5_input.add_argument('-i',
|
|
1325
|
+
required=True,
|
|
1326
|
+
help='input JSON file containing Force Matching data',
|
|
1327
|
+
metavar='[.json]')
|
|
1328
|
+
json2h5_output = parser_json2h5.add_argument_group('options to specify output files')
|
|
1329
|
+
json2h5_output.add_argument('-o',
|
|
1330
|
+
required=False,
|
|
1331
|
+
help='output HDF5 file (default: input file with .h5 extension)',
|
|
1332
|
+
metavar='[.h5]')
|
|
1333
|
+
parser_json2h5.set_defaults(func=json2h5)
|
|
1334
|
+
##
|
|
1335
|
+
#####
|
|
1336
|
+
parser_h5info = subparsers.add_parser('h5info',
|
|
1337
|
+
help='display information about HDF5 Force Matching data files')
|
|
1338
|
+
h5info_input = parser_h5info.add_argument_group('options to specify input files')
|
|
1339
|
+
h5info_input.add_argument('-i',
|
|
1340
|
+
required=True,
|
|
1341
|
+
help='input HDF5 file containing Force Matching data',
|
|
1342
|
+
metavar='[.h5]')
|
|
1343
|
+
parser_h5info.set_defaults(func=h5info)
|
|
1344
|
+
##
|
|
1345
|
+
#####
|
|
1346
|
+
# Add compare-top command
|
|
1347
|
+
parser_compare = subparsers.add_parser('compare-top',
|
|
1348
|
+
help='compare QM parameters between two topologies')
|
|
1349
|
+
compare_input = parser_compare.add_argument_group('options to specify inputs')
|
|
1350
|
+
compare_input.add_argument('-top1',
|
|
1351
|
+
required=True,
|
|
1352
|
+
help='First topology file',
|
|
1353
|
+
metavar='[.top]')
|
|
1354
|
+
compare_input.add_argument('-top2',
|
|
1355
|
+
required=True,
|
|
1356
|
+
help='Second topology file',
|
|
1357
|
+
metavar='[.top]')
|
|
1358
|
+
compare_input.add_argument('-top3',
|
|
1359
|
+
required=False,
|
|
1360
|
+
help='Third topology file',
|
|
1361
|
+
metavar='[.top]')
|
|
1362
|
+
compare_input.add_argument('-coords',
|
|
1363
|
+
required=True,
|
|
1364
|
+
help='Coordinate file (gro/pdb)',
|
|
1365
|
+
metavar='[.gro/.pdb]')
|
|
1366
|
+
compare_input.add_argument('-sele',
|
|
1367
|
+
required=True,
|
|
1368
|
+
help='qm selection',
|
|
1369
|
+
metavar='[.txt/.dat]')
|
|
1370
|
+
parser_compare.set_defaults(func=compare_top)
|
|
1371
|
+
##
|
|
1372
|
+
#####
|
|
1373
|
+
parser_qminfo = subparsers.add_parser('qminfo',
|
|
1374
|
+
help='print information about QM atoms in the QM region')
|
|
1375
|
+
qminfo_input = parser_qminfo.add_argument_group('options to specify input files')
|
|
1376
|
+
qminfo_input.add_argument('-top',
|
|
1377
|
+
required=True,
|
|
1378
|
+
help='GROMACS topology file',
|
|
1379
|
+
metavar='[.top]')
|
|
1380
|
+
qminfo_input.add_argument('-coords',
|
|
1381
|
+
required=True,
|
|
1382
|
+
help='GROMACS coordinate file',
|
|
1383
|
+
metavar='[.gro/.pdb]')
|
|
1384
|
+
qminfo_input.add_argument('-sele',
|
|
1385
|
+
required=True,
|
|
1386
|
+
help='QM selection file',
|
|
1387
|
+
metavar='[.txt/.dat]')
|
|
1388
|
+
qminfo_output = parser_qminfo.add_argument_group('options to specify output files')
|
|
1389
|
+
qminfo_output.add_argument('-out',
|
|
1390
|
+
required=False,
|
|
1391
|
+
help='output file to write QM information (default: print to console)',
|
|
1392
|
+
metavar='[.txt]')
|
|
1393
|
+
qminfo_others = parser_qminfo.add_argument_group('other options')
|
|
1394
|
+
qminfo_others.add_argument('-ff',
|
|
1395
|
+
required=False,
|
|
1396
|
+
help='path to force field data directory',
|
|
1397
|
+
metavar='')
|
|
1398
|
+
qminfo_others.add_argument('-guess',
|
|
1399
|
+
required=False,
|
|
1400
|
+
default=True,
|
|
1401
|
+
type=__str2bool,
|
|
1402
|
+
help='toggle guessing atomic elements',
|
|
1403
|
+
metavar='(True)')
|
|
1404
|
+
qminfo_others.add_argument('-nsa',
|
|
1405
|
+
required=False,
|
|
1406
|
+
help='file containing non-standard atomtypes in 2-column format',
|
|
1407
|
+
metavar='[.txt/.dat]')
|
|
1408
|
+
qminfo_others.add_argument('-buf',
|
|
1409
|
+
required=False,
|
|
1410
|
+
default=1000,
|
|
1411
|
+
type=__str2int,
|
|
1412
|
+
help='buffer size for reading input topology',
|
|
1413
|
+
metavar='(1000)')
|
|
1414
|
+
parser_qminfo.set_defaults(func=qminfo)
|
|
1415
|
+
##
|
|
1416
|
+
#####
|
|
1417
|
+
parser_fm = subparsers.add_parser('fm',
|
|
1418
|
+
help='run force matching workflow')
|
|
1419
|
+
fm_input = parser_fm.add_argument_group('options to specify input files')
|
|
1420
|
+
fm_input.add_argument('-top',
|
|
1421
|
+
required=True,
|
|
1422
|
+
help='GROMACS topology file',
|
|
1423
|
+
metavar='[.top]')
|
|
1424
|
+
fm_input.add_argument('-sele',
|
|
1425
|
+
required=True,
|
|
1426
|
+
help='qm selection',
|
|
1427
|
+
metavar='[.txt/.dat]')
|
|
1428
|
+
fm_input.add_argument('-fmdata',
|
|
1429
|
+
required=True,
|
|
1430
|
+
help='Force matching data file',
|
|
1431
|
+
metavar='[.json]')
|
|
1432
|
+
fm_input.add_argument('-fi',
|
|
1433
|
+
required=True,
|
|
1434
|
+
help='Force matching input file',
|
|
1435
|
+
metavar='[.inp]')
|
|
1436
|
+
fm_input.add_argument('-trr',
|
|
1437
|
+
required=True,
|
|
1438
|
+
help='GROMACS trajectory file',
|
|
1439
|
+
metavar='[.trr]')
|
|
1440
|
+
fm_input.add_argument('-mdp',
|
|
1441
|
+
required=False,
|
|
1442
|
+
help='GROMACS MDP file',
|
|
1443
|
+
metavar='[.mdp]')
|
|
1444
|
+
fm_input.add_argument('-ndx',
|
|
1445
|
+
required=True,
|
|
1446
|
+
help='GROMACS index file',
|
|
1447
|
+
metavar='[.ndx]')
|
|
1448
|
+
fm_input.add_argument('-coords',
|
|
1449
|
+
required=True,
|
|
1450
|
+
help='GROMACS coordinate file',
|
|
1451
|
+
metavar='[.gro/.pdb]')
|
|
1452
|
+
fm_others = parser_fm.add_argument_group('other options')
|
|
1453
|
+
fm_others.add_argument('-ff',
|
|
1454
|
+
required=False,
|
|
1455
|
+
help='path to force field data directory',
|
|
1456
|
+
metavar='')
|
|
1457
|
+
fm_others.add_argument('-gmx',
|
|
1458
|
+
default='gmx',
|
|
1459
|
+
help='GROMACS executable',
|
|
1460
|
+
metavar='(gmx)')
|
|
1461
|
+
fm_others.add_argument('-dresp',
|
|
1462
|
+
action='store_true',
|
|
1463
|
+
help='run only DRESP optimization')
|
|
1464
|
+
fm_others.add_argument('-opt_ff',
|
|
1465
|
+
action='store_true',
|
|
1466
|
+
help='run only force field optimization')
|
|
1467
|
+
fm_others.add_argument('-grid_search',
|
|
1468
|
+
action='store_true',
|
|
1469
|
+
help='perform grid search over wv, we, wh and output CSV')
|
|
1470
|
+
fm_others.add_argument('-compare_methods',
|
|
1471
|
+
action='store_true',
|
|
1472
|
+
help='compare all optimization methods and use the best one')
|
|
1473
|
+
fm_others.add_argument('-n_processes',
|
|
1474
|
+
type=int,
|
|
1475
|
+
default=None,
|
|
1476
|
+
help='number of processes for parallel force computation (default: auto-detect)')
|
|
1477
|
+
parser_fm.set_defaults(func=fm)
|
|
1478
|
+
##
|
|
1479
|
+
|
|
756
1480
|
args = parser.parse_args()
|
|
757
1481
|
if vars(args) == {}:
|
|
758
1482
|
sys.exit()
|