mimicpy 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mimicpy/__main__.py CHANGED
@@ -4,6 +4,7 @@
  # MiMiCPy: Python Based Tools for MiMiC
  # Copyright (C) 2020-2023 Bharath Raghavan,
  #                         Florian Schackert
+ #                         Sachin Shivakumar
  #
  # This file is part of MiMiCPy.
  #
@@ -31,6 +32,29 @@ import itertools
  import threading
  import pandas as pd
  import mimicpy
+ from mimicpy.force_matching import (
+     get_configurations,
+     get_optimize_ff_parameters,
+     get_configurations_optff,
+     opt_dresp,
+     compute_sd,
+     QMRegion
+ )
+ from mimicpy.scripts.fmdata import FMDataset
+ from mimicpy.scripts.fm_input import FMInput
+ from mimicpy.force_matching.opt_ff import (
+     unified_optimization_ff,
+     compare_optimization_methods
+ )
+ from mimicpy.force_matching.compare_top import compare_qm_parameters
+ from pathlib import Path
+ import logging
+ from datetime import datetime
+ import subprocess
+ import shutil
+ from os import environ
+ import copy
+ import numpy as np

  warnings.simplefilter(action='ignore', category=FutureWarning) # Supress pandas warnings

@@ -460,6 +484,126 @@ def geom2coords(args):

      loader.close()

+ def json2h5(args):
+     """
+     Convert large JSON Force Matching data files to HDF5 format using FMDataset functionality.
+
+     Args:
+         args: Command line arguments containing input/output file paths
+     """
+     from pathlib import Path
+     import os
+
+     input_file = Path(args.i)
+     output_file = Path(args.o) if args.o else input_file.with_suffix('.h5')
+
+     if not input_file.exists():
+         print(f'\n\nError: Cannot find file {input_file}! Exiting..\n')
+         sys.exit(1)
+
+     print('')
+     loader = Loader('**Converting JSON to HDF5**')
+
+     try:
+         # Use the existing FMDataset functionality to convert JSON to HDF5
+         fm_dataset = FMDataset(input_file, output_file)
+
+         loader.close()
+
+     except Exception as e:
+         loader.close(halt=True)
+         print(f'\n\nError during conversion: {e}')
+         sys.exit(1)
+     # Calculate file sizes for comparison
+     input_size = input_file.stat().st_size / (1024 * 1024) # MB
+     output_size = output_file.stat().st_size / (1024 * 1024) # MB
+     compression_ratio = (1 - output_size / input_size) * 100 if input_size > 0 else 0
+
+
+     print(f'\nConversion completed successfully!')
+     print(f'Input file: {input_file} ({input_size:.2f} MB)')
+     print(f'Output file: {output_file} ({output_size:.2f} MB)')
+     print(f'Compression: {compression_ratio:.1f}% reduction')
+     print(f'Configurations: {len(fm_dataset)}')
+
+ def h5info(args):
+     """
+     Display information about HDF5 Force Matching data files.
+
+     Args:
+         args: Command line arguments containing input file path
+     """
+     import h5py
+     import numpy as np
+     from pathlib import Path
+
+     input_file = Path(args.i)
+
+     if not input_file.exists():
+         print(f'\n\nError: Cannot find file {input_file}! Exiting..\n')
+         sys.exit(1)
+
+     print('')
+     loader = Loader('**Reading HDF5 file**')
+
+     try:
+         # Read the HDF5 file
+         with h5py.File(input_file, 'r') as f:
+             config_keys = [key for key in f.keys() if key.startswith('config_')]
+             config_keys.sort(key=lambda x: int(x.split('_')[1])) # Sort by configuration number
+
+             if not config_keys:
+                 print('\n\nError: No configuration data found in HDF5 file! Exiting..\n')
+                 sys.exit(1)
+
+             loader.close()
+
+             # Get file size
+             file_size = input_file.stat().st_size / (1024 * 1024) # MB
+
+             print(f'\nHDF5 File Information:')
+             print(f'File: {input_file}')
+             print(f'Size: {file_size:.2f} MB')
+             print(f'Configurations: {len(config_keys)}')
+
+             # Show details for first few configurations
+             print(f'\nConfiguration Details:')
+             for i, config_key in enumerate(config_keys[:min(5, len(config_keys))]):
+                 config_group = f[config_key]
+                 atoms_data = config_group['atoms']
+                 n_atoms = len(atoms_data)
+
+                 # Get some statistics
+                 regions = atoms_data['region'][:]
+                 qm_atoms = np.sum(regions == 1)
+                 mm_atoms = np.sum(regions != 1)
+
+                 print(f' {config_key}: {n_atoms} atoms ({qm_atoms} QM, {mm_atoms} MM)')
+
+             if len(config_keys) > 5:
+                 print(f' ... and {len(config_keys) - 5} more configurations')
+
+             # Show dataset structure
+             print(f'\nDataset Structure:')
+             first_config = f[config_keys[0]]
+             atoms_data = first_config['atoms']
+
+             print(f' Atom data fields:')
+             for field in atoms_data.dtype.names:
+                 field_dtype = atoms_data.dtype[field]
+                 print(f' - {field}: {field_dtype}')
+
+             # Show compression info if available
+             if hasattr(atoms_data, 'compression') and atoms_data.compression:
+                 print(f'\nCompression: {atoms_data.compression}')
+             else:
+                 print(f'\nCompression: None')
+
+     except Exception as e:
+         loader.close(halt=True)
+         print(f'\n\nError reading HDF5 file: {e}')
+         sys.exit(1)
+
  def fixtop(args):
      nsa_dct = get_nsa_mpt(args, True)
      print("\n**Reading topology**\n")
@@ -484,6 +628,425 @@ def fixtop(args):
          sys.exit(1)
      return top

+ def compare_top(args):
+     """Command-line interface for comparing QM parameters between topologies"""
+     compare_qm_parameters(args.top1, args.top2, args.top3, args.coords, Path(args.sele))
+
+ def qminfo(args):
+     """Print information about QM atoms in the QM region"""
+     print('')
+     loader = Loader('**Reading topology and coordinates**')
+
+     try:
+         # Initialize QMRegion
+         qm_region = QMRegion(args.top, args.coords, gmxdata=args.ff,
+                              buffer=args.buf, guess_elements=args.guess,
+                              nonstandard_atomtypes=get_nsa_mpt(args, only_nsa=True) if args.nsa else None)
+
+         # Setup QM region with selection file and solvent names
+         qm_region.setup_qm_region(Path(args.sele))
+
+         loader.close()
+
+         # Get output file if specified
+         output_file = getattr(args, 'out', None)
+
+         # Format output
+         output_lines = []
+         output_lines.append("=" * 80)
+         output_lines.append("QM Region Information")
+         output_lines.append("=" * 80)
+         output_lines.append(f"\nTotal number of QM atoms: {len(qm_region.qm_atoms)}")
+         output_lines.append(f"QM total charge: {qm_region.qm_total_charge:.6f}")
+
+         # Count boundary atoms if present
+         if not qm_region.boundary_atoms.empty:
+             output_lines.append(f"Number of boundary atoms: {len(qm_region.boundary_atoms)}")
+             output_lines.append(f"Number of core QM atoms: {len(qm_region.qm_atoms) - len(qm_region.boundary_atoms)}")
+
+         # Count by molecule type
+         if 'mol' in qm_region.qm_atoms.columns:
+             mol_counts = qm_region.qm_atoms.groupby('mol').size()
+             output_lines.append(f"Number of molecule types: {len(mol_counts)}")
+             output_lines.append("\nAtoms per molecule type:")
+             for mol, count in mol_counts.items():
+                 output_lines.append(f" {mol}: {count} atoms")
+
+         # Print detailed atom table
+         output_lines.append("\n" + "=" * 80)
+         output_lines.append("Detailed QM Atom Information")
+         output_lines.append("=" * 80)
+
+         # Create formatted table
+         with pd.option_context('display.max_rows', None, 'display.max_columns', None,
+                                'display.width', None, 'display.max_colwidth', None):
+             # Select columns to display (only include columns that exist)
+             display_cols = ['type', 'resid', 'resname', 'name', 'charge', 'mass']
+             available_cols = qm_region.qm_atoms.columns.tolist()
+             if 'element' in available_cols:
+                 display_cols.insert(1, 'element')
+             if 'mol' in available_cols:
+                 display_cols.append('mol')
+             if 'is_bound' in available_cols:
+                 display_cols.append('is_bound')
+
+             # Filter to only include columns that actually exist
+             display_cols = [col for col in display_cols if col in available_cols]
+
+             # Create display dataframe with index as first column
+             display_df = qm_region.qm_atoms[display_cols].copy()
+             display_df.insert(0, 'atom_id', display_df.index)
+
+             # Format the table
+             table_str = str(display_df)
+             output_lines.append("\n" + table_str)
+
+         # Print boundary atoms separately if present
+         if not qm_region.boundary_atoms.empty:
+             output_lines.append("\n" + "=" * 80)
+             output_lines.append("Boundary Atoms")
+             output_lines.append("=" * 80)
+             # Filter display_cols to only include columns that exist in boundary_atoms
+             boundary_display_cols = [col for col in display_cols if col in qm_region.boundary_atoms.columns]
+             boundary_display = qm_region.boundary_atoms[boundary_display_cols].copy()
+             boundary_display.insert(0, 'atom_id', boundary_display.index)
+             with pd.option_context('display.max_rows', None, 'display.max_columns', None):
+                 output_lines.append("\n" + str(boundary_display))
+
+         # Print interactions summary if available
+         if hasattr(qm_region, 'qm_interactions') and qm_region.qm_interactions:
+             output_lines.append("\n" + "=" * 80)
+             output_lines.append("QM Interactions Summary")
+             output_lines.append("=" * 80)
+             output_lines.append(f"Bonds: {len(qm_region.qm_interactions.get('bonds', []))}")
+             output_lines.append(f"Angles: {len(qm_region.qm_interactions.get('angles', []))}")
+             output_lines.append(f"Dihedrals: {len(qm_region.qm_interactions.get('dihedrals', []))}")
+
+         # Write or print output
+         output_text = '\n'.join(output_lines)
+
+         if output_file:
+             try:
+                 with open(output_file, 'w') as f:
+                     f.write(output_text)
+                 print(f"\nQM region information written to {output_file}")
+             except Exception as e:
+                 print(f"\nError writing to file {output_file}: {e}")
+                 print(output_text)
+         else:
+             print(output_text)
+
+     except FileNotFoundError as e:
+         print(f'\n\nError: Cannot find file {e.filename}! Exiting..\n')
+         loader.close(halt=True)
+         sys.exit(1)
+     except (mimicpy.utils.errors.ParserError, mimicpy.utils.errors.MiMiCPyError, mimicpy.utils.errors.SelectionError) as e:
+         print(f'\n\nError: {e}\n')
+         loader.close(halt=True)
+         sys.exit(1)
+     except KeyboardInterrupt:
+         print("\nReading halted\n")
+         loader.close(halt=True)
+         sys.exit(1)
+
+
+ def fm(args):
+     """
+     Execute the force matching workflow.
+
+     Args:
+         args: Command line arguments containing force matching parameters
+     """
+     log_file = 'fm.log'
+     file_handler = logging.FileHandler(log_file)
+     file_handler.setLevel(logging.DEBUG)
+     logging.getLogger().addHandler(file_handler)
+
+     start_time = datetime.now()
+     logging.info(f'Running Force matching in MiMiCPy {datetime.now()}')
+
+     # Read the FM data
+     fm_file = Path(args.fmdata)
+     if not fm_file.exists():
+         raise FileNotFoundError(f'FM file {fm_file} not found.')
+
+     flag = 'dresp' if args.dresp else 'opt_ff' if args.opt_ff else 'fm'
+
+
+     logging.info(f'Loading FM data from {fm_file}')
+     fmdata = FMDataset(fm_file)
+     logging.debug('FM data loaded successfully.')
+
+     # Read the fm input using the new FMInput class
+     fm_input = FMInput.from_file(args.fi)
+
+     # Get solvent names from fm_input
+     solvent_names = {'resnames': fm_input.solvent_resnames, 'molecules': fm_input.solvent_molecules}
+
+     # Initialize QMRegion
+     qm_region = QMRegion(args.top, args.coords, gmxdata=args.ff)
+     qm_region.setup_qm_region(Path(args.sele), solvent_names)
+
+     # Parse ITP file and get equivalent mapping
+     if fm_input.eq_atoms == 'use_atomtypes':
+         eq_map = qm_region.get_equivalent_map(use_atomtypes=True)
+     else:
+         eq_map = qm_region.get_equivalent_map(fm_input.eq_atoms)
+
+     logging.info(f'Stride: {fm_input.stride}')
+     logging.info(f'Equivalent atoms in gmx index: {fm_input.eq_atoms}')
+     logging.info(f'Reference charges: {fm_input.reference_charges}')
+     logging.info(f'Number of snapshots used for fitting: {len(range(fm_input.stride[0], fm_input.stride[1], fm_input.stride[2]))}')
+
+     if fm_input.skip_solvent_optimization:
+         logging.info('Skipping solvent optimization')
+         fixed_charge_indices = set(qm_region.solvent_atom_indices)
+     else:
+         fixed_charge_indices = None
+
+     charge_group_constraints = None
+     if fm_input.charge_group_constraints:
+         logging.info('Creating charge group constraints')
+         charge_group_constraints = qm_region.create_charge_group_constraints(group_by='mol')
+         for group_key, (atom_indices, target_charge) in charge_group_constraints.items():
+             logging.info(f'Charge group {group_key} with {len(atom_indices)} atoms: target charge = {target_charge:.6f}')
+         # charge_group_constraints = list(charge_group_constraints.values())
+
+     check_dresp = False
+     # DRESP fitting
+     if flag in ['fm', 'dresp']:
+         if flag == 'dresp':
+             logging.info('Selected only DRESP optimization')
+         configurations = get_configurations(fmdata, fm_input.stride[0], fm_input.stride[1], fm_input.stride[2], qm_region)
+         ff_charges = qm_region.qm_charges
+         if fm_input.qm_total_charge is not None and fm_input.qm_total_charge != qm_region.qm_total_charge:
+             logging.warning(f'QM total charge mismatch: {qm_region.qm_total_charge} != {fm_input.qm_total_charge}')
+             logging.warning(f'Using user-defined total charge: {fm_input.qm_total_charge}')
+             qm_region.qm_total_charge = fm_input.qm_total_charge
+         logging.info(f'QM total charge: {qm_region.qm_total_charge}')
+         ff_charges = np.array(list(map(float, ff_charges)))
+
+         if fm_input.reference_charges == 'ff_charges':
+             for config, index in zip(configurations, range(fm_input.stride[0], fm_input.stride[1], fm_input.stride[2])):
+                 config['reference_charge'] = ff_charges
+
+         # DRESP optimization
+         logging.info('Running DRESP optimization')
+         wv = fm_input.wv
+         we = fm_input.we
+         wh = fm_input.wh
+         wq = fm_input.wq
+
+         if args.grid_search:
+             # Grid search mode
+             wv_list = wv if isinstance(wv, list) else [wv]
+             we_list = we if isinstance(we, list) else [we]
+             wh_list = wh if isinstance(wh, list) else [wh]
+
+             best_sdv = float('inf')
+             best_params = None
+             best_charges = None
+             best_sd = None
+             grid_results = [] # Store all results for output
+
+             import itertools
+             for wv_val, we_val, wh_val in itertools.product(wv_list, we_list, wh_list):
+                 logging.info(f"Trying DRESP with wv={wv_val}, we={we_val}, wh={wh_val}")
+                 charges = opt_dresp(configurations, wv_val, we_val, wh_val, wq,
+                                     qm_region.qm_total_charge, eq_map, args.n_processes,
+                                     fixed_charge_indices, list(charge_group_constraints.values()), fm_input.weights_to_fix_charges)
+                 charges = qm_region.redistribute_charges_after_dresp(charges, fm_input.num_bonds_away,
+                                                                      fixed_charge_indices, charge_group_constraints)
+                 sd = compute_sd(charges, configurations, args.n_processes)
+                 sdv = sd[0] + sd[1]
+                 logging.info(f"SDV for wv={wv_val}, we={we_val}, wh={wh_val}: {sdv}")
+                 grid_results.append({'wv': wv_val, 'we': we_val, 'wh': wh_val, 'sd_potential': sd[0], 'sd_field': sd[1], 'sdv': sdv, 'charges': charges})
+                 if sdv < best_sdv:
+                     best_sdv = sdv
+                     best_params = (wv_val, we_val, wh_val)
+                     best_charges = charges
+                     best_sd = sd
+
+             # Output all grid search results to CSV for plotting
+             import csv
+             with open('dresp_grid_search_results.csv', 'w', newline='') as csvfile:
+                 fieldnames = ['wv', 'we', 'wh', 'sd_potential', 'sd_field', 'sdv', 'charges']
+                 writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+                 writer.writeheader()
+                 for row in grid_results:
+                     writer.writerow(row)
+
+             # Use best parameters and charges for updating and writing topology
+             wv, we, wh = best_params
+             full_optimize_charges = best_charges
+             sd = best_sd
+             logging.info(f"Best DRESP parameters: wv={wv}, we={we}, wh={wh} with SDV={best_sdv}")
+         else:
+             # Single DRESP calculation (no grid search)
+             wv_val = wv[0] if isinstance(wv, list) else wv
+             we_val = we[0] if isinstance(we, list) else we
+             wh_val = wh[0] if isinstance(wh, list) else wh
+             charges = opt_dresp(configurations, wv_val, we_val, wh_val, wq,
+                                 qm_region.qm_total_charge, eq_map, args.n_processes,
+                                 fixed_charge_indices, list(charge_group_constraints.values()), fm_input.weights_to_fix_charges)
+             charges = qm_region.redistribute_charges_after_dresp(charges, fm_input.num_bonds_away,
+                                                                  fixed_charge_indices, charge_group_constraints)
+             sd = compute_sd(charges, configurations, args.n_processes)
+             wv, we, wh = wv_val, we_val, wh_val
+             full_optimize_charges = charges
+             logging.info(f"Single DRESP run: wv={wv}, we={we}, wh={wh}")
+
+         logging.info(f'DRESP optimized charges:')
+         for i, charge in enumerate(full_optimize_charges):
+             logging.info(f'{charge:10.6f}')
+         logging.info(f'# Sum of the charges: {np.sum(full_optimize_charges):10.6f}')
+         logging.info(f'# Standard deviations of potential: {sd[0]}')
+         logging.info(f'# Standard deviations of electric field: {sd[1]}')
+
+         qm_region.update_qm_charges(full_optimize_charges)
+
+         logging.info(f'DRESP optimization completed.')
+
+         if flag == 'dresp':
+             try:
+                 logging.info(f'Writing DRESP topology.')
+                 qm_region.write_topology(prefix='resp_')
+                 logging.info(f'MiMiCPy Force matching workflow completed {datetime.now()}.')
+                 logging.info(f'wall time: {datetime.now() - start_time}')
+             except Exception as e:
+                 logging.error(f'Failed to write DRESP topology: {str(e)}')
+             return
+
+
+     logging.info(f'Starting Non-bonded force calculation.\n')
+     # Write non-bonded ITP file with prefixed name
+     try:
+         logging.info(f'Writing non-bonded topology.')
+         qm_region.write_non_bonded_itp(prefix='non_bonded_')
+     except Exception as e:
+         logging.error(f'Failed to write non-bonded topology: {str(e)}')
+
+     if not check_dresp:
+         logging.info(f'Starting Non-bonded force calculation.\n')
+         try:
+             logging.info(f'Writing non-bonded topology.')
+             qm_region.write_non_bonded_itp(prefix='non_bonded_')
+             logging.info(f'Non-bonded topology written.')
+         except Exception as e:
+             logging.error(f'Failed to write non-bonded topology: {str(e)}')
+
+         # Take the topology and prefix the name rerun
+         rerun_top_file = Path(args.top).with_name('rerun_' + Path(args.top).name)
+         shutil.copy(args.top, rerun_top_file)
+
+         # Get the list of written .itp files from QMRegion
+         written_itp_files = qm_region.written_files
+
+         # Read the topology file content
+         with open(rerun_top_file, 'r') as f:
+             content = f.read()
+
+         # Replace each .itp file reference with its non-bonded version
+         for itp_file in written_itp_files:
+             # Get the original name by removing the 'non_bonded_' prefix
+             non_bonded_name = Path(itp_file).name
+             original_name = non_bonded_name.replace('non_bonded_', '')
+             # Replace both quoted and unquoted references
+             content = content.replace(f'#include "{original_name}"', f'#include "{non_bonded_name}"')
+             content = content.replace(f"#include '{original_name}'", f"#include '{non_bonded_name}'")
+             content = content.replace(f"#include {original_name}", f"#include {non_bonded_name}")
+
+         # Write the updated content back to the rerun topology file
+         with open(rerun_top_file, 'w') as f:
+             f.write(content)
+         logging.info(f'Non-bonded topology written.')
+
+
+     tpr_file = 'rerun_nonb.tpr'
+     trr_file = 'rerun_nonb.trr'
+
+     logging.info(f'Running Non-bonded force calculation.\n')
+     # Generate tpr file
+     try:
+         cmd = f"{args.gmx} grompp -f {args.mdp} -c {args.coords} -p {rerun_top_file} -n {args.ndx} -o {tpr_file} -maxwarn 1 -quiet"
+         logging.info(f"Running command: {cmd}")
+         subprocess.run(cmd, shell=True, check=True)
+
+         # Rerun the GROMACS trajectory
+         nthreads = environ.get('OMP_NUM_THREADS', 1)
+
+         cmd = f"{args.gmx} mdrun -s {tpr_file} -rerun {args.trr} -o {trr_file} -e rerun_nonb.edr -g rerun_nonb.log -ntomp {nthreads} -quiet"
+         logging.info(f"Running command: {cmd}")
+         subprocess.run(cmd, shell=True, check=True)
+
+     except subprocess.CalledProcessError as e:
+         print(f"Error running GROMACS command: {e}")
+         sys.exit(1)
+
+     logging.info(f'Non-bonded force calculation Completed.\n')
+     logging.info(f'Running force field parameter optimization.')
+
+     ff_optimize, bond2params, regularization = get_optimize_ff_parameters(
+         qm_region,
+         eq_map,
+         fm_input
+     )
+
+     configurations = get_configurations_optff(fmdata, tpr_file, trr_file, fm_input.stride[0], fm_input.stride[1],
+                                               fm_input.stride[2], qm_region)
+
+     # Check if hierarchical optimization is requested
+     if hasattr(args, 'compare_methods') and args.compare_methods:
+         logging.info('Comparing all optimization methods...')
+         results = compare_optimization_methods(
+             qm_region, ff_optimize, configurations, bond2params,
+             'optimization_comparison', regularization, fm_input, args.n_processes
+         )
+
+         # Use the best method based on final cost
+         best_method = None
+         best_cost = float('inf')
+         for method, result in results.items():
+             if 'error' not in result and result['final_cost'] < best_cost:
+                 best_cost = result['final_cost']
+                 best_method = method
+                 ff_optimize = result['optimized_params']
+
+         if best_method:
+             logging.info(f'Best method: {best_method} with cost: {best_cost:.6f}')
+         else:
+             logging.error('All optimization methods failed')
+             sys.exit(1)
+
+     else:
+         # Use the optimization method specified in fm_input
+         logging.info(f'Using optimization method: {getattr(fm_input, "optimization_method", "hierarchical")}')
+
+         # Pass n_processes to the optimization function
+         logging.info(f'Using {args.n_processes} processes for parallel computation')
+
+         # Optimize force field parameters using unified interface
+         ff_optimize, res = unified_optimization_ff(
+             qm_region, ff_optimize, configurations, bond2params,
+             'optimization_results.txt', regularization, fm_input, args.n_processes
+         )
+
+         if res:
+             logging.info(f'Output from optimization: ')
+             logging.info(f' status: {getattr(res, "status", "N/A")} ')
+             logging.info(f' message: {getattr(res, "message", "N/A")} ')
+             logging.info(f' success: {getattr(res, "success", True)} ')
+         else:
+             logging.info('Optimization completed successfully')
+
+     logging.info(f'Force field parameter optimization completed.')
+
+     logging.info('Writing optimized force parameters to file.')
+     qm_region.update_topology(ff_optimize, bond2params)
+     qm_region.write_topology(prefix='opt_')
+     logging.info(f'MiMiCPy Force matching workflow completed {datetime.now()}.')
+     logging.info(f'wall time: {datetime.now() - start_time}')
+
  def main():
      print('\n \t ***** MiMiCPy ***** ')
      print('\n \t Running version {}\n\tFor more information type mimicpy [subcommand] --help \n'.format(mimicpy.__version__))
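In the grid-search branch of fm() above, every (wv, we, wh) combination is scored by the sum of the two standard deviations returned by compute_sd (potential and field), and the combination with the lowest sum is kept. The selection logic in isolation looks roughly as follows; score is a stand-in for the opt_dresp/compute_sd pipeline, not MiMiCPy API.

import itertools

def pick_best_weights(wv_list, we_list, wh_list, score):
    # score(wv, we, wh) -> (sd_potential, sd_field); stand-in for opt_dresp + compute_sd
    best_combo, best_sdv = None, float('inf')
    for wv, we, wh in itertools.product(wv_list, we_list, wh_list):
        sd_potential, sd_field = score(wv, we, wh)
        sdv = sd_potential + sd_field
        if sdv < best_sdv:
            best_combo, best_sdv = (wv, we, wh), sdv
    return best_combo, best_sdv

# Toy usage with a fabricated scoring function:
best, sdv = pick_best_weights([0.1, 1.0], [0.1, 1.0], [0.5],
                              lambda wv, we, wh: (abs(wv - 0.1), abs(we - 1.0)))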
@@ -620,6 +1183,7 @@ def main():
      cpmd2coords_others = parser_cpmd2coords.add_argument_group('other options')
      cpmd2coords_others.add_argument('-guess',
                                      required=False,
+                                     default=True,
                                      type=__str2bool,
                                      help='toggle guessing atomic elements',
                                      metavar='(True)')
@@ -668,7 +1232,7 @@ def main():
                                 metavar='')
      fixtop_others.add_argument('-cls',
                                 required=False,
-                                default=False,
+                                default=None,
                                 type=__str2bool,
                                 help='toggle clear [ atomtypes ] sections from files',
                                 metavar='(False)')
@@ -753,6 +1317,166 @@ def main():
                                     metavar='[.gro/.pdb] (GEOMETRY.gro)')
      parser_geom2coords.set_defaults(func=geom2coords)
      ##
+     #####
+     parser_json2h5 = subparsers.add_parser('json2h5',
+                                            help='convert large JSON Force Matching data files to HDF5 format')
+     json2h5_input = parser_json2h5.add_argument_group('options to specify input files')
+     json2h5_input.add_argument('-i',
+                                required=True,
+                                help='input JSON file containing Force Matching data',
+                                metavar='[.json]')
+     json2h5_output = parser_json2h5.add_argument_group('options to specify output files')
+     json2h5_output.add_argument('-o',
+                                 required=False,
+                                 help='output HDF5 file (default: input file with .h5 extension)',
+                                 metavar='[.h5]')
+     parser_json2h5.set_defaults(func=json2h5)
+     ##
+     #####
+     parser_h5info = subparsers.add_parser('h5info',
+                                           help='display information about HDF5 Force Matching data files')
+     h5info_input = parser_h5info.add_argument_group('options to specify input files')
+     h5info_input.add_argument('-i',
+                               required=True,
+                               help='input HDF5 file containing Force Matching data',
+                               metavar='[.h5]')
+     parser_h5info.set_defaults(func=h5info)
+     ##
+     #####
+     # Add compare-top command
+     parser_compare = subparsers.add_parser('compare-top',
+                                            help='compare QM parameters between two topologies')
+     compare_input = parser_compare.add_argument_group('options to specify inputs')
+     compare_input.add_argument('-top1',
+                                required=True,
+                                help='First topology file',
+                                metavar='[.top]')
+     compare_input.add_argument('-top2',
+                                required=True,
+                                help='Second topology file',
+                                metavar='[.top]')
+     compare_input.add_argument('-top3',
+                                required=False,
+                                help='Third topology file',
+                                metavar='[.top]')
+     compare_input.add_argument('-coords',
+                                required=True,
+                                help='Coordinate file (gro/pdb)',
+                                metavar='[.gro/.pdb]')
+     compare_input.add_argument('-sele',
+                                required=True,
+                                help='qm selection',
+                                metavar='[.txt/.dat]')
+     parser_compare.set_defaults(func=compare_top)
+     ##
+     #####
+     parser_qminfo = subparsers.add_parser('qminfo',
+                                           help='print information about QM atoms in the QM region')
+     qminfo_input = parser_qminfo.add_argument_group('options to specify input files')
+     qminfo_input.add_argument('-top',
+                               required=True,
+                               help='GROMACS topology file',
+                               metavar='[.top]')
+     qminfo_input.add_argument('-coords',
+                               required=True,
+                               help='GROMACS coordinate file',
+                               metavar='[.gro/.pdb]')
+     qminfo_input.add_argument('-sele',
+                               required=True,
+                               help='QM selection file',
+                               metavar='[.txt/.dat]')
+     qminfo_output = parser_qminfo.add_argument_group('options to specify output files')
+     qminfo_output.add_argument('-out',
+                                required=False,
+                                help='output file to write QM information (default: print to console)',
+                                metavar='[.txt]')
+     qminfo_others = parser_qminfo.add_argument_group('other options')
+     qminfo_others.add_argument('-ff',
+                                required=False,
+                                help='path to force field data directory',
+                                metavar='')
+     qminfo_others.add_argument('-guess',
+                                required=False,
+                                default=True,
+                                type=__str2bool,
+                                help='toggle guessing atomic elements',
+                                metavar='(True)')
+     qminfo_others.add_argument('-nsa',
+                                required=False,
+                                help='file containing non-standard atomtypes in 2-column format',
+                                metavar='[.txt/.dat]')
+     qminfo_others.add_argument('-buf',
+                                required=False,
+                                default=1000,
+                                type=__str2int,
+                                help='buffer size for reading input topology',
+                                metavar='(1000)')
+     parser_qminfo.set_defaults(func=qminfo)
+     ##
+     #####
+     parser_fm = subparsers.add_parser('fm',
+                                       help='run force matching workflow')
+     fm_input = parser_fm.add_argument_group('options to specify input files')
+     fm_input.add_argument('-top',
+                           required=True,
+                           help='GROMACS topology file',
+                           metavar='[.top]')
+     fm_input.add_argument('-sele',
+                           required=True,
+                           help='qm selection',
+                           metavar='[.txt/.dat]')
+     fm_input.add_argument('-fmdata',
+                           required=True,
+                           help='Force matching data file',
+                           metavar='[.json]')
+     fm_input.add_argument('-fi',
+                           required=True,
+                           help='Force matching input file',
+                           metavar='[.inp]')
+     fm_input.add_argument('-trr',
+                           required=True,
+                           help='GROMACS trajectory file',
+                           metavar='[.trr]')
+     fm_input.add_argument('-mdp',
+                           required=False,
+                           help='GROMACS MDP file',
+                           metavar='[.mdp]')
+     fm_input.add_argument('-ndx',
+                           required=True,
+                           help='GROMACS index file',
+                           metavar='[.ndx]')
+     fm_input.add_argument('-coords',
+                           required=True,
+                           help='GROMACS coordinate file',
+                           metavar='[.gro/.pdb]')
+     fm_others = parser_fm.add_argument_group('other options')
+     fm_others.add_argument('-ff',
+                            required=False,
+                            help='path to force field data directory',
+                            metavar='')
+     fm_others.add_argument('-gmx',
+                            default='gmx',
+                            help='GROMACS executable',
+                            metavar='(gmx)')
+     fm_others.add_argument('-dresp',
+                            action='store_true',
+                            help='run only DRESP optimization')
+     fm_others.add_argument('-opt_ff',
+                            action='store_true',
+                            help='run only force field optimization')
+     fm_others.add_argument('-grid_search',
+                            action='store_true',
+                            help='perform grid search over wv, we, wh and output CSV')
+     fm_others.add_argument('-compare_methods',
+                            action='store_true',
+                            help='compare all optimization methods and use the best one')
+     fm_others.add_argument('-n_processes',
+                            type=int,
+                            default=None,
+                            help='number of processes for parallel force computation (default: auto-detect)')
+     parser_fm.set_defaults(func=fm)
+     ##
+
      args = parser.parse_args()
      if vars(args) == {}:
          sys.exit()
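With these parsers registered, the new functionality becomes available as ordinary MiMiCPy subcommands, for example mimicpy json2h5 -i fmdata.json, mimicpy h5info -i fmdata.h5, or mimicpy fm -top topol.top -coords conf.gro -sele qm.dat -fmdata fmdata.json -fi fm.inp -trr traj.trr -ndx index.ndx. The file names here are purely illustrative; only the flags come from the argument definitions above, and mimicpy [subcommand] --help prints the full option list.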