mlmm-toolkit 0.2.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hessian_ff/__init__.py +50 -0
- hessian_ff/analytical_hessian.py +609 -0
- hessian_ff/constants.py +46 -0
- hessian_ff/forcefield.py +339 -0
- hessian_ff/loaders.py +608 -0
- hessian_ff/native/Makefile +8 -0
- hessian_ff/native/__init__.py +28 -0
- hessian_ff/native/analytical_hessian.py +88 -0
- hessian_ff/native/analytical_hessian_ext.cpp +258 -0
- hessian_ff/native/bonded.py +82 -0
- hessian_ff/native/bonded_ext.cpp +640 -0
- hessian_ff/native/loader.py +349 -0
- hessian_ff/native/nonbonded.py +118 -0
- hessian_ff/native/nonbonded_ext.cpp +1150 -0
- hessian_ff/prmtop_parmed.py +23 -0
- hessian_ff/system.py +107 -0
- hessian_ff/terms/__init__.py +14 -0
- hessian_ff/terms/angle.py +73 -0
- hessian_ff/terms/bond.py +44 -0
- hessian_ff/terms/cmap.py +406 -0
- hessian_ff/terms/dihedral.py +141 -0
- hessian_ff/terms/nonbonded.py +209 -0
- hessian_ff/tests/__init__.py +0 -0
- hessian_ff/tests/conftest.py +75 -0
- hessian_ff/tests/data/small/complex.parm7 +1346 -0
- hessian_ff/tests/data/small/complex.pdb +125 -0
- hessian_ff/tests/data/small/complex.rst7 +63 -0
- hessian_ff/tests/test_coords_input.py +44 -0
- hessian_ff/tests/test_energy_force.py +49 -0
- hessian_ff/tests/test_hessian.py +137 -0
- hessian_ff/tests/test_smoke.py +18 -0
- hessian_ff/tests/test_validation.py +40 -0
- hessian_ff/workflows.py +889 -0
- mlmm/__init__.py +36 -0
- mlmm/__main__.py +7 -0
- mlmm/_version.py +34 -0
- mlmm/add_elem_info.py +374 -0
- mlmm/advanced_help.py +91 -0
- mlmm/align_freeze_atoms.py +601 -0
- mlmm/all.py +3535 -0
- mlmm/bond_changes.py +231 -0
- mlmm/bool_compat.py +223 -0
- mlmm/cli.py +574 -0
- mlmm/cli_utils.py +166 -0
- mlmm/default_group.py +337 -0
- mlmm/defaults.py +467 -0
- mlmm/define_layer.py +526 -0
- mlmm/dft.py +1041 -0
- mlmm/energy_diagram.py +253 -0
- mlmm/extract.py +2213 -0
- mlmm/fix_altloc.py +464 -0
- mlmm/freq.py +1406 -0
- mlmm/harmonic_constraints.py +140 -0
- mlmm/hessian_cache.py +44 -0
- mlmm/hessian_calc.py +174 -0
- mlmm/irc.py +638 -0
- mlmm/mlmm_calc.py +2262 -0
- mlmm/mm_parm.py +945 -0
- mlmm/oniom_export.py +1983 -0
- mlmm/oniom_import.py +457 -0
- mlmm/opt.py +1742 -0
- mlmm/path_opt.py +1353 -0
- mlmm/path_search.py +2299 -0
- mlmm/preflight.py +88 -0
- mlmm/py.typed +1 -0
- mlmm/pysis_runner.py +45 -0
- mlmm/scan.py +1047 -0
- mlmm/scan2d.py +1226 -0
- mlmm/scan3d.py +1265 -0
- mlmm/scan_common.py +184 -0
- mlmm/summary_log.py +736 -0
- mlmm/trj2fig.py +448 -0
- mlmm/tsopt.py +2871 -0
- mlmm/utils.py +2309 -0
- mlmm/xtb_embedcharge_correction.py +475 -0
- mlmm_toolkit-0.2.2.dev0.dist-info/METADATA +1159 -0
- mlmm_toolkit-0.2.2.dev0.dist-info/RECORD +372 -0
- mlmm_toolkit-0.2.2.dev0.dist-info/WHEEL +5 -0
- mlmm_toolkit-0.2.2.dev0.dist-info/entry_points.txt +2 -0
- mlmm_toolkit-0.2.2.dev0.dist-info/licenses/LICENSE +674 -0
- mlmm_toolkit-0.2.2.dev0.dist-info/top_level.txt +4 -0
- pysisyphus/Geometry.py +1667 -0
- pysisyphus/LICENSE +674 -0
- pysisyphus/TableFormatter.py +63 -0
- pysisyphus/TablePrinter.py +74 -0
- pysisyphus/__init__.py +12 -0
- pysisyphus/calculators/AFIR.py +452 -0
- pysisyphus/calculators/AnaPot.py +20 -0
- pysisyphus/calculators/AnaPot2.py +48 -0
- pysisyphus/calculators/AnaPot3.py +12 -0
- pysisyphus/calculators/AnaPot4.py +20 -0
- pysisyphus/calculators/AnaPotBase.py +337 -0
- pysisyphus/calculators/AnaPotCBM.py +25 -0
- pysisyphus/calculators/AtomAtomTransTorque.py +154 -0
- pysisyphus/calculators/CFOUR.py +250 -0
- pysisyphus/calculators/Calculator.py +844 -0
- pysisyphus/calculators/CerjanMiller.py +24 -0
- pysisyphus/calculators/Composite.py +123 -0
- pysisyphus/calculators/ConicalIntersection.py +171 -0
- pysisyphus/calculators/DFTBp.py +430 -0
- pysisyphus/calculators/DFTD3.py +66 -0
- pysisyphus/calculators/DFTD4.py +84 -0
- pysisyphus/calculators/Dalton.py +61 -0
- pysisyphus/calculators/Dimer.py +681 -0
- pysisyphus/calculators/Dummy.py +20 -0
- pysisyphus/calculators/EGO.py +76 -0
- pysisyphus/calculators/EnergyMin.py +224 -0
- pysisyphus/calculators/ExternalPotential.py +264 -0
- pysisyphus/calculators/FakeASE.py +35 -0
- pysisyphus/calculators/FourWellAnaPot.py +28 -0
- pysisyphus/calculators/FreeEndNEBPot.py +39 -0
- pysisyphus/calculators/Gaussian09.py +18 -0
- pysisyphus/calculators/Gaussian16.py +726 -0
- pysisyphus/calculators/HardSphere.py +159 -0
- pysisyphus/calculators/IDPPCalculator.py +49 -0
- pysisyphus/calculators/IPIClient.py +133 -0
- pysisyphus/calculators/IPIServer.py +234 -0
- pysisyphus/calculators/LEPSBase.py +24 -0
- pysisyphus/calculators/LEPSExpr.py +139 -0
- pysisyphus/calculators/LennardJones.py +80 -0
- pysisyphus/calculators/MOPAC.py +219 -0
- pysisyphus/calculators/MullerBrownSympyPot.py +51 -0
- pysisyphus/calculators/MultiCalc.py +85 -0
- pysisyphus/calculators/NFK.py +45 -0
- pysisyphus/calculators/OBabel.py +87 -0
- pysisyphus/calculators/ONIOMv2.py +1129 -0
- pysisyphus/calculators/ORCA.py +893 -0
- pysisyphus/calculators/ORCA5.py +6 -0
- pysisyphus/calculators/OpenMM.py +88 -0
- pysisyphus/calculators/OpenMolcas.py +281 -0
- pysisyphus/calculators/OverlapCalculator.py +908 -0
- pysisyphus/calculators/Psi4.py +218 -0
- pysisyphus/calculators/PyPsi4.py +37 -0
- pysisyphus/calculators/PySCF.py +341 -0
- pysisyphus/calculators/PyXTB.py +73 -0
- pysisyphus/calculators/QCEngine.py +106 -0
- pysisyphus/calculators/Rastrigin.py +22 -0
- pysisyphus/calculators/Remote.py +76 -0
- pysisyphus/calculators/Rosenbrock.py +15 -0
- pysisyphus/calculators/SocketCalc.py +97 -0
- pysisyphus/calculators/TIP3P.py +111 -0
- pysisyphus/calculators/TransTorque.py +161 -0
- pysisyphus/calculators/Turbomole.py +965 -0
- pysisyphus/calculators/VRIPot.py +37 -0
- pysisyphus/calculators/WFOWrapper.py +333 -0
- pysisyphus/calculators/WFOWrapper2.py +341 -0
- pysisyphus/calculators/XTB.py +418 -0
- pysisyphus/calculators/__init__.py +81 -0
- pysisyphus/calculators/cosmo_data.py +139 -0
- pysisyphus/calculators/parser.py +150 -0
- pysisyphus/color.py +19 -0
- pysisyphus/config.py +133 -0
- pysisyphus/constants.py +65 -0
- pysisyphus/cos/AdaptiveNEB.py +230 -0
- pysisyphus/cos/ChainOfStates.py +725 -0
- pysisyphus/cos/FreeEndNEB.py +25 -0
- pysisyphus/cos/FreezingString.py +103 -0
- pysisyphus/cos/GrowingChainOfStates.py +71 -0
- pysisyphus/cos/GrowingNT.py +309 -0
- pysisyphus/cos/GrowingString.py +508 -0
- pysisyphus/cos/NEB.py +189 -0
- pysisyphus/cos/SimpleZTS.py +64 -0
- pysisyphus/cos/__init__.py +22 -0
- pysisyphus/cos/stiffness.py +199 -0
- pysisyphus/drivers/__init__.py +17 -0
- pysisyphus/drivers/afir.py +855 -0
- pysisyphus/drivers/barriers.py +271 -0
- pysisyphus/drivers/birkholz.py +138 -0
- pysisyphus/drivers/cluster.py +318 -0
- pysisyphus/drivers/diabatization.py +133 -0
- pysisyphus/drivers/merge.py +368 -0
- pysisyphus/drivers/merge_mol2.py +322 -0
- pysisyphus/drivers/opt.py +375 -0
- pysisyphus/drivers/perf.py +91 -0
- pysisyphus/drivers/pka.py +52 -0
- pysisyphus/drivers/precon_pos_rot.py +669 -0
- pysisyphus/drivers/rates.py +480 -0
- pysisyphus/drivers/replace.py +219 -0
- pysisyphus/drivers/scan.py +212 -0
- pysisyphus/drivers/spectrum.py +166 -0
- pysisyphus/drivers/thermo.py +31 -0
- pysisyphus/dynamics/Gaussian.py +103 -0
- pysisyphus/dynamics/__init__.py +20 -0
- pysisyphus/dynamics/colvars.py +136 -0
- pysisyphus/dynamics/driver.py +297 -0
- pysisyphus/dynamics/helpers.py +256 -0
- pysisyphus/dynamics/lincs.py +105 -0
- pysisyphus/dynamics/mdp.py +364 -0
- pysisyphus/dynamics/rattle.py +121 -0
- pysisyphus/dynamics/thermostats.py +128 -0
- pysisyphus/dynamics/wigner.py +266 -0
- pysisyphus/elem_data.py +3473 -0
- pysisyphus/exceptions.py +2 -0
- pysisyphus/filtertrj.py +69 -0
- pysisyphus/helpers.py +623 -0
- pysisyphus/helpers_pure.py +649 -0
- pysisyphus/init_logging.py +50 -0
- pysisyphus/intcoords/Bend.py +69 -0
- pysisyphus/intcoords/Bend2.py +25 -0
- pysisyphus/intcoords/BondedFragment.py +32 -0
- pysisyphus/intcoords/Cartesian.py +41 -0
- pysisyphus/intcoords/CartesianCoords.py +140 -0
- pysisyphus/intcoords/Coords.py +56 -0
- pysisyphus/intcoords/DLC.py +197 -0
- pysisyphus/intcoords/DistanceFunction.py +34 -0
- pysisyphus/intcoords/DummyImproper.py +70 -0
- pysisyphus/intcoords/DummyTorsion.py +72 -0
- pysisyphus/intcoords/LinearBend.py +105 -0
- pysisyphus/intcoords/LinearDisplacement.py +80 -0
- pysisyphus/intcoords/OutOfPlane.py +59 -0
- pysisyphus/intcoords/PrimTypes.py +286 -0
- pysisyphus/intcoords/Primitive.py +137 -0
- pysisyphus/intcoords/RedundantCoords.py +659 -0
- pysisyphus/intcoords/RobustTorsion.py +59 -0
- pysisyphus/intcoords/Rotation.py +147 -0
- pysisyphus/intcoords/Stretch.py +31 -0
- pysisyphus/intcoords/Torsion.py +101 -0
- pysisyphus/intcoords/Torsion2.py +25 -0
- pysisyphus/intcoords/Translation.py +45 -0
- pysisyphus/intcoords/__init__.py +61 -0
- pysisyphus/intcoords/augment_bonds.py +126 -0
- pysisyphus/intcoords/derivatives.py +10512 -0
- pysisyphus/intcoords/eval.py +80 -0
- pysisyphus/intcoords/exceptions.py +37 -0
- pysisyphus/intcoords/findiffs.py +48 -0
- pysisyphus/intcoords/generate_derivatives.py +414 -0
- pysisyphus/intcoords/helpers.py +235 -0
- pysisyphus/intcoords/logging_conf.py +10 -0
- pysisyphus/intcoords/mp_derivatives.py +10836 -0
- pysisyphus/intcoords/setup.py +962 -0
- pysisyphus/intcoords/setup_fast.py +176 -0
- pysisyphus/intcoords/update.py +272 -0
- pysisyphus/intcoords/valid.py +89 -0
- pysisyphus/interpolate/Geodesic.py +93 -0
- pysisyphus/interpolate/IDPP.py +55 -0
- pysisyphus/interpolate/Interpolator.py +116 -0
- pysisyphus/interpolate/LST.py +70 -0
- pysisyphus/interpolate/Redund.py +152 -0
- pysisyphus/interpolate/__init__.py +9 -0
- pysisyphus/interpolate/helpers.py +34 -0
- pysisyphus/io/__init__.py +22 -0
- pysisyphus/io/aomix.py +178 -0
- pysisyphus/io/cjson.py +24 -0
- pysisyphus/io/crd.py +101 -0
- pysisyphus/io/cube.py +220 -0
- pysisyphus/io/fchk.py +184 -0
- pysisyphus/io/hdf5.py +49 -0
- pysisyphus/io/hessian.py +72 -0
- pysisyphus/io/mol2.py +146 -0
- pysisyphus/io/molden.py +293 -0
- pysisyphus/io/orca.py +189 -0
- pysisyphus/io/pdb.py +269 -0
- pysisyphus/io/psf.py +79 -0
- pysisyphus/io/pubchem.py +31 -0
- pysisyphus/io/qcschema.py +34 -0
- pysisyphus/io/sdf.py +29 -0
- pysisyphus/io/xyz.py +61 -0
- pysisyphus/io/zmat.py +175 -0
- pysisyphus/irc/DWI.py +108 -0
- pysisyphus/irc/DampedVelocityVerlet.py +134 -0
- pysisyphus/irc/Euler.py +22 -0
- pysisyphus/irc/EulerPC.py +345 -0
- pysisyphus/irc/GonzalezSchlegel.py +187 -0
- pysisyphus/irc/IMKMod.py +164 -0
- pysisyphus/irc/IRC.py +878 -0
- pysisyphus/irc/IRCDummy.py +10 -0
- pysisyphus/irc/Instanton.py +307 -0
- pysisyphus/irc/LQA.py +53 -0
- pysisyphus/irc/ModeKill.py +136 -0
- pysisyphus/irc/ParamPlot.py +53 -0
- pysisyphus/irc/RK4.py +36 -0
- pysisyphus/irc/__init__.py +31 -0
- pysisyphus/irc/initial_displ.py +219 -0
- pysisyphus/linalg.py +411 -0
- pysisyphus/line_searches/Backtracking.py +88 -0
- pysisyphus/line_searches/HagerZhang.py +184 -0
- pysisyphus/line_searches/LineSearch.py +232 -0
- pysisyphus/line_searches/StrongWolfe.py +108 -0
- pysisyphus/line_searches/__init__.py +9 -0
- pysisyphus/line_searches/interpol.py +15 -0
- pysisyphus/modefollow/NormalMode.py +40 -0
- pysisyphus/modefollow/__init__.py +10 -0
- pysisyphus/modefollow/davidson.py +199 -0
- pysisyphus/modefollow/lanczos.py +95 -0
- pysisyphus/optimizers/BFGS.py +99 -0
- pysisyphus/optimizers/BacktrackingOptimizer.py +113 -0
- pysisyphus/optimizers/ConjugateGradient.py +98 -0
- pysisyphus/optimizers/CubicNewton.py +75 -0
- pysisyphus/optimizers/FIRE.py +113 -0
- pysisyphus/optimizers/HessianOptimizer.py +1176 -0
- pysisyphus/optimizers/LBFGS.py +228 -0
- pysisyphus/optimizers/LayerOpt.py +411 -0
- pysisyphus/optimizers/MicroOptimizer.py +169 -0
- pysisyphus/optimizers/NCOptimizer.py +90 -0
- pysisyphus/optimizers/Optimizer.py +1084 -0
- pysisyphus/optimizers/PreconLBFGS.py +260 -0
- pysisyphus/optimizers/PreconSteepestDescent.py +7 -0
- pysisyphus/optimizers/QuickMin.py +74 -0
- pysisyphus/optimizers/RFOptimizer.py +181 -0
- pysisyphus/optimizers/RSA.py +99 -0
- pysisyphus/optimizers/StabilizedQNMethod.py +248 -0
- pysisyphus/optimizers/SteepestDescent.py +23 -0
- pysisyphus/optimizers/StringOptimizer.py +173 -0
- pysisyphus/optimizers/__init__.py +41 -0
- pysisyphus/optimizers/closures.py +301 -0
- pysisyphus/optimizers/cls_map.py +58 -0
- pysisyphus/optimizers/exceptions.py +6 -0
- pysisyphus/optimizers/gdiis.py +280 -0
- pysisyphus/optimizers/guess_hessians.py +311 -0
- pysisyphus/optimizers/hessian_updates.py +355 -0
- pysisyphus/optimizers/poly_fit.py +285 -0
- pysisyphus/optimizers/precon.py +153 -0
- pysisyphus/optimizers/restrict_step.py +24 -0
- pysisyphus/pack.py +172 -0
- pysisyphus/peakdetect.py +948 -0
- pysisyphus/plot.py +1031 -0
- pysisyphus/run.py +2106 -0
- pysisyphus/socket_helper.py +74 -0
- pysisyphus/stocastic/FragmentKick.py +132 -0
- pysisyphus/stocastic/Kick.py +81 -0
- pysisyphus/stocastic/Pipeline.py +303 -0
- pysisyphus/stocastic/__init__.py +21 -0
- pysisyphus/stocastic/align.py +127 -0
- pysisyphus/testing.py +96 -0
- pysisyphus/thermo.py +156 -0
- pysisyphus/trj.py +824 -0
- pysisyphus/tsoptimizers/RSIRFOptimizer.py +56 -0
- pysisyphus/tsoptimizers/RSPRFOptimizer.py +182 -0
- pysisyphus/tsoptimizers/TRIM.py +59 -0
- pysisyphus/tsoptimizers/TSHessianOptimizer.py +463 -0
- pysisyphus/tsoptimizers/__init__.py +23 -0
- pysisyphus/wavefunction/Basis.py +239 -0
- pysisyphus/wavefunction/DIIS.py +76 -0
- pysisyphus/wavefunction/__init__.py +25 -0
- pysisyphus/wavefunction/build_ext.py +42 -0
- pysisyphus/wavefunction/cart2sph.py +190 -0
- pysisyphus/wavefunction/diabatization.py +304 -0
- pysisyphus/wavefunction/excited_states.py +435 -0
- pysisyphus/wavefunction/gen_ints.py +1811 -0
- pysisyphus/wavefunction/helpers.py +104 -0
- pysisyphus/wavefunction/ints/__init__.py +0 -0
- pysisyphus/wavefunction/ints/boys.py +193 -0
- pysisyphus/wavefunction/ints/boys_table_N_64_xasym_27.1_step_0.01.npy +0 -0
- pysisyphus/wavefunction/ints/cart_gto3d.py +176 -0
- pysisyphus/wavefunction/ints/coulomb3d.py +25928 -0
- pysisyphus/wavefunction/ints/diag_quadrupole3d.py +10036 -0
- pysisyphus/wavefunction/ints/dipole3d.py +8762 -0
- pysisyphus/wavefunction/ints/int2c2e3d.py +7198 -0
- pysisyphus/wavefunction/ints/int3c2e3d_sph.py +65040 -0
- pysisyphus/wavefunction/ints/kinetic3d.py +8240 -0
- pysisyphus/wavefunction/ints/ovlp3d.py +3777 -0
- pysisyphus/wavefunction/ints/quadrupole3d.py +15054 -0
- pysisyphus/wavefunction/ints/self_ovlp3d.py +198 -0
- pysisyphus/wavefunction/localization.py +458 -0
- pysisyphus/wavefunction/multipole.py +159 -0
- pysisyphus/wavefunction/normalization.py +36 -0
- pysisyphus/wavefunction/pop_analysis.py +134 -0
- pysisyphus/wavefunction/shells.py +1171 -0
- pysisyphus/wavefunction/wavefunction.py +504 -0
- pysisyphus/wrapper/__init__.py +11 -0
- pysisyphus/wrapper/exceptions.py +2 -0
- pysisyphus/wrapper/jmol.py +120 -0
- pysisyphus/wrapper/mwfn.py +169 -0
- pysisyphus/wrapper/packmol.py +71 -0
- pysisyphus/xyzloader.py +168 -0
- pysisyphus/yaml_mods.py +45 -0
- thermoanalysis/LICENSE +674 -0
- thermoanalysis/QCData.py +244 -0
- thermoanalysis/__init__.py +0 -0
- thermoanalysis/config.py +3 -0
- thermoanalysis/constants.py +20 -0
- thermoanalysis/thermo.py +1011 -0
mlmm/oniom_export.py
ADDED
|
@@ -0,0 +1,1983 @@
|
|
|
1
|
+
# mlmm/oniom_export.py
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Export ML/MM system to Gaussian/ORCA ONIOM input format from Amber parm7 topology.
|
|
5
|
+
|
|
6
|
+
Example:
|
|
7
|
+
mlmm oniom-export --parm real.parm7 -i pocket.pdb --model-pdb ml.pdb -o out.com
|
|
8
|
+
|
|
9
|
+
For detailed documentation, see: docs/oniom_export.md
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import re
|
|
15
|
+
import shlex
|
|
16
|
+
import shutil
|
|
17
|
+
import subprocess
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any, Dict, List, Optional, Set, Tuple
|
|
20
|
+
|
|
21
|
+
import logging
|
|
22
|
+
|
|
23
|
+
import click
|
|
24
|
+
import numpy as np
|
|
25
|
+
|
|
26
|
+
from .add_elem_info import guess_element as _guess_element
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
try:
|
|
31
|
+
import parmed as pmd
|
|
32
|
+
except ImportError:
|
|
33
|
+
pmd = None
|
|
34
|
+
|
|
35
|
+
_GAUSSIAN_DEFAULT_METHOD = "wB97XD/def2-TZVPD"
|
|
36
|
+
_ORCA_DEFAULT_METHOD = "B3LYP D3BJ def2-SVP"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _check_parmed() -> None:
    """Raise ImportError when the optional ParmEd dependency is unavailable.

    The module-level name ``pmd`` is ``None`` when ``import parmed`` failed at
    import time; in that case this function raises, otherwise it returns
    without doing anything.
    """
    if pmd is not None:
        return
    raise ImportError(
        "ParmEd is required for ONIOM export. Install with: pip install parmed"
    )
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# -----------------------------------------------
|
|
48
|
+
# Coordinates / element handling
|
|
49
|
+
# -----------------------------------------------
|
|
50
|
+
|
|
51
|
+
# Periodic table symbols (1-indexed; index 0 is dummy)
|
|
52
|
+
_PERIODIC_TABLE: List[str] = [
|
|
53
|
+
"",
|
|
54
|
+
"H",
|
|
55
|
+
"He",
|
|
56
|
+
"Li",
|
|
57
|
+
"Be",
|
|
58
|
+
"B",
|
|
59
|
+
"C",
|
|
60
|
+
"N",
|
|
61
|
+
"O",
|
|
62
|
+
"F",
|
|
63
|
+
"Ne",
|
|
64
|
+
"Na",
|
|
65
|
+
"Mg",
|
|
66
|
+
"Al",
|
|
67
|
+
"Si",
|
|
68
|
+
"P",
|
|
69
|
+
"S",
|
|
70
|
+
"Cl",
|
|
71
|
+
"Ar",
|
|
72
|
+
"K",
|
|
73
|
+
"Ca",
|
|
74
|
+
"Sc",
|
|
75
|
+
"Ti",
|
|
76
|
+
"V",
|
|
77
|
+
"Cr",
|
|
78
|
+
"Mn",
|
|
79
|
+
"Fe",
|
|
80
|
+
"Co",
|
|
81
|
+
"Ni",
|
|
82
|
+
"Cu",
|
|
83
|
+
"Zn",
|
|
84
|
+
"Ga",
|
|
85
|
+
"Ge",
|
|
86
|
+
"As",
|
|
87
|
+
"Se",
|
|
88
|
+
"Br",
|
|
89
|
+
"Kr",
|
|
90
|
+
"Rb",
|
|
91
|
+
"Sr",
|
|
92
|
+
"Y",
|
|
93
|
+
"Zr",
|
|
94
|
+
"Nb",
|
|
95
|
+
"Mo",
|
|
96
|
+
"Tc",
|
|
97
|
+
"Ru",
|
|
98
|
+
"Rh",
|
|
99
|
+
"Pd",
|
|
100
|
+
"Ag",
|
|
101
|
+
"Cd",
|
|
102
|
+
"In",
|
|
103
|
+
"Sn",
|
|
104
|
+
"Sb",
|
|
105
|
+
"Te",
|
|
106
|
+
"I",
|
|
107
|
+
"Xe",
|
|
108
|
+
"Cs",
|
|
109
|
+
"Ba",
|
|
110
|
+
"La",
|
|
111
|
+
"Ce",
|
|
112
|
+
"Pr",
|
|
113
|
+
"Nd",
|
|
114
|
+
"Pm",
|
|
115
|
+
"Sm",
|
|
116
|
+
"Eu",
|
|
117
|
+
"Gd",
|
|
118
|
+
"Tb",
|
|
119
|
+
"Dy",
|
|
120
|
+
"Ho",
|
|
121
|
+
"Er",
|
|
122
|
+
"Tm",
|
|
123
|
+
"Yb",
|
|
124
|
+
"Lu",
|
|
125
|
+
"Hf",
|
|
126
|
+
"Ta",
|
|
127
|
+
"W",
|
|
128
|
+
"Re",
|
|
129
|
+
"Os",
|
|
130
|
+
"Ir",
|
|
131
|
+
"Pt",
|
|
132
|
+
"Au",
|
|
133
|
+
"Hg",
|
|
134
|
+
"Tl",
|
|
135
|
+
"Pb",
|
|
136
|
+
"Bi",
|
|
137
|
+
"Po",
|
|
138
|
+
"At",
|
|
139
|
+
"Rn",
|
|
140
|
+
"Fr",
|
|
141
|
+
"Ra",
|
|
142
|
+
"Ac",
|
|
143
|
+
"Th",
|
|
144
|
+
"Pa",
|
|
145
|
+
"U",
|
|
146
|
+
"Np",
|
|
147
|
+
"Pu",
|
|
148
|
+
"Am",
|
|
149
|
+
"Cm",
|
|
150
|
+
"Bk",
|
|
151
|
+
"Cf",
|
|
152
|
+
"Es",
|
|
153
|
+
"Fm",
|
|
154
|
+
"Md",
|
|
155
|
+
"No",
|
|
156
|
+
"Lr",
|
|
157
|
+
"Rf",
|
|
158
|
+
"Db",
|
|
159
|
+
"Sg",
|
|
160
|
+
"Bh",
|
|
161
|
+
"Hs",
|
|
162
|
+
"Mt",
|
|
163
|
+
"Ds",
|
|
164
|
+
"Rg",
|
|
165
|
+
"Cn",
|
|
166
|
+
"Nh",
|
|
167
|
+
"Fl",
|
|
168
|
+
"Mc",
|
|
169
|
+
"Lv",
|
|
170
|
+
"Ts",
|
|
171
|
+
"Og",
|
|
172
|
+
]
|
|
173
|
+
|
|
174
|
+
# Upper-cased forms of every two-letter symbol in _PERIODIC_TABLE (e.g. "CL", "FE").
_TWO_LETTER_ELEMENT_UPPER: Set[str] = set(
    map(str.upper, (symbol for symbol in _PERIODIC_TABLE if len(symbol) == 2))
)
|
|
175
|
+
|
|
176
|
+
# A small set of common atomic masses for robust element inference when atomic number is missing.
|
|
177
|
+
# (Atomic masses vary slightly by isotope; we only need a reasonable guess.)
|
|
178
|
+
_COMMON_MASS_TABLE: List[Tuple[str, float]] = [
|
|
179
|
+
("H", 1.008),
|
|
180
|
+
("C", 12.011),
|
|
181
|
+
("N", 14.007),
|
|
182
|
+
("O", 15.999),
|
|
183
|
+
("F", 18.998),
|
|
184
|
+
("Na", 22.990),
|
|
185
|
+
("Mg", 24.305),
|
|
186
|
+
("Al", 26.982),
|
|
187
|
+
("Si", 28.085),
|
|
188
|
+
("P", 30.974),
|
|
189
|
+
("S", 32.06),
|
|
190
|
+
("Cl", 35.45),
|
|
191
|
+
("K", 39.098),
|
|
192
|
+
("Ca", 40.078),
|
|
193
|
+
("Mn", 54.938),
|
|
194
|
+
("Fe", 55.845),
|
|
195
|
+
("Co", 58.933),
|
|
196
|
+
("Ni", 58.693),
|
|
197
|
+
("Cu", 63.546),
|
|
198
|
+
("Zn", 65.38),
|
|
199
|
+
("Se", 78.971),
|
|
200
|
+
("Br", 79.904),
|
|
201
|
+
("I", 126.90),
|
|
202
|
+
]
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _normalize_element_symbol(sym: str) -> str:
|
|
206
|
+
"""
|
|
207
|
+
Normalize an element symbol to canonical form (e.g., 'cl'/'CL' -> 'Cl').
|
|
208
|
+
|
|
209
|
+
Returns 'X' if empty / unknown.
|
|
210
|
+
"""
|
|
211
|
+
s = (sym or "").strip()
|
|
212
|
+
if not s:
|
|
213
|
+
return "X"
|
|
214
|
+
# Keep only first 2 characters (typical element symbols)
|
|
215
|
+
s = re.sub(r"[^A-Za-z]", "", s)
|
|
216
|
+
if not s:
|
|
217
|
+
return "X"
|
|
218
|
+
if len(s) == 1:
|
|
219
|
+
return s.upper()
|
|
220
|
+
return s[0].upper() + s[1].lower()
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _infer_element_from_pdb_atom_name(atom_name_field: str) -> str:
    """
    Best-effort element inference from the 4-char PDB atom-name field (columns 13-16).

    Rules, applied in order to the first four characters of the field:
    - fewer than two characters: normalize whatever is there
    - leading digit (e.g. "1H  ") or leading space (right-justified name such
      as " CA ", C-alpha): the element is the second character
    - otherwise (left-justified): if the first two characters form a known
      two-letter element (e.g. "CA  ", calcium) use them, else use the first
      character alone
    """
    name4 = (atom_name_field or "")[:4]
    if len(name4) < 2:
        return _normalize_element_symbol(name4.strip())

    lead = name4[0]
    if lead.isdigit() or lead == " ":
        # One-letter element sitting in column 14.
        return _normalize_element_symbol(name4[1])

    pair = name4[:2].strip().upper()
    symbol = pair if pair in _TWO_LETTER_ELEMENT_UPPER else lead
    return _normalize_element_symbol(symbol)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def _read_pdb_geometry(pdb_path: Path) -> Tuple[np.ndarray, List[str]]:
    """Read coordinates and element symbols from a PDB file (ATOM/HETATM records).

    Coordinates are taken from the fixed columns 31-38 / 39-46 / 47-54.
    The element is taken from columns 77-78 when present; when that field is
    empty, the residue-aware ``guess_element`` helper is tried first and the
    atom-name heuristic is used as the last resort.

    Returns
    -------
    (coords, elements)
        ``coords`` is a float array built from one ``[x, y, z]`` row per
        record; ``elements`` holds one normalized symbol per record.

    Raises
    ------
    ValueError
        If a coordinate column cannot be parsed as a float, or if the file
        contains no ATOM/HETATM records.
    """
    coords: List[List[float]] = []
    elements: List[str] = []
    with pdb_path.open("r") as f:
        for line in f:
            if not line.startswith(("ATOM", "HETATM")):
                continue
            # float() on a str only raises ValueError (also for the empty
            # slices produced by truncated lines), so catch exactly that.
            try:
                xyz = [float(line[30:38]), float(line[38:46]), float(line[46:54])]
            except ValueError as e:
                raise ValueError(f"Failed to parse coordinates from PDB line: {line.rstrip()}") from e

            name_field = line[12:16]
            elem_field = line[76:78].strip()
            if elem_field:
                elem = elem_field
                # Guard against 1-letter misalignment when the atom name encodes
                # a 2-letter element (e.g., MG): only override when the single
                # letter disagrees with the first letter of the inferred symbol.
                if len(elem_field) == 1:
                    elem_inferred = _infer_element_from_pdb_atom_name(name_field)
                    if (
                        len(elem_inferred) == 2
                        and elem_field.upper() != elem_inferred[0].upper()
                    ):
                        elem = elem_inferred
            else:
                # No element column: use residue-aware inference
                # (add_elem_info.guess_element), then the atom-name heuristic.
                guessed = _guess_element(
                    name_field.strip(),
                    line[17:20].strip(),
                    line.startswith("HETATM"),
                )
                elem = guessed if guessed else _infer_element_from_pdb_atom_name(name_field)

            coords.append(xyz)
            elements.append(_normalize_element_symbol(elem))

    if not coords:
        raise ValueError(f"No ATOM/HETATM records found in PDB: {pdb_path}")
    return np.asarray(coords, dtype=float), elements
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _read_xyz_geometry(xyz_path: Path) -> Tuple[np.ndarray, List[str]]:
    """Read coordinates and element symbols from a single-frame XYZ file.

    Line 1 must hold the atom count, line 2 is the comment line, and the next
    ``n_atoms`` lines hold ``symbol x y z`` (extra columns are ignored). Only
    this first frame is read.

    Raises
    ------
    ValueError
        If the file has fewer than three lines, the header is not an integer,
        the header promises more atom lines than exist, an atom line has
        fewer than four fields or non-numeric coordinates, or the number of
        parsed atoms differs from the header (blank lines inside the atom
        block are skipped and therefore end up here).
    """
    with xyz_path.open("r") as f:
        lines = f.readlines()

    if len(lines) < 3:
        raise ValueError(f"XYZ file is too short: {xyz_path}")

    # int()/float() on str input raise ValueError only; anything else is a
    # programming error and should not be relabelled as a parse failure.
    try:
        n_atoms = int(lines[0].strip())
    except ValueError as e:
        raise ValueError(f"First line of XYZ must be an integer atom count: {xyz_path}") from e

    if len(lines) < 2 + n_atoms:
        raise ValueError(
            f"XYZ file atom count ({n_atoms}) exceeds available lines: {xyz_path}"
        )

    coords: List[List[float]] = []
    elements: List[str] = []
    for i in range(n_atoms):
        raw = lines[2 + i].strip()
        if not raw:
            # Skipped here; the final count check below reports the shortfall.
            continue
        parts = raw.split()
        if len(parts) < 4:
            raise ValueError(f"Invalid XYZ atom line: '{raw}'")
        try:
            xyz = [float(parts[1]), float(parts[2]), float(parts[3])]
        except ValueError as e:
            raise ValueError(f"Invalid XYZ coordinates in line: '{raw}'") from e
        coords.append(xyz)
        elements.append(_normalize_element_symbol(parts[0]))

    if len(coords) != n_atoms:
        raise ValueError(
            f"XYZ parsing produced {len(coords)} atoms but header says {n_atoms}: {xyz_path}"
        )

    return np.asarray(coords, dtype=float), elements
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def _read_input_geometry(input_path: Path) -> Tuple[np.ndarray, List[str]]:
|
|
339
|
+
"""
|
|
340
|
+
Read coordinates + element list from an input coordinate file.
|
|
341
|
+
|
|
342
|
+
Supported:
|
|
343
|
+
- .pdb
|
|
344
|
+
- .xyz
|
|
345
|
+
"""
|
|
346
|
+
suffix = input_path.suffix.lower()
|
|
347
|
+
if suffix == ".pdb" or suffix == ".ent":
|
|
348
|
+
return _read_pdb_geometry(input_path)
|
|
349
|
+
if suffix == ".xyz":
|
|
350
|
+
return _read_xyz_geometry(input_path)
|
|
351
|
+
raise ValueError(f"Unsupported input coordinate format: {input_path} (expected .pdb or .xyz)")
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def _apply_coordinates_to_parm(parm, coords: np.ndarray) -> None:
|
|
355
|
+
"""Attach coordinates to a ParmEd structure, keeping atom order unchanged."""
|
|
356
|
+
coords = np.asarray(coords, dtype=float)
|
|
357
|
+
if coords.shape != (len(parm.atoms), 3):
|
|
358
|
+
raise ValueError(
|
|
359
|
+
f"Atom count mismatch: parm7 has {len(parm.atoms)} atoms, "
|
|
360
|
+
f"but coordinate file has {coords.shape[0]} atoms"
|
|
361
|
+
)
|
|
362
|
+
|
|
363
|
+
# Prefer setting structure-level coordinates
|
|
364
|
+
try:
|
|
365
|
+
parm.coordinates = coords
|
|
366
|
+
except Exception:
|
|
367
|
+
logger.debug("Failed to set structure-level coordinates on parm", exc_info=True)
|
|
368
|
+
|
|
369
|
+
# Ensure per-atom cached coordinates are also available
|
|
370
|
+
for i, atom in enumerate(parm.atoms):
|
|
371
|
+
x, y, z = coords[i]
|
|
372
|
+
try:
|
|
373
|
+
atom.xx = float(x)
|
|
374
|
+
atom.xy = float(y)
|
|
375
|
+
atom.xz = float(z)
|
|
376
|
+
except Exception:
|
|
377
|
+
logger.debug("Failed to set per-atom coords on atom %d", i, exc_info=True)
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def _infer_element_from_mass(mass: float, tol: float = 1.5) -> str:
    """
    Infer element from atomic mass using the small common-element table.

    The table entry whose reference mass is closest to ``mass`` is selected
    (the earliest entry wins ties) and accepted only when the difference is
    at most ``tol``.

    Returns 'X' if ``mass`` cannot be converted to float or no close match is found.
    """
    try:
        m = float(mass)
    except Exception:
        return "X"

    # The leading ("X", 1e9) sentinel is the starting "best" candidate; min()
    # only replaces it with an entry whose difference is strictly smaller.
    candidates = [("X", 1e9)]
    candidates.extend((sym, abs(m - ref_m)) for sym, ref_m in _COMMON_MASS_TABLE)
    best_sym, best_diff = min(candidates, key=lambda pair: pair[1])

    if best_diff > tol:
        return "X"
    return _normalize_element_symbol(best_sym)
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def _get_parm_element(atom) -> str:
|
|
405
|
+
"""
|
|
406
|
+
Best-effort element symbol for a ParmEd atom.
|
|
407
|
+
|
|
408
|
+
Tries (in order):
|
|
409
|
+
1) atom.element_name
|
|
410
|
+
2) atom.atomic_number -> periodic table
|
|
411
|
+
3) atom.mass -> common mass table
|
|
412
|
+
4) atom.name PDB-style heuristic (very weak fallback)
|
|
413
|
+
"""
|
|
414
|
+
# 1) element_name
|
|
415
|
+
elem = getattr(atom, "element_name", None)
|
|
416
|
+
if elem:
|
|
417
|
+
norm = _normalize_element_symbol(str(elem))
|
|
418
|
+
if norm != "X":
|
|
419
|
+
return norm
|
|
420
|
+
|
|
421
|
+
# 2) atomic_number
|
|
422
|
+
z = getattr(atom, "atomic_number", None)
|
|
423
|
+
if z is not None:
|
|
424
|
+
try:
|
|
425
|
+
zi = int(z)
|
|
426
|
+
if 0 < zi < len(_PERIODIC_TABLE):
|
|
427
|
+
return _PERIODIC_TABLE[zi]
|
|
428
|
+
except Exception:
|
|
429
|
+
logger.debug("Failed to infer element from atomic_number=%s", z, exc_info=True)
|
|
430
|
+
|
|
431
|
+
# 3) mass
|
|
432
|
+
mass = getattr(atom, "mass", None)
|
|
433
|
+
if mass is not None:
|
|
434
|
+
guess = _infer_element_from_mass(mass)
|
|
435
|
+
if guess != "X":
|
|
436
|
+
return guess
|
|
437
|
+
|
|
438
|
+
# 4) fallback: first letter of atom name (can be wrong for metals)
|
|
439
|
+
name = getattr(atom, "name", "")
|
|
440
|
+
if name:
|
|
441
|
+
return _normalize_element_symbol(str(name)[0])
|
|
442
|
+
|
|
443
|
+
return "X"
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def _get_parm_elements(parm) -> List[str]:
    """Return one best-effort element symbol per atom of ``parm``, in atom order."""
    return list(map(_get_parm_element, parm.atoms))
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def _validate_element_order(
    parm,
    input_elements: List[str],
    *,
    strict: bool = True,
) -> None:
    """
    Validate that element sequence in the coordinate file matches the parm7 topology.

    Elements are compared position by position. Positions where either side
    is unknown ('X') are ignored; when any unknowns exist a warning noting
    that the check is partial is echoed first.

    If `strict` is True, ValueError is raised at the first mismatch. Otherwise
    every mismatch is echoed as a warning and the function returns normally.
    A differing atom count always raises ValueError.
    """
    parm_elements = _get_parm_elements(parm)
    if len(parm_elements) != len(input_elements):
        raise ValueError(
            f"Atom count mismatch: parm7 has {len(parm_elements)} atoms, "
            f"but input has {len(input_elements)} atoms"
        )

    unknown_in = input_elements.count("X")
    unknown_parm = parm_elements.count("X")
    if unknown_in or unknown_parm:
        click.echo(
            f"[oniom-export] WARNING: element check is partial "
            f"(unknown elements: input={unknown_in}, parm={unknown_parm})"
        )

    for i, (e_parm, e_in) in enumerate(zip(parm_elements, input_elements)):
        # Nothing to report when a side is unknown or both sides agree.
        if "X" in (e_parm, e_in) or e_parm == e_in:
            continue
        msg = (
            f"Element sequence mismatch at atom index {i} (0-based): "
            f"parm7={e_parm}, input={e_in}. "
            f"Atom order likely differs between parm7 and the coordinate file."
        )
        if strict:
            raise ValueError(msg)
        click.echo(f"[oniom-export] WARNING: {msg}")
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def _get_total_charge(parm) -> int:
|
|
493
|
+
"""
|
|
494
|
+
Calculate total charge (integer) from atom partial charges.
|
|
495
|
+
|
|
496
|
+
Notes
|
|
497
|
+
-----
|
|
498
|
+
- `atom.charge` is the most reliable source (units of electron charge).
|
|
499
|
+
- Amber prmtop stores CHARGE values scaled by ~18.2223. If we ever fall back
|
|
500
|
+
to parm_data["CHARGE"], we divide by 18.2223.
|
|
501
|
+
"""
|
|
502
|
+
q: float
|
|
503
|
+
try:
|
|
504
|
+
q = float(sum(float(getattr(a, "charge", 0.0)) for a in parm.atoms))
|
|
505
|
+
except Exception:
|
|
506
|
+
# Fallback: try prmtop raw charges (often scaled)
|
|
507
|
+
try:
|
|
508
|
+
q_raw = float(np.sum(parm.parm_data["CHARGE"]))
|
|
509
|
+
q = q_raw / 18.2223
|
|
510
|
+
except Exception:
|
|
511
|
+
q = 0.0
|
|
512
|
+
|
|
513
|
+
q_int = int(round(q))
|
|
514
|
+
if abs(q - q_int) > 1e-3:
|
|
515
|
+
click.echo(
|
|
516
|
+
f"[oniom-export] WARNING: total charge {q:.6f} is not close to an integer; "
|
|
517
|
+
f"rounded to {q_int}"
|
|
518
|
+
)
|
|
519
|
+
return q_int
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def _fix_atom_type(atom_type: str) -> str:
|
|
523
|
+
"""
|
|
524
|
+
Fix atom types for Gaussian compatibility.
|
|
525
|
+
|
|
526
|
+
- 2C, 3C -> C2C, C3C (numeric prefix)
|
|
527
|
+
- C*, N* -> C9, N9 (asterisk)
|
|
528
|
+
- lowercase (GAFF2) -> L{uppercase} (e.g., ca -> LCA)
|
|
529
|
+
"""
|
|
530
|
+
atom_type = str(atom_type)
|
|
531
|
+
if atom_type == "2C":
|
|
532
|
+
return "C2C"
|
|
533
|
+
elif atom_type == "3C":
|
|
534
|
+
return "C3C"
|
|
535
|
+
elif atom_type == "C*":
|
|
536
|
+
return "C9"
|
|
537
|
+
elif atom_type == "N*":
|
|
538
|
+
return "N9"
|
|
539
|
+
elif bool(re.match(r"^[a-z]+", atom_type)):
|
|
540
|
+
return f"L{atom_type.upper()}"
|
|
541
|
+
else:
|
|
542
|
+
return atom_type
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
def _parse_pdb_atoms_with_meta(pdb_path: Path) -> List[Dict[str, Any]]:
    """
    Read every ATOM/HETATM record of a PDB file into a list of dictionaries.

    Each dictionary holds:
    - idx (0-based position among the ATOM/HETATM records of the file)
    - atom_name, res_name, chain_id, res_seq, icode
    - coord (np.ndarray shape (3,))
    - element (element column if present, otherwise a best-effort guess)
    - bfactor (float, defaults to 0.0)

    Parsing is lenient: an unreadable residue number becomes 0, unreadable
    coordinates become (0, 0, 0) and an unreadable B-factor becomes 0.0.
    """
    records: List[Dict[str, Any]] = []
    with pdb_path.open("r") as handle:
        for line in handle:
            is_het = line.startswith("HETATM")
            if not (is_het or line.startswith("ATOM")):
                continue

            raw_name = line[12:16]
            atom_name = raw_name.strip()
            res_name = line[17:20].strip()

            try:
                res_seq = int(line[22:26].strip())
            except Exception:
                res_seq = 0

            # All three coordinates are parsed together: one bad field zeroes the triple.
            try:
                xyz = [float(line[30:38]), float(line[38:46]), float(line[46:54])]
            except Exception:
                xyz = [0.0, 0.0, 0.0]

            # B-factor (tempFactor) is columns 61-66 in PDB v3.3 (0-based slice 60:66)
            try:
                bfac = float(line[60:66])
            except Exception:
                bfac = 0.0

            # Prefer the explicit element column; otherwise guess from the names.
            elem = line[76:78].strip() if len(line) >= 78 else ""
            if not elem:
                elem = _guess_element(atom_name, res_name, is_het) or _infer_element_from_pdb_atom_name(raw_name)

            records.append(
                {
                    "idx": len(records),
                    "atom_name": atom_name,
                    "res_name": res_name,
                    "chain_id": line[21:22].strip(),
                    "res_seq": res_seq,
                    "icode": line[26:27].strip(),
                    "coord": np.array(xyz, dtype=float),
                    "element": _normalize_element_symbol(elem),
                    "bfactor": float(bfac),
                }
            )
    return records
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
def _read_qm_atoms_from_pdb(
    model_pdb: Path,
    *,
    input_pdb: Optional[Path] = None,
    system_coords: Optional[np.ndarray] = None,
    system_elements: Optional[List[str]] = None,
    match_tol: float = 0.2,
) -> Set[int]:
    """
    Determine QM-region atom indices (0-based, topology order).

    This function tries (in order):
    1) Match model_pdb atoms to atoms in `input_pdb` via (atom_name, res_name, res_seq),
       with coordinate-based disambiguation when that ID is not unique.
    2) If `input_pdb` is not available, match by nearest coordinates against `system_coords`
       (optionally requiring element agreement when available).

    In both paths, model atoms that cannot be matched are left out of the
    result and reported with a warning.

    Parameters
    ----------
    model_pdb
        PDB containing QM-region atoms (typically a subset PDB produced by `define-layer`
        or `build_model_pdb_from_bfactors`).
    input_pdb
        Full-system PDB whose atom order matches the Amber topology (recommended).
        Only used when it exists and has a `.pdb` or `.ent` suffix.
    system_coords
        Full-system coordinates (shape (N,3)) in topology order.
    system_elements
        Optional element list (len N). If provided, element mismatches are rejected
        (coordinate-only path).
    match_tol
        Maximum allowed distance (Å) for coordinate matching/disambiguation.

    Returns
    -------
    Set[int]
        QM atom indices in 0-based topology order (empty if model_pdb has no atoms).

    Raises
    ------
    ValueError
        If `input_pdb` is used but contains no atoms, or if neither a usable
        `input_pdb` nor `system_coords` is available.
    """
    qm_indices: Set[int] = set()

    model_atoms = _parse_pdb_atoms_with_meta(model_pdb)
    if not model_atoms:
        return qm_indices

    # Path 1: match by PDB identifiers using full-system PDB
    if input_pdb is not None and input_pdb.suffix.lower() in {".pdb", ".ent"} and input_pdb.exists():
        input_atoms = _parse_pdb_atoms_with_meta(input_pdb)
        if not input_atoms:
            raise ValueError(f"Failed to read any atoms from input PDB: {input_pdb}")

        if system_coords is None:
            # If the caller didn't provide coords, use the PDB coords for disambiguation
            system_coords = np.asarray([a["coord"] for a in input_atoms], dtype=float)
        sys_coords = np.asarray(system_coords, dtype=float)

        # Map from (atom_name,res_name,res_seq) -> list of candidate indices
        key_to_candidates: Dict[Tuple[str, str, int], List[int]] = {}
        for a in input_atoms:
            key = (a["atom_name"], a["res_name"], int(a["res_seq"]))
            key_to_candidates.setdefault(key, []).append(int(a["idx"]))

        used: Set[int] = set()
        missing = 0
        for ma in model_atoms:
            key = (ma["atom_name"], ma["res_name"], int(ma["res_seq"]))
            cand = key_to_candidates.get(key, [])
            if not cand:
                missing += 1
                continue

            # A unique, not-yet-used candidate is accepted without a distance check.
            if len(cand) == 1 and cand[0] not in used:
                qm_indices.add(cand[0])
                used.add(cand[0])
                continue

            # Disambiguate by nearest coordinate among candidates, preferring
            # indices not used yet; reuse is allowed only as a last resort.
            cand_free = [i for i in cand if i not in used] or cand

            cand_coords = sys_coords[np.asarray(cand_free, dtype=int)]
            dists = np.linalg.norm(cand_coords - ma["coord"][None, :], axis=1)
            j = int(np.argmin(dists))
            chosen = int(cand_free[j])

            if float(dists[j]) > match_tol:
                # ID match exists but coordinates are far apart -> likely inconsistent inputs
                click.echo(
                    f"[oniom-export] WARNING: matched ID {key} but nearest distance is {dists[j]:.3f} Å "
                    f"(> {match_tol} Å). Check that input_pdb and model_pdb come from the same structure."
                )

            qm_indices.add(chosen)
            used.add(chosen)

        if missing > 0:
            click.echo(
                f"[oniom-export] WARNING: {missing} atoms in model_pdb could not be matched by "
                f"(atom_name,res_name,res_seq) to input_pdb. "
                "If this is unexpected, verify residue numbering and naming."
            )

        return qm_indices

    # Path 2: coordinate-only matching against system_coords
    if system_coords is None:
        raise ValueError(
            "Cannot match model_pdb to topology atoms without either `input_pdb` (full-system PDB) "
            "or `system_coords`."
        )

    sys_coords = np.asarray(system_coords, dtype=float)

    # Nearest system atom (distance, index) for each model atom. Left empty when
    # there are no system coordinates, so every model atom counts as unmatched.
    nearest: List[Tuple[float, int]] = []
    if sys_coords.size > 0:
        targets = np.asarray([ma["coord"] for ma in model_atoms], dtype=float)
        try:
            from scipy.spatial import cKDTree

            tree_dists, tree_idxs = cKDTree(sys_coords).query(targets, k=1)
            nearest = [(float(d), int(i)) for d, i in zip(tree_dists, tree_idxs)]
        except Exception:
            # Slow fallback (SciPy missing or the tree query failed): brute force.
            nearest = []
            for xyz in targets:
                d = np.linalg.norm(sys_coords - xyz[None, :], axis=1)
                i = int(np.argmin(d))
                nearest.append((float(d[i]), i))

    unmatched = len(model_atoms) - len(nearest)
    for ma, (dist, idx) in zip(model_atoms, nearest):
        if dist > match_tol:
            unmatched += 1
            continue
        if system_elements is not None and 0 <= idx < len(system_elements):
            e_sys = _normalize_element_symbol(system_elements[idx])
            e_mod = _normalize_element_symbol(ma["element"])
            # Reject only when both elements are known and disagree.
            if e_sys != "X" and e_mod != "X" and e_sys != e_mod:
                unmatched += 1
                continue
        qm_indices.add(idx)

    if unmatched > 0:
        # Mirror the ID-matching path: never drop QM atoms silently.
        click.echo(
            f"[oniom-export] WARNING: {unmatched} atoms in model_pdb could not be matched to "
            f"system coordinates within {match_tol} Å (or failed the element check) "
            "and were left out of the QM region."
        )

    return qm_indices
|
|
757
|
+
|
|
758
|
+
def _identify_qm_atoms_by_distance(
|
|
759
|
+
parm,
|
|
760
|
+
qm_residue_indices: List[int],
|
|
761
|
+
near_cutoff: float,
|
|
762
|
+
) -> Tuple[Set[int], Set[int]]:
|
|
763
|
+
"""
|
|
764
|
+
Identify QM and movable atoms based on residue indices and distance cutoff.
|
|
765
|
+
|
|
766
|
+
Returns:
|
|
767
|
+
(qm_atom_indices, movable_atom_indices) - both 0-based
|
|
768
|
+
"""
|
|
769
|
+
from scipy import spatial
|
|
770
|
+
|
|
771
|
+
# Get QM atom indices from specified residues
|
|
772
|
+
qm_atom_indices: Set[int] = set()
|
|
773
|
+
for resi in qm_residue_indices:
|
|
774
|
+
if 0 <= resi < len(parm.residues):
|
|
775
|
+
for atom in parm.residues[resi].atoms:
|
|
776
|
+
qm_atom_indices.add(atom.idx)
|
|
777
|
+
|
|
778
|
+
if not qm_atom_indices:
|
|
779
|
+
return set(), set()
|
|
780
|
+
|
|
781
|
+
# Find movable atoms (within near_cutoff of QM atoms)
|
|
782
|
+
qm_list = sorted(qm_atom_indices)
|
|
783
|
+
neighbor_mask = np.any(
|
|
784
|
+
spatial.distance.cdist(parm.coordinates, parm.coordinates[qm_list]) <= near_cutoff,
|
|
785
|
+
axis=1,
|
|
786
|
+
)
|
|
787
|
+
|
|
788
|
+
# Include entire residues if any atom is within cutoff
|
|
789
|
+
movable_indices: Set[int] = set()
|
|
790
|
+
neighbor_residues = set(parm.atoms[i].residue for i in np.where(neighbor_mask)[0])
|
|
791
|
+
for residue in neighbor_residues:
|
|
792
|
+
for atom in residue.atoms:
|
|
793
|
+
movable_indices.add(atom.idx)
|
|
794
|
+
|
|
795
|
+
return qm_atom_indices, movable_indices
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
# -----------------------------------------------
|
|
799
|
+
# QM/MM covalent-boundary link helpers
|
|
800
|
+
# -----------------------------------------------
|
|
801
|
+
|
|
802
|
+
_LINK_H_BOND_LENGTH = {
|
|
803
|
+
"C": 1.09,
|
|
804
|
+
"N": 1.01,
|
|
805
|
+
}
|
|
806
|
+
|
|
807
|
+
_LINK_H_FF_TYPE = {
|
|
808
|
+
"C": "HC",
|
|
809
|
+
"N": "H",
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
def _atom_xyz(parm, atom_idx: int) -> np.ndarray:
|
|
814
|
+
"""Return Cartesian coordinate (Å) for a topology atom index."""
|
|
815
|
+
atom = parm.atoms[int(atom_idx)]
|
|
816
|
+
try:
|
|
817
|
+
return np.array([float(atom.xx), float(atom.xy), float(atom.xz)], dtype=float)
|
|
818
|
+
except Exception:
|
|
819
|
+
return np.asarray(parm.coordinates[int(atom_idx)], dtype=float)
|
|
820
|
+
|
|
821
|
+
|
|
822
|
+
def _find_qmmm_boundary_pairs(parm, qm_indices: Set[int]) -> List[Tuple[int, int]]:
|
|
823
|
+
"""
|
|
824
|
+
Detect covalent QM/MM boundary bonds from topology bonds.
|
|
825
|
+
|
|
826
|
+
Returns
|
|
827
|
+
-------
|
|
828
|
+
List[Tuple[int, int]]
|
|
829
|
+
A list of (qm_idx, mm_idx) index pairs (0-based).
|
|
830
|
+
"""
|
|
831
|
+
per_mm_candidates: Dict[int, List[int]] = {}
|
|
832
|
+
|
|
833
|
+
for bond in getattr(parm, "bonds", []):
|
|
834
|
+
i = int(bond.atom1.idx)
|
|
835
|
+
j = int(bond.atom2.idx)
|
|
836
|
+
i_qm = i in qm_indices
|
|
837
|
+
j_qm = j in qm_indices
|
|
838
|
+
if i_qm == j_qm:
|
|
839
|
+
continue
|
|
840
|
+
qm_idx, mm_idx = (i, j) if i_qm else (j, i)
|
|
841
|
+
per_mm_candidates.setdefault(mm_idx, []).append(qm_idx)
|
|
842
|
+
|
|
843
|
+
pairs: List[Tuple[int, int]] = []
|
|
844
|
+
for mm_idx, cands_raw in sorted(per_mm_candidates.items()):
|
|
845
|
+
cands = sorted(set(int(x) for x in cands_raw))
|
|
846
|
+
if len(cands) == 1:
|
|
847
|
+
pairs.append((cands[0], mm_idx))
|
|
848
|
+
continue
|
|
849
|
+
|
|
850
|
+
mm_xyz = _atom_xyz(parm, mm_idx)
|
|
851
|
+
best_qm = min(
|
|
852
|
+
cands,
|
|
853
|
+
key=lambda q: float(np.linalg.norm(_atom_xyz(parm, int(q)) - mm_xyz)),
|
|
854
|
+
)
|
|
855
|
+
click.echo(
|
|
856
|
+
f"[oniom-export] WARNING: MM atom {mm_idx} is bonded to multiple QM atoms "
|
|
857
|
+
f"{cands}; using closest QM atom {best_qm} as the link parent."
|
|
858
|
+
)
|
|
859
|
+
pairs.append((int(best_qm), int(mm_idx)))
|
|
860
|
+
|
|
861
|
+
return pairs
|
|
862
|
+
|
|
863
|
+
|
|
864
|
+
def _estimate_link_h_position(parm, qm_idx: int, mm_idx: int, bond_length: float) -> Optional[np.ndarray]:
    """
    Estimate link-H position using the MLMMCore rule: r_H = r_QM + u(QM->MM) * d.

    Returns None when the QM and MM atoms are (numerically) at the same point,
    because the QM->MM direction is then undefined.
    """
    origin = _atom_xyz(parm, qm_idx)
    direction = _atom_xyz(parm, mm_idx) - origin
    separation = float(np.linalg.norm(direction))
    if separation < 1.0e-12:
        return None
    unit = direction / separation
    return origin + unit * float(bond_length)
|
|
873
|
+
|
|
874
|
+
|
|
875
|
+
def _build_link_atom_specs(
    parm,
    qm_indices: Set[int],
    *,
    elements: Optional[List[str]] = None,
) -> Dict[int, Dict[str, Any]]:
    """
    Build link-atom specs keyed by boundary MM atom index.

    The QM parent's element is taken from `elements` when that list covers the
    parent index, otherwise from the topology atom. Parent elements without an
    entry in the link-H tables fall back to the carbon values (with one
    warning per element). Boundaries whose link-H position cannot be estimated
    are skipped with a warning.

    Each value contains:
    - qm_idx: 0-based QM parent index
    - ff_type: Gaussian Amber atom type for the link hydrogen
    - bond_length: QM-H bond length used for placement
    - position: estimated link-H coordinate (Å)
    """
    specs: Dict[int, Dict[str, Any]] = {}
    warned_elems: Set[str] = set()

    for qm_idx, mm_idx in _find_qmmm_boundary_pairs(parm, qm_indices):
        covered_by_input = elements is not None and 0 <= qm_idx < len(elements)
        raw_elem = elements[qm_idx] if covered_by_input else _get_parm_element(parm.atoms[qm_idx])
        qm_elem = _normalize_element_symbol(raw_elem)

        has_length = qm_elem in _LINK_H_BOND_LENGTH
        bond_len = float(_LINK_H_BOND_LENGTH[qm_elem if has_length else "C"])
        if not has_length and qm_elem not in warned_elems:
            click.echo(
                f"[oniom-export] WARNING: unsupported QM parent element '{qm_elem}' for link-H "
                f"distance; using C-like default ({bond_len:.2f} Å)."
            )
            warned_elems.add(qm_elem)

        ff_type = _LINK_H_FF_TYPE.get(qm_elem, _LINK_H_FF_TYPE["C"])
        link_pos = _estimate_link_h_position(parm, qm_idx=qm_idx, mm_idx=mm_idx, bond_length=bond_len)
        if link_pos is None:
            click.echo(
                "[oniom-export] WARNING: failed to estimate link-H position for boundary "
                f"(QM={qm_idx}, MM={mm_idx}); skipping link annotation for this bond."
            )
            continue

        specs[int(mm_idx)] = dict(
            qm_idx=int(qm_idx),
            ff_type=str(ff_type),
            bond_length=float(bond_len),
            position=link_pos,
        )

    return specs
|
|
927
|
+
|
|
928
|
+
|
|
929
|
+
# -----------------------------------------------
|
|
930
|
+
# Gaussian ONIOM Export
|
|
931
|
+
# -----------------------------------------------
|
|
932
|
+
|
|
933
|
+
def _write_gaussian_header(
    parm,
    parm_path: str,
    output_name: str,
    method: str = "wB97XD/def2-TZVPD",
    nproc: int = 8,
    mem: str = "16GB",
    qm_charge: int = 0,
    qm_mult: int = 1,
    real_charge: Optional[int] = None,
    real_mult: Optional[int] = None,
) -> str:
    """
    Build the header of a Gaussian ONIOM input: Link0 lines, route section,
    title line and the charge/multiplicity line.

    Gaussian ONIOM uses *three* charge/multiplicity pairs for a 2-layer ONIOM job:
      (real system @ low level) (model system @ high level) (model system @ low level)

    When `real_charge` is None the total charge of the topology is used, and
    when `real_mult` is None `qm_mult` is used (the MM region is typically
    closed-shell). An explicit `real_charge` that differs from the topology
    total is kept, with a warning. The checkpoint file is named after the stem
    of `output_name`.
    """
    total_charge = _get_total_charge(parm)

    real_charge = total_charge if real_charge is None else real_charge
    real_mult = qm_mult if real_mult is None else real_mult

    if int(real_charge) != int(total_charge):
        click.echo(
            f"[oniom-export] WARNING: real_charge={real_charge} differs from topology total charge={total_charge}. "
            "Proceeding as requested."
        )

    chk_name = Path(output_name).stem
    model_pair = f"{qm_charge} {qm_mult}"

    header_lines = [
        f"%chk={chk_name}.chk",
        f"%mem={mem}",
        f"%nprocshared={nproc}",
        f"#p oniom({method}:amber=softonly)",
        "scf=(xqc,intrep,maxconventionalcyc=80)",
        "nosymm iop(2/15=3) geom=connectivity Amber=(FirstEquiv)",
        "",
        f"ONIOM inputfile generated by mlmm oniom-export from {parm_path}.",
        "",
        f"{real_charge} {real_mult} {model_pair} {model_pair}",
        "",  # the header ends with a newline after the charge/multiplicity line
    ]
    return "\n".join(header_lines)
|
|
981
|
+
|
|
982
|
+
def _write_gaussian_coordinates(
    parm,
    qm_indices: Set[int],
    movable_indices: Set[int],
    elements: Optional[List[str]] = None,
    link_specs: Optional[Dict[int, Dict[str, Any]]] = None,
) -> Tuple[str, str]:
    """
    Build the coordinate section and the connectivity section of a Gaussian ONIOM input.

    Each coordinate line is `element-fftype-charge  flag  x y z  layer`, where
    the flag is 0 for atoms in `movable_indices` and -1 otherwise, and the
    layer is "H" for atoms in `qm_indices` and "L" otherwise. Low-layer atoms
    listed in `link_specs` additionally get a link-hydrogen suffix naming their
    (1-based) QM parent. `elements` overrides the topology elements only when
    its length equals the number of atoms.

    Connectivity lists every atom once (1-based); each bond appears on the line
    of its lower-numbered atom with bond order 1.0.

    Returns:
        (coords_section, connectivity_section)
    """
    # Optional elements list (preferred when coming from input PDB/XYZ)
    use_input_elements = elements is not None and len(elements) == len(parm.atoms)

    coords_lines: List[str] = []
    for atom in parm.atoms:
        idx = atom.idx
        in_qm = idx in qm_indices
        layer = "H" if in_qm else "L"
        move_flag = 0 if idx in movable_indices else -1

        element = elements[idx] if use_input_elements else _get_parm_element(atom)
        atom_section = f"{element}-{_fix_atom_type(atom.atom_type)}-{atom.charge:.6f}"

        link_suffix = ""
        if not in_qm and link_specs is not None and idx in link_specs:
            spec = link_specs[idx]
            qm_parent = int(spec["qm_idx"]) + 1  # Gaussian connectivity is 1-based
            link_suffix = f" H-{_fix_atom_type(str(spec['ff_type']))} {qm_parent}"

        coords_lines.append(
            f"{atom_section:<20} {move_flag:>2} {atom.xx:12.6f} {atom.xy:12.6f} {atom.xz:12.6f} {layer}{link_suffix}"
        )

    # Connectivity section: adjacency lists built from the topology bonds.
    adjacency: Dict[int, List[int]] = {}
    for bond in parm.bonds:
        a, b = bond.atom1.idx, bond.atom2.idx
        adjacency.setdefault(a, []).append(b)
        adjacency.setdefault(b, []).append(a)

    conn_lines: List[str] = []
    for i in range(len(parm.atoms)):
        # Only neighbours with a higher index, so each bond is written once.
        higher = sorted(j for j in adjacency.get(i, ()) if j > i)
        entry = f"{i+1}"
        if higher:
            entry += " " + " ".join(f"{j+1} 1.0" for j in higher)
        conn_lines.append(entry)

    return "\n".join(coords_lines), "\n".join(conn_lines)
|
|
1048
|
+
|
|
1049
|
+
|
|
1050
|
+
def _write_gaussian_ff_params(parm) -> str:
    """
    Extract Amber-style force field parameters from parm7 for Gaussian (Amber=SoftOnly).

    This attempts to write a *self-contained* parameter section with the core Amber terms:
      - NonBon (Amber mixing rule + standard 1-4 scaling)
      - HrmStr1 (bonds)
      - HrmBnd1 (angles)
      - AmbTrs  (proper torsions, periodicities 1-4)
      - ImpTrs  (improper torsions)
      - VDW     (per-atom-type LJ parameters; Radius = Rmin/2, Well-depth = epsilon)

    Parameters are emitted once per atom-type combination. The topology lists one
    entry per bond/angle/dihedral *instance*, so identical entries are
    de-duplicated before being written. For proper torsions this matters most:
    the same (types, periodicity, phase, magnitude) term is counted once, no
    matter how many dihedrals in the system carry it.

    Limitations
    -----------
    - Amber torsions with periodicity > 4 are not representable by AmbTrs; they are skipped with a warning
      (one warning per atom-type combination and periodicity).
    - Per-dihedral 1-4 scaling factors (SCEE/SCNB) are not emitted (Gaussian uses the global NonBon scaling).
      This matches most standard Amber/GAFF workflows where SCEE/SCNB are uniform.
    - Atom types for which no radius or no epsilon can be read get no VDW line.

    Returns
    -------
    str
        The parameter section, lines joined by newlines.
    """
    lines: List[str] = []

    def _ff_type(atom: Any) -> str:
        # Gaussian-safe atom type; "X" when the atom has no atom_type attribute.
        return _fix_atom_type(getattr(atom, "atom_type", "X"))

    # Non-bonded master function: Amber arithmetic mixing + standard exclusions and 1-4 scaling.
    # V-type=3 (Amber arithmetic), C-type=1 (Coulomb), cutoffs 0/0 (no explicit cutoffs here),
    # VScale: 1-2=0, 1-3=0, 1-4=0.5 ; CScale: 1-2=0, 1-3=0, 1-4=Amber default (1/1.2 via -1.2).
    lines.append("! Nonbonded master function (Amber defaults)")
    lines.append("NonBon 3 1 0 0 0.0 0.0 0.5 0.0 0.0 -1.2")

    # -------------------------
    # Bonds
    # -------------------------
    lines.append("")
    lines.append("! Bond parameters")
    bond_params: Set[Tuple[str, str, float, float]] = set()
    for bond in getattr(parm, "bonds", []):
        btype = getattr(bond, "type", None)
        if btype is None:
            continue
        try:
            k = float(getattr(btype, "k"))
            req = float(getattr(btype, "req"))
        except Exception:
            continue

        t1 = _ff_type(bond.atom1)
        t2 = _ff_type(bond.atom2)
        # Canonical order so A-B and B-A collapse to one entry.
        if t1 > t2:
            t1, t2 = t2, t1
        bond_params.add((t1, t2, k, req))

    for t1, t2, k, req in sorted(bond_params):
        lines.append(f"HrmStr1 {t1} {t2} {k:.6f} {req:.6f}")

    # -------------------------
    # Angles
    # -------------------------
    lines.append("")
    lines.append("! Angle parameters")
    angle_params: Set[Tuple[str, str, str, float, float]] = set()
    for angle in getattr(parm, "angles", []):
        atype = getattr(angle, "type", None)
        if atype is None:
            continue
        try:
            k = float(getattr(atype, "k"))
            theteq = float(getattr(atype, "theteq"))
        except Exception:
            continue

        t1 = _ff_type(angle.atom1)
        t2 = _ff_type(angle.atom2)
        t3 = _ff_type(angle.atom3)

        # Sort endpoints for consistency
        if t1 > t3:
            t1, t3 = t3, t1
        angle_params.add((t1, t2, t3, k, theteq))

    for t1, t2, t3, k, theteq in sorted(angle_params):
        lines.append(f"HrmBnd1 {t1} {t2} {t3} {k:.6f} {theteq:.6f}")

    # -------------------------
    # Torsions (proper)
    # -------------------------
    def _as_term_list(dtype_obj: Any) -> List[Any]:
        # Normalize a dihedral "type" (single term or multi-term container) to a list of terms.
        if dtype_obj is None:
            return []
        # ParmEd uses DihedralTypeList for multi-term torsions (iterable)
        terms = getattr(dtype_obj, "terms", None)
        if terms is not None:
            try:
                return list(terms)
            except Exception:
                logger.debug("Failed to convert terms to list", exc_info=True)
        if isinstance(dtype_obj, (list, tuple)):
            return list(dtype_obj)
        # Try iteration (DihedralTypeList behaves like a list)
        try:
            if hasattr(dtype_obj, "__iter__") and not isinstance(dtype_obj, (str, bytes)):
                return list(dtype_obj)
        except Exception:
            logger.debug("Failed to iterate dtype_obj", exc_info=True)
        return [dtype_obj]

    def _get_attr(obj: Any, names: List[str], default: Any = None) -> Any:
        # First attribute among `names` that exists on obj and is not None, else `default`.
        for n in names:
            if hasattr(obj, n):
                v = getattr(obj, n)
                if v is not None:
                    return v
        return default

    lines.append("")
    lines.append("! Proper torsions (AmbTrs)")
    # key -> (phase[4], mag[4])
    tors_params: Dict[Tuple[str, str, str, str], Tuple[List[float], List[float]]] = {}
    # Terms already folded into tors_params: (key, periodicity, phase, magnitude),
    # rounded to the precision that is written out.
    seen_terms: Set[Tuple[Tuple[str, str, str, str], int, float, float]] = set()
    # (key, periodicity) combinations already reported as not representable.
    warned_high_per: Set[Tuple[Tuple[str, str, str, str], int]] = set()

    # Separate proper vs improper if ParmEd exposes `impropers`
    dihedrals_all = list(getattr(parm, "dihedrals", []) or [])
    impropers_from_attr = list(getattr(parm, "impropers", []) or [])
    if impropers_from_attr:
        proper_dihedrals = dihedrals_all
        improper_dihedrals = impropers_from_attr
    else:
        proper_dihedrals = [d for d in dihedrals_all if not bool(getattr(d, "improper", False))]
        improper_dihedrals = [d for d in dihedrals_all if bool(getattr(d, "improper", False))]

    for dih in proper_dihedrals:
        dtype = getattr(dih, "type", None)
        for term in _as_term_list(dtype):
            try:
                per = _get_attr(term, ["per", "periodicity", "period"], None)
                phase = float(_get_attr(term, ["phase", "phi", "phase_shift"], 0.0))
                mag = float(_get_attr(term, ["phi_k", "pk", "k", "barrier"], 0.0))
                div = float(_get_attr(term, ["div", "divider", "idivf", "npaths"], 1.0))
                if div == 0.0:
                    div = 1.0
            except Exception:
                continue

            try:
                n = int(round(abs(float(per))))
            except Exception:
                continue
            if n < 1:
                continue

            key = (_ff_type(dih.atom1), _ff_type(dih.atom2), _ff_type(dih.atom3), _ff_type(dih.atom4))

            if n > 4:
                if (key, n) not in warned_high_per:
                    warned_high_per.add((key, n))
                    click.echo(
                        f"[oniom-export] WARNING: skipping Amber torsion with periodicity {n} (>4) "
                        f"for types {key[0]}-{key[1]}-"
                        f"{key[2]}-{key[3]}"
                    )
                continue

            # Amber divides each term by IDIVF; Gaussian's AmbTrs uses a single NPaths.
            # We fold the division into the magnitude and set NPaths=1.
            mag_eff = mag / div

            # The same term shows up once per dihedral instance in the topology;
            # count it only once, otherwise the barrier scales with the number
            # of occurrences of this atom-type combination.
            term_id = (key, n, round(phase, 6), round(mag_eff, 6))
            if term_id in seen_terms:
                continue
            seen_terms.add(term_id)

            if key not in tors_params:
                tors_params[key] = ([0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0])

            phases, mags = tors_params[key]
            idx = n - 1

            if mags[idx] != 0.0 and abs(phases[idx] - phase) > 1e-6:
                click.echo(
                    f"[oniom-export] WARNING: multiple torsion terms with the same periodicity {n} but "
                    f"different phases for key {key}. Keeping the first phase {phases[idx]:.3f} and "
                    f"adding magnitudes."
                )
            if mags[idx] == 0.0:
                phases[idx] = phase
            mags[idx] += mag_eff

    for (t1, t2, t3, t4) in sorted(tors_params.keys()):
        phases, mags = tors_params[(t1, t2, t3, t4)]
        if all(abs(m) < 1e-12 for m in mags):
            continue
        po1, po2, po3, po4 = phases
        m1, m2, m3, m4 = mags
        lines.append(
            f"AmbTrs {t1} {t2} {t3} {t4} "
            f"{po1:.6f} {po2:.6f} {po3:.6f} {po4:.6f} "
            f"{m1:.6f} {m2:.6f} {m3:.6f} {m4:.6f} 1"
        )

    # -------------------------
    # Improper torsions
    # -------------------------
    lines.append("")
    lines.append("! Improper torsions (ImpTrs)")
    improper_params: Set[Tuple[str, str, str, str, float, float, float]] = set()

    for imp in improper_dihedrals:
        dtype = getattr(imp, "type", None)
        for term in _as_term_list(dtype):
            try:
                per = float(abs(float(_get_attr(term, ["per", "periodicity", "period"], 2.0))))
                phase = float(_get_attr(term, ["phase", "phi", "phase_shift"], 0.0))
                mag = float(_get_attr(term, ["phi_k", "pk", "k", "barrier"], 0.0))
                div = float(_get_attr(term, ["div", "divider", "idivf", "npaths"], 1.0))
                if div == 0.0:
                    div = 1.0
            except Exception:
                continue

            t1 = _ff_type(imp.atom1)
            t2 = _ff_type(imp.atom2)
            t3 = _ff_type(imp.atom3)
            t4 = _ff_type(imp.atom4)

            mag_eff = mag / div
            improper_params.add((t1, t2, t3, t4, mag_eff, phase, per))

    for t1, t2, t3, t4, mag_eff, phase, per in sorted(improper_params):
        if abs(mag_eff) < 1e-12:
            continue
        lines.append(f"ImpTrs {t1} {t2} {t3} {t4} {mag_eff:.6f} {phase:.6f} {per:.6f}")

    # -------------------------
    # VDW parameters
    # -------------------------
    lines.append("")
    lines.append("! VDW parameters (Radius = Rmin/2 [Å], Well-depth = epsilon [kcal/mol])")
    vdw_params: Dict[str, Tuple[float, float]] = {}
    for atom in getattr(parm, "atoms", []):
        atype = _ff_type(atom)
        if atype in vdw_params:
            # First atom of each type that yields both values defines the type.
            continue

        # ParmEd often provides rmin (Amber Rmin/2), epsilon, and/or sigma
        radius: Optional[float] = None
        epsilon: Optional[float] = None

        try:
            if hasattr(atom, "rmin") and getattr(atom, "rmin") is not None:
                radius = float(getattr(atom, "rmin"))
            elif hasattr(atom, "rmin_half") and getattr(atom, "rmin_half") is not None:
                radius = float(getattr(atom, "rmin_half"))
            elif hasattr(atom, "sigma") and getattr(atom, "sigma") is not None:
                # Convert sigma -> Rmin/2 using rmin = 2^(1/6)*sigma, and radius = rmin/2
                radius = float(getattr(atom, "sigma")) * (2.0 ** (1.0 / 6.0)) / 2.0
        except Exception:
            radius = None

        try:
            if hasattr(atom, "epsilon") and getattr(atom, "epsilon") is not None:
                epsilon = float(getattr(atom, "epsilon"))
        except Exception:
            epsilon = None

        if radius is None or epsilon is None:
            continue

        vdw_params[atype] = (radius, epsilon)

    for atype, (radius, epsilon) in sorted(vdw_params.items()):
        lines.append(f"VDW {atype} {radius:.6f} {epsilon:.6f}")

    return "\n".join(lines)
|
|
1314
|
+
|
|
1315
|
+
def export_gaussian(
    parm7_path: Path,
    model_pdb: Optional[Path],
    output_path: Path,
    method: str = "wB97XD/def2-TZVPD",
    qm_charge: int = 0,
    qm_mult: int = 1,
    near_cutoff: float = 6.0,
    nproc: int = 8,
    mem: str = "16GB",
    qm_residues: Optional[List[int]] = None,
    input_path: Optional[Path] = None,
    element_check: bool = True,
) -> None:
    """
    Generate a Gaussian ONIOM input file from an Amber parm7 topology.

    The QM region is taken from the first source that is available, in this
    order: ``model_pdb``, ``qm_residues``, then the ML layer encoded in the
    B-factors of a layered input PDB. The output is written as UTF-8.

    Args:
        parm7_path: Path to Amber parm7 topology
        model_pdb: Path to PDB defining QM region atoms (optional)
        output_path: Output Gaussian input file
        method: QM method and basis set
        qm_charge: Charge of QM region
        qm_mult: Multiplicity of QM region
        near_cutoff: Distance cutoff for movable atoms (Angstrom)
        nproc: Number of processors
        mem: Memory allocation
        qm_residues: List of 0-based residue indices for QM region (alternative to model_pdb)
        input_path: Coordinate file (.pdb or .xyz). If omitted, uses coordinates stored in the ParmEd object.
        element_check: If True, validate element sequence between parm7 and input coordinates.

    Raises:
        ValueError: If no coordinates are available, no QM region is specified,
            the QM region is empty, or a QM index exceeds the topology size.
    """
    _check_parmed()

    parm = pmd.load_file(str(parm7_path))

    # Load / attach coordinates
    elements_for_output: Optional[List[str]] = None
    if input_path is not None:
        input_coords, input_elems = _read_input_geometry(input_path)
        _apply_coordinates_to_parm(parm, input_coords)
        elements_for_output = input_elems
        if element_check:
            _validate_element_order(parm, input_elems, strict=True)
    else:
        coords_attr = getattr(parm, "coordinates", None)
        try:
            n_coords = len(coords_attr) if coords_attr is not None else 0
        except Exception:
            # An object without a usable length is treated as "no coordinates".
            n_coords = 0
        if n_coords == 0:
            raise ValueError(
                "No coordinates found in the loaded parm7. "
                "Please provide a coordinate file with -i/--input (PDB or XYZ)."
            )
        elements_for_output = _get_parm_elements(parm)

    input_is_pdb = input_path is not None and input_path.suffix.lower() in {".pdb", ".ent"}

    # Detect layer indices from B-factors if the input is a layered PDB produced by mlmm.
    layer_info: Optional[Dict[str, List[int]]] = None
    if input_is_pdb:
        try:
            from .utils import (
                has_valid_layer_bfactors,
                parse_layer_indices_from_bfactors,
                read_bfactors_from_pdb,
            )

            bfactors = read_bfactors_from_pdb(input_path)
            if len(bfactors) == len(parm.atoms) and has_valid_layer_bfactors(bfactors):
                layer_info = parse_layer_indices_from_bfactors(bfactors)
                click.echo(
                    "[oniom-export] Detected ML/MM layer B-factors in the input PDB; "
                    "using them to decide movable/frozen atoms."
                )
        except Exception:
            # Layer detection is best-effort: any failure falls back to the
            # explicit QM definition / distance-based selection below.
            layer_info = None

    # Determine QM region
    qm_indices: Set[int] = set()
    movable_indices: Set[int] = set()

    if model_pdb is not None:
        qm_indices = _read_qm_atoms_from_pdb(
            model_pdb,
            input_pdb=input_path if input_is_pdb else None,
            system_coords=getattr(parm, "coordinates", None),
            system_elements=elements_for_output,
        )
    elif qm_residues:
        qm_indices, movable_indices = _identify_qm_atoms_by_distance(parm, qm_residues, near_cutoff)
    elif layer_info is not None and layer_info.get("ml_indices"):
        qm_indices = set(int(i) for i in layer_info["ml_indices"])
    else:
        raise ValueError(
            "No QM region specified. Provide --model-pdb, or supply a layered input PDB "
            "(B-factor=0 marks the ML/QM region), or use qm_residues in the Python API."
        )

    if not qm_indices:
        raise ValueError("No QM atoms identified")

    if max(qm_indices) >= len(parm.atoms):
        raise ValueError(
            f"QM index out of range: max(qm_indices)={max(qm_indices)} but topology has {len(parm.atoms)} atoms. "
            "Check that your model PDB / input PDB and parm7 have consistent atom ordering."
        )

    # Determine movable atoms for partial optimization.
    if layer_info is not None:
        # Layer B-factors take precedence: everything not marked frozen may move.
        frozen = set(int(i) for i in layer_info.get("frozen_indices", []))
        movable_indices = set(range(len(parm.atoms))) - frozen
    elif not movable_indices:
        # Distance-based selection: include all atoms in residues within `near_cutoff` of any QM atom.
        from scipy import spatial

        qm_list = sorted(qm_indices)
        neighbor_mask = np.any(
            spatial.distance.cdist(parm.coordinates, parm.coordinates[qm_list]) <= near_cutoff,
            axis=1,
        )
        neighbor_residues = set(parm.atoms[i].residue for i in np.where(neighbor_mask)[0])
        for residue in neighbor_residues:
            for atom in residue.atoms:
                movable_indices.add(atom.idx)

    movable_indices |= qm_indices  # QM atoms must always be movable

    # Detect covalent QM/MM boundaries and generate link-atom metadata.
    link_specs = _build_link_atom_specs(
        parm,
        qm_indices,
        elements=elements_for_output,
    )

    # Generate sections
    header = _write_gaussian_header(
        parm,
        str(parm7_path),
        str(output_path),
        method=method,
        nproc=nproc,
        mem=mem,
        qm_charge=qm_charge,
        qm_mult=qm_mult,
    )
    coord_block, connectivity = _write_gaussian_coordinates(
        parm,
        qm_indices,
        movable_indices,
        elements=elements_for_output,
        link_specs=link_specs,
    )
    ff_params = _write_gaussian_ff_params(parm)

    # Write output. The encoding is pinned because the generated text can
    # contain non-ASCII characters (e.g. an Angstrom sign in parameter
    # comments); relying on the locale default could fail or differ per platform.
    with output_path.open("w", encoding="utf-8") as f:
        f.write(header)
        f.write(coord_block)
        f.write("\n\n")
        f.write(connectivity)
        f.write("\n\n")
        f.write(ff_params)
        f.write("\n\n")

    click.echo(f"[oniom-gaussian] Wrote '{output_path}'")
    click.echo(f"[oniom-gaussian] QM atoms: {len(qm_indices)}, Movable atoms: {len(movable_indices)}")
    click.echo(f"[oniom-gaussian] Link boundaries: {len(link_specs)}")
|
|
1484
|
+
|
|
1485
|
+
|
|
1486
|
+
# -----------------------------------------------
|
|
1487
|
+
# ORCA QM/MM Export
|
|
1488
|
+
# -----------------------------------------------
|
|
1489
|
+
|
|
1490
|
+
# -----------------------------------------------
|
|
1491
|
+
# ORCA QM/MM Export
|
|
1492
|
+
# -----------------------------------------------
|
|
1493
|
+
|
|
1494
|
+
def _format_orca_index_set(indices: Set[int]) -> str:
|
|
1495
|
+
"""
|
|
1496
|
+
Format a set of 0-based atom indices using ORCA's compact range syntax.
|
|
1497
|
+
|
|
1498
|
+
Example:
|
|
1499
|
+
{0:3 7 10:12}
|
|
1500
|
+
"""
|
|
1501
|
+
if not indices:
|
|
1502
|
+
return "{}"
|
|
1503
|
+
sorted_idx = sorted(int(i) for i in indices)
|
|
1504
|
+
parts: List[str] = []
|
|
1505
|
+
start = prev = sorted_idx[0]
|
|
1506
|
+
for i in sorted_idx[1:]:
|
|
1507
|
+
if i == prev + 1:
|
|
1508
|
+
prev = i
|
|
1509
|
+
continue
|
|
1510
|
+
parts.append(f"{start}:{prev}" if prev > start else f"{start}")
|
|
1511
|
+
start = prev = i
|
|
1512
|
+
parts.append(f"{start}:{prev}" if prev > start else f"{start}")
|
|
1513
|
+
return "{" + " ".join(parts) + "}"
|
|
1514
|
+
|
|
1515
|
+
|
|
1516
|
+
def _manual_orcaff_command(parm7_path: Path, out_dir: Path) -> str:
|
|
1517
|
+
"""Return a shell command users can run to generate ORCAFF.prms manually."""
|
|
1518
|
+
return (
|
|
1519
|
+
f"cd {shlex.quote(str(out_dir.resolve()))} && "
|
|
1520
|
+
f"orca_mm -convff -AMBER {shlex.quote(str(parm7_path.resolve()))}"
|
|
1521
|
+
)
|
|
1522
|
+
|
|
1523
|
+
|
|
1524
|
+
def _resolve_oniom_mode(mode: Optional[str], output_path: Path) -> str:
|
|
1525
|
+
"""Resolve export mode using explicit `--mode` first, then output suffix."""
|
|
1526
|
+
if mode is not None:
|
|
1527
|
+
return str(mode).strip().lower()
|
|
1528
|
+
|
|
1529
|
+
suffix = output_path.suffix.lower()
|
|
1530
|
+
if suffix in {".gjf", ".com"}:
|
|
1531
|
+
return "g16"
|
|
1532
|
+
if suffix == ".inp":
|
|
1533
|
+
return "orca"
|
|
1534
|
+
|
|
1535
|
+
raise ValueError(
|
|
1536
|
+
f"Could not infer export mode from -o/--output '{output_path}'. "
|
|
1537
|
+
"Specify --mode (g16/orca) or use an output suffix: .gjf/.com (g16), .inp (orca)."
|
|
1538
|
+
)
|
|
1539
|
+
|
|
1540
|
+
|
|
1541
|
+
def export_orca(
    parm7_path: Path,
    model_pdb: Optional[Path],
    output_path: Path,
    method: str = "B3LYP D3BJ def2-SVP",
    qm_charge: int = 0,
    qm_mult: int = 1,
    total_charge: Optional[int] = None,
    total_mult: Optional[int] = None,
    nproc: int = 8,
    near_cutoff: float = 6.0,
    qm_residues: Optional[List[int]] = None,
    input_path: Optional[Path] = None,
    element_check: bool = True,
    orcaff_path: Optional[Path] = None,
    convert_orcaff: bool = True,
) -> None:
    """
    Generate an ORCA QM/MM input file.

    ORCA's QM/MM implementation expects an ORCA force-field parameter file (ORCAFF.prms).
    This can be generated from an Amber topology (prmtop/parm7) using ORCA's `orca_mm` utility:

        orca_mm -convff -AMBER <topology.prmtop>

    This exporter will try to generate the ORCAFF file automatically when:
      - `orcaff_path` is not provided, and
      - `convert_orcaff=True`, and
      - `orca_mm` is found in PATH.

    The QM region is taken from the first source that is available, in this
    order: ``model_pdb``, ``qm_residues``, then the ML layer encoded in the
    B-factors of a layered input PDB.

    Args:
        parm7_path: Path to Amber parm7/prmtop topology file.
        model_pdb: PDB defining QM region (typically subset PDB).
        output_path: Output ORCA input file (.inp).
        method: ORCA QM method line (e.g., "B3LYP D3BJ def2-SVP").
        qm_charge: Charge of the QM region.
        qm_mult: Multiplicity of the QM region.
        total_charge: Charge of the full QM+MM system for Charge_Total in %qmmm.
            If None, uses topology total charge.
        total_mult: Multiplicity of the full QM+MM system for Mult_Total in %qmmm.
            If None, uses qm_mult.
        nproc: Number of processors.
        near_cutoff: Distance cutoff (Å) used to define ActiveAtoms when no layer B-factors exist.
        qm_residues: Alternative QM definition by 0-based residue indices in the ParmEd structure.
        input_path: Coordinate file (.pdb or .xyz). Atom order must match the topology.
        element_check: Validate element sequence between input and topology (best-effort).
        orcaff_path: Path to ORCAFF.prms file. If None, uses/creates <parm7_stem>.ORCAFF.prms in output dir.
        convert_orcaff: If True, try to run `orca_mm -convff -AMBER` when ORCAFF.prms is missing.

    Raises:
        ValueError: If no coordinates are available, no QM region is specified,
            the QM region is empty, or a QM index exceeds the topology size.
        RuntimeError: If `orca_mm` is run and exits with a non-zero status.
    """
    _check_parmed()

    parm = pmd.load_file(str(parm7_path))

    # Load / attach coordinates
    elements_for_output: Optional[List[str]] = None
    if input_path is not None:
        input_coords, input_elems = _read_input_geometry(input_path)
        _apply_coordinates_to_parm(parm, input_coords)
        elements_for_output = input_elems
        if element_check:
            _validate_element_order(parm, input_elems, strict=True)
    else:
        coords_attr = getattr(parm, "coordinates", None)
        try:
            n_coords = len(coords_attr) if coords_attr is not None else 0
        except Exception:
            # An object without a usable length is treated as "no coordinates".
            n_coords = 0
        if n_coords == 0:
            raise ValueError(
                "No coordinates found in the loaded topology/structure. "
                "Please provide a coordinate file with -i/--input (PDB or XYZ)."
            )
        elements_for_output = _get_parm_elements(parm)

    input_is_pdb = input_path is not None and input_path.suffix.lower() in {".pdb", ".ent"}

    # Detect layer indices from B-factors if input is a layered PDB produced by mlmm.
    layer_info: Optional[Dict[str, List[int]]] = None
    if input_is_pdb:
        try:
            from .utils import (
                has_valid_layer_bfactors,
                parse_layer_indices_from_bfactors,
                read_bfactors_from_pdb,
            )

            bfactors = read_bfactors_from_pdb(input_path)
            if len(bfactors) == len(parm.atoms) and has_valid_layer_bfactors(bfactors):
                layer_info = parse_layer_indices_from_bfactors(bfactors)
                click.echo(
                    "[oniom-export] Detected ML/MM layer B-factors in the input PDB; "
                    "using them to decide movable/frozen atoms."
                )
        except Exception:
            # Layer detection is best-effort: any failure falls back to the
            # explicit QM definition / distance-based selection below.
            layer_info = None

    # Determine QM region
    qm_indices: Set[int] = set()
    movable_indices: Set[int] = set()

    if model_pdb is not None:
        qm_indices = _read_qm_atoms_from_pdb(
            model_pdb,
            input_pdb=input_path if input_is_pdb else None,
            system_coords=getattr(parm, "coordinates", None),
            system_elements=elements_for_output,
        )
    elif qm_residues:
        qm_indices, movable_indices = _identify_qm_atoms_by_distance(parm, qm_residues, near_cutoff)
    elif layer_info is not None and layer_info.get("ml_indices"):
        qm_indices = set(int(i) for i in layer_info["ml_indices"])
    else:
        raise ValueError(
            "No QM region specified. Provide --model-pdb, or supply a layered input PDB "
            "(B-factor=0 marks the ML/QM region), or use qm_residues in the Python API."
        )

    if not qm_indices:
        raise ValueError("No QM atoms identified")

    if max(qm_indices) >= len(parm.atoms):
        raise ValueError(
            f"QM index out of range: max(qm_indices)={max(qm_indices)} but topology has {len(parm.atoms)} atoms. "
            "Check that your model PDB / input PDB and parm7 have consistent atom ordering."
        )

    # Determine ActiveAtoms (movable atoms)
    if layer_info is not None:
        # Layer B-factors take precedence: everything not marked frozen may move.
        frozen = set(int(i) for i in layer_info.get("frozen_indices", []))
        movable_indices = set(range(len(parm.atoms))) - frozen
    elif not movable_indices:
        # Distance-based selection: include all atoms in residues within `near_cutoff` of any QM atom.
        from scipy import spatial

        qm_list = sorted(qm_indices)
        neighbor_mask = np.any(
            spatial.distance.cdist(parm.coordinates, parm.coordinates[qm_list]) <= near_cutoff,
            axis=1,
        )
        neighbor_residues = set(parm.atoms[i].residue for i in np.where(neighbor_mask)[0])
        for residue in neighbor_residues:
            for atom in residue.atoms:
                movable_indices.add(atom.idx)

    movable_indices |= qm_indices  # QM atoms are always active

    if total_charge is None:
        total_charge = _get_total_charge(parm)
    if total_mult is None:
        total_mult = int(qm_mult)

    # ORCA generates link atoms automatically from QMAtoms and ORCAFF topology.
    # We still detect boundaries to report what was found and to keep behavior explicit.
    link_specs = _build_link_atom_specs(
        parm,
        qm_indices,
        elements=elements_for_output,
    )

    # Resolve/generate ORCAFF.prms
    out_dir = output_path.parent
    parm7_abs = parm7_path.resolve()
    manual_orcaff_cmd = _manual_orcaff_command(parm7_path, out_dir)
    if orcaff_path is None:
        orcaff_path = out_dir / f"{parm7_path.stem}.ORCAFF.prms"

    if not orcaff_path.exists() and convert_orcaff:
        orca_mm = shutil.which("orca_mm")
        if orca_mm is None:
            click.echo(
                "[oniom-orca] WARNING: ORCAFF.prms not found and `orca_mm` is not available on PATH.\n"
                f"[oniom-orca] Run manually: {manual_orcaff_cmd}"
            )
        else:
            click.echo(
                f"[oniom-orca] Generating ORCAFF.prms via: {manual_orcaff_cmd}"
            )
            # The child runs with cwd=out_dir, so the topology must be passed
            # as an absolute path; a relative path would be looked up relative
            # to out_dir instead of the caller's working directory.
            proc = subprocess.run(
                [orca_mm, "-convff", "-AMBER", str(parm7_abs)],
                cwd=str(out_dir),
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            )
            if proc.returncode != 0:
                raise RuntimeError(
                    "orca_mm failed "
                    f"(exit {proc.returncode}).\n"
                    f"Run manually: {manual_orcaff_cmd}\n"
                    f"Output:\n{proc.stdout}"
                )

    # Try to locate the generated file (orca_mm typically writes <stem>.ORCAFF.prms)
    if not orcaff_path.exists():
        candidates = sorted(out_dir.glob("*.ORCAFF.prms"), key=lambda p: p.stat().st_mtime, reverse=True)
        if candidates:
            orcaff_path = candidates[0]
        else:
            # NOTE(review): orca_mm may place its output next to the topology
            # rather than in the working directory — confirm; checking there
            # as a fallback is harmless either way.
            beside_topology = parm7_abs.with_name(f"{parm7_path.stem}.ORCAFF.prms")
            if beside_topology.exists():
                orcaff_path = beside_topology

    # ORCA input (use compact range syntax; indices are 0-based)
    qm_atoms_str = _format_orca_index_set(qm_indices)
    active_atoms_str = _format_orca_index_set(movable_indices)

    link_comment_block = ""
    if link_specs:
        # Informational only; note these comment lines print 1-based indices.
        link_lines = ["# Estimated link-H positions (QM/MM boundary caps; Angstrom)"]
        for mm_idx, spec in sorted(link_specs.items()):
            pos = np.asarray(spec["position"], dtype=float)
            link_lines.append(
                f"# QM {int(spec['qm_idx']) + 1:>5d} MM {int(mm_idx) + 1:>5d} "
                f"Hcap ({pos[0]:10.6f}, {pos[1]:10.6f}, {pos[2]:10.6f})"
            )
        link_comment_block = "\n".join(link_lines) + "\n"

    # Prefer a relative filename when possible. A bare file name is only
    # correct when the parameter file sits beside the ORCA input; a relative
    # path pointing elsewhere is made absolute so the reference stays valid.
    if orcaff_path.is_absolute():
        orcaff_ref = str(orcaff_path)
    elif orcaff_path.resolve().parent == out_dir.resolve():
        orcaff_ref = orcaff_path.name
    else:
        orcaff_ref = str(orcaff_path.resolve())

    coords_ref = input_path if input_path is not None else "(from topology/structure)"

    orca_header = f"""# ORCA QM/MM input generated by mlmm oniom-orca
# Amber topology: {parm7_path}
# ORCAFF parameters: {orcaff_ref}
# Coordinates: {coords_ref}
{link_comment_block}

%pal nprocs {nproc} end

! {method}
! QMMM

%qmmm
  ORCAFFFilename "{orcaff_ref}"
  QMAtoms {qm_atoms_str} end
  ActiveAtoms {active_atoms_str} end
  Charge_Total {int(total_charge)}
  Mult_Total {int(total_mult)}
end

* xyz {qm_charge} {qm_mult}
"""

    # Add coordinates. Input-file elements are used only when they cover every
    # atom; otherwise each element is derived from the topology atom.
    use_input_elements = (
        elements_for_output is not None and len(elements_for_output) == len(parm.atoms)
    )
    atom_lines: List[str] = []
    for atom in parm.atoms:
        if use_input_elements:
            element = elements_for_output[atom.idx]
        else:
            element = _get_parm_element(atom)
        atom_lines.append(f" {element:<2} {atom.xx:12.6f} {atom.xy:12.6f} {atom.xz:12.6f}\n")

    orca_input = orca_header + "".join(atom_lines) + "*\n"

    # Write output (encoding pinned so non-ASCII paths in the header comments
    # do not depend on the platform's locale default).
    with output_path.open("w", encoding="utf-8") as f:
        f.write(orca_input)

    click.echo(f"[oniom-orca] Wrote '{output_path}'")
    click.echo(f"[oniom-orca] QM atoms: {len(qm_indices)}, Active atoms: {len(movable_indices)}")
    click.echo(f"[oniom-orca] Link boundaries (auto-capped by ORCA): {len(link_specs)}")
    if orcaff_path.exists():
        click.echo(f"[oniom-orca] ORCAFF.prms: {orcaff_path}")
    else:
        click.echo(
            f"[oniom-orca] NOTE: ORCAFF.prms not found at '{orcaff_path}'. "
            f"Run manually: {manual_orcaff_cmd}"
        )
|
|
1809
|
+
|
|
1810
|
+
|
|
1811
|
+
|
|
1812
|
+
# -----------------------------------------------
|
|
1813
|
+
# CLI Commands
|
|
1814
|
+
# -----------------------------------------------
|
|
1815
|
+
|
|
1816
|
+
@click.command(
    name="oniom-export",
    help="Export ONIOM input from Amber parm7 topology (Gaussian g16 or ORCA).",
    context_settings={"help_option_names": ["-h", "--help"]},
)
@click.option(
    "--parm",
    "parm7",
    type=click.Path(path_type=Path, exists=True, dir_okay=False),
    required=True,
    help="Amber parm7 topology file.",
)
@click.option(
    "-i",
    "--input",
    "input_coords",
    type=click.Path(path_type=Path, exists=True, dir_okay=False),
    default=None,
    help="Coordinate file (.pdb or .xyz) for the current structure (atom order must match parm7).",
)
@click.option(
    "--element-check/--no-element-check",
    default=True,
    show_default=True,
    help="Validate that the element sequence in --input matches the parm7 topology.",
)
@click.option(
    "--model-pdb",
    type=click.Path(path_type=Path, exists=True, dir_okay=False),
    default=None,
    help="PDB file defining QM region atoms.",
)
@click.option(
    "-o",
    "--output",
    type=click.Path(path_type=Path, dir_okay=False),
    required=True,
    help="Output file path (.gjf/.com for g16, .inp for ORCA when --mode is omitted).",
)
@click.option(
    "--mode",
    type=click.Choice(["g16", "orca"], case_sensitive=False),
    default=None,
    help="Export mode. If omitted, inferred from -o suffix: .gjf/.com -> g16, .inp -> orca.",
)
@click.option(
    "--method",
    type=str,
    default=None,
    help="QM method and basis set. Defaults depend on mode.",
)
@click.option(
    "-q",
    "--charge",
    type=int,
    required=True,
    help="Charge of QM region.",
)
@click.option(
    "-m",
    "--multiplicity",
    type=int,
    default=1,
    show_default=True,
    help="Multiplicity of QM region.",
)
@click.option(
    "--near",
    type=float,
    default=6.0,
    show_default=True,
    help="Distance cutoff for movable/active atoms (Angstrom).",
)
@click.option(
    "--nproc",
    type=int,
    default=8,
    show_default=True,
    help="Number of processors.",
)
@click.option(
    "--mem",
    type=str,
    default="16GB",
    show_default=True,
    help="Memory allocation (g16 mode).",
)
@click.option(
    "--total-charge",
    type=int,
    default=None,
    help="Total charge of full QM+MM system for ORCA Charge_Total (orca mode).",
)
@click.option(
    "--total-mult",
    type=int,
    default=None,
    help="Total multiplicity of full QM+MM system for ORCA Mult_Total (orca mode).",
)
@click.option(
    "--orcaff",
    # dir_okay=False matches the other path options: a directory can never be
    # a valid ORCAFF.prms file, so reject it at argument-parsing time.
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    default=None,
    help="Path to ORCAFF.prms (orca mode). If omitted, uses/creates <parm7_stem>.ORCAFF.prms in output directory.",
)
@click.option(
    "--convert-orcaff/--no-convert-orcaff",
    default=True,
    show_default=True,
    help="If ORCAFF.prms is missing, try `orca_mm -convff -AMBER` automatically (orca mode).",
)
def cli(
    parm7: Path,
    input_coords: Optional[Path],
    element_check: bool,
    model_pdb: Optional[Path],
    output: Path,
    mode: Optional[str],
    method: Optional[str],
    charge: int,
    multiplicity: int,
    near: float,
    nproc: int,
    mem: str,
    total_charge: Optional[int],
    total_mult: Optional[int],
    orcaff: Optional[Path],
    convert_orcaff: bool,
) -> None:
    """Export Gaussian/ORCA ONIOM input via a unified entrypoint.

    The export mode comes from ``--mode`` or, when omitted, from the output
    suffix. ``g16`` dispatches to ``export_gaussian``; any other resolved mode
    dispatches to ``export_orca``. Any exception raised during export is
    printed to stderr as ``ERROR: ...`` and the process exits with status 1.
    """
    try:
        resolved_mode = _resolve_oniom_mode(mode, output)

        if resolved_mode == "g16":
            export_gaussian(
                parm7_path=parm7,
                model_pdb=model_pdb,
                output_path=output,
                method=method or _GAUSSIAN_DEFAULT_METHOD,
                qm_charge=charge,
                qm_mult=multiplicity,
                near_cutoff=near,
                nproc=nproc,
                mem=mem,
                input_path=input_coords,
                element_check=element_check,
            )
            return

        export_orca(
            parm7_path=parm7,
            model_pdb=model_pdb,
            output_path=output,
            method=method or _ORCA_DEFAULT_METHOD,
            qm_charge=charge,
            qm_mult=multiplicity,
            total_charge=total_charge,
            total_mult=total_mult,
            nproc=nproc,
            near_cutoff=near,
            input_path=input_coords,
            element_check=element_check,
            orcaff_path=orcaff,
            convert_orcaff=convert_orcaff,
        )
    except Exception as e:
        click.echo(f"ERROR: {e}", err=True)
        # Chain the cause so the original traceback stays reachable when debugging.
        raise SystemExit(1) from e
|