mlmm-toolkit 0.2.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (372) hide show
  1. hessian_ff/__init__.py +50 -0
  2. hessian_ff/analytical_hessian.py +609 -0
  3. hessian_ff/constants.py +46 -0
  4. hessian_ff/forcefield.py +339 -0
  5. hessian_ff/loaders.py +608 -0
  6. hessian_ff/native/Makefile +8 -0
  7. hessian_ff/native/__init__.py +28 -0
  8. hessian_ff/native/analytical_hessian.py +88 -0
  9. hessian_ff/native/analytical_hessian_ext.cpp +258 -0
  10. hessian_ff/native/bonded.py +82 -0
  11. hessian_ff/native/bonded_ext.cpp +640 -0
  12. hessian_ff/native/loader.py +349 -0
  13. hessian_ff/native/nonbonded.py +118 -0
  14. hessian_ff/native/nonbonded_ext.cpp +1150 -0
  15. hessian_ff/prmtop_parmed.py +23 -0
  16. hessian_ff/system.py +107 -0
  17. hessian_ff/terms/__init__.py +14 -0
  18. hessian_ff/terms/angle.py +73 -0
  19. hessian_ff/terms/bond.py +44 -0
  20. hessian_ff/terms/cmap.py +406 -0
  21. hessian_ff/terms/dihedral.py +141 -0
  22. hessian_ff/terms/nonbonded.py +209 -0
  23. hessian_ff/tests/__init__.py +0 -0
  24. hessian_ff/tests/conftest.py +75 -0
  25. hessian_ff/tests/data/small/complex.parm7 +1346 -0
  26. hessian_ff/tests/data/small/complex.pdb +125 -0
  27. hessian_ff/tests/data/small/complex.rst7 +63 -0
  28. hessian_ff/tests/test_coords_input.py +44 -0
  29. hessian_ff/tests/test_energy_force.py +49 -0
  30. hessian_ff/tests/test_hessian.py +137 -0
  31. hessian_ff/tests/test_smoke.py +18 -0
  32. hessian_ff/tests/test_validation.py +40 -0
  33. hessian_ff/workflows.py +889 -0
  34. mlmm/__init__.py +36 -0
  35. mlmm/__main__.py +7 -0
  36. mlmm/_version.py +34 -0
  37. mlmm/add_elem_info.py +374 -0
  38. mlmm/advanced_help.py +91 -0
  39. mlmm/align_freeze_atoms.py +601 -0
  40. mlmm/all.py +3535 -0
  41. mlmm/bond_changes.py +231 -0
  42. mlmm/bool_compat.py +223 -0
  43. mlmm/cli.py +574 -0
  44. mlmm/cli_utils.py +166 -0
  45. mlmm/default_group.py +337 -0
  46. mlmm/defaults.py +467 -0
  47. mlmm/define_layer.py +526 -0
  48. mlmm/dft.py +1041 -0
  49. mlmm/energy_diagram.py +253 -0
  50. mlmm/extract.py +2213 -0
  51. mlmm/fix_altloc.py +464 -0
  52. mlmm/freq.py +1406 -0
  53. mlmm/harmonic_constraints.py +140 -0
  54. mlmm/hessian_cache.py +44 -0
  55. mlmm/hessian_calc.py +174 -0
  56. mlmm/irc.py +638 -0
  57. mlmm/mlmm_calc.py +2262 -0
  58. mlmm/mm_parm.py +945 -0
  59. mlmm/oniom_export.py +1983 -0
  60. mlmm/oniom_import.py +457 -0
  61. mlmm/opt.py +1742 -0
  62. mlmm/path_opt.py +1353 -0
  63. mlmm/path_search.py +2299 -0
  64. mlmm/preflight.py +88 -0
  65. mlmm/py.typed +1 -0
  66. mlmm/pysis_runner.py +45 -0
  67. mlmm/scan.py +1047 -0
  68. mlmm/scan2d.py +1226 -0
  69. mlmm/scan3d.py +1265 -0
  70. mlmm/scan_common.py +184 -0
  71. mlmm/summary_log.py +736 -0
  72. mlmm/trj2fig.py +448 -0
  73. mlmm/tsopt.py +2871 -0
  74. mlmm/utils.py +2309 -0
  75. mlmm/xtb_embedcharge_correction.py +475 -0
  76. mlmm_toolkit-0.2.2.dev0.dist-info/METADATA +1159 -0
  77. mlmm_toolkit-0.2.2.dev0.dist-info/RECORD +372 -0
  78. mlmm_toolkit-0.2.2.dev0.dist-info/WHEEL +5 -0
  79. mlmm_toolkit-0.2.2.dev0.dist-info/entry_points.txt +2 -0
  80. mlmm_toolkit-0.2.2.dev0.dist-info/licenses/LICENSE +674 -0
  81. mlmm_toolkit-0.2.2.dev0.dist-info/top_level.txt +4 -0
  82. pysisyphus/Geometry.py +1667 -0
  83. pysisyphus/LICENSE +674 -0
  84. pysisyphus/TableFormatter.py +63 -0
  85. pysisyphus/TablePrinter.py +74 -0
  86. pysisyphus/__init__.py +12 -0
  87. pysisyphus/calculators/AFIR.py +452 -0
  88. pysisyphus/calculators/AnaPot.py +20 -0
  89. pysisyphus/calculators/AnaPot2.py +48 -0
  90. pysisyphus/calculators/AnaPot3.py +12 -0
  91. pysisyphus/calculators/AnaPot4.py +20 -0
  92. pysisyphus/calculators/AnaPotBase.py +337 -0
  93. pysisyphus/calculators/AnaPotCBM.py +25 -0
  94. pysisyphus/calculators/AtomAtomTransTorque.py +154 -0
  95. pysisyphus/calculators/CFOUR.py +250 -0
  96. pysisyphus/calculators/Calculator.py +844 -0
  97. pysisyphus/calculators/CerjanMiller.py +24 -0
  98. pysisyphus/calculators/Composite.py +123 -0
  99. pysisyphus/calculators/ConicalIntersection.py +171 -0
  100. pysisyphus/calculators/DFTBp.py +430 -0
  101. pysisyphus/calculators/DFTD3.py +66 -0
  102. pysisyphus/calculators/DFTD4.py +84 -0
  103. pysisyphus/calculators/Dalton.py +61 -0
  104. pysisyphus/calculators/Dimer.py +681 -0
  105. pysisyphus/calculators/Dummy.py +20 -0
  106. pysisyphus/calculators/EGO.py +76 -0
  107. pysisyphus/calculators/EnergyMin.py +224 -0
  108. pysisyphus/calculators/ExternalPotential.py +264 -0
  109. pysisyphus/calculators/FakeASE.py +35 -0
  110. pysisyphus/calculators/FourWellAnaPot.py +28 -0
  111. pysisyphus/calculators/FreeEndNEBPot.py +39 -0
  112. pysisyphus/calculators/Gaussian09.py +18 -0
  113. pysisyphus/calculators/Gaussian16.py +726 -0
  114. pysisyphus/calculators/HardSphere.py +159 -0
  115. pysisyphus/calculators/IDPPCalculator.py +49 -0
  116. pysisyphus/calculators/IPIClient.py +133 -0
  117. pysisyphus/calculators/IPIServer.py +234 -0
  118. pysisyphus/calculators/LEPSBase.py +24 -0
  119. pysisyphus/calculators/LEPSExpr.py +139 -0
  120. pysisyphus/calculators/LennardJones.py +80 -0
  121. pysisyphus/calculators/MOPAC.py +219 -0
  122. pysisyphus/calculators/MullerBrownSympyPot.py +51 -0
  123. pysisyphus/calculators/MultiCalc.py +85 -0
  124. pysisyphus/calculators/NFK.py +45 -0
  125. pysisyphus/calculators/OBabel.py +87 -0
  126. pysisyphus/calculators/ONIOMv2.py +1129 -0
  127. pysisyphus/calculators/ORCA.py +893 -0
  128. pysisyphus/calculators/ORCA5.py +6 -0
  129. pysisyphus/calculators/OpenMM.py +88 -0
  130. pysisyphus/calculators/OpenMolcas.py +281 -0
  131. pysisyphus/calculators/OverlapCalculator.py +908 -0
  132. pysisyphus/calculators/Psi4.py +218 -0
  133. pysisyphus/calculators/PyPsi4.py +37 -0
  134. pysisyphus/calculators/PySCF.py +341 -0
  135. pysisyphus/calculators/PyXTB.py +73 -0
  136. pysisyphus/calculators/QCEngine.py +106 -0
  137. pysisyphus/calculators/Rastrigin.py +22 -0
  138. pysisyphus/calculators/Remote.py +76 -0
  139. pysisyphus/calculators/Rosenbrock.py +15 -0
  140. pysisyphus/calculators/SocketCalc.py +97 -0
  141. pysisyphus/calculators/TIP3P.py +111 -0
  142. pysisyphus/calculators/TransTorque.py +161 -0
  143. pysisyphus/calculators/Turbomole.py +965 -0
  144. pysisyphus/calculators/VRIPot.py +37 -0
  145. pysisyphus/calculators/WFOWrapper.py +333 -0
  146. pysisyphus/calculators/WFOWrapper2.py +341 -0
  147. pysisyphus/calculators/XTB.py +418 -0
  148. pysisyphus/calculators/__init__.py +81 -0
  149. pysisyphus/calculators/cosmo_data.py +139 -0
  150. pysisyphus/calculators/parser.py +150 -0
  151. pysisyphus/color.py +19 -0
  152. pysisyphus/config.py +133 -0
  153. pysisyphus/constants.py +65 -0
  154. pysisyphus/cos/AdaptiveNEB.py +230 -0
  155. pysisyphus/cos/ChainOfStates.py +725 -0
  156. pysisyphus/cos/FreeEndNEB.py +25 -0
  157. pysisyphus/cos/FreezingString.py +103 -0
  158. pysisyphus/cos/GrowingChainOfStates.py +71 -0
  159. pysisyphus/cos/GrowingNT.py +309 -0
  160. pysisyphus/cos/GrowingString.py +508 -0
  161. pysisyphus/cos/NEB.py +189 -0
  162. pysisyphus/cos/SimpleZTS.py +64 -0
  163. pysisyphus/cos/__init__.py +22 -0
  164. pysisyphus/cos/stiffness.py +199 -0
  165. pysisyphus/drivers/__init__.py +17 -0
  166. pysisyphus/drivers/afir.py +855 -0
  167. pysisyphus/drivers/barriers.py +271 -0
  168. pysisyphus/drivers/birkholz.py +138 -0
  169. pysisyphus/drivers/cluster.py +318 -0
  170. pysisyphus/drivers/diabatization.py +133 -0
  171. pysisyphus/drivers/merge.py +368 -0
  172. pysisyphus/drivers/merge_mol2.py +322 -0
  173. pysisyphus/drivers/opt.py +375 -0
  174. pysisyphus/drivers/perf.py +91 -0
  175. pysisyphus/drivers/pka.py +52 -0
  176. pysisyphus/drivers/precon_pos_rot.py +669 -0
  177. pysisyphus/drivers/rates.py +480 -0
  178. pysisyphus/drivers/replace.py +219 -0
  179. pysisyphus/drivers/scan.py +212 -0
  180. pysisyphus/drivers/spectrum.py +166 -0
  181. pysisyphus/drivers/thermo.py +31 -0
  182. pysisyphus/dynamics/Gaussian.py +103 -0
  183. pysisyphus/dynamics/__init__.py +20 -0
  184. pysisyphus/dynamics/colvars.py +136 -0
  185. pysisyphus/dynamics/driver.py +297 -0
  186. pysisyphus/dynamics/helpers.py +256 -0
  187. pysisyphus/dynamics/lincs.py +105 -0
  188. pysisyphus/dynamics/mdp.py +364 -0
  189. pysisyphus/dynamics/rattle.py +121 -0
  190. pysisyphus/dynamics/thermostats.py +128 -0
  191. pysisyphus/dynamics/wigner.py +266 -0
  192. pysisyphus/elem_data.py +3473 -0
  193. pysisyphus/exceptions.py +2 -0
  194. pysisyphus/filtertrj.py +69 -0
  195. pysisyphus/helpers.py +623 -0
  196. pysisyphus/helpers_pure.py +649 -0
  197. pysisyphus/init_logging.py +50 -0
  198. pysisyphus/intcoords/Bend.py +69 -0
  199. pysisyphus/intcoords/Bend2.py +25 -0
  200. pysisyphus/intcoords/BondedFragment.py +32 -0
  201. pysisyphus/intcoords/Cartesian.py +41 -0
  202. pysisyphus/intcoords/CartesianCoords.py +140 -0
  203. pysisyphus/intcoords/Coords.py +56 -0
  204. pysisyphus/intcoords/DLC.py +197 -0
  205. pysisyphus/intcoords/DistanceFunction.py +34 -0
  206. pysisyphus/intcoords/DummyImproper.py +70 -0
  207. pysisyphus/intcoords/DummyTorsion.py +72 -0
  208. pysisyphus/intcoords/LinearBend.py +105 -0
  209. pysisyphus/intcoords/LinearDisplacement.py +80 -0
  210. pysisyphus/intcoords/OutOfPlane.py +59 -0
  211. pysisyphus/intcoords/PrimTypes.py +286 -0
  212. pysisyphus/intcoords/Primitive.py +137 -0
  213. pysisyphus/intcoords/RedundantCoords.py +659 -0
  214. pysisyphus/intcoords/RobustTorsion.py +59 -0
  215. pysisyphus/intcoords/Rotation.py +147 -0
  216. pysisyphus/intcoords/Stretch.py +31 -0
  217. pysisyphus/intcoords/Torsion.py +101 -0
  218. pysisyphus/intcoords/Torsion2.py +25 -0
  219. pysisyphus/intcoords/Translation.py +45 -0
  220. pysisyphus/intcoords/__init__.py +61 -0
  221. pysisyphus/intcoords/augment_bonds.py +126 -0
  222. pysisyphus/intcoords/derivatives.py +10512 -0
  223. pysisyphus/intcoords/eval.py +80 -0
  224. pysisyphus/intcoords/exceptions.py +37 -0
  225. pysisyphus/intcoords/findiffs.py +48 -0
  226. pysisyphus/intcoords/generate_derivatives.py +414 -0
  227. pysisyphus/intcoords/helpers.py +235 -0
  228. pysisyphus/intcoords/logging_conf.py +10 -0
  229. pysisyphus/intcoords/mp_derivatives.py +10836 -0
  230. pysisyphus/intcoords/setup.py +962 -0
  231. pysisyphus/intcoords/setup_fast.py +176 -0
  232. pysisyphus/intcoords/update.py +272 -0
  233. pysisyphus/intcoords/valid.py +89 -0
  234. pysisyphus/interpolate/Geodesic.py +93 -0
  235. pysisyphus/interpolate/IDPP.py +55 -0
  236. pysisyphus/interpolate/Interpolator.py +116 -0
  237. pysisyphus/interpolate/LST.py +70 -0
  238. pysisyphus/interpolate/Redund.py +152 -0
  239. pysisyphus/interpolate/__init__.py +9 -0
  240. pysisyphus/interpolate/helpers.py +34 -0
  241. pysisyphus/io/__init__.py +22 -0
  242. pysisyphus/io/aomix.py +178 -0
  243. pysisyphus/io/cjson.py +24 -0
  244. pysisyphus/io/crd.py +101 -0
  245. pysisyphus/io/cube.py +220 -0
  246. pysisyphus/io/fchk.py +184 -0
  247. pysisyphus/io/hdf5.py +49 -0
  248. pysisyphus/io/hessian.py +72 -0
  249. pysisyphus/io/mol2.py +146 -0
  250. pysisyphus/io/molden.py +293 -0
  251. pysisyphus/io/orca.py +189 -0
  252. pysisyphus/io/pdb.py +269 -0
  253. pysisyphus/io/psf.py +79 -0
  254. pysisyphus/io/pubchem.py +31 -0
  255. pysisyphus/io/qcschema.py +34 -0
  256. pysisyphus/io/sdf.py +29 -0
  257. pysisyphus/io/xyz.py +61 -0
  258. pysisyphus/io/zmat.py +175 -0
  259. pysisyphus/irc/DWI.py +108 -0
  260. pysisyphus/irc/DampedVelocityVerlet.py +134 -0
  261. pysisyphus/irc/Euler.py +22 -0
  262. pysisyphus/irc/EulerPC.py +345 -0
  263. pysisyphus/irc/GonzalezSchlegel.py +187 -0
  264. pysisyphus/irc/IMKMod.py +164 -0
  265. pysisyphus/irc/IRC.py +878 -0
  266. pysisyphus/irc/IRCDummy.py +10 -0
  267. pysisyphus/irc/Instanton.py +307 -0
  268. pysisyphus/irc/LQA.py +53 -0
  269. pysisyphus/irc/ModeKill.py +136 -0
  270. pysisyphus/irc/ParamPlot.py +53 -0
  271. pysisyphus/irc/RK4.py +36 -0
  272. pysisyphus/irc/__init__.py +31 -0
  273. pysisyphus/irc/initial_displ.py +219 -0
  274. pysisyphus/linalg.py +411 -0
  275. pysisyphus/line_searches/Backtracking.py +88 -0
  276. pysisyphus/line_searches/HagerZhang.py +184 -0
  277. pysisyphus/line_searches/LineSearch.py +232 -0
  278. pysisyphus/line_searches/StrongWolfe.py +108 -0
  279. pysisyphus/line_searches/__init__.py +9 -0
  280. pysisyphus/line_searches/interpol.py +15 -0
  281. pysisyphus/modefollow/NormalMode.py +40 -0
  282. pysisyphus/modefollow/__init__.py +10 -0
  283. pysisyphus/modefollow/davidson.py +199 -0
  284. pysisyphus/modefollow/lanczos.py +95 -0
  285. pysisyphus/optimizers/BFGS.py +99 -0
  286. pysisyphus/optimizers/BacktrackingOptimizer.py +113 -0
  287. pysisyphus/optimizers/ConjugateGradient.py +98 -0
  288. pysisyphus/optimizers/CubicNewton.py +75 -0
  289. pysisyphus/optimizers/FIRE.py +113 -0
  290. pysisyphus/optimizers/HessianOptimizer.py +1176 -0
  291. pysisyphus/optimizers/LBFGS.py +228 -0
  292. pysisyphus/optimizers/LayerOpt.py +411 -0
  293. pysisyphus/optimizers/MicroOptimizer.py +169 -0
  294. pysisyphus/optimizers/NCOptimizer.py +90 -0
  295. pysisyphus/optimizers/Optimizer.py +1084 -0
  296. pysisyphus/optimizers/PreconLBFGS.py +260 -0
  297. pysisyphus/optimizers/PreconSteepestDescent.py +7 -0
  298. pysisyphus/optimizers/QuickMin.py +74 -0
  299. pysisyphus/optimizers/RFOptimizer.py +181 -0
  300. pysisyphus/optimizers/RSA.py +99 -0
  301. pysisyphus/optimizers/StabilizedQNMethod.py +248 -0
  302. pysisyphus/optimizers/SteepestDescent.py +23 -0
  303. pysisyphus/optimizers/StringOptimizer.py +173 -0
  304. pysisyphus/optimizers/__init__.py +41 -0
  305. pysisyphus/optimizers/closures.py +301 -0
  306. pysisyphus/optimizers/cls_map.py +58 -0
  307. pysisyphus/optimizers/exceptions.py +6 -0
  308. pysisyphus/optimizers/gdiis.py +280 -0
  309. pysisyphus/optimizers/guess_hessians.py +311 -0
  310. pysisyphus/optimizers/hessian_updates.py +355 -0
  311. pysisyphus/optimizers/poly_fit.py +285 -0
  312. pysisyphus/optimizers/precon.py +153 -0
  313. pysisyphus/optimizers/restrict_step.py +24 -0
  314. pysisyphus/pack.py +172 -0
  315. pysisyphus/peakdetect.py +948 -0
  316. pysisyphus/plot.py +1031 -0
  317. pysisyphus/run.py +2106 -0
  318. pysisyphus/socket_helper.py +74 -0
  319. pysisyphus/stocastic/FragmentKick.py +132 -0
  320. pysisyphus/stocastic/Kick.py +81 -0
  321. pysisyphus/stocastic/Pipeline.py +303 -0
  322. pysisyphus/stocastic/__init__.py +21 -0
  323. pysisyphus/stocastic/align.py +127 -0
  324. pysisyphus/testing.py +96 -0
  325. pysisyphus/thermo.py +156 -0
  326. pysisyphus/trj.py +824 -0
  327. pysisyphus/tsoptimizers/RSIRFOptimizer.py +56 -0
  328. pysisyphus/tsoptimizers/RSPRFOptimizer.py +182 -0
  329. pysisyphus/tsoptimizers/TRIM.py +59 -0
  330. pysisyphus/tsoptimizers/TSHessianOptimizer.py +463 -0
  331. pysisyphus/tsoptimizers/__init__.py +23 -0
  332. pysisyphus/wavefunction/Basis.py +239 -0
  333. pysisyphus/wavefunction/DIIS.py +76 -0
  334. pysisyphus/wavefunction/__init__.py +25 -0
  335. pysisyphus/wavefunction/build_ext.py +42 -0
  336. pysisyphus/wavefunction/cart2sph.py +190 -0
  337. pysisyphus/wavefunction/diabatization.py +304 -0
  338. pysisyphus/wavefunction/excited_states.py +435 -0
  339. pysisyphus/wavefunction/gen_ints.py +1811 -0
  340. pysisyphus/wavefunction/helpers.py +104 -0
  341. pysisyphus/wavefunction/ints/__init__.py +0 -0
  342. pysisyphus/wavefunction/ints/boys.py +193 -0
  343. pysisyphus/wavefunction/ints/boys_table_N_64_xasym_27.1_step_0.01.npy +0 -0
  344. pysisyphus/wavefunction/ints/cart_gto3d.py +176 -0
  345. pysisyphus/wavefunction/ints/coulomb3d.py +25928 -0
  346. pysisyphus/wavefunction/ints/diag_quadrupole3d.py +10036 -0
  347. pysisyphus/wavefunction/ints/dipole3d.py +8762 -0
  348. pysisyphus/wavefunction/ints/int2c2e3d.py +7198 -0
  349. pysisyphus/wavefunction/ints/int3c2e3d_sph.py +65040 -0
  350. pysisyphus/wavefunction/ints/kinetic3d.py +8240 -0
  351. pysisyphus/wavefunction/ints/ovlp3d.py +3777 -0
  352. pysisyphus/wavefunction/ints/quadrupole3d.py +15054 -0
  353. pysisyphus/wavefunction/ints/self_ovlp3d.py +198 -0
  354. pysisyphus/wavefunction/localization.py +458 -0
  355. pysisyphus/wavefunction/multipole.py +159 -0
  356. pysisyphus/wavefunction/normalization.py +36 -0
  357. pysisyphus/wavefunction/pop_analysis.py +134 -0
  358. pysisyphus/wavefunction/shells.py +1171 -0
  359. pysisyphus/wavefunction/wavefunction.py +504 -0
  360. pysisyphus/wrapper/__init__.py +11 -0
  361. pysisyphus/wrapper/exceptions.py +2 -0
  362. pysisyphus/wrapper/jmol.py +120 -0
  363. pysisyphus/wrapper/mwfn.py +169 -0
  364. pysisyphus/wrapper/packmol.py +71 -0
  365. pysisyphus/xyzloader.py +168 -0
  366. pysisyphus/yaml_mods.py +45 -0
  367. thermoanalysis/LICENSE +674 -0
  368. thermoanalysis/QCData.py +244 -0
  369. thermoanalysis/__init__.py +0 -0
  370. thermoanalysis/config.py +3 -0
  371. thermoanalysis/constants.py +20 -0
  372. thermoanalysis/thermo.py +1011 -0
mlmm/oniom_export.py ADDED
@@ -0,0 +1,1983 @@
1
+ # mlmm/oniom_export.py
2
+
3
+ """
4
+ Export ML/MM system to Gaussian/ORCA ONIOM input format from Amber parm7 topology.
5
+
6
+ Example:
7
+ mlmm oniom-export --parm real.parm7 -i pocket.pdb --model-pdb ml.pdb -o out.com
8
+
9
+ For detailed documentation, see: docs/oniom_export.md
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import re
15
+ import shlex
16
+ import shutil
17
+ import subprocess
18
+ from pathlib import Path
19
+ from typing import Any, Dict, List, Optional, Set, Tuple
20
+
21
+ import logging
22
+
23
+ import click
24
+ import numpy as np
25
+
26
+ from .add_elem_info import guess_element as _guess_element
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ try:
31
+ import parmed as pmd
32
+ except ImportError:
33
+ pmd = None
34
+
35
+ _GAUSSIAN_DEFAULT_METHOD = "wB97XD/def2-TZVPD"
36
+ _ORCA_DEFAULT_METHOD = "B3LYP D3BJ def2-SVP"
37
+
38
+
39
def _check_parmed() -> None:
    """Raise ``ImportError`` when the optional ParmEd import did not succeed."""
    # ``pmd`` is bound to None at module import time if ``import parmed`` failed.
    if pmd is not None:
        return
    message = "ParmEd is required for ONIOM export. Install with: pip install parmed"
    raise ImportError(message)
45
+
46
+
47
+ # -----------------------------------------------
48
+ # Coordinates / element handling
49
+ # -----------------------------------------------
50
+
51
+ # Periodic table symbols (1-indexed; index 0 is dummy)
52
# One row of the string per period (lanthanides/actinides kept inline);
# the leading "" keeps the list indexable by atomic number.
_PERIODIC_TABLE: List[str] = [""] + (
    "H He "
    "Li Be B C N O F Ne "
    "Na Mg Al Si P S Cl Ar "
    "K Ca Sc Ti V Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br Kr "
    "Rb Sr Y Zr Nb Mo Tc Ru Rh Pd Ag Cd In Sn Sb Te I Xe "
    "Cs Ba La Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb Lu "
    "Hf Ta W Re Os Ir Pt Au Hg Tl Pb Bi Po At Rn "
    "Fr Ra Ac Th Pa U Np Pu Am Cm Bk Cf Es Fm Md No Lr "
    "Rf Db Sg Bh Hs Mt Ds Rg Cn Nh Fl Mc Lv Ts Og"
).split()
173
+
174
# Upper-cased two-letter symbols taken from the periodic table list.
_TWO_LETTER_ELEMENT_UPPER: Set[str] = set(
    map(str.upper, filter(lambda symbol: len(symbol) == 2, _PERIODIC_TABLE))
)
175
+
176
+ # A small set of common atomic masses for robust element inference when atomic number is missing.
177
+ # (Atomic masses vary slightly by isotope; we only need a reasonable guess.)
178
# (symbol, mass) pairs built from two parallel tuples; order is preserved
# because the nearest-mass lookup keeps the first entry on ties.
_COMMON_MASS_TABLE: List[Tuple[str, float]] = list(
    zip(
        (
            "H", "C", "N", "O", "F", "Na", "Mg", "Al",
            "Si", "P", "S", "Cl", "K", "Ca", "Mn", "Fe",
            "Co", "Ni", "Cu", "Zn", "Se", "Br", "I",
        ),
        (
            1.008, 12.011, 14.007, 15.999, 18.998, 22.990, 24.305, 26.982,
            28.085, 30.974, 32.06, 35.45, 39.098, 40.078, 54.938, 55.845,
            58.933, 58.693, 63.546, 65.38, 78.971, 79.904, 126.90,
        ),
    )
)
203
+
204
+
205
def _normalize_element_symbol(sym: str) -> str:
    """
    Put an element symbol into canonical capitalisation ('cl'/'CL' -> 'Cl').

    Every character that is not an ASCII letter is dropped first; of the
    remaining letters only the first two are kept. The result is not checked
    against the periodic table.

    Returns 'X' when the input is None/empty or contains no ASCII letter.
    """
    letters = re.sub(r"[^A-Za-z]", "", (sym or "").strip())
    if not letters:
        return "X"
    # letters[1:2] is "" for a one-letter symbol, so no length branch is needed.
    return letters[0].upper() + letters[1:2].lower()
221
+
222
+
223
def _infer_element_from_pdb_atom_name(atom_name_field: str) -> str:
    """
    Guess an element from the raw 4-character PDB atom-name field (columns 13-16).

    The guess relies on the column alignment of the name:
    - a leading digit (e.g. "1H  ") or a leading blank (e.g. " CA ") means the
      element is the single character in the second position;
    - otherwise the first two characters are taken as the element when they
      form a known two-letter symbol (e.g. "CA  " -> Ca), else only the first
      character is used.

    Fields shorter than two characters are normalized as they are.
    """
    name4 = (atom_name_field or "")[:4]
    if len(name4) < 2:
        return _normalize_element_symbol(name4.strip())

    lead, second = name4[0], name4[1]
    # Digit prefix or right-justified name: one-letter element in column 14.
    if lead.isdigit() or lead == " ":
        return _normalize_element_symbol(second)

    # Left-justified name: two-letter element if recognised, else first letter.
    pair = name4[:2].strip().upper()
    chosen = pair if pair in _TWO_LETTER_ELEMENT_UPPER else lead
    return _normalize_element_symbol(chosen)
249
+
250
+
251
def _read_pdb_geometry(pdb_path: Path) -> Tuple[np.ndarray, List[str]]:
    """
    Collect coordinates and element symbols from the ATOM/HETATM records of a
    PDB file, in file order.

    Element resolution per record:
    - a non-empty element field (columns 77-78) is used, except that a
      one-letter value is replaced by a two-letter element inferred from the
      atom-name field when the two disagree on the first letter
      (e.g. a shifted "G" for an atom named "MG");
    - with an empty element field, the imported ``guess_element`` helper is
      asked first (atom name, residue name, HETATM flag) and the atom-name
      alignment heuristic is the last resort.

    Returns an (n_atoms, 3) float array and the list of normalized symbols.
    Raises ValueError if a coordinate field cannot be parsed or if the file
    holds no ATOM/HETATM record.
    """
    xyz_rows: List[List[float]] = []
    symbols: List[str] = []

    with pdb_path.open("r") as handle:
        for record in handle:
            if not record.startswith(("ATOM", "HETATM")):
                continue

            try:
                position = [
                    float(record[30:38]),
                    float(record[38:46]),
                    float(record[46:54]),
                ]
            except Exception as e:
                raise ValueError(
                    f"Failed to parse coordinates from PDB line: {record.rstrip()}"
                ) from e

            name_field = record[12:16]
            declared = record[76:78].strip()
            if declared:
                symbol = declared
                from_name = _infer_element_from_pdb_atom_name(name_field)
                # A lone letter that is not even the first letter of the
                # two-letter element in the atom name is treated as misaligned.
                misaligned = (
                    len(declared) == 1
                    and from_name
                    and len(from_name) == 2
                    and declared.upper() != from_name[0].upper()
                )
                if misaligned:
                    symbol = from_name
            else:
                guessed = _guess_element(
                    name_field.strip(),
                    record[17:20].strip(),
                    record.startswith("HETATM"),
                )
                symbol = guessed if guessed else _infer_element_from_pdb_atom_name(name_field)

            xyz_rows.append(position)
            symbols.append(_normalize_element_symbol(symbol))

    if not xyz_rows:
        raise ValueError(f"No ATOM/HETATM records found in PDB: {pdb_path}")
    return np.asarray(xyz_rows, dtype=float), symbols
293
+
294
+
295
def _read_xyz_geometry(xyz_path: Path) -> Tuple[np.ndarray, List[str]]:
    """
    Parse the first frame of an XYZ file into coordinates and element symbols.

    Line 1 must hold the atom count, line 2 is the comment line, and the next
    ``n_atoms`` lines each hold a symbol followed by x, y, z. Blank lines in
    the atom section are skipped, which then trips the final count check.

    Returns an array of the parsed coordinates and the normalized symbols.
    Raises ValueError for a file with fewer than three lines, a non-integer
    header, too few atom lines, a malformed atom line, or a count mismatch.
    """
    with xyz_path.open("r") as handle:
        lines = list(handle)

    if len(lines) < 3:
        raise ValueError(f"XYZ file is too short: {xyz_path}")

    try:
        n_atoms = int(lines[0].strip())
    except Exception as e:
        raise ValueError(f"First line of XYZ must be an integer atom count: {xyz_path}") from e

    if len(lines) < 2 + n_atoms:
        raise ValueError(
            f"XYZ file atom count ({n_atoms}) exceeds available lines: {xyz_path}"
        )

    xyz_rows: List[List[float]] = []
    symbols: List[str] = []
    # Index-based walk over the atom section (starts after the two header lines).
    for offset in range(n_atoms):
        raw = lines[offset + 2].strip()
        if not raw:
            continue
        tokens = raw.split()
        if len(tokens) < 4:
            raise ValueError(f"Invalid XYZ atom line: '{raw}'")
        symbol = _normalize_element_symbol(tokens[0])
        try:
            position = [float(tokens[1]), float(tokens[2]), float(tokens[3])]
        except Exception as e:
            raise ValueError(f"Invalid XYZ coordinates in line: '{raw}'") from e
        xyz_rows.append(position)
        symbols.append(symbol)

    if len(xyz_rows) != n_atoms:
        raise ValueError(
            f"XYZ parsing produced {len(xyz_rows)} atoms but header says {n_atoms}: {xyz_path}"
        )

    return np.asarray(xyz_rows, dtype=float), symbols
336
+
337
+
338
def _read_input_geometry(input_path: Path) -> Tuple[np.ndarray, List[str]]:
    """
    Dispatch to the geometry reader matching the file extension.

    Recognised extensions (case-insensitive):
    - .pdb and .ent -> PDB reader
    - .xyz          -> XYZ reader

    Returns the (coordinates, elements) pair of the chosen reader.
    Raises ValueError for any other extension.
    """
    ext = input_path.suffix.lower()
    if ext in (".pdb", ".ent"):
        return _read_pdb_geometry(input_path)
    if ext == ".xyz":
        return _read_xyz_geometry(input_path)
    raise ValueError(f"Unsupported input coordinate format: {input_path} (expected .pdb or .xyz)")
352
+
353
+
354
def _apply_coordinates_to_parm(parm, coords: np.ndarray) -> None:
    """
    Store `coords` on a ParmEd structure without touching the atom order.

    The array must have shape (number of atoms in `parm`, 3); otherwise a
    ValueError is raised. Coordinates are written twice on a best-effort
    basis: once as ``parm.coordinates`` and once per atom as ``xx``/``xy``/``xz``.
    A failure of either write is only logged at debug level.
    """
    xyz = np.asarray(coords, dtype=float)
    n_top = len(parm.atoms)
    if xyz.shape != (n_top, 3):
        raise ValueError(
            f"Atom count mismatch: parm7 has {n_top} atoms, "
            f"but coordinate file has {xyz.shape[0]} atoms"
        )

    # Structure-level assignment first.
    try:
        parm.coordinates = xyz
    except Exception:
        logger.debug("Failed to set structure-level coordinates on parm", exc_info=True)

    # Then mirror the values onto each atom.
    for index, (atom, row) in enumerate(zip(parm.atoms, xyz)):
        cx, cy, cz = row
        try:
            atom.xx = float(cx)
            atom.xy = float(cy)
            atom.xz = float(cz)
        except Exception:
            logger.debug("Failed to set per-atom coords on atom %d", index, exc_info=True)
378
+
379
+
380
def _infer_element_from_mass(mass: float, tol: float = 1.5) -> str:
    """
    Map an atomic mass to the closest entry of the common-element mass table.

    Returns the matching symbol when the closest reference mass lies within
    `tol` of `mass`; returns 'X' when `mass` is not convertible to float or
    nothing is close enough.
    """
    try:
        target = float(mass)
    except Exception:
        return "X"

    # min() keeps the first entry on ties, like a strict "<" scan would.
    closest = min(
        _COMMON_MASS_TABLE,
        key=lambda entry: abs(target - entry[1]),
        default=None,
    )
    if closest is None:
        return "X"

    symbol, reference = closest
    delta = abs(target - reference)
    # 1e9 is the "nothing found yet" starting distance of the scan; a distance
    # that is not below it (including NaN) counts as no candidate at all.
    if delta < 1e9 and delta <= tol:
        return _normalize_element_symbol(symbol)
    return "X"
402
+
403
+
404
def _get_parm_element(atom) -> str:
    """
    Derive an element symbol for one ParmEd atom, falling through four sources.

    Order of attempts:
    1) ``atom.element_name`` (used if it normalizes to something other than 'X')
    2) ``atom.atomic_number`` looked up in the periodic table list
    3) ``atom.mass`` matched against the common mass table
    4) first character of ``atom.name`` (weak; can be wrong for metals)

    Returns 'X' when every source is missing or inconclusive.
    """
    # 1) explicit element name
    declared = getattr(atom, "element_name", None)
    if declared:
        symbol = _normalize_element_symbol(str(declared))
        if symbol != "X":
            return symbol

    # 2) atomic number; 0 and out-of-range values fall through
    number = getattr(atom, "atomic_number", None)
    if number is not None:
        try:
            z_int = int(number)
        except Exception:
            logger.debug("Failed to infer element from atomic_number=%s", number, exc_info=True)
        else:
            if 0 < z_int < len(_PERIODIC_TABLE):
                return _PERIODIC_TABLE[z_int]

    # 3) nearest common atomic mass
    atom_mass = getattr(atom, "mass", None)
    if atom_mass is not None:
        by_mass = _infer_element_from_mass(atom_mass)
        if by_mass != "X":
            return by_mass

    # 4) last resort: leading character of the atom name
    label = getattr(atom, "name", "")
    if not label:
        return "X"
    return _normalize_element_symbol(str(label)[0])
444
+
445
+
446
def _get_parm_elements(parm) -> List[str]:
    """Return one element symbol per atom of `parm`, in ``parm.atoms`` order."""
    return list(map(_get_parm_element, parm.atoms))
449
+
450
+
451
def _validate_element_order(
    parm,
    input_elements: List[str],
    *,
    strict: bool = True,
) -> None:
    """
    Compare the element sequence of a coordinate file with the parm7 topology.

    Positions where either side is unknown ('X') are not compared; a warning
    is printed when any such position exists. With `strict` set, the first
    real disagreement raises ValueError; otherwise each disagreement is
    printed as a warning and the scan continues.

    Raises ValueError as well when the two sequences differ in length.
    """
    topology_elements = _get_parm_elements(parm)
    if len(topology_elements) != len(input_elements):
        raise ValueError(
            f"Atom count mismatch: parm7 has {len(topology_elements)} atoms, "
            f"but input has {len(input_elements)} atoms"
        )

    unknown_in = sum(sym == "X" for sym in input_elements)
    unknown_parm = sum(sym == "X" for sym in topology_elements)
    if unknown_in > 0 or unknown_parm > 0:
        click.echo(
            f"[oniom-export] WARNING: element check is partial "
            f"(unknown elements: input={unknown_in}, parm={unknown_parm})"
        )

    for index, (expected, given) in enumerate(zip(topology_elements, input_elements)):
        # Skip unknowns and agreeing pairs; only true conflicts are reported.
        if "X" in (expected, given) or expected == given:
            continue
        msg = (
            f"Element sequence mismatch at atom index {index} (0-based): "
            f"parm7={expected}, input={given}. "
            f"Atom order likely differs between parm7 and the coordinate file."
        )
        if strict:
            raise ValueError(msg)
        click.echo(f"[oniom-export] WARNING: {msg}")
490
+
491
+
492
def _get_total_charge(parm) -> int:
    """
    Calculate the total charge (integer) from atom partial charges.

    Sources, in order:
    1) the sum of ``atom.charge`` over ``parm.atoms`` (atoms without a
       ``charge`` attribute count as 0.0);
    2) if that fails, the sum of ``parm.parm_data["CHARGE"]`` divided by
       18.2223, the scale factor Amber prmtop files apply to stored charges;
    3) if that fails too, 0 is assumed and a warning is printed, so the
       fallback no longer happens silently.

    The sum is rounded to the nearest integer; a warning is printed when it
    was more than 1e-3 away from that integer.

    Returns
    -------
    int
        Rounded total charge.
    """
    amber_charge_scale = 18.2223

    q: float
    try:
        q = float(sum(float(getattr(a, "charge", 0.0)) for a in parm.atoms))
    except Exception:
        logger.debug(
            "Failed to sum per-atom charges; trying raw prmtop CHARGE data",
            exc_info=True,
        )
        # Fallback: raw prmtop charges (stored scaled)
        try:
            q = float(np.sum(parm.parm_data["CHARGE"])) / amber_charge_scale
        except Exception:
            # Still best-effort, but tell the user that 0 is a guess.
            click.echo(
                "[oniom-export] WARNING: could not determine total charge from "
                "the topology; assuming 0"
            )
            q = 0.0

    q_int = int(round(q))
    if abs(q - q_int) > 1e-3:
        click.echo(
            f"[oniom-export] WARNING: total charge {q:.6f} is not close to an integer; "
            f"rounded to {q_int}"
        )
    return q_int
520
+
521
+
522
def _fix_atom_type(atom_type: str) -> str:
    """
    Rewrite an atom type name for Gaussian compatibility.

    - "2C" / "3C" -> "C2C" / "C3C" (numeric prefix)
    - "C*" / "N*" -> "C9" / "N9" (asterisk)
    - names starting with a lowercase letter (e.g. GAFF2 types) are
      upper-cased and prefixed with "L" (ca -> LCA)
    - anything else is returned unchanged (as a string)
    """
    label = str(atom_type)

    special_cases = {"2C": "C2C", "3C": "C3C", "C*": "C9", "N*": "N9"}
    renamed = special_cases.get(label)
    if renamed is not None:
        return renamed

    if re.match(r"^[a-z]+", label) is not None:
        return f"L{label.upper()}"
    return label
543
+
544
+
545
def _parse_pdb_atoms_with_meta(pdb_path: Path) -> List[Dict[str, Any]]:
    """
    Read every ATOM/HETATM record of a PDB file into a list of dictionaries.

    Each dictionary holds:
    - idx: 0-based position of the record among the ATOM/HETATM lines
    - atom_name, res_name, chain_id, res_seq, icode: stripped fixed-column fields
      (res_seq is an int and falls back to 0 when it cannot be parsed)
    - coord: np.ndarray of shape (3,); (0, 0, 0) when any coordinate field is unparsable
    - element: normalized element symbol, taken from columns 77-78 when present,
      otherwise guessed from the atom/residue name
    - bfactor: float from columns 61-66, 0.0 when unparsable
    """
    records: List[Dict[str, Any]] = []
    with pdb_path.open("r") as handle:
        for raw in handle:
            if not (raw.startswith("ATOM") or raw.startswith("HETATM")):
                continue

            name_field = raw[12:16]
            atom_name = name_field.strip()
            res_name = raw[17:20].strip()

            try:
                res_seq = int(raw[22:26].strip())
            except Exception:
                res_seq = 0

            # All three coordinates are parsed together: one bad field zeroes the triple.
            try:
                xyz = [float(raw[lo:hi]) for lo, hi in ((30, 38), (38, 46), (46, 54))]
            except Exception:
                xyz = [0.0, 0.0, 0.0]

            # B-factor (tempFactor) is columns 61-66 in PDB v3.3 (0-based slice 60:66)
            try:
                bfactor = float(raw[60:66])
            except Exception:
                bfactor = 0.0

            symbol = raw[76:78].strip() if len(raw) >= 78 else ""
            if not symbol:
                # No explicit element column: guess from names, then from the raw name field.
                symbol = _guess_element(atom_name, res_name, raw.startswith("HETATM"))
                if not symbol:
                    symbol = _infer_element_from_pdb_atom_name(name_field)

            records.append(
                {
                    "idx": len(records),
                    "atom_name": atom_name,
                    "res_name": res_name,
                    "chain_id": raw[21:22].strip(),
                    "res_seq": res_seq,
                    "icode": raw[26:27].strip(),
                    "coord": np.array(xyz, dtype=float),
                    "element": _normalize_element_symbol(symbol),
                    "bfactor": float(bfactor),
                }
            )
    return records
610
+
611
+
612
def _read_qm_atoms_from_pdb(
    model_pdb: Path,
    *,
    input_pdb: Optional[Path] = None,
    system_coords: Optional[np.ndarray] = None,
    system_elements: Optional[List[str]] = None,
    match_tol: float = 0.2,
) -> Set[int]:
    """
    Determine QM-region atom indices (0-based, topology order).

    This function tries (in order):
    1) Match model_pdb atoms to atoms in `input_pdb` via (atom_name, res_name, res_seq),
       with coordinate-based disambiguation when that ID is not unique.
    2) If `input_pdb` is not usable (not given, not a .pdb/.ent file, or missing on disk),
       match by nearest coordinates against `system_coords`
       (optionally requiring element agreement when available).

    Parameters
    ----------
    model_pdb
        PDB containing QM-region atoms (typically a subset PDB produced by `define-layer`
        or `build_model_pdb_from_bfactors`).
    input_pdb
        Full-system PDB whose atom order matches the Amber topology (recommended).
    system_coords
        Full-system coordinates (shape (N,3)) in topology order.
    system_elements
        Optional element list (len N). If provided, element mismatches are rejected
        in the coordinate-only path (unknown elements, "X", are never rejected).
    match_tol
        Maximum allowed distance (Å) for coordinate matching/disambiguation.
        In the ID path an excess only triggers a warning; in the coordinate-only
        path the atom is left out.

    Returns
    -------
    Set[int]
        QM atom indices in 0-based topology order. Empty when model_pdb has no atoms.

    Raises
    ------
    ValueError
        If `input_pdb` is used but contains no atoms, or if neither a usable
        `input_pdb` nor `system_coords` is available.
    """
    model_atoms = _parse_pdb_atoms_with_meta(model_pdb)
    if not model_atoms:
        return set()

    qm_indices: Set[int] = set()

    # Path 1: match by PDB identifiers using full-system PDB
    if input_pdb is not None and input_pdb.suffix.lower() in {".pdb", ".ent"} and input_pdb.exists():
        input_atoms = _parse_pdb_atoms_with_meta(input_pdb)
        if not input_atoms:
            raise ValueError(f"Failed to read any atoms from input PDB: {input_pdb}")

        if system_coords is None:
            # If the caller didn't provide coords, use the PDB coords for disambiguation
            system_coords = np.asarray([a["coord"] for a in input_atoms], dtype=float)
        sys_coords = np.asarray(system_coords, dtype=float)

        # Map from (atom_name,res_name,res_seq) -> list of candidate indices
        key_to_candidates: Dict[Tuple[str, str, int], List[int]] = {}
        for a in input_atoms:
            key = (a["atom_name"], a["res_name"], int(a["res_seq"]))
            key_to_candidates.setdefault(key, []).append(int(a["idx"]))

        used: Set[int] = set()
        missing = 0
        for ma in model_atoms:
            key = (ma["atom_name"], ma["res_name"], int(ma["res_seq"]))
            cand = key_to_candidates.get(key, [])
            if not cand:
                missing += 1
                continue

            # Unique, still-unclaimed identifier: accept it directly.
            if len(cand) == 1 and cand[0] not in used:
                qm_indices.add(cand[0])
                used.add(cand[0])
                continue

            # Disambiguate by nearest coordinate among candidates (and avoid already-used indices)
            cand_free = [i for i in cand if i not in used]
            if not cand_free:
                cand_free = cand  # allow reuse as a last resort

            cand_coords = sys_coords[np.asarray(cand_free, dtype=int)]
            dists = np.linalg.norm(cand_coords - ma["coord"][None, :], axis=1)
            j = int(np.argmin(dists))
            chosen = int(cand_free[j])

            if float(dists[j]) > match_tol:
                # ID match exists but coordinates are far apart -> likely inconsistent inputs
                click.echo(
                    f"[oniom-export] WARNING: matched ID {key} but nearest distance is {dists[j]:.3f} Å "
                    f"(> {match_tol} Å). Check that input_pdb and model_pdb come from the same structure."
                )

            qm_indices.add(chosen)
            used.add(chosen)

        if missing > 0:
            click.echo(
                f"[oniom-export] WARNING: {missing} atoms in model_pdb could not be matched by "
                f"(atom_name,res_name,res_seq) to input_pdb. "
                "If this is unexpected, verify residue numbering and naming."
            )

        return qm_indices

    # Path 2: coordinate-only matching against system_coords
    if system_coords is None:
        raise ValueError(
            "Cannot match model_pdb to topology atoms without either `input_pdb` (full-system PDB) "
            "or `system_coords`."
        )

    sys_coords = np.asarray(system_coords, dtype=float)

    # Only the scipy import is guarded: a failure while querying must surface instead of
    # silently re-running the whole matching with the brute-force search.
    try:
        from scipy.spatial import cKDTree
    except ImportError:
        def _nearest(xyz: np.ndarray) -> Tuple[float, int]:
            # Slow fallback: brute-force nearest neighbour
            d = np.linalg.norm(sys_coords - xyz[None, :], axis=1)
            i = int(np.argmin(d))
            return float(d[i]), i
    else:
        tree = cKDTree(sys_coords)

        def _nearest(xyz: np.ndarray) -> Tuple[float, int]:
            dist, idx = tree.query(xyz, k=1)
            return float(dist), int(idx)

    too_far = 0
    wrong_element = 0
    for ma in model_atoms:
        dist, idx = _nearest(ma["coord"])
        if dist > match_tol:
            too_far += 1
            continue
        if system_elements is not None and 0 <= idx < len(system_elements):
            e_sys = _normalize_element_symbol(system_elements[idx])
            e_mod = _normalize_element_symbol(ma["element"])
            if e_sys != "X" and e_mod != "X" and e_sys != e_mod:
                wrong_element += 1
                continue
        qm_indices.add(idx)

    if too_far or wrong_element:
        # Mirror the ID path, which also reports model atoms it could not place.
        click.echo(
            f"[oniom-export] WARNING: {too_far + wrong_element} atoms in model_pdb could not be matched "
            f"by coordinates (farther than {match_tol} Å: {too_far}, element mismatch: {wrong_element}). "
            "Check that model_pdb and the system coordinates come from the same structure."
        )

    return qm_indices
757
+
758
def _identify_qm_atoms_by_distance(
    parm,
    qm_residue_indices: List[int],
    near_cutoff: float,
) -> Tuple[Set[int], Set[int]]:
    """
    Select QM atoms by residue index and movable atoms by distance to them.

    QM atoms are all atoms of the residues listed in ``qm_residue_indices``
    (indices outside ``0 <= i < len(parm.residues)`` are ignored). Movable atoms
    are all atoms of every residue that has at least one atom within
    ``near_cutoff`` of any QM atom.

    Returns:
        (qm_atom_indices, movable_atom_indices) - both 0-based; two empty sets
        when no QM atom was selected.
    """
    from scipy import spatial

    n_residues = len(parm.residues)
    qm_atom_indices: Set[int] = {
        atom.idx
        for resi in qm_residue_indices
        if 0 <= resi < n_residues
        for atom in parm.residues[resi].atoms
    }
    if not qm_atom_indices:
        return set(), set()

    # Distance from every atom to every QM atom; an atom is "near" if any entry is within cutoff.
    all_xyz = parm.coordinates
    to_qm = spatial.distance.cdist(all_xyz, all_xyz[sorted(qm_atom_indices)])
    near_atoms = np.where(np.any(to_qm <= near_cutoff, axis=1))[0]

    # Promote near atoms to whole residues.
    near_residues = {parm.atoms[i].residue for i in near_atoms}
    movable_indices: Set[int] = {atom.idx for residue in near_residues for atom in residue.atoms}

    return qm_atom_indices, movable_indices
796
+
797
+
798
+ # -----------------------------------------------
799
+ # QM/MM covalent-boundary link helpers
800
+ # -----------------------------------------------
801
+
802
# QM-H bond length used when placing a link hydrogen, keyed by the element of the QM parent atom.
_LINK_H_BOND_LENGTH = dict(C=1.09, N=1.01)

# Amber atom type given to the link hydrogen, keyed by the element of the QM parent atom.
_LINK_H_FF_TYPE = dict(C="HC", N="H")
811
+
812
+
813
def _atom_xyz(parm, atom_idx: int) -> np.ndarray:
    """
    Return the Cartesian coordinate of one topology atom as a float array.

    The per-atom ``xx``/``xy``/``xz`` attributes are preferred; when they cannot
    be read or converted, row ``atom_idx`` of ``parm.coordinates`` is used instead.
    """
    i = int(atom_idx)
    atom = parm.atoms[i]
    try:
        xyz = [float(atom.xx), float(atom.xy), float(atom.xz)]
    except Exception:
        return np.asarray(parm.coordinates[i], dtype=float)
    return np.array(xyz, dtype=float)
820
+
821
+
822
def _find_qmmm_boundary_pairs(parm, qm_indices: Set[int]) -> List[Tuple[int, int]]:
    """
    List the topology bonds that cross the QM/MM boundary.

    A bond crosses the boundary when exactly one of its two atoms is in
    ``qm_indices``. Each boundary MM atom yields one pair; if it is bonded to
    several QM atoms, the spatially closest one is kept and a warning is printed.

    Returns
    -------
    List[Tuple[int, int]]
        (qm_idx, mm_idx) index pairs (0-based), ordered by mm_idx.
    """
    qm_partners: Dict[int, List[int]] = {}

    for bond in getattr(parm, "bonds", []):
        a = int(bond.atom1.idx)
        b = int(bond.atom2.idx)
        a_in_qm = a in qm_indices
        b_in_qm = b in qm_indices
        if a_in_qm and not b_in_qm:
            qm_partners.setdefault(b, []).append(a)
        elif b_in_qm and not a_in_qm:
            qm_partners.setdefault(a, []).append(b)

    boundary: List[Tuple[int, int]] = []
    for mm_idx in sorted(qm_partners):
        cands = sorted({int(q) for q in qm_partners[mm_idx]})
        if len(cands) > 1:
            # Several QM neighbours: keep the one nearest to the MM atom.
            mm_xyz = _atom_xyz(parm, mm_idx)
            best_qm = min(
                cands,
                key=lambda q: float(np.linalg.norm(_atom_xyz(parm, int(q)) - mm_xyz)),
            )
            click.echo(
                f"[oniom-export] WARNING: MM atom {mm_idx} is bonded to multiple QM atoms "
                f"{cands}; using closest QM atom {best_qm} as the link parent."
            )
            boundary.append((int(best_qm), int(mm_idx)))
        else:
            boundary.append((cands[0], mm_idx))

    return boundary
862
+
863
+
864
def _estimate_link_h_position(parm, qm_idx: int, mm_idx: int, bond_length: float) -> Optional[np.ndarray]:
    """
    Place a link hydrogen on the QM->MM bond axis: r_H = r_QM + u(QM->MM) * d.

    Returns None when the two atoms coincide (separation below 1e-12), because
    the bond direction is then undefined.
    """
    origin = _atom_xyz(parm, qm_idx)
    direction = _atom_xyz(parm, mm_idx) - origin
    separation = float(np.linalg.norm(direction))
    if separation < 1.0e-12:
        return None
    unit = direction / separation
    return origin + unit * float(bond_length)
873
+
874
+
875
def _build_link_atom_specs(
    parm,
    qm_indices: Set[int],
    *,
    elements: Optional[List[str]] = None,
) -> Dict[int, Dict[str, Any]]:
    """
    Describe the link hydrogen for every covalent QM/MM boundary bond.

    The result is keyed by the boundary MM atom index; each value contains:
    - qm_idx: 0-based QM parent index
    - ff_type: Gaussian Amber atom type for the link hydrogen
    - bond_length: QM-H bond length used for placement
    - position: estimated link-H coordinate (Å)

    The QM parent element comes from ``elements`` when that list covers the
    index, otherwise from the topology atom. Elements without a tabulated
    bond length use the carbon values and trigger one warning per element.
    Boundaries whose link position cannot be estimated are skipped with a warning.
    """
    link_specs: Dict[int, Dict[str, Any]] = {}
    already_warned: Set[str] = set()

    for qm_idx, mm_idx in _find_qmmm_boundary_pairs(parm, qm_indices):
        have_input_element = elements is not None and 0 <= qm_idx < len(elements)
        raw_elem = elements[qm_idx] if have_input_element else _get_parm_element(parm.atoms[qm_idx])
        qm_elem = _normalize_element_symbol(raw_elem)

        if qm_elem not in _LINK_H_BOND_LENGTH:
            bond_len = float(_LINK_H_BOND_LENGTH["C"])
            if qm_elem not in already_warned:
                click.echo(
                    f"[oniom-export] WARNING: unsupported QM parent element '{qm_elem}' for link-H "
                    f"distance; using C-like default ({bond_len:.2f} Å)."
                )
                already_warned.add(qm_elem)
        else:
            bond_len = float(_LINK_H_BOND_LENGTH[qm_elem])

        ff_type = _LINK_H_FF_TYPE.get(qm_elem, _LINK_H_FF_TYPE["C"])
        link_pos = _estimate_link_h_position(parm, qm_idx=qm_idx, mm_idx=mm_idx, bond_length=bond_len)
        if link_pos is None:
            click.echo(
                f"[oniom-export] WARNING: failed to estimate link-H position for boundary "
                f"(QM={qm_idx}, MM={mm_idx}); skipping link annotation for this bond."
            )
            continue

        link_specs[int(mm_idx)] = dict(
            qm_idx=int(qm_idx),
            ff_type=str(ff_type),
            bond_length=float(bond_len),
            position=link_pos,
        )

    return link_specs
927
+
928
+
929
+ # -----------------------------------------------
930
+ # Gaussian ONIOM Export
931
+ # -----------------------------------------------
932
+
933
def _write_gaussian_header(
    parm,
    parm_path: str,
    output_name: str,
    method: str = "wB97XD/def2-TZVPD",
    nproc: int = 8,
    mem: str = "16GB",
    qm_charge: int = 0,
    qm_mult: int = 1,
    real_charge: Optional[int] = None,
    real_mult: Optional[int] = None,
) -> str:
    """
    Generate Gaussian ONIOM input header.

    Gaussian ONIOM uses *three* charge/multiplicity pairs for a 2-layer ONIOM job:
      (real system @ low level) (model system @ high level) (model system @ low level)

    We default the real-system charge to the total charge of the topology, and the real-system
    multiplicity to `qm_mult` (since the MM region is typically closed-shell).

    Parameters
    ----------
    parm
        Topology passed to `_get_total_charge` to obtain the total charge.
    parm_path
        Path of the source topology; only quoted in the title line.
    output_name
        Output file name; its stem names the checkpoint file (``%chk``).
    method
        High-level method string placed inside ``oniom(...)``.
    nproc, mem
        Values written to ``%nprocshared`` and ``%mem``.
    qm_charge, qm_mult
        Charge and multiplicity written for both model-system pairs.
    real_charge, real_mult
        Charge and multiplicity of the real system; see the defaults above.

    Returns
    -------
    str
        Link0 lines, route section, title and the charge/multiplicity line,
        ending with a newline so the coordinate block can follow directly.
    """
    total_charge = _get_total_charge(parm)

    if real_charge is None:
        real_charge = total_charge
    if real_mult is None:
        real_mult = qm_mult

    # An explicit real_charge is honoured even when it disagrees with the topology; only warn.
    if int(real_charge) != int(total_charge):
        click.echo(
            f"[oniom-export] WARNING: real_charge={real_charge} differs from topology total charge={total_charge}. "
            "Proceeding as requested."
        )

    # Checkpoint file shares the base name of the output file.
    chk_name = Path(output_name).stem

    # The blank lines inside the template separate the route section, the title and the
    # charge/multiplicity line.
    header = f"""%chk={chk_name}.chk
%mem={mem}
%nprocshared={nproc}
#p oniom({method}:amber=softonly)
scf=(xqc,intrep,maxconventionalcyc=80)
nosymm iop(2/15=3) geom=connectivity Amber=(FirstEquiv)

ONIOM inputfile generated by mlmm oniom-export from {parm_path}.

{real_charge} {real_mult} {qm_charge} {qm_mult} {qm_charge} {qm_mult}
"""
    return header
981
+
982
def _write_gaussian_coordinates(
    parm,
    qm_indices: Set[int],
    movable_indices: Set[int],
    elements: Optional[List[str]] = None,
    link_specs: Optional[Dict[int, Dict[str, Any]]] = None,
) -> Tuple[str, str]:
    """
    Build the Gaussian ONIOM atom list and the connectivity table.

    One coordinate line is written per topology atom:
    ``element-type-charge  flag  x y z  layer [link spec]`` where the flag is 0
    for atoms in ``movable_indices`` and -1 otherwise, and the layer is "H" for
    atoms in ``qm_indices`` and "L" otherwise. Low-layer atoms present in
    ``link_specs`` get an ``H-<type> <parent>`` suffix with a 1-based parent index.

    ``elements`` is only used when its length equals the number of atoms;
    otherwise elements are taken from the topology.

    The connectivity table has one line per atom listing its higher-numbered
    bonded partners (1-based) with bond order 1.0.

    Returns:
        (coords_section, connectivity_section)
    """
    # Optional elements list (preferred when coming from input PDB/XYZ)
    use_input_elements = elements is not None and len(elements) == len(parm.atoms)

    coord_rows: List[str] = []
    for atom in parm.atoms:
        idx = atom.idx
        in_qm = idx in qm_indices
        layer = "H" if in_qm else "L"
        freeze_flag = 0 if idx in movable_indices else -1

        element = elements[idx] if use_input_elements else _get_parm_element(atom)
        label = f"{element}-{_fix_atom_type(atom.atom_type)}-{atom.charge:.6f}"

        link_suffix = ""
        if not in_qm and link_specs is not None and idx in link_specs:
            spec = link_specs[idx]
            qm_parent = int(spec["qm_idx"]) + 1  # Gaussian connectivity is 1-based
            link_ff_type = _fix_atom_type(str(spec["ff_type"]))
            link_suffix = f" H-{link_ff_type} {qm_parent}"

        coord_rows.append(
            f"{label:<20} {freeze_flag:>2} {atom.xx:12.6f} {atom.xy:12.6f} {atom.xz:12.6f} {layer}{link_suffix}"
        )

    # Connectivity section: store each bond once, under its lower-numbered atom.
    higher_partners: Dict[int, List[int]] = {}
    for bond in parm.bonds:
        a, b = bond.atom1.idx, bond.atom2.idx
        if a == b:
            continue
        low, high = (a, b) if a < b else (b, a)
        higher_partners.setdefault(low, []).append(high)

    conn_rows: List[str] = []
    for i in range(len(parm.atoms)):
        partners = sorted(higher_partners.get(i, ()))
        if not partners:
            conn_rows.append(f"{i+1}")
            continue
        partner_str = " ".join(f"{j+1} 1.0" for j in partners)
        conn_rows.append(f"{i+1} {partner_str}")

    return "\n".join(coord_rows), "\n".join(conn_rows)
1048
+
1049
+
1050
def _write_gaussian_ff_params(parm) -> str:
    """
    Extract Amber-style force field parameters from parm7 for Gaussian (Amber=SoftOnly).

    This attempts to write a *self-contained* parameter section with the core Amber terms:
    - NonBon (Amber mixing rule + standard 1-4 scaling)
    - HrmStr1 (bonds)
    - HrmBnd1 (angles)
    - AmbTrs (proper torsions, periodicities 1-4)
    - ImpTrs (improper torsions)
    - VDW (per-atom-type LJ parameters; Radius = Rmin/2, Well-depth = epsilon)

    Parameters are collected per atom-type tuple, so each distinct parameter set is
    written once no matter how many bonds/angles/dihedrals of that type exist. Bond,
    angle and proper-torsion type tuples are stored in a canonical direction so that
    A-B and B-A (etc.) do not produce duplicate lines.

    Parameters
    ----------
    parm
        ParmEd-like topology exposing ``bonds``, ``angles``, ``dihedrals``,
        optionally ``impropers``, and ``atoms``. Missing collections are treated
        as empty; entries whose parameters cannot be read are skipped.

    Returns
    -------
    str
        The parameter section, lines joined with newlines (no trailing newline).

    Limitations
    -----------
    - Amber torsions with periodicity > 4 are not representable by AmbTrs; they are skipped with a warning.
    - Per-dihedral 1-4 scaling factors (SCEE/SCNB) are not emitted (Gaussian uses the global NonBon scaling).
      This matches most standard Amber/GAFF workflows where SCEE/SCNB are uniform.
    - If two *different* terms with the same periodicity exist for one torsion type, their
      magnitudes are added and (when the phases differ) a warning is printed.
    """
    lines: List[str] = []

    # Non-bonded master function: Amber arithmetic mixing + standard exclusions and 1-4 scaling.
    # V-type=3 (Amber arithmetic), C-type=1 (Coulomb), cutoffs 0/0 (no explicit cutoffs here),
    # VScale: 1-2=0, 1-3=0, 1-4=0.5 ; CScale: 1-2=0, 1-3=0, 1-4=Amber default (1/1.2 via -1.2).
    lines.append("! Nonbonded master function (Amber defaults)")
    lines.append("NonBon 3 1 0 0 0.0 0.0 0.5 0.0 0.0 -1.2")

    # -------------------------
    # Bonds
    # -------------------------
    lines.append("")
    lines.append("! Bond parameters")
    bond_params: Set[Tuple[str, str, float, float]] = set()
    for bond in getattr(parm, "bonds", []):
        btype = getattr(bond, "type", None)
        if btype is None:
            continue
        try:
            k = float(getattr(btype, "k"))
            req = float(getattr(btype, "req"))
        except Exception:
            continue

        t1 = _fix_atom_type(getattr(bond.atom1, "atom_type", "X"))
        t2 = _fix_atom_type(getattr(bond.atom2, "atom_type", "X"))
        if t1 > t2:
            t1, t2 = t2, t1
        bond_params.add((t1, t2, k, req))

    for t1, t2, k, req in sorted(bond_params):
        lines.append(f"HrmStr1 {t1} {t2} {k:.6f} {req:.6f}")

    # -------------------------
    # Angles
    # -------------------------
    lines.append("")
    lines.append("! Angle parameters")
    angle_params: Set[Tuple[str, str, str, float, float]] = set()
    for angle in getattr(parm, "angles", []):
        atype = getattr(angle, "type", None)
        if atype is None:
            continue
        try:
            k = float(getattr(atype, "k"))
            theteq = float(getattr(atype, "theteq"))
        except Exception:
            continue

        t1 = _fix_atom_type(getattr(angle.atom1, "atom_type", "X"))
        t2 = _fix_atom_type(getattr(angle.atom2, "atom_type", "X"))
        t3 = _fix_atom_type(getattr(angle.atom3, "atom_type", "X"))

        # Sort endpoints for consistency
        if t1 > t3:
            t1, t3 = t3, t1
        angle_params.add((t1, t2, t3, k, theteq))

    for t1, t2, t3, k, theteq in sorted(angle_params):
        lines.append(f"HrmBnd1 {t1} {t2} {t3} {k:.6f} {theteq:.6f}")

    # -------------------------
    # Torsions (proper)
    # -------------------------
    def _as_term_list(dtype_obj: Any) -> List[Any]:
        """Return the individual torsion terms of a (possibly multi-term) dihedral type."""
        if dtype_obj is None:
            return []
        # ParmEd uses DihedralTypeList for multi-term torsions (iterable)
        terms = getattr(dtype_obj, "terms", None)
        if terms is not None:
            try:
                return list(terms)
            except Exception:
                logger.debug("Failed to convert terms to list", exc_info=True)
        if isinstance(dtype_obj, (list, tuple)):
            return list(dtype_obj)
        # Try iteration (DihedralTypeList behaves like a list)
        try:
            if hasattr(dtype_obj, "__iter__") and not isinstance(dtype_obj, (str, bytes)):
                return list(dtype_obj)
        except Exception:
            logger.debug("Failed to iterate dtype_obj", exc_info=True)
        return [dtype_obj]

    def _get_attr(obj: Any, names: List[str], default: Any = None) -> Any:
        """Return the first attribute in `names` that exists on `obj` and is not None."""
        for n in names:
            if hasattr(obj, n):
                v = getattr(obj, n)
                if v is not None:
                    return v
        return default

    lines.append("")
    lines.append("! Proper torsions (AmbTrs)")
    # key -> (phase[4], mag[4])
    tors_params: Dict[Tuple[str, str, str, str], Tuple[List[float], List[float]]] = {}
    # key -> {(periodicity, phase, magnitude)} already folded into tors_params
    tors_seen: Dict[Tuple[str, str, str, str], Set[Tuple[int, float, float]]] = {}
    # (key, periodicity) combinations already reported as unrepresentable
    warned_periodicity: Set[Tuple[Tuple[str, str, str, str], int]] = set()

    # A dihedral flagged `improper` is never a proper torsion, whether or not the
    # topology additionally exposes a separate `impropers` collection.
    dihedrals_all = list(getattr(parm, "dihedrals", []) or [])
    impropers_from_attr = list(getattr(parm, "impropers", []) or [])
    proper_dihedrals = [d for d in dihedrals_all if not bool(getattr(d, "improper", False))]
    improper_dihedrals = [d for d in dihedrals_all if bool(getattr(d, "improper", False))]
    improper_dihedrals.extend(impropers_from_attr)

    for dih in proper_dihedrals:
        dtype = getattr(dih, "type", None)
        for term in _as_term_list(dtype):
            try:
                per = _get_attr(term, ["per", "periodicity", "period"], None)
                phase = float(_get_attr(term, ["phase", "phi", "phase_shift"], 0.0))
                mag = float(_get_attr(term, ["phi_k", "pk", "k", "barrier"], 0.0))
                div = float(_get_attr(term, ["div", "divider", "idivf", "npaths"], 1.0))
                if div == 0.0:
                    div = 1.0
            except Exception:
                continue

            try:
                n = int(round(abs(float(per))))
            except Exception:
                continue
            if n < 1:
                continue

            t1 = _fix_atom_type(getattr(dih.atom1, "atom_type", "X"))
            t2 = _fix_atom_type(getattr(dih.atom2, "atom_type", "X"))
            t3 = _fix_atom_type(getattr(dih.atom3, "atom_type", "X"))
            t4 = _fix_atom_type(getattr(dih.atom4, "atom_type", "X"))
            key = (t1, t2, t3, t4)
            # A proper torsion is unchanged when its atoms are listed in reverse,
            # so A-B-C-D and D-C-B-A share one entry (as bonds and angles do).
            if key[::-1] < key:
                key = key[::-1]

            if n > 4:
                if (key, n) not in warned_periodicity:
                    warned_periodicity.add((key, n))
                    click.echo(
                        f"[oniom-export] WARNING: skipping Amber torsion with periodicity {n} (>4) "
                        f"for types {key[0]}-{key[1]}-{key[2]}-{key[3]}"
                    )
                continue

            # Amber divides each term by IDIVF; Gaussian's AmbTrs uses a single NPaths.
            # We fold the division into the magnitude and set NPaths=1.
            mag_eff = mag / div

            if key not in tors_params:
                tors_params[key] = ([0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0])

            # The topology lists one dihedral per atom quadruple, so the same typed term
            # shows up once per occurrence. Record each distinct term a single time;
            # otherwise the barrier would scale with the number of such dihedrals.
            signature = (n, round(phase, 6), round(mag_eff, 6))
            seen = tors_seen.setdefault(key, set())
            if signature in seen:
                continue
            seen.add(signature)

            phases, mags = tors_params[key]
            idx = n - 1

            if mags[idx] != 0.0 and abs(phases[idx] - phase) > 1e-6:
                click.echo(
                    f"[oniom-export] WARNING: multiple torsion terms with the same periodicity {n} but "
                    f"different phases for key {key}. Keeping the first phase {phases[idx]:.3f} and "
                    f"adding magnitudes."
                )
            if mags[idx] == 0.0:
                phases[idx] = phase
            mags[idx] += mag_eff

    for (t1, t2, t3, t4) in sorted(tors_params.keys()):
        phases, mags = tors_params[(t1, t2, t3, t4)]
        if all(abs(m) < 1e-12 for m in mags):
            continue
        po1, po2, po3, po4 = phases
        m1, m2, m3, m4 = mags
        lines.append(
            f"AmbTrs {t1} {t2} {t3} {t4} "
            f"{po1:.6f} {po2:.6f} {po3:.6f} {po4:.6f} "
            f"{m1:.6f} {m2:.6f} {m3:.6f} {m4:.6f} 1"
        )

    # -------------------------
    # Improper torsions
    # -------------------------
    lines.append("")
    lines.append("! Improper torsions (ImpTrs)")
    improper_params: Set[Tuple[str, str, str, str, float, float, float]] = set()

    for imp in improper_dihedrals:
        dtype = getattr(imp, "type", None)
        for term in _as_term_list(dtype):
            try:
                per = float(abs(float(_get_attr(term, ["per", "periodicity", "period"], 2.0))))
                phase = float(_get_attr(term, ["phase", "phi", "phase_shift"], 0.0))
                mag = float(_get_attr(term, ["phi_k", "pk", "k", "barrier"], 0.0))
                div = float(_get_attr(term, ["div", "divider", "idivf", "npaths"], 1.0))
                if div == 0.0:
                    div = 1.0
            except Exception:
                continue

            # Atom order is kept as stored: it is not interchangeable for impropers.
            t1 = _fix_atom_type(getattr(imp.atom1, "atom_type", "X"))
            t2 = _fix_atom_type(getattr(imp.atom2, "atom_type", "X"))
            t3 = _fix_atom_type(getattr(imp.atom3, "atom_type", "X"))
            t4 = _fix_atom_type(getattr(imp.atom4, "atom_type", "X"))

            mag_eff = mag / div
            improper_params.add((t1, t2, t3, t4, mag_eff, phase, per))

    for t1, t2, t3, t4, mag_eff, phase, per in sorted(improper_params):
        if abs(mag_eff) < 1e-12:
            continue
        lines.append(f"ImpTrs {t1} {t2} {t3} {t4} {mag_eff:.6f} {phase:.6f} {per:.6f}")

    # -------------------------
    # VDW parameters
    # -------------------------
    lines.append("")
    lines.append("! VDW parameters (Radius = Rmin/2 [Å], Well-depth = epsilon [kcal/mol])")
    vdw_params: Dict[str, Tuple[float, float]] = {}
    for atom in getattr(parm, "atoms", []):
        atype = _fix_atom_type(getattr(atom, "atom_type", "X"))
        if atype in vdw_params:
            continue

        # ParmEd often provides rmin (Amber Rmin/2), epsilon, and/or sigma
        radius: Optional[float] = None
        epsilon: Optional[float] = None

        try:
            if hasattr(atom, "rmin") and getattr(atom, "rmin") is not None:
                radius = float(getattr(atom, "rmin"))
            elif hasattr(atom, "rmin_half") and getattr(atom, "rmin_half") is not None:
                radius = float(getattr(atom, "rmin_half"))
            elif hasattr(atom, "sigma") and getattr(atom, "sigma") is not None:
                # Convert sigma -> Rmin/2 using rmin = 2^(1/6)*sigma, and radius = rmin/2
                radius = float(getattr(atom, "sigma")) * (2.0 ** (1.0 / 6.0)) / 2.0
        except Exception:
            radius = None

        try:
            if hasattr(atom, "epsilon") and getattr(atom, "epsilon") is not None:
                epsilon = float(getattr(atom, "epsilon"))
        except Exception:
            epsilon = None

        # Incomplete data: leave the type open so a later atom of the same type can fill it.
        if radius is None or epsilon is None:
            continue

        vdw_params[atype] = (radius, epsilon)

    for atype, (radius, epsilon) in sorted(vdw_params.items()):
        lines.append(f"VDW {atype} {radius:.6f} {epsilon:.6f}")

    return "\n".join(lines)
1314
+
1315
def export_gaussian(
    parm7_path: Path,
    model_pdb: Optional[Path],
    output_path: Path,
    method: str = "wB97XD/def2-TZVPD",
    qm_charge: int = 0,
    qm_mult: int = 1,
    near_cutoff: float = 6.0,
    nproc: int = 8,
    mem: str = "16GB",
    qm_residues: Optional[List[int]] = None,
    input_path: Optional[Path] = None,
    element_check: bool = True,
) -> None:
    """
    Generate Gaussian ONIOM input file from parm7.

    The QM region is taken from the first available source, in this order:
    `model_pdb`, `qm_residues`, then the layer B-factors of a layered input PDB.
    The written file consists of the header, the coordinate block, the
    connectivity table and the force-field parameter section, separated by
    blank lines.

    Args:
        parm7_path: Path to Amber parm7 topology
        model_pdb: Path to PDB defining QM region atoms (optional)
        output_path: Output Gaussian input file
        method: QM method and basis set
        qm_charge: Charge of QM region
        qm_mult: Multiplicity of QM region
        near_cutoff: Distance cutoff for movable atoms (Angstrom)
        nproc: Number of processors
        mem: Memory allocation
        qm_residues: List of 0-based residue indices for QM region (alternative to model_pdb)
        input_path: Coordinate file (.pdb or .xyz). If omitted, uses coordinates stored in the ParmEd object.
        element_check: If True, validate element sequence between parm7 and input coordinates.

    Raises:
        ValueError: If no coordinates are available, no QM region is specified,
            no QM atom is identified, or a QM index exceeds the number of topology atoms.
            With `element_check` the element validation is run in strict mode.
    """
    _check_parmed()

    parm = pmd.load_file(str(parm7_path))

    # Load / attach coordinates
    elements_for_output: Optional[List[str]] = None
    if input_path is not None:
        coords, input_elems = _read_input_geometry(input_path)
        _apply_coordinates_to_parm(parm, coords)
        elements_for_output = input_elems
        if element_check:
            _validate_element_order(parm, input_elems, strict=True)
    else:
        # No coordinate file: the topology object itself must already carry coordinates.
        coords_attr = getattr(parm, "coordinates", None)
        n_coords = 0
        try:
            n_coords = len(coords_attr) if coords_attr is not None else 0
        except Exception:
            n_coords = 0
        if n_coords == 0:
            raise ValueError(
                "No coordinates found in the loaded parm7. "
                "Please provide a coordinate file with -i/--input (PDB or XYZ)."
            )
        elements_for_output = _get_parm_elements(parm)

    # Detect layer indices from B-factors if the input is a layered PDB produced by mlmm.
    layer_info: Optional[Dict[str, List[int]]] = None
    if input_path is not None and input_path.suffix.lower() in {".pdb", ".ent"}:
        # Best effort: any failure here simply means "no layer information".
        try:
            from .utils import (
                has_valid_layer_bfactors,
                parse_layer_indices_from_bfactors,
                read_bfactors_from_pdb,
            )

            bfactors = read_bfactors_from_pdb(input_path)
            if len(bfactors) == len(parm.atoms) and has_valid_layer_bfactors(bfactors):
                layer_info = parse_layer_indices_from_bfactors(bfactors)
                click.echo(
                    "[oniom-export] Detected ML/MM layer B-factors in the input PDB; "
                    "using them to decide movable/frozen atoms."
                )
        except Exception:
            layer_info = None

    # Determine QM region
    qm_indices: Set[int] = set()
    movable_indices: Set[int] = set()

    if model_pdb is not None:
        # The input file is only handed over for ID-based matching when it is a PDB.
        qm_indices = _read_qm_atoms_from_pdb(
            model_pdb,
            input_pdb=input_path
            if (input_path is not None and input_path.suffix.lower() in {".pdb", ".ent"})
            else None,
            system_coords=getattr(parm, "coordinates", None),
            system_elements=elements_for_output,
        )
    elif qm_residues:
        qm_indices, movable_indices = _identify_qm_atoms_by_distance(parm, qm_residues, near_cutoff)
    elif layer_info is not None and layer_info.get("ml_indices"):
        qm_indices = set(int(i) for i in layer_info["ml_indices"])
    else:
        raise ValueError(
            "No QM region specified. Provide --model-pdb, or supply a layered input PDB "
            "(B-factor=0 marks the ML/QM region), or use qm_residues in the Python API."
        )

    if not qm_indices:
        raise ValueError("No QM atoms identified")

    if max(qm_indices) >= len(parm.atoms):
        raise ValueError(
            f"QM index out of range: max(qm_indices)={max(qm_indices)} but topology has {len(parm.atoms)} atoms. "
            "Check that your model PDB / input PDB and parm7 have consistent atom ordering."
        )

    # Determine movable atoms for partial optimization.
    if layer_info is not None:
        # Layer information takes precedence: everything not marked frozen is movable.
        frozen = set(int(i) for i in layer_info.get("frozen_indices", []))
        movable_indices = set(range(len(parm.atoms))) - frozen
    elif not movable_indices:
        # Distance-based selection: include all atoms in residues within `near_cutoff` of any QM atom.
        from scipy import spatial

        qm_list = sorted(qm_indices)
        neighbor_mask = np.any(
            spatial.distance.cdist(parm.coordinates, parm.coordinates[qm_list]) <= near_cutoff,
            axis=1,
        )
        neighbor_residues = set(parm.atoms[i].residue for i in np.where(neighbor_mask)[0])
        for residue in neighbor_residues:
            for atom in residue.atoms:
                movable_indices.add(atom.idx)

    movable_indices |= qm_indices  # QM atoms must always be movable

    # Detect covalent QM/MM boundaries and generate link-atom metadata.
    link_specs = _build_link_atom_specs(
        parm,
        qm_indices,
        elements=elements_for_output,
    )

    # Generate sections
    header = _write_gaussian_header(
        parm,
        str(parm7_path),
        str(output_path),
        method=method,
        nproc=nproc,
        mem=mem,
        qm_charge=qm_charge,
        qm_mult=qm_mult,
    )
    coords, connectivity = _write_gaussian_coordinates(
        parm,
        qm_indices,
        movable_indices,
        elements=elements_for_output,
        link_specs=link_specs,
    )
    ff_params = _write_gaussian_ff_params(parm)

    # Write output: sections are separated (and the file terminated) by a blank line.
    with output_path.open("w") as f:
        f.write(header)
        f.write(coords)
        f.write("\n\n")
        f.write(connectivity)
        f.write("\n\n")
        f.write(ff_params)
        f.write("\n\n")

    click.echo(f"[oniom-gaussian] Wrote '{output_path}'")
    click.echo(f"[oniom-gaussian] QM atoms: {len(qm_indices)}, Movable atoms: {len(movable_indices)}")
1483
+ click.echo(f"[oniom-gaussian] Link boundaries: {len(link_specs)}")
1484
+
1485
+
1486
# -----------------------------------------------
# ORCA QM/MM Export
# -----------------------------------------------
1493
+
1494
+ def _format_orca_index_set(indices: Set[int]) -> str:
1495
+ """
1496
+ Format a set of 0-based atom indices using ORCA's compact range syntax.
1497
+
1498
+ Example:
1499
+ {0:3 7 10:12}
1500
+ """
1501
+ if not indices:
1502
+ return "{}"
1503
+ sorted_idx = sorted(int(i) for i in indices)
1504
+ parts: List[str] = []
1505
+ start = prev = sorted_idx[0]
1506
+ for i in sorted_idx[1:]:
1507
+ if i == prev + 1:
1508
+ prev = i
1509
+ continue
1510
+ parts.append(f"{start}:{prev}" if prev > start else f"{start}")
1511
+ start = prev = i
1512
+ parts.append(f"{start}:{prev}" if prev > start else f"{start}")
1513
+ return "{" + " ".join(parts) + "}"
1514
+
1515
+
1516
+ def _manual_orcaff_command(parm7_path: Path, out_dir: Path) -> str:
1517
+ """Return a shell command users can run to generate ORCAFF.prms manually."""
1518
+ return (
1519
+ f"cd {shlex.quote(str(out_dir.resolve()))} && "
1520
+ f"orca_mm -convff -AMBER {shlex.quote(str(parm7_path.resolve()))}"
1521
+ )
1522
+
1523
+
1524
+ def _resolve_oniom_mode(mode: Optional[str], output_path: Path) -> str:
1525
+ """Resolve export mode using explicit `--mode` first, then output suffix."""
1526
+ if mode is not None:
1527
+ return str(mode).strip().lower()
1528
+
1529
+ suffix = output_path.suffix.lower()
1530
+ if suffix in {".gjf", ".com"}:
1531
+ return "g16"
1532
+ if suffix == ".inp":
1533
+ return "orca"
1534
+
1535
+ raise ValueError(
1536
+ f"Could not infer export mode from -o/--output '{output_path}'. "
1537
+ "Specify --mode (g16/orca) or use an output suffix: .gjf/.com (g16), .inp (orca)."
1538
+ )
1539
+
1540
+
1541
def export_orca(
    parm7_path: Path,
    model_pdb: Optional[Path],
    output_path: Path,
    method: str = "B3LYP D3BJ def2-SVP",
    qm_charge: int = 0,
    qm_mult: int = 1,
    total_charge: Optional[int] = None,
    total_mult: Optional[int] = None,
    nproc: int = 8,
    near_cutoff: float = 6.0,
    qm_residues: Optional[List[int]] = None,
    input_path: Optional[Path] = None,
    element_check: bool = True,
    orcaff_path: Optional[Path] = None,
    convert_orcaff: bool = True,
) -> None:
    """
    Generate an ORCA QM/MM input file.

    ORCA's QM/MM implementation expects an ORCA force-field parameter file (ORCAFF.prms).
    This can be generated from an Amber topology (prmtop/parm7) using ORCA's `orca_mm` utility:

        orca_mm -convff -AMBER <topology.prmtop>

    This exporter will try to generate the ORCAFF file automatically when:
      - the resolved ORCAFF path does not exist yet, and
      - `convert_orcaff=True`, and
      - `orca_mm` is found in PATH.

    The QM region is taken from the first source that applies, in this order:
    `model_pdb`, `qm_residues`, then ML-layer B-factors of a layered input PDB.

    Args:
        parm7_path: Path to Amber parm7/prmtop topology file.
        model_pdb: PDB defining QM region (typically subset PDB).
        output_path: Output ORCA input file (.inp).
        method: ORCA QM method line (e.g., "B3LYP D3BJ def2-SVP").
        qm_charge: Charge of the QM region.
        qm_mult: Multiplicity of the QM region.
        total_charge: Charge of the full QM+MM system for Charge_Total in %qmmm.
            If None, uses topology total charge.
        total_mult: Multiplicity of the full QM+MM system for Mult_Total in %qmmm.
            If None, uses qm_mult.
        nproc: Number of processors.
        near_cutoff: Distance cutoff (Å) used to define ActiveAtoms when no layer B-factors exist.
        qm_residues: Alternative QM definition by 0-based residue indices in the ParmEd structure.
        input_path: Coordinate file (.pdb or .xyz). Atom order must match the topology.
        element_check: Validate element sequence between input and topology (best-effort).
        orcaff_path: Path to ORCAFF.prms file. If None, uses/creates <parm7_stem>.ORCAFF.prms in output dir.
        convert_orcaff: If True, try to run `orca_mm -convff -AMBER` when ORCAFF.prms is missing.

    Raises:
        ValueError: If no coordinates are available, no QM region can be
            determined, or a QM index lies outside the topology.
        RuntimeError: If `orca_mm` is executed and exits with a non-zero status.
    """
    _check_parmed()

    parm = pmd.load_file(str(parm7_path))
    input_is_pdb = input_path is not None and input_path.suffix.lower() in {".pdb", ".ent"}

    # Load / attach coordinates
    elements_for_output: Optional[List[str]] = None
    if input_path is not None:
        coords, input_elems = _read_input_geometry(input_path)
        _apply_coordinates_to_parm(parm, coords)
        elements_for_output = input_elems
        if element_check:
            _validate_element_order(parm, input_elems, strict=True)
    else:
        coords_attr = getattr(parm, "coordinates", None)
        try:
            n_coords = len(coords_attr) if coords_attr is not None else 0
        except TypeError:
            # Coordinates object without a length: treat as "no coordinates".
            n_coords = 0
        if n_coords == 0:
            raise ValueError(
                "No coordinates found in the loaded topology/structure. "
                "Please provide a coordinate file with -i/--input (PDB or XYZ)."
            )
        elements_for_output = _get_parm_elements(parm)

    # Detect layer indices from B-factors if input is a layered PDB produced by mlmm.
    layer_info: Optional[Dict[str, List[int]]] = None
    if input_is_pdb:
        try:
            from .utils import (
                has_valid_layer_bfactors,
                parse_layer_indices_from_bfactors,
                read_bfactors_from_pdb,
            )

            bfactors = read_bfactors_from_pdb(input_path)
            if len(bfactors) == len(parm.atoms) and has_valid_layer_bfactors(bfactors):
                layer_info = parse_layer_indices_from_bfactors(bfactors)
                click.echo(
                    "[oniom-export] Detected ML/MM layer B-factors in the input PDB; "
                    "using them to decide movable/frozen atoms."
                )
        except Exception as exc:
            # Layer detection is best-effort: fall back to the other QM/active
            # selection mechanisms, but tell the user why it was skipped.
            layer_info = None
            click.echo(
                f"[oniom-export] WARNING: could not read layer B-factors from '{input_path}': {exc}",
                err=True,
            )

    # Determine QM region
    qm_indices: Set[int] = set()
    movable_indices: Set[int] = set()

    if model_pdb is not None:
        qm_indices = _read_qm_atoms_from_pdb(
            model_pdb,
            input_pdb=input_path if input_is_pdb else None,
            system_coords=getattr(parm, "coordinates", None),
            system_elements=elements_for_output,
        )
    elif qm_residues:
        qm_indices, movable_indices = _identify_qm_atoms_by_distance(parm, qm_residues, near_cutoff)
    elif layer_info is not None and layer_info.get("ml_indices"):
        qm_indices = {int(i) for i in layer_info["ml_indices"]}
    else:
        raise ValueError(
            "No QM region specified. Provide --model-pdb, or supply a layered input PDB "
            "(B-factor=0 marks the ML/QM region), or use qm_residues in the Python API."
        )

    if not qm_indices:
        raise ValueError("No QM atoms identified")

    n_atoms = len(parm.atoms)
    if max(qm_indices) >= n_atoms:
        raise ValueError(
            f"QM index out of range: max(qm_indices)={max(qm_indices)} but topology has {n_atoms} atoms. "
            "Check that your model PDB / input PDB and parm7 have consistent atom ordering."
        )

    # Determine ActiveAtoms (movable atoms)
    if layer_info is not None:
        frozen = {int(i) for i in layer_info.get("frozen_indices", [])}
        movable_indices = set(range(n_atoms)) - frozen
    elif not movable_indices:
        # Distance-based selection: include all atoms in residues within `near_cutoff` of any QM atom.
        from scipy import spatial

        qm_list = sorted(qm_indices)
        neighbor_mask = np.any(
            spatial.distance.cdist(parm.coordinates, parm.coordinates[qm_list]) <= near_cutoff,
            axis=1,
        )
        neighbor_residues = {parm.atoms[i].residue for i in np.where(neighbor_mask)[0]}
        movable_indices.update(
            atom.idx for residue in neighbor_residues for atom in residue.atoms
        )

    # QM atoms are always part of the active set.
    movable_indices |= qm_indices

    if total_charge is None:
        total_charge = _get_total_charge(parm)
    if total_mult is None:
        total_mult = int(qm_mult)

    # ORCA generates link atoms automatically from QMAtoms and ORCAFF topology.
    # We still detect boundaries to report what was found and to keep behavior explicit.
    link_specs = _build_link_atom_specs(
        parm,
        qm_indices,
        elements=elements_for_output,
    )

    # Resolve/generate ORCAFF.prms
    out_dir = output_path.parent
    manual_orcaff_cmd = _manual_orcaff_command(parm7_path, out_dir)
    if orcaff_path is None:
        orcaff_path = out_dir / f"{parm7_path.stem}.ORCAFF.prms"

    if not orcaff_path.exists() and convert_orcaff:
        orca_mm = shutil.which("orca_mm")
        if orca_mm is None:
            click.echo(
                "[oniom-orca] WARNING: ORCAFF.prms not found and `orca_mm` is not available on PATH.\n"
                f"[oniom-orca] Run manually: {manual_orcaff_cmd}"
            )
        else:
            click.echo(
                f"[oniom-orca] Generating ORCAFF.prms via: {manual_orcaff_cmd}"
            )
            # The child process runs with cwd=out_dir, so a relative topology
            # path must be resolved against *our* working directory first;
            # this also matches the manual command echoed above.
            proc = subprocess.run(
                [orca_mm, "-convff", "-AMBER", str(parm7_path.resolve())],
                cwd=str(out_dir),
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            )
            if proc.returncode != 0:
                raise RuntimeError(
                    "orca_mm failed "
                    f"(exit {proc.returncode}).\n"
                    f"Run manually: {manual_orcaff_cmd}\n"
                    f"Output:\n{proc.stdout}"
                )

            # Try to locate the generated file (orca_mm typically writes <stem>.ORCAFF.prms)
            if not orcaff_path.exists():
                newest = max(
                    out_dir.glob("*.ORCAFF.prms"),
                    key=lambda p: p.stat().st_mtime,
                    default=None,
                )
                if newest is not None:
                    orcaff_path = newest

    # ORCA input (use compact range syntax; indices are 0-based)
    qm_atoms_str = _format_orca_index_set(qm_indices)
    active_atoms_str = _format_orca_index_set(movable_indices)
    link_comment_block = ""
    if link_specs:
        link_lines = ["# Estimated link-H positions (QM/MM boundary caps; Angstrom)"]
        for mm_idx, spec in sorted(link_specs.items()):
            pos = np.asarray(spec["position"], dtype=float)
            link_lines.append(
                f"# QM {int(spec['qm_idx']) + 1:>5d} MM {int(mm_idx) + 1:>5d} "
                f"Hcap ({pos[0]:10.6f}, {pos[1]:10.6f}, {pos[2]:10.6f})"
            )
        link_comment_block = "\n".join(link_lines) + "\n"

    # Prefer a relative filename when possible. A bare filename is only valid
    # when the parameter file sits next to the generated input (presumably
    # where ORCA is run — confirm); a relative path pointing elsewhere is
    # made absolute so the reference stays correct.
    if orcaff_path.is_absolute():
        orcaff_ref = str(orcaff_path)
    elif orcaff_path.resolve().parent == out_dir.resolve():
        orcaff_ref = orcaff_path.name
    else:
        orcaff_ref = str(orcaff_path.resolve())

    coords_label = input_path if input_path is not None else "(from topology/structure)"
    header = f"""# ORCA QM/MM input generated by mlmm oniom-orca
# Amber topology: {parm7_path}
# ORCAFF parameters: {orcaff_ref}
# Coordinates: {coords_label}
{link_comment_block}

%pal nprocs {nproc} end

! {method}
! QMMM

%qmmm
ORCAFFFilename "{orcaff_ref}"
QMAtoms {qm_atoms_str} end
ActiveAtoms {active_atoms_str} end
Charge_Total {int(total_charge)}
Mult_Total {int(total_mult)}
end

* xyz {qm_charge} {qm_mult}
"""

    # Add coordinates. Input-file elements are used only when they cover every
    # topology atom; otherwise each element is derived from the topology atom.
    elements_list: Optional[List[str]] = None
    if elements_for_output is not None and len(elements_for_output) == n_atoms:
        elements_list = elements_for_output

    coord_lines: List[str] = []
    for atom in parm.atoms:
        if elements_list is not None:
            element = elements_list[atom.idx]
        else:
            element = _get_parm_element(atom)
        coord_lines.append(
            f" {element:<2} {atom.xx:12.6f} {atom.xy:12.6f} {atom.xz:12.6f}\n"
        )

    orca_input = header + "".join(coord_lines) + "*\n"

    # Write output
    with output_path.open("w") as f:
        f.write(orca_input)

    click.echo(f"[oniom-orca] Wrote '{output_path}'")
    click.echo(f"[oniom-orca] QM atoms: {len(qm_indices)}, Active atoms: {len(movable_indices)}")
    click.echo(f"[oniom-orca] Link boundaries (auto-capped by ORCA): {len(link_specs)}")
    if orcaff_path.exists():
        click.echo(f"[oniom-orca] ORCAFF.prms: {orcaff_path}")
    else:
        click.echo(
            f"[oniom-orca] NOTE: ORCAFF.prms not found at '{orcaff_path}'. "
            f"Run manually: {manual_orcaff_cmd}"
        )
1809
+
1810
+
1811
+
1812
+ # -----------------------------------------------
1813
+ # CLI Commands
1814
+ # -----------------------------------------------
1815
+
1816
@click.command(
    name="oniom-export",
    help="Export ONIOM input from Amber parm7 topology (Gaussian g16 or ORCA).",
    context_settings={"help_option_names": ["-h", "--help"]},
)
@click.option(
    "--parm",
    "parm7",
    type=click.Path(path_type=Path, exists=True, dir_okay=False),
    required=True,
    help="Amber parm7 topology file.",
)
@click.option(
    "-i",
    "--input",
    "input_coords",
    type=click.Path(path_type=Path, exists=True, dir_okay=False),
    default=None,
    help="Coordinate file (.pdb or .xyz) for the current structure (atom order must match parm7).",
)
@click.option(
    "--element-check/--no-element-check",
    default=True,
    show_default=True,
    help="Validate that the element sequence in --input matches the parm7 topology.",
)
@click.option(
    "--model-pdb",
    type=click.Path(path_type=Path, exists=True, dir_okay=False),
    default=None,
    help="PDB file defining QM region atoms.",
)
@click.option(
    "-o",
    "--output",
    type=click.Path(path_type=Path, dir_okay=False),
    required=True,
    help="Output file path (.gjf/.com for g16, .inp for ORCA when --mode is omitted).",
)
@click.option(
    "--mode",
    type=click.Choice(["g16", "orca"], case_sensitive=False),
    default=None,
    help="Export mode. If omitted, inferred from -o suffix: .gjf/.com -> g16, .inp -> orca.",
)
@click.option(
    "--method",
    type=str,
    default=None,
    help="QM method and basis set. Defaults depend on mode.",
)
@click.option(
    "-q",
    "--charge",
    type=int,
    required=True,
    help="Charge of QM region.",
)
@click.option(
    "-m",
    "--multiplicity",
    type=int,
    default=1,
    show_default=True,
    help="Multiplicity of QM region.",
)
@click.option(
    "--near",
    type=float,
    default=6.0,
    show_default=True,
    help="Distance cutoff for movable/active atoms (Angstrom).",
)
@click.option(
    "--nproc",
    type=int,
    default=8,
    show_default=True,
    help="Number of processors.",
)
@click.option(
    "--mem",
    type=str,
    default="16GB",
    show_default=True,
    help="Memory allocation (g16 mode).",
)
@click.option(
    "--total-charge",
    type=int,
    default=None,
    help="Total charge of full QM+MM system for ORCA Charge_Total (orca mode).",
)
@click.option(
    "--total-mult",
    type=int,
    default=None,
    help="Total multiplicity of full QM+MM system for ORCA Mult_Total (orca mode).",
)
@click.option(
    "--orcaff",
    type=click.Path(exists=True, path_type=Path),
    default=None,
    help="Path to ORCAFF.prms (orca mode). If omitted, uses/creates <parm7_stem>.ORCAFF.prms in output directory.",
)
@click.option(
    "--convert-orcaff/--no-convert-orcaff",
    default=True,
    show_default=True,
    help="If ORCAFF.prms is missing, try `orca_mm -convff -AMBER` automatically (orca mode).",
)
def cli(
    parm7: Path,
    input_coords: Optional[Path],
    element_check: bool,
    model_pdb: Optional[Path],
    output: Path,
    mode: Optional[str],
    method: Optional[str],
    charge: int,
    multiplicity: int,
    near: float,
    nproc: int,
    mem: str,
    total_charge: Optional[int],
    total_mult: Optional[int],
    orcaff: Optional[Path],
    convert_orcaff: bool,
) -> None:
    """Export Gaussian/ORCA ONIOM input via a unified entrypoint.

    The backend is chosen by `_resolve_oniom_mode` (explicit --mode first,
    then the -o suffix). Any exception raised while resolving the mode or
    exporting is printed to stderr as ``ERROR: ...`` and the process exits
    with status 1.
    """
    try:
        backend = _resolve_oniom_mode(mode, output)

        # Keyword arguments accepted by both exporters.
        shared = {
            "parm7_path": parm7,
            "model_pdb": model_pdb,
            "output_path": output,
            "qm_charge": charge,
            "qm_mult": multiplicity,
            "near_cutoff": near,
            "nproc": nproc,
            "input_path": input_coords,
            "element_check": element_check,
        }

        if backend == "g16":
            export_gaussian(
                method=method or _GAUSSIAN_DEFAULT_METHOD,
                mem=mem,
                **shared,
            )
        else:
            # Every mode other than g16 is handled by the ORCA exporter.
            export_orca(
                method=method or _ORCA_DEFAULT_METHOD,
                total_charge=total_charge,
                total_mult=total_mult,
                orcaff_path=orcaff,
                convert_orcaff=convert_orcaff,
                **shared,
            )
    except Exception as exc:
        click.echo(f"ERROR: {exc}", err=True)
        raise SystemExit(1)