mlmm-toolkit 0.2.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (372) hide show
  1. hessian_ff/__init__.py +50 -0
  2. hessian_ff/analytical_hessian.py +609 -0
  3. hessian_ff/constants.py +46 -0
  4. hessian_ff/forcefield.py +339 -0
  5. hessian_ff/loaders.py +608 -0
  6. hessian_ff/native/Makefile +8 -0
  7. hessian_ff/native/__init__.py +28 -0
  8. hessian_ff/native/analytical_hessian.py +88 -0
  9. hessian_ff/native/analytical_hessian_ext.cpp +258 -0
  10. hessian_ff/native/bonded.py +82 -0
  11. hessian_ff/native/bonded_ext.cpp +640 -0
  12. hessian_ff/native/loader.py +349 -0
  13. hessian_ff/native/nonbonded.py +118 -0
  14. hessian_ff/native/nonbonded_ext.cpp +1150 -0
  15. hessian_ff/prmtop_parmed.py +23 -0
  16. hessian_ff/system.py +107 -0
  17. hessian_ff/terms/__init__.py +14 -0
  18. hessian_ff/terms/angle.py +73 -0
  19. hessian_ff/terms/bond.py +44 -0
  20. hessian_ff/terms/cmap.py +406 -0
  21. hessian_ff/terms/dihedral.py +141 -0
  22. hessian_ff/terms/nonbonded.py +209 -0
  23. hessian_ff/tests/__init__.py +0 -0
  24. hessian_ff/tests/conftest.py +75 -0
  25. hessian_ff/tests/data/small/complex.parm7 +1346 -0
  26. hessian_ff/tests/data/small/complex.pdb +125 -0
  27. hessian_ff/tests/data/small/complex.rst7 +63 -0
  28. hessian_ff/tests/test_coords_input.py +44 -0
  29. hessian_ff/tests/test_energy_force.py +49 -0
  30. hessian_ff/tests/test_hessian.py +137 -0
  31. hessian_ff/tests/test_smoke.py +18 -0
  32. hessian_ff/tests/test_validation.py +40 -0
  33. hessian_ff/workflows.py +889 -0
  34. mlmm/__init__.py +36 -0
  35. mlmm/__main__.py +7 -0
  36. mlmm/_version.py +34 -0
  37. mlmm/add_elem_info.py +374 -0
  38. mlmm/advanced_help.py +91 -0
  39. mlmm/align_freeze_atoms.py +601 -0
  40. mlmm/all.py +3535 -0
  41. mlmm/bond_changes.py +231 -0
  42. mlmm/bool_compat.py +223 -0
  43. mlmm/cli.py +574 -0
  44. mlmm/cli_utils.py +166 -0
  45. mlmm/default_group.py +337 -0
  46. mlmm/defaults.py +467 -0
  47. mlmm/define_layer.py +526 -0
  48. mlmm/dft.py +1041 -0
  49. mlmm/energy_diagram.py +253 -0
  50. mlmm/extract.py +2213 -0
  51. mlmm/fix_altloc.py +464 -0
  52. mlmm/freq.py +1406 -0
  53. mlmm/harmonic_constraints.py +140 -0
  54. mlmm/hessian_cache.py +44 -0
  55. mlmm/hessian_calc.py +174 -0
  56. mlmm/irc.py +638 -0
  57. mlmm/mlmm_calc.py +2262 -0
  58. mlmm/mm_parm.py +945 -0
  59. mlmm/oniom_export.py +1983 -0
  60. mlmm/oniom_import.py +457 -0
  61. mlmm/opt.py +1742 -0
  62. mlmm/path_opt.py +1353 -0
  63. mlmm/path_search.py +2299 -0
  64. mlmm/preflight.py +88 -0
  65. mlmm/py.typed +1 -0
  66. mlmm/pysis_runner.py +45 -0
  67. mlmm/scan.py +1047 -0
  68. mlmm/scan2d.py +1226 -0
  69. mlmm/scan3d.py +1265 -0
  70. mlmm/scan_common.py +184 -0
  71. mlmm/summary_log.py +736 -0
  72. mlmm/trj2fig.py +448 -0
  73. mlmm/tsopt.py +2871 -0
  74. mlmm/utils.py +2309 -0
  75. mlmm/xtb_embedcharge_correction.py +475 -0
  76. mlmm_toolkit-0.2.2.dev0.dist-info/METADATA +1159 -0
  77. mlmm_toolkit-0.2.2.dev0.dist-info/RECORD +372 -0
  78. mlmm_toolkit-0.2.2.dev0.dist-info/WHEEL +5 -0
  79. mlmm_toolkit-0.2.2.dev0.dist-info/entry_points.txt +2 -0
  80. mlmm_toolkit-0.2.2.dev0.dist-info/licenses/LICENSE +674 -0
  81. mlmm_toolkit-0.2.2.dev0.dist-info/top_level.txt +4 -0
  82. pysisyphus/Geometry.py +1667 -0
  83. pysisyphus/LICENSE +674 -0
  84. pysisyphus/TableFormatter.py +63 -0
  85. pysisyphus/TablePrinter.py +74 -0
  86. pysisyphus/__init__.py +12 -0
  87. pysisyphus/calculators/AFIR.py +452 -0
  88. pysisyphus/calculators/AnaPot.py +20 -0
  89. pysisyphus/calculators/AnaPot2.py +48 -0
  90. pysisyphus/calculators/AnaPot3.py +12 -0
  91. pysisyphus/calculators/AnaPot4.py +20 -0
  92. pysisyphus/calculators/AnaPotBase.py +337 -0
  93. pysisyphus/calculators/AnaPotCBM.py +25 -0
  94. pysisyphus/calculators/AtomAtomTransTorque.py +154 -0
  95. pysisyphus/calculators/CFOUR.py +250 -0
  96. pysisyphus/calculators/Calculator.py +844 -0
  97. pysisyphus/calculators/CerjanMiller.py +24 -0
  98. pysisyphus/calculators/Composite.py +123 -0
  99. pysisyphus/calculators/ConicalIntersection.py +171 -0
  100. pysisyphus/calculators/DFTBp.py +430 -0
  101. pysisyphus/calculators/DFTD3.py +66 -0
  102. pysisyphus/calculators/DFTD4.py +84 -0
  103. pysisyphus/calculators/Dalton.py +61 -0
  104. pysisyphus/calculators/Dimer.py +681 -0
  105. pysisyphus/calculators/Dummy.py +20 -0
  106. pysisyphus/calculators/EGO.py +76 -0
  107. pysisyphus/calculators/EnergyMin.py +224 -0
  108. pysisyphus/calculators/ExternalPotential.py +264 -0
  109. pysisyphus/calculators/FakeASE.py +35 -0
  110. pysisyphus/calculators/FourWellAnaPot.py +28 -0
  111. pysisyphus/calculators/FreeEndNEBPot.py +39 -0
  112. pysisyphus/calculators/Gaussian09.py +18 -0
  113. pysisyphus/calculators/Gaussian16.py +726 -0
  114. pysisyphus/calculators/HardSphere.py +159 -0
  115. pysisyphus/calculators/IDPPCalculator.py +49 -0
  116. pysisyphus/calculators/IPIClient.py +133 -0
  117. pysisyphus/calculators/IPIServer.py +234 -0
  118. pysisyphus/calculators/LEPSBase.py +24 -0
  119. pysisyphus/calculators/LEPSExpr.py +139 -0
  120. pysisyphus/calculators/LennardJones.py +80 -0
  121. pysisyphus/calculators/MOPAC.py +219 -0
  122. pysisyphus/calculators/MullerBrownSympyPot.py +51 -0
  123. pysisyphus/calculators/MultiCalc.py +85 -0
  124. pysisyphus/calculators/NFK.py +45 -0
  125. pysisyphus/calculators/OBabel.py +87 -0
  126. pysisyphus/calculators/ONIOMv2.py +1129 -0
  127. pysisyphus/calculators/ORCA.py +893 -0
  128. pysisyphus/calculators/ORCA5.py +6 -0
  129. pysisyphus/calculators/OpenMM.py +88 -0
  130. pysisyphus/calculators/OpenMolcas.py +281 -0
  131. pysisyphus/calculators/OverlapCalculator.py +908 -0
  132. pysisyphus/calculators/Psi4.py +218 -0
  133. pysisyphus/calculators/PyPsi4.py +37 -0
  134. pysisyphus/calculators/PySCF.py +341 -0
  135. pysisyphus/calculators/PyXTB.py +73 -0
  136. pysisyphus/calculators/QCEngine.py +106 -0
  137. pysisyphus/calculators/Rastrigin.py +22 -0
  138. pysisyphus/calculators/Remote.py +76 -0
  139. pysisyphus/calculators/Rosenbrock.py +15 -0
  140. pysisyphus/calculators/SocketCalc.py +97 -0
  141. pysisyphus/calculators/TIP3P.py +111 -0
  142. pysisyphus/calculators/TransTorque.py +161 -0
  143. pysisyphus/calculators/Turbomole.py +965 -0
  144. pysisyphus/calculators/VRIPot.py +37 -0
  145. pysisyphus/calculators/WFOWrapper.py +333 -0
  146. pysisyphus/calculators/WFOWrapper2.py +341 -0
  147. pysisyphus/calculators/XTB.py +418 -0
  148. pysisyphus/calculators/__init__.py +81 -0
  149. pysisyphus/calculators/cosmo_data.py +139 -0
  150. pysisyphus/calculators/parser.py +150 -0
  151. pysisyphus/color.py +19 -0
  152. pysisyphus/config.py +133 -0
  153. pysisyphus/constants.py +65 -0
  154. pysisyphus/cos/AdaptiveNEB.py +230 -0
  155. pysisyphus/cos/ChainOfStates.py +725 -0
  156. pysisyphus/cos/FreeEndNEB.py +25 -0
  157. pysisyphus/cos/FreezingString.py +103 -0
  158. pysisyphus/cos/GrowingChainOfStates.py +71 -0
  159. pysisyphus/cos/GrowingNT.py +309 -0
  160. pysisyphus/cos/GrowingString.py +508 -0
  161. pysisyphus/cos/NEB.py +189 -0
  162. pysisyphus/cos/SimpleZTS.py +64 -0
  163. pysisyphus/cos/__init__.py +22 -0
  164. pysisyphus/cos/stiffness.py +199 -0
  165. pysisyphus/drivers/__init__.py +17 -0
  166. pysisyphus/drivers/afir.py +855 -0
  167. pysisyphus/drivers/barriers.py +271 -0
  168. pysisyphus/drivers/birkholz.py +138 -0
  169. pysisyphus/drivers/cluster.py +318 -0
  170. pysisyphus/drivers/diabatization.py +133 -0
  171. pysisyphus/drivers/merge.py +368 -0
  172. pysisyphus/drivers/merge_mol2.py +322 -0
  173. pysisyphus/drivers/opt.py +375 -0
  174. pysisyphus/drivers/perf.py +91 -0
  175. pysisyphus/drivers/pka.py +52 -0
  176. pysisyphus/drivers/precon_pos_rot.py +669 -0
  177. pysisyphus/drivers/rates.py +480 -0
  178. pysisyphus/drivers/replace.py +219 -0
  179. pysisyphus/drivers/scan.py +212 -0
  180. pysisyphus/drivers/spectrum.py +166 -0
  181. pysisyphus/drivers/thermo.py +31 -0
  182. pysisyphus/dynamics/Gaussian.py +103 -0
  183. pysisyphus/dynamics/__init__.py +20 -0
  184. pysisyphus/dynamics/colvars.py +136 -0
  185. pysisyphus/dynamics/driver.py +297 -0
  186. pysisyphus/dynamics/helpers.py +256 -0
  187. pysisyphus/dynamics/lincs.py +105 -0
  188. pysisyphus/dynamics/mdp.py +364 -0
  189. pysisyphus/dynamics/rattle.py +121 -0
  190. pysisyphus/dynamics/thermostats.py +128 -0
  191. pysisyphus/dynamics/wigner.py +266 -0
  192. pysisyphus/elem_data.py +3473 -0
  193. pysisyphus/exceptions.py +2 -0
  194. pysisyphus/filtertrj.py +69 -0
  195. pysisyphus/helpers.py +623 -0
  196. pysisyphus/helpers_pure.py +649 -0
  197. pysisyphus/init_logging.py +50 -0
  198. pysisyphus/intcoords/Bend.py +69 -0
  199. pysisyphus/intcoords/Bend2.py +25 -0
  200. pysisyphus/intcoords/BondedFragment.py +32 -0
  201. pysisyphus/intcoords/Cartesian.py +41 -0
  202. pysisyphus/intcoords/CartesianCoords.py +140 -0
  203. pysisyphus/intcoords/Coords.py +56 -0
  204. pysisyphus/intcoords/DLC.py +197 -0
  205. pysisyphus/intcoords/DistanceFunction.py +34 -0
  206. pysisyphus/intcoords/DummyImproper.py +70 -0
  207. pysisyphus/intcoords/DummyTorsion.py +72 -0
  208. pysisyphus/intcoords/LinearBend.py +105 -0
  209. pysisyphus/intcoords/LinearDisplacement.py +80 -0
  210. pysisyphus/intcoords/OutOfPlane.py +59 -0
  211. pysisyphus/intcoords/PrimTypes.py +286 -0
  212. pysisyphus/intcoords/Primitive.py +137 -0
  213. pysisyphus/intcoords/RedundantCoords.py +659 -0
  214. pysisyphus/intcoords/RobustTorsion.py +59 -0
  215. pysisyphus/intcoords/Rotation.py +147 -0
  216. pysisyphus/intcoords/Stretch.py +31 -0
  217. pysisyphus/intcoords/Torsion.py +101 -0
  218. pysisyphus/intcoords/Torsion2.py +25 -0
  219. pysisyphus/intcoords/Translation.py +45 -0
  220. pysisyphus/intcoords/__init__.py +61 -0
  221. pysisyphus/intcoords/augment_bonds.py +126 -0
  222. pysisyphus/intcoords/derivatives.py +10512 -0
  223. pysisyphus/intcoords/eval.py +80 -0
  224. pysisyphus/intcoords/exceptions.py +37 -0
  225. pysisyphus/intcoords/findiffs.py +48 -0
  226. pysisyphus/intcoords/generate_derivatives.py +414 -0
  227. pysisyphus/intcoords/helpers.py +235 -0
  228. pysisyphus/intcoords/logging_conf.py +10 -0
  229. pysisyphus/intcoords/mp_derivatives.py +10836 -0
  230. pysisyphus/intcoords/setup.py +962 -0
  231. pysisyphus/intcoords/setup_fast.py +176 -0
  232. pysisyphus/intcoords/update.py +272 -0
  233. pysisyphus/intcoords/valid.py +89 -0
  234. pysisyphus/interpolate/Geodesic.py +93 -0
  235. pysisyphus/interpolate/IDPP.py +55 -0
  236. pysisyphus/interpolate/Interpolator.py +116 -0
  237. pysisyphus/interpolate/LST.py +70 -0
  238. pysisyphus/interpolate/Redund.py +152 -0
  239. pysisyphus/interpolate/__init__.py +9 -0
  240. pysisyphus/interpolate/helpers.py +34 -0
  241. pysisyphus/io/__init__.py +22 -0
  242. pysisyphus/io/aomix.py +178 -0
  243. pysisyphus/io/cjson.py +24 -0
  244. pysisyphus/io/crd.py +101 -0
  245. pysisyphus/io/cube.py +220 -0
  246. pysisyphus/io/fchk.py +184 -0
  247. pysisyphus/io/hdf5.py +49 -0
  248. pysisyphus/io/hessian.py +72 -0
  249. pysisyphus/io/mol2.py +146 -0
  250. pysisyphus/io/molden.py +293 -0
  251. pysisyphus/io/orca.py +189 -0
  252. pysisyphus/io/pdb.py +269 -0
  253. pysisyphus/io/psf.py +79 -0
  254. pysisyphus/io/pubchem.py +31 -0
  255. pysisyphus/io/qcschema.py +34 -0
  256. pysisyphus/io/sdf.py +29 -0
  257. pysisyphus/io/xyz.py +61 -0
  258. pysisyphus/io/zmat.py +175 -0
  259. pysisyphus/irc/DWI.py +108 -0
  260. pysisyphus/irc/DampedVelocityVerlet.py +134 -0
  261. pysisyphus/irc/Euler.py +22 -0
  262. pysisyphus/irc/EulerPC.py +345 -0
  263. pysisyphus/irc/GonzalezSchlegel.py +187 -0
  264. pysisyphus/irc/IMKMod.py +164 -0
  265. pysisyphus/irc/IRC.py +878 -0
  266. pysisyphus/irc/IRCDummy.py +10 -0
  267. pysisyphus/irc/Instanton.py +307 -0
  268. pysisyphus/irc/LQA.py +53 -0
  269. pysisyphus/irc/ModeKill.py +136 -0
  270. pysisyphus/irc/ParamPlot.py +53 -0
  271. pysisyphus/irc/RK4.py +36 -0
  272. pysisyphus/irc/__init__.py +31 -0
  273. pysisyphus/irc/initial_displ.py +219 -0
  274. pysisyphus/linalg.py +411 -0
  275. pysisyphus/line_searches/Backtracking.py +88 -0
  276. pysisyphus/line_searches/HagerZhang.py +184 -0
  277. pysisyphus/line_searches/LineSearch.py +232 -0
  278. pysisyphus/line_searches/StrongWolfe.py +108 -0
  279. pysisyphus/line_searches/__init__.py +9 -0
  280. pysisyphus/line_searches/interpol.py +15 -0
  281. pysisyphus/modefollow/NormalMode.py +40 -0
  282. pysisyphus/modefollow/__init__.py +10 -0
  283. pysisyphus/modefollow/davidson.py +199 -0
  284. pysisyphus/modefollow/lanczos.py +95 -0
  285. pysisyphus/optimizers/BFGS.py +99 -0
  286. pysisyphus/optimizers/BacktrackingOptimizer.py +113 -0
  287. pysisyphus/optimizers/ConjugateGradient.py +98 -0
  288. pysisyphus/optimizers/CubicNewton.py +75 -0
  289. pysisyphus/optimizers/FIRE.py +113 -0
  290. pysisyphus/optimizers/HessianOptimizer.py +1176 -0
  291. pysisyphus/optimizers/LBFGS.py +228 -0
  292. pysisyphus/optimizers/LayerOpt.py +411 -0
  293. pysisyphus/optimizers/MicroOptimizer.py +169 -0
  294. pysisyphus/optimizers/NCOptimizer.py +90 -0
  295. pysisyphus/optimizers/Optimizer.py +1084 -0
  296. pysisyphus/optimizers/PreconLBFGS.py +260 -0
  297. pysisyphus/optimizers/PreconSteepestDescent.py +7 -0
  298. pysisyphus/optimizers/QuickMin.py +74 -0
  299. pysisyphus/optimizers/RFOptimizer.py +181 -0
  300. pysisyphus/optimizers/RSA.py +99 -0
  301. pysisyphus/optimizers/StabilizedQNMethod.py +248 -0
  302. pysisyphus/optimizers/SteepestDescent.py +23 -0
  303. pysisyphus/optimizers/StringOptimizer.py +173 -0
  304. pysisyphus/optimizers/__init__.py +41 -0
  305. pysisyphus/optimizers/closures.py +301 -0
  306. pysisyphus/optimizers/cls_map.py +58 -0
  307. pysisyphus/optimizers/exceptions.py +6 -0
  308. pysisyphus/optimizers/gdiis.py +280 -0
  309. pysisyphus/optimizers/guess_hessians.py +311 -0
  310. pysisyphus/optimizers/hessian_updates.py +355 -0
  311. pysisyphus/optimizers/poly_fit.py +285 -0
  312. pysisyphus/optimizers/precon.py +153 -0
  313. pysisyphus/optimizers/restrict_step.py +24 -0
  314. pysisyphus/pack.py +172 -0
  315. pysisyphus/peakdetect.py +948 -0
  316. pysisyphus/plot.py +1031 -0
  317. pysisyphus/run.py +2106 -0
  318. pysisyphus/socket_helper.py +74 -0
  319. pysisyphus/stocastic/FragmentKick.py +132 -0
  320. pysisyphus/stocastic/Kick.py +81 -0
  321. pysisyphus/stocastic/Pipeline.py +303 -0
  322. pysisyphus/stocastic/__init__.py +21 -0
  323. pysisyphus/stocastic/align.py +127 -0
  324. pysisyphus/testing.py +96 -0
  325. pysisyphus/thermo.py +156 -0
  326. pysisyphus/trj.py +824 -0
  327. pysisyphus/tsoptimizers/RSIRFOptimizer.py +56 -0
  328. pysisyphus/tsoptimizers/RSPRFOptimizer.py +182 -0
  329. pysisyphus/tsoptimizers/TRIM.py +59 -0
  330. pysisyphus/tsoptimizers/TSHessianOptimizer.py +463 -0
  331. pysisyphus/tsoptimizers/__init__.py +23 -0
  332. pysisyphus/wavefunction/Basis.py +239 -0
  333. pysisyphus/wavefunction/DIIS.py +76 -0
  334. pysisyphus/wavefunction/__init__.py +25 -0
  335. pysisyphus/wavefunction/build_ext.py +42 -0
  336. pysisyphus/wavefunction/cart2sph.py +190 -0
  337. pysisyphus/wavefunction/diabatization.py +304 -0
  338. pysisyphus/wavefunction/excited_states.py +435 -0
  339. pysisyphus/wavefunction/gen_ints.py +1811 -0
  340. pysisyphus/wavefunction/helpers.py +104 -0
  341. pysisyphus/wavefunction/ints/__init__.py +0 -0
  342. pysisyphus/wavefunction/ints/boys.py +193 -0
  343. pysisyphus/wavefunction/ints/boys_table_N_64_xasym_27.1_step_0.01.npy +0 -0
  344. pysisyphus/wavefunction/ints/cart_gto3d.py +176 -0
  345. pysisyphus/wavefunction/ints/coulomb3d.py +25928 -0
  346. pysisyphus/wavefunction/ints/diag_quadrupole3d.py +10036 -0
  347. pysisyphus/wavefunction/ints/dipole3d.py +8762 -0
  348. pysisyphus/wavefunction/ints/int2c2e3d.py +7198 -0
  349. pysisyphus/wavefunction/ints/int3c2e3d_sph.py +65040 -0
  350. pysisyphus/wavefunction/ints/kinetic3d.py +8240 -0
  351. pysisyphus/wavefunction/ints/ovlp3d.py +3777 -0
  352. pysisyphus/wavefunction/ints/quadrupole3d.py +15054 -0
  353. pysisyphus/wavefunction/ints/self_ovlp3d.py +198 -0
  354. pysisyphus/wavefunction/localization.py +458 -0
  355. pysisyphus/wavefunction/multipole.py +159 -0
  356. pysisyphus/wavefunction/normalization.py +36 -0
  357. pysisyphus/wavefunction/pop_analysis.py +134 -0
  358. pysisyphus/wavefunction/shells.py +1171 -0
  359. pysisyphus/wavefunction/wavefunction.py +504 -0
  360. pysisyphus/wrapper/__init__.py +11 -0
  361. pysisyphus/wrapper/exceptions.py +2 -0
  362. pysisyphus/wrapper/jmol.py +120 -0
  363. pysisyphus/wrapper/mwfn.py +169 -0
  364. pysisyphus/wrapper/packmol.py +71 -0
  365. pysisyphus/xyzloader.py +168 -0
  366. pysisyphus/yaml_mods.py +45 -0
  367. thermoanalysis/LICENSE +674 -0
  368. thermoanalysis/QCData.py +244 -0
  369. thermoanalysis/__init__.py +0 -0
  370. thermoanalysis/config.py +3 -0
  371. thermoanalysis/constants.py +20 -0
  372. thermoanalysis/thermo.py +1011 -0
mlmm/path_opt.py ADDED
@@ -0,0 +1,1353 @@
1
+ # mlmm/path_opt.py
2
+
3
+ """
4
+ ML/MM minimum-energy path optimization via Growing String Method or Direct Max Flux.
5
+
6
+ Example:
7
+ mlmm path-opt -i reac.pdb prod.pdb --parm real.parm7 --model-pdb ml_region.pdb -q 0
8
+ mlmm path-opt -i reac.pdb prod.pdb --parm real.parm7 -q 0 --mep-mode dmf
9
+
10
+ For detailed documentation, see: docs/path_opt.md
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from copy import deepcopy
16
+ from pathlib import Path
17
+ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple
18
+
19
+ import gc
20
+ import logging
21
+ import sys
22
+ import traceback
23
+ import textwrap
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+ import click
28
+ import numpy as np
29
+ import time
30
+ import torch
31
+
32
+ from pysisyphus.helpers import geom_loader
33
+ from pysisyphus.cos.GrowingString import GrowingString
34
+ from pysisyphus.optimizers.StringOptimizer import StringOptimizer
35
+ from pysisyphus.optimizers.exceptions import OptimizationError
36
+ from pysisyphus.optimizers.LBFGS import LBFGS # <-- added for --preopt
37
+
38
+ from .mlmm_calc import mlmm, MLMMASECalculator
39
+ from .opt import (
40
+ GEOM_KW as OPT_GEOM_KW,
41
+ CALC_KW as OPT_CALC_KW,
42
+ LBFGS_KW as OPT_LBFGS_KW,
43
+ _parse_freeze_atoms as _parse_freeze_atoms_opt,
44
+ _normalize_geom_freeze as _normalize_geom_freeze_opt,
45
+ )
46
+ from .utils import (
47
+ apply_layer_freeze_constraints,
48
+ convert_xyz_to_pdb,
49
+ set_convert_file_enabled,
50
+ deep_update,
51
+ load_yaml_dict,
52
+ apply_yaml_overrides,
53
+ pretty_block,
54
+ strip_inherited_keys,
55
+ filter_calc_for_echo,
56
+ format_freeze_atoms_for_echo,
57
+ format_elapsed,
58
+ merge_freeze_atom_indices,
59
+ prepare_input_structure,
60
+ resolve_charge_spin_or_raise,
61
+ PreparedInputStructure,
62
+ parse_indices_string,
63
+ build_model_pdb_from_bfactors,
64
+ build_model_pdb_from_indices,
65
+ )
66
+ from .cli_utils import resolve_yaml_sources, load_merged_yaml_cfg, make_is_param_explicit
67
+ from .align_freeze_atoms import align_and_refine_sequence_inplace
68
+ from .defaults import (
69
+ BFACTOR_FROZEN,
70
+ BFACTOR_ML,
71
+ BFACTOR_MOVABLE_MM,
72
+ DMF_KW as _DMF_KW_DEFAULT,
73
+ GS_KW as _GS_KW_DEFAULT,
74
+ STOPT_KW as _STOPT_KW_DEFAULT,
75
+ )
76
+
77
+
78
+ # -----------------------------------------------
79
+ # Defaults (overridden by YAML/CLI)
80
+ # -----------------------------------------------
81
+
82
+ # Geometry (input handling): independent deep copy of opt.py's GEOM_KW, so edits here never touch opt.py's dict
83
+ GEOM_KW: Dict[str, Any] = deepcopy(OPT_GEOM_KW)
84
+
85
+ # ML/MM calculator settings: independent deep copy of opt.py's CALC_KW
86
+ CALC_KW: Dict[str, Any] = deepcopy(OPT_CALC_KW)
87
+
88
+ # LBFGS (used for optional endpoint pre-optimization): independent deep copy of opt.py's LBFGS_KW
89
+ LBFGS_KW: Dict[str, Any] = deepcopy(OPT_LBFGS_KW)
90
+
91
+ # DMF (Direct Max Flux) defaults: independent deep copy of defaults.DMF_KW
92
+ DMF_KW: Dict[str, Any] = deepcopy(_DMF_KW_DEFAULT)
93
+
94
+ # GrowingString (path representation): independent deep copy of defaults.GS_KW
95
+ GS_KW: Dict[str, Any] = deepcopy(_GS_KW_DEFAULT)
96
+
97
+ # StringOptimizer (optimization control): independent deep copy of defaults.STOPT_KW
98
+ STOPT_KW: Dict[str, Any] = deepcopy(_STOPT_KW_DEFAULT)
99
+
100
def _load_two_endpoints(
    inputs: Sequence[PreparedInputStructure],
    coord_type: str,
    base_freeze: Sequence[int],
) -> Sequence:
    """
    Load every prepared input as a geometry and attach its frozen-atom indices.

    Args:
        inputs: Prepared input structures; each one's ``geom_path`` is read.
        coord_type: Coordinate type forwarded to ``geom_loader``.
        base_freeze: Atom indices to freeze; a fresh list copy is handed to
            ``merge_freeze_atom_indices`` for every structure.

    Returns:
        A list with one geometry per entry of ``inputs``, in input order.
        Despite the function name, the number of inputs is not checked here.
    """

    def _load_one(prepared):
        # Same order as before: load first, then resolve the freeze list.
        geometry = geom_loader(prepared.geom_path, coord_type=coord_type)
        merged = merge_freeze_atom_indices({"freeze_atoms": list(base_freeze)})
        geometry.freeze_atoms = np.array(merged, dtype=int)
        return geometry

    return [_load_one(prepared) for prepared in inputs]
117
+
118
+
119
+ # Module-local aliases for the two helpers imported from opt.py (same private names as in opt.py)
120
+ _parse_freeze_atoms = _parse_freeze_atoms_opt
121
+ _normalize_geom_freeze = _normalize_geom_freeze_opt
122
+
123
+
124
+ # -----------------------------------------------
125
+ # B-factor annotation helpers (added)
126
+ # -----------------------------------------------
127
+
128
def _parse_pdb_atoms_for_indexing(pdb_path: Path) -> List[Dict[str, Any]]:
    """Collect the identifying fields of every ATOM/HETATM record in a PDB file.

    Each record is right-padded with spaces to at least 80 characters before
    the fixed-width columns are sliced, so short lines never raise.

    Returns:
        One dict per record, in file order, with the keys ``line`` (the padded
        record text), ``serial``, ``name``, ``altloc``, ``resname``, ``chain``,
        ``resseq`` and ``icode``. ``serial`` and ``resseq`` are ``int`` or
        ``None`` when the column is blank or not a valid integer; all other
        fields are whitespace-stripped strings.
    """

    def _int_or_none(column: str) -> Optional[int]:
        # Blank or non-numeric columns are reported as None instead of raising.
        text = column.strip()
        if not text:
            return None
        try:
            return int(text)
        except ValueError:
            return None

    records: List[Dict[str, Any]] = []
    with open(pdb_path, "r") as handle:
        for raw in handle:
            if not raw.startswith(("ATOM ", "HETATM")):
                continue
            rec = raw.rstrip("\n").ljust(80)
            records.append(
                {
                    "line": rec,
                    "serial": _int_or_none(rec[6:11]),
                    "name": rec[12:16].strip(),
                    "altloc": rec[16].strip(),
                    "resname": rec[17:20].strip(),
                    "chain": rec[21].strip(),
                    "resseq": _int_or_none(rec[22:26]),
                    "icode": rec[26].strip(),
                }
            )
    return records
159
+
160
+
161
def _compute_ml_indices_from_model_and_ref(ref_pdb: Path, model_pdb: Path) -> Set[int]:
    """
    Map the atoms listed in ``model_pdb`` onto 0-based record positions in ``ref_pdb``.

    Positions count the ATOM/HETATM records of ``ref_pdb`` in file order.
    Each model atom is resolved with the first rule that yields a position:

    1) Exact identity match on (name, altloc, resname, chain, resseq, icode).
       If several reference atoms share that identity, the model atom's serial
       number is used to pick one of them.
    2) If the model atom has a blank altloc: identity match ignoring altloc,
       taking the first reference atom found.
    3) Serial-number match (first reference atom carrying that serial).

    Model atoms that cannot be resolved are counted and reported once on
    stderr; they do not raise.

    Returns:
        The set of resolved reference positions.
    """
    ref_records = _parse_pdb_atoms_for_indexing(ref_pdb)
    model_records = _parse_pdb_atoms_for_indexing(model_pdb)

    def _strict_key(rec: Dict[str, Any]) -> Tuple[str, str, str, str, Optional[int], str]:
        return (rec["name"], rec["altloc"], rec["resname"], rec["chain"], rec["resseq"], rec["icode"])

    def _relaxed_key(rec: Dict[str, Any]) -> Tuple[str, str, str, Optional[int], str]:
        return (rec["name"], rec["resname"], rec["chain"], rec["resseq"], rec["icode"])

    # Lookup tables over the reference structure.
    strict_lookup: Dict[Tuple[str, str, str, str, Optional[int], str], List[int]] = {}
    relaxed_lookup: Dict[Tuple[str, str, str, Optional[int], str], List[int]] = {}
    first_by_serial: Dict[int, int] = {}
    for position, rec in enumerate(ref_records):
        strict_lookup.setdefault(_strict_key(rec), []).append(position)
        relaxed_lookup.setdefault(_relaxed_key(rec), []).append(position)
        serial = rec["serial"]
        if serial is not None:
            # Duplicated serials: only the first occurrence is remembered.
            first_by_serial.setdefault(serial, position)

    def _locate(rec: Dict[str, Any]) -> Optional[int]:
        serial = rec["serial"]

        # 1) Exact identity, disambiguated by serial when not unique.
        exact = strict_lookup.get(_strict_key(rec))
        if exact:
            if len(exact) == 1:
                return exact[0]
            if serial is not None:
                by_serial = first_by_serial.get(serial)
                if by_serial in exact:
                    return by_serial

        # 2) Identity without altloc, only when the model altloc is blank.
        if rec["altloc"] in ("", " "):
            relaxed = relaxed_lookup.get(_relaxed_key(rec))
            if relaxed:
                return relaxed[0]

        # 3) Serial number alone.
        if serial is not None:
            return first_by_serial.get(serial)
        return None

    resolved = [_locate(rec) for rec in model_records]
    ml_indices: Set[int] = {position for position in resolved if position is not None}
    misses = sum(position is None for position in resolved)

    if misses:
        click.echo(f"[annotate] WARNING: {misses} ML atoms from '{model_pdb.name}' could not be mapped to '{ref_pdb.name}'.", err=True)

    return ml_indices
227
+
228
+
229
def _apply_bfactor_annotations_inplace(
    pdb_path: Path,
    ml_indices: Set[int],
    freeze_indices: Sequence[int],
    beta_ml: float = BFACTOR_ML,
    beta_freeze: float = BFACTOR_FROZEN,
    beta_both: float = BFACTOR_ML,
) -> None:
    """
    Rewrite the B-factor column of every ATOM/HETATM record of a PDB file in place.

    The value written for an atom depends on which index sets contain it:

    - in ``ml_indices`` and ``freeze_indices``: ``beta_both``
    - in ``ml_indices`` only:                  ``beta_ml``
    - in ``freeze_indices`` only:              ``beta_freeze``
    - in neither:                              ``BFACTOR_MOVABLE_MM``

    The lookup index is the 0-based position among ATOM/HETATM records and
    restarts at 0 on every line starting with ``MODEL``. All other lines are
    copied unchanged.

    Args:
        pdb_path: PDB file to read and overwrite.
        ml_indices: Record positions of ML-region atoms (``None`` means none).
        freeze_indices: Record positions of frozen atoms. Any iterable of
            integers is accepted, including NumPy arrays (``None`` means none).
        beta_ml: B-factor for atoms that are only in ``ml_indices``.
        beta_freeze: B-factor for atoms that are only in ``freeze_indices``.
        beta_both: B-factor for atoms in both sets.
    """
    # Test against None explicitly: `x or []` raises ValueError for NumPy
    # arrays with more than one element (ambiguous truth value).
    freeze_set: Set[int] = set() if freeze_indices is None else {int(i) for i in freeze_indices}
    ml_set: Set[int] = set() if ml_indices is None else {int(i) for i in ml_indices}

    with open(pdb_path, "r") as f:
        lines = f.readlines()

    lines_out: List[str] = []
    atom_idx = 0  # 0-based within a MODEL (or the entire file if no MODEL)

    for line in lines:
        if line.startswith("MODEL"):
            # Atom numbering restarts with every model.
            atom_idx = 0
            lines_out.append(line)
            continue

        if not line.startswith(("ATOM ", "HETATM")):
            lines_out.append(line)
            continue

        # Pad to at least 66 chars so the tempFactor field (cols 61-66) exists.
        s = line.rstrip("\n").ljust(66)

        in_ml = atom_idx in ml_set
        in_freeze = atom_idx in freeze_set
        if in_ml and in_freeze:
            b = beta_both
        elif in_ml:
            b = beta_ml
        elif in_freeze:
            b = beta_freeze
        else:
            b = BFACTOR_MOVABLE_MM

        # tempFactor is a 6.2-wide field; the newline stripped above is restored.
        lines_out.append(s[:60] + f"{b:6.2f}" + s[66:] + "\n")
        atom_idx += 1

    with open(pdb_path, "w") as f:
        f.writelines(lines_out)

    # Report the values actually written (identical to the module constants
    # when the default beta_* arguments are used).
    click.echo(
        f"[annotate] Updated B-factors in '{pdb_path}' "
        f"(ML={beta_ml:.0f}, MovableMM={BFACTOR_MOVABLE_MM:.0f}, "
        f"FrozenMM={beta_freeze:.0f}; {len(ml_set)} ML, {len(freeze_set)} frozen)."
    )
303
+
304
+
305
+ # -----------------------------------------------
306
+ # HEI selection helper
307
+ # -----------------------------------------------
308
+
309
def _select_hei_index(energies: Sequence[float]) -> int:
    """Return the index of the highest-energy image (HEI) of an energy profile.

    Selection order:
      1. With three or more values, the highest strict interior local maximum
         (greater than both neighbours); ties go to the lowest index.
      2. With three or more values but no such maximum, the largest interior
         value (endpoints excluded).
      3. With fewer than three values, the overall argmax.
    """
    profile = np.array(energies, dtype=float)
    if int(len(profile)) < 3:
        return int(np.argmax(profile))

    interior = profile[1:-1]
    # Strict comparison against the left and right neighbour of each interior point.
    is_peak = (interior > profile[:-2]) & (interior > profile[2:])
    peak_positions = np.flatnonzero(is_peak) + 1
    if peak_positions.size:
        # argmax returns the first maximum, i.e. the lowest index among ties.
        return int(peak_positions[np.argmax(profile[peak_positions])])
    return 1 + int(np.argmax(interior))
324
+
325
+
326
+ # -----------------------------------------------
327
+ # DMF (Direct Max Flux) MEP optimization
328
+ # -----------------------------------------------
329
+
330
+ def _run_dmf_mep(
331
+ geoms: Sequence,
332
+ calc_cfg: Dict[str, Any],
333
+ out_dir_path: Path,
334
+ input_paths: Sequence[Path],
335
+ max_nodes: int,
336
+ fix_atoms: Sequence[int],
337
+ dmf_cfg: Optional[Dict[str, Any]] = None,
338
+ ml_indices_set: Optional[Set[int]] = None,
339
+ freeze_atoms_final: Optional[Sequence[int]] = None,
340
+ ) -> None:
341
+ """Run Direct Max Flux (DMF) MEP optimization between two endpoints.
342
+
343
+ Uses pydmf (CPU version) with harmonic constraints for frozen atoms.
344
+ The ML/MM ONIOM calculator is wrapped as an ASE calculator.
345
+
346
+ References:
347
+ [1] S.-i. Koda and S. Saito, JCTC, 20, 2798-2811 (2024). doi: 10.1021/acs.jctc.3c01246
348
+ [2] S.-i. Koda and S. Saito, JCTC, 20, 7176-7187 (2024). doi: 10.1021/acs.jctc.4c00792
349
+ [3] S.-i. Koda and S. Saito, JCTC, 21, 3513-3522 (2025). doi: 10.1021/acs.jctc.4c01549
350
+ """
351
+ try:
352
+ from ase.io import read as ase_read, write as ase_write
353
+ from ase.calculators.mixing import SumCalculator
354
+ from dmf import DirectMaxFlux, interpolate_fbenm
355
+ except Exception as e:
356
+ raise RuntimeError(
357
+ "DMF mode requires ase, cyipopt, and pydmf to be installed. "
358
+ f"Import error: {e}"
359
+ ) from e
360
+
361
+ from .harmonic_constraints import HarmonicFixAtoms
362
+
363
+ def _geom_to_ase(g):
364
+ from io import StringIO
365
+ return ase_read(StringIO(g.as_xyz()), format="xyz")
366
+
367
+ fix_atoms = list(sorted(set(map(int, fix_atoms))))
368
+
369
+ ref_images = [_geom_to_ase(g) for g in geoms]
370
+ charge = int(calc_cfg.get("model_charge", 0))
371
+ spin = int(calc_cfg.get("model_mult", 1))
372
+ for img in ref_images:
373
+ img.info["charge"] = charge
374
+ img.info["spin"] = spin
375
+
376
+ # Build the ONIOM ASE calculator
377
+ shared_pysis_calc = mlmm(**calc_cfg)
378
+ ase_calc = MLMMASECalculator(core=shared_pysis_calc.core)
379
+
380
+ dmf_cfg = deep_update(dict(DMF_KW), dmf_cfg)
381
+ fbenm_opts: Dict[str, Any] = dict(dmf_cfg.get("fbenm_options", {}))
382
+ cfbenm_opts: Dict[str, Any] = dict(dmf_cfg.get("cfbenm_options", {}))
383
+ dmf_opts: Dict[str, Any] = dict(dmf_cfg.get("dmf_options", {}))
384
+ update_teval = bool(dmf_opts.pop("update_teval", False))
385
+ k_fix = float(dmf_cfg.get("k_fix", DMF_KW["k_fix"]))
386
+
387
+ # Run FB-ENM interpolation
388
+ click.echo("\n=== DMF: FB-ENM interpolation ===\n")
389
+ mxflx_fbenm = interpolate_fbenm(
390
+ ref_images,
391
+ nmove=max(1, int(max_nodes)),
392
+ fbenm_only_endpoints=bool(dmf_cfg.get("fbenm_only_endpoints", False)),
393
+ correlated=bool(dmf_cfg.get("correlated", False)),
394
+ sequential=bool(dmf_cfg.get("sequential", False)),
395
+ output_file=str(out_dir_path / "dmf_fbenm_ipopt.out"),
396
+ fbenm_options=fbenm_opts,
397
+ cfbenm_options=cfbenm_opts,
398
+ dmf_options=dmf_opts,
399
+ )
400
+
401
+ initial_trj = out_dir_path / "dmf_initial_trj.xyz"
402
+ ase_write(initial_trj, mxflx_fbenm.images, format="xyz")
403
+ click.echo(f"[write] Wrote '{initial_trj}' ({len(mxflx_fbenm.images)} images).")
404
+
405
+ # Convert initial trajectory to PDB if possible
406
+ if input_paths[0].suffix.lower() == ".pdb":
407
+ try:
408
+ initial_pdb = initial_trj.with_suffix(".pdb")
409
+ convert_xyz_to_pdb(initial_trj, input_paths[0].resolve(), initial_pdb)
410
+ click.echo(f"[convert] Wrote '{initial_pdb}'.")
411
+ except Exception as e:
412
+ click.echo(f"[convert] WARNING: {e}", err=True)
413
+
414
+ coefs = mxflx_fbenm.coefs.copy()
415
+
416
+ # Create DirectMaxFlux object
417
+ click.echo("\n=== DMF: Direct Max Flux optimization ===\n")
418
+ mxflx = DirectMaxFlux(
419
+ ref_images,
420
+ coefs=coefs,
421
+ nmove=max(1, int(max_nodes)),
422
+ update_teval=update_teval,
423
+ remove_rotation_and_translation=bool(
424
+ dmf_opts.get("remove_rotation_and_translation", False)
425
+ ),
426
+ mass_weighted=bool(dmf_opts.get("mass_weighted", False)),
427
+ parallel=bool(dmf_opts.get("parallel", False)),
428
+ eps_vel=float(dmf_opts.get("eps_vel", DMF_KW["dmf_options"]["eps_vel"])),
429
+ eps_rot=float(dmf_opts.get("eps_rot", DMF_KW["dmf_options"]["eps_rot"])),
430
+ beta=float(dmf_opts.get("beta", DMF_KW["dmf_options"]["beta"])),
431
+ )
432
+
433
+ # Assign calculators to images
434
+ for image in mxflx.images:
435
+ if "charge" not in image.info:
436
+ image.info["charge"] = charge
437
+ if "spin" not in image.info:
438
+ image.info["spin"] = spin
439
+
440
+ if fix_atoms:
441
+ ref_positions = image.get_positions()[fix_atoms]
442
+ harmonic_calc = HarmonicFixAtoms(
443
+ indices=fix_atoms,
444
+ ref_positions=ref_positions,
445
+ k_fix=k_fix,
446
+ )
447
+ image.calc = SumCalculator([ase_calc, harmonic_calc])
448
+ else:
449
+ image.calc = ase_calc
450
+
451
+ mxflx.add_ipopt_options({"output_file": str(out_dir_path / "dmf_ipopt.out")})
452
+ max_cycles = dmf_cfg.get("max_cycles") if isinstance(dmf_cfg, dict) else None
453
+ if max_cycles is not None:
454
+ try:
455
+ max_iter = int(max_cycles)
456
+ if max_iter > 0:
457
+ mxflx.add_ipopt_options({"max_iter": max_iter})
458
+ except Exception:
459
+ logger.debug("Failed to set ipopt max_iter option", exc_info=True)
460
+ mxflx.solve(tol="tight")
461
+ click.echo("\n=== DMF: optimization finished ===\n")
462
+
463
+ # Evaluate final energies using the PySisyphus calculator for consistency
464
+ from pysisyphus.constants import ANG2BOHR
465
+ energies = []
466
+ for image in mxflx.images:
467
+ elems = image.get_chemical_symbols()
468
+ coords_bohr = np.asarray(image.get_positions(), dtype=float).reshape(-1, 3) * ANG2BOHR
469
+ energies.append(float(shared_pysis_calc.get_energy(elems, coords_bohr)["energy"]))
470
+ hei_idx = _select_hei_index(energies)
471
+
472
+ # Write final trajectory
473
+ final_trj = out_dir_path / "final_geometries_trj.xyz"
474
+ blocks = []
475
+ for idx, (image, E) in enumerate(zip(mxflx.images, energies)):
476
+ from io import StringIO
477
+ buf = StringIO()
478
+ ase_write(buf, image, format="xyz")
479
+ s = buf.getvalue()
480
+ lines = s.splitlines()
481
+ if len(lines) >= 2 and lines[0].strip().isdigit():
482
+ lines[1] = f"{E:.12f}"
483
+ blocks.append("\n".join(lines) + "\n")
484
+ with open(final_trj, "w") as f:
485
+ f.write("".join(blocks))
486
+ click.echo(f"[write] Wrote '{final_trj}' with energy.")
487
+
488
+ # Convert to PDB
489
+ if input_paths[0].suffix.lower() == ".pdb":
490
+ ref_pdb = input_paths[0].resolve()
491
+ try:
492
+ final_pdb = out_dir_path / "final_geometries.pdb"
493
+ convert_xyz_to_pdb(final_trj, ref_pdb, final_pdb)
494
+ click.echo(f"[convert] Wrote '{final_pdb}'.")
495
+ _apply_bfactor_annotations_inplace(
496
+ final_pdb,
497
+ ml_indices=ml_indices_set or set(),
498
+ freeze_indices=freeze_atoms_final or [],
499
+ )
500
+ except Exception as e:
501
+ click.echo(f"[convert] WARNING: {e}", err=True)
502
+
503
+ # Write HEI
504
+ hei_geom = mxflx.images[hei_idx]
505
+ hei_E = energies[hei_idx]
506
+ hei_xyz = out_dir_path / "hei.xyz"
507
+ from io import StringIO
508
+ buf = StringIO()
509
+ ase_write(buf, hei_geom, format="xyz")
510
+ s = buf.getvalue()
511
+ lines = s.splitlines()
512
+ if len(lines) >= 2 and lines[0].strip().isdigit():
513
+ lines[1] = f"{hei_E:.12f}"
514
+ s = "\n".join(lines) + "\n"
515
+ with open(hei_xyz, "w") as f:
516
+ f.write(s)
517
+ click.echo(f"[write] Wrote '{hei_xyz}' (HEI index={hei_idx}).")
518
+
519
+ if input_paths[0].suffix.lower() == ".pdb":
520
+ ref_pdb = input_paths[0].resolve()
521
+ hei_pdb = out_dir_path / "hei.pdb"
522
+ try:
523
+ convert_xyz_to_pdb(hei_xyz, ref_pdb, hei_pdb)
524
+ click.echo(f"[convert] Wrote '{hei_pdb}'.")
525
+ _apply_bfactor_annotations_inplace(
526
+ hei_pdb,
527
+ ml_indices=ml_indices_set or set(),
528
+ freeze_indices=freeze_atoms_final or [],
529
+ )
530
+ except Exception as e:
531
+ click.echo(f"[convert] WARNING: {e}", err=True)
532
+
533
+
534
+ # -----------------------------------------------
535
+ # CLI
536
+ # -----------------------------------------------
537
+
538
+ @click.command(
539
+ help="MEP optimization via the Growing String method or Direct Max Flux.",
540
+ context_settings={"help_option_names": ["-h", "--help"]},
541
+ )
542
+ @click.option(
543
+ "-i", "--input",
544
+ "input_paths",
545
+ type=click.Path(path_type=Path, exists=True, dir_okay=False),
546
+ nargs=2,
547
+ required=True,
548
+ help="Two endpoint structures (reactant/product); both must be full-enzyme PDBs.",
549
+ )
550
+ @click.option(
551
+ "-q",
552
+ "--charge",
553
+ type=int,
554
+ required=False,
555
+ help="Total charge. Required unless --ligand-charge is provided.",
556
+ )
557
+ @click.option("-l", "--ligand-charge", type=str, default=None, show_default=False,
558
+ help="Total charge or per-resname mapping (e.g., GPP:-3,SAM:1) used to derive "
559
+ "charge when -q is omitted (requires PDB input or --ref-pdb).")
560
+ @click.option(
561
+ "-m",
562
+ "--multiplicity",
563
+ "spin",
564
+ type=int,
565
+ default=None,
566
+ show_default=False,
567
+ help="Spin multiplicity (2S+1). Defaults to 1 when omitted.",
568
+ )
569
+ @click.option(
570
+ "--mep-mode",
571
+ type=click.Choice(["gsm", "dmf"], case_sensitive=False),
572
+ default="gsm",
573
+ show_default=True,
574
+ help="MEP optimizer: Growing String Method (gsm) or Direct Max Flux (dmf).",
575
+ )
576
+ @click.option("--max-nodes", type=int, default=GS_KW["max_nodes"], show_default=True,
577
+ help="Number of internal nodes (string has max_nodes+2 images including endpoints).")
578
+ @click.option("--max-cycles", type=int, default=300, show_default=True, help="Maximum optimization cycles.")
579
+ @click.option(
580
+ "--climb/--no-climb",
581
+ default=True,
582
+ show_default=True,
583
+ help="Search for a transition state (climbing image) after path growth.",
584
+ )
585
+ @click.option(
586
+ "--preopt/--no-preopt",
587
+ default=False,
588
+ show_default=True,
589
+ help="Pre-optimize the two endpoint structures with LBFGS before string growth.",
590
+ )
591
+ @click.option("--preopt-max-cycles", "preopt_max_cycles", type=int, default=10000, show_default=True,
592
+ help="Maximum LBFGS cycles for endpoint pre-optimization when --preopt=True.")
593
+ @click.option(
594
+ "--fix-ends/--no-fix-ends",
595
+ default=False,
596
+ show_default=True,
597
+ help="Fix endpoint structures during path growth.",
598
+ )
599
+ @click.option(
600
+ "--dump/--no-dump",
601
+ default=False,
602
+ show_default=True,
603
+ help="Dump optimizer trajectory/restarts during the run.",
604
+ )
605
+ @click.option("--out-dir", "out_dir", type=str, default="./result_path_opt/", show_default=True,
606
+ help="Output directory.")
607
+ @click.option(
608
+ "--thresh",
609
+ type=click.Choice(["gau_loose", "gau", "gau_tight", "gau_vtight", "baker", "never"], case_sensitive=False),
610
+ default=None,
611
+ help="Convergence preset for the string optimizer.",
612
+ )
613
+ @click.option(
614
+ "--config",
615
+ "config_yaml",
616
+ type=click.Path(path_type=Path, exists=True, dir_okay=False),
617
+ default=None,
618
+ help="Base YAML configuration file applied before explicit CLI options.",
619
+ )
620
+ @click.option(
621
+ "--show-config/--no-show-config",
622
+ "show_config",
623
+ default=False,
624
+ show_default=True,
625
+ help="Print resolved configuration and continue execution.",
626
+ )
627
+ @click.option(
628
+ "--dry-run/--no-dry-run",
629
+ "dry_run",
630
+ default=False,
631
+ show_default=True,
632
+ help="Validate options and print the execution plan without running path optimization.",
633
+ )
634
+ @click.option(
635
+ "--parm",
636
+ "real_parm7",
637
+ type=click.Path(path_type=Path, exists=True, dir_okay=False),
638
+ required=True,
639
+ help="Amber parm7 topology for the enzyme complex (MM layers).",
640
+ )
641
+ @click.option(
642
+ "--model-pdb",
643
+ type=click.Path(path_type=Path, exists=True, dir_okay=False),
644
+ required=False,
645
+ help="PDB defining the ML region (atom IDs used by the ML/MM calculator). "
646
+ "Optional when --detect-layer is enabled.",
647
+ )
648
+ @click.option(
649
+ "--model-indices",
650
+ "model_indices_str",
651
+ type=str,
652
+ default=None,
653
+ show_default=False,
654
+ help="Comma-separated atom indices for the ML region (ranges allowed like 1-5). "
655
+ "Used when --model-pdb is omitted.",
656
+ )
657
+ @click.option(
658
+ "--model-indices-one-based/--model-indices-zero-based",
659
+ "model_indices_one_based",
660
+ default=True,
661
+ show_default=True,
662
+ help="Interpret --model-indices as 1-based (default) or 0-based.",
663
+ )
664
+ @click.option(
665
+ "--detect-layer/--no-detect-layer",
666
+ "detect_layer",
667
+ default=True,
668
+ show_default=True,
669
+ help="Detect ML/MM layers from input PDB B-factors (B=0/10/20). "
670
+ "If disabled, you must provide --model-pdb or --model-indices.",
671
+ )
672
+ @click.option(
673
+ "--freeze-atoms",
674
+ "freeze_atoms_cli",
675
+ type=str,
676
+ default=None,
677
+ help="Comma-separated 1-based indices to freeze (applied to every image).",
678
+ )
679
+ @click.option(
680
+ "--hess-cutoff",
681
+ "hess_cutoff",
682
+ type=float,
683
+ default=None,
684
+ show_default=False,
685
+ help="Distance cutoff (Å) from ML region for MM atoms to include in Hessian calculation. "
686
+ "Applied to movable MM atoms and can be combined with --detect-layer.",
687
+ )
688
+ @click.option(
689
+ "--movable-cutoff",
690
+ "movable_cutoff",
691
+ type=float,
692
+ default=None,
693
+ show_default=False,
694
+ help="Distance cutoff (Å) from ML region for movable MM atoms. MM atoms beyond this are frozen. "
695
+ "Providing --movable-cutoff disables --detect-layer.",
696
+ )
697
+ @click.option(
698
+ "--convert-files/--no-convert-files",
699
+ "convert_files",
700
+ default=True,
701
+ show_default=True,
702
+ help="Convert XYZ/TRJ outputs into PDB companions based on the input format.",
703
+ )
704
+ @click.option(
705
+ "-b", "--backend",
706
+ type=click.Choice(["uma", "orb", "mace", "aimnet2"], case_sensitive=False),
707
+ default=None,
708
+ show_default=False,
709
+ help="ML backend for the ONIOM high-level region (default: uma).",
710
+ )
711
+ @click.option(
712
+ "--embedcharge/--no-embedcharge",
713
+ "embedcharge",
714
+ default=False,
715
+ show_default=True,
716
+ help="Enable xTB point-charge embedding correction for MM→ML environmental effects.",
717
+ )
718
+ @click.option(
719
+ "--embedcharge-cutoff",
720
+ "embedcharge_cutoff",
721
+ type=float,
722
+ default=None,
723
+ show_default=False,
724
+ help="Distance cutoff (Å) from ML region for MM point charges in xTB embedding. "
725
+ "Default: 12.0 Å when --embedcharge is enabled.",
726
+ )
727
+ @click.pass_context
728
+ def cli(
729
+ ctx: click.Context,
730
+ input_paths: Sequence[Path],
731
+ charge: Optional[int],
732
+ ligand_charge: Optional[str],
733
+ spin: Optional[int],
734
+ mep_mode: str,
735
+ max_nodes: int,
736
+ max_cycles: int,
737
+ climb: bool,
738
+ preopt: bool,
739
+ preopt_max_cycles: int,
740
+ fix_ends: bool,
741
+ dump: bool,
742
+ out_dir: str,
743
+ thresh: Optional[str],
744
+ config_yaml: Optional[Path],
745
+ show_config: bool,
746
+ dry_run: bool,
747
+ real_parm7: Path,
748
+ model_pdb: Optional[Path],
749
+ model_indices_str: Optional[str],
750
+ model_indices_one_based: bool,
751
+ detect_layer: bool,
752
+ freeze_atoms_cli: Optional[str],
753
+ hess_cutoff: Optional[float],
754
+ movable_cutoff: Optional[float],
755
+ convert_files: bool,
756
+ backend: Optional[str],
757
+ embedcharge: bool,
758
+ embedcharge_cutoff: Optional[float],
759
+ ) -> None:
760
+ set_convert_file_enabled(convert_files)
761
+ _is_param_explicit = make_is_param_explicit(ctx)
762
+
763
+ config_yaml, override_yaml, used_legacy_yaml = resolve_yaml_sources(
764
+ config_yaml=config_yaml,
765
+ override_yaml=None,
766
+ args_yaml_legacy=None,
767
+ )
768
+ merged_yaml_cfg, _, _ = load_merged_yaml_cfg(
769
+ config_yaml=config_yaml,
770
+ override_yaml=None,
771
+ )
772
+
773
+ input_paths = tuple(Path(p) for p in input_paths)
774
+ prepared_inputs = [prepare_input_structure(p) for p in input_paths]
775
+ try:
776
+ time_start = time.perf_counter()
777
+
778
+ if len(prepared_inputs) != 2:
779
+ click.echo("ERROR: Provide exactly two endpoint structures (-i reactant product).", err=True)
780
+ sys.exit(1)
781
+
782
+ for src in input_paths:
783
+ if src.suffix.lower() != ".pdb":
784
+ click.echo(
785
+ f"ERROR: ML/MM path optimization requires PDB inputs; '{src.name}' is not a PDB.",
786
+ err=True,
787
+ )
788
+ sys.exit(1)
789
+
790
+ # --------------------------
791
+ # 1) Assemble final config (defaults < config < CLI(explicit) < override)
792
+ # --------------------------
793
+ config_layer_cfg = load_yaml_dict(config_yaml)
794
+ override_layer_cfg = load_yaml_dict(override_yaml)
795
+
796
+ mep_mode_kind = mep_mode.strip().lower()
797
+
798
+ geom_cfg = dict(GEOM_KW)
799
+ calc_cfg = dict(CALC_KW)
800
+ gs_cfg = dict(GS_KW)
801
+ stopt_cfg = dict(STOPT_KW)
802
+ lbfgs_cfg = dict(LBFGS_KW)
803
+ dmf_cfg = dict(DMF_KW)
804
+
805
+ apply_yaml_overrides(
806
+ config_layer_cfg,
807
+ [
808
+ (geom_cfg, (("geom",),)),
809
+ (calc_cfg, (("calc",), ("mlmm",))),
810
+ (gs_cfg, (("gs",),)),
811
+ (stopt_cfg, (("stopt",), ("opt",))),
812
+ (lbfgs_cfg, (("opt", "lbfgs"), ("lbfgs",), ("stopt", "lbfgs"))),
813
+ (dmf_cfg, (("dmf",),)),
814
+ ],
815
+ )
816
+
817
+ # CLI explicit overrides (after config YAML, before override YAML)
818
+ if backend is not None:
819
+ calc_cfg["backend"] = str(backend).lower()
820
+ if _is_param_explicit("embedcharge"):
821
+ calc_cfg["embedcharge"] = bool(embedcharge)
822
+ if _is_param_explicit("embedcharge_cutoff"):
823
+ calc_cfg["embedcharge_cutoff"] = embedcharge_cutoff
824
+
825
+ if _is_param_explicit("max_nodes"):
826
+ gs_cfg["max_nodes"] = int(max_nodes)
827
+ if _is_param_explicit("max_cycles"):
828
+ stopt_cfg["max_cycles"] = int(max_cycles)
829
+ stopt_cfg["stop_in_when_full"] = int(max_cycles)
830
+ dmf_cfg["max_cycles"] = int(max_cycles)
831
+ if _is_param_explicit("climb"):
832
+ gs_cfg["climb"] = bool(climb)
833
+ gs_cfg["climb_lanczos"] = bool(climb)
834
+ if _is_param_explicit("fix_ends"):
835
+ gs_cfg["fix_first"] = bool(fix_ends)
836
+ gs_cfg["fix_last"] = bool(fix_ends)
837
+ if _is_param_explicit("dump"):
838
+ stopt_cfg["dump"] = bool(dump)
839
+ lbfgs_cfg["dump"] = bool(dump)
840
+ if _is_param_explicit("out_dir"):
841
+ stopt_cfg["out_dir"] = out_dir
842
+ lbfgs_cfg["out_dir"] = out_dir
843
+ if _is_param_explicit("thresh") and thresh is not None:
844
+ stopt_cfg["thresh"] = str(thresh)
845
+ lbfgs_cfg["thresh"] = str(thresh)
846
+ if _is_param_explicit("detect_layer"):
847
+ calc_cfg["use_bfactor_layers"] = bool(detect_layer)
848
+ if _is_param_explicit("hess_cutoff") and hess_cutoff is not None:
849
+ calc_cfg["hess_cutoff"] = float(hess_cutoff)
850
+ if _is_param_explicit("movable_cutoff") and movable_cutoff is not None:
851
+ calc_cfg["movable_cutoff"] = float(movable_cutoff)
852
+ calc_cfg["use_bfactor_layers"] = False
853
+ if _is_param_explicit("preopt_max_cycles"):
854
+ lbfgs_cfg["max_cycles"] = int(preopt_max_cycles)
855
+
856
+ resolved_charge = charge
857
+ resolved_spin = spin
858
+ for prepared in prepared_inputs:
859
+ resolved_charge, resolved_spin = resolve_charge_spin_or_raise(
860
+ prepared,
861
+ resolved_charge,
862
+ resolved_spin,
863
+ ligand_charge=ligand_charge,
864
+ prefix="[path-opt]",
865
+ )
866
+ model_charge_value = calc_cfg.get("model_charge", resolved_charge)
867
+ if model_charge_value is None:
868
+ model_charge_value = resolved_charge
869
+ calc_cfg["model_charge"] = int(model_charge_value)
870
+ if _is_param_explicit("charge"):
871
+ calc_cfg["model_charge"] = int(resolved_charge)
872
+
873
+ model_mult_value = calc_cfg.get("model_mult", resolved_spin)
874
+ if model_mult_value is None:
875
+ model_mult_value = resolved_spin
876
+ calc_cfg["model_mult"] = int(model_mult_value)
877
+ if _is_param_explicit("spin"):
878
+ calc_cfg["model_mult"] = int(resolved_spin)
879
+
880
+ if model_pdb is not None:
881
+ calc_cfg["model_pdb"] = str(model_pdb)
882
+ calc_cfg["input_pdb"] = str(input_paths[0])
883
+ calc_cfg["real_parm7"] = str(real_parm7)
884
+
885
+ apply_yaml_overrides(
886
+ override_layer_cfg,
887
+ [
888
+ (geom_cfg, (("geom",),)),
889
+ (calc_cfg, (("calc",), ("mlmm",))),
890
+ (gs_cfg, (("gs",),)),
891
+ (stopt_cfg, (("stopt",), ("opt",))),
892
+ (lbfgs_cfg, (("opt", "lbfgs"), ("lbfgs",), ("stopt", "lbfgs"))),
893
+ (dmf_cfg, (("dmf",),)),
894
+ ],
895
+ )
896
+
897
+ try:
898
+ geom_freeze = _normalize_geom_freeze(geom_cfg.get("freeze_atoms"))
899
+ except click.BadParameter as e:
900
+ click.echo(f"ERROR: {e}", err=True)
901
+ sys.exit(1)
902
+ geom_cfg["freeze_atoms"] = geom_freeze
903
+
904
+ try:
905
+ cli_freeze = _parse_freeze_atoms(freeze_atoms_cli)
906
+ except click.BadParameter as e:
907
+ click.echo(f"ERROR: {e}", err=True)
908
+ sys.exit(1)
909
+
910
+ model_indices: Optional[List[int]] = None
911
+ if model_indices_str:
912
+ try:
913
+ model_indices = parse_indices_string(model_indices_str, one_based=model_indices_one_based)
914
+ except click.BadParameter as e:
915
+ click.echo(f"ERROR: {e}", err=True)
916
+ sys.exit(1)
917
+ if cli_freeze:
918
+ merge_freeze_atom_indices(geom_cfg, cli_freeze)
919
+
920
+ freeze_atoms_final = list(geom_cfg.get("freeze_atoms") or [])
921
+ calc_cfg["freeze_atoms"] = freeze_atoms_final
922
+
923
+ # Keep optimizer alignment policy deterministic.
924
+ stopt_cfg["align"] = False
925
+ stopt_cfg["stop_in_when_full"] = int(stopt_cfg.get("max_cycles", STOPT_KW["max_cycles"]))
926
+
927
+ out_dir_path = Path(stopt_cfg["out_dir"]).resolve()
928
+ preopt_max_cycles_effective = int(lbfgs_cfg.get("max_cycles", preopt_max_cycles))
929
+
930
+ # movable_cutoff implies full distance-based layer assignment.
931
+ # hess_cutoff alone can be combined with --detect-layer.
932
+ detect_layer_enabled = bool(calc_cfg.get("use_bfactor_layers", True))
933
+ model_pdb_cfg = calc_cfg.get("model_pdb")
934
+ if calc_cfg.get("movable_cutoff") is not None:
935
+ if detect_layer_enabled:
936
+ click.echo("[layer] movable_cutoff is set; disabling --detect-layer.", err=True)
937
+ detect_layer_enabled = False
938
+ calc_cfg["use_bfactor_layers"] = False
939
+
940
+ layer_source_pdb = input_paths[0]
941
+ if detect_layer_enabled and layer_source_pdb.suffix.lower() != ".pdb":
942
+ click.echo("ERROR: --detect-layer requires a PDB input.", err=True)
943
+ sys.exit(1)
944
+
945
+ if show_config:
946
+ click.echo(
947
+ pretty_block(
948
+ "yaml_layers",
949
+ {
950
+ "config": None if config_yaml is None else str(config_yaml),
951
+ "override_yaml": None if override_yaml is None else str(override_yaml),
952
+ "merged_keys": sorted(merged_yaml_cfg.keys()),
953
+ },
954
+ )
955
+ )
956
+
957
+ if dry_run:
958
+ model_region_source = "bfactor"
959
+ if not detect_layer_enabled:
960
+ if model_pdb_cfg is not None:
961
+ model_region_source = "model_pdb"
962
+ elif model_indices:
963
+ model_region_source = "model_indices"
964
+ else:
965
+ click.echo("ERROR: Provide --model-pdb or --model-indices when --no-detect-layer.", err=True)
966
+ sys.exit(1)
967
+ if (
968
+ not detect_layer_enabled
969
+ and model_pdb_cfg is None
970
+ and model_indices
971
+ and layer_source_pdb.suffix.lower() != ".pdb"
972
+ ):
973
+ click.echo("ERROR: --model-indices requires a PDB input.", err=True)
974
+ sys.exit(1)
975
+ click.echo(
976
+ pretty_block(
977
+ "dry_run_plan",
978
+ {
979
+ "input_endpoints": [str(p) for p in input_paths],
980
+ "output_dir": str(out_dir_path),
981
+ "mep_mode": mep_mode_kind,
982
+ "fix_ends": bool(gs_cfg.get("fix_first", False) and gs_cfg.get("fix_last", False)),
983
+ "detect_layer": bool(detect_layer_enabled),
984
+ "model_region_source": model_region_source,
985
+ "model_indices_count": 0 if not model_indices else len(model_indices),
986
+ "preopt": bool(preopt),
987
+ "preopt_max_cycles": int(preopt_max_cycles_effective),
988
+ "will_run_path_opt": True,
989
+ "will_write_summary": True,
990
+ "backend": calc_cfg.get("backend", "uma"),
991
+ "embedcharge": bool(calc_cfg.get("embedcharge", False)),
992
+ },
993
+ )
994
+ )
995
+ click.echo("[dry-run] Validation complete. Path optimization execution was skipped.")
996
+ return
997
+
998
+ model_pdb_path: Optional[Path] = None
999
+ layer_info: Optional[Dict[str, List[int]]] = None
1000
+
1001
+ if detect_layer_enabled:
1002
+ try:
1003
+ model_pdb_path, layer_info = build_model_pdb_from_bfactors(layer_source_pdb, out_dir_path)
1004
+ calc_cfg["use_bfactor_layers"] = True
1005
+ click.echo(
1006
+ f"[layer] Detected B-factor layers: ML={len(layer_info.get('ml_indices', []))}, "
1007
+ f"MovableMM={len(layer_info.get('movable_mm_indices', []))}, "
1008
+ f"FrozenMM={len(layer_info.get('frozen_indices', []))}"
1009
+ )
1010
+ except Exception as e:
1011
+ if model_pdb_cfg is None and not model_indices:
1012
+ click.echo(f"ERROR: {e}", err=True)
1013
+ sys.exit(1)
1014
+ click.echo(f"[layer] WARNING: {e} Falling back to explicit ML region.", err=True)
1015
+ detect_layer_enabled = False
1016
+
1017
+ if not detect_layer_enabled:
1018
+ if model_pdb_cfg is None and not model_indices:
1019
+ click.echo("ERROR: Provide --model-pdb or --model-indices when --no-detect-layer.", err=True)
1020
+ sys.exit(1)
1021
+ if model_pdb_cfg is not None:
1022
+ model_pdb_path = Path(model_pdb_cfg)
1023
+ else:
1024
+ if layer_source_pdb.suffix.lower() != ".pdb":
1025
+ click.echo("ERROR: --model-indices requires a PDB input.", err=True)
1026
+ sys.exit(1)
1027
+ try:
1028
+ model_pdb_path = build_model_pdb_from_indices(layer_source_pdb, out_dir_path, model_indices or [])
1029
+ except Exception as e:
1030
+ click.echo(f"ERROR: {e}", err=True)
1031
+ sys.exit(1)
1032
+ calc_cfg["use_bfactor_layers"] = False
1033
+
1034
+ if model_pdb_path is None:
1035
+ click.echo("ERROR: Failed to resolve model PDB for the ML region.", err=True)
1036
+ sys.exit(1)
1037
+
1038
+ calc_cfg["model_pdb"] = str(model_pdb_path)
1039
+ freeze_atoms_final = apply_layer_freeze_constraints(
1040
+ geom_cfg,
1041
+ calc_cfg,
1042
+ layer_info,
1043
+ echo_fn=click.echo,
1044
+ )
1045
+
1046
+ for key in ("input_pdb", "real_parm7", "model_pdb", "mm_fd_dir"):
1047
+ val = calc_cfg.get(key)
1048
+ if val:
1049
+ calc_cfg[key] = str(Path(val).expanduser().resolve())
1050
+
1051
+ # For display: resolved configuration (show only non-default values)
1052
+ echo_geom = format_freeze_atoms_for_echo(geom_cfg, key="freeze_atoms")
1053
+ echo_calc = format_freeze_atoms_for_echo(filter_calc_for_echo(calc_cfg), key="freeze_atoms")
1054
+ echo_gs = strip_inherited_keys(gs_cfg, GS_KW, mode="same")
1055
+ echo_stopt = strip_inherited_keys({**stopt_cfg, "out_dir": str(out_dir_path)}, STOPT_KW, mode="same")
1056
+ echo_lbfgs = strip_inherited_keys({**lbfgs_cfg, "out_dir": stopt_cfg.get("out_dir")}, LBFGS_KW, mode="same")
1057
+
1058
+ click.echo(pretty_block("geom", echo_geom))
1059
+ click.echo(pretty_block("calc", echo_calc))
1060
+ if mep_mode_kind == "gsm":
1061
+ click.echo(pretty_block("gs", echo_gs))
1062
+ click.echo(pretty_block("stopt", echo_stopt))
1063
+ click.echo(pretty_block("lbfgs", echo_lbfgs))
1064
+ elif mep_mode_kind == "dmf":
1065
+ click.echo(pretty_block("dmf", dmf_cfg))
1066
+ click.echo(
1067
+ pretty_block(
1068
+ "run_flags",
1069
+ {
1070
+ "mep_mode": mep_mode_kind,
1071
+ "preopt": bool(preopt),
1072
+ "preopt_max_cycles": int(preopt_max_cycles_effective),
1073
+ "fix_ends": bool(gs_cfg.get("fix_first", False) and gs_cfg.get("fix_last", False)),
1074
+ },
1075
+ )
1076
+ )
1077
+
1078
+ if int(stopt_cfg.get("max_cycles", 0)) <= 0:
1079
+ click.echo("[INFO] max_cycles <= 0: skipping path optimization.")
1080
+ return
1081
+
1082
+ # --------------------------
1083
+ # 2) Prepare structures (load two endpoints and apply freezing)
1084
+ # --------------------------
1085
+ out_dir_path.mkdir(parents=True, exist_ok=True)
1086
+
1087
+ source_paths = [prep.source_path for prep in prepared_inputs]
1088
+
1089
+ # Pre-compute ML-region indices (0-based in ref PDB atom order) for later PDB annotation
1090
+ ml_indices_set: Set[int] = set()
1091
+ try:
1092
+ ref_pdb_for_map = source_paths[0]
1093
+ if ref_pdb_for_map.suffix.lower() == ".pdb":
1094
+ ml_indices_set = _compute_ml_indices_from_model_and_ref(
1095
+ ref_pdb_for_map.resolve(),
1096
+ Path(calc_cfg["model_pdb"]).resolve(),
1097
+ )
1098
+ click.echo(f"[annotate] ML-region atoms mapped: {len(ml_indices_set)}")
1099
+ except Exception as e:
1100
+ click.echo(f"[annotate] WARNING: Failed to pre-compute ML-region indices: {e}", err=True)
1101
+
1102
+ # Load endpoints (if PDB, merge in link-parent freezing)
1103
+ geoms = _load_two_endpoints(
1104
+ inputs=prepared_inputs,
1105
+ coord_type=geom_cfg.get("coord_type", "cart"),
1106
+ base_freeze=geom_cfg.get("freeze_atoms", []),
1107
+ )
1108
+
1109
+ # Shared ML/MM calculator (reuse the same instance for all images)
1110
+ shared_calc = mlmm(**calc_cfg)
1111
+
1112
+ # === (NEW) optional endpoint pre-optimization ===
1113
+ if preopt:
1114
+ try:
1115
+ click.echo("\n=== Pre-optimizing endpoints (LBFGS) ===\n")
1116
+ pre_dir_base = out_dir_path / "preopt"
1117
+ for i, g in enumerate(geoms):
1118
+ try:
1119
+ g.set_calculator(shared_calc)
1120
+ except Exception:
1121
+ logger.debug("Failed to set calculator on geometry", exc_info=True)
1122
+ subdir = pre_dir_base / f"end{i:02d}"
1123
+ subdir.mkdir(parents=True, exist_ok=True)
1124
+ lbfgs_args = dict(lbfgs_cfg)
1125
+ lbfgs_args.update({
1126
+ "out_dir": str(subdir),
1127
+ "max_cycles": int(preopt_max_cycles_effective),
1128
+ })
1129
+ optimizer = LBFGS(g, **lbfgs_args)
1130
+ optimizer.run()
1131
+ try:
1132
+ final_xyz_path = optimizer.final_fn if isinstance(optimizer.final_fn, Path) else Path(optimizer.final_fn)
1133
+ g_new = geom_loader(final_xyz_path, coord_type=geom_cfg.get("coord_type", "cart"))
1134
+ try:
1135
+ g_new.freeze_atoms = np.array(getattr(g, "freeze_atoms", []), dtype=int)
1136
+ except Exception:
1137
+ logger.debug("Failed to set freeze_atoms on new geometry", exc_info=True)
1138
+ geoms[i] = g_new
1139
+ except Exception as e:
1140
+ click.echo(f"[preopt] WARNING: Failed to reload optimized endpoint #{i}: {e}", err=True)
1141
+ click.echo("[preopt] Completed endpoint pre-optimization.")
1142
+ except Exception as e:
1143
+ click.echo(f"[preopt] WARNING: Pre-optimization skipped due to error: {e}", err=True)
1144
+
1145
+ # By default, apply external Kabsch alignment (if freeze_atoms exist, use only them)
1146
+ align_thresh = str(stopt_cfg.get("thresh", "gau"))
1147
+ try:
1148
+ click.echo("\n=== Aligning all inputs to the first structure (freeze-guided scan + relaxation) ===\n")
1149
+ _ = align_and_refine_sequence_inplace(
1150
+ geoms,
1151
+ thresh=align_thresh,
1152
+ shared_calc=shared_calc,
1153
+ out_dir=out_dir_path / "align_refine",
1154
+ verbose=True,
1155
+ )
1156
+ click.echo("[align] Completed input alignment.")
1157
+ except Exception as e:
1158
+ click.echo(f"[align] WARNING: alignment skipped: {e}", err=True)
1159
+
1160
+ # Collect freeze_atoms for DMF
1161
+ fix_atoms: List[int] = []
1162
+ try:
1163
+ fix_atoms = sorted(
1164
+ {int(i) for g in geoms for i in getattr(g, "freeze_atoms", [])}
1165
+ )
1166
+ except Exception:
1167
+ logger.debug("Failed to extract freeze_atoms from geometries", exc_info=True)
1168
+
1169
+ # --------------------------
1170
+ # 3) DMF or GSM routing
1171
+ # --------------------------
1172
+ if mep_mode_kind == "dmf":
1173
+ try:
1174
+ _run_dmf_mep(
1175
+ geoms,
1176
+ calc_cfg,
1177
+ out_dir_path,
1178
+ input_paths,
1179
+ max_nodes,
1180
+ fix_atoms,
1181
+ dmf_cfg=dmf_cfg,
1182
+ ml_indices_set=ml_indices_set,
1183
+ freeze_atoms_final=freeze_atoms_final,
1184
+ )
1185
+ except Exception as e:
1186
+ tb = "".join(traceback.format_exception(type(e), e, e.__traceback__))
1187
+ click.echo(f"[dmf] ERROR: DMF optimization failed:\n{textwrap.indent(tb, ' ')}", err=True)
1188
+ sys.exit(3)
1189
+ click.echo(format_elapsed("[time] Elapsed Time for Path Opt (DMF)", time_start))
1190
+ return
1191
+
1192
+ for g in geoms:
1193
+ g.set_calculator(shared_calc)
1194
+
1195
+ def calc_getter():
1196
+ # Used when GrowingString generates new nodes
1197
+ return shared_calc
1198
+
1199
+ # --------------------------
1200
+ # 3) Build path object and optimizer (GSM)
1201
+ # --------------------------
1202
+ gs = GrowingString(
1203
+ images=geoms,
1204
+ calc_getter=calc_getter,
1205
+ **gs_cfg,
1206
+ )
1207
+
1208
+ # StringOptimizer expects 'out_dir' under the key "out_dir"
1209
+ opt_args = dict(stopt_cfg)
1210
+ opt_args["out_dir"] = str(out_dir_path)
1211
+
1212
+ optimizer = StringOptimizer(
1213
+ geometry=gs,
1214
+ **{k: v for k, v in opt_args.items() if k != "type"} # 'type' is just a tag
1215
+ )
1216
+
1217
+ # --------------------------
1218
+ # 4) Run optimization
1219
+ # --------------------------
1220
+ click.echo("\n=== Growing String optimization started ===\n")
1221
+ optimizer.run()
1222
+ click.echo("\n=== Growing String optimization finished ===\n")
1223
+
1224
+ # --------------------------
1225
+ # 5) Write final path (final_geometries_trj.xyz)
1226
+ # --------------------------
1227
+ final_trj = out_dir_path / "final_geometries_trj.xyz"
1228
+ try:
1229
+ try:
1230
+ energies = np.array(gs.energy, dtype=float)
1231
+ blocks = []
1232
+ for idx, (geom, E) in enumerate(zip(gs.images, energies)):
1233
+ s = geom.as_xyz()
1234
+ lines = s.splitlines()
1235
+ if len(lines) >= 2 and lines[0].strip().isdigit():
1236
+ lines[1] = f"{E:.12f}"
1237
+ s_mod = "\n".join(lines)
1238
+ if not s_mod.endswith("\n"):
1239
+ s_mod += "\n"
1240
+ blocks.append(s_mod)
1241
+ annotated = "".join(blocks)
1242
+ with open(final_trj, "w") as f:
1243
+ f.write(annotated)
1244
+ click.echo(f"[write] Wrote '{final_trj}' with energy.")
1245
+ except Exception:
1246
+ with open(final_trj, "w") as f:
1247
+ f.write(gs.as_xyz())
1248
+ click.echo(f"[write] Wrote '{final_trj}'.")
1249
+
1250
+ if input_paths[0].suffix.lower() == ".pdb":
1251
+ ref_pdb = input_paths[0].resolve()
1252
+
1253
+ try:
1254
+ out_pdb = out_dir_path / "final_geometries.pdb"
1255
+ convert_xyz_to_pdb(final_trj, ref_pdb, out_pdb)
1256
+ click.echo(f"[convert] Wrote '{out_pdb}'.")
1257
+ # === Annotate B-factors for ML & freeze atoms (added) ===
1258
+ _apply_bfactor_annotations_inplace(
1259
+ out_pdb,
1260
+ ml_indices=ml_indices_set,
1261
+ freeze_indices=freeze_atoms_final,
1262
+ )
1263
+ except Exception as e:
1264
+ click.echo(f"[convert] WARNING: Failed to convert MEP path trajectory to PDB: {e}", err=True)
1265
+
1266
+ except Exception as e:
1267
+ click.echo(f"[write] ERROR: Failed to write final trajectory: {e}", err=True)
1268
+ sys.exit(4)
1269
+
1270
+ try:
1271
+ energies = np.array(gs.energy, dtype=float)
1272
+ # --- HEI identification logic ---
1273
+ # Choose the internal local maximum (exclude endpoints) with the highest energy,
1274
+ # i.e., nodes whose immediate neighbors have lower energy.
1275
+ # Fallback 1: if none exist, pick the maximum among internal nodes (exclude endpoints).
1276
+ # Fallback 2: if internal nodes are unavailable, pick the global maximum.
1277
+ nE = int(len(energies))
1278
+ hei_idx = None
1279
+ if nE >= 3:
1280
+ # Strict internal local maxima (both neighbors lower)
1281
+ candidates = [i for i in range(1, nE - 1)
1282
+ if energies[i] > energies[i - 1] and energies[i] > energies[i + 1]]
1283
+ if candidates:
1284
+ cand_es = energies[candidates]
1285
+ rel = int(np.argmax(cand_es))
1286
+ hei_idx = int(candidates[rel])
1287
+ else:
1288
+ # Fallback 1: maximum over internal nodes (exclude endpoints)
1289
+ if nE > 2:
1290
+ rel = int(np.argmax(energies[1:-1]))
1291
+ hei_idx = 1 + rel
1292
+ if hei_idx is None:
1293
+ # Fallback 2: global maximum
1294
+ hei_idx = int(np.argmax(energies))
1295
+
1296
+ hei_geom = gs.images[hei_idx]
1297
+ hei_E = float(energies[hei_idx])
1298
+
1299
+ hei_xyz = out_dir_path / "hei.xyz"
1300
+ s = hei_geom.as_xyz()
1301
+ lines = s.splitlines()
1302
+ if len(lines) >= 2 and lines[0].strip().isdigit():
1303
+ lines[1] = f"{hei_E:.12f}"
1304
+ s = "\n".join(lines) + ("\n" if not s.endswith("\n") else "")
1305
+ with open(hei_xyz, "w") as f:
1306
+ f.write(s)
1307
+ click.echo(f"[write] Wrote '{hei_xyz}'.")
1308
+
1309
+ ref_pdb = None
1310
+ if source_paths[0].suffix.lower() == ".pdb":
1311
+ ref_pdb = source_paths[0].resolve()
1312
+ if ref_pdb is not None:
1313
+ hei_pdb = out_dir_path / "hei.pdb"
1314
+ convert_xyz_to_pdb(hei_xyz, ref_pdb, hei_pdb)
1315
+ click.echo(f"[convert] Wrote '{hei_pdb}'.")
1316
+ # === Annotate B-factors for ML & freeze atoms (added) ===
1317
+ _apply_bfactor_annotations_inplace(
1318
+ hei_pdb,
1319
+ ml_indices=ml_indices_set,
1320
+ freeze_indices=freeze_atoms_final,
1321
+ )
1322
+ else:
1323
+ click.echo("[convert] Skipped 'hei.pdb' (no PDB reference among inputs).")
1324
+
1325
+ except Exception as e:
1326
+ click.echo(f"[HEI] ERROR: Failed to dump HEI: {e}", err=True)
1327
+ sys.exit(5)
1328
+
1329
+ # summary.md and key_* outputs are disabled.
1330
+ click.echo(format_elapsed("[time] Elapsed Time for Path Opt", time_start))
1331
+
1332
+ except OptimizationError as e:
1333
+ click.echo(f"ERROR: Path optimization failed — {e}", err=True)
1334
+ sys.exit(3)
1335
+ except KeyboardInterrupt:
1336
+ click.echo("\nInterrupted by user.", err=True)
1337
+ sys.exit(130)
1338
+ except Exception as e:
1339
+ tb = "".join(traceback.format_exception(type(e), e, e.__traceback__))
1340
+ click.echo("Unhandled error during path optimization:\n" + textwrap.indent(tb, " "), err=True)
1341
+ sys.exit(1)
1342
+ finally:
1343
+ for prepared in prepared_inputs:
1344
+ prepared.cleanup()
1345
+ # Release GPU memory so subsequent pipeline stages don't OOM
1346
+ shared_calc = gs = geoms = None
1347
+ gc.collect() # break cyclic refs inside torch.nn.Module
1348
+ if torch.cuda.is_available():
1349
+ torch.cuda.empty_cache()
1350
+
1351
+
1352
# Script entry point: call `cli()` only when this file is executed directly,
# not when it is imported as a module.
# NOTE(review): `cli` is defined outside the visible chunk — presumably the click command; confirm.
+ if __name__ == "__main__":
1353
+ cli()