mlmm-toolkit 0.2.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (372) hide show
  1. hessian_ff/__init__.py +50 -0
  2. hessian_ff/analytical_hessian.py +609 -0
  3. hessian_ff/constants.py +46 -0
  4. hessian_ff/forcefield.py +339 -0
  5. hessian_ff/loaders.py +608 -0
  6. hessian_ff/native/Makefile +8 -0
  7. hessian_ff/native/__init__.py +28 -0
  8. hessian_ff/native/analytical_hessian.py +88 -0
  9. hessian_ff/native/analytical_hessian_ext.cpp +258 -0
  10. hessian_ff/native/bonded.py +82 -0
  11. hessian_ff/native/bonded_ext.cpp +640 -0
  12. hessian_ff/native/loader.py +349 -0
  13. hessian_ff/native/nonbonded.py +118 -0
  14. hessian_ff/native/nonbonded_ext.cpp +1150 -0
  15. hessian_ff/prmtop_parmed.py +23 -0
  16. hessian_ff/system.py +107 -0
  17. hessian_ff/terms/__init__.py +14 -0
  18. hessian_ff/terms/angle.py +73 -0
  19. hessian_ff/terms/bond.py +44 -0
  20. hessian_ff/terms/cmap.py +406 -0
  21. hessian_ff/terms/dihedral.py +141 -0
  22. hessian_ff/terms/nonbonded.py +209 -0
  23. hessian_ff/tests/__init__.py +0 -0
  24. hessian_ff/tests/conftest.py +75 -0
  25. hessian_ff/tests/data/small/complex.parm7 +1346 -0
  26. hessian_ff/tests/data/small/complex.pdb +125 -0
  27. hessian_ff/tests/data/small/complex.rst7 +63 -0
  28. hessian_ff/tests/test_coords_input.py +44 -0
  29. hessian_ff/tests/test_energy_force.py +49 -0
  30. hessian_ff/tests/test_hessian.py +137 -0
  31. hessian_ff/tests/test_smoke.py +18 -0
  32. hessian_ff/tests/test_validation.py +40 -0
  33. hessian_ff/workflows.py +889 -0
  34. mlmm/__init__.py +36 -0
  35. mlmm/__main__.py +7 -0
  36. mlmm/_version.py +34 -0
  37. mlmm/add_elem_info.py +374 -0
  38. mlmm/advanced_help.py +91 -0
  39. mlmm/align_freeze_atoms.py +601 -0
  40. mlmm/all.py +3535 -0
  41. mlmm/bond_changes.py +231 -0
  42. mlmm/bool_compat.py +223 -0
  43. mlmm/cli.py +574 -0
  44. mlmm/cli_utils.py +166 -0
  45. mlmm/default_group.py +337 -0
  46. mlmm/defaults.py +467 -0
  47. mlmm/define_layer.py +526 -0
  48. mlmm/dft.py +1041 -0
  49. mlmm/energy_diagram.py +253 -0
  50. mlmm/extract.py +2213 -0
  51. mlmm/fix_altloc.py +464 -0
  52. mlmm/freq.py +1406 -0
  53. mlmm/harmonic_constraints.py +140 -0
  54. mlmm/hessian_cache.py +44 -0
  55. mlmm/hessian_calc.py +174 -0
  56. mlmm/irc.py +638 -0
  57. mlmm/mlmm_calc.py +2262 -0
  58. mlmm/mm_parm.py +945 -0
  59. mlmm/oniom_export.py +1983 -0
  60. mlmm/oniom_import.py +457 -0
  61. mlmm/opt.py +1742 -0
  62. mlmm/path_opt.py +1353 -0
  63. mlmm/path_search.py +2299 -0
  64. mlmm/preflight.py +88 -0
  65. mlmm/py.typed +1 -0
  66. mlmm/pysis_runner.py +45 -0
  67. mlmm/scan.py +1047 -0
  68. mlmm/scan2d.py +1226 -0
  69. mlmm/scan3d.py +1265 -0
  70. mlmm/scan_common.py +184 -0
  71. mlmm/summary_log.py +736 -0
  72. mlmm/trj2fig.py +448 -0
  73. mlmm/tsopt.py +2871 -0
  74. mlmm/utils.py +2309 -0
  75. mlmm/xtb_embedcharge_correction.py +475 -0
  76. mlmm_toolkit-0.2.2.dev0.dist-info/METADATA +1159 -0
  77. mlmm_toolkit-0.2.2.dev0.dist-info/RECORD +372 -0
  78. mlmm_toolkit-0.2.2.dev0.dist-info/WHEEL +5 -0
  79. mlmm_toolkit-0.2.2.dev0.dist-info/entry_points.txt +2 -0
  80. mlmm_toolkit-0.2.2.dev0.dist-info/licenses/LICENSE +674 -0
  81. mlmm_toolkit-0.2.2.dev0.dist-info/top_level.txt +4 -0
  82. pysisyphus/Geometry.py +1667 -0
  83. pysisyphus/LICENSE +674 -0
  84. pysisyphus/TableFormatter.py +63 -0
  85. pysisyphus/TablePrinter.py +74 -0
  86. pysisyphus/__init__.py +12 -0
  87. pysisyphus/calculators/AFIR.py +452 -0
  88. pysisyphus/calculators/AnaPot.py +20 -0
  89. pysisyphus/calculators/AnaPot2.py +48 -0
  90. pysisyphus/calculators/AnaPot3.py +12 -0
  91. pysisyphus/calculators/AnaPot4.py +20 -0
  92. pysisyphus/calculators/AnaPotBase.py +337 -0
  93. pysisyphus/calculators/AnaPotCBM.py +25 -0
  94. pysisyphus/calculators/AtomAtomTransTorque.py +154 -0
  95. pysisyphus/calculators/CFOUR.py +250 -0
  96. pysisyphus/calculators/Calculator.py +844 -0
  97. pysisyphus/calculators/CerjanMiller.py +24 -0
  98. pysisyphus/calculators/Composite.py +123 -0
  99. pysisyphus/calculators/ConicalIntersection.py +171 -0
  100. pysisyphus/calculators/DFTBp.py +430 -0
  101. pysisyphus/calculators/DFTD3.py +66 -0
  102. pysisyphus/calculators/DFTD4.py +84 -0
  103. pysisyphus/calculators/Dalton.py +61 -0
  104. pysisyphus/calculators/Dimer.py +681 -0
  105. pysisyphus/calculators/Dummy.py +20 -0
  106. pysisyphus/calculators/EGO.py +76 -0
  107. pysisyphus/calculators/EnergyMin.py +224 -0
  108. pysisyphus/calculators/ExternalPotential.py +264 -0
  109. pysisyphus/calculators/FakeASE.py +35 -0
  110. pysisyphus/calculators/FourWellAnaPot.py +28 -0
  111. pysisyphus/calculators/FreeEndNEBPot.py +39 -0
  112. pysisyphus/calculators/Gaussian09.py +18 -0
  113. pysisyphus/calculators/Gaussian16.py +726 -0
  114. pysisyphus/calculators/HardSphere.py +159 -0
  115. pysisyphus/calculators/IDPPCalculator.py +49 -0
  116. pysisyphus/calculators/IPIClient.py +133 -0
  117. pysisyphus/calculators/IPIServer.py +234 -0
  118. pysisyphus/calculators/LEPSBase.py +24 -0
  119. pysisyphus/calculators/LEPSExpr.py +139 -0
  120. pysisyphus/calculators/LennardJones.py +80 -0
  121. pysisyphus/calculators/MOPAC.py +219 -0
  122. pysisyphus/calculators/MullerBrownSympyPot.py +51 -0
  123. pysisyphus/calculators/MultiCalc.py +85 -0
  124. pysisyphus/calculators/NFK.py +45 -0
  125. pysisyphus/calculators/OBabel.py +87 -0
  126. pysisyphus/calculators/ONIOMv2.py +1129 -0
  127. pysisyphus/calculators/ORCA.py +893 -0
  128. pysisyphus/calculators/ORCA5.py +6 -0
  129. pysisyphus/calculators/OpenMM.py +88 -0
  130. pysisyphus/calculators/OpenMolcas.py +281 -0
  131. pysisyphus/calculators/OverlapCalculator.py +908 -0
  132. pysisyphus/calculators/Psi4.py +218 -0
  133. pysisyphus/calculators/PyPsi4.py +37 -0
  134. pysisyphus/calculators/PySCF.py +341 -0
  135. pysisyphus/calculators/PyXTB.py +73 -0
  136. pysisyphus/calculators/QCEngine.py +106 -0
  137. pysisyphus/calculators/Rastrigin.py +22 -0
  138. pysisyphus/calculators/Remote.py +76 -0
  139. pysisyphus/calculators/Rosenbrock.py +15 -0
  140. pysisyphus/calculators/SocketCalc.py +97 -0
  141. pysisyphus/calculators/TIP3P.py +111 -0
  142. pysisyphus/calculators/TransTorque.py +161 -0
  143. pysisyphus/calculators/Turbomole.py +965 -0
  144. pysisyphus/calculators/VRIPot.py +37 -0
  145. pysisyphus/calculators/WFOWrapper.py +333 -0
  146. pysisyphus/calculators/WFOWrapper2.py +341 -0
  147. pysisyphus/calculators/XTB.py +418 -0
  148. pysisyphus/calculators/__init__.py +81 -0
  149. pysisyphus/calculators/cosmo_data.py +139 -0
  150. pysisyphus/calculators/parser.py +150 -0
  151. pysisyphus/color.py +19 -0
  152. pysisyphus/config.py +133 -0
  153. pysisyphus/constants.py +65 -0
  154. pysisyphus/cos/AdaptiveNEB.py +230 -0
  155. pysisyphus/cos/ChainOfStates.py +725 -0
  156. pysisyphus/cos/FreeEndNEB.py +25 -0
  157. pysisyphus/cos/FreezingString.py +103 -0
  158. pysisyphus/cos/GrowingChainOfStates.py +71 -0
  159. pysisyphus/cos/GrowingNT.py +309 -0
  160. pysisyphus/cos/GrowingString.py +508 -0
  161. pysisyphus/cos/NEB.py +189 -0
  162. pysisyphus/cos/SimpleZTS.py +64 -0
  163. pysisyphus/cos/__init__.py +22 -0
  164. pysisyphus/cos/stiffness.py +199 -0
  165. pysisyphus/drivers/__init__.py +17 -0
  166. pysisyphus/drivers/afir.py +855 -0
  167. pysisyphus/drivers/barriers.py +271 -0
  168. pysisyphus/drivers/birkholz.py +138 -0
  169. pysisyphus/drivers/cluster.py +318 -0
  170. pysisyphus/drivers/diabatization.py +133 -0
  171. pysisyphus/drivers/merge.py +368 -0
  172. pysisyphus/drivers/merge_mol2.py +322 -0
  173. pysisyphus/drivers/opt.py +375 -0
  174. pysisyphus/drivers/perf.py +91 -0
  175. pysisyphus/drivers/pka.py +52 -0
  176. pysisyphus/drivers/precon_pos_rot.py +669 -0
  177. pysisyphus/drivers/rates.py +480 -0
  178. pysisyphus/drivers/replace.py +219 -0
  179. pysisyphus/drivers/scan.py +212 -0
  180. pysisyphus/drivers/spectrum.py +166 -0
  181. pysisyphus/drivers/thermo.py +31 -0
  182. pysisyphus/dynamics/Gaussian.py +103 -0
  183. pysisyphus/dynamics/__init__.py +20 -0
  184. pysisyphus/dynamics/colvars.py +136 -0
  185. pysisyphus/dynamics/driver.py +297 -0
  186. pysisyphus/dynamics/helpers.py +256 -0
  187. pysisyphus/dynamics/lincs.py +105 -0
  188. pysisyphus/dynamics/mdp.py +364 -0
  189. pysisyphus/dynamics/rattle.py +121 -0
  190. pysisyphus/dynamics/thermostats.py +128 -0
  191. pysisyphus/dynamics/wigner.py +266 -0
  192. pysisyphus/elem_data.py +3473 -0
  193. pysisyphus/exceptions.py +2 -0
  194. pysisyphus/filtertrj.py +69 -0
  195. pysisyphus/helpers.py +623 -0
  196. pysisyphus/helpers_pure.py +649 -0
  197. pysisyphus/init_logging.py +50 -0
  198. pysisyphus/intcoords/Bend.py +69 -0
  199. pysisyphus/intcoords/Bend2.py +25 -0
  200. pysisyphus/intcoords/BondedFragment.py +32 -0
  201. pysisyphus/intcoords/Cartesian.py +41 -0
  202. pysisyphus/intcoords/CartesianCoords.py +140 -0
  203. pysisyphus/intcoords/Coords.py +56 -0
  204. pysisyphus/intcoords/DLC.py +197 -0
  205. pysisyphus/intcoords/DistanceFunction.py +34 -0
  206. pysisyphus/intcoords/DummyImproper.py +70 -0
  207. pysisyphus/intcoords/DummyTorsion.py +72 -0
  208. pysisyphus/intcoords/LinearBend.py +105 -0
  209. pysisyphus/intcoords/LinearDisplacement.py +80 -0
  210. pysisyphus/intcoords/OutOfPlane.py +59 -0
  211. pysisyphus/intcoords/PrimTypes.py +286 -0
  212. pysisyphus/intcoords/Primitive.py +137 -0
  213. pysisyphus/intcoords/RedundantCoords.py +659 -0
  214. pysisyphus/intcoords/RobustTorsion.py +59 -0
  215. pysisyphus/intcoords/Rotation.py +147 -0
  216. pysisyphus/intcoords/Stretch.py +31 -0
  217. pysisyphus/intcoords/Torsion.py +101 -0
  218. pysisyphus/intcoords/Torsion2.py +25 -0
  219. pysisyphus/intcoords/Translation.py +45 -0
  220. pysisyphus/intcoords/__init__.py +61 -0
  221. pysisyphus/intcoords/augment_bonds.py +126 -0
  222. pysisyphus/intcoords/derivatives.py +10512 -0
  223. pysisyphus/intcoords/eval.py +80 -0
  224. pysisyphus/intcoords/exceptions.py +37 -0
  225. pysisyphus/intcoords/findiffs.py +48 -0
  226. pysisyphus/intcoords/generate_derivatives.py +414 -0
  227. pysisyphus/intcoords/helpers.py +235 -0
  228. pysisyphus/intcoords/logging_conf.py +10 -0
  229. pysisyphus/intcoords/mp_derivatives.py +10836 -0
  230. pysisyphus/intcoords/setup.py +962 -0
  231. pysisyphus/intcoords/setup_fast.py +176 -0
  232. pysisyphus/intcoords/update.py +272 -0
  233. pysisyphus/intcoords/valid.py +89 -0
  234. pysisyphus/interpolate/Geodesic.py +93 -0
  235. pysisyphus/interpolate/IDPP.py +55 -0
  236. pysisyphus/interpolate/Interpolator.py +116 -0
  237. pysisyphus/interpolate/LST.py +70 -0
  238. pysisyphus/interpolate/Redund.py +152 -0
  239. pysisyphus/interpolate/__init__.py +9 -0
  240. pysisyphus/interpolate/helpers.py +34 -0
  241. pysisyphus/io/__init__.py +22 -0
  242. pysisyphus/io/aomix.py +178 -0
  243. pysisyphus/io/cjson.py +24 -0
  244. pysisyphus/io/crd.py +101 -0
  245. pysisyphus/io/cube.py +220 -0
  246. pysisyphus/io/fchk.py +184 -0
  247. pysisyphus/io/hdf5.py +49 -0
  248. pysisyphus/io/hessian.py +72 -0
  249. pysisyphus/io/mol2.py +146 -0
  250. pysisyphus/io/molden.py +293 -0
  251. pysisyphus/io/orca.py +189 -0
  252. pysisyphus/io/pdb.py +269 -0
  253. pysisyphus/io/psf.py +79 -0
  254. pysisyphus/io/pubchem.py +31 -0
  255. pysisyphus/io/qcschema.py +34 -0
  256. pysisyphus/io/sdf.py +29 -0
  257. pysisyphus/io/xyz.py +61 -0
  258. pysisyphus/io/zmat.py +175 -0
  259. pysisyphus/irc/DWI.py +108 -0
  260. pysisyphus/irc/DampedVelocityVerlet.py +134 -0
  261. pysisyphus/irc/Euler.py +22 -0
  262. pysisyphus/irc/EulerPC.py +345 -0
  263. pysisyphus/irc/GonzalezSchlegel.py +187 -0
  264. pysisyphus/irc/IMKMod.py +164 -0
  265. pysisyphus/irc/IRC.py +878 -0
  266. pysisyphus/irc/IRCDummy.py +10 -0
  267. pysisyphus/irc/Instanton.py +307 -0
  268. pysisyphus/irc/LQA.py +53 -0
  269. pysisyphus/irc/ModeKill.py +136 -0
  270. pysisyphus/irc/ParamPlot.py +53 -0
  271. pysisyphus/irc/RK4.py +36 -0
  272. pysisyphus/irc/__init__.py +31 -0
  273. pysisyphus/irc/initial_displ.py +219 -0
  274. pysisyphus/linalg.py +411 -0
  275. pysisyphus/line_searches/Backtracking.py +88 -0
  276. pysisyphus/line_searches/HagerZhang.py +184 -0
  277. pysisyphus/line_searches/LineSearch.py +232 -0
  278. pysisyphus/line_searches/StrongWolfe.py +108 -0
  279. pysisyphus/line_searches/__init__.py +9 -0
  280. pysisyphus/line_searches/interpol.py +15 -0
  281. pysisyphus/modefollow/NormalMode.py +40 -0
  282. pysisyphus/modefollow/__init__.py +10 -0
  283. pysisyphus/modefollow/davidson.py +199 -0
  284. pysisyphus/modefollow/lanczos.py +95 -0
  285. pysisyphus/optimizers/BFGS.py +99 -0
  286. pysisyphus/optimizers/BacktrackingOptimizer.py +113 -0
  287. pysisyphus/optimizers/ConjugateGradient.py +98 -0
  288. pysisyphus/optimizers/CubicNewton.py +75 -0
  289. pysisyphus/optimizers/FIRE.py +113 -0
  290. pysisyphus/optimizers/HessianOptimizer.py +1176 -0
  291. pysisyphus/optimizers/LBFGS.py +228 -0
  292. pysisyphus/optimizers/LayerOpt.py +411 -0
  293. pysisyphus/optimizers/MicroOptimizer.py +169 -0
  294. pysisyphus/optimizers/NCOptimizer.py +90 -0
  295. pysisyphus/optimizers/Optimizer.py +1084 -0
  296. pysisyphus/optimizers/PreconLBFGS.py +260 -0
  297. pysisyphus/optimizers/PreconSteepestDescent.py +7 -0
  298. pysisyphus/optimizers/QuickMin.py +74 -0
  299. pysisyphus/optimizers/RFOptimizer.py +181 -0
  300. pysisyphus/optimizers/RSA.py +99 -0
  301. pysisyphus/optimizers/StabilizedQNMethod.py +248 -0
  302. pysisyphus/optimizers/SteepestDescent.py +23 -0
  303. pysisyphus/optimizers/StringOptimizer.py +173 -0
  304. pysisyphus/optimizers/__init__.py +41 -0
  305. pysisyphus/optimizers/closures.py +301 -0
  306. pysisyphus/optimizers/cls_map.py +58 -0
  307. pysisyphus/optimizers/exceptions.py +6 -0
  308. pysisyphus/optimizers/gdiis.py +280 -0
  309. pysisyphus/optimizers/guess_hessians.py +311 -0
  310. pysisyphus/optimizers/hessian_updates.py +355 -0
  311. pysisyphus/optimizers/poly_fit.py +285 -0
  312. pysisyphus/optimizers/precon.py +153 -0
  313. pysisyphus/optimizers/restrict_step.py +24 -0
  314. pysisyphus/pack.py +172 -0
  315. pysisyphus/peakdetect.py +948 -0
  316. pysisyphus/plot.py +1031 -0
  317. pysisyphus/run.py +2106 -0
  318. pysisyphus/socket_helper.py +74 -0
  319. pysisyphus/stocastic/FragmentKick.py +132 -0
  320. pysisyphus/stocastic/Kick.py +81 -0
  321. pysisyphus/stocastic/Pipeline.py +303 -0
  322. pysisyphus/stocastic/__init__.py +21 -0
  323. pysisyphus/stocastic/align.py +127 -0
  324. pysisyphus/testing.py +96 -0
  325. pysisyphus/thermo.py +156 -0
  326. pysisyphus/trj.py +824 -0
  327. pysisyphus/tsoptimizers/RSIRFOptimizer.py +56 -0
  328. pysisyphus/tsoptimizers/RSPRFOptimizer.py +182 -0
  329. pysisyphus/tsoptimizers/TRIM.py +59 -0
  330. pysisyphus/tsoptimizers/TSHessianOptimizer.py +463 -0
  331. pysisyphus/tsoptimizers/__init__.py +23 -0
  332. pysisyphus/wavefunction/Basis.py +239 -0
  333. pysisyphus/wavefunction/DIIS.py +76 -0
  334. pysisyphus/wavefunction/__init__.py +25 -0
  335. pysisyphus/wavefunction/build_ext.py +42 -0
  336. pysisyphus/wavefunction/cart2sph.py +190 -0
  337. pysisyphus/wavefunction/diabatization.py +304 -0
  338. pysisyphus/wavefunction/excited_states.py +435 -0
  339. pysisyphus/wavefunction/gen_ints.py +1811 -0
  340. pysisyphus/wavefunction/helpers.py +104 -0
  341. pysisyphus/wavefunction/ints/__init__.py +0 -0
  342. pysisyphus/wavefunction/ints/boys.py +193 -0
  343. pysisyphus/wavefunction/ints/boys_table_N_64_xasym_27.1_step_0.01.npy +0 -0
  344. pysisyphus/wavefunction/ints/cart_gto3d.py +176 -0
  345. pysisyphus/wavefunction/ints/coulomb3d.py +25928 -0
  346. pysisyphus/wavefunction/ints/diag_quadrupole3d.py +10036 -0
  347. pysisyphus/wavefunction/ints/dipole3d.py +8762 -0
  348. pysisyphus/wavefunction/ints/int2c2e3d.py +7198 -0
  349. pysisyphus/wavefunction/ints/int3c2e3d_sph.py +65040 -0
  350. pysisyphus/wavefunction/ints/kinetic3d.py +8240 -0
  351. pysisyphus/wavefunction/ints/ovlp3d.py +3777 -0
  352. pysisyphus/wavefunction/ints/quadrupole3d.py +15054 -0
  353. pysisyphus/wavefunction/ints/self_ovlp3d.py +198 -0
  354. pysisyphus/wavefunction/localization.py +458 -0
  355. pysisyphus/wavefunction/multipole.py +159 -0
  356. pysisyphus/wavefunction/normalization.py +36 -0
  357. pysisyphus/wavefunction/pop_analysis.py +134 -0
  358. pysisyphus/wavefunction/shells.py +1171 -0
  359. pysisyphus/wavefunction/wavefunction.py +504 -0
  360. pysisyphus/wrapper/__init__.py +11 -0
  361. pysisyphus/wrapper/exceptions.py +2 -0
  362. pysisyphus/wrapper/jmol.py +120 -0
  363. pysisyphus/wrapper/mwfn.py +169 -0
  364. pysisyphus/wrapper/packmol.py +71 -0
  365. pysisyphus/xyzloader.py +168 -0
  366. pysisyphus/yaml_mods.py +45 -0
  367. thermoanalysis/LICENSE +674 -0
  368. thermoanalysis/QCData.py +244 -0
  369. thermoanalysis/__init__.py +0 -0
  370. thermoanalysis/config.py +3 -0
  371. thermoanalysis/constants.py +20 -0
  372. thermoanalysis/thermo.py +1011 -0
mlmm/oniom_import.py ADDED
@@ -0,0 +1,457 @@
1
+ """Import ONIOM input (Gaussian/ORCA) and reconstruct XYZ + layered PDB.
2
+
3
+ Outputs:
4
+ - <out_prefix>.xyz
5
+ - <out_prefix>_layered.pdb
6
+
7
+ Layer encoding in output PDB B-factor:
8
+ - ML(QM): 0.00
9
+ - Movable MM: 10.00
10
+ - Frozen MM: 20.00
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import re
16
+ from pathlib import Path
17
+ from typing import List, Optional, Sequence, Set, Tuple
18
+
19
+ import click
20
+ import numpy as np
21
+
22
+ from .defaults import BFACTOR_ML, BFACTOR_MOVABLE_MM, BFACTOR_FROZEN
23
+
24
+
25
# Signed decimal literal with an optional exponent. Accepts "12", "1.5", ".5"
# and the trailing-dot form "1." (which float() also accepts); a lone "." or
# sign is rejected because at least one digit is required.
_FLOAT_RE = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"
# Gaussian ONIOM coordinate row: atom token, integer freeze flag, x, y, z,
# layer letter (H or L), then optional trailing text.
_G16_COORD_RE = re.compile(
    rf"^\s*(\S+)\s+(-?\d+)\s+({_FLOAT_RE})\s+({_FLOAT_RE})\s+({_FLOAT_RE})\s+([HL])(?:\s+.*)?$"
)
# ORCA xyz row: one- or two-letter element symbol followed by x, y, z.
_ORCA_XYZ_RE = re.compile(rf"^\s*([A-Za-z][A-Za-z]?)\s+({_FLOAT_RE})\s+({_FLOAT_RE})\s+({_FLOAT_RE})\s*$")
# A line made of exactly six whitespace-separated signed integers.
_SIX_INT_RE = re.compile(r"^\s*[-+]?\d+(?:\s+[-+]?\d+){5}\s*$")
31
+
32
+
33
def _normalize_element_symbol(sym: str) -> str:
    """Return a capitalised one- or two-letter symbol built from *sym*.

    Every character that is not an ASCII letter is discarded. The first
    remaining letter is upper-cased, the second (if any) lower-cased, and any
    further letters are dropped. "X" is returned when no letters remain
    (including for an empty or ``None`` input).
    """
    letters = "".join(re.findall(r"[A-Za-z]", (sym or "").strip()))
    if not letters:
        return "X"
    # letters[1:2] is "" for a single-letter symbol, so no length check needed.
    return letters[0].upper() + letters[1:2].lower()
40
+
41
+
42
def _resolve_mode(mode: Optional[str], input_path: Path) -> str:
    """Return the parser mode ("g16" or "orca") to use for *input_path*.

    An explicit *mode* is trimmed and lower-cased and must be one of the two
    supported names. When *mode* is ``None`` the mode is inferred from the
    file suffix: .gjf/.com select "g16" and .inp selects "orca".

    Raises:
        click.BadParameter: *mode* is given but is not "g16" or "orca".
        click.ClickException: *mode* is ``None`` and the suffix is unknown.
    """
    if mode is None:
        by_suffix = {".gjf": "g16", ".com": "g16", ".inp": "orca"}
        inferred = by_suffix.get(input_path.suffix.lower())
        if inferred is None:
            raise click.ClickException(
                f"Could not infer mode from '{input_path.name}'. Use --mode g16|orca "
                "or input extension .gjf/.com/.inp."
            )
        return inferred

    requested = str(mode).strip().lower()
    if requested not in ("g16", "orca"):
        raise click.BadParameter("--mode must be one of: g16, orca")
    return requested
58
+
59
+
60
def _parse_orca_index_set(raw: str) -> Set[int]:
    """Parse ORCA compact index set (0-based), e.g. "{0:3 7 10:12}".

    Surrounding braces are optional. Tokens are whitespace-separated; a token
    is either a single integer or an inclusive ``first:last`` range. An empty
    or ``None`` input yields an empty set.

    Raises:
        ValueError: a token is not an integer or a well-formed ascending
            ``first:last`` range. The message always names the offending token.
    """
    txt = (raw or "").strip()
    if txt.startswith("{") and txt.endswith("}"):
        txt = txt[1:-1]

    out: Set[int] = set()
    for tok in txt.split():
        first, sep, last = tok.partition(":")
        try:
            a = int(first)
            # A plain index is treated as the one-element range a:a.
            b = int(last) if sep else a
        except ValueError as exc:
            # Covers "1:2:3", "1:", ":3", "a" ... which would otherwise surface
            # as a bare int() error without the token context.
            kind = "range token" if sep else "index token"
            raise ValueError(f"Invalid ORCA {kind}: '{tok}'") from exc
        if b < a:
            raise ValueError(f"Invalid descending ORCA range token: '{tok}'")
        out.update(range(a, b + 1))
    return out
84
+
85
+
86
def _parse_gaussian_oniom(path: Path) -> Tuple[np.ndarray, List[str], Set[int], Set[int], int, int]:
    """Parse Gaussian ONIOM input produced by mlmm oniom-export.

    The first line consisting of six integers is taken as the
    charge/multiplicity line; its third and fourth fields are returned as the
    QM charge and multiplicity. Coordinate rows are read from the first
    non-blank line after it up to the next blank line (or end of file).

    Atoms in layer "H" are recorded as QM and movable. Atoms in layer "L" are
    movable only when their freeze flag is 0.

    Returns:
        coords, elements, qm_indices, movable_indices, qm_charge, qm_mult
        (indices are 0-based positions in the coordinate block).

    Raises:
        click.ClickException: no charge line, an unparsable coordinate row, or
            an empty coordinate block.
    """
    lines = path.read_text(encoding="utf-8", errors="replace").splitlines()

    charge_line_idx = next(
        (n for n, text in enumerate(lines) if _SIX_INT_RE.match(text)),
        None,
    )
    if charge_line_idx is None:
        raise click.ClickException("Failed to locate Gaussian ONIOM charge/multiplicity line.")

    fields = lines[charge_line_idx].split()
    if len(fields) != 6:
        raise click.ClickException("Invalid Gaussian ONIOM charge/multiplicity line.")

    try:
        qm_charge, qm_mult = int(fields[2]), int(fields[3])
    except Exception as exc:
        raise click.ClickException("Failed to parse Gaussian QM charge/multiplicity.") from exc

    # Coordinate block starts after the charge line and optional blanks.
    first_row = charge_line_idx + 1
    total = len(lines)
    while first_row < total and not lines[first_row].strip():
        first_row += 1

    coords: List[List[float]] = []
    elems: List[str] = []
    qm_indices: Set[int] = set()
    movable_indices: Set[int] = set()

    # lineno is 1-based so it can be reported verbatim in error messages.
    for lineno, text in enumerate(lines[first_row:], start=first_row + 1):
        if not text.strip():
            break  # a blank line terminates the coordinate block

        hit = _G16_COORD_RE.match(text)
        if hit is None:
            # We intentionally support only mlmm-export style rows.
            raise click.ClickException(
                f"Unsupported Gaussian coordinate line format at line {lineno}: {text!r}"
            )

        atom_token, freeze_flag, sx, sy, sz, layer = hit.groups()
        atom_index = len(coords)  # 0-based index of the atom being added

        coords.append([float(sx), float(sy), float(sz)])
        # The element is whatever precedes the first "-" of the atom token.
        elems.append(_normalize_element_symbol(atom_token.split("-", 1)[0]))

        if layer == "H":
            qm_indices.add(atom_index)
            movable_indices.add(atom_index)
        elif int(freeze_flag) == 0:
            movable_indices.add(atom_index)

    if not coords:
        raise click.ClickException("No coordinate rows found in Gaussian ONIOM input.")

    return np.asarray(coords, dtype=float), elems, qm_indices, movable_indices, qm_charge, qm_mult
162
+
163
+
164
def _parse_orca_qmmm(path: Path) -> Tuple[np.ndarray, List[str], Set[int], Set[int], int, int]:
    """Parse ORCA QM/MM input produced by mlmm oniom-export.

    The ``%qmmm`` block runs from the first ``%qmmm`` line to the next line
    that contains only ``end``. Inside it, single-line ``QMAtoms {...} end``
    and ``ActiveAtoms {...} end`` entries give 0-based index sets (the last
    occurrence of each wins). Coordinates, QM charge and multiplicity come
    from the first ``* xyz <charge> <mult>`` block, read up to a lone ``*``.

    The returned movable set is the union of ActiveAtoms and QMAtoms.

    Returns:
        coords, elements, qm_indices, movable_indices, qm_charge, qm_mult

    Raises:
        click.ClickException: a required block or entry is missing, an index
            set or xyz row is malformed, or an index is out of range.
    """
    lines = path.read_text(encoding="utf-8", errors="replace").splitlines()

    qmmm_start = next(
        (n for n, text in enumerate(lines) if re.match(r"^\s*%qmmm\b", text, flags=re.IGNORECASE)),
        None,
    )
    if qmmm_start is None:
        raise click.ClickException("Failed to find %qmmm block in ORCA input.")

    qmmm_end = next(
        (
            n
            for n in range(qmmm_start + 1, len(lines))
            if re.match(r"^\s*end\s*$", lines[n], flags=re.IGNORECASE)
        ),
        None,
    )
    if qmmm_end is None:
        raise click.ClickException("Failed to find end of %qmmm block in ORCA input.")

    patterns = {
        "QMAtoms": re.compile(r"QMAtoms\s+(\{.*\})\s+end", flags=re.IGNORECASE),
        "ActiveAtoms": re.compile(r"ActiveAtoms\s+(\{.*\})\s+end", flags=re.IGNORECASE),
    }
    parsed = {}
    for raw in lines[qmmm_start : qmmm_end + 1]:
        for keyword, pat in patterns.items():
            hit = pat.search(raw)
            if hit is None:
                continue
            try:
                parsed[keyword] = _parse_orca_index_set(hit.group(1))
            except ValueError as exc:
                # Report malformed index sets like every other parse failure
                # instead of letting a raw ValueError traceback escape.
                raise click.ClickException(
                    f"Invalid {keyword} index set in ORCA %qmmm block: {exc}"
                ) from exc

    qm_indices: Optional[Set[int]] = parsed.get("QMAtoms")
    active_indices: Optional[Set[int]] = parsed.get("ActiveAtoms")
    if qm_indices is None:
        raise click.ClickException("Failed to parse QMAtoms from ORCA %qmmm block.")
    if active_indices is None:
        raise click.ClickException("Failed to parse ActiveAtoms from ORCA %qmmm block.")

    xyz_start = None
    qm_charge = 0
    qm_mult = 1
    xyz_header_re = re.compile(r"^\s*\*\s*xyz\s+([-+]?\d+)\s+([-+]?\d+)\s*$", flags=re.IGNORECASE)
    for n, text in enumerate(lines):
        header = xyz_header_re.match(text)
        if header:
            xyz_start = n
            qm_charge = int(header.group(1))
            qm_mult = int(header.group(2))
            break
    if xyz_start is None:
        raise click.ClickException("Failed to find '* xyz <charge> <mult>' block in ORCA input.")

    coords: List[List[float]] = []
    elems: List[str] = []
    # lineno is 1-based so it can be reported verbatim in error messages.
    for lineno, text in enumerate(lines[xyz_start + 1 :], start=xyz_start + 2):
        if re.match(r"^\s*\*\s*$", text):
            break  # a lone "*" closes the xyz block
        if not text.strip():
            continue  # blank lines inside the block are ignored

        row = _ORCA_XYZ_RE.match(text)
        if row is None:
            raise click.ClickException(
                f"Unsupported ORCA xyz line format at line {lineno}: {text!r}"
            )

        elems.append(_normalize_element_symbol(row.group(1)))
        coords.append([float(row.group(2)), float(row.group(3)), float(row.group(4))])

    if not coords:
        raise click.ClickException("No coordinates found in ORCA xyz block.")

    n_atoms = len(coords)
    if any(k < 0 or k >= n_atoms for k in qm_indices | active_indices):
        raise click.ClickException(
            f"ORCA QMAtoms/ActiveAtoms contain out-of-range indices for {n_atoms} atoms."
        )

    # QM atoms always count as movable, whether or not ActiveAtoms lists them.
    movable_indices = set(active_indices) | set(qm_indices)

    return np.asarray(coords, dtype=float), elems, set(qm_indices), movable_indices, qm_charge, qm_mult
262
+
263
+
264
def _bfactor_for_atom(idx: int, qm_indices: Set[int], movable_indices: Set[int]) -> float:
    """Return the layer-encoding B-factor for atom *idx*.

    QM membership takes precedence over movable membership; an atom in
    neither set gets the frozen value. The values themselves come from the
    BFACTOR_* constants imported from ``.defaults``.
    """
    if idx in qm_indices:
        layer_value = BFACTOR_ML
    elif idx in movable_indices:
        layer_value = BFACTOR_MOVABLE_MM
    else:
        layer_value = BFACTOR_FROZEN
    return float(layer_value)
270
+
271
+
272
def _write_xyz(path: Path, coords: np.ndarray, elements: Sequence[str], comment: str = "") -> None:
    """Write *coords* to *path* as an XYZ file (UTF-8).

    The file holds the atom count, the *comment* line, then one row per atom
    with a normalised element symbol and three coordinates printed with eight
    decimals. Atoms with no entry in *elements* are written as "X".
    """
    n_atoms = int(coords.shape[0])
    n_named = len(elements)

    def _row(k: int) -> str:
        # Fall back to the placeholder symbol when elements is too short.
        e = _normalize_element_symbol(elements[k] if k < n_named else "X")
        x, y, z = coords[k]
        return f"{e:>2s} {x: .8f} {y: .8f} {z: .8f}"

    rows = [str(n_atoms), comment]
    rows.extend(_row(k) for k in range(n_atoms))
    path.write_text("".join(f"{row}\n" for row in rows), encoding="utf-8")
280
+
281
+
282
def _format_pdb_atom_line(
    serial: int,
    atom_name: str,
    res_name: str,
    chain_id: str,
    res_seq: int,
    x: float,
    y: float,
    z: float,
    bfac: float,
    element: str,
) -> str:
    """Return one fixed-column PDB ``ATOM`` record terminated by a newline.

    Fields are placed at the standard PDB offsets (0-based slices), which are
    the same ones ``_patch_ref_pdb_line`` relies on:
    record[0:6], serial[6:11], name[12:16], resName[17:20], chain[21],
    resSeq[22:26], x[30:38], y[38:46], z[46:54], occupancy[54:60],
    B-factor[60:66], element[76:78].

    Empty *atom_name*, *res_name* and *chain_id* fall back to "X", "MOL" and
    "A"; over-long values are truncated to their field width. Occupancy is
    always written as 1.00.
    """
    an = (atom_name or "X")[:4]
    rn = (res_name or "MOL")[:3]
    ch = (chain_id or "A")[:1]
    rs = int(res_seq)
    el = _normalize_element_symbol(element)

    # The serial field is five characters wide; wrap larger values so a big
    # system cannot push every later field out of its column.
    serial_field = int(serial)
    if serial_field > 99999:
        serial_field %= 100000

    # Padding is spelled out explicitly so the column layout cannot be lost
    # to whitespace normalisation.
    fields = (
        "ATOM".ljust(6),          # [0:6]   record name
        f"{serial_field:5d}",     # [6:11]  atom serial
        " ",                      # [11]
        f"{an:>4s}",              # [12:16] atom name
        " ",                      # [16]    altLoc (blank)
        f"{rn:>3s}",              # [17:20] residue name
        " ",                      # [20]
        f"{ch:1s}",               # [21]    chain id
        f"{rs:4d}",               # [22:26] residue sequence number
        " " * 4,                  # [26:30] iCode + padding
        f"{x:8.3f}{y:8.3f}{z:8.3f}",  # [30:54] coordinates
        f"{1.00:6.2f}",           # [54:60] occupancy
        f"{bfac:6.2f}",           # [60:66] B-factor (layer code)
        " " * 10,                 # [66:76]
        f"{el:>2s}",              # [76:78] element symbol
        "\n",
    )
    return "".join(fields)
304
+
305
+
306
def _patch_ref_pdb_line(line: str, x: float, y: float, z: float, bfac: float) -> str:
    """Return *line* with its coordinate and B-factor fields overwritten.

    Trailing newlines are stripped and the record is right-padded with spaces
    to at least 80 characters. Slices [30:54] (x, y, z) and [60:66]
    (B-factor) are then replaced; every other column is kept. The result
    ends with a single newline.
    """
    padded = line.rstrip("\n").ljust(80)
    xyz_field = "".join(f"{value:8.3f}" for value in (x, y, z))
    # columns: x[30:38], y[38:46], z[46:54], b[60:66]
    pieces = (
        padded[:30],
        xyz_field,
        padded[54:60],
        f"{bfac:6.2f}",
        padded[66:],
        "\n",
    )
    return "".join(pieces)
313
+
314
+
315
def _write_layered_pdb_without_ref(
    path: Path,
    coords: np.ndarray,
    elements: Sequence[str],
    qm_indices: Set[int],
    movable_indices: Set[int],
) -> None:
    """Write a minimal layered PDB to *path* when no reference PDB is given.

    Each atom becomes an ATOM record in residue "MOL" 1 of chain "A", named
    after its normalised element symbol, with 1-based serial numbers and the
    layer code from ``_bfactor_for_atom`` as its B-factor. Atoms with no entry
    in *elements* use "X". The file ends with an END record.
    """
    n_named = len(elements)

    def _record(k: int) -> str:
        symbol = _normalize_element_symbol(elements[k] if k < n_named else "X")
        px, py, pz = coords[k]
        return _format_pdb_atom_line(
            serial=k + 1,
            atom_name=symbol[:2],  # at most two characters are used as the name
            res_name="MOL",
            chain_id="A",
            res_seq=1,
            x=float(px),
            y=float(py),
            z=float(pz),
            bfac=_bfactor_for_atom(k, qm_indices, movable_indices),
            element=symbol,
        )

    records = [_record(k) for k in range(int(coords.shape[0]))]
    records.append("END\n")
    path.write_text("".join(records), encoding="utf-8")
344
+
345
+
346
def _write_layered_pdb_with_ref(
    path: Path,
    ref_pdb: Path,
    coords: np.ndarray,
    qm_indices: Set[int],
    movable_indices: Set[int],
) -> None:
    """Write a layered PDB to *path* using *ref_pdb* as a template.

    The ATOM/HETATM records of the reference are matched to *coords* by order
    of appearance. Each gets its coordinates and B-factor overwritten via
    ``_patch_ref_pdb_line``. All other reference lines are copied unchanged.

    Raises:
        click.ClickException: the reference does not contain exactly one
            ATOM/HETATM record per row of *coords*.
    """
    out_lines = ref_pdb.read_text(encoding="utf-8", errors="replace").splitlines(keepends=True)
    atom_rows: List[int] = []
    for row, text in enumerate(out_lines):
        if text.startswith(("ATOM ", "HETATM")):
            atom_rows.append(row)

    n_atoms = int(coords.shape[0])
    n_ref = len(atom_rows)
    if n_ref != n_atoms:
        raise click.ClickException(
            f"--ref-pdb atom count mismatch: ref has {len(atom_rows)} ATOM/HETATM rows, "
            f"but ONIOM input has {n_atoms} atoms."
        )

    for atom_idx, row in enumerate(atom_rows):
        px, py, pz = coords[atom_idx]
        layer_code = _bfactor_for_atom(atom_idx, qm_indices, movable_indices)
        out_lines[row] = _patch_ref_pdb_line(out_lines[row], float(px), float(py), float(pz), layer_code)

    path.write_text("".join(out_lines), encoding="utf-8")
372
+
373
+
374
@click.command(
    name="oniom-import",
    help=(
        "Import ONIOM input (Gaussian g16 or ORCA) and reconstruct XYZ + B-factor layered PDB."
    ),
    context_settings={"help_option_names": ["-h", "--help"]},
)
@click.option(
    "-i",
    "--input",
    "input_path",
    type=click.Path(path_type=Path, exists=True, dir_okay=False),
    required=True,
    help="Input ONIOM file (.gjf/.com for g16, .inp for ORCA).",
)
@click.option(
    "--mode",
    type=click.Choice(["g16", "orca"], case_sensitive=False),
    default=None,
    help="Input mode. If omitted, inferred from input suffix.",
)
@click.option(
    "-o",
    "--out-prefix",
    "out_prefix",
    type=click.Path(path_type=Path),
    default=None,
    help="Output prefix. Defaults to input stem in the current working directory.",
)
@click.option(
    "--ref-pdb",
    type=click.Path(path_type=Path, exists=True, dir_okay=False),
    default=None,
    help="Reference PDB to preserve atom naming/residue metadata (atom count must match).",
)
def cli(
    input_path: Path,
    mode: Optional[str],
    out_prefix: Optional[Path],
    ref_pdb: Optional[Path],
) -> None:
    """Parse an ONIOM input file and write ``<prefix>.xyz`` and ``<prefix>_layered.pdb``.

    The prefix is *out_prefix* when given, otherwise the input file's stem in
    the current working directory; its parent directory is created if needed.
    With *ref_pdb* the layered PDB is produced by patching the reference,
    otherwise a minimal PDB is generated from the parsed elements. A short
    summary and both output paths are echoed.
    """
    mode_resolved = _resolve_mode(mode, input_path)

    prefix = (Path.cwd() / input_path.stem) if out_prefix is None else Path(out_prefix)
    prefix = prefix.resolve()
    prefix.parent.mkdir(parents=True, exist_ok=True)

    parse = _parse_gaussian_oniom if mode_resolved == "g16" else _parse_orca_qmmm
    coords, elements, qm_indices, movable_indices, qm_charge, qm_mult = parse(input_path)

    n_atoms = int(coords.shape[0])
    if n_atoms <= 0:
        raise click.ClickException("No atoms parsed from ONIOM input.")

    # Append the extensions to the full prefix name. Path.with_suffix() would
    # replace everything after the last dot, turning a prefix such as
    # "system.v2" into "system.xyz" while the PDB kept "system.v2".
    xyz_path = prefix.parent / f"{prefix.name}.xyz"
    pdb_path = prefix.parent / f"{prefix.name}_layered.pdb"

    _write_xyz(
        xyz_path,
        coords,
        elements,
        comment=(
            f"mode={mode_resolved} atoms={n_atoms} qm={len(qm_indices)} movable={len(movable_indices)} "
            f"q={qm_charge} m={qm_mult}"
        ),
    )

    if ref_pdb is not None:
        _write_layered_pdb_with_ref(pdb_path, ref_pdb, coords, qm_indices, movable_indices)
    else:
        _write_layered_pdb_without_ref(pdb_path, coords, elements, qm_indices, movable_indices)

    click.echo(f"[oniom-import] mode={mode_resolved}")
    click.echo(
        f"[oniom-import] atoms={n_atoms}, qm={len(qm_indices)}, movable={len(movable_indices)}, "
        f"frozen={n_atoms - len(set(movable_indices) | set(qm_indices))}"
    )
    click.echo(f"[oniom-import] wrote: {xyz_path}")
    click.echo(f"[oniom-import] wrote: {pdb_path}")