mlmm-toolkit 0.2.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (372) hide show
  1. hessian_ff/__init__.py +50 -0
  2. hessian_ff/analytical_hessian.py +609 -0
  3. hessian_ff/constants.py +46 -0
  4. hessian_ff/forcefield.py +339 -0
  5. hessian_ff/loaders.py +608 -0
  6. hessian_ff/native/Makefile +8 -0
  7. hessian_ff/native/__init__.py +28 -0
  8. hessian_ff/native/analytical_hessian.py +88 -0
  9. hessian_ff/native/analytical_hessian_ext.cpp +258 -0
  10. hessian_ff/native/bonded.py +82 -0
  11. hessian_ff/native/bonded_ext.cpp +640 -0
  12. hessian_ff/native/loader.py +349 -0
  13. hessian_ff/native/nonbonded.py +118 -0
  14. hessian_ff/native/nonbonded_ext.cpp +1150 -0
  15. hessian_ff/prmtop_parmed.py +23 -0
  16. hessian_ff/system.py +107 -0
  17. hessian_ff/terms/__init__.py +14 -0
  18. hessian_ff/terms/angle.py +73 -0
  19. hessian_ff/terms/bond.py +44 -0
  20. hessian_ff/terms/cmap.py +406 -0
  21. hessian_ff/terms/dihedral.py +141 -0
  22. hessian_ff/terms/nonbonded.py +209 -0
  23. hessian_ff/tests/__init__.py +0 -0
  24. hessian_ff/tests/conftest.py +75 -0
  25. hessian_ff/tests/data/small/complex.parm7 +1346 -0
  26. hessian_ff/tests/data/small/complex.pdb +125 -0
  27. hessian_ff/tests/data/small/complex.rst7 +63 -0
  28. hessian_ff/tests/test_coords_input.py +44 -0
  29. hessian_ff/tests/test_energy_force.py +49 -0
  30. hessian_ff/tests/test_hessian.py +137 -0
  31. hessian_ff/tests/test_smoke.py +18 -0
  32. hessian_ff/tests/test_validation.py +40 -0
  33. hessian_ff/workflows.py +889 -0
  34. mlmm/__init__.py +36 -0
  35. mlmm/__main__.py +7 -0
  36. mlmm/_version.py +34 -0
  37. mlmm/add_elem_info.py +374 -0
  38. mlmm/advanced_help.py +91 -0
  39. mlmm/align_freeze_atoms.py +601 -0
  40. mlmm/all.py +3535 -0
  41. mlmm/bond_changes.py +231 -0
  42. mlmm/bool_compat.py +223 -0
  43. mlmm/cli.py +574 -0
  44. mlmm/cli_utils.py +166 -0
  45. mlmm/default_group.py +337 -0
  46. mlmm/defaults.py +467 -0
  47. mlmm/define_layer.py +526 -0
  48. mlmm/dft.py +1041 -0
  49. mlmm/energy_diagram.py +253 -0
  50. mlmm/extract.py +2213 -0
  51. mlmm/fix_altloc.py +464 -0
  52. mlmm/freq.py +1406 -0
  53. mlmm/harmonic_constraints.py +140 -0
  54. mlmm/hessian_cache.py +44 -0
  55. mlmm/hessian_calc.py +174 -0
  56. mlmm/irc.py +638 -0
  57. mlmm/mlmm_calc.py +2262 -0
  58. mlmm/mm_parm.py +945 -0
  59. mlmm/oniom_export.py +1983 -0
  60. mlmm/oniom_import.py +457 -0
  61. mlmm/opt.py +1742 -0
  62. mlmm/path_opt.py +1353 -0
  63. mlmm/path_search.py +2299 -0
  64. mlmm/preflight.py +88 -0
  65. mlmm/py.typed +1 -0
  66. mlmm/pysis_runner.py +45 -0
  67. mlmm/scan.py +1047 -0
  68. mlmm/scan2d.py +1226 -0
  69. mlmm/scan3d.py +1265 -0
  70. mlmm/scan_common.py +184 -0
  71. mlmm/summary_log.py +736 -0
  72. mlmm/trj2fig.py +448 -0
  73. mlmm/tsopt.py +2871 -0
  74. mlmm/utils.py +2309 -0
  75. mlmm/xtb_embedcharge_correction.py +475 -0
  76. mlmm_toolkit-0.2.2.dev0.dist-info/METADATA +1159 -0
  77. mlmm_toolkit-0.2.2.dev0.dist-info/RECORD +372 -0
  78. mlmm_toolkit-0.2.2.dev0.dist-info/WHEEL +5 -0
  79. mlmm_toolkit-0.2.2.dev0.dist-info/entry_points.txt +2 -0
  80. mlmm_toolkit-0.2.2.dev0.dist-info/licenses/LICENSE +674 -0
  81. mlmm_toolkit-0.2.2.dev0.dist-info/top_level.txt +4 -0
  82. pysisyphus/Geometry.py +1667 -0
  83. pysisyphus/LICENSE +674 -0
  84. pysisyphus/TableFormatter.py +63 -0
  85. pysisyphus/TablePrinter.py +74 -0
  86. pysisyphus/__init__.py +12 -0
  87. pysisyphus/calculators/AFIR.py +452 -0
  88. pysisyphus/calculators/AnaPot.py +20 -0
  89. pysisyphus/calculators/AnaPot2.py +48 -0
  90. pysisyphus/calculators/AnaPot3.py +12 -0
  91. pysisyphus/calculators/AnaPot4.py +20 -0
  92. pysisyphus/calculators/AnaPotBase.py +337 -0
  93. pysisyphus/calculators/AnaPotCBM.py +25 -0
  94. pysisyphus/calculators/AtomAtomTransTorque.py +154 -0
  95. pysisyphus/calculators/CFOUR.py +250 -0
  96. pysisyphus/calculators/Calculator.py +844 -0
  97. pysisyphus/calculators/CerjanMiller.py +24 -0
  98. pysisyphus/calculators/Composite.py +123 -0
  99. pysisyphus/calculators/ConicalIntersection.py +171 -0
  100. pysisyphus/calculators/DFTBp.py +430 -0
  101. pysisyphus/calculators/DFTD3.py +66 -0
  102. pysisyphus/calculators/DFTD4.py +84 -0
  103. pysisyphus/calculators/Dalton.py +61 -0
  104. pysisyphus/calculators/Dimer.py +681 -0
  105. pysisyphus/calculators/Dummy.py +20 -0
  106. pysisyphus/calculators/EGO.py +76 -0
  107. pysisyphus/calculators/EnergyMin.py +224 -0
  108. pysisyphus/calculators/ExternalPotential.py +264 -0
  109. pysisyphus/calculators/FakeASE.py +35 -0
  110. pysisyphus/calculators/FourWellAnaPot.py +28 -0
  111. pysisyphus/calculators/FreeEndNEBPot.py +39 -0
  112. pysisyphus/calculators/Gaussian09.py +18 -0
  113. pysisyphus/calculators/Gaussian16.py +726 -0
  114. pysisyphus/calculators/HardSphere.py +159 -0
  115. pysisyphus/calculators/IDPPCalculator.py +49 -0
  116. pysisyphus/calculators/IPIClient.py +133 -0
  117. pysisyphus/calculators/IPIServer.py +234 -0
  118. pysisyphus/calculators/LEPSBase.py +24 -0
  119. pysisyphus/calculators/LEPSExpr.py +139 -0
  120. pysisyphus/calculators/LennardJones.py +80 -0
  121. pysisyphus/calculators/MOPAC.py +219 -0
  122. pysisyphus/calculators/MullerBrownSympyPot.py +51 -0
  123. pysisyphus/calculators/MultiCalc.py +85 -0
  124. pysisyphus/calculators/NFK.py +45 -0
  125. pysisyphus/calculators/OBabel.py +87 -0
  126. pysisyphus/calculators/ONIOMv2.py +1129 -0
  127. pysisyphus/calculators/ORCA.py +893 -0
  128. pysisyphus/calculators/ORCA5.py +6 -0
  129. pysisyphus/calculators/OpenMM.py +88 -0
  130. pysisyphus/calculators/OpenMolcas.py +281 -0
  131. pysisyphus/calculators/OverlapCalculator.py +908 -0
  132. pysisyphus/calculators/Psi4.py +218 -0
  133. pysisyphus/calculators/PyPsi4.py +37 -0
  134. pysisyphus/calculators/PySCF.py +341 -0
  135. pysisyphus/calculators/PyXTB.py +73 -0
  136. pysisyphus/calculators/QCEngine.py +106 -0
  137. pysisyphus/calculators/Rastrigin.py +22 -0
  138. pysisyphus/calculators/Remote.py +76 -0
  139. pysisyphus/calculators/Rosenbrock.py +15 -0
  140. pysisyphus/calculators/SocketCalc.py +97 -0
  141. pysisyphus/calculators/TIP3P.py +111 -0
  142. pysisyphus/calculators/TransTorque.py +161 -0
  143. pysisyphus/calculators/Turbomole.py +965 -0
  144. pysisyphus/calculators/VRIPot.py +37 -0
  145. pysisyphus/calculators/WFOWrapper.py +333 -0
  146. pysisyphus/calculators/WFOWrapper2.py +341 -0
  147. pysisyphus/calculators/XTB.py +418 -0
  148. pysisyphus/calculators/__init__.py +81 -0
  149. pysisyphus/calculators/cosmo_data.py +139 -0
  150. pysisyphus/calculators/parser.py +150 -0
  151. pysisyphus/color.py +19 -0
  152. pysisyphus/config.py +133 -0
  153. pysisyphus/constants.py +65 -0
  154. pysisyphus/cos/AdaptiveNEB.py +230 -0
  155. pysisyphus/cos/ChainOfStates.py +725 -0
  156. pysisyphus/cos/FreeEndNEB.py +25 -0
  157. pysisyphus/cos/FreezingString.py +103 -0
  158. pysisyphus/cos/GrowingChainOfStates.py +71 -0
  159. pysisyphus/cos/GrowingNT.py +309 -0
  160. pysisyphus/cos/GrowingString.py +508 -0
  161. pysisyphus/cos/NEB.py +189 -0
  162. pysisyphus/cos/SimpleZTS.py +64 -0
  163. pysisyphus/cos/__init__.py +22 -0
  164. pysisyphus/cos/stiffness.py +199 -0
  165. pysisyphus/drivers/__init__.py +17 -0
  166. pysisyphus/drivers/afir.py +855 -0
  167. pysisyphus/drivers/barriers.py +271 -0
  168. pysisyphus/drivers/birkholz.py +138 -0
  169. pysisyphus/drivers/cluster.py +318 -0
  170. pysisyphus/drivers/diabatization.py +133 -0
  171. pysisyphus/drivers/merge.py +368 -0
  172. pysisyphus/drivers/merge_mol2.py +322 -0
  173. pysisyphus/drivers/opt.py +375 -0
  174. pysisyphus/drivers/perf.py +91 -0
  175. pysisyphus/drivers/pka.py +52 -0
  176. pysisyphus/drivers/precon_pos_rot.py +669 -0
  177. pysisyphus/drivers/rates.py +480 -0
  178. pysisyphus/drivers/replace.py +219 -0
  179. pysisyphus/drivers/scan.py +212 -0
  180. pysisyphus/drivers/spectrum.py +166 -0
  181. pysisyphus/drivers/thermo.py +31 -0
  182. pysisyphus/dynamics/Gaussian.py +103 -0
  183. pysisyphus/dynamics/__init__.py +20 -0
  184. pysisyphus/dynamics/colvars.py +136 -0
  185. pysisyphus/dynamics/driver.py +297 -0
  186. pysisyphus/dynamics/helpers.py +256 -0
  187. pysisyphus/dynamics/lincs.py +105 -0
  188. pysisyphus/dynamics/mdp.py +364 -0
  189. pysisyphus/dynamics/rattle.py +121 -0
  190. pysisyphus/dynamics/thermostats.py +128 -0
  191. pysisyphus/dynamics/wigner.py +266 -0
  192. pysisyphus/elem_data.py +3473 -0
  193. pysisyphus/exceptions.py +2 -0
  194. pysisyphus/filtertrj.py +69 -0
  195. pysisyphus/helpers.py +623 -0
  196. pysisyphus/helpers_pure.py +649 -0
  197. pysisyphus/init_logging.py +50 -0
  198. pysisyphus/intcoords/Bend.py +69 -0
  199. pysisyphus/intcoords/Bend2.py +25 -0
  200. pysisyphus/intcoords/BondedFragment.py +32 -0
  201. pysisyphus/intcoords/Cartesian.py +41 -0
  202. pysisyphus/intcoords/CartesianCoords.py +140 -0
  203. pysisyphus/intcoords/Coords.py +56 -0
  204. pysisyphus/intcoords/DLC.py +197 -0
  205. pysisyphus/intcoords/DistanceFunction.py +34 -0
  206. pysisyphus/intcoords/DummyImproper.py +70 -0
  207. pysisyphus/intcoords/DummyTorsion.py +72 -0
  208. pysisyphus/intcoords/LinearBend.py +105 -0
  209. pysisyphus/intcoords/LinearDisplacement.py +80 -0
  210. pysisyphus/intcoords/OutOfPlane.py +59 -0
  211. pysisyphus/intcoords/PrimTypes.py +286 -0
  212. pysisyphus/intcoords/Primitive.py +137 -0
  213. pysisyphus/intcoords/RedundantCoords.py +659 -0
  214. pysisyphus/intcoords/RobustTorsion.py +59 -0
  215. pysisyphus/intcoords/Rotation.py +147 -0
  216. pysisyphus/intcoords/Stretch.py +31 -0
  217. pysisyphus/intcoords/Torsion.py +101 -0
  218. pysisyphus/intcoords/Torsion2.py +25 -0
  219. pysisyphus/intcoords/Translation.py +45 -0
  220. pysisyphus/intcoords/__init__.py +61 -0
  221. pysisyphus/intcoords/augment_bonds.py +126 -0
  222. pysisyphus/intcoords/derivatives.py +10512 -0
  223. pysisyphus/intcoords/eval.py +80 -0
  224. pysisyphus/intcoords/exceptions.py +37 -0
  225. pysisyphus/intcoords/findiffs.py +48 -0
  226. pysisyphus/intcoords/generate_derivatives.py +414 -0
  227. pysisyphus/intcoords/helpers.py +235 -0
  228. pysisyphus/intcoords/logging_conf.py +10 -0
  229. pysisyphus/intcoords/mp_derivatives.py +10836 -0
  230. pysisyphus/intcoords/setup.py +962 -0
  231. pysisyphus/intcoords/setup_fast.py +176 -0
  232. pysisyphus/intcoords/update.py +272 -0
  233. pysisyphus/intcoords/valid.py +89 -0
  234. pysisyphus/interpolate/Geodesic.py +93 -0
  235. pysisyphus/interpolate/IDPP.py +55 -0
  236. pysisyphus/interpolate/Interpolator.py +116 -0
  237. pysisyphus/interpolate/LST.py +70 -0
  238. pysisyphus/interpolate/Redund.py +152 -0
  239. pysisyphus/interpolate/__init__.py +9 -0
  240. pysisyphus/interpolate/helpers.py +34 -0
  241. pysisyphus/io/__init__.py +22 -0
  242. pysisyphus/io/aomix.py +178 -0
  243. pysisyphus/io/cjson.py +24 -0
  244. pysisyphus/io/crd.py +101 -0
  245. pysisyphus/io/cube.py +220 -0
  246. pysisyphus/io/fchk.py +184 -0
  247. pysisyphus/io/hdf5.py +49 -0
  248. pysisyphus/io/hessian.py +72 -0
  249. pysisyphus/io/mol2.py +146 -0
  250. pysisyphus/io/molden.py +293 -0
  251. pysisyphus/io/orca.py +189 -0
  252. pysisyphus/io/pdb.py +269 -0
  253. pysisyphus/io/psf.py +79 -0
  254. pysisyphus/io/pubchem.py +31 -0
  255. pysisyphus/io/qcschema.py +34 -0
  256. pysisyphus/io/sdf.py +29 -0
  257. pysisyphus/io/xyz.py +61 -0
  258. pysisyphus/io/zmat.py +175 -0
  259. pysisyphus/irc/DWI.py +108 -0
  260. pysisyphus/irc/DampedVelocityVerlet.py +134 -0
  261. pysisyphus/irc/Euler.py +22 -0
  262. pysisyphus/irc/EulerPC.py +345 -0
  263. pysisyphus/irc/GonzalezSchlegel.py +187 -0
  264. pysisyphus/irc/IMKMod.py +164 -0
  265. pysisyphus/irc/IRC.py +878 -0
  266. pysisyphus/irc/IRCDummy.py +10 -0
  267. pysisyphus/irc/Instanton.py +307 -0
  268. pysisyphus/irc/LQA.py +53 -0
  269. pysisyphus/irc/ModeKill.py +136 -0
  270. pysisyphus/irc/ParamPlot.py +53 -0
  271. pysisyphus/irc/RK4.py +36 -0
  272. pysisyphus/irc/__init__.py +31 -0
  273. pysisyphus/irc/initial_displ.py +219 -0
  274. pysisyphus/linalg.py +411 -0
  275. pysisyphus/line_searches/Backtracking.py +88 -0
  276. pysisyphus/line_searches/HagerZhang.py +184 -0
  277. pysisyphus/line_searches/LineSearch.py +232 -0
  278. pysisyphus/line_searches/StrongWolfe.py +108 -0
  279. pysisyphus/line_searches/__init__.py +9 -0
  280. pysisyphus/line_searches/interpol.py +15 -0
  281. pysisyphus/modefollow/NormalMode.py +40 -0
  282. pysisyphus/modefollow/__init__.py +10 -0
  283. pysisyphus/modefollow/davidson.py +199 -0
  284. pysisyphus/modefollow/lanczos.py +95 -0
  285. pysisyphus/optimizers/BFGS.py +99 -0
  286. pysisyphus/optimizers/BacktrackingOptimizer.py +113 -0
  287. pysisyphus/optimizers/ConjugateGradient.py +98 -0
  288. pysisyphus/optimizers/CubicNewton.py +75 -0
  289. pysisyphus/optimizers/FIRE.py +113 -0
  290. pysisyphus/optimizers/HessianOptimizer.py +1176 -0
  291. pysisyphus/optimizers/LBFGS.py +228 -0
  292. pysisyphus/optimizers/LayerOpt.py +411 -0
  293. pysisyphus/optimizers/MicroOptimizer.py +169 -0
  294. pysisyphus/optimizers/NCOptimizer.py +90 -0
  295. pysisyphus/optimizers/Optimizer.py +1084 -0
  296. pysisyphus/optimizers/PreconLBFGS.py +260 -0
  297. pysisyphus/optimizers/PreconSteepestDescent.py +7 -0
  298. pysisyphus/optimizers/QuickMin.py +74 -0
  299. pysisyphus/optimizers/RFOptimizer.py +181 -0
  300. pysisyphus/optimizers/RSA.py +99 -0
  301. pysisyphus/optimizers/StabilizedQNMethod.py +248 -0
  302. pysisyphus/optimizers/SteepestDescent.py +23 -0
  303. pysisyphus/optimizers/StringOptimizer.py +173 -0
  304. pysisyphus/optimizers/__init__.py +41 -0
  305. pysisyphus/optimizers/closures.py +301 -0
  306. pysisyphus/optimizers/cls_map.py +58 -0
  307. pysisyphus/optimizers/exceptions.py +6 -0
  308. pysisyphus/optimizers/gdiis.py +280 -0
  309. pysisyphus/optimizers/guess_hessians.py +311 -0
  310. pysisyphus/optimizers/hessian_updates.py +355 -0
  311. pysisyphus/optimizers/poly_fit.py +285 -0
  312. pysisyphus/optimizers/precon.py +153 -0
  313. pysisyphus/optimizers/restrict_step.py +24 -0
  314. pysisyphus/pack.py +172 -0
  315. pysisyphus/peakdetect.py +948 -0
  316. pysisyphus/plot.py +1031 -0
  317. pysisyphus/run.py +2106 -0
  318. pysisyphus/socket_helper.py +74 -0
  319. pysisyphus/stocastic/FragmentKick.py +132 -0
  320. pysisyphus/stocastic/Kick.py +81 -0
  321. pysisyphus/stocastic/Pipeline.py +303 -0
  322. pysisyphus/stocastic/__init__.py +21 -0
  323. pysisyphus/stocastic/align.py +127 -0
  324. pysisyphus/testing.py +96 -0
  325. pysisyphus/thermo.py +156 -0
  326. pysisyphus/trj.py +824 -0
  327. pysisyphus/tsoptimizers/RSIRFOptimizer.py +56 -0
  328. pysisyphus/tsoptimizers/RSPRFOptimizer.py +182 -0
  329. pysisyphus/tsoptimizers/TRIM.py +59 -0
  330. pysisyphus/tsoptimizers/TSHessianOptimizer.py +463 -0
  331. pysisyphus/tsoptimizers/__init__.py +23 -0
  332. pysisyphus/wavefunction/Basis.py +239 -0
  333. pysisyphus/wavefunction/DIIS.py +76 -0
  334. pysisyphus/wavefunction/__init__.py +25 -0
  335. pysisyphus/wavefunction/build_ext.py +42 -0
  336. pysisyphus/wavefunction/cart2sph.py +190 -0
  337. pysisyphus/wavefunction/diabatization.py +304 -0
  338. pysisyphus/wavefunction/excited_states.py +435 -0
  339. pysisyphus/wavefunction/gen_ints.py +1811 -0
  340. pysisyphus/wavefunction/helpers.py +104 -0
  341. pysisyphus/wavefunction/ints/__init__.py +0 -0
  342. pysisyphus/wavefunction/ints/boys.py +193 -0
  343. pysisyphus/wavefunction/ints/boys_table_N_64_xasym_27.1_step_0.01.npy +0 -0
  344. pysisyphus/wavefunction/ints/cart_gto3d.py +176 -0
  345. pysisyphus/wavefunction/ints/coulomb3d.py +25928 -0
  346. pysisyphus/wavefunction/ints/diag_quadrupole3d.py +10036 -0
  347. pysisyphus/wavefunction/ints/dipole3d.py +8762 -0
  348. pysisyphus/wavefunction/ints/int2c2e3d.py +7198 -0
  349. pysisyphus/wavefunction/ints/int3c2e3d_sph.py +65040 -0
  350. pysisyphus/wavefunction/ints/kinetic3d.py +8240 -0
  351. pysisyphus/wavefunction/ints/ovlp3d.py +3777 -0
  352. pysisyphus/wavefunction/ints/quadrupole3d.py +15054 -0
  353. pysisyphus/wavefunction/ints/self_ovlp3d.py +198 -0
  354. pysisyphus/wavefunction/localization.py +458 -0
  355. pysisyphus/wavefunction/multipole.py +159 -0
  356. pysisyphus/wavefunction/normalization.py +36 -0
  357. pysisyphus/wavefunction/pop_analysis.py +134 -0
  358. pysisyphus/wavefunction/shells.py +1171 -0
  359. pysisyphus/wavefunction/wavefunction.py +504 -0
  360. pysisyphus/wrapper/__init__.py +11 -0
  361. pysisyphus/wrapper/exceptions.py +2 -0
  362. pysisyphus/wrapper/jmol.py +120 -0
  363. pysisyphus/wrapper/mwfn.py +169 -0
  364. pysisyphus/wrapper/packmol.py +71 -0
  365. pysisyphus/xyzloader.py +168 -0
  366. pysisyphus/yaml_mods.py +45 -0
  367. thermoanalysis/LICENSE +674 -0
  368. thermoanalysis/QCData.py +244 -0
  369. thermoanalysis/__init__.py +0 -0
  370. thermoanalysis/config.py +3 -0
  371. thermoanalysis/constants.py +20 -0
  372. thermoanalysis/thermo.py +1011 -0
mlmm/mm_parm.py ADDED
@@ -0,0 +1,945 @@
1
+ # mlmm/mm_parm.py
2
+
3
+ """
4
+ AmberTools prmtop/rst7 builder with automatic GAFF2 ligand parameterization.
5
+
6
+ Example:
7
+ mlmm mm-parm -i input.pdb --out-prefix complex -l "GPP=-3"
8
+
9
+ For detailed documentation, see: docs/mm_parm.md
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import logging
15
+ import os
16
+ import re
17
+ import shutil
18
+ import subprocess
19
+ import sys
20
+ import tempfile
21
+ from dataclasses import dataclass
22
+ from pathlib import Path
23
+ from typing import Dict, List, Optional, Set, Tuple
24
+
25
+ import click
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+ # ===================== User dictionaries & constants =====================
30
+
31
# Formal charge of each recognised amino-acid (or amino-acid-like) residue,
# keyed by PDB residue name. Every key must appear exactly once: a repeated key
# in a dict literal is silently overwritten by the later entry.
AMINO_ACIDS: Dict[str, int] = {
    # --- Standard 20 (L) ---
    "ALA": 0, "ARG": +1, "ASN": 0, "ASP": -1, "CYS": 0,
    "GLU": -1, "GLN": 0, "GLY": 0, "HIS": 0, "ILE": 0,
    "LEU": 0, "LYS": +1, "MET": 0, "PHE": 0, "PRO": 0,
    "SER": 0, "THR": 0, "TRP": 0, "TYR": 0, "VAL": 0,

    # --- Canonical extras ---
    "SEC": 0,   # selenocysteine
    "PYL": +1,  # pyrrolysine

    # --- Protonation / tautomers (Amber/CHARMM style) ---
    "HIP": +1,  # fully protonated His
    "HID": 0,   # Nδ-protonated His
    "HIE": 0,   # Nε-protonated His
    "ASH": 0,   # neutral Asp
    "GLH": 0,   # neutral Glu
    "LYN": 0,   # neutral Lys
    "ARN": 0,   # neutral Arg
    "TYM": -1,  # deprotonated Tyr (phenolate)

    # --- Phosphorylated residues ---
    "SEP": -2, "TPO": -2, "PTR": -2,

    # --- Cys family ---
    "CYX": 0,   # disulfide Cys
    "CSO": 0,   # Cys sulfenic acid
    "CSD": -1,  # Cys sulfinic acid
    "CSX": 0,   # generic Cys derivative
    "OCS": -1,  # cysteic acid
    "CYM": -1,  # deprotonated Cys

    # --- Lys variants / carboxylation ---
    # ("DLY" is defined once, with the D isomers below.)
    "MLY": +1, "LLP": +1,
    "KCX": -1,  # Lysine Nz-Carboxylic Acid

    # --- D isomers (19 residues) ---
    "DAL": 0, "DAR": +1, "DSG": 0, "DAS": -1, "DCY": 0,
    "DGN": 0, "DGL": -1, "DHI": 0, "DIL": 0, "DLE": 0,
    "DLY": +1, "MED": 0, "DPN": 0, "DPR": 0, "DSN": 0,
    "DTH": 0, "DTR": 0, "DTY": 0, "DVA": 0,

    # --- Carboxylation / cyclization / others ---
    "CGU": -2,  # gamma-carboxy-glutamate
    "CGA": -1,  # carboxymethylated glutamate
    "PCA": 0,   # pyroglutamate
    "MSE": 0,   # selenomethionine
    "OMT": 0,   # methionine sulfone

    # --- Other modified residues possibly encountered ---
    "ASA": 0, "CIR": 0, "FOR": 0, "MVA": 0, "IIL": 0, "AIB": 0, "HTN": 0,
    "SAR": 0, "NMC": 0, "PFF": 0, "NFA": 0, "ALY": 0, "AZF": 0, "CNX": 0, "CYF": 0,

    # --- Hydroxyproline ---
    "HYP": 0,

    # --- All C-terminus forms ---
    "CALA": -1, "CARG": 0, "CASN": -1, "CASP": -2, "CCYS": -1,
    "CCYX": -1, "CGLN": -1, "CGLU": -2, "CGLY": -1, "CHID": -1,
    "CHIE": -1, "CHIP": 0, "CHYP": -1, "CILE": -1, "CLEU": -1,
    "CLYS": 0, "CMET": -1, "CPHE": -1, "CPRO": -1, "CSER": -1,
    "CTHR": -1, "CTRP": -1, "CTYR": -1, "CVAL": -1, "NHE": 0,
    "NME": 0,
    "CTER": -1,  # generic C-terminus

    # --- All N-terminus forms ---
    "NALA": +1, "NARG": +2, "NASN": +1, "NASP": 0, "NCYS": +1,
    "NCYX": +1, "NGLN": +1, "NGLU": 0, "NGLY": +1, "NHID": +1,
    "NHIE": +1, "NHIP": +2, "NILE": +1, "NLEU": +1, "NLYS": +2,
    "NMET": +1, "NPHE": +1, "NPRO": +1, "NSER": +1, "NTHR": +1,
    "NTRP": +1, "NTYR": +1, "NVAL": +1, "ACE": 0,
    "NTER": +1,  # generic N-terminus
}
104
+
105
+ # Common ions (by residue name) and their formal charges
106
# Residue names are matched case-sensitively, so both upper-case and
# mixed-case spellings of the same element are listed where they occur.
ION: Dict[str, int] = {
    # +1
    **dict.fromkeys(
        (
            "LI", "NA", "K", "RB", "CS", "TL", "AG", "CU1",
            "Ag", "K+", "Na+", "NH4", "H3O+", "HE+", "HZ+", "Tl",
        ),
        +1,
    ),
    # +2
    **dict.fromkeys(
        (
            "MG", "CA", "SR", "BA", "MN", "FE2", "CO", "NI",
            "CU", "ZN", "CD", "HG", "PB", "Be", "PD", "PT",
            "Sn", "Ra", "YB2", "V2+",
        ),
        +2,
    ),
    # +3
    **dict.fromkeys(
        (
            "FE", "AU3", "AL", "GA", "IN",
            "CE", "Ce", "CR", "Cr", "Dy", "EU", "EU3", "Er",
            "GD3", "LA", "LU", "Nd", "PR", "SM", "Sm", "TB",
            "Tm", "Y", "Pu",
        ),
        +3,
    ),
    # +4
    **dict.fromkeys(("U4+", "Th", "Hf", "Zr"), +4),
    # -1
    **dict.fromkeys(("F", "CL", "BR", "I", "Cl-", "IOD"), -1),
}
128
+
129
+ # Water residue names considered as "water"
130
WATER_RES = {
    "HOH",
    "WAT",
    "H2O",
    "DOD",
    "TIP",
    "TIP3",
    "SOL",
}

# Maximum SG–SG separation (Å) for two sulfur atoms to count as a disulfide
DISULFIDE_CUTOFF = 2.3  # Å

# Troubleshooting hint shown to the user when the build fails
HINT_MESSAGE = "\n".join(
    (
        "[HINT] When the build fails, please check:",
        " - TER records are present between protein chains in the input PDB.",
        " - Ligand formal charges and spin multiplicities (defaults: 0 and 1) are set correctly via --ligand-charge/--ligand-mult.",
        " - Hydrogens have been correctly added to the ligand (e.g. with --add-h/--ph).",
        "",
    )
)

# LEaP preamble: ff19SB/OPC3 + nucleic/lipid/GLYCAM + GAFF2
LEAPRC_LINES = """\
source leaprc.protein.ff19SB
source leaprc.phosaa19SB
source leaprc.protein.ff19SB_modAA
source leaprc.lipid21
source leaprc.RNA.OL3
source leaprc.DNA.OL21
source leaprc.GLYCAM_06j-1
source leaprc.water.opc3
source leaprc.gaff2
loadamberparams frcmod.ionslm_126_opc3
""".splitlines()

# LEaP preamble for the ff14SB/TIP3P combination
LEAPRC_LINES_OLD = """\
source leaprc.protein.ff14SB
source leaprc.phosaa14SB
source leaprc.protein.ff14SB_modAA
source leaprc.lipid21
source leaprc.RNA.OL3
source leaprc.DNA.OL21
source leaprc.GLYCAM_06j-1
source leaprc.water.tip3p
source leaprc.gaff2
loadamberparams frcmod.ionsjc_tip3p
loadamberparams frcmod.ions1lm_126_tip3p
loadamberparams frcmod.ions234lm_126_tip3p
""".splitlines()
172
+
173
+ # ===================== Utilities =====================
174
+
175
+
176
def which(cmd: str) -> Optional[str]:
    """Look up *cmd* on PATH via ``shutil.which``.

    Returns the resolved executable path, or None when nothing is found.
    """
    resolved = shutil.which(cmd)
    return resolved
179
+
180
+
181
# Executables that must be resolvable on PATH for the AmberTools checks below.
_AMBERTOOLS_REQUIRED_COMMANDS: Tuple[str, ...] = (
    "tleap",
    "antechamber",
    "parmchk2",
)


def ambertools_command_paths() -> Dict[str, Optional[str]]:
    """Map each required AmberTools command to its PATH location (None if absent)."""
    located: Dict[str, Optional[str]] = {}
    for name in _AMBERTOOLS_REQUIRED_COMMANDS:
        located[name] = which(name)
    return located
187
+
188
+
189
def missing_ambertools_commands(paths: Optional[Dict[str, Optional[str]]] = None) -> List[str]:
    """List the required AmberTools commands that have no resolved path.

    When *paths* is None the commands are looked up with
    ``ambertools_command_paths()``; otherwise the supplied mapping is used.
    A command counts as missing if its entry is absent or falsy.
    """
    if paths is None:
        paths = ambertools_command_paths()
    absent: List[str] = []
    for name in _AMBERTOOLS_REQUIRED_COMMANDS:
        if not paths.get(name):
            absent.append(name)
    return absent
193
+
194
+
195
def ambertools_available() -> bool:
    """Report whether every required AmberTools command was found on PATH."""
    absent = missing_ambertools_commands()
    return len(absent) == 0
198
+
199
+
200
def run(cmd: List[str], cwd: Optional[Path] = None, logfile: Optional[Path] = None) -> int:
    """Run a subprocess, capture stdout+stderr into a log file, and return the return code.

    Args:
        cmd: Command and arguments, passed to ``subprocess.Popen`` as a list
            (no shell is involved).
        cwd: Working directory for the child process; the current directory
            is used when omitted.
        logfile: If given, the combined stdout/stderr text is written here
            (UTF-8) after the process has finished.

    Returns:
        The child's exit code.
    """
    with subprocess.Popen(
        cmd,
        cwd=str(cwd) if cwd else None,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr into the same stream
        text=True,
        # Tool output is not guaranteed to be valid in the locale encoding.
        # Decode leniently so one stray byte cannot abort the run with
        # UnicodeDecodeError before the log is written.
        errors="replace",
    ) as proc:
        output, _ = proc.communicate()
        rc = proc.returncode
    if logfile is not None:
        logfile.write_text(output, encoding="utf-8", errors="ignore")
    return rc
217
+
218
+
219
def parse_ligand_charge(expr: Optional[str]) -> Dict[str, int]:
    """
    Parse '--ligand-charge' string into a dict.
    Accepts 'GPP=-3,MMT=-1' OR 'GPP:-3,MMT:-1' → {'GPP': -3, 'MMT': -1}.

    Empty tokens (e.g. from a trailing comma) are skipped. A None or empty
    *expr* yields an empty dict.

    Raises:
        ValueError: if a token has no '=' / ':' separator, has an empty
            residue name, or its charge is not an integer.
    """
    if not expr:
        return {}
    charges: Dict[str, int] = {}
    for raw in expr.split(","):
        tok = raw.strip()
        if not tok:
            continue
        # '=' takes precedence over ':' when both appear in one token.
        if "=" in tok:
            name, value = tok.split("=", 1)
        elif ":" in tok:
            name, value = tok.split(":", 1)
        else:
            raise ValueError(f"Invalid format in --ligand-charge: {tok} (use RES=Q or RES:Q)")
        name = name.strip()
        if not name:
            raise ValueError(f"Missing residue name in --ligand-charge: {tok} (use RES=Q or RES:Q)")
        try:
            charges[name] = int(value.strip())
        except ValueError:
            # Re-raise with the option name so the user can see which
            # argument was malformed.
            raise ValueError(
                f"Invalid charge in --ligand-charge: {tok} (charge must be an integer)"
            ) from None
    return charges
239
+
240
+
241
def parse_ligand_mult(expr: Optional[str]) -> Dict[str, int]:
    """
    Parse '--ligand-mult' string into a dict of spin multiplicities.
    Accepts 'HEM=1,NO=2' OR 'HEM:1,NO:2' → {'HEM': 1, 'NO': 2}.

    Empty tokens (e.g. from a trailing comma) are skipped. A None or empty
    *expr* yields an empty dict.

    Raises:
        ValueError: if a token has no '=' / ':' separator, has an empty
            residue name, or its multiplicity is not an integer >= 1.
    """
    if not expr:
        return {}
    mults: Dict[str, int] = {}
    for raw in expr.split(","):
        tok = raw.strip()
        if not tok:
            continue
        # '=' takes precedence over ':' when both appear in one token.
        if "=" in tok:
            name, value = tok.split("=", 1)
        elif ":" in tok:
            name, value = tok.split(":", 1)
        else:
            raise ValueError(f"Invalid format in --ligand-mult: {tok} (use RES=M or RES:M)")
        name = name.strip()
        if not name:
            raise ValueError(f"Missing residue name in --ligand-mult: {tok} (use RES=M or RES:M)")
        try:
            mult = int(value.strip())
        except ValueError:
            raise ValueError(
                f"Invalid multiplicity in --ligand-mult: {tok} (multiplicity must be an integer)"
            ) from None
        # A spin multiplicity is 2S+1, so it can never be below 1.
        if mult < 1:
            raise ValueError(
                f"Invalid multiplicity in --ligand-mult: {tok} (multiplicity must be >= 1)"
            )
        mults[name] = mult
    return mults
261
+
262
+
263
def copy_pdb_no_fix(pdb_path: Path, tmpdir: Path) -> Path:
    """Duplicate *pdb_path* unchanged as ``fixed.pdb`` inside *tmpdir*.

    No structural fixing is applied; the file is copied with ``shutil.copy2``
    and the destination path is returned.
    """
    destination = tmpdir.joinpath("fixed.pdb")
    shutil.copy2(pdb_path, destination)
    return destination
268
+
269
+
270
def add_hydrogens_with_pdbfixer(pdb_in: Path, pdb_out: Path, ph: float) -> None:
    """
    Protonate *pdb_in* with PDBFixer at pH *ph* and write the result to *pdb_out*.

    Only ``addMissingHydrogens`` is invoked on the fixer; no other PDBFixer
    repair step is called here. PDBFixer is imported lazily, so it is needed
    only when this function is actually called.

    Raises:
        RuntimeError: if PDBFixer cannot be imported, or if its module does
            not expose ``app.PDBFile``.
    """
    try:
        from pdbfixer import PDBFixer
        from pdbfixer import pdbfixer as _pdbfixer_mod
    except Exception as e:
        raise RuntimeError(
            "PDBFixer is required to use --add-h True, but it was not found."
        ) from e

    # The PDB writer is reached through the `app` attribute of the pdbfixer module.
    app_namespace = getattr(_pdbfixer_mod, "app", None)
    writer_cls = getattr(app_namespace, "PDBFile", None)
    if writer_cls is None:
        raise RuntimeError(
            "PDBFixer installation is incomplete: could not access PDB writer."
        )

    fixer = PDBFixer(filename=str(pdb_in))
    fixer.addMissingHydrogens(pH=ph)  # hydrogens only
    with open(pdb_out, "w") as handle:
        writer_cls.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)
292
+
293
+
294
def detect_disulfides_from_pdb(
    pdb_path: Path,
    cutoff: float = DISULFIDE_CUTOFF,
) -> List[Tuple[Tuple[str, int], Tuple[str, int]]]:
    """
    Extract SG (or S) atoms from CYS/CYM/CYX in a PDB and return residue-pairs
    with SG–SG distance ≤ cutoff Å.
    Return format: [((chainID, resSeq), (chainID, resSeq)), ...]

    Only ATOM/HETATM records are read, using fixed PDB columns. Atoms whose
    altLoc is neither blank nor 'A' are ignored. Records that are truncated or
    whose residue number / coordinates cannot be parsed are skipped rather
    than raising.
    """
    cys_like = {"CYS", "CYM", "CYX"}
    sulfur_names = {"SG", "S"}

    sg_sites: List[Tuple[str, int, float, float, float]] = []
    with open(pdb_path, "r") as f:
        for line in f:
            if not line.startswith(("ATOM", "HETATM")):
                continue
            resname = line[17:20].strip()
            # Restrict disulfide detection to residues defined in AMINO_ACIDS.
            if resname not in AMINO_ACIDS or resname not in cys_like:
                continue
            if line[12:16].strip() not in sulfur_names:
                continue
            # altLoc: only blank or 'A'. Slices (not single-character indexing)
            # are used for one-column fields so a truncated record cannot
            # raise IndexError.
            if line[16:17].strip() not in ("", "A"):
                continue
            chain = line[21:22]
            try:
                # int()/float() tolerate surrounding whitespace, so the
                # fixed-width fields need no explicit strip().
                resseq = int(line[22:26])
                x = float(line[30:38])
                y = float(line[38:46])
                z = float(line[46:54])
            except ValueError:
                continue
            sg_sites.append((chain, resseq, x, y, z))

    pairs: List[Tuple[Tuple[str, int], Tuple[str, int]]] = []
    for i, (ci, ri, xi, yi, zi) in enumerate(sg_sites):
        # Compare each site only with later ones so every pair appears once.
        for cj, rj, xj, yj, zj in sg_sites[i + 1:]:
            dx = xi - xj
            dy = yi - yj
            dz = zi - zj
            dist = (dx * dx + dy * dy + dz * dz) ** 0.5
            if dist <= cutoff:
                pairs.append(((ci, ri), (cj, rj)))
    return pairs
349
+
350
+
351
def build_leap_residue_index(pdb_path: Path) -> Dict[Tuple[str, str], int]:
    """
    Number the residues of a PDB file in order of first appearance.

    Each distinct (chainID, resSeq) pair seen on an ATOM/HETATM record gets
    the next 1-based index, where resSeq is the raw 4-character column slice
    (not converted to int). The function's purpose is to supply LEaP residue
    indices, which follow appearance order and may differ from the RESSEQ
    integers.
    """
    index_by_residue: Dict[Tuple[str, str], int] = {}
    with open(pdb_path, "r") as handle:
        for record in handle:
            if not record.startswith(("ATOM", "HETATM")):
                continue
            residue = (record[21], record[22:26])  # chain ID, 4-char resSeq
            if residue not in index_by_residue:
                # The next index is one more than the number already assigned.
                index_by_residue[residue] = len(index_by_residue) + 1
    return index_by_residue
372
+
373
+
374
def parse_tleap_unknown_residues(leap_log: Path) -> Set[str]:
    """Scan a LEaP log for residue names it reports as unknown or newly created.

    The log is read as UTF-8 with undecodable bytes ignored. Names listed in
    WATER_RES are left out of the result.
    """
    log_text = leap_log.read_text(encoding="utf-8", errors="ignore")
    message_regexes = (
        r"Unknown residue:\s+([A-Za-z0-9\+\-]+)",
        r"Could not find in database the residue:\s+([A-Za-z0-9\+\-]+)",
        r"createAtomUnit:.*\bresidue\s+([A-Za-z0-9\+\-]+)\b",
        r"Creating new UNIT for residue:\s+([A-Za-z0-9_+\-]+)",
    )
    # Each pattern is scanned separately so that every message form is
    # searched over the whole log.
    captured = (
        hit.group(1).strip()
        for regex in message_regexes
        for hit in re.finditer(regex, log_text)
    )
    return {name for name in captured if name not in WATER_RES}
391
+
392
+
393
+ # -------- helpers for TER insertion --------
394
+
395
+
396
def insert_ter_around_special_residues(pdb_in: Path, pdb_out: Path, special_resnames: Set[str]) -> None:
    """
    Copy *pdb_in* to *pdb_out*, adding TER records around runs of "special" residues.

    A residue is special when its name is in `special_resnames`. Whenever two
    consecutive residues (distinguished by chain ID + resSeq columns) differ in
    specialness, a TER is placed between them; adjacent special residues are
    not separated. TER records already in the input are kept, and a TER is
    never written directly after another TER. If the last ATOM/HETATM residue
    is special and was not followed by a TER, one is appended at the very end
    of the output.
    """
    atom_tags = ("ATOM", "HETATM")
    buffer: List[str] = []
    last_residue: Optional[Tuple[str, str]] = None  # (chain, resseq[22:26])
    last_was_special: Optional[bool] = None
    just_wrote_ter = False

    with open(pdb_in, "r") as src:
        for record in src:
            tag = record[:6].strip()

            if tag == "TER":
                # Keep the input's own TER unless one was just written; either
                # way, residue tracking restarts after it.
                if not just_wrote_ter:
                    buffer.append(record)
                    just_wrote_ter = True
                last_residue = None
                last_was_special = None
                continue

            if tag not in atom_tags:
                # Any other record type is passed through untouched.
                buffer.append(record)
                continue

            residue = (record[21], record[22:26])
            is_special = record[17:20].strip() in special_resnames

            entered_new_residue = last_residue is not None and residue != last_residue
            if entered_new_residue:
                # boundary (previous residue → current residue)
                specialness_changed = last_was_special != is_special
                if specialness_changed and (last_was_special or is_special):
                    if not just_wrote_ter:
                        buffer.append("TER\n")

            buffer.append(record)
            just_wrote_ter = False
            last_residue = residue
            last_was_special = is_special

    # Close a trailing special block that has no TER after it.
    if last_residue is not None and last_was_special and not just_wrote_ter:
        buffer.append("TER\n")

    with open(pdb_out, "w") as dst:
        dst.writelines(buffer)
450
+
451
+
452
+ # -------- helper for amino-acid–like residue detection --------
453
+
454
+
455
def extract_first_residue_pdb(src_pdb: Path, resname: str, dst_pdb: Path) -> bool:
    """
    Extract only the *first occurrence* of the specified residue name from src_pdb,
    write it as a standalone PDB to dst_pdb, and return True on success.

    The first ATOM/HETATM line whose residue name (``line[17:20]``) equals
    ``resname`` fixes the target (chainID, resSeq); every ATOM/HETATM line with
    that residue name, chain and resSeq is copied, followed by an ``END`` record.
    The source is read in a single pass, and each copied line is newline-terminated
    so ``END`` always starts on its own line even if the source has no final newline.

    Args:
        src_pdb: PDB file to search.
        resname: Residue name to look for (compared after stripping blanks).
        dst_pdb: Output file; only created when the residue is found.

    Returns:
        True if at least one atom was written, False if ``resname`` does not occur.
    """
    target: Optional[Tuple[str, str]] = None  # (chain, resseq) of the first hit
    out_lines: List[str] = []

    with open(src_pdb, "r") as src:
        for line in src:
            if not line.startswith(("ATOM", "HETATM")):
                continue
            if line[17:20].strip() != resname:
                continue
            key = (line[21], line[22:26])
            if target is None:
                target = key
            if key == target:
                out_lines.append(line if line.endswith("\n") else line + "\n")

    if not out_lines:
        return False

    with open(dst_pdb, "w") as dst:
        dst.writelines(out_lines)
        dst.write("END\n")
    return True
488
+
489
+
490
def antechamber_parametrize(resname: str, res_charge: int, res_mult: int, workdir: Path) -> Tuple[Path, Path]:
    """
    Generate GAFF2 / AM1-BCC parameters for one residue with antechamber + parmchk2.

    Expects ``{resname}.pdb`` inside ``workdir``; both tools are run with
    ``workdir`` as their working directory and log to
    ``{resname}.antechamber.log`` / ``{resname}.parmchk2.log`` there.

    Args:
        resname: Residue name; used for the file stem and antechamber's ``-rn``.
        res_charge: Net charge passed to antechamber (``-nc``).
        res_mult: Spin multiplicity passed to antechamber (``-m``).
        workdir: Directory holding the input PDB and receiving all outputs.

    Returns:
        ``(mol2_path, frcmod_path)``.

    Raises:
        RuntimeError: if either tool exits non-zero or its output file is missing.
    """
    pdb = workdir / f"{resname}.pdb"
    mol2 = workdir / f"{resname}.mol2"
    frcmod = workdir / f"{resname}.frcmod"

    # Only bare file names are passed because the tools run inside workdir.
    antechamber_cmd = (
        ["antechamber"]
        + ["-i", pdb.name, "-fi", "pdb"]
        + ["-o", mol2.name, "-fo", "mol2"]
        + ["-at", "gaff2", "-c", "bcc"]
        + ["-nc", str(res_charge), "-m", str(res_mult)]
        + ["-rn", resname, "-s", "2"]
    )
    antechamber_log = workdir / f"{resname}.antechamber.log"
    antechamber_rc = run(antechamber_cmd, cwd=workdir, logfile=antechamber_log)
    if not (antechamber_rc == 0 and mol2.exists()):
        raise RuntimeError(f"[{resname}] antechamber failed (see log).")

    parmchk_cmd = ["parmchk2", "-i", mol2.name, "-f", "mol2", "-o", frcmod.name, "-s", "2"]
    parmchk_log = workdir / f"{resname}.parmchk2.log"
    parmchk_rc = run(parmchk_cmd, cwd=workdir, logfile=parmchk_log)
    if not (parmchk_rc == 0 and frcmod.exists()):
        raise RuntimeError(f"[{resname}] parmchk2 failed (see log).")

    click.echo(f"[mm-parm] Built GAFF2 parameter for [{resname}] (charge={res_charge}, mult={res_mult}).")
    return mol2, frcmod
530
+
531
+
532
def write_tleap_input(
    fixed_pdb: Path,
    lig_defs: List[Tuple[str, Path, Path]],
    ss_pairs: List[Tuple[Tuple[str, int], Tuple[str, int]]],
    out_prefix: str,
    tleap_in: Path,
    leaprc_lines: List[str],
) -> None:
    """
    Write a LEaP input script to ``tleap_in``.

    Script layout, in order:
    - the given ``leaprc_lines``;
    - for each ``(RESNAME, param_file, frcmod)`` in ``lig_defs``:
        * ``.lib`` param_file  → ``loadoff`` + ``loadamberparams frcmod``
        * anything else        → ``RESNAME = loadmol2`` + ``loadamberparams frcmod``
    - ``complex = loadpdb <fixed_pdb>``;
    - one ``bond ... SG ... SG`` line per entry of ``ss_pairs``
      (``((chainID, resSeq), (chainID, resSeq))``); LEaP residue indices come from
      ``build_leap_residue_index(fixed_pdb)``, and pairs that cannot be resolved are
      written as a ``# WARN`` comment line instead;
    - ``charge complex`` (so the total charge shows up in the LEaP log);
    - ``saveamberparm`` / ``savepdb`` using ``out_prefix``, then ``quit``.

    Only file *names* are written, so LEaP must run in the directory that holds them.
    """
    script: List[str] = list(leaprc_lines)

    # ligands / nonstandard residues
    for resname, param_file, frcmod in lig_defs:
        if param_file.suffix.lower() == ".lib":
            load_cmd = f"loadoff {param_file.name}"
        else:
            load_cmd = f"{resname} = loadmol2 {param_file.name}"
        script.extend([load_cmd, f"loadamberparams {frcmod.name}"])

    # complex
    script.append(f"complex = loadpdb {fixed_pdb.name}")

    # S–S bonds: map (chain, 4-char right-justified resSeq) to LEaP residue numbers
    leap_index = build_leap_residue_index(fixed_pdb)
    for (chain_a, seq_a), (chain_b, seq_b) in ss_pairs:
        key_a = (chain_a, f"{seq_a:>4}")
        key_b = (chain_b, f"{seq_b:>4}")
        if not (key_a in leap_index and key_b in leap_index):
            script.append(f"# WARN: could not resolve SS pair ({chain_a}{seq_a})-({chain_b}{seq_b})")
            continue
        num_a = leap_index[key_a]
        num_b = leap_index[key_b]
        script.append(f"bond complex.{num_a}.SG complex.{num_b}.SG")

    script += [
        # For logging: print charge in tleap output
        "charge complex",
        # outputs (parm7/inpcrd + pdb)
        f"saveamberparm complex {out_prefix}.parm7 {out_prefix}.inpcrd",
        f"savepdb complex {out_prefix}.pdb",
        "quit",
    ]
    tleap_in.write_text("\n".join(script) + "\n", encoding="utf-8")
582
+
583
+
584
def ambertools_route(
    pdb: Path,
    out_prefix: str,
    ligand_charge: Dict[str, int],
    ligand_mult: Dict[str, int],
    keep_temp: bool,
    tmpdir: Path,
    ff_set: str,
    add_ter: bool,
) -> Tuple[Path, Path]:
    """
    Build Amber topology/coordinates for ``pdb`` with tleap, parameterizing unknown
    residues on demand.

    Steps:
    1. Copy the input PDB into ``tmpdir`` unchanged (hydrogens, if requested, were
       added by the caller). With ``add_ter``, also write a copy with TER records
       around ligand/water/ion residues and continue with that copy.
    2. Detect candidate S–S pairs in that PDB.
    3. Run LEaP once without extra parameters and collect the residue names its
       log reports as unknown.
    4. For each such residue, extract its first occurrence and run
       antechamber + parmchk2 (GAFF2/AM1-BCC). A name found in ``AMINO_ACIDS``
       aborts the build unless it is also listed in ``ligand_charge``.
    5. If anything was unknown, run LEaP again with the generated parameters.
    6. Copy complex.parm7 / complex.inpcrd from ``tmpdir`` to
       <out_prefix>.parm7 / <out_prefix>.rst7. The caller handles the PDB export.

    ``keep_temp`` is accepted for interface symmetry but not used here.

    Returns:
        Resolved paths ``(parm7, rst7)``.

    Raises:
        RuntimeError: residue extraction failed, an unsupported amino-acid residue
            was found, parameterization failed, or pass 2 produced no parm7.
        FileNotFoundError: the LEaP outputs are missing after all passes.
    """
    leaprc_lines = LEAPRC_LINES if ff_set == "ff19SB" else LEAPRC_LINES_OLD

    # PDB as-is, optional TER insertion
    fixed_pdb = copy_pdb_no_fix(pdb, tmpdir)
    if add_ter:
        special_resnames: Set[str] = set(ligand_charge.keys()).union(WATER_RES, ION.keys())
        pdb_with_ter = tmpdir / "fixed_withTER.pdb"
        insert_ter_around_special_residues(fixed_pdb, pdb_with_ter, special_resnames)
        fixed_pdb = pdb_with_ter

    # Detect S–S candidates
    ss_pairs = detect_disulfides_from_pdb(fixed_pdb, cutoff=DISULFIDE_CUTOFF)

    def run_leap_pass(pass_no: int, defs: List[Tuple[str, Path, Path]]) -> Path:
        """Write tleap_<n>.in, run tleap in tmpdir, and return the log path."""
        script = tmpdir / f"tleap_{pass_no}.in"
        write_tleap_input(
            fixed_pdb,
            lig_defs=defs,
            ss_pairs=ss_pairs,
            out_prefix="complex",
            tleap_in=script,
            leaprc_lines=leaprc_lines,
        )
        log_path = tmpdir / f"tleap_{pass_no}.log"
        # tleap's return code is not inspected; success is judged from its outputs.
        run(["tleap", "-f", script.name], cwd=tmpdir, logfile=log_path)
        return log_path

    # Pass 1 (no extra params) -> will write complex.parm7/.inpcrd/.pdb
    first_log = run_leap_pass(1, [])

    # Collect unknown residues
    need_params: Set[str] = parse_tleap_unknown_residues(first_log)

    # Parameterize unknown residues
    lig_defs: List[Tuple[str, Path, Path]] = []
    for rn in sorted(need_params):
        charge = ligand_charge.get(rn, AMINO_ACIDS.get(rn, 0))
        mult = ligand_mult.get(rn, 1)
        residue_pdb = tmpdir / f"{rn}.pdb"
        if not extract_first_residue_pdb(fixed_pdb, rn, residue_pdb):
            raise RuntimeError(f"Failed to extract PDB for unknown residue {rn}")

        # An explicit --ligand-charge entry always wins and forces GAFF2; without
        # one, amino-acid residues must come from the Amber protein force field.
        if rn not in ligand_charge and rn in AMINO_ACIDS:
            raise RuntimeError(
                f"Nonstandard amino acid residue '{rn}' is not supported by mm_parm. "
                "This workflow does not auto-parameterize amino-acid residues. "
                "Please prepare the parameters manually with AmberTools, edit the "
                "input structure to use a supported residue, or explicitly list the "
                "residue in --ligand-charge to force GAFF2 parameterization."
            )

        mol2, frcmod = antechamber_parametrize(rn, charge, mult, tmpdir)
        lig_defs.append((rn, mol2, frcmod))

    # Pass 2 (with generated parameters) -> will (re)write complex.* including PDB
    if need_params:
        second_log = run_leap_pass(2, lig_defs)
        if not (tmpdir / "complex.parm7").exists():
            raise RuntimeError(f"tleap failed to produce parm7; see {second_log.name}.")

    # Copy outputs (parm7, inpcrd) to final names
    src_parm = tmpdir / "complex.parm7"
    src_inp = tmpdir / "complex.inpcrd"
    if not (src_parm.exists() and src_inp.exists()):
        msg = f"LEaP outputs not found in {tmpdir}. Check logs: {tmpdir / 'tleap_1.log'}"
        if (tmpdir / "tleap_2.log").exists():
            msg += f" and {tmpdir / 'tleap_2.log'}"
        raise FileNotFoundError(msg)

    parm7 = Path(f"{out_prefix}.parm7").resolve()
    rst7 = Path(f"{out_prefix}.rst7").resolve()
    shutil.copy2(src_parm, parm7)
    shutil.copy2(src_inp, rst7)  # copy LEaP ASCII inpcrd as <prefix>.rst7

    # Return paths for prmtop/rst7; the caller will copy PDB using naming rule
    return parm7, rst7
699
+
700
+
701
+ # ===================== Main pipeline (library/CLI entry) =====================
702
+
703
+
704
@dataclass
class Args:
    """Resolved settings for one ``run_pipeline`` invocation (built by ``cli``)."""

    # Input PDB file.
    pdb: Path
    # Prefix for <prefix>.parm7 / <prefix>.rst7 (and <prefix>.pdb when given explicitly).
    out_prefix: str
    # Residue name -> net charge, parsed from --ligand-charge.
    ligand_charge: Dict[str, int]
    # Residue name -> spin multiplicity, parsed from --ligand-mult.
    ligand_mult: Dict[str, int]
    # Keep the temporary working directory instead of cleaning it up.
    keep_temp: bool
    # Insert TER records around ligand/water/ion residues before running LEaP.
    add_ter: bool
    # Add hydrogens with PDBFixer before parameterization.
    add_h: bool
    # pH handed to PDBFixer when add_h is set.
    ph: float
    ff_set: str  # "ff19SB" or "ff14SB"
    out_prefix_given: bool  # whether user explicitly provided --out-prefix
716
+
717
+
718
def run_pipeline(args: Args) -> None:
    """
    Run the complete parm7/rst7 build described by ``args``.

    Flow:
    - Exit (``sys.exit``) if the input PDB is missing or a required AmberTools
      command is not found.
    - Work inside a temporary directory: created in the current directory and kept
      when ``args.keep_temp`` is set, otherwise a ``TemporaryDirectory`` that is
      cleaned up at the end.
    - Optionally add hydrogens with PDBFixer, then call ``ambertools_route``.
    - Copy LEaP's complex.pdb to the final PDB name when one applies
      (``<out_prefix>.pdb`` if the prefix was given explicitly,
      ``<input_stem>_parm.pdb`` if only hydrogens were requested, otherwise none).
    - If the AmberTools step fails after hydrogens were added, the hydrogen-added
      PDB is written to that final name as a fallback before re-raising.

    Any exception raised inside the working section is re-raised after
    ``HINT_MESSAGE`` has been printed to stderr.
    """
    if not args.pdb.exists():
        sys.exit(f"PDB not found: {args.pdb}")

    amber_paths = ambertools_command_paths()
    missing_cmds = missing_ambertools_commands(amber_paths)
    if missing_cmds:
        detected = "\n".join(
            f" {name}: {amber_paths[name]}"
            for name in _AMBERTOOLS_REQUIRED_COMMANDS
            if amber_paths.get(name)
        )
        report = [
            "AmberTools preflight failed.",
            f"Missing required command(s): {', '.join(missing_cmds)}",
            "Required: tleap, antechamber, parmchk2",
            "Detected command paths:",
            detected or " (none found)",
        ]
        sys.exit("\n".join(report))

    # Decide PDB filename to export/copy (used both on success and as H-added fallback)
    # When --out-prefix is omitted and --add-h False, do not write <input_stem>_parm.pdb.
    final_pdb_out: Optional[Path]
    if args.out_prefix_given:
        final_pdb_out = Path(f"{args.out_prefix}.pdb").resolve()
    elif args.add_h:
        final_pdb_out = Path(f"{Path(args.pdb).stem}_parm.pdb").resolve()
    else:
        final_pdb_out = None

    # Prepare temporary working directory
    tmp_mgr: Optional[tempfile.TemporaryDirectory] = None
    if not args.keep_temp:
        tmp_mgr = tempfile.TemporaryDirectory(prefix="parm7build_")
        tmpdir_path = Path(tmp_mgr.name)
    else:
        tmpdir_path = Path(tempfile.mkdtemp(prefix="parm7build_", dir=os.getcwd()))

    fixed_pdb_with_H: Optional[Path] = None  # for fallback export

    try:
        # Copy input PDB locally (avoid path/lock issues)
        local_pdb = tmpdir_path / "input.pdb"
        shutil.copy2(args.pdb, local_pdb)

        # Optional: add hydrogens via PDBFixer at specified pH
        prepared_pdb = local_pdb
        if args.add_h:
            protonated_pdb = tmpdir_path / "input_withH.pdb"
            click.echo(f"[mm-parm] Adding hydrogens with PDBFixer at pH={args.ph:.2f} ...")
            try:
                add_hydrogens_with_pdbfixer(local_pdb, protonated_pdb, args.ph)
            except Exception as exc:
                if args.keep_temp:
                    click.echo(
                        f"[mm-parm] ERROR: PDBFixer hydrogen addition failed: {exc}\n"
                        f"Temporary working directory kept at: {tmpdir_path}",
                        err=True,
                    )
                raise
            prepared_pdb = protonated_pdb
            fixed_pdb_with_H = protonated_pdb
            click.echo("[mm-parm] Hydrogens added (PDBFixer).")

        try:
            click.echo("[mm-parm] AmberTools detected. Using tleap + GAFF2 (AM1-BCC).")
            click.echo(
                f"[mm-parm] FF set: {args.ff_set} | add_ter: {args.add_ter} | "
                f"add_h: {args.add_h} (pH={args.ph:.2f})"
            )
            parm7, rst7 = ambertools_route(
                prepared_pdb,
                args.out_prefix,
                args.ligand_charge,
                args.ligand_mult,
                args.keep_temp,
                tmpdir_path,
                ff_set=args.ff_set,
                add_ter=args.add_ter,
            )
        except Exception as exc:
            # Fallback export of H-added PDB on failure
            can_export_fallback = (
                fixed_pdb_with_H is not None
                and fixed_pdb_with_H.exists()
                and final_pdb_out is not None
            )
            if can_export_fallback:
                try:
                    shutil.copy2(fixed_pdb_with_H, final_pdb_out)
                    click.echo(f"[mm-parm] Build failed, but wrote hydrogen-added PDB fallback: {final_pdb_out}")
                except Exception as copy_exc:
                    click.echo(f"[mm-parm] WARNING: Failed to write fallback hydrogen-added PDB: {copy_exc}", err=True)
            if args.keep_temp:
                click.echo(f"[mm-parm] ERROR: Failed: {exc}\nTemporary working directory kept at: {tmpdir_path}", err=True)
            # Re-raise to preserve error behavior
            raise

        # Copy LEaP PDB (complex.pdb) to final name, if requested
        if final_pdb_out is not None:
            leap_pdb = tmpdir_path / "complex.pdb"
            if not leap_pdb.exists():
                click.echo("[mm-parm] WARNING: LEaP PDB (complex.pdb) was not found; skipping PDB export copy.", err=True)
            else:
                shutil.copy2(leap_pdb, final_pdb_out)
                click.echo(f"[mm-parm] Wrote: {final_pdb_out}")

        click.echo(f"[mm-parm] Wrote: {parm7}")
        click.echo(f"[mm-parm] Wrote: {rst7}")

        if args.keep_temp:
            click.echo(f"[mm-parm] Temporary directory kept: {tmpdir_path}")
            log_note = f"[mm-parm] LEaP logs: {tmpdir_path / 'tleap_1.log'}"
            if (tmpdir_path / "tleap_2.log").exists():
                log_note += f", {tmpdir_path / 'tleap_2.log'}"
            click.echo(log_note)
    except Exception:
        # Print a generic hint message on failure, then re-raise
        click.echo(HINT_MESSAGE, err=True)
        raise
    finally:
        if tmp_mgr is not None:
            try:
                tmp_mgr.cleanup()
            except Exception:
                # Best-effort cleanup: a leftover temp dir must not mask the result.
                logger.debug("Failed to clean up temporary directory", exc_info=True)
842
+
843
+
844
+ # ===================== Click CLI entry point =====================
845
+
846
+
847
@click.command(
    help="Generate Amber parm7/rst7 (and a LEaP-exported PDB) from a PDB using AmberTools only.",
    context_settings={"help_option_names": ["-h", "--help"]},
)
@click.option(
    "-i",
    "--input",
    "pdb",
    required=True,
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
    help="Input PDB file (used as-is; optional hydrogens via --add-h/--ph).",
)
@click.option(
    "--out-prefix",
    default=None,
    help="Output prefix (default: input PDB stem). For LEaP PDB: if omitted and --add-h True, <input_stem>_parm.pdb is used.",
)
@click.option(
    "-l",
    "--ligand-charge",
    default=None,
    help='Comma-separated mapping of residue=charge or residue:charge (e.g., "GPP=-3,MMT=-1" or "GPP:-3,MMT:-1")',
)
@click.option(
    "--ligand-mult",
    default=None,
    help='Comma-separated mapping of residue=multiplicity or residue:multiplicity (e.g., "HEM=1,NO:2")',
)
@click.option(
    "--keep-temp/--no-keep-temp",
    "keep_temp",
    show_default=True,
    default=False,
    help="Keep temporary working directory (in current dir) for debugging.",
)
@click.option(
    "--add-ter/--no-add-ter",
    "add_ter",
    show_default=True,
    default=True,
    help="Insert TER before/after target residues. When contiguous, TER is not inserted between them.",
)
@click.option(
    "--add-h/--no-add-h",
    "add_h",
    show_default=True,
    default=False,
    help="Add hydrogens using PDBFixer at the specified --ph.",
)
@click.option(
    "--ph",
    "ph",
    default=7.0,
    type=float,
    help="pH used by PDBFixer when adding hydrogens (--add-h True). Default: 7.0",
)
@click.option(
    "--ff-set",
    default="ff19SB",
    type=click.Choice(["ff19SB", "ff14SB"]),
    help="Force-field set for proteins/backbone typing and water/ion parameters (default: ff19SB).",
)
def cli(
    pdb: Path,
    out_prefix: Optional[str],
    ligand_charge: Optional[str],
    ligand_mult: Optional[str],
    keep_temp: bool,
    add_ter: bool,
    add_h: bool,
    ph: float,
    ff_set: str,
) -> None:
    """Translate parsed command-line options into ``Args`` and run the pipeline."""
    prefix_given = out_prefix is not None
    # Without --out-prefix, outputs are named after the input file's stem.
    effective_prefix = out_prefix if prefix_given else Path(pdb).stem
    charge_map = parse_ligand_charge(ligand_charge)
    mult_map = parse_ligand_mult(ligand_mult)
    run_pipeline(
        Args(
            pdb=pdb,
            out_prefix=effective_prefix,
            ligand_charge=charge_map,
            ligand_mult=mult_map,
            keep_temp=keep_temp,
            add_ter=bool(add_ter),
            add_h=bool(add_h),
            ph=ph,
            ff_set=ff_set,
            out_prefix_given=prefix_given,
        )
    )