mlmm-toolkit 0.2.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (372) hide show
  1. hessian_ff/__init__.py +50 -0
  2. hessian_ff/analytical_hessian.py +609 -0
  3. hessian_ff/constants.py +46 -0
  4. hessian_ff/forcefield.py +339 -0
  5. hessian_ff/loaders.py +608 -0
  6. hessian_ff/native/Makefile +8 -0
  7. hessian_ff/native/__init__.py +28 -0
  8. hessian_ff/native/analytical_hessian.py +88 -0
  9. hessian_ff/native/analytical_hessian_ext.cpp +258 -0
  10. hessian_ff/native/bonded.py +82 -0
  11. hessian_ff/native/bonded_ext.cpp +640 -0
  12. hessian_ff/native/loader.py +349 -0
  13. hessian_ff/native/nonbonded.py +118 -0
  14. hessian_ff/native/nonbonded_ext.cpp +1150 -0
  15. hessian_ff/prmtop_parmed.py +23 -0
  16. hessian_ff/system.py +107 -0
  17. hessian_ff/terms/__init__.py +14 -0
  18. hessian_ff/terms/angle.py +73 -0
  19. hessian_ff/terms/bond.py +44 -0
  20. hessian_ff/terms/cmap.py +406 -0
  21. hessian_ff/terms/dihedral.py +141 -0
  22. hessian_ff/terms/nonbonded.py +209 -0
  23. hessian_ff/tests/__init__.py +0 -0
  24. hessian_ff/tests/conftest.py +75 -0
  25. hessian_ff/tests/data/small/complex.parm7 +1346 -0
  26. hessian_ff/tests/data/small/complex.pdb +125 -0
  27. hessian_ff/tests/data/small/complex.rst7 +63 -0
  28. hessian_ff/tests/test_coords_input.py +44 -0
  29. hessian_ff/tests/test_energy_force.py +49 -0
  30. hessian_ff/tests/test_hessian.py +137 -0
  31. hessian_ff/tests/test_smoke.py +18 -0
  32. hessian_ff/tests/test_validation.py +40 -0
  33. hessian_ff/workflows.py +889 -0
  34. mlmm/__init__.py +36 -0
  35. mlmm/__main__.py +7 -0
  36. mlmm/_version.py +34 -0
  37. mlmm/add_elem_info.py +374 -0
  38. mlmm/advanced_help.py +91 -0
  39. mlmm/align_freeze_atoms.py +601 -0
  40. mlmm/all.py +3535 -0
  41. mlmm/bond_changes.py +231 -0
  42. mlmm/bool_compat.py +223 -0
  43. mlmm/cli.py +574 -0
  44. mlmm/cli_utils.py +166 -0
  45. mlmm/default_group.py +337 -0
  46. mlmm/defaults.py +467 -0
  47. mlmm/define_layer.py +526 -0
  48. mlmm/dft.py +1041 -0
  49. mlmm/energy_diagram.py +253 -0
  50. mlmm/extract.py +2213 -0
  51. mlmm/fix_altloc.py +464 -0
  52. mlmm/freq.py +1406 -0
  53. mlmm/harmonic_constraints.py +140 -0
  54. mlmm/hessian_cache.py +44 -0
  55. mlmm/hessian_calc.py +174 -0
  56. mlmm/irc.py +638 -0
  57. mlmm/mlmm_calc.py +2262 -0
  58. mlmm/mm_parm.py +945 -0
  59. mlmm/oniom_export.py +1983 -0
  60. mlmm/oniom_import.py +457 -0
  61. mlmm/opt.py +1742 -0
  62. mlmm/path_opt.py +1353 -0
  63. mlmm/path_search.py +2299 -0
  64. mlmm/preflight.py +88 -0
  65. mlmm/py.typed +1 -0
  66. mlmm/pysis_runner.py +45 -0
  67. mlmm/scan.py +1047 -0
  68. mlmm/scan2d.py +1226 -0
  69. mlmm/scan3d.py +1265 -0
  70. mlmm/scan_common.py +184 -0
  71. mlmm/summary_log.py +736 -0
  72. mlmm/trj2fig.py +448 -0
  73. mlmm/tsopt.py +2871 -0
  74. mlmm/utils.py +2309 -0
  75. mlmm/xtb_embedcharge_correction.py +475 -0
  76. mlmm_toolkit-0.2.2.dev0.dist-info/METADATA +1159 -0
  77. mlmm_toolkit-0.2.2.dev0.dist-info/RECORD +372 -0
  78. mlmm_toolkit-0.2.2.dev0.dist-info/WHEEL +5 -0
  79. mlmm_toolkit-0.2.2.dev0.dist-info/entry_points.txt +2 -0
  80. mlmm_toolkit-0.2.2.dev0.dist-info/licenses/LICENSE +674 -0
  81. mlmm_toolkit-0.2.2.dev0.dist-info/top_level.txt +4 -0
  82. pysisyphus/Geometry.py +1667 -0
  83. pysisyphus/LICENSE +674 -0
  84. pysisyphus/TableFormatter.py +63 -0
  85. pysisyphus/TablePrinter.py +74 -0
  86. pysisyphus/__init__.py +12 -0
  87. pysisyphus/calculators/AFIR.py +452 -0
  88. pysisyphus/calculators/AnaPot.py +20 -0
  89. pysisyphus/calculators/AnaPot2.py +48 -0
  90. pysisyphus/calculators/AnaPot3.py +12 -0
  91. pysisyphus/calculators/AnaPot4.py +20 -0
  92. pysisyphus/calculators/AnaPotBase.py +337 -0
  93. pysisyphus/calculators/AnaPotCBM.py +25 -0
  94. pysisyphus/calculators/AtomAtomTransTorque.py +154 -0
  95. pysisyphus/calculators/CFOUR.py +250 -0
  96. pysisyphus/calculators/Calculator.py +844 -0
  97. pysisyphus/calculators/CerjanMiller.py +24 -0
  98. pysisyphus/calculators/Composite.py +123 -0
  99. pysisyphus/calculators/ConicalIntersection.py +171 -0
  100. pysisyphus/calculators/DFTBp.py +430 -0
  101. pysisyphus/calculators/DFTD3.py +66 -0
  102. pysisyphus/calculators/DFTD4.py +84 -0
  103. pysisyphus/calculators/Dalton.py +61 -0
  104. pysisyphus/calculators/Dimer.py +681 -0
  105. pysisyphus/calculators/Dummy.py +20 -0
  106. pysisyphus/calculators/EGO.py +76 -0
  107. pysisyphus/calculators/EnergyMin.py +224 -0
  108. pysisyphus/calculators/ExternalPotential.py +264 -0
  109. pysisyphus/calculators/FakeASE.py +35 -0
  110. pysisyphus/calculators/FourWellAnaPot.py +28 -0
  111. pysisyphus/calculators/FreeEndNEBPot.py +39 -0
  112. pysisyphus/calculators/Gaussian09.py +18 -0
  113. pysisyphus/calculators/Gaussian16.py +726 -0
  114. pysisyphus/calculators/HardSphere.py +159 -0
  115. pysisyphus/calculators/IDPPCalculator.py +49 -0
  116. pysisyphus/calculators/IPIClient.py +133 -0
  117. pysisyphus/calculators/IPIServer.py +234 -0
  118. pysisyphus/calculators/LEPSBase.py +24 -0
  119. pysisyphus/calculators/LEPSExpr.py +139 -0
  120. pysisyphus/calculators/LennardJones.py +80 -0
  121. pysisyphus/calculators/MOPAC.py +219 -0
  122. pysisyphus/calculators/MullerBrownSympyPot.py +51 -0
  123. pysisyphus/calculators/MultiCalc.py +85 -0
  124. pysisyphus/calculators/NFK.py +45 -0
  125. pysisyphus/calculators/OBabel.py +87 -0
  126. pysisyphus/calculators/ONIOMv2.py +1129 -0
  127. pysisyphus/calculators/ORCA.py +893 -0
  128. pysisyphus/calculators/ORCA5.py +6 -0
  129. pysisyphus/calculators/OpenMM.py +88 -0
  130. pysisyphus/calculators/OpenMolcas.py +281 -0
  131. pysisyphus/calculators/OverlapCalculator.py +908 -0
  132. pysisyphus/calculators/Psi4.py +218 -0
  133. pysisyphus/calculators/PyPsi4.py +37 -0
  134. pysisyphus/calculators/PySCF.py +341 -0
  135. pysisyphus/calculators/PyXTB.py +73 -0
  136. pysisyphus/calculators/QCEngine.py +106 -0
  137. pysisyphus/calculators/Rastrigin.py +22 -0
  138. pysisyphus/calculators/Remote.py +76 -0
  139. pysisyphus/calculators/Rosenbrock.py +15 -0
  140. pysisyphus/calculators/SocketCalc.py +97 -0
  141. pysisyphus/calculators/TIP3P.py +111 -0
  142. pysisyphus/calculators/TransTorque.py +161 -0
  143. pysisyphus/calculators/Turbomole.py +965 -0
  144. pysisyphus/calculators/VRIPot.py +37 -0
  145. pysisyphus/calculators/WFOWrapper.py +333 -0
  146. pysisyphus/calculators/WFOWrapper2.py +341 -0
  147. pysisyphus/calculators/XTB.py +418 -0
  148. pysisyphus/calculators/__init__.py +81 -0
  149. pysisyphus/calculators/cosmo_data.py +139 -0
  150. pysisyphus/calculators/parser.py +150 -0
  151. pysisyphus/color.py +19 -0
  152. pysisyphus/config.py +133 -0
  153. pysisyphus/constants.py +65 -0
  154. pysisyphus/cos/AdaptiveNEB.py +230 -0
  155. pysisyphus/cos/ChainOfStates.py +725 -0
  156. pysisyphus/cos/FreeEndNEB.py +25 -0
  157. pysisyphus/cos/FreezingString.py +103 -0
  158. pysisyphus/cos/GrowingChainOfStates.py +71 -0
  159. pysisyphus/cos/GrowingNT.py +309 -0
  160. pysisyphus/cos/GrowingString.py +508 -0
  161. pysisyphus/cos/NEB.py +189 -0
  162. pysisyphus/cos/SimpleZTS.py +64 -0
  163. pysisyphus/cos/__init__.py +22 -0
  164. pysisyphus/cos/stiffness.py +199 -0
  165. pysisyphus/drivers/__init__.py +17 -0
  166. pysisyphus/drivers/afir.py +855 -0
  167. pysisyphus/drivers/barriers.py +271 -0
  168. pysisyphus/drivers/birkholz.py +138 -0
  169. pysisyphus/drivers/cluster.py +318 -0
  170. pysisyphus/drivers/diabatization.py +133 -0
  171. pysisyphus/drivers/merge.py +368 -0
  172. pysisyphus/drivers/merge_mol2.py +322 -0
  173. pysisyphus/drivers/opt.py +375 -0
  174. pysisyphus/drivers/perf.py +91 -0
  175. pysisyphus/drivers/pka.py +52 -0
  176. pysisyphus/drivers/precon_pos_rot.py +669 -0
  177. pysisyphus/drivers/rates.py +480 -0
  178. pysisyphus/drivers/replace.py +219 -0
  179. pysisyphus/drivers/scan.py +212 -0
  180. pysisyphus/drivers/spectrum.py +166 -0
  181. pysisyphus/drivers/thermo.py +31 -0
  182. pysisyphus/dynamics/Gaussian.py +103 -0
  183. pysisyphus/dynamics/__init__.py +20 -0
  184. pysisyphus/dynamics/colvars.py +136 -0
  185. pysisyphus/dynamics/driver.py +297 -0
  186. pysisyphus/dynamics/helpers.py +256 -0
  187. pysisyphus/dynamics/lincs.py +105 -0
  188. pysisyphus/dynamics/mdp.py +364 -0
  189. pysisyphus/dynamics/rattle.py +121 -0
  190. pysisyphus/dynamics/thermostats.py +128 -0
  191. pysisyphus/dynamics/wigner.py +266 -0
  192. pysisyphus/elem_data.py +3473 -0
  193. pysisyphus/exceptions.py +2 -0
  194. pysisyphus/filtertrj.py +69 -0
  195. pysisyphus/helpers.py +623 -0
  196. pysisyphus/helpers_pure.py +649 -0
  197. pysisyphus/init_logging.py +50 -0
  198. pysisyphus/intcoords/Bend.py +69 -0
  199. pysisyphus/intcoords/Bend2.py +25 -0
  200. pysisyphus/intcoords/BondedFragment.py +32 -0
  201. pysisyphus/intcoords/Cartesian.py +41 -0
  202. pysisyphus/intcoords/CartesianCoords.py +140 -0
  203. pysisyphus/intcoords/Coords.py +56 -0
  204. pysisyphus/intcoords/DLC.py +197 -0
  205. pysisyphus/intcoords/DistanceFunction.py +34 -0
  206. pysisyphus/intcoords/DummyImproper.py +70 -0
  207. pysisyphus/intcoords/DummyTorsion.py +72 -0
  208. pysisyphus/intcoords/LinearBend.py +105 -0
  209. pysisyphus/intcoords/LinearDisplacement.py +80 -0
  210. pysisyphus/intcoords/OutOfPlane.py +59 -0
  211. pysisyphus/intcoords/PrimTypes.py +286 -0
  212. pysisyphus/intcoords/Primitive.py +137 -0
  213. pysisyphus/intcoords/RedundantCoords.py +659 -0
  214. pysisyphus/intcoords/RobustTorsion.py +59 -0
  215. pysisyphus/intcoords/Rotation.py +147 -0
  216. pysisyphus/intcoords/Stretch.py +31 -0
  217. pysisyphus/intcoords/Torsion.py +101 -0
  218. pysisyphus/intcoords/Torsion2.py +25 -0
  219. pysisyphus/intcoords/Translation.py +45 -0
  220. pysisyphus/intcoords/__init__.py +61 -0
  221. pysisyphus/intcoords/augment_bonds.py +126 -0
  222. pysisyphus/intcoords/derivatives.py +10512 -0
  223. pysisyphus/intcoords/eval.py +80 -0
  224. pysisyphus/intcoords/exceptions.py +37 -0
  225. pysisyphus/intcoords/findiffs.py +48 -0
  226. pysisyphus/intcoords/generate_derivatives.py +414 -0
  227. pysisyphus/intcoords/helpers.py +235 -0
  228. pysisyphus/intcoords/logging_conf.py +10 -0
  229. pysisyphus/intcoords/mp_derivatives.py +10836 -0
  230. pysisyphus/intcoords/setup.py +962 -0
  231. pysisyphus/intcoords/setup_fast.py +176 -0
  232. pysisyphus/intcoords/update.py +272 -0
  233. pysisyphus/intcoords/valid.py +89 -0
  234. pysisyphus/interpolate/Geodesic.py +93 -0
  235. pysisyphus/interpolate/IDPP.py +55 -0
  236. pysisyphus/interpolate/Interpolator.py +116 -0
  237. pysisyphus/interpolate/LST.py +70 -0
  238. pysisyphus/interpolate/Redund.py +152 -0
  239. pysisyphus/interpolate/__init__.py +9 -0
  240. pysisyphus/interpolate/helpers.py +34 -0
  241. pysisyphus/io/__init__.py +22 -0
  242. pysisyphus/io/aomix.py +178 -0
  243. pysisyphus/io/cjson.py +24 -0
  244. pysisyphus/io/crd.py +101 -0
  245. pysisyphus/io/cube.py +220 -0
  246. pysisyphus/io/fchk.py +184 -0
  247. pysisyphus/io/hdf5.py +49 -0
  248. pysisyphus/io/hessian.py +72 -0
  249. pysisyphus/io/mol2.py +146 -0
  250. pysisyphus/io/molden.py +293 -0
  251. pysisyphus/io/orca.py +189 -0
  252. pysisyphus/io/pdb.py +269 -0
  253. pysisyphus/io/psf.py +79 -0
  254. pysisyphus/io/pubchem.py +31 -0
  255. pysisyphus/io/qcschema.py +34 -0
  256. pysisyphus/io/sdf.py +29 -0
  257. pysisyphus/io/xyz.py +61 -0
  258. pysisyphus/io/zmat.py +175 -0
  259. pysisyphus/irc/DWI.py +108 -0
  260. pysisyphus/irc/DampedVelocityVerlet.py +134 -0
  261. pysisyphus/irc/Euler.py +22 -0
  262. pysisyphus/irc/EulerPC.py +345 -0
  263. pysisyphus/irc/GonzalezSchlegel.py +187 -0
  264. pysisyphus/irc/IMKMod.py +164 -0
  265. pysisyphus/irc/IRC.py +878 -0
  266. pysisyphus/irc/IRCDummy.py +10 -0
  267. pysisyphus/irc/Instanton.py +307 -0
  268. pysisyphus/irc/LQA.py +53 -0
  269. pysisyphus/irc/ModeKill.py +136 -0
  270. pysisyphus/irc/ParamPlot.py +53 -0
  271. pysisyphus/irc/RK4.py +36 -0
  272. pysisyphus/irc/__init__.py +31 -0
  273. pysisyphus/irc/initial_displ.py +219 -0
  274. pysisyphus/linalg.py +411 -0
  275. pysisyphus/line_searches/Backtracking.py +88 -0
  276. pysisyphus/line_searches/HagerZhang.py +184 -0
  277. pysisyphus/line_searches/LineSearch.py +232 -0
  278. pysisyphus/line_searches/StrongWolfe.py +108 -0
  279. pysisyphus/line_searches/__init__.py +9 -0
  280. pysisyphus/line_searches/interpol.py +15 -0
  281. pysisyphus/modefollow/NormalMode.py +40 -0
  282. pysisyphus/modefollow/__init__.py +10 -0
  283. pysisyphus/modefollow/davidson.py +199 -0
  284. pysisyphus/modefollow/lanczos.py +95 -0
  285. pysisyphus/optimizers/BFGS.py +99 -0
  286. pysisyphus/optimizers/BacktrackingOptimizer.py +113 -0
  287. pysisyphus/optimizers/ConjugateGradient.py +98 -0
  288. pysisyphus/optimizers/CubicNewton.py +75 -0
  289. pysisyphus/optimizers/FIRE.py +113 -0
  290. pysisyphus/optimizers/HessianOptimizer.py +1176 -0
  291. pysisyphus/optimizers/LBFGS.py +228 -0
  292. pysisyphus/optimizers/LayerOpt.py +411 -0
  293. pysisyphus/optimizers/MicroOptimizer.py +169 -0
  294. pysisyphus/optimizers/NCOptimizer.py +90 -0
  295. pysisyphus/optimizers/Optimizer.py +1084 -0
  296. pysisyphus/optimizers/PreconLBFGS.py +260 -0
  297. pysisyphus/optimizers/PreconSteepestDescent.py +7 -0
  298. pysisyphus/optimizers/QuickMin.py +74 -0
  299. pysisyphus/optimizers/RFOptimizer.py +181 -0
  300. pysisyphus/optimizers/RSA.py +99 -0
  301. pysisyphus/optimizers/StabilizedQNMethod.py +248 -0
  302. pysisyphus/optimizers/SteepestDescent.py +23 -0
  303. pysisyphus/optimizers/StringOptimizer.py +173 -0
  304. pysisyphus/optimizers/__init__.py +41 -0
  305. pysisyphus/optimizers/closures.py +301 -0
  306. pysisyphus/optimizers/cls_map.py +58 -0
  307. pysisyphus/optimizers/exceptions.py +6 -0
  308. pysisyphus/optimizers/gdiis.py +280 -0
  309. pysisyphus/optimizers/guess_hessians.py +311 -0
  310. pysisyphus/optimizers/hessian_updates.py +355 -0
  311. pysisyphus/optimizers/poly_fit.py +285 -0
  312. pysisyphus/optimizers/precon.py +153 -0
  313. pysisyphus/optimizers/restrict_step.py +24 -0
  314. pysisyphus/pack.py +172 -0
  315. pysisyphus/peakdetect.py +948 -0
  316. pysisyphus/plot.py +1031 -0
  317. pysisyphus/run.py +2106 -0
  318. pysisyphus/socket_helper.py +74 -0
  319. pysisyphus/stocastic/FragmentKick.py +132 -0
  320. pysisyphus/stocastic/Kick.py +81 -0
  321. pysisyphus/stocastic/Pipeline.py +303 -0
  322. pysisyphus/stocastic/__init__.py +21 -0
  323. pysisyphus/stocastic/align.py +127 -0
  324. pysisyphus/testing.py +96 -0
  325. pysisyphus/thermo.py +156 -0
  326. pysisyphus/trj.py +824 -0
  327. pysisyphus/tsoptimizers/RSIRFOptimizer.py +56 -0
  328. pysisyphus/tsoptimizers/RSPRFOptimizer.py +182 -0
  329. pysisyphus/tsoptimizers/TRIM.py +59 -0
  330. pysisyphus/tsoptimizers/TSHessianOptimizer.py +463 -0
  331. pysisyphus/tsoptimizers/__init__.py +23 -0
  332. pysisyphus/wavefunction/Basis.py +239 -0
  333. pysisyphus/wavefunction/DIIS.py +76 -0
  334. pysisyphus/wavefunction/__init__.py +25 -0
  335. pysisyphus/wavefunction/build_ext.py +42 -0
  336. pysisyphus/wavefunction/cart2sph.py +190 -0
  337. pysisyphus/wavefunction/diabatization.py +304 -0
  338. pysisyphus/wavefunction/excited_states.py +435 -0
  339. pysisyphus/wavefunction/gen_ints.py +1811 -0
  340. pysisyphus/wavefunction/helpers.py +104 -0
  341. pysisyphus/wavefunction/ints/__init__.py +0 -0
  342. pysisyphus/wavefunction/ints/boys.py +193 -0
  343. pysisyphus/wavefunction/ints/boys_table_N_64_xasym_27.1_step_0.01.npy +0 -0
  344. pysisyphus/wavefunction/ints/cart_gto3d.py +176 -0
  345. pysisyphus/wavefunction/ints/coulomb3d.py +25928 -0
  346. pysisyphus/wavefunction/ints/diag_quadrupole3d.py +10036 -0
  347. pysisyphus/wavefunction/ints/dipole3d.py +8762 -0
  348. pysisyphus/wavefunction/ints/int2c2e3d.py +7198 -0
  349. pysisyphus/wavefunction/ints/int3c2e3d_sph.py +65040 -0
  350. pysisyphus/wavefunction/ints/kinetic3d.py +8240 -0
  351. pysisyphus/wavefunction/ints/ovlp3d.py +3777 -0
  352. pysisyphus/wavefunction/ints/quadrupole3d.py +15054 -0
  353. pysisyphus/wavefunction/ints/self_ovlp3d.py +198 -0
  354. pysisyphus/wavefunction/localization.py +458 -0
  355. pysisyphus/wavefunction/multipole.py +159 -0
  356. pysisyphus/wavefunction/normalization.py +36 -0
  357. pysisyphus/wavefunction/pop_analysis.py +134 -0
  358. pysisyphus/wavefunction/shells.py +1171 -0
  359. pysisyphus/wavefunction/wavefunction.py +504 -0
  360. pysisyphus/wrapper/__init__.py +11 -0
  361. pysisyphus/wrapper/exceptions.py +2 -0
  362. pysisyphus/wrapper/jmol.py +120 -0
  363. pysisyphus/wrapper/mwfn.py +169 -0
  364. pysisyphus/wrapper/packmol.py +71 -0
  365. pysisyphus/xyzloader.py +168 -0
  366. pysisyphus/yaml_mods.py +45 -0
  367. thermoanalysis/LICENSE +674 -0
  368. thermoanalysis/QCData.py +244 -0
  369. thermoanalysis/__init__.py +0 -0
  370. thermoanalysis/config.py +3 -0
  371. thermoanalysis/constants.py +20 -0
  372. thermoanalysis/thermo.py +1011 -0
mlmm/fix_altloc.py ADDED
@@ -0,0 +1,464 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ fix_altloc.py - Drop alternate locations from PDB files
4
+
5
+ What it does
6
+ ------------
7
+ 1) Blank the PDB altLoc column (column 17, 1-based) with a single space.
8
+ - This is a 1-character replacement (no shifting / no reformatting).
9
+ 2) If the same atom appears multiple times due to alternate locations
10
+ (altLoc like A/B/... or custom labels like H/L),
11
+ keep the "best" one by the default rule:
12
+ - Highest occupancy first
13
+ - If tied (or occupancy missing), keep the earliest one in the file
14
+
15
+ Handled records
16
+ ---------------
17
+ - ATOM / HETATM
18
+ - ANISOU is also handled: ANISOU lines are kept only if the corresponding
19
+ ATOM/HETATM line (same serial) is kept.
20
+
21
+ Notes
22
+ -----
23
+ - Atom serial numbers are NOT renumbered (gaps may remain).
24
+ - CONECT and other connectivity/annotation records are NOT updated.
25
+
26
+ Usage
27
+ -----
28
+ mlmm fix-altloc -i input.pdb -o output.pdb
29
+ mlmm fix-altloc -i ./dir -o ./dir_clean --recursive
30
+ mlmm fix-altloc -i ./dir --inplace --recursive
31
+ """
32
+
33
+ import shutil
34
+ from pathlib import Path
35
+ from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple
36
+
37
+ import click
38
+
39
# Record-name prefixes that mark coordinate lines; kept as a tuple so it can be
# handed straight to str.startswith().
COORD_RECORDS = ("ATOM ", "HETATM")
# Record-name prefix of anisotropic temperature factor lines.
ANISOU_RECORD = "ANISOU"

# Fixed PDB columns, expressed as 0-based Python indices / slices.
ALTLOC_IDX = 16  # altLoc indicator: column 17 (1-based)
SERIAL_SLICE = slice(6, 11)  # atom serial: columns 7-11 (1-based), width 5
OCC_SLICE = slice(54, 60)  # occupancy: columns 55-60 (1-based), width 6
46
+
47
+
48
def split_newline(line: str) -> Tuple[str, str]:
    """Separate *line* into its content and its trailing line terminator.

    Returns ``(core, newline)`` where ``newline`` is exactly ``"\\r\\n"``,
    ``"\\n"``, ``"\\r"`` or ``""`` (no terminator), so that
    ``core + newline == line`` always holds. Only one terminator is removed.
    """
    # "\r\n" has to be tried before "\n" so a CRLF ending is not cut in half.
    for terminator in ("\r\n", "\n", "\r"):
        if line.endswith(terminator):
            return line[: -len(terminator)], terminator
    return line, ""
57
+
58
+
59
def ensure_len(core: str, n: int) -> str:
    """Return *core* padded on the right with spaces to at least *n* characters.

    Existing characters are never moved or removed; strings that are already
    long enough come back unchanged.
    """
    # str.ljust() leaves the string untouched when it is already n or longer.
    return core.ljust(n)
62
+
63
+
64
def blank_altloc(line: str) -> str:
    """Return *line* with the altLoc column (column 17, 1-based) set to a space.

    Exactly one character is overwritten, so every other column keeps its
    position. Lines shorter than 17 characters are right-padded with spaces
    first. The original line terminator is re-attached unchanged.
    """
    body, terminator = split_newline(line)
    # Pad so that index ALTLOC_IDX is guaranteed to exist.
    padded = ensure_len(body, ALTLOC_IDX + 1)
    head = padded[:ALTLOC_IDX]
    tail = padded[ALTLOC_IDX + 1:]
    return f"{head} {tail}{terminator}"
75
+
76
+
77
def atom_serial_5(line: str) -> str:
    """Return the raw 5-character atom serial field (columns 7-11, 1-based).

    The field is returned verbatim, including any padding spaces; short lines
    are right-padded with spaces so the result is always 5 characters long.
    """
    body = split_newline(line)[0]
    return ensure_len(body, SERIAL_SLICE.stop)[SERIAL_SLICE]
82
+
83
+
84
def parse_occupancy(line: str) -> Optional[float]:
    """Parse the occupancy from columns 55-60 (1-based) of a PDB line.

    Parameters
    ----------
    line : str
        A PDB record line, with or without its line terminator.

    Returns
    -------
    Optional[float]
        The occupancy as a finite float, or None when the field is blank,
        not a number, or a non-finite value such as "nan" / "inf".
    """
    import math

    core, _ = split_newline(line)
    text = ensure_len(core, OCC_SLICE.stop)[OCC_SLICE].strip()
    if not text:
        return None
    try:
        value = float(text)
    except ValueError:
        return None
    # float() happily parses "nan" and "inf". Neither is a usable occupancy:
    # NaN compares False against everything (so a NaN entry seen first could
    # never be replaced by a better one) and inf would always win.
    return value if math.isfinite(value) else None
98
+
99
+
100
def atom_identity_key(line: str) -> Tuple[str, str, str, str, str, str, str]:
    """Return a tuple identifying the atom on *line*, ignoring its altLoc.

    Two coordinate lines that differ only in the altLoc column (and in fields
    outside the ones listed below) produce the same key. All fields are taken
    verbatim from the classic fixed PDB columns; short lines are right-padded
    with spaces first.

    Key layout (1-based columns):
      1. record name (ATOM/HETATM)   cols 1-6
      2. atom name                   cols 13-16
      3. residue name                cols 18-20
      4. chain ID                    col 22
      5. residue sequence number     cols 23-26
      6. insertion code              col 27
      7. segID (non-standard)        cols 73-76

    segID is part of the key to reduce accidental merging in MD-style PDBs
    where the chain ID may be blank.
    """
    body, _ = split_newline(line)
    padded = ensure_len(body, 76)
    return (
        padded[0:6],    # record name
        padded[12:16],  # atom name
        padded[17:20],  # residue name
        padded[21:22],  # chain ID
        padded[22:26],  # residue sequence number
        padded[26:27],  # insertion code
        padded[72:76],  # segID, optional / non-standard
    )
127
+
128
+
129
def process_block(lines: List[str]) -> List[str]:
    """Resolve alternate locations within one block of PDB lines.

    A block is either a whole file without MODEL records or the lines between
    MODEL and ENDMDL. The work is done in two passes:

    Pass 1 picks, for every atom identity key, the coordinate line with the
      highest occupancy; on a tie (or when occupancy is missing) the line
      that appears first wins.
    Pass 2 emits only the picked coordinate lines, and only those ANISOU
      lines whose serial field matches a picked coordinate line; both get
      their altLoc column blanked. Every other record passes through
      unchanged.

    Atoms that exist in only one altLoc state have a unique key and are
    therefore always kept, so the output holds every distinct atom from all
    altLoc states, with duplicated atoms reduced to a single line.
    """
    # identity key -> (occupancy used for ranking, line index, serial field)
    winners: Dict[
        Tuple[str, str, str, str, str, str, str], Tuple[float, int, str]
    ] = {}

    for pos, text in enumerate(lines):
        if not text.startswith(COORD_RECORDS):
            continue
        occupancy = parse_occupancy(text)
        # A missing occupancy ranks below every real value.
        rank = float("-inf") if occupancy is None else occupancy
        ident = atom_identity_key(text)
        current = winners.get(ident)
        # Strict ">" keeps the earliest line on ties, because lines are
        # visited in file order.
        if current is None or rank > current[0]:
            winners[ident] = (rank, pos, atom_serial_5(text))

    kept_positions: Set[int] = {entry[1] for entry in winners.values()}
    kept_serials: Set[str] = {entry[2] for entry in winners.values()}

    result: List[str] = []
    for pos, text in enumerate(lines):
        if text.startswith(COORD_RECORDS):
            if pos in kept_positions:
                result.append(blank_altloc(text))
        elif text.startswith(ANISOU_RECORD):
            if atom_serial_5(text) in kept_serials:
                result.append(blank_altloc(text))
        else:
            result.append(text)

    return result
190
+
191
+
192
def process_stream(lines: Iterable[str]) -> Iterator[str]:
    """Yield cleaned PDB lines, resolving altLocs separately per MODEL block.

    Lines are buffered until a block boundary is reached. A boundary is a
    ``MODEL `` record, or an ``ENDMDL`` record seen while inside a model. At a
    boundary the buffered lines are run through ``process_block`` and the
    boundary record itself is emitted unchanged. Whatever is still buffered
    at the end of input is processed as one last block.
    """
    pending: List[str] = []
    inside_model = False

    for line in lines:
        if line.startswith("MODEL "):
            inside_model = True
        elif inside_model and line.startswith("ENDMDL"):
            inside_model = False
        else:
            pending.append(line)
            continue

        # Boundary record: flush what was collected so far, then pass the
        # MODEL / ENDMDL line through as-is.
        if pending:
            yield from process_block(pending)
            pending = []
        yield line

    # Flush the trailing lines once the input is exhausted.
    if pending:
        yield from process_block(pending)
227
+
228
+
229
def clean_pdb_file(in_path: Path, out_path: Path) -> None:
    """Read *in_path*, resolve its alternate locations and write *out_path*.

    Parameters
    ----------
    in_path : Path
        PDB file to read.
    out_path : Path
        Destination file; missing parent directories are created. May be the
        same file as *in_path*.

    Notes
    -----
    - The input is read and processed completely before the output is opened,
      so passing the same path for input and output does not truncate the
      data before it has been read.
    - Files are opened with ``newline=""`` so line terminators are written
      back exactly as they were read.
    - Text is decoded as UTF-8 with ``errors="surrogateescape"`` instead of
      the platform's default encoding: bytes that are not valid UTF-8 pass
      through to the output unchanged rather than raising UnicodeDecodeError.
    """
    text_mode = {"newline": "", "encoding": "utf-8", "errors": "surrogateescape"}

    with in_path.open("r", **text_mode) as fin:
        cleaned = list(process_stream(fin))

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open("w", **text_mode) as fout:
        fout.writelines(cleaned)
235
+
236
+
237
def collect_pdb_files(input_path: Path, recursive: bool) -> List[Path]:
    """Return the PDB files designated by *input_path*.

    A path to a regular file is returned as a one-element list, whatever its
    extension. Otherwise the path is globbed for ``*.pdb`` (``**/*.pdb`` when
    *recursive* is true) and the matching regular files are returned sorted.
    """
    if input_path.is_file():
        return [input_path]

    if recursive:
        pattern = "**/*.pdb"
    else:
        pattern = "*.pdb"

    matches = [candidate for candidate in input_path.glob(pattern) if candidate.is_file()]
    matches.sort()
    return matches
243
+
244
+
245
+ # =============================================================================
246
+ # Public API for programmatic use
247
+ # =============================================================================
248
+
249
def has_altloc(pdb_path: Path) -> bool:
    """Report whether a PDB file uses the altLoc column (column 17, 1-based).

    Parameters
    ----------
    pdb_path : Path
        File to inspect.

    Returns
    -------
    bool
        True if at least one ATOM/HETATM record carries a non-blank character
        in the altLoc column. False if none does, or if the file cannot be
        opened or read.
    """
    # Values at the altLoc position that do NOT count as an alternate
    # location: nothing there (short line), a space, or a line terminator
    # that happens to sit at that index on a truncated record.
    blank = ("", " ", "\r", "\n")
    try:
        with open(pdb_path, "r", encoding="utf-8", errors="ignore") as fh:
            return any(
                line[ALTLOC_IDX:ALTLOC_IDX + 1] not in blank
                for line in fh
                if line.startswith(COORD_RECORDS)
            )
    except (OSError, ValueError):
        # Best effort: an unreadable or invalid path is treated as having no
        # altLoc. Other exception types indicate a programming error and are
        # allowed to propagate.
        return False
268
+
269
+
270
def fix_altloc_file(
    in_path: str | Path,
    out_path: str | Path,
    *,
    overwrite: bool = False,
    skip_if_no_altloc: bool = True,
) -> bool:
    """
    Resolve alternate locations in one PDB file and write the result.

    Parameters
    ----------
    in_path : str | Path
        PDB file to read.
    out_path : str | Path
        PDB file to write.
    overwrite : bool
        Permit replacing an existing output file. Default False.
    skip_if_no_altloc : bool
        When True (the default), do nothing if the input shows no altLoc
        characters; no output file is written in that case.

    Returns
    -------
    bool
        True when the file was cleaned and written, False when it was
        skipped because no altLoc was detected.

    Raises
    ------
    FileNotFoundError
        The input path does not exist.
    FileExistsError
        The output path exists and ``overwrite`` is False.
    """
    source = Path(in_path)
    target = Path(out_path)

    if not source.exists():
        raise FileNotFoundError(f"Input file not found: {source}")
    if target.exists() and not overwrite:
        raise FileExistsError(f"Output file exists: {target}")

    # The altLoc scan only runs when skipping is enabled at all.
    needs_cleaning = (not skip_if_no_altloc) or has_altloc(source)
    if needs_cleaning:
        clean_pdb_file(source, target)
    return needs_cleaning
318
+
319
+
320
+ # =============================================================================
321
+ # CLI
322
+ # =============================================================================
323
+
324
def _is_same_file(first: Path, second: Path) -> bool:
    """Return True when both paths exist and refer to the same file."""
    try:
        return second.exists() and first.samefile(second)
    except OSError:
        return False


def _run_fix_altloc(
    input_path: Path,
    out: Optional[Path],
    recursive: bool,
    inplace: bool,
    overwrite: bool,
    force: bool,
) -> None:
    """Core business logic for fix-altloc (called from Click CLI).

    Parameters
    ----------
    input_path : Path
        A PDB file, or a directory searched for ``*.pdb`` files.
    out : Optional[Path]
        Output file or directory. Ignored in in-place mode. When omitted, a
        file input is written next to itself as ``<stem>_clean.pdb`` and a
        directory input is written to a sibling ``<name>_clean`` directory.
        For a file input, an ``out`` that does not end in ``.pdb`` is treated
        as a directory.
    recursive : bool
        Search directories recursively.
    inplace : bool
        Replace each input file, keeping a ``.bak`` copy next to it (an
        existing ``.bak`` is left untouched).
    overwrite : bool
        Permit replacing existing output files (not used in in-place mode).
    force : bool
        Process files even when no altLoc is detected.

    Raises
    ------
    click.ClickException
        No PDB files were found, an output file exists and ``overwrite`` is
        False, or (outside in-place mode) an output path is the input file
        itself.
    """
    pdb_files = collect_pdb_files(input_path, recursive)
    if not pdb_files:
        raise click.ClickException(f"No .pdb files found in: {input_path}")

    skip_if_no_altloc = not force
    processed_count = 0
    skipped_count = 0

    # In-place mode
    if inplace:
        for in_path in pdb_files:
            if skip_if_no_altloc and not has_altloc(in_path):
                skipped_count += 1
                continue

            bak_path = in_path.with_suffix(in_path.suffix + ".bak")
            if not bak_path.exists():
                shutil.copy2(in_path, bak_path)

            tmp_path = in_path.with_suffix(in_path.suffix + ".tmp")
            try:
                clean_pdb_file(in_path, tmp_path)
                tmp_path.replace(in_path)
            except BaseException:
                # Do not leave a half-written temporary file behind.
                tmp_path.unlink(missing_ok=True)
                raise
            processed_count += 1

        if processed_count > 0:
            click.echo(f"[fix-altloc] Processed {processed_count} file(s) in-place.")
        if skipped_count > 0:
            click.echo(f"[fix-altloc] Skipped {skipped_count} file(s) (no altLoc detected).")
        return

    # File input
    if input_path.is_file():
        in_path = input_path

        if skip_if_no_altloc and not has_altloc(in_path):
            click.echo(f"[fix-altloc] Skipped {in_path} (no altLoc detected).")
            return

        if out is None:
            out_path = in_path.with_name(in_path.stem + "_clean.pdb")
        elif out.suffix.lower() == ".pdb":
            out_path = out
        else:
            out.mkdir(parents=True, exist_ok=True)
            out_path = out / in_path.name

        # Writing onto the input would truncate it before it has been read;
        # checked before the overwrite test so the user is not told to pass
        # --overwrite for this case.
        if _is_same_file(in_path, out_path):
            raise click.ClickException(
                f"Output is the input file: {out_path} (use --inplace to modify files in place)"
            )

        if out_path.exists() and not overwrite:
            raise click.ClickException(f"Output exists: {out_path} (use --overwrite to overwrite)")

        clean_pdb_file(in_path, out_path)
        click.echo(f"[fix-altloc] Fixed altLoc → {out_path}")
        return

    # Directory input
    in_dir = input_path
    out_dir = out if out is not None else in_dir.with_name(in_dir.name + "_clean")
    out_dir.mkdir(parents=True, exist_ok=True)

    for in_path in pdb_files:
        if skip_if_no_altloc and not has_altloc(in_path):
            skipped_count += 1
            continue

        # Mirror the input's layout below the output directory.
        rel = in_path.relative_to(in_dir)
        out_path = out_dir / rel

        # Same hazard as for a single file (e.g. output dir == input dir).
        if _is_same_file(in_path, out_path):
            raise click.ClickException(
                f"Output is the input file: {out_path} (use --inplace to modify files in place)"
            )

        if out_path.exists() and not overwrite:
            raise click.ClickException(f"Output exists: {out_path} (use --overwrite to overwrite)")

        clean_pdb_file(in_path, out_path)
        processed_count += 1

    if processed_count > 0:
        click.echo(f"[fix-altloc] Processed {processed_count} file(s) → {out_dir}")
    if skipped_count > 0:
        click.echo(f"[fix-altloc] Skipped {skipped_count} file(s) (no altLoc detected).")
410
+
411
+
412
@click.command(
    name="fix-altloc",
    help=(
        "Blank PDB altLoc column (col 17) without shifting, and keep one altLoc "
        "per atom by default rule: highest occupancy, then earliest appearance."
    ),
    context_settings={"help_option_names": ["-h", "--help"]},
)
@click.option(
    "-i",
    "--input",
    "input_path",
    type=click.Path(exists=True, path_type=Path),
    required=True,
    help="Input PDB file or directory.",
)
@click.option(
    "-o",
    "--out",
    type=click.Path(path_type=Path),
    default=None,
    help="Output file (if input is a file) or output directory (if input is a directory).",
)
@click.option(
    "--recursive/--no-recursive",
    default=False,
    show_default=True,
    help="When input is a directory, process *.pdb recursively (including subdirectories).",
)
@click.option(
    "--inplace/--no-inplace",
    default=False,
    show_default=True,
    help="Overwrite input file(s) in place (creates .bak next to each file).",
)
@click.option(
    "--overwrite/--no-overwrite",
    default=False,
    show_default=True,
    help="Allow overwriting existing output files.",
)
@click.option(
    "--force/--no-force",
    default=False,
    show_default=True,
    help="Process files even if no altLoc is detected (default: skip files without altLoc).",
)
def cli(
    input_path: Path,
    out: Optional[Path],
    recursive: bool,
    inplace: bool,
    overwrite: bool,
    force: bool,
) -> None:
    """Click entry point for ``fix-altloc``; forwards the parsed options."""
    # The command's help text is supplied explicitly through ``help=`` above.
    _run_fix_altloc(
        input_path=input_path,
        out=out,
        recursive=recursive,
        inplace=inplace,
        overwrite=overwrite,
        force=force,
    )
461
+
462
+
463
# Run the Click command when this module is executed directly as a script.
if __name__ == "__main__":
    cli()