mlmm-toolkit 0.2.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hessian_ff/__init__.py +50 -0
- hessian_ff/analytical_hessian.py +609 -0
- hessian_ff/constants.py +46 -0
- hessian_ff/forcefield.py +339 -0
- hessian_ff/loaders.py +608 -0
- hessian_ff/native/Makefile +8 -0
- hessian_ff/native/__init__.py +28 -0
- hessian_ff/native/analytical_hessian.py +88 -0
- hessian_ff/native/analytical_hessian_ext.cpp +258 -0
- hessian_ff/native/bonded.py +82 -0
- hessian_ff/native/bonded_ext.cpp +640 -0
- hessian_ff/native/loader.py +349 -0
- hessian_ff/native/nonbonded.py +118 -0
- hessian_ff/native/nonbonded_ext.cpp +1150 -0
- hessian_ff/prmtop_parmed.py +23 -0
- hessian_ff/system.py +107 -0
- hessian_ff/terms/__init__.py +14 -0
- hessian_ff/terms/angle.py +73 -0
- hessian_ff/terms/bond.py +44 -0
- hessian_ff/terms/cmap.py +406 -0
- hessian_ff/terms/dihedral.py +141 -0
- hessian_ff/terms/nonbonded.py +209 -0
- hessian_ff/tests/__init__.py +0 -0
- hessian_ff/tests/conftest.py +75 -0
- hessian_ff/tests/data/small/complex.parm7 +1346 -0
- hessian_ff/tests/data/small/complex.pdb +125 -0
- hessian_ff/tests/data/small/complex.rst7 +63 -0
- hessian_ff/tests/test_coords_input.py +44 -0
- hessian_ff/tests/test_energy_force.py +49 -0
- hessian_ff/tests/test_hessian.py +137 -0
- hessian_ff/tests/test_smoke.py +18 -0
- hessian_ff/tests/test_validation.py +40 -0
- hessian_ff/workflows.py +889 -0
- mlmm/__init__.py +36 -0
- mlmm/__main__.py +7 -0
- mlmm/_version.py +34 -0
- mlmm/add_elem_info.py +374 -0
- mlmm/advanced_help.py +91 -0
- mlmm/align_freeze_atoms.py +601 -0
- mlmm/all.py +3535 -0
- mlmm/bond_changes.py +231 -0
- mlmm/bool_compat.py +223 -0
- mlmm/cli.py +574 -0
- mlmm/cli_utils.py +166 -0
- mlmm/default_group.py +337 -0
- mlmm/defaults.py +467 -0
- mlmm/define_layer.py +526 -0
- mlmm/dft.py +1041 -0
- mlmm/energy_diagram.py +253 -0
- mlmm/extract.py +2213 -0
- mlmm/fix_altloc.py +464 -0
- mlmm/freq.py +1406 -0
- mlmm/harmonic_constraints.py +140 -0
- mlmm/hessian_cache.py +44 -0
- mlmm/hessian_calc.py +174 -0
- mlmm/irc.py +638 -0
- mlmm/mlmm_calc.py +2262 -0
- mlmm/mm_parm.py +945 -0
- mlmm/oniom_export.py +1983 -0
- mlmm/oniom_import.py +457 -0
- mlmm/opt.py +1742 -0
- mlmm/path_opt.py +1353 -0
- mlmm/path_search.py +2299 -0
- mlmm/preflight.py +88 -0
- mlmm/py.typed +1 -0
- mlmm/pysis_runner.py +45 -0
- mlmm/scan.py +1047 -0
- mlmm/scan2d.py +1226 -0
- mlmm/scan3d.py +1265 -0
- mlmm/scan_common.py +184 -0
- mlmm/summary_log.py +736 -0
- mlmm/trj2fig.py +448 -0
- mlmm/tsopt.py +2871 -0
- mlmm/utils.py +2309 -0
- mlmm/xtb_embedcharge_correction.py +475 -0
- mlmm_toolkit-0.2.2.dev0.dist-info/METADATA +1159 -0
- mlmm_toolkit-0.2.2.dev0.dist-info/RECORD +372 -0
- mlmm_toolkit-0.2.2.dev0.dist-info/WHEEL +5 -0
- mlmm_toolkit-0.2.2.dev0.dist-info/entry_points.txt +2 -0
- mlmm_toolkit-0.2.2.dev0.dist-info/licenses/LICENSE +674 -0
- mlmm_toolkit-0.2.2.dev0.dist-info/top_level.txt +4 -0
- pysisyphus/Geometry.py +1667 -0
- pysisyphus/LICENSE +674 -0
- pysisyphus/TableFormatter.py +63 -0
- pysisyphus/TablePrinter.py +74 -0
- pysisyphus/__init__.py +12 -0
- pysisyphus/calculators/AFIR.py +452 -0
- pysisyphus/calculators/AnaPot.py +20 -0
- pysisyphus/calculators/AnaPot2.py +48 -0
- pysisyphus/calculators/AnaPot3.py +12 -0
- pysisyphus/calculators/AnaPot4.py +20 -0
- pysisyphus/calculators/AnaPotBase.py +337 -0
- pysisyphus/calculators/AnaPotCBM.py +25 -0
- pysisyphus/calculators/AtomAtomTransTorque.py +154 -0
- pysisyphus/calculators/CFOUR.py +250 -0
- pysisyphus/calculators/Calculator.py +844 -0
- pysisyphus/calculators/CerjanMiller.py +24 -0
- pysisyphus/calculators/Composite.py +123 -0
- pysisyphus/calculators/ConicalIntersection.py +171 -0
- pysisyphus/calculators/DFTBp.py +430 -0
- pysisyphus/calculators/DFTD3.py +66 -0
- pysisyphus/calculators/DFTD4.py +84 -0
- pysisyphus/calculators/Dalton.py +61 -0
- pysisyphus/calculators/Dimer.py +681 -0
- pysisyphus/calculators/Dummy.py +20 -0
- pysisyphus/calculators/EGO.py +76 -0
- pysisyphus/calculators/EnergyMin.py +224 -0
- pysisyphus/calculators/ExternalPotential.py +264 -0
- pysisyphus/calculators/FakeASE.py +35 -0
- pysisyphus/calculators/FourWellAnaPot.py +28 -0
- pysisyphus/calculators/FreeEndNEBPot.py +39 -0
- pysisyphus/calculators/Gaussian09.py +18 -0
- pysisyphus/calculators/Gaussian16.py +726 -0
- pysisyphus/calculators/HardSphere.py +159 -0
- pysisyphus/calculators/IDPPCalculator.py +49 -0
- pysisyphus/calculators/IPIClient.py +133 -0
- pysisyphus/calculators/IPIServer.py +234 -0
- pysisyphus/calculators/LEPSBase.py +24 -0
- pysisyphus/calculators/LEPSExpr.py +139 -0
- pysisyphus/calculators/LennardJones.py +80 -0
- pysisyphus/calculators/MOPAC.py +219 -0
- pysisyphus/calculators/MullerBrownSympyPot.py +51 -0
- pysisyphus/calculators/MultiCalc.py +85 -0
- pysisyphus/calculators/NFK.py +45 -0
- pysisyphus/calculators/OBabel.py +87 -0
- pysisyphus/calculators/ONIOMv2.py +1129 -0
- pysisyphus/calculators/ORCA.py +893 -0
- pysisyphus/calculators/ORCA5.py +6 -0
- pysisyphus/calculators/OpenMM.py +88 -0
- pysisyphus/calculators/OpenMolcas.py +281 -0
- pysisyphus/calculators/OverlapCalculator.py +908 -0
- pysisyphus/calculators/Psi4.py +218 -0
- pysisyphus/calculators/PyPsi4.py +37 -0
- pysisyphus/calculators/PySCF.py +341 -0
- pysisyphus/calculators/PyXTB.py +73 -0
- pysisyphus/calculators/QCEngine.py +106 -0
- pysisyphus/calculators/Rastrigin.py +22 -0
- pysisyphus/calculators/Remote.py +76 -0
- pysisyphus/calculators/Rosenbrock.py +15 -0
- pysisyphus/calculators/SocketCalc.py +97 -0
- pysisyphus/calculators/TIP3P.py +111 -0
- pysisyphus/calculators/TransTorque.py +161 -0
- pysisyphus/calculators/Turbomole.py +965 -0
- pysisyphus/calculators/VRIPot.py +37 -0
- pysisyphus/calculators/WFOWrapper.py +333 -0
- pysisyphus/calculators/WFOWrapper2.py +341 -0
- pysisyphus/calculators/XTB.py +418 -0
- pysisyphus/calculators/__init__.py +81 -0
- pysisyphus/calculators/cosmo_data.py +139 -0
- pysisyphus/calculators/parser.py +150 -0
- pysisyphus/color.py +19 -0
- pysisyphus/config.py +133 -0
- pysisyphus/constants.py +65 -0
- pysisyphus/cos/AdaptiveNEB.py +230 -0
- pysisyphus/cos/ChainOfStates.py +725 -0
- pysisyphus/cos/FreeEndNEB.py +25 -0
- pysisyphus/cos/FreezingString.py +103 -0
- pysisyphus/cos/GrowingChainOfStates.py +71 -0
- pysisyphus/cos/GrowingNT.py +309 -0
- pysisyphus/cos/GrowingString.py +508 -0
- pysisyphus/cos/NEB.py +189 -0
- pysisyphus/cos/SimpleZTS.py +64 -0
- pysisyphus/cos/__init__.py +22 -0
- pysisyphus/cos/stiffness.py +199 -0
- pysisyphus/drivers/__init__.py +17 -0
- pysisyphus/drivers/afir.py +855 -0
- pysisyphus/drivers/barriers.py +271 -0
- pysisyphus/drivers/birkholz.py +138 -0
- pysisyphus/drivers/cluster.py +318 -0
- pysisyphus/drivers/diabatization.py +133 -0
- pysisyphus/drivers/merge.py +368 -0
- pysisyphus/drivers/merge_mol2.py +322 -0
- pysisyphus/drivers/opt.py +375 -0
- pysisyphus/drivers/perf.py +91 -0
- pysisyphus/drivers/pka.py +52 -0
- pysisyphus/drivers/precon_pos_rot.py +669 -0
- pysisyphus/drivers/rates.py +480 -0
- pysisyphus/drivers/replace.py +219 -0
- pysisyphus/drivers/scan.py +212 -0
- pysisyphus/drivers/spectrum.py +166 -0
- pysisyphus/drivers/thermo.py +31 -0
- pysisyphus/dynamics/Gaussian.py +103 -0
- pysisyphus/dynamics/__init__.py +20 -0
- pysisyphus/dynamics/colvars.py +136 -0
- pysisyphus/dynamics/driver.py +297 -0
- pysisyphus/dynamics/helpers.py +256 -0
- pysisyphus/dynamics/lincs.py +105 -0
- pysisyphus/dynamics/mdp.py +364 -0
- pysisyphus/dynamics/rattle.py +121 -0
- pysisyphus/dynamics/thermostats.py +128 -0
- pysisyphus/dynamics/wigner.py +266 -0
- pysisyphus/elem_data.py +3473 -0
- pysisyphus/exceptions.py +2 -0
- pysisyphus/filtertrj.py +69 -0
- pysisyphus/helpers.py +623 -0
- pysisyphus/helpers_pure.py +649 -0
- pysisyphus/init_logging.py +50 -0
- pysisyphus/intcoords/Bend.py +69 -0
- pysisyphus/intcoords/Bend2.py +25 -0
- pysisyphus/intcoords/BondedFragment.py +32 -0
- pysisyphus/intcoords/Cartesian.py +41 -0
- pysisyphus/intcoords/CartesianCoords.py +140 -0
- pysisyphus/intcoords/Coords.py +56 -0
- pysisyphus/intcoords/DLC.py +197 -0
- pysisyphus/intcoords/DistanceFunction.py +34 -0
- pysisyphus/intcoords/DummyImproper.py +70 -0
- pysisyphus/intcoords/DummyTorsion.py +72 -0
- pysisyphus/intcoords/LinearBend.py +105 -0
- pysisyphus/intcoords/LinearDisplacement.py +80 -0
- pysisyphus/intcoords/OutOfPlane.py +59 -0
- pysisyphus/intcoords/PrimTypes.py +286 -0
- pysisyphus/intcoords/Primitive.py +137 -0
- pysisyphus/intcoords/RedundantCoords.py +659 -0
- pysisyphus/intcoords/RobustTorsion.py +59 -0
- pysisyphus/intcoords/Rotation.py +147 -0
- pysisyphus/intcoords/Stretch.py +31 -0
- pysisyphus/intcoords/Torsion.py +101 -0
- pysisyphus/intcoords/Torsion2.py +25 -0
- pysisyphus/intcoords/Translation.py +45 -0
- pysisyphus/intcoords/__init__.py +61 -0
- pysisyphus/intcoords/augment_bonds.py +126 -0
- pysisyphus/intcoords/derivatives.py +10512 -0
- pysisyphus/intcoords/eval.py +80 -0
- pysisyphus/intcoords/exceptions.py +37 -0
- pysisyphus/intcoords/findiffs.py +48 -0
- pysisyphus/intcoords/generate_derivatives.py +414 -0
- pysisyphus/intcoords/helpers.py +235 -0
- pysisyphus/intcoords/logging_conf.py +10 -0
- pysisyphus/intcoords/mp_derivatives.py +10836 -0
- pysisyphus/intcoords/setup.py +962 -0
- pysisyphus/intcoords/setup_fast.py +176 -0
- pysisyphus/intcoords/update.py +272 -0
- pysisyphus/intcoords/valid.py +89 -0
- pysisyphus/interpolate/Geodesic.py +93 -0
- pysisyphus/interpolate/IDPP.py +55 -0
- pysisyphus/interpolate/Interpolator.py +116 -0
- pysisyphus/interpolate/LST.py +70 -0
- pysisyphus/interpolate/Redund.py +152 -0
- pysisyphus/interpolate/__init__.py +9 -0
- pysisyphus/interpolate/helpers.py +34 -0
- pysisyphus/io/__init__.py +22 -0
- pysisyphus/io/aomix.py +178 -0
- pysisyphus/io/cjson.py +24 -0
- pysisyphus/io/crd.py +101 -0
- pysisyphus/io/cube.py +220 -0
- pysisyphus/io/fchk.py +184 -0
- pysisyphus/io/hdf5.py +49 -0
- pysisyphus/io/hessian.py +72 -0
- pysisyphus/io/mol2.py +146 -0
- pysisyphus/io/molden.py +293 -0
- pysisyphus/io/orca.py +189 -0
- pysisyphus/io/pdb.py +269 -0
- pysisyphus/io/psf.py +79 -0
- pysisyphus/io/pubchem.py +31 -0
- pysisyphus/io/qcschema.py +34 -0
- pysisyphus/io/sdf.py +29 -0
- pysisyphus/io/xyz.py +61 -0
- pysisyphus/io/zmat.py +175 -0
- pysisyphus/irc/DWI.py +108 -0
- pysisyphus/irc/DampedVelocityVerlet.py +134 -0
- pysisyphus/irc/Euler.py +22 -0
- pysisyphus/irc/EulerPC.py +345 -0
- pysisyphus/irc/GonzalezSchlegel.py +187 -0
- pysisyphus/irc/IMKMod.py +164 -0
- pysisyphus/irc/IRC.py +878 -0
- pysisyphus/irc/IRCDummy.py +10 -0
- pysisyphus/irc/Instanton.py +307 -0
- pysisyphus/irc/LQA.py +53 -0
- pysisyphus/irc/ModeKill.py +136 -0
- pysisyphus/irc/ParamPlot.py +53 -0
- pysisyphus/irc/RK4.py +36 -0
- pysisyphus/irc/__init__.py +31 -0
- pysisyphus/irc/initial_displ.py +219 -0
- pysisyphus/linalg.py +411 -0
- pysisyphus/line_searches/Backtracking.py +88 -0
- pysisyphus/line_searches/HagerZhang.py +184 -0
- pysisyphus/line_searches/LineSearch.py +232 -0
- pysisyphus/line_searches/StrongWolfe.py +108 -0
- pysisyphus/line_searches/__init__.py +9 -0
- pysisyphus/line_searches/interpol.py +15 -0
- pysisyphus/modefollow/NormalMode.py +40 -0
- pysisyphus/modefollow/__init__.py +10 -0
- pysisyphus/modefollow/davidson.py +199 -0
- pysisyphus/modefollow/lanczos.py +95 -0
- pysisyphus/optimizers/BFGS.py +99 -0
- pysisyphus/optimizers/BacktrackingOptimizer.py +113 -0
- pysisyphus/optimizers/ConjugateGradient.py +98 -0
- pysisyphus/optimizers/CubicNewton.py +75 -0
- pysisyphus/optimizers/FIRE.py +113 -0
- pysisyphus/optimizers/HessianOptimizer.py +1176 -0
- pysisyphus/optimizers/LBFGS.py +228 -0
- pysisyphus/optimizers/LayerOpt.py +411 -0
- pysisyphus/optimizers/MicroOptimizer.py +169 -0
- pysisyphus/optimizers/NCOptimizer.py +90 -0
- pysisyphus/optimizers/Optimizer.py +1084 -0
- pysisyphus/optimizers/PreconLBFGS.py +260 -0
- pysisyphus/optimizers/PreconSteepestDescent.py +7 -0
- pysisyphus/optimizers/QuickMin.py +74 -0
- pysisyphus/optimizers/RFOptimizer.py +181 -0
- pysisyphus/optimizers/RSA.py +99 -0
- pysisyphus/optimizers/StabilizedQNMethod.py +248 -0
- pysisyphus/optimizers/SteepestDescent.py +23 -0
- pysisyphus/optimizers/StringOptimizer.py +173 -0
- pysisyphus/optimizers/__init__.py +41 -0
- pysisyphus/optimizers/closures.py +301 -0
- pysisyphus/optimizers/cls_map.py +58 -0
- pysisyphus/optimizers/exceptions.py +6 -0
- pysisyphus/optimizers/gdiis.py +280 -0
- pysisyphus/optimizers/guess_hessians.py +311 -0
- pysisyphus/optimizers/hessian_updates.py +355 -0
- pysisyphus/optimizers/poly_fit.py +285 -0
- pysisyphus/optimizers/precon.py +153 -0
- pysisyphus/optimizers/restrict_step.py +24 -0
- pysisyphus/pack.py +172 -0
- pysisyphus/peakdetect.py +948 -0
- pysisyphus/plot.py +1031 -0
- pysisyphus/run.py +2106 -0
- pysisyphus/socket_helper.py +74 -0
- pysisyphus/stocastic/FragmentKick.py +132 -0
- pysisyphus/stocastic/Kick.py +81 -0
- pysisyphus/stocastic/Pipeline.py +303 -0
- pysisyphus/stocastic/__init__.py +21 -0
- pysisyphus/stocastic/align.py +127 -0
- pysisyphus/testing.py +96 -0
- pysisyphus/thermo.py +156 -0
- pysisyphus/trj.py +824 -0
- pysisyphus/tsoptimizers/RSIRFOptimizer.py +56 -0
- pysisyphus/tsoptimizers/RSPRFOptimizer.py +182 -0
- pysisyphus/tsoptimizers/TRIM.py +59 -0
- pysisyphus/tsoptimizers/TSHessianOptimizer.py +463 -0
- pysisyphus/tsoptimizers/__init__.py +23 -0
- pysisyphus/wavefunction/Basis.py +239 -0
- pysisyphus/wavefunction/DIIS.py +76 -0
- pysisyphus/wavefunction/__init__.py +25 -0
- pysisyphus/wavefunction/build_ext.py +42 -0
- pysisyphus/wavefunction/cart2sph.py +190 -0
- pysisyphus/wavefunction/diabatization.py +304 -0
- pysisyphus/wavefunction/excited_states.py +435 -0
- pysisyphus/wavefunction/gen_ints.py +1811 -0
- pysisyphus/wavefunction/helpers.py +104 -0
- pysisyphus/wavefunction/ints/__init__.py +0 -0
- pysisyphus/wavefunction/ints/boys.py +193 -0
- pysisyphus/wavefunction/ints/boys_table_N_64_xasym_27.1_step_0.01.npy +0 -0
- pysisyphus/wavefunction/ints/cart_gto3d.py +176 -0
- pysisyphus/wavefunction/ints/coulomb3d.py +25928 -0
- pysisyphus/wavefunction/ints/diag_quadrupole3d.py +10036 -0
- pysisyphus/wavefunction/ints/dipole3d.py +8762 -0
- pysisyphus/wavefunction/ints/int2c2e3d.py +7198 -0
- pysisyphus/wavefunction/ints/int3c2e3d_sph.py +65040 -0
- pysisyphus/wavefunction/ints/kinetic3d.py +8240 -0
- pysisyphus/wavefunction/ints/ovlp3d.py +3777 -0
- pysisyphus/wavefunction/ints/quadrupole3d.py +15054 -0
- pysisyphus/wavefunction/ints/self_ovlp3d.py +198 -0
- pysisyphus/wavefunction/localization.py +458 -0
- pysisyphus/wavefunction/multipole.py +159 -0
- pysisyphus/wavefunction/normalization.py +36 -0
- pysisyphus/wavefunction/pop_analysis.py +134 -0
- pysisyphus/wavefunction/shells.py +1171 -0
- pysisyphus/wavefunction/wavefunction.py +504 -0
- pysisyphus/wrapper/__init__.py +11 -0
- pysisyphus/wrapper/exceptions.py +2 -0
- pysisyphus/wrapper/jmol.py +120 -0
- pysisyphus/wrapper/mwfn.py +169 -0
- pysisyphus/wrapper/packmol.py +71 -0
- pysisyphus/xyzloader.py +168 -0
- pysisyphus/yaml_mods.py +45 -0
- thermoanalysis/LICENSE +674 -0
- thermoanalysis/QCData.py +244 -0
- thermoanalysis/__init__.py +0 -0
- thermoanalysis/config.py +3 -0
- thermoanalysis/constants.py +20 -0
- thermoanalysis/thermo.py +1011 -0
mlmm/fix_altloc.py
ADDED
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
fix_altloc.py - Drop alternate locations from PDB files
|
|
4
|
+
|
|
5
|
+
What it does
|
|
6
|
+
------------
|
|
7
|
+
1) Blank the PDB altLoc column (column 17, 1-based) with a single space.
|
|
8
|
+
- This is a 1-character replacement (no shifting / no reformatting).
|
|
9
|
+
2) If the same atom appears multiple times due to alternate locations
|
|
10
|
+
(altLoc like A/B/... or custom labels like H/L),
|
|
11
|
+
keep the "best" one by the default rule:
|
|
12
|
+
- Highest occupancy first
|
|
13
|
+
- If tied (or occupancy missing), keep the earliest one in the file
|
|
14
|
+
|
|
15
|
+
Handled records
|
|
16
|
+
---------------
|
|
17
|
+
- ATOM / HETATM
|
|
18
|
+
- ANISOU is also handled: ANISOU lines are kept only if the corresponding
|
|
19
|
+
ATOM/HETATM line (same serial) is kept.
|
|
20
|
+
|
|
21
|
+
Notes
|
|
22
|
+
-----
|
|
23
|
+
- Atom serial numbers are NOT renumbered (gaps may remain).
|
|
24
|
+
- CONECT and other connectivity/annotation records are NOT updated.
|
|
25
|
+
|
|
26
|
+
Usage
|
|
27
|
+
-----
|
|
28
|
+
mlmm fix-altloc -i input.pdb -o output.pdb
|
|
29
|
+
mlmm fix-altloc -i ./dir -o ./dir_clean --recursive
|
|
30
|
+
mlmm fix-altloc -i ./dir --inplace --recursive
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
import shutil
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple
|
|
36
|
+
|
|
37
|
+
import click
|
|
38
|
+
|
|
39
|
+
# Line prefixes that mark coordinate records; passed to str.startswith().
COORD_RECORDS = ("ATOM ", "HETATM")
# Line prefix of ANISOU records, which are kept or dropped by atom serial.
ANISOU_RECORD = "ANISOU"

# PDB fixed columns (0-based Python indices)
ALTLOC_IDX = 16  # column 17 (1-based)
SERIAL_SLICE = slice(6, 11)  # columns 7-11 (1-based), width 5
OCC_SLICE = slice(54, 60)  # columns 55-60 (1-based), width 6
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def split_newline(line: str) -> Tuple[str, str]:
    """Separate *line* into its text and its trailing line terminator.

    Returns ``(text, terminator)`` where the terminator is ``"\\r\\n"``,
    ``"\\n"``, ``"\\r"`` or ``""`` (no terminator), exactly as found, so the
    caller can re-attach it unchanged.
    """
    # CRLF has to be tried before LF, otherwise "\r\n" would be cut in half.
    for terminator in ("\r\n", "\n", "\r"):
        if line.endswith(terminator):
            return line[: -len(terminator)], terminator
    return line, ""
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def ensure_len(core: str, n: int) -> str:
    """Return *core* with trailing spaces added until it is at least *n* long.

    A string that already has *n* or more characters is returned as-is;
    existing characters are never moved or removed.
    """
    shortfall = n - len(core)
    if shortfall <= 0:
        return core
    return core + " " * shortfall
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def blank_altloc(line: str) -> str:
    """Overwrite the altLoc character (column 17, 1-based) with a space.

    Exactly one character is replaced, so every other column keeps its
    position. Lines too short to reach the altLoc column are right-padded
    with spaces first. The original line terminator is re-attached untouched.
    """
    body, terminator = split_newline(line)
    # Work on a character list so the single-column overwrite is explicit.
    chars = list(ensure_len(body, ALTLOC_IDX + 1))
    chars[ALTLOC_IDX] = " "
    return "".join(chars) + terminator
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def atom_serial_5(line: str) -> str:
    """Extract the raw 5-character atom serial field (columns 7-11, 1-based).

    The field is returned verbatim, including any padding spaces. Short lines
    are space-padded so the result is always 5 characters wide.
    """
    body = split_newline(line)[0]
    padded = ensure_len(body, SERIAL_SLICE.stop)
    return padded[SERIAL_SLICE]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def parse_occupancy(line: str) -> Optional[float]:
    """Read the occupancy value stored in columns 55-60 (1-based).

    Returns the value as a float, or None when the field is blank, lies
    beyond the end of a short line, or cannot be converted by ``float()``.
    """
    body, _terminator = split_newline(line)
    field = ensure_len(body, OCC_SLICE.stop)[OCC_SLICE].strip()
    try:
        # An empty field is "missing", not an error.
        return float(field) if field else None
    except ValueError:
        return None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def atom_identity_key(line: str) -> Tuple[str, str, str, str, str, str, str]:
    """Return a tuple that identifies an atom independently of its altLoc.

    Two coordinate lines with equal keys are treated as alternate locations
    of the same atom. The key is built from these fixed-width columns
    (1-based), taken verbatim including padding spaces:

    - record name (ATOM/HETATM)      cols 1-6
    - atom name                      cols 13-16
    - residue name                   cols 18-20
    - chain ID                       col 22
    - residue sequence number        cols 23-26
    - insertion code                 col 27
    - segID (non-standard, common)   cols 73-76

    The altLoc column (17) is deliberately left out. segID is included to
    reduce accidental merging in MD-style PDBs where the chain ID may be blank.
    """
    # Pad to 76 columns so the segID slice is always 4 characters wide.
    body = ensure_len(split_newline(line)[0], 76)
    return (
        body[0:6],    # record name
        body[12:16],  # atom name
        body[17:20],  # residue name
        body[21:22],  # chain ID
        body[22:26],  # residue sequence number
        body[26:27],  # insertion code
        body[72:76],  # segID, optional/non-standard
    )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def process_block(lines: List[str]) -> List[str]:
    """Resolve alternate locations within one block of PDB lines.

    A block is either a whole file without MODEL records or the lines between
    a MODEL and its ENDMDL.

    Pass 1 scans every ATOM/HETATM line and remembers, per atom identity key,
    the line with the highest occupancy. A missing or unparseable occupancy
    ranks below every real value; on a tie the line that appears first wins.

    Pass 2 emits, in original order:

    - the winning coordinate line of each key, with its altLoc blanked;
    - ANISOU lines whose serial field equals the serial of a winning
      coordinate line, with their altLoc blanked;
    - every other record unchanged.

    Atoms that exist in only one altLoc state have a key of their own and are
    therefore all kept; only true duplicates (same key) are reduced to one.
    For example, if altLoc A holds N, CA, CB, CG and altLoc B holds N, CA, CB,
    CD, the output contains one N, CA and CB plus both CG and CD.
    """
    # Pass 1: key -> (occupancy used for comparison, line index, serial field)
    winners: Dict[Tuple[str, str, str, str, str, str, str], Tuple[float, int, str]] = {}
    for pos, text in enumerate(lines):
        if not text.startswith(COORD_RECORDS):
            continue
        occupancy = parse_occupancy(text)
        score = float("-inf") if occupancy is None else occupancy
        identity = atom_identity_key(text)
        incumbent = winners.get(identity)
        # Lines are visited in file order, so a strict ">" already leaves the
        # earliest line in place when occupancies are equal.
        if incumbent is None or score > incumbent[0]:
            winners[identity] = (score, pos, atom_serial_5(text))

    kept_positions: Set[int] = {entry[1] for entry in winners.values()}
    kept_serials: Set[str] = {entry[2] for entry in winners.values()}

    # Pass 2: filter coordinate/ANISOU lines, pass everything else through.
    result: List[str] = []
    for pos, text in enumerate(lines):
        if text.startswith(COORD_RECORDS):
            if pos in kept_positions:
                result.append(blank_altloc(text))
        elif text.startswith(ANISOU_RECORD):
            if atom_serial_5(text) in kept_serials:
                result.append(blank_altloc(text))
        else:
            result.append(text)
    return result
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def process_stream(lines: Iterable[str]) -> Iterator[str]:
    """Yield cleaned lines, resolving altLocs separately for each MODEL.

    Lines are collected until a ``MODEL `` line, or an ``ENDMDL`` line that
    closes an open model, is seen. The collected lines are then run through
    ``process_block`` and the MODEL/ENDMDL line itself is emitted unchanged.
    Lines outside any MODEL block (including a file with no MODEL records at
    all) are processed the same way, as blocks of their own.
    """
    pending: List[str] = []
    inside_model = False

    for line in lines:
        opens_model = line.startswith("MODEL ")
        closes_model = inside_model and line.startswith("ENDMDL")

        if not (opens_model or closes_model):
            pending.append(line)
            continue

        # Boundary reached: flush what was gathered, then emit the boundary.
        if pending or closes_model:
            yield from process_block(pending)
        pending = []
        inside_model = opens_model
        yield line

    # Whatever is left after the last boundary (or the whole file).
    if pending:
        yield from process_block(pending)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def clean_pdb_file(in_path: Path, out_path: Path) -> None:
    """Read a PDB file, resolve its alternate locations, and write the result.

    Parameters
    ----------
    in_path : Path
        PDB file to read.
    out_path : Path
        File to write; missing parent directories are created. May be the
        same file as ``in_path``.

    Raises
    ------
    OSError
        If the input cannot be read or the output cannot be written.

    Notes
    -----
    The input is read and processed completely before the output is opened.
    Opening the output first would truncate the input when both paths point
    to the same file, and would leave a partial output behind if processing
    failed halfway.

    Line terminators are preserved exactly (``newline=""``). Bytes that are
    not valid UTF-8 are carried through unchanged via ``surrogateescape``
    instead of raising a decode error.
    """
    with in_path.open(
        "r", newline="", encoding="utf-8", errors="surrogateescape"
    ) as fin:
        cleaned = list(process_stream(fin))

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with out_path.open(
        "w", newline="", encoding="utf-8", errors="surrogateescape"
    ) as fout:
        fout.writelines(cleaned)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def collect_pdb_files(input_path: Path, recursive: bool) -> List[Path]:
    """List the PDB files designated by *input_path*.

    If *input_path* is a regular file it is returned as the only element,
    whatever its extension. Otherwise it is globbed for ``*.pdb`` (also in
    subdirectories when *recursive* is true) and the matching regular files
    are returned in sorted order.
    """
    if input_path.is_file():
        return [input_path]

    if recursive:
        pattern = "**/*.pdb"
    else:
        pattern = "*.pdb"

    found = [candidate for candidate in input_path.glob(pattern) if candidate.is_file()]
    found.sort()
    return found
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
# =============================================================================
|
|
246
|
+
# Public API for programmatic use
|
|
247
|
+
# =============================================================================
|
|
248
|
+
|
|
249
|
+
def has_altloc(pdb_path: Path) -> bool:
    """Report whether any ATOM/HETATM line has a non-blank altLoc column.

    Parameters
    ----------
    pdb_path : Path
        PDB file to inspect.

    Returns
    -------
    bool
        True as soon as one coordinate record carries a character other than
        a space in column 17 (1-based). False if none does, or if the file
        cannot be opened or read (best-effort check).
    """
    try:
        with open(pdb_path, "r", encoding="utf-8", errors="ignore") as fh:
            for line in fh:
                if not line.startswith(COORD_RECORDS):
                    continue
                # Strip the terminator first: on a 16-character record the
                # newline itself sits at index 16 and must not be mistaken
                # for an altLoc label.
                core = line.rstrip("\r\n")
                if len(core) > ALTLOC_IDX and core[ALTLOC_IDX] != " ":
                    return True
    except (OSError, ValueError):
        # Unreadable file or invalid path: treat as "no altLoc detected".
        return False
    return False
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def fix_altloc_file(
|
|
271
|
+
in_path: str | Path,
|
|
272
|
+
out_path: str | Path,
|
|
273
|
+
*,
|
|
274
|
+
overwrite: bool = False,
|
|
275
|
+
skip_if_no_altloc: bool = True,
|
|
276
|
+
) -> bool:
|
|
277
|
+
"""
|
|
278
|
+
Fix alternate locations in a PDB file.
|
|
279
|
+
|
|
280
|
+
Parameters
|
|
281
|
+
----------
|
|
282
|
+
in_path : str | Path
|
|
283
|
+
Input PDB file path.
|
|
284
|
+
out_path : str | Path
|
|
285
|
+
Output PDB file path.
|
|
286
|
+
overwrite : bool
|
|
287
|
+
If True, overwrite existing output file. Default False.
|
|
288
|
+
skip_if_no_altloc : bool
|
|
289
|
+
If True, skip processing if no altLoc is detected. Default True.
|
|
290
|
+
|
|
291
|
+
Returns
|
|
292
|
+
-------
|
|
293
|
+
bool
|
|
294
|
+
True if the file was processed (altloc found and fixed),
|
|
295
|
+
False if skipped (no altloc detected).
|
|
296
|
+
|
|
297
|
+
Raises
|
|
298
|
+
------
|
|
299
|
+
FileExistsError
|
|
300
|
+
If output file exists and overwrite=False.
|
|
301
|
+
FileNotFoundError
|
|
302
|
+
If input file does not exist.
|
|
303
|
+
"""
|
|
304
|
+
in_path = Path(in_path)
|
|
305
|
+
out_path = Path(out_path)
|
|
306
|
+
|
|
307
|
+
if not in_path.exists():
|
|
308
|
+
raise FileNotFoundError(f"Input file not found: {in_path}")
|
|
309
|
+
|
|
310
|
+
if out_path.exists() and not overwrite:
|
|
311
|
+
raise FileExistsError(f"Output file exists: {out_path}")
|
|
312
|
+
|
|
313
|
+
if skip_if_no_altloc and not has_altloc(in_path):
|
|
314
|
+
return False
|
|
315
|
+
|
|
316
|
+
clean_pdb_file(in_path, out_path)
|
|
317
|
+
return True
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
# =============================================================================
|
|
321
|
+
# CLI
|
|
322
|
+
# =============================================================================
|
|
323
|
+
|
|
324
|
+
def _run_fix_altloc(
    input_path: Path,
    out: Optional[Path],
    recursive: bool,
    inplace: bool,
    overwrite: bool,
    force: bool,
) -> None:
    """Core business logic for fix-altloc (called from Click CLI).

    Parameters
    ----------
    input_path : Path
        PDB file or directory to process.
    out : Optional[Path]
        Output location; ignored in in-place mode. For a file input, a path
        ending in ``.pdb`` is used as the output file and any other path is
        treated as a directory; None means ``<stem>_clean.pdb`` next to the
        input. For a directory input it is the output directory; None means
        ``<dirname>_clean`` next to the input directory.
    recursive : bool
        For a directory input, also collect ``*.pdb`` from subdirectories.
    inplace : bool
        Rewrite each input file in place. A ``<name>.pdb.bak`` copy is made
        first unless one already exists.
    overwrite : bool
        Permit replacing existing output files (not consulted in in-place mode).
    force : bool
        Process files even when no altLoc is detected.

    Raises
    ------
    click.ClickException
        If no PDB file is found, or an output file exists and ``overwrite``
        is False.
    """
    pdb_files = collect_pdb_files(input_path, recursive)
    if not pdb_files:
        raise click.ClickException(f"No .pdb files found in: {input_path}")

    skip_if_no_altloc = not force
    processed_count = 0
    skipped_count = 0

    # In-place mode
    if inplace:
        for in_path in pdb_files:
            if skip_if_no_altloc and not has_altloc(in_path):
                skipped_count += 1
                continue

            # Never overwrite an existing backup, so the oldest copy survives
            # repeated runs.
            bak_path = in_path.with_suffix(in_path.suffix + ".bak")
            if not bak_path.exists():
                shutil.copy2(in_path, bak_path)

            # Write to a sibling temp file and swap it in, so the input is
            # only replaced once the cleaned file is complete.
            tmp_path = in_path.with_suffix(in_path.suffix + ".tmp")
            try:
                clean_pdb_file(in_path, tmp_path)
                tmp_path.replace(in_path)
            except BaseException:
                # Do not leave a partial temp file next to the input.
                try:
                    tmp_path.unlink(missing_ok=True)
                except OSError:
                    pass  # best-effort cleanup; the original error matters more
                raise
            processed_count += 1

        if processed_count > 0:
            click.echo(f"[fix-altloc] Processed {processed_count} file(s) in-place.")
        if skipped_count > 0:
            click.echo(f"[fix-altloc] Skipped {skipped_count} file(s) (no altLoc detected).")
        return

    # File input
    if input_path.is_file():
        in_path = input_path

        if skip_if_no_altloc and not has_altloc(in_path):
            click.echo(f"[fix-altloc] Skipped {in_path} (no altLoc detected).")
            return

        if out is None:
            out_path = in_path.with_name(in_path.stem + "_clean.pdb")
        elif out.suffix.lower() == ".pdb":
            out_path = out
        else:
            # Anything not ending in .pdb is taken to be an output directory.
            out.mkdir(parents=True, exist_ok=True)
            out_path = out / in_path.name

        if out_path.exists() and not overwrite:
            raise click.ClickException(f"Output exists: {out_path} (use --overwrite to overwrite)")

        clean_pdb_file(in_path, out_path)
        click.echo(f"[fix-altloc] Fixed altLoc → {out_path}")
        return

    # Directory input
    in_dir = input_path
    out_dir = out if out is not None else in_dir.with_name(in_dir.name + "_clean")
    out_dir.mkdir(parents=True, exist_ok=True)

    for in_path in pdb_files:
        if skip_if_no_altloc and not has_altloc(in_path):
            skipped_count += 1
            continue

        # Mirror the input's relative layout under the output directory.
        rel = in_path.relative_to(in_dir)
        out_path = out_dir / rel

        if out_path.exists() and not overwrite:
            raise click.ClickException(f"Output exists: {out_path} (use --overwrite to overwrite)")

        clean_pdb_file(in_path, out_path)
        processed_count += 1

    if processed_count > 0:
        click.echo(f"[fix-altloc] Processed {processed_count} file(s) → {out_dir}")
    if skipped_count > 0:
        click.echo(f"[fix-altloc] Skipped {skipped_count} file(s) (no altLoc detected).")
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
# Click entry point for `mlmm fix-altloc`. The help text is supplied through
# `help=` below; all work is delegated to _run_fix_altloc.
@click.command(
    name="fix-altloc",
    help=(
        "Blank PDB altLoc column (col 17) without shifting, and keep one altLoc "
        "per atom by default rule: highest occupancy, then earliest appearance."
    ),
    context_settings=dict(help_option_names=["-h", "--help"]),
)
@click.option(
    "-i",
    "--input",
    "input_path",
    required=True,
    type=click.Path(exists=True, path_type=Path),
    help="Input PDB file or directory.",
)
@click.option(
    "-o",
    "--out",
    default=None,
    type=click.Path(path_type=Path),
    help="Output file (if input is a file) or output directory (if input is a directory).",
)
@click.option(
    "--recursive/--no-recursive",
    default=False,
    show_default=True,
    help="When input is a directory, process *.pdb recursively (including subdirectories).",
)
@click.option(
    "--inplace/--no-inplace",
    default=False,
    show_default=True,
    help="Overwrite input file(s) in place (creates .bak next to each file).",
)
@click.option(
    "--overwrite/--no-overwrite",
    default=False,
    show_default=True,
    help="Allow overwriting existing output files.",
)
@click.option(
    "--force/--no-force",
    default=False,
    show_default=True,
    help="Process files even if no altLoc is detected (default: skip files without altLoc).",
)
def cli(
    input_path: Path,
    out: Optional[Path],
    recursive: bool,
    inplace: bool,
    overwrite: bool,
    force: bool,
) -> None:
    _run_fix_altloc(
        input_path=input_path,
        out=out,
        recursive=recursive,
        inplace=inplace,
        overwrite=overwrite,
        force=force,
    )
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
# Run the Click command when this module is executed directly as a script.
if __name__ == "__main__":
    cli()
|