molforge 0.0.3__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {molforge-0.0.3 → molforge-0.2.0}/.gitignore +5 -0
- {molforge-0.0.3 → molforge-0.2.0}/CHANGELOG.md +576 -1
- {molforge-0.0.3 → molforge-0.2.0}/PKG-INFO +17 -5
- {molforge-0.0.3 → molforge-0.2.0}/README.md +12 -3
- {molforge-0.0.3 → molforge-0.2.0}/notebooks/README.md +2 -1
- {molforge-0.0.3 → molforge-0.2.0}/pyproject.toml +25 -1
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/__init__.py +1 -2
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/core/__init__.py +4 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/core/atom.py +4 -2
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/core/atom_array.py +3 -3
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/core/chain.py +4 -1
- molforge-0.2.0/src/molforge/core/metadata_keys.py +267 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/core/protein.py +8 -1
- molforge-0.2.0/src/molforge/ensembles/__init__.py +90 -0
- molforge-0.2.0/src/molforge/ensembles/clustering.py +261 -0
- molforge-0.2.0/src/molforge/ensembles/consensus.py +155 -0
- molforge-0.2.0/src/molforge/ensembles/density.py +207 -0
- molforge-0.2.0/src/molforge/ensembles/geometry.py +175 -0
- molforge-0.2.0/src/molforge/ensembles/weighting.py +194 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/io/dispatch.py +71 -22
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/io/mmcif.py +14 -13
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/io/pdb.py +11 -10
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/io/pdb_alphafold.py +39 -18
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/md/__init__.py +27 -4
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/metrics/dockq.py +6 -3
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/metrics/gdt.py +4 -3
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/metrics/lddt.py +2 -1
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/metrics/tm.py +1 -1
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/ml/embeddings.py +14 -5
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/ml/graph.py +2 -2
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/ml/structure_features.py +10 -7
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/plugins/__init__.py +2 -0
- molforge-0.2.0/src/molforge/plugins/registry.py +99 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/sequence/alignment.py +12 -5
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/structure/contacts.py +6 -4
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/structure/dihedrals.py +4 -1
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/structure/dssp.py +1 -1
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/structure/geometry.py +3 -3
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/structure/rmsd.py +4 -2
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/structure/sasa.py +2 -1
- molforge-0.2.0/src/molforge/validation/__init__.py +84 -0
- molforge-0.2.0/src/molforge/validation/criteria.py +250 -0
- molforge-0.2.0/src/molforge/validation/orchestration.py +318 -0
- molforge-0.2.0/src/molforge/validation/verdict.py +100 -0
- molforge-0.2.0/src/molforge/wrappers/docking/diffdock.py +54 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/docking/prep.py +10 -10
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/docking/vina.py +4 -5
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/folding/__init__.py +11 -7
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/folding/alphafold.py +14 -9
- molforge-0.2.0/src/molforge/wrappers/folding/boltz.py +378 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/folding/esmfold.py +11 -10
- molforge-0.2.0/src/molforge/wrappers/folding/rosetta.py +55 -0
- molforge-0.2.0/src/molforge/wrappers/folding/rosettafold.py +401 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/generative/proteinmpnn.py +2 -2
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/generative/rfdiffusion.py +1 -1
- molforge-0.2.0/src/molforge/wrappers/md/gromacs.py +78 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/md/openmm.py +13 -5
- molforge-0.2.0/tests/benchmarks/conftest.py +134 -0
- molforge-0.2.0/tests/benchmarks/test_perf.py +97 -0
- molforge-0.2.0/tests/fixtures/pdb/real_small_protein.pdb +202 -0
- molforge-0.2.0/tests/fixtures/pdb/real_with_altloc_sidechains.pdb +101 -0
- molforge-0.2.0/tests/fixtures/pdb/real_with_ligand_realistic.pdb +80 -0
- molforge-0.2.0/tests/integration/test_real_fixtures.py +456 -0
- molforge-0.2.0/tests/unit/core/test_metadata_keys.py +225 -0
- molforge-0.2.0/tests/unit/ensembles/conftest.py +138 -0
- molforge-0.2.0/tests/unit/ensembles/test_clustering.py +162 -0
- molforge-0.2.0/tests/unit/ensembles/test_consensus.py +166 -0
- molforge-0.2.0/tests/unit/ensembles/test_density.py +180 -0
- molforge-0.2.0/tests/unit/ensembles/test_geometry.py +122 -0
- molforge-0.2.0/tests/unit/ensembles/test_smoke.py +58 -0
- molforge-0.2.0/tests/unit/ensembles/test_weighting.py +224 -0
- molforge-0.2.0/tests/unit/io/test_dispatch.py +196 -0
- molforge-0.2.0/tests/unit/plugins/test_registry.py +157 -0
- molforge-0.2.0/tests/unit/structure/__init__.py +0 -0
- molforge-0.2.0/tests/unit/test_typing.py +62 -0
- molforge-0.2.0/tests/unit/validation/test_criteria.py +197 -0
- molforge-0.2.0/tests/unit/validation/test_orchestration.py +373 -0
- molforge-0.2.0/tests/unit/validation/test_verdict.py +101 -0
- molforge-0.2.0/tests/unit/wrappers/__init__.py +0 -0
- molforge-0.2.0/tests/unit/wrappers/test_boltz.py +533 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/wrappers/test_docking_base.py +30 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/wrappers/test_md_base.py +45 -0
- molforge-0.2.0/tests/unit/wrappers/test_proteinmpnn.py +361 -0
- molforge-0.2.0/tests/unit/wrappers/test_rosettafold.py +518 -0
- molforge-0.0.3/src/molforge/plugins/registry.py +0 -48
- molforge-0.0.3/src/molforge/wrappers/docking/diffdock.py +0 -17
- molforge-0.0.3/src/molforge/wrappers/folding/boltz.py +0 -20
- molforge-0.0.3/src/molforge/wrappers/folding/rosetta.py +0 -19
- molforge-0.0.3/src/molforge/wrappers/md/gromacs.py +0 -18
- molforge-0.0.3/tests/unit/core/test_core_types.py +0 -33
- molforge-0.0.3/tests/unit/io/test_dispatch.py +0 -74
- molforge-0.0.3/tests/unit/plugins/test_registry.py +0 -14
- molforge-0.0.3/tests/unit/wrappers/test_proteinmpnn.py +0 -157
- {molforge-0.0.3 → molforge-0.2.0}/LICENSE +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/data/README.md +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/plugins/README.md +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/plugins/example_plugin/README.md +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/plugins/example_plugin/pyproject.toml +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/requirements/README.md +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/scripts/README.md +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/core/constants.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/core/residue.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/docking/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/generative.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/io/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/io/fasta.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/io/mol2.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/io/pdbqt.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/io/pqr.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/io/sdf.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/metrics/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/ml/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/ml/sequence_features.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/py.typed +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/sequence/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/sequence/composition.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/sequence/matrices.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/sequence/mutations.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/structure/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/structure/superposition.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/docking/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/docking/_base.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/folding/_base.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/generative/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/md/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/src/molforge/wrappers/md/_base.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/__init__.py +0 -0
- {molforge-0.0.3/tests/integration → molforge-0.2.0/tests/benchmarks}/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/conftest.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/cif/dipeptide.cif +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/fasta/.gitkeep +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/fasta/multiline_with_digits.fasta +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/fasta/simple.fasta +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/.gitkeep +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/alphafold_mock.pdb +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/dipeptide.pdb +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/helix.pdb +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/mini_beta_sheet.pdb +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/mini_complex_bad.pdb +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/mini_complex_good.pdb +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/mini_complex_native.pdb +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/mini_ensemble.pdb +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/mini_mixed.pdb +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/mini_with_ligand.pdb +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/multi_model.pdb +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/tripeptide.pdb +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/with_altloc.pdb +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/fixtures/pdb/with_insertion_code.pdb +0 -0
- {molforge-0.0.3/tests/unit → molforge-0.2.0/tests/integration}/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/integration/test_fixtures.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/integration/test_smoke.py +0 -0
- {molforge-0.0.3/tests/unit/core → molforge-0.2.0/tests/unit}/__init__.py +0 -0
- {molforge-0.0.3/tests/unit/docking → molforge-0.2.0/tests/unit/core}/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/core/test_atom_array.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/core/test_constants.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/core/test_core_smoke.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/core/test_hierarchy.py +0 -0
- {molforge-0.0.3/tests/unit/io → molforge-0.2.0/tests/unit/docking}/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/docking/test_docking_smoke.py +0 -0
- {molforge-0.0.3/tests/unit/md → molforge-0.2.0/tests/unit/ensembles}/__init__.py +0 -0
- {molforge-0.0.3/tests/unit/metrics → molforge-0.2.0/tests/unit/io}/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/io/test_alphafold.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/io/test_fasta.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/io/test_io_smoke.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/io/test_mmcif.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/io/test_pdb.py +0 -0
- {molforge-0.0.3/tests/unit/ml → molforge-0.2.0/tests/unit/md}/__init__.py +0 -0
- {molforge-0.0.3/tests/unit/plugins → molforge-0.2.0/tests/unit/metrics}/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/metrics/test_dockq.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/metrics/test_gdt.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/metrics/test_lddt.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/metrics/test_tm.py +0 -0
- {molforge-0.0.3/tests/unit/sequence → molforge-0.2.0/tests/unit/ml}/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/ml/test_embeddings.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/ml/test_graph.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/ml/test_sequence_features.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/ml/test_structure_features.py +0 -0
- {molforge-0.0.3/tests/unit/structure → molforge-0.2.0/tests/unit/plugins}/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/plugins/test_plugins_smoke.py +0 -0
- {molforge-0.0.3/tests/unit/wrappers → molforge-0.2.0/tests/unit/sequence}/__init__.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/sequence/test_alignment.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/sequence/test_composition.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/sequence/test_matrices.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/sequence/test_mutations.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/structure/test_contacts.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/structure/test_dihedrals.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/structure/test_dssp.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/structure/test_geometry.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/structure/test_rmsd.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/structure/test_sasa.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/structure/test_structure_smoke.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/structure/test_superposition.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/wrappers/test_alphafold.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/wrappers/test_esmfold.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/wrappers/test_folding_base.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/wrappers/test_openmm.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/wrappers/test_prep.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/wrappers/test_rfdiffusion.py +0 -0
- {molforge-0.0.3 → molforge-0.2.0}/tests/unit/wrappers/test_vina.py +0 -0
|
@@ -63,6 +63,11 @@ Thumbs.db
|
|
|
63
63
|
docs/_build/
|
|
64
64
|
docs/site/
|
|
65
65
|
site/
|
|
66
|
+
# Notebooks copied into docs/ at build time by docs/_hooks/copy_notebooks.py.
|
|
67
|
+
# The canonical copies live in notebooks/; these staged copies are
|
|
68
|
+
# regenerated on every build and must never be committed.
|
|
69
|
+
docs/walkthroughs/*.ipynb
|
|
70
|
+
docs/examples/*.ipynb
|
|
66
71
|
|
|
67
72
|
# Data files (keep small fixtures only; ignore bulk data)
|
|
68
73
|
data/*
|
|
@@ -5,8 +5,583 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **ProteinMPNN wrapper test coverage raised from 69% to 96%.** The
|
|
12
|
+
two previously-untested seams of `wrappers.generative.proteinmpnn`
|
|
13
|
+
now have direct tests: `_parse_outputs` (FASTA-file discovery —
|
|
14
|
+
single file, multi-PDB stem matching, the `.fasta` extension, and
|
|
15
|
+
the no-output error path) and `_run_cli` (the subprocess-driving
|
|
16
|
+
seam, exercised with a mocked `subprocess.run` so neither
|
|
17
|
+
ProteinMPNN nor torch need be installed — covering command
|
|
18
|
+
assembly, `chains_to_design` / `fixed_positions` / `ca_only` /
|
|
19
|
+
`use_soluble_model` flag pass-through, the public `generate()`
|
|
20
|
+
entry point, and `CalledProcessError` → `RuntimeError` translation).
|
|
21
|
+
Edge-case tests for the `_parse_metadata` header parser (numeric vs.
|
|
22
|
+
string values, tokens without `=`) were also added. 12 new tests
|
|
23
|
+
(17 → 29 in the file).
|
|
24
|
+
- **Performance benchmark suite (`tests/benchmarks/`).** Baseline
|
|
25
|
+
timings for the five structural-analysis functions most likely to
|
|
26
|
+
sit in a pipeline inner loop: RMSD (with and without Kabsch
|
|
27
|
+
superposition), DSSP, lDDT, distance/contact maps, and global /
|
|
28
|
+
local sequence alignment. 8 benchmarks total, run against a
|
|
29
|
+
synthetic 200-residue protein generated parametrically (an
|
|
30
|
+
idealized alpha-helix with valid per-residue backbone geometry —
|
|
31
|
+
reproducible, no large fixture files). Built on `pytest-benchmark`
|
|
32
|
+
(added to the `[dev]` extra). The benchmarks are marked
|
|
33
|
+
`benchmark` *and* `slow`, so a normal `pytest` run and the CI
|
|
34
|
+
`test` job (`-m "not slow"`) skip them; run them explicitly with
|
|
35
|
+
`pytest -m benchmark`. A new non-blocking CI `benchmark` job
|
|
36
|
+
exercises them on every push so a broken benchmark is caught,
|
|
37
|
+
while timing variance on shared runners never gates the build —
|
|
38
|
+
for real regression tracking, save a local baseline with
|
|
39
|
+
`pytest -m benchmark --benchmark-save=baseline` and compare. The
|
|
40
|
+
suite skips cleanly (rather than erroring) when `pytest-benchmark`
|
|
41
|
+
isn't installed.
|
|
42
|
+
- **`io.fetch` is now implemented.** `molforge.io.fetch` — exported
|
|
43
|
+
but previously a `NotImplementedError` stub — now downloads
|
|
44
|
+
structures from the RCSB Protein Data Bank
|
|
45
|
+
(`source="rcsb"`, the default) or the AlphaFold Protein Structure
|
|
46
|
+
Database (`source="alphafold"`), in PDB or mmCIF format. It uses
|
|
47
|
+
only the standard library (`urllib`), so it adds no dependency.
|
|
48
|
+
Network and HTTP-404 failures surface as a clear `OSError` with
|
|
49
|
+
the failing URL; bad arguments raise `ValueError`. 7 new tests
|
|
50
|
+
(argument validation + mocked-network success and failure paths).
|
|
51
|
+
Surfaced by the API audit.
|
|
52
|
+
- **`docs/architecture/api-stability.md` — API stability reference.**
|
|
53
|
+
New documentation page recording the pre-1.0 API audit: which
|
|
54
|
+
parts of the public surface are committed (semver-protected) vs.
|
|
55
|
+
tentative (may still change), the audit-driven changes, and the
|
|
56
|
+
contract for engine-private fields. Added to the docs nav under
|
|
57
|
+
Architecture.
|
|
58
|
+
- **`molforge.core.metadata_keys` — documented vocabulary for
|
|
59
|
+
`Protein.metadata`.** `Protein.metadata` remains a free-form
|
|
60
|
+
`dict[str, Any]` (no breaking change), but the keys molforge's own
|
|
61
|
+
parsers and engine wrappers produce are now a documented, stable
|
|
62
|
+
contract. The new module provides string constants for every such
|
|
63
|
+
key (`ENGINE`, `MEAN_CONFIDENCE`, `PDB_ID`, `PAE_INTER`, ...), a
|
|
64
|
+
`ProteinMetadata` TypedDict (`total=False`) re-exported from
|
|
65
|
+
`molforge.core` for editor/mypy support, and a `DOCUMENTED_KEYS`
|
|
66
|
+
frozenset. Keys cover three groups: structural-IO header keys (PDB
|
|
67
|
+
/ mmCIF parsers), uniform folding-engine keys (set by every folding
|
|
68
|
+
wrapper), and engine-specific folding keys. The PDB/mmCIF parsers
|
|
69
|
+
and all four folding wrappers now write metadata via these
|
|
70
|
+
constants, so a key typo is an import-time `NameError` rather than
|
|
71
|
+
a silently-missing key. Keys outside the documented vocabulary are
|
|
72
|
+
still permitted but carry no cross-version stability guarantee. 15
|
|
73
|
+
new tests, including consistency checks that the parsers only emit
|
|
74
|
+
documented keys and that the TypedDict matches `DOCUMENTED_KEYS`.
|
|
75
|
+
|
|
76
|
+
### Fixed
|
|
77
|
+
- **`load_alphafold` now emits the uniform confidence metadata keys.**
|
|
78
|
+
`molforge.io.load_alphafold` previously wrote only AlphaFold-specific
|
|
79
|
+
keys (`plddt`, `plddt_per_residue`, `mean_plddt`, `source`), while
|
|
80
|
+
the AlphaFold *wrapper* wrote the cross-engine-uniform keys
|
|
81
|
+
(`confidence_per_atom`, `confidence_per_residue`, `mean_confidence`,
|
|
82
|
+
`engine`). Downstream code reading confidence uniformly across
|
|
83
|
+
engines silently missed AlphaFold structures loaded from disk.
|
|
84
|
+
`load_alphafold` now populates both sets (uniform keys preferred,
|
|
85
|
+
legacy keys retained for backward compatibility); the two carry
|
|
86
|
+
identical values. Surfaced by the API audit.
|
|
87
|
+
- **`GROMACS` and `DiffDock` are now coherent stubs.** Both are
|
|
88
|
+
exported (committed import paths) but unimplemented. Previously
|
|
89
|
+
they were *incoherent*: `GROMACS` didn't implement its `MDEngine`
|
|
90
|
+
abstract methods at all, so `GROMACS()` failed with a cryptic
|
|
91
|
+
"Can't instantiate abstract class" `TypeError` rather than a
|
|
92
|
+
meaningful message; both engines' methods raised a bare
|
|
93
|
+
`NotImplementedError` with no text. They are now coherent stubs —
|
|
94
|
+
instantiable, satisfying their respective engine ABCs
|
|
95
|
+
(`MDEngine` / `DockingEngine`), with every method raising
|
|
96
|
+
`NotImplementedError` carrying a clear message that points at the
|
|
97
|
+
working alternative (`OpenMM` / `Vina`) and the tracking issue.
|
|
98
|
+
10 new tests. Surfaced by the API audit.
|
|
99
|
+
- **Lint drift from a Ruff version bump cleared; CI lint job green
|
|
100
|
+
again.** `.pre-commit-config.yaml` pinned `ruff-pre-commit` at
|
|
101
|
+
`v0.5.0`, but the `[dev]` extra installs `ruff>=0.5` unpinned, so
|
|
102
|
+
CI resolved a much newer Ruff (0.15.x) whose added rules flagged
|
|
103
|
+
33 pre-existing issues — meaning the CI `lint` job was effectively
|
|
104
|
+
red. All 33 are now resolved: a genuine dead variable in
|
|
105
|
+
`ensembles.clustering` removed, an unused `shutil` import dropped,
|
|
106
|
+
five `pytest.raises(match=...)` patterns with unescaped regex
|
|
107
|
+
metacharacters made explicit (raw strings / escaped dots), a
|
|
108
|
+
`zip()` given an explicit `strict=`, four nested `with` statements
|
|
109
|
+
collapsed, a `getattr()` call with a string literal in
|
|
110
|
+
`ensembles.weighting` replaced by a `cast`-backed direct attribute
|
|
111
|
+
access (dropping a now-misplaced `# noqa`), and a Ruff-version
|
|
112
|
+
formatting refresh applied across 24 files (cosmetic line-joining
|
|
113
|
+
only). Two intentional-notation cases
|
|
114
|
+
are configured rather than rewritten: `allowed-confusables`
|
|
115
|
+
permits `×`, `σ`, and `–` in docstrings (matrix dimensions, the
|
|
116
|
+
standard deviation, prose dashes), and `RUF022` is per-file-ignored
|
|
117
|
+
for the two modules whose `__all__` is deliberately grouped by
|
|
118
|
+
category with section comments. The `ruff` and `mypy` pre-commit
|
|
119
|
+
pins are bumped to the versions CI resolves, so the two stay in
|
|
120
|
+
lock-step and this drift cannot silently recur. No source-behaviour
|
|
121
|
+
or test-count change (918 pass + 11 skipped, unchanged).
|
|
122
|
+
|
|
123
|
+
### Changed
|
|
124
|
+
- **BREAKING: `cross_validate` now defaults to `on_error="raise"`.**
|
|
125
|
+
Previously `cross_validate` defaulted to `on_error="record"` —
|
|
126
|
+
exceptions raised by individual validators were silently caught,
|
|
127
|
+
recorded in verdict metadata, and the verdict marked
|
|
128
|
+
`passed=False`. The problem: a validator that throws on every
|
|
129
|
+
design (a misconfigured engine, a missing dependency, a bad
|
|
130
|
+
input) produced a full list of `passed=False` verdicts that
|
|
131
|
+
*looked* like a real result, hiding the bug. The new default
|
|
132
|
+
fails loud. Code that genuinely wants a batch to survive
|
|
133
|
+
individual validator failures must now pass `on_error="record"`
|
|
134
|
+
explicitly. Flagged and resolved by the API audit.
|
|
135
|
+
- **The entire `molforge` package is now `mypy --strict` clean.**
|
|
136
|
+
With `wrappers` and `plugins` brought up to strict, all 77 source
|
|
137
|
+
modules across every subpackage pass `mypy --strict` with zero
|
|
138
|
+
errors. The CI `typecheck` job is correspondingly simplified: the
|
|
139
|
+
previous two-step arrangement (a strict gate on the clean
|
|
140
|
+
subpackages plus a non-blocking informational full-tree run)
|
|
141
|
+
collapses to a single `mypy src` gate that fails the build on any
|
|
142
|
+
type error. The `tests/unit/test_typing.py` regression test is
|
|
143
|
+
likewise simplified to one whole-package check. 31 errors fixed in
|
|
144
|
+
this final tranche: 20 stale `# type: ignore` comments (made
|
|
145
|
+
redundant when the optional heavy dependencies were added to the
|
|
146
|
+
mypy `ignore_missing_imports` override), four deliberate
|
|
147
|
+
engine-method `# type: ignore[override]` annotations (the concrete
|
|
148
|
+
engine wrappers refine the permissive `**kwargs` signatures of
|
|
149
|
+
their `DockingEngine` / `MDEngine` / `GenerativeEngine` abstract
|
|
150
|
+
bases — an intentional, documented refinement that mypy's strict
|
|
151
|
+
Liskov check cannot model), `cast`s for the opaque
|
|
152
|
+
`Simulation.engine_handle` inside the OpenMM wrapper and for the
|
|
153
|
+
unstubbed-dependency return values, and `Vina.dock`'s receptor
|
|
154
|
+
narrowing switched from `hasattr` to `isinstance` (a more correct
|
|
155
|
+
check that mypy can also narrow on).
|
|
156
|
+
- **`molforge.ml` is now `mypy --strict` clean.** The ML subpackage
|
|
157
|
+
(sequence/structure featurization, protein-language-model
|
|
158
|
+
embeddings) joins the strict gate — eight strict-clean
|
|
159
|
+
subpackages in total, 51 source files. Six errors fixed: the four
|
|
160
|
+
numpy-widening `no-any-return`s in `embeddings.py` (resolved with
|
|
161
|
+
`cast`s), and two real type bugs in `structure_features.py` —
|
|
162
|
+
`pair_distances` and `pair_distance_features` declared
|
|
163
|
+
`atom_choice: str` but passed it to `distance_map`, which requires
|
|
164
|
+
the `Literal["ca","cb","heavy","all"]` the docstrings already
|
|
165
|
+
specify, and a coordinate feature array silently upcast to
|
|
166
|
+
float64 by a division. The `torch` and `transformers` (and
|
|
167
|
+
`colabfold`, `meeko`, `vina`) optional heavy dependencies, which
|
|
168
|
+
ship no type stubs, are added to the mypy `ignore_missing_imports`
|
|
169
|
+
override alongside the existing `Bio` / `biotite` / `mdtraj` /
|
|
170
|
+
`openmm` / `rdkit` entries. CI strict gate and the
|
|
171
|
+
`tests/unit/test_typing.py` regression test updated; only
|
|
172
|
+
`plugins` and `wrappers` remain outside the gate.
|
|
173
|
+
- **Six more subpackages are now `mypy --strict` clean.**
|
|
174
|
+
`molforge.io`, `molforge.sequence`, `molforge.structure`,
|
|
175
|
+
`molforge.metrics`, `molforge.ensembles`, and
|
|
176
|
+
`molforge.validation` now pass `mypy --strict` with zero errors,
|
|
177
|
+
joining `molforge.core` — seven strict-clean subpackages in total,
|
|
178
|
+
46 source files. The 12 errors fixed were mostly numpy operations
|
|
179
|
+
mypy widens to `Any` (resolved with explicit `cast`s that document
|
|
180
|
+
the known array dtype) and two stale `type: ignore` comments; two
|
|
181
|
+
were genuine annotation bugs — `_place_hydrogens` in `dssp.py` was
|
|
182
|
+
declared to return a single array but actually returns a
|
|
183
|
+
`(coords, mask)` tuple, and `_score` in `alignment.py` was
|
|
184
|
+
declared `NDArray[np.int_]` but builds an `int32` array (`np.int_`
|
|
185
|
+
is `int64` on 64-bit platforms). The CI strict gate now covers all
|
|
186
|
+
seven subpackages; the regression test
|
|
187
|
+
(`tests/unit/test_typing.py`, moved up from `tests/unit/core/` and
|
|
188
|
+
parametrized) checks each one in-suite. The remaining subpackages
|
|
189
|
+
(`ml`, `plugins`, `wrappers`) are still tracked by the
|
|
190
|
+
non-blocking informational `mypy src` CI step.
|
|
191
|
+
- **`molforge.core` is now `mypy --strict` clean, and CI enforces
|
|
192
|
+
it.** The `core` subpackage — the data model the rest of the
|
|
193
|
+
library is built on — now passes `mypy --strict` with zero
|
|
194
|
+
errors (fixed: two missing `NDArray` type arguments in
|
|
195
|
+
`AtomArray`, an `Any`-return in `Atom.coord`, and an untyped
|
|
196
|
+
`Chain.__iter__` that was suppressed with a `type: ignore`). The
|
|
197
|
+
CI `typecheck` job now runs `mypy --strict src/molforge/core/`
|
|
198
|
+
as a hard gate, with a separate non-blocking full-tree `mypy src`
|
|
199
|
+
step that keeps the remaining (out-of-`core`) type errors visible
|
|
200
|
+
while they're worked through. A new `slow`-marked regression test
|
|
201
|
+
(`tests/unit/core/test_typing.py`) runs the strict check in-suite
|
|
202
|
+
so a `core` type regression is caught locally too.
|
|
203
|
+
|
|
204
|
+
### Documented
|
|
205
|
+
- **`Simulation.engine_handle` contract clarified.** The attribute
|
|
206
|
+
type (`object | None`) is correct — it really is an opaque,
|
|
207
|
+
engine-specific handle — but the contract was under-specified.
|
|
208
|
+
The docstring now states explicitly that `engine_handle` is
|
|
209
|
+
engine-private (callers must not inspect it or set it), is **not
|
|
210
|
+
serialized** (it typically wraps unpicklable C-extension state;
|
|
211
|
+
persistence layers must drop it and let the engine wrapper
|
|
212
|
+
rebuild it on resume), and carries **no semver guarantee**. For
|
|
213
|
+
inspectable per-simulation data, `Simulation.metadata` is the
|
|
214
|
+
supported field. No code change. Flagged by the API audit.
|
|
215
|
+
|
|
216
|
+
### Added
|
|
217
|
+
- **RoseTTAFold All-Atom folding wrapper.** New file
|
|
218
|
+
`src/molforge/wrappers/folding/rosettafold.py` implements a real
|
|
219
|
+
wrapper around the Baker lab's RoseTTAFold-All-Atom (Krishna et
|
|
220
|
+
al. 2024, *Science* 384: eadl2528). Like Boltz, RFAA is driven via
|
|
221
|
+
subprocess — invocation is `python -m rf2aa.run_inference
|
|
222
|
+
--config-name <name>` from inside the cloned repo with a Hydra
|
|
223
|
+
config the wrapper writes to a temporary directory. Constructor
|
|
224
|
+
resolves the repo via explicit `repo_dir=` or the `RFAA_HOME`
|
|
225
|
+
environment variable; checks for both directory existence and an
|
|
226
|
+
`rf2aa/` subdirectory before invocation. Supports custom Python
|
|
227
|
+
executable (for callers whose conda env is separate from
|
|
228
|
+
molforge's), the `loader_params.MAXCYCLE` override RFAA recommends
|
|
229
|
+
for hard cases, custom job naming, and arbitrary Hydra-style
|
|
230
|
+
overrides via `extra_overrides=`. Output post-processing parses
|
|
231
|
+
the PDB (per-atom pLDDT in the B-factor column) plus the
|
|
232
|
+
`*_aux.pt` PyTorch confidence file when torch is importable —
|
|
233
|
+
torch tensors converted to NumPy on the way out. Surfaces the
|
|
234
|
+
uniform folding-engine metadata keys
|
|
235
|
+
(`confidence_per_residue`, `confidence_per_atom`,
|
|
236
|
+
`mean_confidence`) plus RFAA-specific tensors (`pae`, `pde`,
|
|
237
|
+
`mean_pae`, `pae_prot`, `pae_inter` — the last is RFAA's headline
|
|
238
|
+
metric; values below 10 indicate a high-quality interface). Degrades gracefully when
|
|
239
|
+
torch isn't installed or the aux file is malformed: PDB-derived
|
|
240
|
+
confidence is still populated. v1 scope is single-chain protein
|
|
241
|
+
prediction matching the rest of the folding wrappers; protein-
|
|
242
|
+
ligand and covalent-modification co-folding (RFAA's headline
|
|
243
|
+
capability) need a separate `predict_complex()` surface and
|
|
244
|
+
remain planned. 47 new tests (45 passing + 2 correctly skipped:
|
|
245
|
+
one for the torch tensor conversion when torch isn't installed,
|
|
246
|
+
one @slow end-to-end requiring `$RFAA_HOME`). Total test count:
|
|
247
|
+
830 → 875 passed + 11 skipped.
|
|
248
|
+
|
|
249
|
+
### Deprecated
|
|
250
|
+
- **`molforge.wrappers.folding.Rosetta` is now a deprecated alias
|
|
251
|
+
for `RoseTTAFold`.** The original `rosetta.py` placeholder was
|
|
252
|
+
ambiguous about whether it referred to PyRosetta (the Baker lab's
|
|
253
|
+
classical sequence-design library) or RoseTTAFold (the deep-
|
|
254
|
+
learning model). The new real wrapper lives at
|
|
255
|
+
`RoseTTAFold` for clarity. `Rosetta` is retained as a thin
|
|
256
|
+
subclass that emits `DeprecationWarning` on construction so
|
|
257
|
+
existing imports / isinstance checks keep working through the
|
|
258
|
+
next minor release. A PyRosetta wrapper, if added, would live in
|
|
259
|
+
a separate module (`pyrosetta.py`) since PyRosetta's surface is
|
|
260
|
+
much wider than the `FoldingEngine` contract.
|
|
261
|
+
|
|
262
|
+
### Added
|
|
263
|
+
- **Boltz / Boltz-2 folding wrapper.** Real implementation replacing
|
|
264
|
+
the `boltz.py` stub. Drives the `boltz predict` CLI via subprocess
|
|
265
|
+
against a temporary directory and parses the resulting mmCIF +
|
|
266
|
+
confidence JSON sidecar. Supports both `boltz1` and `boltz2`
|
|
267
|
+
(default `boltz2`), MSA server toggling (`use_msa_server=True`
|
|
268
|
+
default; pass `False` for fast single-sequence inference),
|
|
269
|
+
configurable recycling steps, diffusion samples, sampling steps,
|
|
270
|
+
CPU/GPU routing via `--accelerator`, custom executable path, and
|
|
271
|
+
custom weights cache via `BOLTZ_CACHE`. Lazy CLI detection
|
|
272
|
+
(`shutil.which("boltz")`) — construction never touches the binary;
|
|
273
|
+
the first `predict()` call resolves it or raises a
|
|
274
|
+
`FoldingEngineNotInstalledError` with install hints. Output
|
|
275
|
+
metadata follows the uniform folding-engine convention
|
|
276
|
+
(`confidence_per_residue`, `confidence_per_atom`, `mean_confidence`)
|
|
277
|
+
and additionally surfaces Boltz-specific `ptm`, `iptm`, and
|
|
278
|
+
`confidence_score` from the JSON sidecar. 47 new tests (46 passing
|
|
279
|
+
+ 1 correctly skipped @slow end-to-end), structured as a series of
|
|
280
|
+
testable seams: construction, sequence validation, YAML input
|
|
281
|
+
construction, command-line assembly, environment setup, output
|
|
282
|
+
collection, subprocess invocation (with mocked `subprocess.run`),
|
|
283
|
+
and CIF post-processing in isolation. Total test count: 784 → 830
|
|
284
|
+
passed + 9 skipped.
|
|
285
|
+
- **`molforge.ensembles` — weighted statistics over pose ensembles.**
|
|
286
|
+
New top-level subpackage with seven public functions covering the
|
|
287
|
+
five standard analyses run against docking output:
|
|
288
|
+
|
|
289
|
+
- **Weighting:** `boltzmann_weights` (numerically-stable softmax
|
|
290
|
+
over scores, with physical defaults — kT at 298 K, 0.593 kcal/mol
|
|
291
|
+
— and a `lower_is_better` flag to accommodate higher-is-better ML confidence scores) and
|
|
292
|
+
`resample` (weighted bootstrap of pose objects, reproducible
|
|
293
|
+
with explicit `rng`).
|
|
294
|
+
- **Geometry:** `pairwise_rmsd` (N×N heavy-atom RMSD matrix over
|
|
295
|
+
ligand poses, vectorized in NumPy) and `pose_diversity` (summary
|
|
296
|
+
statistics over the upper triangle — min/max/mean/median/std —
|
|
297
|
+
for "did the docking actually explore?" diagnostics).
|
|
298
|
+
- **Clustering:** `pose_clusters` (hierarchical average-linkage
|
|
299
|
+
clustering at a user-specified RMSD cutoff, pure NumPy with no
|
|
300
|
+
scipy dependency; returns a `PoseClusteringResult` with cluster
|
|
301
|
+
labels, ordered `PoseCluster` objects with medoid index and
|
|
302
|
+
intra-cluster mean RMSD, and the underlying RMSD matrix).
|
|
303
|
+
- **Spatial:** `binding_site_density` (3D histogram of ligand
|
|
304
|
+
heavy-atom positions, auto-sized bounding box with configurable
|
|
305
|
+
padding or explicit `origin`/`shape` for comparative grids,
|
|
306
|
+
Boltzmann-weightable; returns a `DensityGrid` with a
|
|
307
|
+
`coordinate_of(ijk)` helper).
|
|
308
|
+
- **Consensus:** `consensus_pose` (medoid pick — returns one of
|
|
309
|
+
the input poses by reference — or weighted-mean synthesis —
|
|
310
|
+
returns a new `Pose` with averaged coords and weighted-average
|
|
311
|
+
score, marked in `metadata`).
|
|
312
|
+
|
|
313
|
+
Designed as a top-level subpackage (not under `docking`) because
|
|
314
|
+
the primitives generalize to MD trajectories and other structural
|
|
315
|
+
ensembles; v1 focuses on docking poses since that's the immediately
|
|
316
|
+
useful case. 1094 source lines across 5 modules, 120 new tests
|
|
317
|
+
across 5 test files. Total test count: 664 → 784 passed + 8 skipped.
|
|
318
|
+
|
|
319
|
+
Limitations documented in the module docstring and user guide:
|
|
320
|
+
pose RMSD is atom-order-sensitive (so it is an upper bound for symmetric ligands),
|
|
321
|
+
receptor is treated as fixed, and clustering is O(n³) and best
|
|
322
|
+
suited for ensembles of n ≲ 200 (single-docking-run sizes; MD-scale
|
|
323
|
+
ensembles would benefit from scipy's optimized linkage in a future
|
|
324
|
+
enhancement).
|
|
325
|
+
- **Docs: ensembles user guide + API reference.** `docs/guide/ensembles.md`
|
|
326
|
+
walks through the canonical workflow (score → weights → diversity →
|
|
327
|
+
clusters → density → consensus); `docs/reference/ensembles.md`
|
|
328
|
+
renders the full API via mkdocstrings. Added to mkdocs nav.
|
|
329
|
+
- **Notebook rendering via mkdocs-jupyter.** All six walkthrough
|
|
330
|
+
notebooks (`notebooks/walkthroughs/01_sequences.ipynb` through
|
|
331
|
+
`06_plugin_authoring.ipynb`) and all three example notebooks
|
|
332
|
+
(`cross_engine_validation`, `de_novo_design`, `end_to_end_design`)
|
|
333
|
+
now render as proper docs pages alongside the rest of the site.
|
|
334
|
+
Notebooks live at their canonical `notebooks/` location (where CI
|
|
335
|
+
executes them); they're symlinked into `docs/walkthroughs/` and
|
|
336
|
+
`docs/examples/` so mkdocs-jupyter can find them inside `docs_dir`
|
|
337
|
+
without duplicating files. `execute: false` in the plugin config —
|
|
338
|
+
the docs build never re-runs notebooks; it renders the pre-baked
|
|
339
|
+
outputs that are already committed to the repo (matching the CI
|
|
340
|
+
setup that catches notebook drift separately). A new
|
|
341
|
+
`docs/examples/index.md` landing page gives a 1-line summary of
|
|
342
|
+
each example. Total site size now ~7.6 MB across 24 pages; the
|
|
343
|
+
notebook pages average ~700 KB each (mkdocs-jupyter bundles
|
|
344
|
+
notebook CSS/JS per page). Build time ~6 s.
|
|
345
|
+
- **Docs CI + GitHub Pages deployment.** `.github/workflows/docs.yml`
|
|
346
|
+
rewritten from the placeholder `echo` into a real two-job workflow:
|
|
347
|
+
`build` runs `mkdocs build --strict` on every push and PR (catches
|
|
348
|
+
broken nav links, unresolved cross-references, missing
|
|
349
|
+
mkdocstrings symbols), and `deploy` runs only on pushes to
|
|
350
|
+
`main`/`master`, using the modern `actions/deploy-pages@v4` flow
|
|
351
|
+
(no `gh-pages` orphan branch). The deploy job has `pages: write`
|
|
352
|
+
and `id-token: write` permissions, is gated behind a
|
|
353
|
+
`github-pages` environment, and uses a `pages` concurrency group
|
|
354
|
+
with `cancel-in-progress: false` so concurrent deploys queue
|
|
355
|
+
rather than thrash. CI install is just `pip install -e
|
|
356
|
+
".[docs]" ruff` — molforge's lazy-import discipline means no
|
|
357
|
+
torch / scipy / openmm / biopython is needed at doc-build time,
|
|
358
|
+
which keeps the docs job under a minute. Verified locally by
|
|
359
|
+
building strictly in a fresh venv with only `[docs]` installed.
|
|
360
|
+
- **API reference pages live, strict mkdocs build green.** Eleven
|
|
361
|
+
reference pages now render real API content via mkdocstrings, with
|
|
362
|
+
`molforge.wrappers` split into a router landing page plus four
|
|
363
|
+
per-subcategory pages (folding, docking, md, generative) — totalling
|
|
364
|
+
~1.2 MB of rendered API HTML across 15 reference pages. `mkdocs
|
|
365
|
+
build --strict` is the local + CI check, with zero warnings.
|
|
366
|
+
- **mkdocs site skeleton (`docs/`, `mkdocs.yml`).** First end-to-end
|
|
367
|
+
buildable docs site, replacing the half-finished biocore-era stub.
|
|
368
|
+
Material for MkDocs theme with light/dark toggle, indigo palette,
|
|
369
|
+
navigation tabs, edit-on-GitHub links, and snippets-driven content
|
|
370
|
+
reuse. `mkdocstrings[python]` wired up against `src/` with
|
|
371
|
+
Google-style docstring parsing, source-order member listing, and
|
|
372
|
+
underscore filtering. Site is organized into five top-level
|
|
373
|
+
sections — Getting started, User guide, Architecture, API
|
|
374
|
+
reference, Project — and `mkdocs build` runs in ~2 s producing
|
|
375
|
+
17 pages including 11 stubbed API-reference pages (one per
|
|
376
|
+
subpackage, each rendering the live `__all__`). API-reference
|
|
377
|
+
content fills out in the next commit; this commit lands the
|
|
378
|
+
structure, theme, configuration, and all hand-written guide
|
|
379
|
+
prose. Stale `docs/source/` directory removed.
|
|
380
|
+
- **Realistic PDB fixtures + 29 new integration tests.**
|
|
381
|
+
Three new fixtures handcrafted from canonical bond lengths and
|
|
382
|
+
angles (Engh & Huber 1991) to exercise real-PDB code paths that
|
|
383
|
+
synthetic fixtures structurally can't:
|
|
384
|
+
- **`real_small_protein.pdb`** (193 atoms, 24 residues): mixed
|
|
385
|
+
helix/loop/strand topology with **all 20 standard amino acids
|
|
386
|
+
including PRO**. Every residue carries its full canonical atom set,
|
|
387
|
+
so aromatic-ring parsing (PHE/TYR/TRP/HIS), branched side
|
|
388
|
+
chains (LEU/ILE/VAL), and the PRO ring-closure case all get
|
|
389
|
+
exercised. B-factors vary realistically (edges higher than
|
|
390
|
+
core, ~16-45 Ų). The helix is left-handed due to the NeRF
|
|
391
|
+
sign convention — documented honestly in a REMARK and in the
|
|
392
|
+
relevant test — which doesn't affect DSSP H-bond detection or
|
|
393
|
+
most other geometric analyses.
|
|
394
|
+
- **`real_with_altloc_sidechains.pdb`** (97 atoms, 12 residues):
|
|
395
|
+
same backbone as the first 12 residues of `real_small_protein`,
|
|
396
|
+
but with **A/B alternative conformations spanning the full
|
|
397
|
+
side chain** at LEU 2 (CB/CG/CD1/CD2) and SER 9 (CB/OG).
|
|
398
|
+
Occupancies 0.60/0.40. Replaces the prior 8-atom
|
|
399
|
+
`with_altloc.pdb` for any test that needs multi-atom altloc
|
|
400
|
+
context (the old one is kept for parser-level smoke tests).
|
|
401
|
+
- **`real_with_ligand_realistic.pdb`** (76 atoms, 13 residues
|
|
402
|
+
across 3 chains): 8-residue helix + a **benzene ligand** (BNZ,
|
|
403
|
+
6 aromatic carbons in a proper hexagonal ring at canonical 1.4 Å
|
|
404
|
+
spacing) + **a zinc ion** (ZN, properly classified as ion not
|
|
405
|
+
ligand thanks to the corrected element column) + **3
|
|
406
|
+
crystallographic waters** (HOH, classified as water). Replaces
|
|
407
|
+
`mini_with_ligand.pdb`'s fake imidazole + waters with proper
|
|
408
|
+
multi-chain hetero-atom chemistry.
|
|
409
|
+
- 29 integration tests in `tests/integration/test_real_fixtures.py`
|
|
410
|
+
organized by code path (fixture loading, entity-type classification,
|
|
411
|
+
full side-chain atom counts, multi-atom alt-loc handling under all
|
|
412
|
+
four `altloc=` modes, write-then-read round-trip preservation,
|
|
413
|
+
structural-analysis algorithms exercised on the realistic protein,
|
|
414
|
+
ML featurization on full side chains, and sequence mutation through
|
|
415
|
+
`mutate_protein`). Each test class is named after the code path
|
|
416
|
+
it exercises rather than the fixture, so failures point at the
|
|
417
|
+
broken behavior rather than the test fixture.
|
|
418
|
+
- The total test count now stands at **664 passing + 8 correctly skipped**
|
|
419
|
+
(+29 from the new fixtures), with the integration suite growing
|
|
420
|
+
from 19 to 48 tests.
|
|
421
|
+
- **[`notebooks/examples/cross_engine_validation.ipynb`](notebooks/examples/cross_engine_validation.ipynb)**:
|
|
422
|
+
20-cell worked example of the cross-validator consensus pattern.
|
|
423
|
+
Uses two deterministic synthetic validators (mimicking
|
|
424
|
+
ESMFold-like and AlphaFold-like output) to walk through:
|
|
425
|
+
single-validator `cross_validate`, the strict / permissive /
|
|
426
|
+
majority consensus modes, drilling into a borderline design to
|
|
427
|
+
see which validator disagreed, and ranking the survivors. End-
|
|
428
|
+
to-end executable without GPU; the validator stubs are designed
|
|
429
|
+
so the cross-architecture-disagreement pattern (one model
|
|
430
|
+
overconfident, one model rejecting) is clearly visible on a
|
|
431
|
+
single sample design.
|
|
432
|
+
- Both this new notebook and the
|
|
433
|
+
[`05_ml_featurization`](notebooks/walkthroughs/05_ml_featurization.ipynb)
|
|
434
|
+
one are now in the CI's executable allowlist (so any drift
|
|
435
|
+
between the notebook outputs and library behavior breaks CI).
|
|
436
|
+
- **CI now executes runnable notebooks.** A new `notebooks` job in
|
|
437
|
+
`.github/workflows/ci.yml` parse-validates every notebook in
|
|
438
|
+
`notebooks/` and executes the four that don't require external
|
|
439
|
+
engines (`01_sequences`, `02_structures`,
|
|
440
|
+
`05_ml_featurization`, `06_plugin_authoring`) top-to-bottom
|
|
441
|
+
against the freshly-installed library. Catches the class of bug
|
|
442
|
+
where a notebook's outputs go silently out of sync with the
|
|
443
|
+
library — if any cell raises, CI fails.
|
|
444
|
+
- **`scripts/execute_notebooks.py`**: the underlying executor.
|
|
445
|
+
Usable locally as `python scripts/execute_notebooks.py` (or
|
|
446
|
+
`--check-only` for parse-only). Maintains explicit allowlists
|
|
447
|
+
for executable vs. parse-only notebooks; updating either list is
|
|
448
|
+
the only thing required when adding a new notebook.
|
|
449
|
+
- `nbclient>=0.10` and `ipykernel>=6.29` added to the `[dev]`
|
|
450
|
+
extra so the script is runnable in any dev environment via
|
|
451
|
+
`pip install -e ".[dev]"`.
|
|
452
|
+
- **`molforge.plugins.discover()` implemented.** Was previously
|
|
453
|
+
raising `NotImplementedError`; now walks Python entry points
|
|
454
|
+
under the `molforge.plugins` group via
|
|
455
|
+
`importlib.metadata.entry_points`. Each entry point's registration
|
|
456
|
+
function is called once. Broken plugins (failed import, register
|
|
457
|
+
function raises) are tolerated and silently skipped so one bad
|
|
458
|
+
plugin can't break every downstream user of molforge. Returns the
|
|
459
|
+
list of successfully-loaded entry-point names so callers can
|
|
460
|
+
introspect what's available. Companion `clear()` exported for
|
|
461
|
+
test isolation.
|
|
462
|
+
- **[`notebooks/walkthroughs/06_plugin_authoring.ipynb`](notebooks/walkthroughs/06_plugin_authoring.ipynb)**:
|
|
463
|
+
the last walkthrough stub from the v0.0.1 skeleton is now live.
|
|
464
|
+
14-cell tour of the plugin registry: when to use it vs. direct
|
|
465
|
+
imports, how to register engines / parsers / scorers, the
|
|
466
|
+
inline-vs-entry-point distinction, and how the
|
|
467
|
+
`pyproject.toml` entry-point declaration translates into
|
|
468
|
+
auto-discovery. Includes a runnable `RandomFolder` toy engine,
|
|
469
|
+
a minimal `.xyz` parser, and a `hydrophobic_fraction` scorer
|
|
470
|
+
registered inline so the notebook executes end-to-end without
|
|
471
|
+
installing anything extra.
|
|
472
|
+
- 11 new plugin-registry tests bringing total registry coverage
|
|
473
|
+
to 12: basic register / available / get round-trip, all three
|
|
474
|
+
kinds (engine / parser / scorer), `clear()` isolation, and
|
|
475
|
+
`discover()` against a mocked `importlib.metadata.entry_points`
|
|
476
|
+
covering the multi-plugin case, the broken-plugin tolerance, and
|
|
477
|
+
the empty-entry-points fallthrough.
|
|
478
|
+
|
|
479
|
+
### Fixed
|
|
480
|
+
- **Docs notebooks no longer use symlinks.** The walkthrough and
|
|
481
|
+
example notebooks were previously symlinked from `docs/` into the
|
|
482
|
+
canonical `notebooks/` directory. Symlinks broke two things: (1)
|
|
483
|
+
extracting a release tarball on Windows failed with "a required
|
|
484
|
+
privilege is not held by the client" because creating symlinks
|
|
485
|
+
needs a privilege normal accounts lack, and (2) the GitHub Pages
|
|
486
|
+
docs build failed in strict mode because `actions/checkout`
|
|
487
|
+
didn't preserve the links, leaving nine dangling `nav` references
|
|
488
|
+
(13 strict-mode warnings, non-zero exit). Replaced with a build
|
|
489
|
+
hook (`docs/_hooks/copy_notebooks.py`, registered via the
|
|
490
|
+
`hooks:` key in `mkdocs.yml`) that copies the notebooks from
|
|
491
|
+
`notebooks/` into `docs/` during `on_config`, before mkdocs's
|
|
492
|
+
file discovery runs so mkdocs-jupyter renders them normally. The
|
|
493
|
+
copies are git-ignored; the notebooks remain single-source in
|
|
494
|
+
`notebooks/`. No symlinks anywhere in the repo, and the tarball
|
|
495
|
+
extracts cleanly on every platform.
|
|
496
|
+
- **`docs/guide/data-model.md` field names.** Two rows in the
|
|
497
|
+
`AtomArray` schema table used pre-rename names (`res_name`,
|
|
498
|
+
`res_id`); corrected to `residue_name`, `residue_id` to match
|
|
499
|
+
the actual public attributes. Discovered while writing ensemble
|
|
500
|
+
test fixtures.
|
|
501
|
+
|
|
502
|
+
### Removed
|
|
503
|
+
- **`tests/unit/core/test_core_types.py`.** A pre-existing fossil
|
|
504
|
+
from before the view-based data-model refactor: it imported from
|
|
505
|
+
`biocore.core` (the pre-rename namespace) and called constructors
|
|
506
|
+
like `Chain(chain_id="A")` and `Residue(name="ALA", seq_id=1)`
|
|
507
|
+
that no longer match the current view-based signatures (`Chain`,
|
|
508
|
+
`Residue`, and `Atom` are now views over an `AtomArray` and take
|
|
509
|
+
`(array, start, end)` not standalone keyword arguments). The
|
|
510
|
+
assertions it made were already fully covered by
|
|
511
|
+
`test_hierarchy.py` (260 lines, with dedicated `TestAtom`,
|
|
512
|
+
`TestResidue`, `TestChain`, `TestProtein`, and `TestConsistency`
|
|
513
|
+
classes) and `test_atom_array.py` (210 lines). Removing the
|
|
514
|
+
fossil unblocks the full test suite from running cleanly under
|
|
515
|
+
`pytest` (previously needed `--ignore` for that one file).
|
|
516
|
+
Headline test count unchanged: 664 + 8 skipped.
|
|
517
|
+
|
|
518
|
+
## [v0.1.0] 2026-05-20
|
|
519
|
+
|
|
520
|
+
- **`molforge.validation`: cross-validation utilities for protein
|
|
521
|
+
design.** Captures the common pattern of "score designs across
|
|
522
|
+
multiple validators and combine results" that was previously
|
|
523
|
+
hand-rolled as list comprehensions in user code.
|
|
524
|
+
- **`Criterion`**: declarative success conditions (e.g.
|
|
525
|
+
`Criterion.gt("plddt", 80)`). Atomic comparisons via the six
|
|
526
|
+
standard operators (`gt` / `ge` / `lt` / `le` / `eq` / `ne`),
|
|
527
|
+
composable with the standard logical operators (`&` for AND,
|
|
528
|
+
`|` for OR, `~` for NOT) to express arbitrarily complex success
|
|
529
|
+
rules. Tracks which metrics it references via
|
|
530
|
+
`criterion.metric_names`, useful for upstream validation.
|
|
531
|
+
- **`CriteriaSet`**: a named collection of criteria evaluated
|
|
532
|
+
together, returning per-criterion pass/fail for diagnostics
|
|
533
|
+
rather than just an opaque boolean. Implicit AND across criteria.
|
|
534
|
+
- **`Verdict`**: per-design result combining metric values,
|
|
535
|
+
per-criterion results, an overall pass/fail, and a sortable
|
|
536
|
+
score. Exposes `failed_criteria` / `passed_criteria` properties
|
|
537
|
+
for inspection.
|
|
538
|
+
- **`cross_validate(designs, validators, criteria)`**: the
|
|
539
|
+
workhorse. Runs every design through every validator, namespaces
|
|
540
|
+
metrics by validator name (`"esmfold.plddt"` not just `"plddt"`),
|
|
541
|
+
applies criteria, returns ranked verdicts. Handles validator
|
|
542
|
+
exceptions gracefully (default: record in metadata + mark
|
|
543
|
+
failed; opt-in propagation via `on_error="raise"`).
|
|
544
|
+
- **`consensus(verdict_lists, mode=...)`**: merges verdict lists
|
|
545
|
+
across validators under a chosen rule (`"all"` / `"any"` /
|
|
546
|
+
`"majority"` / explicit `"threshold"` count). Joins by
|
|
547
|
+
`design_id`; metric values from every validator are preserved
|
|
548
|
+
in the merged `Verdict.values`; per-criterion pass/fail is
|
|
549
|
+
namespaced by validator to keep diagnostics distinguishable.
|
|
550
|
+
- **`rank_verdicts(verdicts, only_passed=..., by=...)`**: ranking
|
|
551
|
+
helper. Defaults to ascending score (lower-is-better); can
|
|
552
|
+
filter to only-passed; can sort by an arbitrary metric name
|
|
553
|
+
instead of the score.
|
|
554
|
+
|
|
555
|
+
### Changed
|
|
556
|
+
- **Docstring normalization for griffe.** Twelve docstrings across
|
|
557
|
+
`metrics/{dockq,gdt,lddt}`, `ml/{graph,structure_features}`,
|
|
558
|
+
`sequence/alignment`, `structure/{contacts,dihedrals,rmsd,sasa}`,
|
|
559
|
+
and `wrappers/folding/{alphafold,esmfold}` rewritten so each
|
|
560
|
+
parameter gets its own line in the `Args:` block (instead of
|
|
561
|
+
comma-grouping like `a, b: ...`), and continuation lines under
|
|
562
|
+
bullet points are re-indented to 8 spaces. No behavior or
|
|
563
|
+
signature changes; griffe was the only consumer mis-parsing them,
|
|
564
|
+
but the fix also makes the rendered tables clearer (each
|
|
565
|
+
parameter gets its own row). 664 + 8 skipped tests, unchanged.
|
|
566
|
+
|
|
567
|
+
### Removed
|
|
568
|
+
- **`tests/unit/core/test_core_types.py`.** A pre-existing fossil
|
|
569
|
+
from before the view-based data-model refactor: it imported from
|
|
570
|
+
`biocore.core` (the pre-rename namespace) and called constructors
|
|
571
|
+
like `Chain(chain_id="A")` and `Residue(name="ALA", seq_id=1)`
|
|
572
|
+
that no longer match the current view-based signatures (`Chain`,
|
|
573
|
+
`Residue`, and `Atom` are now views over an `AtomArray` and take
|
|
574
|
+
`(array, start, end)` not standalone keyword arguments). The
|
|
575
|
+
assertions it made were already fully covered by
|
|
576
|
+
`test_hierarchy.py` (260 lines, with dedicated `TestAtom`,
|
|
577
|
+
`TestResidue`, `TestChain`, `TestProtein`, and `TestConsistency`
|
|
578
|
+
classes) and `test_atom_array.py` (210 lines). Removing the
|
|
579
|
+
fossil unblocks the full test suite from running cleanly under
|
|
580
|
+
`pytest` (previously needed `--ignore` for that one file).
|
|
581
|
+
Headline test count unchanged: 664 + 8 skipped.
|
|
582
|
+
|
|
583
|
+
## [v0.0.3] 2026-05-20
|
|
8
584
|
|
|
9
|
-
### [v0.0.3] 2026-05-20
|
|
10
585
|
- **De novo design notebook updated**: the `de_novo_design.ipynb`
|
|
11
586
|
example now includes a section demonstrating the validation
|
|
12
587
|
utilities — declarative `CriteriaSet`, `cross_validate` against a
|