molforge 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- molforge-0.0.1/.gitignore +74 -0
- molforge-0.0.1/CHANGELOG.md +240 -0
- molforge-0.0.1/LICENSE +21 -0
- molforge-0.0.1/PKG-INFO +246 -0
- molforge-0.0.1/README.md +156 -0
- molforge-0.0.1/data/README.md +7 -0
- molforge-0.0.1/notebooks/README.md +13 -0
- molforge-0.0.1/plugins/README.md +15 -0
- molforge-0.0.1/plugins/example_plugin/README.md +20 -0
- molforge-0.0.1/plugins/example_plugin/pyproject.toml +16 -0
- molforge-0.0.1/pyproject.toml +212 -0
- molforge-0.0.1/requirements/README.md +8 -0
- molforge-0.0.1/scripts/README.md +5 -0
- molforge-0.0.1/src/molforge/__init__.py +18 -0
- molforge-0.0.1/src/molforge/core/__init__.py +64 -0
- molforge-0.0.1/src/molforge/core/atom.py +154 -0
- molforge-0.0.1/src/molforge/core/atom_array.py +318 -0
- molforge-0.0.1/src/molforge/core/chain.py +149 -0
- molforge-0.0.1/src/molforge/core/constants.py +145 -0
- molforge-0.0.1/src/molforge/core/protein.py +166 -0
- molforge-0.0.1/src/molforge/core/residue.py +160 -0
- molforge-0.0.1/src/molforge/docking/__init__.py +143 -0
- molforge-0.0.1/src/molforge/io/__init__.py +88 -0
- molforge-0.0.1/src/molforge/io/dispatch.py +156 -0
- molforge-0.0.1/src/molforge/io/fasta.py +185 -0
- molforge-0.0.1/src/molforge/io/mmcif.py +559 -0
- molforge-0.0.1/src/molforge/io/mol2.py +33 -0
- molforge-0.0.1/src/molforge/io/pdb.py +635 -0
- molforge-0.0.1/src/molforge/io/pdb_alphafold.py +91 -0
- molforge-0.0.1/src/molforge/io/pdbqt.py +33 -0
- molforge-0.0.1/src/molforge/io/pqr.py +33 -0
- molforge-0.0.1/src/molforge/io/sdf.py +33 -0
- molforge-0.0.1/src/molforge/md/__init__.py +23 -0
- molforge-0.0.1/src/molforge/metrics/__init__.py +25 -0
- molforge-0.0.1/src/molforge/ml/__init__.py +25 -0
- molforge-0.0.1/src/molforge/plugins/__init__.py +34 -0
- molforge-0.0.1/src/molforge/plugins/registry.py +48 -0
- molforge-0.0.1/src/molforge/py.typed +0 -0
- molforge-0.0.1/src/molforge/sequence/__init__.py +69 -0
- molforge-0.0.1/src/molforge/sequence/alignment.py +385 -0
- molforge-0.0.1/src/molforge/sequence/composition.py +132 -0
- molforge-0.0.1/src/molforge/sequence/matrices.py +99 -0
- molforge-0.0.1/src/molforge/sequence/mutations.py +195 -0
- molforge-0.0.1/src/molforge/structure/__init__.py +85 -0
- molforge-0.0.1/src/molforge/structure/contacts.py +175 -0
- molforge-0.0.1/src/molforge/structure/dssp.py +428 -0
- molforge-0.0.1/src/molforge/structure/geometry.py +144 -0
- molforge-0.0.1/src/molforge/structure/rmsd.py +171 -0
- molforge-0.0.1/src/molforge/structure/superposition.py +135 -0
- molforge-0.0.1/src/molforge/wrappers/__init__.py +11 -0
- molforge-0.0.1/src/molforge/wrappers/docking/__init__.py +34 -0
- molforge-0.0.1/src/molforge/wrappers/docking/_base.py +7 -0
- molforge-0.0.1/src/molforge/wrappers/docking/diffdock.py +17 -0
- molforge-0.0.1/src/molforge/wrappers/docking/vina.py +364 -0
- molforge-0.0.1/src/molforge/wrappers/folding/__init__.py +38 -0
- molforge-0.0.1/src/molforge/wrappers/folding/_base.py +102 -0
- molforge-0.0.1/src/molforge/wrappers/folding/alphafold.py +20 -0
- molforge-0.0.1/src/molforge/wrappers/folding/boltz.py +20 -0
- molforge-0.0.1/src/molforge/wrappers/folding/esmfold.py +234 -0
- molforge-0.0.1/src/molforge/wrappers/folding/rosetta.py +19 -0
- molforge-0.0.1/src/molforge/wrappers/md/__init__.py +8 -0
- molforge-0.0.1/src/molforge/wrappers/md/_base.py +13 -0
- molforge-0.0.1/src/molforge/wrappers/md/gromacs.py +18 -0
- molforge-0.0.1/src/molforge/wrappers/md/openmm.py +18 -0
- molforge-0.0.1/tests/__init__.py +0 -0
- molforge-0.0.1/tests/conftest.py +23 -0
- molforge-0.0.1/tests/fixtures/cif/dipeptide.cif +39 -0
- molforge-0.0.1/tests/fixtures/fasta/.gitkeep +0 -0
- molforge-0.0.1/tests/fixtures/fasta/multiline_with_digits.fasta +3 -0
- molforge-0.0.1/tests/fixtures/fasta/simple.fasta +6 -0
- molforge-0.0.1/tests/fixtures/pdb/.gitkeep +0 -0
- molforge-0.0.1/tests/fixtures/pdb/alphafold_mock.pdb +8 -0
- molforge-0.0.1/tests/fixtures/pdb/dipeptide.pdb +16 -0
- molforge-0.0.1/tests/fixtures/pdb/helix.pdb +62 -0
- molforge-0.0.1/tests/fixtures/pdb/multi_model.pdb +12 -0
- molforge-0.0.1/tests/fixtures/pdb/tripeptide.pdb +17 -0
- molforge-0.0.1/tests/fixtures/pdb/with_altloc.pdb +9 -0
- molforge-0.0.1/tests/fixtures/pdb/with_insertion_code.pdb +6 -0
- molforge-0.0.1/tests/integration/__init__.py +0 -0
- molforge-0.0.1/tests/integration/test_smoke.py +11 -0
- molforge-0.0.1/tests/unit/__init__.py +0 -0
- molforge-0.0.1/tests/unit/core/__init__.py +0 -0
- molforge-0.0.1/tests/unit/core/test_atom_array.py +210 -0
- molforge-0.0.1/tests/unit/core/test_constants.py +61 -0
- molforge-0.0.1/tests/unit/core/test_core_smoke.py +27 -0
- molforge-0.0.1/tests/unit/core/test_core_types.py +33 -0
- molforge-0.0.1/tests/unit/core/test_hierarchy.py +260 -0
- molforge-0.0.1/tests/unit/docking/__init__.py +0 -0
- molforge-0.0.1/tests/unit/docking/test_docking_smoke.py +9 -0
- molforge-0.0.1/tests/unit/io/__init__.py +0 -0
- molforge-0.0.1/tests/unit/io/test_alphafold.py +56 -0
- molforge-0.0.1/tests/unit/io/test_dispatch.py +74 -0
- molforge-0.0.1/tests/unit/io/test_fasta.py +135 -0
- molforge-0.0.1/tests/unit/io/test_io_smoke.py +9 -0
- molforge-0.0.1/tests/unit/io/test_mmcif.py +171 -0
- molforge-0.0.1/tests/unit/io/test_pdb.py +254 -0
- molforge-0.0.1/tests/unit/md/__init__.py +0 -0
- molforge-0.0.1/tests/unit/md/test_md_smoke.py +9 -0
- molforge-0.0.1/tests/unit/metrics/__init__.py +0 -0
- molforge-0.0.1/tests/unit/metrics/test_metrics_smoke.py +9 -0
- molforge-0.0.1/tests/unit/ml/__init__.py +0 -0
- molforge-0.0.1/tests/unit/ml/test_ml_smoke.py +9 -0
- molforge-0.0.1/tests/unit/plugins/__init__.py +0 -0
- molforge-0.0.1/tests/unit/plugins/test_plugins_smoke.py +9 -0
- molforge-0.0.1/tests/unit/plugins/test_registry.py +14 -0
- molforge-0.0.1/tests/unit/sequence/__init__.py +0 -0
- molforge-0.0.1/tests/unit/sequence/test_alignment.py +123 -0
- molforge-0.0.1/tests/unit/sequence/test_composition.py +92 -0
- molforge-0.0.1/tests/unit/sequence/test_matrices.py +41 -0
- molforge-0.0.1/tests/unit/sequence/test_mutations.py +135 -0
- molforge-0.0.1/tests/unit/structure/__init__.py +0 -0
- molforge-0.0.1/tests/unit/structure/test_contacts.py +90 -0
- molforge-0.0.1/tests/unit/structure/test_dssp.py +118 -0
- molforge-0.0.1/tests/unit/structure/test_geometry.py +110 -0
- molforge-0.0.1/tests/unit/structure/test_rmsd.py +107 -0
- molforge-0.0.1/tests/unit/structure/test_structure_smoke.py +9 -0
- molforge-0.0.1/tests/unit/structure/test_superposition.py +114 -0
- molforge-0.0.1/tests/unit/wrappers/__init__.py +0 -0
- molforge-0.0.1/tests/unit/wrappers/test_docking_base.py +82 -0
- molforge-0.0.1/tests/unit/wrappers/test_esmfold.py +114 -0
- molforge-0.0.1/tests/unit/wrappers/test_folding_base.py +63 -0
- molforge-0.0.1/tests/unit/wrappers/test_vina.py +201 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
*.egg
|
|
8
|
+
*.egg-info/
|
|
9
|
+
dist/
|
|
10
|
+
build/
|
|
11
|
+
develop-eggs/
|
|
12
|
+
downloads/
|
|
13
|
+
eggs/
|
|
14
|
+
.eggs/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
share/python-wheels/
|
|
20
|
+
MANIFEST
|
|
21
|
+
pip-log.txt
|
|
22
|
+
pip-delete-this-directory.txt
|
|
23
|
+
|
|
24
|
+
# Virtual environments
|
|
25
|
+
.venv/
|
|
26
|
+
venv/
|
|
27
|
+
env/
|
|
28
|
+
ENV/
|
|
29
|
+
|
|
30
|
+
# Testing / coverage
|
|
31
|
+
.tox/
|
|
32
|
+
.nox/
|
|
33
|
+
.coverage
|
|
34
|
+
.coverage.*
|
|
35
|
+
.cache
|
|
36
|
+
nosetests.xml
|
|
37
|
+
coverage.xml
|
|
38
|
+
*.cover
|
|
39
|
+
*.py,cover
|
|
40
|
+
.hypothesis/
|
|
41
|
+
.pytest_cache/
|
|
42
|
+
htmlcov/
|
|
43
|
+
|
|
44
|
+
# Type checkers
|
|
45
|
+
.mypy_cache/
|
|
46
|
+
.pytype/
|
|
47
|
+
.pyre/
|
|
48
|
+
.ruff_cache/
|
|
49
|
+
|
|
50
|
+
# Jupyter
|
|
51
|
+
.ipynb_checkpoints
|
|
52
|
+
*.ipynb_checkpoints/
|
|
53
|
+
|
|
54
|
+
# Editors / OS
|
|
55
|
+
.idea/
|
|
56
|
+
.vscode/
|
|
57
|
+
*.swp
|
|
58
|
+
*.swo
|
|
59
|
+
.DS_Store
|
|
60
|
+
Thumbs.db
|
|
61
|
+
|
|
62
|
+
# Docs build output
|
|
63
|
+
docs/_build/
|
|
64
|
+
docs/site/
|
|
65
|
+
site/
|
|
66
|
+
|
|
67
|
+
# Data files (keep small fixtures only; ignore bulk data)
|
|
68
|
+
data/*
|
|
69
|
+
!data/.gitkeep
|
|
70
|
+
!data/README.md
|
|
71
|
+
|
|
72
|
+
# Project-specific
|
|
73
|
+
*.log
|
|
74
|
+
*.tmp
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **`molforge.structure.dssp`: Kabsch-Sander secondary-structure assignment.**
|
|
12
|
+
- Pure-NumPy implementation of the canonical DSSP algorithm
|
|
13
|
+
(Kabsch & Sander 1983) with no external dependencies — no DSSP
|
|
14
|
+
binary required, no Biopython, no mkdssp install.
|
|
15
|
+
- Returns both the full 8-state DSSP alphabet (`H` α-helix,
|
|
16
|
+
`G` 3-10 helix, `I` π-helix, `E` β-strand, `B` β-bridge,
|
|
17
|
+
`T` turn, `S` bend, `-` coil) and the 3-state collapse
|
|
18
|
+
(`H` / `E` / `C`) via `dssp_3state`.
|
|
19
|
+
- Geometric backbone amide-H placement (no need for explicit H atoms
|
|
20
|
+
in input), Kabsch-Sander electrostatic H-bond energy model, both
|
|
21
|
+
parallel and antiparallel β-bridge detection.
|
|
22
|
+
- Non-protein residues (water, ligands, ions) and residues with
|
|
23
|
+
incomplete backbones get `-` rather than crashing.
|
|
24
|
+
- Result dict also exposes the full ``(n_res, n_res)`` H-bond energy
|
|
25
|
+
matrix for downstream analyses (custom topology metrics, contact-
|
|
26
|
+
map enrichment, etc.).
|
|
27
|
+
- Replaces the previous stub that raised `NotImplementedError`.
|
|
28
|
+
- New test fixture `tests/fixtures/pdb/helix.pdb` — an idealized
|
|
29
|
+
15-residue poly-alanine α-helix built from canonical (φ, ψ) values
|
|
30
|
+
via NeRF placement. Produces the expected DSSP `CHHHHHHHHHHHHHC`
|
|
31
|
+
pattern.
|
|
32
|
+
- 12 unit tests covering empty / tiny inputs, helix recognition (≥7 of
|
|
33
|
+
the middle 9 residues classified as H), 3-state collapse, alphabet
|
|
34
|
+
validity, residue labels, H-bond matrix shape, and graceful handling
|
|
35
|
+
of non-protein residues.
|
|
36
|
+
- **`molforge.wrappers.docking.Vina`: second fully-implemented engine wrapper.**
|
|
37
|
+
- Wraps AutoDock Vina via the
|
|
38
|
+
[`vina`](https://pypi.org/project/vina/) PyPI package, which bundles
|
|
39
|
+
the Vina binary so no manual install is required.
|
|
40
|
+
- Configurable scoring function (`vina` or `vinardo`), seed, CPU
|
|
41
|
+
thread count, and verbosity.
|
|
42
|
+
- Takes either a prepared `.pdbqt` file path or (eventually) a
|
|
43
|
+
`Protein` plus charges; the receptor / ligand preparation path for
|
|
44
|
+
`Protein` and `.pdb` / `.sdf` inputs raises a clear
|
|
45
|
+
`NotImplementedError` pointing users at meeko / AutoDockTools.
|
|
46
|
+
- Search box specified by `center` and `box_size` in Å.
|
|
47
|
+
- Multi-pose PDBQT output parsed back into `DockingResult` / `Pose`
|
|
48
|
+
objects with score (kcal/mol), RMSD lower/upper bounds vs the
|
|
49
|
+
best pose, rank, and the ligand atoms as a `Protein`.
|
|
50
|
+
- **`molforge.docking`: completed ABC and result types.**
|
|
51
|
+
- `Pose` and `DockingResult` dataclasses with `best`, `top_n`,
|
|
52
|
+
iteration, and length helpers — replacing the previous stub classes
|
|
53
|
+
that raised `NotImplementedError` on construction.
|
|
54
|
+
- `DockingEngine` ABC with the formal `dock` contract; mirrors
|
|
55
|
+
`FoldingEngine` for API consistency across wrapper categories.
|
|
56
|
+
- `DockingEngineNotInstalledError` for missing-dependency error paths.
|
|
57
|
+
- 28 unit tests (1 marked `@pytest.mark.slow` for the real engine):
|
|
58
|
+
construction is dependency-free, lazy import behavior, materialization
|
|
59
|
+
helpers (path passthrough plus clear errors for unsupported inputs),
|
|
60
|
+
and exhaustive PDBQT output parsing (multi-MODEL, single-pose-no-MODEL,
|
|
61
|
+
score/RMSD extraction, best-first sorting, rank reassignment, empty
|
|
62
|
+
input).
|
|
63
|
+
- **`molforge.sequence`: sequence operations subpackage.**
|
|
64
|
+
- **Pairwise alignment** (`align`, `needleman_wunsch`, `smith_waterman`,
|
|
65
|
+
`Alignment`, `identity`): pure-NumPy Needleman-Wunsch (global) and
|
|
66
|
+
Smith-Waterman (local) with affine gap penalties, BLOSUM62 / PAM250
|
|
67
|
+
substitution matrices, and a `format()` method for human-readable
|
|
68
|
+
alignment blocks. No external dependencies (no Biopython, no
|
|
69
|
+
parasail) so it works in the minimal install.
|
|
70
|
+
- **Substitution matrices** (`BLOSUM62`, `PAM250`, `get_matrix`,
|
|
71
|
+
`available_matrices`): hardcoded as NumPy arrays from the NCBI BLAST
|
|
72
|
+
distribution. No runtime data-file dependency.
|
|
73
|
+
- **Mutations** (`Mutation`, `parse_mutations`, `apply_mutation`,
|
|
74
|
+
`apply_mutations`, `mutate_protein`): protein-engineering notation
|
|
75
|
+
(`A123V`, `A123V/T56K`, chain-prefixed `H:K42N`) with parsing,
|
|
76
|
+
sequence-level application, and `Protein`-level application that
|
|
77
|
+
updates the canonical `AtomArray` (sequence-only — atoms stay put;
|
|
78
|
+
side-chain rebuilding is a job for Rosetta or OpenMM).
|
|
79
|
+
- **Composition / properties** (`composition`, `length`,
|
|
80
|
+
`molecular_weight`, `gravy`, `aromaticity`): the everyday sequence
|
|
81
|
+
stats — per-residue counts/fractions, monoisotopic MW with
|
|
82
|
+
terminal water, Kyte-Doolittle GRAVY score, aromatic fraction.
|
|
83
|
+
- 65 unit tests covering alignment correctness (identity, gaps, full and
|
|
84
|
+
local coverage), matrix lookup and symmetry, mutation parsing
|
|
85
|
+
(including chain prefix and multi-mutant syntax), `Protein` mutation
|
|
86
|
+
with original-unchanged semantics, and all composition helpers.
|
|
87
|
+
- **`molforge.structure`: structural analysis subpackage.**
|
|
88
|
+
- **Superposition** (`superpose`, `kabsch_rmsd`, `SuperpositionResult`):
|
|
89
|
+
Kabsch / Umeyama optimal rigid-body alignment via SVD with proper-
|
|
90
|
+
rotation guarantee (no reflections), optional per-point weights for
|
|
91
|
+
masking outliers, returns rotation + translation + aligned coords +
|
|
92
|
+
RMSD.
|
|
93
|
+
- **RMSD** (`rmsd`, `rmsd_raw`, `rmsd_per_residue`): structure-to-
|
|
94
|
+
structure RMSD with five atom-subset selectors (``ca``, ``backbone``,
|
|
95
|
+
``backbone_o``, ``all_heavy``, ``all``), optional alignment, and
|
|
96
|
+
per-residue breakdown for localizing structural differences.
|
|
97
|
+
- **Contacts** (`contact_map`, `distance_map`, `residue_contacts`):
|
|
98
|
+
binary contact maps at configurable cutoff, continuous distance
|
|
99
|
+
maps, and all-atom inter-residue contact listings with chain-pair
|
|
100
|
+
filtering for interface analysis.
|
|
101
|
+
- **Geometry** (`centroid`, `center_of_mass`, `radius_of_gyration`,
|
|
102
|
+
`bounding_box`, `translate`, `rotate`, `center_at_origin`): bulk
|
|
103
|
+
geometric properties (mass-weighted or geometric) and in-place
|
|
104
|
+
coordinate transforms that mutate the canonical `AtomArray` directly.
|
|
105
|
+
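- Stubs (still `NotImplementedError`): `sasa`. (`dssp` was also a stub when this entry was written; it is now implemented — see the `molforge.structure.dssp` entry.)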
|
|
106
|
+
- 43 unit tests covering superposition correctness (identity, translation,
|
|
107
|
+
rotation, noisy alignment, proper-rotation guarantee, weighted),
|
|
108
|
+
RMSD computations across atom subsets, contact / distance map
|
|
109
|
+
symmetry and chain filtering, and all geometry operations.
|
|
110
|
+
|
|
111
|
+
- **`molforge.io.read_cif` / `write_cif`: mmCIF / PDBx implementation.**
|
|
112
|
+
- Full read/write of the ``_atom_site`` loop, the only mmCIF block
|
|
113
|
+
that holds atomic coordinate data.
|
|
114
|
+
- Hand-written tokenizer handles quoted strings, comments,
|
|
115
|
+
semicolon-bounded multi-line text fields, and the ``.``/``?`` sentinel
|
|
116
|
+
values for missing/unknown.
|
|
117
|
+
- Header metadata extracted: ``_entry.id``, ``_struct.title``,
|
|
118
|
+
``_exptl.method``, ``_refine.ls_d_res_high``.
|
|
119
|
+
- Preference for ``auth_*`` columns (matching PDB conventions) with
|
|
120
|
+
fallback to ``label_*``, so PDB↔mmCIF round-trips preserve
|
|
121
|
+
author-assigned chain IDs and residue numbers.
|
|
122
|
+
- Reuses the same altloc resolution strategies and entity-type
|
|
123
|
+
classification as the PDB parser for behavioural consistency.
|
|
124
|
+
- ``CIFParseError`` and ``CIFWriteError`` for typed error handling.
|
|
125
|
+
- Wired into the top-level `load` / `save` dispatcher so
|
|
126
|
+
``.cif`` and ``.mmcif`` extensions just work.
|
|
127
|
+
- 27 unit tests covering the tokenizer, parsing, write, round-trip
|
|
128
|
+
(CIF→CIF and PDB→CIF→Protein), dispatch, and error paths.
|
|
129
|
+
- **`molforge.wrappers.folding.ESMFold`: first fully-implemented engine wrapper.**
|
|
130
|
+
- Wraps Meta AI's ``facebook/esmfold_v1`` via HuggingFace
|
|
131
|
+
``transformers``. Single-sequence folding (no MSA needed), fast,
|
|
132
|
+
GPU-friendly.
|
|
133
|
+
- Lazy import of ``torch`` and ``transformers`` keeps ``import
|
|
134
|
+
molforge`` cheap; missing-dependency errors point users at the
|
|
135
|
+
correct ``pip install 'molforge[ml]'`` extra.
|
|
136
|
+
- Configurable device (``cuda``/``cpu``/``mps``/auto), chunk size for
|
|
137
|
+
long-sequence memory management, and dtype (``float32``/``float16``).
|
|
138
|
+
- pLDDT exposed uniformly: per-atom in
|
|
139
|
+
``metadata["confidence_per_atom"]``, per-residue in
|
|
140
|
+
``metadata["confidence_per_residue"]``, scalar mean in
|
|
141
|
+
``metadata["mean_confidence"]``.
|
|
142
|
+
- **`FoldingEngine` ABC**: full contract definition with ``predict`` (abstract),
|
|
143
|
+
``predict_many`` (overridable batch), and a uniform per-residue
|
|
144
|
+
confidence convention so downstream code reads engine output the same
|
|
145
|
+
way regardless of which engine produced it.
|
|
146
|
+
- **`FoldingEngineNotInstalledError`**: dedicated exception type for missing
|
|
147
|
+
heavy dependencies, with actionable error messages.
|
|
148
|
+
- 17 unit tests covering construction, lazy loading, sequence
|
|
149
|
+
validation, missing-dependency error paths, and post-processing
|
|
150
|
+
(PDB-to-Protein conversion with confidence metadata). The end-to-end
|
|
151
|
+
fold test is marked ``@pytest.mark.slow`` and skipped unless ``torch``
|
|
152
|
+
is installed.
|
|
153
|
+
|
|
154
|
+
### Changed
|
|
155
|
+
- **Project renamed from `biocore` to `molforge`** (PyPI name collision; the
|
|
156
|
+
`biocore` GitHub organization is a separate, established scientific
|
|
157
|
+
Python community). Import path is now `molforge`.
|
|
158
|
+
- README rewritten around the cross-tool workflow thesis: molforge is
|
|
159
|
+
positioned as connective tissue between docking, MD, folding, design,
|
|
160
|
+
and experimental tools, rather than primarily as a data-representation
|
|
161
|
+
library.
|
|
162
|
+
|
|
163
|
+
### Added
|
|
164
|
+
- **`molforge.io`: file I/O subsystem.**
|
|
165
|
+
- **PDB reader and writer** (`read_pdb`, `write_pdb`, plus their
|
|
166
|
+
`*_string` variants). Handles the full wwPDB v3.30 column layout,
|
|
167
|
+
HEADER/TITLE/EXPDTA/REMARK 2 metadata, NMR multi-model files,
|
|
168
|
+
alternate locations (with three resolution strategies:
|
|
169
|
+
`highest_occupancy`, `first`, or `all` — plus selection of a specific altloc id),
|
|
170
|
+
insertion codes, gzipped input/output, hydrogen filtering, and
|
|
171
|
+
automatic entity-type classification (protein / dna / rna / water /
|
|
172
|
+
ion / ligand) per residue.
|
|
173
|
+
- **FASTA reader and writer** (`read_fasta`, `write_fasta`, `*_string`
|
|
174
|
+
variants) with `FastaRecord` dataclass. Tolerant of multi-line
|
|
175
|
+
sequences, embedded digits, comments, and blank lines.
|
|
176
|
+
- **AlphaFold helpers** (`load_alphafold`, `is_alphafold_pdb`). Lifts
|
|
177
|
+
pLDDT out of the B-factor column into `protein.metadata["plddt"]`
|
|
178
|
+
(per atom), `protein.metadata["plddt_per_residue"]`, and
|
|
179
|
+
`protein.metadata["mean_plddt"]`. B-factor column preserved for
|
|
180
|
+
downstream-tool compatibility.
|
|
181
|
+
- **Top-level `load()`, `save()`, `fetch()`** dispatch by file
|
|
182
|
+
extension or explicit `format=` keyword. `fetch()` is stubbed
|
|
183
|
+
pending an HTTP utility.
|
|
184
|
+
- Format stubs (raising `NotImplementedError` with clear pointers):
|
|
185
|
+
`pdbqt`, `pqr`, `sdf`, `mol2`. (`mmcif` was also a stub when this entry was written; it is now implemented via `read_cif` / `write_cif`.) The API surface is committed
|
|
186
|
+
so user code targeting these formats won't break when
|
|
187
|
+
implementations land.
|
|
188
|
+
- `PDBParseError`, `PDBWriteError` for typed error handling.
|
|
189
|
+
- 73 unit tests covering PDB parsing, writing, round-trip correctness on
|
|
190
|
+
real fixtures (dipeptide, NMR ensemble, altloc, insertion-coded),
|
|
191
|
+
FASTA edge cases (multiline, digits, comments, malformed input),
|
|
192
|
+
AlphaFold detection and pLDDT extraction, and dispatch behavior.
|
|
193
|
+
- `ACKNOWLEDGEMENTS.md` crediting Protkit, Biotite, Biopython,
|
|
194
|
+
BioPandas, MDAnalysis, OpenMM, RDKit, and the file-format
|
|
195
|
+
specifications we implement.
|
|
196
|
+
- **`molforge.core`: full implementation of the canonical data model.**
|
|
197
|
+
- `AtomArray`: flat, NumPy-backed source of truth with 15 typed fields
|
|
198
|
+
(coords, element, atom_name, residue_name, residue_id, insertion_code,
|
|
199
|
+
chain_id, b_factor, occupancy, charge, serial, record_type, entity_type,
|
|
200
|
+
altloc, model_id). Supports construction from a dict of arrays, boolean
|
|
201
|
+
selection (`select`, `where`), slicing, fancy indexing, and concatenation
|
|
202
|
+
(`append`). Lazily-computed and cached residue / chain boundary indices
|
|
203
|
+
(`residue_starts`, `chain_starts`) with explicit invalidation.
|
|
204
|
+
- `Atom`, `Residue`, `Chain`, `Protein`: lightweight hierarchical *views*
|
|
205
|
+
that hold a reference to a shared `AtomArray` plus an index range.
|
|
206
|
+
Mutations on the hierarchical side write through to the array, so the
|
|
207
|
+
two views never go out of sync.
|
|
208
|
+
- `Protein.select(**filters)`, `protein_only()`, `remove_water()` for
|
|
209
|
+
common substructure operations.
|
|
210
|
+
- First-class support for heterogeneous content: HETATM records,
|
|
211
|
+
ligands, waters, and ions are represented in the same array via
|
|
212
|
+
`record_type` and `entity_type` fields.
|
|
213
|
+
- Insertion codes, alternate locations (altloc), and multi-model
|
|
214
|
+
(NMR / trajectory) structures are modeled from day one.
|
|
215
|
+
- Constants module with three-letter ↔ one-letter mappings for the 20
|
|
216
|
+
canonical amino acids, common non-canonical residues (MSE, SEC,
|
|
217
|
+
phospho-S/T/Y, force-field-specific His variants), DNA / RNA
|
|
218
|
+
nucleotides, waters, and ions. Helper functions `three_to_one`,
|
|
219
|
+
`is_standard_amino_acid`, `is_water`, `is_ion`.
|
|
220
|
+
- 74 unit tests covering `AtomArray`, hierarchical views, constants, and
|
|
221
|
+
cross-cutting hierarchical ↔ linear consistency invariants.
|
|
222
|
+
- Initial repository skeleton with src-layout package structure.
|
|
223
|
+
- Hierarchical data model stubs for `Protein`, `Chain`, `Residue`, `Atom`.
|
|
224
|
+
- Linear / array view stubs (`AtomArray`) alongside hierarchical views.
|
|
225
|
+
- Top-level subpackages: `core`, `sequence`, `structure`, `md`, `docking`,
|
|
226
|
+
`ml`, `io`, `plugins`, `metrics`.
|
|
227
|
+
- Wrapper interface stubs for folding (AlphaFold/ColabFold, ESMFold, Boltz),
|
|
228
|
+
docking (AutoDock Vina, DiffDock), and MD (OpenMM, GROMACS).
|
|
229
|
+
- Plugin registry stub with entry-point discovery.
|
|
230
|
+
- `pyproject.toml` with PEP 621 metadata, extras (`structure`, `sequence`,
|
|
231
|
+
`md`, `docking`, `ml`, `io`, `all`, `dev`, `docs`), and tool config for
|
|
232
|
+
ruff, mypy, pytest, and coverage.
|
|
233
|
+
- GitHub Actions workflows for CI (lint, type-check, tests on Python 3.10-3.12),
|
|
234
|
+
documentation build, and release-to-PyPI on tag.
|
|
235
|
+
- Issue templates (bug, feature, question) and PR template.
|
|
236
|
+
- `CONTRIBUTING.md`, `CODE_OF_CONDUCT.md`, `SECURITY.md`, `CODEOWNERS`.
|
|
237
|
+
- Walkthrough notebook stubs for sequences, structures, MD, and docking.
|
|
238
|
+
- Pinned requirements files per extra under `requirements/`.
|
|
239
|
+
|
|
240
|
+
[Unreleased]: https://github.com/DoctorDean/molforge/compare/v0.0.1...HEAD
|
molforge-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Dean Sherry
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
molforge-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: molforge
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A unified Python library for structural bioinformatics, MD, protein engineering, and ML.
|
|
5
|
+
Project-URL: Homepage, https://github.com/DoctorDean/molforge
|
|
6
|
+
Project-URL: Documentation, https://molforge.readthedocs.io
|
|
7
|
+
Project-URL: Repository, https://github.com/DoctorDean/molforge
|
|
8
|
+
Project-URL: Issues, https://github.com/DoctorDean/molforge/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/DoctorDean/molforge/blob/main/CHANGELOG.md
|
|
10
|
+
Author: Dean Sherry
|
|
11
|
+
License: MIT License
|
|
12
|
+
|
|
13
|
+
Copyright (c) 2026 Dean Sherry
|
|
14
|
+
|
|
15
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
16
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
17
|
+
in the Software without restriction, including without limitation the rights
|
|
18
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
19
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
20
|
+
furnished to do so, subject to the following conditions:
|
|
21
|
+
|
|
22
|
+
The above copyright notice and this permission notice shall be included in all
|
|
23
|
+
copies or substantial portions of the Software.
|
|
24
|
+
|
|
25
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
26
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
27
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
28
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
29
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
30
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
31
|
+
SOFTWARE.
|
|
32
|
+
License-File: LICENSE
|
|
33
|
+
Keywords: bioinformatics,computational-biology,docking,machine-learning,molecular-dynamics,protein-engineering,structural-biology
|
|
34
|
+
Classifier: Development Status :: 3 - Alpha
|
|
35
|
+
Classifier: Intended Audience :: Science/Research
|
|
36
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
37
|
+
Classifier: Operating System :: OS Independent
|
|
38
|
+
Classifier: Programming Language :: Python :: 3
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
41
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
42
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
43
|
+
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
44
|
+
Classifier: Typing :: Typed
|
|
45
|
+
Requires-Python: >=3.10
|
|
46
|
+
Requires-Dist: numpy>=1.24
|
|
47
|
+
Requires-Dist: typing-extensions>=4.7
|
|
48
|
+
Provides-Extra: all
|
|
49
|
+
Requires-Dist: biopython>=1.83; extra == 'all'
|
|
50
|
+
Requires-Dist: biotite>=0.40; extra == 'all'
|
|
51
|
+
Requires-Dist: mdtraj>=1.10; extra == 'all'
|
|
52
|
+
Requires-Dist: openmm>=8.1; (platform_system != 'Windows') and extra == 'all'
|
|
53
|
+
Requires-Dist: rdkit>=2023.9; extra == 'all'
|
|
54
|
+
Requires-Dist: scipy>=1.11; extra == 'all'
|
|
55
|
+
Requires-Dist: torch>=2.1; extra == 'all'
|
|
56
|
+
Requires-Dist: transformers>=4.40; extra == 'all'
|
|
57
|
+
Provides-Extra: dev
|
|
58
|
+
Requires-Dist: build>=1.0; extra == 'dev'
|
|
59
|
+
Requires-Dist: hypothesis>=6.92; extra == 'dev'
|
|
60
|
+
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
61
|
+
Requires-Dist: pre-commit>=3.6; extra == 'dev'
|
|
62
|
+
Requires-Dist: pytest-cov>=4.1; extra == 'dev'
|
|
63
|
+
Requires-Dist: pytest-xdist>=3.5; extra == 'dev'
|
|
64
|
+
Requires-Dist: pytest>=7.4; extra == 'dev'
|
|
65
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
66
|
+
Requires-Dist: twine>=4.0; extra == 'dev'
|
|
67
|
+
Provides-Extra: docking
|
|
68
|
+
Requires-Dist: rdkit>=2023.9; extra == 'docking'
|
|
69
|
+
Provides-Extra: docs
|
|
70
|
+
Requires-Dist: mkdocs-jupyter>=0.24; extra == 'docs'
|
|
71
|
+
Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
|
|
72
|
+
Requires-Dist: mkdocs>=1.5; extra == 'docs'
|
|
73
|
+
Requires-Dist: mkdocstrings[python]>=0.24; extra == 'docs'
|
|
74
|
+
Provides-Extra: io
|
|
75
|
+
Requires-Dist: biopython>=1.83; extra == 'io'
|
|
76
|
+
Requires-Dist: biotite>=0.40; extra == 'io'
|
|
77
|
+
Provides-Extra: md
|
|
78
|
+
Requires-Dist: mdtraj>=1.10; extra == 'md'
|
|
79
|
+
Requires-Dist: openmm>=8.1; (platform_system != 'Windows') and extra == 'md'
|
|
80
|
+
Provides-Extra: ml
|
|
81
|
+
Requires-Dist: torch>=2.1; extra == 'ml'
|
|
82
|
+
Requires-Dist: transformers>=4.40; extra == 'ml'
|
|
83
|
+
Provides-Extra: sequence
|
|
84
|
+
Requires-Dist: biopython>=1.83; extra == 'sequence'
|
|
85
|
+
Provides-Extra: structure
|
|
86
|
+
Requires-Dist: biopython>=1.83; extra == 'structure'
|
|
87
|
+
Requires-Dist: biotite>=0.40; extra == 'structure'
|
|
88
|
+
Requires-Dist: scipy>=1.11; extra == 'structure'
|
|
89
|
+
Description-Content-Type: text/markdown
|
|
90
|
+
|
|
91
|
+
# molforge
|
|
92
|
+
|
|
93
|
+
[CI](https://github.com/DoctorDean/molforge/actions/workflows/ci.yml)
|
|
94
|
+
[PyPI version](https://pypi.org/project/molforge/)
|
|
95
|
+
[Python versions](https://pypi.org/project/molforge/)
|
|
96
|
+
[License: MIT](LICENSE)
|
|
97
|
+
[Linted with Ruff](https://github.com/astral-sh/ruff)
|
|
98
|
+
|
|
99
|
+

|
|
100
|
+
|
|
101
|
+
> **A forge for protein workflows.** One Python script, every tool in your stack: docking, MD, folding, antibody and nanobody engineering, de novo design — without the format-conversion tax.
|
|
102
|
+
|
|
103
|
+
`molforge` is an open-source Python library that lets you compose protein workflows across the tools you already use. Bring your structures and sequences in, plug in your engines of choice (Vina, OpenMM, ESMFold, AlphaFold, RFdiffusion, ProteinMPNN, your own model), and walk out with a coherent pipeline instead of five incompatible Python environments and a graveyard of conversion scripts.
|
|
104
|
+
|
|
105
|
+
---
|
|
106
|
+
|
|
107
|
+
## Why molforge exists
|
|
108
|
+
|
|
109
|
+
Modern protein work is multi-tool by nature. A real antibody-design loop might fold a sequence with ESMFold, identify CDR loops with ANARCI, score binding with FoldX, dock against a target with AutoDock Vina, relax with OpenMM, then evaluate with Rosetta. **Each of those tools speaks its own dialect**: different file formats, different atom-naming conventions, different ideas of what "the structure" is. Most of an engineer's day disappears into glue code.
|
|
110
|
+
|
|
111
|
+
`molforge` is the connective tissue. It provides:
|
|
112
|
+
|
|
113
|
+
- A **canonical, NumPy-backed data model** that's cheap to convert in and out of — so every engine in your pipeline reads from and writes to the same representation.
|
|
114
|
+
- **Thin wrappers** around the engines you already trust, with consistent interfaces (so swapping ESMFold for AlphaFold is one line, not a refactor).
|
|
115
|
+
- **First-class IO** for the messy reality of structural-bio files: PDB, mmCIF, FASTA, PDBQT, PQR, SDF, MOL2, and AlphaFold predictions with pLDDT.
|
|
116
|
+
- A **plugin registry** so the next docking engine, folding model, or scoring function can slot into your pipeline without forking molforge.
|
|
117
|
+
|
|
118
|
+
Built as a library, not a framework: there's no orchestrator, no DAG runtime, no decorators you have to import to make things work. Use whatever workflow tool you like — Snakemake, Nextflow, Prefect, a shell script — molforge is just imports.
|
|
119
|
+
|
|
120
|
+
## Design principles
|
|
121
|
+
|
|
122
|
+
1. **Workflows over silos.** Every design decision is judged by "does this make it easier to chain N tools together?"
|
|
123
|
+
2. **Wrappers, not reimplementations.** We don't rebuild OpenMM or AutoDock. We give them a shared vocabulary.
|
|
124
|
+
3. **One data model, two views.** Hierarchical (`protein.chains["A"].residues[42]`) for biology, linear (`protein.atom_array.coords`) for ML — same data, no conversion.
|
|
125
|
+
4. **Heterogeneous content is first-class.** Antibodies have glycans. Drug targets have ligands and ions. Membrane proteins have lipids. The data model handles all of it without an awkward special case for "non-protein."
|
|
126
|
+
5. **Typed, tested, documented.** Strict mypy, ruff-clean, >90% coverage target.
|
|
127
|
+
|
|
128
|
+
## Installation
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
# minimal core (data model + sequence + basic IO)
|
|
132
|
+
pip install molforge
|
|
133
|
+
|
|
134
|
+
# with structure analysis (RMSD, contacts, DSSP)
|
|
135
|
+
pip install "molforge[structure]"
|
|
136
|
+
|
|
137
|
+
# with ML wrappers (torch, transformers)
|
|
138
|
+
pip install "molforge[ml]"
|
|
139
|
+
|
|
140
|
+
# with MD support (openmm, mdtraj)
|
|
141
|
+
pip install "molforge[md]"
|
|
142
|
+
|
|
143
|
+
# with docking (rdkit for small molecules)
|
|
144
|
+
pip install "molforge[docking]"
|
|
145
|
+
|
|
146
|
+
# everything
|
|
147
|
+
pip install "molforge[all]"
|
|
148
|
+
|
|
149
|
+
# development
|
|
150
|
+
git clone https://github.com/DoctorDean/molforge.git
|
|
151
|
+
cd molforge
|
|
152
|
+
pip install -e ".[dev,all]"
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Quickstart
|
|
156
|
+
|
|
157
|
+
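The smallest end-to-end example that demonstrates the cross-tool workflow (the OpenMM step illustrates the planned API; that wrapper is not yet available — see [Status](#status)):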
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
import molforge as mf
|
|
161
|
+
from molforge.wrappers.folding import ESMFold
|
|
162
|
+
from molforge.wrappers.docking import Vina
|
|
163
|
+
from molforge.wrappers.md import OpenMM
|
|
164
|
+
|
|
165
|
+
# 1. Fold a sequence
|
|
166
|
+
folded = ESMFold().predict("MKTVRQERLKSIVRILERSKEPVSGAQLAEELSVS...")
|
|
167
|
+
|
|
168
|
+
# 2. Save as PDB, save as mmCIF, hand to anything
|
|
169
|
+
mf.save(folded, "candidate.pdb")
|
|
170
|
+
mf.save(folded, "candidate.cif")
|
|
171
|
+
|
|
172
|
+
# 3. Dock a ligand against it (Vina-prepared PDBQT files)
|
|
173
|
+
result = Vina().dock(
|
|
174
|
+
receptor="receptor.pdbqt",
|
|
175
|
+
ligand="ligand.pdbqt",
|
|
176
|
+
center=(10.0, 5.0, -2.0),
|
|
177
|
+
box_size=(20.0, 20.0, 20.0),
|
|
178
|
+
)
|
|
179
|
+
top_pose = result.best
|
|
180
|
+
|
|
181
|
+
# 4. Drop into MD for relaxation
|
|
182
|
+
trajectory = OpenMM().simulate(top_pose.complex, steps=10_000)
|
|
183
|
+
|
|
184
|
+
# 5. Inspect — hierarchical or linear, your call
|
|
185
|
+
print(folded.sequence) # one-letter per chain
|
|
186
|
+
print(folded.atom_array.coords.shape) # (N, 3) NumPy array
|
|
187
|
+
ca = folded.chains["A"].residues[42].atoms["CA"] # specific atom
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Notice what *isn't* there: file-format conversions, atom-name remapping, hand-rolled PDB parsers, custom data classes per engine. molforge does that work so your script reads like the science you're actually doing.
|
|
191
|
+
|
|
192
|
+
## Repository structure
|
|
193
|
+
|
|
194
|
+
```
|
|
195
|
+
molforge/
|
|
196
|
+
├── src/molforge/ # Library source (src-layout)
|
|
197
|
+
│ ├── core/ # Hierarchical + linear data model
|
|
198
|
+
│ ├── sequence/ # Sequence operations, alignment, mutations
|
|
199
|
+
│ ├── structure/ # RMSD, SASA, contacts, geometry
|
|
200
|
+
│ ├── md/ # MD trajectories and analysis
|
|
201
|
+
│ ├── docking/ # Docking abstractions and pose handling
|
|
202
|
+
│ ├── ml/ # ML utilities, featurizers, tensor views
|
|
203
|
+
│ ├── io/ # PDB, mmCIF, FASTA, PDBQT, PQR, SDF, MOL2
|
|
204
|
+
│ ├── plugins/ # Plugin registry and entry-point discovery
|
|
205
|
+
│ ├── metrics/ # TM-score, lDDT, GDT-TS, docking metrics
|
|
206
|
+
│ └── wrappers/ # Thin interfaces to external engines
|
|
207
|
+
│ ├── folding/ # AlphaFold, ESMFold, Boltz, Rosetta
|
|
208
|
+
│ ├── docking/ # AutoDock Vina, DiffDock
|
|
209
|
+
│ └── md/ # OpenMM, GROMACS
|
|
210
|
+
├── tests/ # pytest suite (unit + integration)
|
|
211
|
+
├── docs/ # Architecture docs and reference
|
|
212
|
+
├── notebooks/ # Walkthroughs and worked examples
|
|
213
|
+
├── plugins/ # Example external plugins
|
|
214
|
+
├── pyproject.toml # Build config, deps, tool config
|
|
215
|
+
└── ACKNOWLEDGEMENTS.md # Prior art and intellectual debts
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
A deeper architecture walkthrough is in [`docs/architecture/overview.md`](docs/architecture/overview.md).
|
|
219
|
+
|
|
220
|
+
## Status
|
|
221
|
+
|
|
222
|
+
molforge is **pre-1.0** and under active development. What's working today:
|
|
223
|
+
|
|
224
|
+
- **Core data model** — `Protein` / `Chain` / `Residue` / `Atom` over a canonical NumPy-backed `AtomArray`, with first-class heterogeneous content (ligands, water, ions, modified residues).
|
|
225
|
+
- **File I/O** — full read/write for **PDB** (with NMR ensembles, altlocs, insertion codes) and **mmCIF** (the modern format for large structures); **FASTA** sequence I/O; **AlphaFold** loader that surfaces pLDDT as a first-class field. PDBQT, PQR, SDF, MOL2 are stubbed with committed APIs.
|
|
226
|
+
- **Sequence operations** — pairwise **alignment** (Needleman-Wunsch / Smith-Waterman with BLOSUM62 / PAM250), point **mutations** with protein-engineering notation (`A123V`, `A123V/T56K`, `H:K42N`), composition and property helpers (MW, GRAVY, aromaticity).
|
|
227
|
+
- **Structural analysis** — Kabsch/Umeyama **superposition**, **RMSD** (whole-structure and per-residue, multiple atom subsets), **contact and distance maps**, **radius of gyration**, **centroid / center of mass**, in-place **translate / rotate**, **DSSP** secondary-structure assignment (8-state and 3-state, no external binary).
|
|
228
|
+
- **Two engine wrappers working end-to-end** — **ESMFold** (sequence → folded `Protein`, `pip install 'molforge[ml]'`) and **AutoDock Vina** (receptor + ligand → ranked docking poses, `pip install vina meeko`). The wrapper pattern is now validated across both folding and docking categories; the remaining wrappers will follow the same template.
|
|
229
|
+
|
|
230
|
+
Coming next: SASA, backbone dihedrals (φ/ψ/ω), `meeko`-based receptor/ligand prep for Vina, additional engine wrappers (AlphaFold, OpenMM). See [`CHANGELOG.md`](CHANGELOG.md) for the full picture.
|
|
231
|
+
|
|
232
|
+
## Acknowledgements
|
|
233
|
+
|
|
234
|
+
molforge is inspired by [Protkit](https://github.com/silicogenesis/protkit) (SilicoGenesis), which pioneered the idea of a unified, hierarchical representation for protein structures in Python. molforge extends that direction toward cross-tool, cross-format workflows and a different internal architecture (NumPy-backed linear store, hierarchical views as accessors). See [`ACKNOWLEDGEMENTS.md`](ACKNOWLEDGEMENTS.md) for the longer list of projects we've learned from.
|
|
235
|
+
|
|
236
|
+
## Contributing
|
|
237
|
+
|
|
238
|
+
We welcome contributions. See [`CONTRIBUTING.md`](CONTRIBUTING.md) and the [Code of Conduct](CODE_OF_CONDUCT.md) before opening an issue or PR.
|
|
239
|
+
|
|
240
|
+
## License
|
|
241
|
+
|
|
242
|
+
MIT — see [`LICENSE`](LICENSE).
|
|
243
|
+
|
|
244
|
+
## Citation
|
|
245
|
+
|
|
246
|
+
If you use `molforge` in academic work, please cite us (BibTeX coming with the first tagged release).
|