foldreport 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. foldreport-0.1.0/CLAUDE.md +35 -0
  2. foldreport-0.1.0/LICENSE +21 -0
  3. foldreport-0.1.0/MANIFEST.in +23 -0
  4. foldreport-0.1.0/PKG-INFO +154 -0
  5. foldreport-0.1.0/PLAN.md +209 -0
  6. foldreport-0.1.0/README.md +129 -0
  7. foldreport-0.1.0/foldreport/__init__.py +16 -0
  8. foldreport-0.1.0/foldreport/cli.py +78 -0
  9. foldreport-0.1.0/foldreport/figures.py +125 -0
  10. foldreport-0.1.0/foldreport/metrics.py +89 -0
  11. foldreport-0.1.0/foldreport/models.py +75 -0
  12. foldreport-0.1.0/foldreport/parsers/__init__.py +65 -0
  13. foldreport-0.1.0/foldreport/parsers/af3_server.py +129 -0
  14. foldreport-0.1.0/foldreport/parsers/alphafold_db.py +144 -0
  15. foldreport-0.1.0/foldreport/parsers/base.py +137 -0
  16. foldreport-0.1.0/foldreport/parsers/boltz.py +181 -0
  17. foldreport-0.1.0/foldreport/parsers/colabfold.py +151 -0
  18. foldreport-0.1.0/foldreport/parsers/openfold3.py +171 -0
  19. foldreport-0.1.0/foldreport/report/3Dmol-min.js +2 -0
  20. foldreport-0.1.0/foldreport/report/__init__.py +5 -0
  21. foldreport-0.1.0/foldreport/report/builder.py +277 -0
  22. foldreport-0.1.0/foldreport/report/template.html +299 -0
  23. foldreport-0.1.0/foldreport.egg-info/PKG-INFO +154 -0
  24. foldreport-0.1.0/foldreport.egg-info/SOURCES.txt +80 -0
  25. foldreport-0.1.0/foldreport.egg-info/dependency_links.txt +1 -0
  26. foldreport-0.1.0/foldreport.egg-info/entry_points.txt +2 -0
  27. foldreport-0.1.0/foldreport.egg-info/requires.txt +8 -0
  28. foldreport-0.1.0/foldreport.egg-info/top_level.txt +1 -0
  29. foldreport-0.1.0/pyproject.toml +47 -0
  30. foldreport-0.1.0/setup.cfg +4 -0
  31. foldreport-0.1.0/tests/conftest.py +62 -0
  32. foldreport-0.1.0/tests/data/af3_server/fold_mycomplex_full_data_0.json +1 -0
  33. foldreport-0.1.0/tests/data/af3_server/fold_mycomplex_full_data_1.json +1 -0
  34. foldreport-0.1.0/tests/data/af3_server/fold_mycomplex_job_request.json +1 -0
  35. foldreport-0.1.0/tests/data/af3_server/fold_mycomplex_model_0.cif +140 -0
  36. foldreport-0.1.0/tests/data/af3_server/fold_mycomplex_model_1.cif +140 -0
  37. foldreport-0.1.0/tests/data/af3_server/fold_mycomplex_summary_confidences_0.json +1 -0
  38. foldreport-0.1.0/tests/data/af3_server/fold_mycomplex_summary_confidences_1.json +1 -0
  39. foldreport-0.1.0/tests/data/af3_server/terms_of_use.md +1 -0
  40. foldreport-0.1.0/tests/data/alphafold_db/AF-P00001-F1-metadata.json +1 -0
  41. foldreport-0.1.0/tests/data/alphafold_db/AF-P00001-F1-model_v6.cif +111 -0
  42. foldreport-0.1.0/tests/data/alphafold_db/AF-P00001-F1-predicted_aligned_error_v6.json +1 -0
  43. foldreport-0.1.0/tests/data/alphafold_db/AF-P00002-F1-metadata.json +1 -0
  44. foldreport-0.1.0/tests/data/alphafold_db/AF-P00002-F1-model_v6.cif +101 -0
  45. foldreport-0.1.0/tests/data/alphafold_db/AF-P00002-F1-predicted_aligned_error_v6.json +1 -0
  46. foldreport-0.1.0/tests/data/boltz/log.txt +1 -0
  47. foldreport-0.1.0/tests/data/boltz/predictions/mycomplex/confidence_mycomplex_model_0.json +1 -0
  48. foldreport-0.1.0/tests/data/boltz/predictions/mycomplex/confidence_mycomplex_model_1.json +1 -0
  49. foldreport-0.1.0/tests/data/boltz/predictions/mycomplex/mycomplex_model_0.cif +140 -0
  50. foldreport-0.1.0/tests/data/boltz/predictions/mycomplex/mycomplex_model_1.cif +140 -0
  51. foldreport-0.1.0/tests/data/boltz/predictions/mycomplex/pae_mycomplex_model_0.npz +0 -0
  52. foldreport-0.1.0/tests/data/boltz/predictions/mycomplex/pae_mycomplex_model_1.npz +0 -0
  53. foldreport-0.1.0/tests/data/boltz/predictions/mycomplex/plddt_mycomplex_model_0.npz +0 -0
  54. foldreport-0.1.0/tests/data/boltz/predictions/mycomplex/plddt_mycomplex_model_1.npz +0 -0
  55. foldreport-0.1.0/tests/data/colabfold/cite.bibtex +1 -0
  56. foldreport-0.1.0/tests/data/colabfold/complex.a3m +2 -0
  57. foldreport-0.1.0/tests/data/colabfold/complex_scores_rank_001_alphafold2_multimer_v3_model_3_seed_000.json +1 -0
  58. foldreport-0.1.0/tests/data/colabfold/complex_scores_rank_002_alphafold2_multimer_v3_model_1_seed_000.json +1 -0
  59. foldreport-0.1.0/tests/data/colabfold/complex_unrelaxed_rank_001_alphafold2_multimer_v3_model_3_seed_000.pdb +60 -0
  60. foldreport-0.1.0/tests/data/colabfold/complex_unrelaxed_rank_002_alphafold2_multimer_v3_model_1_seed_000.pdb +60 -0
  61. foldreport-0.1.0/tests/data/colabfold/config.json +1 -0
  62. foldreport-0.1.0/tests/data/colabfold/log.txt +2 -0
  63. foldreport-0.1.0/tests/data/empty/readme.txt +1 -0
  64. foldreport-0.1.0/tests/data/malformed/broken_scores_rank_001_alphafold2_ptm_model_1_seed_000.json +1 -0
  65. foldreport-0.1.0/tests/data/malformed/broken_unrelaxed_rank_001_alphafold2_ptm_model_1_seed_000.pdb +35 -0
  66. foldreport-0.1.0/tests/data/openfold3/mycomplex/experiment_config.json +1 -0
  67. foldreport-0.1.0/tests/data/openfold3/mycomplex/seed-1_sample-0/mycomplex_confidences.json +1 -0
  68. foldreport-0.1.0/tests/data/openfold3/mycomplex/seed-1_sample-0/mycomplex_model.cif +140 -0
  69. foldreport-0.1.0/tests/data/openfold3/mycomplex/seed-1_sample-0/mycomplex_summary_confidences.json +1 -0
  70. foldreport-0.1.0/tests/data/openfold3/mycomplex/seed-1_sample-1/mycomplex_confidences.json +1 -0
  71. foldreport-0.1.0/tests/data/openfold3/mycomplex/seed-1_sample-1/mycomplex_model.cif +140 -0
  72. foldreport-0.1.0/tests/data/openfold3/mycomplex/seed-1_sample-1/mycomplex_summary_confidences.json +1 -0
  73. foldreport-0.1.0/tests/data/single_chain/monomer_scores_rank_001_alphafold2_ptm_model_3_seed_000.json +1 -0
  74. foldreport-0.1.0/tests/data/single_chain/monomer_unrelaxed_rank_001_alphafold2_ptm_model_3_seed_000.pdb +35 -0
  75. foldreport-0.1.0/tests/make_fixtures.py +349 -0
  76. foldreport-0.1.0/tests/test_alphafold_db.py +49 -0
  77. foldreport-0.1.0/tests/test_cli.py +36 -0
  78. foldreport-0.1.0/tests/test_colabfold_parser.py +54 -0
  79. foldreport-0.1.0/tests/test_edge_cases.py +116 -0
  80. foldreport-0.1.0/tests/test_metrics_and_figures.py +64 -0
  81. foldreport-0.1.0/tests/test_other_parsers.py +87 -0
  82. foldreport-0.1.0/tests/test_report.py +41 -0
@@ -0,0 +1,35 @@
1
+ # CLAUDE.md
2
+
3
+ Guidance for working in this repository.
4
+
5
+ ## Project
6
+
7
+ **FoldReport** — point the tool at a folder of structure predictions (ColabFold,
8
+ AlphaFold 3 Server, Boltz, OpenFold3) and get a single self-contained HTML report
9
+ that ranks all predictions by confidence and lets you explore each one (interactive
10
+ PAE, per-residue pLDDT, interface metrics) without installing anything else or
11
+ opening a notebook.
12
+
13
+ The full specification lives in [PLAN.md](PLAN.md).
14
+
15
+ ## Language policy
16
+
17
+ **All code, comments, docstrings, identifiers, commit messages, documentation, and
18
+ user-facing strings MUST be written in English.** This applies to everything in the
19
+ repository regardless of the language used in conversation or in PLAN.md (which is in
20
+ Spanish). Do not introduce non-English text into the codebase.
21
+
22
+ ## Architecture
23
+
24
+ - The heart of the project is the internal representation in `foldreport/models.py`.
25
+ Every parser produces `list[Prediction]`; nothing downstream knows the original
26
+ format. Adding a tool means writing a parser, not touching the rest.
27
+ - Missing metrics are `None`, never invented. The report renders "N/A".
28
+ - Format detection is automatic via each parser's `can_handle()`.
29
+
30
+ ## Dev workflow
31
+
32
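+ - Use the project virtualenv at `.venv`. The commands below use the Windows layout; on macOS/Linux substitute `.venv/bin/python` for `.venv\Scripts\python.exe`.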
33
+ - Install in editable mode: `.venv\Scripts\python.exe -m pip install -e ".[dev]"`
34
+ - Run tests: `.venv\Scripts\python.exe -m pytest`
35
+ - Keep dependencies minimal — one-command install is core to adoption.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Sergio Gracia
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,23 @@
1
+ # Source distribution contents.
2
+ # The wheel's runtime data files are declared via [tool.setuptools.package-data]
3
+ # in pyproject.toml; this file controls what extra files ship in the sdist.
4
+
5
+ include README.md
6
+ include LICENSE
7
+ include CLAUDE.md
8
+ include PLAN.md
9
+
10
+ # Bundled report assets (also required at runtime).
11
+ include foldreport/report/template.html
12
+ include foldreport/report/3Dmol-min.js
13
+
14
+ # Tests and their fixtures, so the sdist is self-checking.
15
+ recursive-include tests *.py
16
+ recursive-include tests/data *
17
+
18
+ # Keep build noise out of the sdist.
19
+ global-exclude __pycache__ *.py[cod]
20
+ prune .github
21
+ prune .claude
22
+ prune examples
23
+ prune .venv
@@ -0,0 +1,154 @@
1
+ Metadata-Version: 2.4
2
+ Name: foldreport
3
+ Version: 0.1.0
4
+ Summary: Unify structure-prediction outputs (ColabFold, AlphaFold 3 Server, Boltz, OpenFold3) into a single self-contained HTML report ranked by confidence.
5
+ Author: Sergio Gracia
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/sergio-gracia/foldreport
8
+ Project-URL: Issues, https://github.com/sergio-gracia/foldreport/issues
9
+ Keywords: alphafold,colabfold,boltz,protein-structure,plddt,pae,bioinformatics
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
14
+ Requires-Python: >=3.10
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: numpy>=1.23
18
+ Requires-Dist: pandas>=1.5
19
+ Requires-Dist: matplotlib>=3.6
20
+ Requires-Dist: gemmi>=0.6
21
+ Requires-Dist: click>=8.1
22
+ Provides-Extra: dev
23
+ Requires-Dist: pytest>=7.0; extra == "dev"
24
+ Dynamic: license-file
25
+
26
+ # FoldReport
27
+
28
+ **Point it at a folder of structure predictions and get one self-contained HTML report
29
+ that ranks them all by confidence.**
30
+
31
+ ### ▶ [Try the live demo report →](https://sergio-gracia.github.io/foldreport/)
32
+
33
+ See exactly what you get before installing anything: the same complex predicted by all
34
+ four supported tools (eight pooled predictions), ranked in one page. Click a row to open
35
+ its detail card — interactive 3D viewer colored by pLDDT, per-residue pLDDT plot, PAE
36
+ heatmap, and interface metrics. It is the exact file `foldreport` writes, served as-is.
37
+
38
+ FoldReport reads the outputs of modern structure-prediction tools — **ColabFold**,
39
+ the **AlphaFold 3 Server**, **Boltz**, and **OpenFold3** — as well as entries from the
40
+ **AlphaFold Protein Structure Database**, and unifies them into a single navigable
41
+ `.html` file: a confidence-ranked table on top (filterable by tool,
42
+ name, and confidence), and a detail card per prediction with an embedded 3D viewer
43
+ (colored by pLDDT), a per-residue pLDDT plot, an interactive PAE heatmap, and interface
44
+ metrics (pTM, ipTM, …).
45
+
46
+ The report is **one file**. No server, no notebook, no internet connection, and no
47
+ adjacent assets — open it in any browser and share it as a single attachment.
48
+
49
+ ## Why
50
+
51
+ Running AlphaFold is solved. The bottleneck moved *downstream*: you end up with dozens
52
+ or hundreds of output folders, in slightly different formats, and have to decide *what
53
+ to look at*. FoldReport answers "300 outputs from 3 tools — which ones matter?" in one
54
+ command.
55
+
56
+ ## Install
57
+
58
+ ```bash
59
+ pip install foldreport
60
+ ```
61
+
62
+ Or from a checkout:
63
+
64
+ ```bash
65
+ pip install .
66
+ ```
67
+
68
+ ## Quick start (copy-paste)
69
+
70
+ A ready-to-run example dataset ships in the repo: the same complex predicted by all
71
+ four supported tools (two models each, eight pooled predictions), one folder per tool.
72
+
73
+ ```bash
74
+ foldreport examples/demo/colabfold examples/demo/af3_server examples/demo/boltz examples/demo/openfold3 -o report.html
75
+ ```
76
+
77
+ Open `report.html` in your browser. That's it.
78
+
79
+ Point it at a single run the same way — the format is autodetected:
80
+
81
+ ```bash
82
+ foldreport path/to/colabfold_run -o report.html
83
+ ```
84
+
85
+ Pool several runs (even from different tools) into one ranked report:
86
+
87
+ ```bash
88
+ foldreport run_colabfold/ run_af3/ run_boltz/ run_openfold3/ -o combined.html
89
+ ```
90
+
91
+ ### Options
92
+
93
+ | Flag | Description |
94
+ |------|-------------|
95
+ | `-o, --output` | Path of the HTML report to write (default `foldreport.html`). |
96
+ | `-t, --title` | Title shown at the top of the report. |
97
+ | `--csv` | Also write the ranked metrics table as CSV. |
98
+ | `-V, --version` | Print version. |
99
+
100
+ ## Supported tools
101
+
102
+ | Tool | Detected from | pLDDT | PAE | pTM / ipTM |
103
+ |------|---------------|:-----:|:---:|:----------:|
104
+ | ColabFold | `*_scores_rank_*.json` + `*_rank_*.pdb` | ✓ | ✓ | ✓ |
105
+ | AlphaFold 3 Server | `*_summary_confidences_*.json` + `*_full_data_*.json` | ✓ | ✓ | ✓ |
106
+ | Boltz | `confidence_*_model_*.json` + `*.npz` | ✓ | ✓ | ✓ |
107
+ | OpenFold3 | `seed-*_sample-*/` + `*_confidences.json` + `*_summary_confidences.json` | ✓ | ✓ | ✓ |
108
+ | AlphaFold DB | `AF-<ACC>-F*-model_v*.cif` + `*_predicted_aligned_error_v*.json` | ✓ | ✓ | — |
109
+
110
+ Metrics a tool does not provide are shown as **N/A** — never fabricated.
111
+
112
+ ## How it works
113
+
114
+ Every parser converts a tool's on-disk output into a common internal representation
115
+ (`foldreport/models.py`). Nothing downstream — metrics, figures, report — knows the
116
+ original format, so adding a tool means writing one parser, not touching the rest.
117
+ Format detection is automatic; you never declare which tool produced a folder.
118
+
119
+ ## Development
120
+
121
+ ```bash
122
+ python -m venv .venv
123
+ .venv/Scripts/python -m pip install -e ".[dev]" # Windows
124
+ .venv/bin/python -m pip install -e ".[dev]" # macOS/Linux
125
+ .venv/Scripts/python -m pytest # run tests (Windows; on macOS/Linux use .venv/bin/python)
126
+ ```
127
+
128
+ The test fixtures are synthetic but faithful to each tool's real file layout, names,
129
+ and JSON keys; regenerate them with `python tests/make_fixtures.py`. Regenerate the
130
+ example dataset with `python examples/make_demo.py`.
131
+
132
+ ### Try it on real biological data
133
+
134
+ To validate the pipeline on genuine predictions, download a small set of real proteins
135
+ from the [AlphaFold Protein Structure Database](https://alphafold.ebi.ac.uk):
136
+
137
+ ```bash
138
+ python examples/fetch_afdb.py # default set: INS, UBC, LYZ, HBA1
139
+ python examples/fetch_afdb.py P01308 P0CG48 P00698 # or any UniProt accessions
140
+ ```
141
+
142
+ This writes the real structures + PAE into `examples/afdb_demo/` and builds
143
+ `examples/afdb_report.html`. It is the only network-touching part of the project and is
144
+ fully opt-in — the test suite never hits the network.
145
+
146
+ ## Scope
147
+
148
+ FoldReport processes *existing* predictions only. It does not run inference (no GPU, no
149
+ model), predict mutation effects, or edit structures. The deliverable is a CLI plus a
150
+ static HTML file — no backend, database, or accounts.
151
+
152
+ ## License
153
+
154
+ MIT.
@@ -0,0 +1,209 @@
1
+ # FoldReport — Especificación de proyecto
2
+
3
+ > Brief para construir el proyecto con Claude Code. Pásale este fichero como contexto
4
+ > inicial. Cada fase tiene criterios de aceptación claros; trabájalas en orden y no
5
+ > empieces una hasta que la anterior pase sus criterios.
6
+
7
+ ---
8
+
9
+ ## 1. Visión en una frase
10
+
11
+ Apuntas la herramienta a una carpeta de predicciones de estructura —vengan de **ColabFold**,
12
+ del **AlphaFold 3 Server**, de **Boltz** o de **OpenFold3**— y obtienes un **único informe HTML
13
+ autocontenido** que rankea todas las predicciones por confianza y deja explorar cada una
14
+ (PAE interactivo, pLDDT por residuo, métricas de interfaz) sin instalar nada más ni abrir un notebook.
15
+
16
+ **La cuña que nadie cubre:** multi-herramienta + por lotes + informe navegable en un solo archivo.
17
+ Mantén esta frase como filtro: si una feature no sirve a "unificar salidas de varias
18
+ herramientas en un informe compartible", probablemente sobra para el MVP.
19
+
20
+ ---
21
+
22
+ ## 2. El problema (por qué existe esto)
23
+
24
+ - Ejecutar AlphaFold ya está resuelto (ColabFold, el AF3 Server). El cuello de botella se movió
25
+ al **después**: tienes decenas o cientos de carpetas de salida y necesitas decidir *qué mirar*.
26
+ - Cada herramienta moderna escupe formatos y métricas ligeramente distintos (pLDDT, PAE, pTM,
27
+ ipTM, mpDockQ) con estructuras de carpeta diferentes. Comparar entre ellas hoy es manual.
28
+ - Las herramientas existentes cubren piezas sueltas: hacen figuras de una predicción cada vez,
29
+ o requieren un visor pesado, o solo entienden un formato. Ninguna resuelve
30
+ "300 salidas de 3 herramientas, decididme cuáles importan".
31
+
32
+ ## 3. Usuario objetivo
33
+
34
+ Biólogo estructural / bioinformático que ya tiene salidas de predicción y necesita triarlas e
35
+ interpretarlas rápido. Asume que sabe qué es pLDDT y PAE pero no quiere escribir scripts de
36
+ parsing cada vez.
37
+
38
+ ---
39
+
40
+ ## 4. Alcance del MVP
41
+
42
+ ### DENTRO (v0.1 – v1.0)
43
+ - Ingesta de una carpeta (o varias) con predicciones.
44
+ - Parseo robusto de **ColabFold** primero; **AF3 Server** y **Boltz** después.
45
+ - Normalización a una representación interna común (ver §6).
46
+ - Tabla de métricas ordenable/filtrable: una fila por predicción/modelo.
47
+ - Figuras de calidad de publicación: PAE y pLDDT por residuo.
48
+ - Informe HTML **autocontenido** (un solo .html, sin dependencias externas en runtime) con
49
+ visor 3D embebido y ranking por confianza.
50
+ - CLI de un comando: `foldreport <carpeta> -o informe.html`.
51
+
52
+ ### FUERA (explícitamente, para no dispersarse)
53
+ - Ejecutar AlphaFold / inferencia (cero GPU, cero modelo). Solo se procesan salidas existentes.
54
+ - Predicción de efecto de mutaciones / variantes.
55
+ - Edición o reparación de estructuras.
56
+ - Backend con servidor, base de datos o cuentas de usuario. El entregable es CLI + HTML estático.
57
+ - Soporte de formatos legacy raros. Empieza por lo que la comunidad usa hoy.
58
+
59
+ ---
60
+
61
+ ## 5. Stack técnico
62
+
63
+ - **Lenguaje:** Python 3.10+.
64
+ - **Parsing estructura (mmCIF/PDB):** `gemmi` (preferido) o `biotite`.
65
+ - **Datos/tablas:** `pandas`.
66
+ - **Figuras estáticas:** `matplotlib`.
67
+ - **Visor 3D embebido en navegador (sin servidor):** `py3Dmol` o Mol*.
68
+ - **CLI:** `click` o `typer`.
69
+ - **Empaquetado:** `pyproject.toml`, instalable con `pip install .`. Objetivo: `pip install foldreport`.
70
+ - **Tests:** `pytest`.
71
+
72
+ Mantén las dependencias mínimas. Cada dependencia nueva es fricción de instalación, y la
73
+ instalación de un comando es clave para la adopción.
74
+
75
+ ---
76
+
77
+ ## 6. Diseño de la representación interna (el corazón del proyecto)
78
+
79
+ El error más caro sería acoplar el código a un formato concreto. Define **primero** una capa
80
+ intermedia limpia y haz que cada parser produzca exactamente esto. Así añadir una herramienta
81
+ nueva es escribir un parser, no tocar el resto.
82
+
83
+ Estructuras sugeridas (ajústalas, pero respeta el principio):
84
+
85
+ ```python
86
+ @dataclass
87
+ class Prediction:
88
+ name: str # identificador legible
89
+ source_tool: str # "colabfold" | "af3_server" | "boltz" | ...
90
+ structure_path: Path # ruta al .cif/.pdb
91
+ chains: list[Chain]
92
+ plddt: list[float] # por residuo, orden canónico
93
+ pae: np.ndarray | None # matriz NxN o None si no hay
94
+ metrics: PredictionMetrics
95
+ raw_files: dict[str, Path] # trazabilidad de dónde salió cada cosa
96
+
97
+ @dataclass
98
+ class PredictionMetrics:
99
+ mean_plddt: float
100
+ ptm: float | None
101
+ iptm: float | None
102
+ mpdockq: float | None
103
+ n_chains: int
104
+ n_residues: int
105
+ # rellena None lo que la herramienta no provea; el informe debe tolerar huecos
106
+
107
+ # Contrato de parser: Path de carpeta -> list[Prediction]
108
+ class Parser(Protocol):
109
+ def can_handle(self, path: Path) -> bool: ...
110
+ def parse(self, path: Path) -> list[Prediction]: ...
111
+ ```
112
+
113
+ Principios:
114
+ - Todo parser devuelve `list[Prediction]`. Nada aguas abajo conoce el formato original.
115
+ - Las métricas ausentes son `None`, nunca inventadas. El informe muestra "N/A".
116
+ - Detección automática de formato vía `can_handle()`; el usuario no debería tener que declararlo.
117
+
118
+ ---
119
+
120
+ ## 7. Estructura de ficheros sugerida
121
+
122
+ ```
123
+ foldreport/
124
+ ├── pyproject.toml
125
+ ├── README.md
126
+ ├── foldreport/
127
+ │ ├── __init__.py
128
+ │ ├── cli.py
129
+ │ ├── models.py # dataclasses de §6
130
+ │ ├── parsers/
131
+ │ │ ├── __init__.py # registro + autodetección
132
+ │ │ ├── base.py # Protocol/ABC del parser
133
+ │ │ ├── colabfold.py
134
+ │ │ ├── af3_server.py
135
+ │ │ └── boltz.py
136
+ │ ├── metrics.py # cálculo/normalización de métricas
137
+ │ ├── figures.py # PAE, pLDDT (matplotlib)
138
+ │ └── report/
139
+ │ ├── builder.py # ensambla el HTML
140
+ │ └── template.html # plantilla autocontenida
141
+ ├── tests/
142
+ │ ├── data/ # fixtures mínimas reales de cada herramienta
143
+ │ └── test_*.py
144
+ └── examples/
145
+ └── demo/ # carpeta de ejemplo lista para `foldreport examples/demo`
146
+ ```
147
+
148
+ ---
149
+
150
+ ## 8. Hoja de ruta por fases (con criterios de aceptación)
151
+
152
+ ### Fase 1 — Parser ColabFold sólido (semanas 1–2)
153
+ - Implementa `models.py` y el `Parser` base.
154
+ - Implementa el parser de ColabFold completo.
155
+ - **Aceptación:** dada una carpeta real de ColabFold, `parse()` devuelve `Prediction`s con
156
+ estructura, pLDDT, PAE y métricas correctas. Test con fixture real en `tests/data/`.
157
+
158
+ ### Fase 2 — Figuras y tabla de métricas (semanas 3–4)
159
+ - `figures.py`: gráfico de PAE y de pLDDT por residuo, calidad de publicación.
160
+ - `metrics.py`: tabla normalizada (pandas) ordenable por cualquier métrica.
161
+ - **Aceptación:** desde una lista de `Prediction` se generan los PNG/figuras y un DataFrame
162
+ con una fila por predicción. Huecos como `None` se manejan sin romper.
163
+
164
+ ### Fase 3 — Informe HTML autocontenido (semanas 5–6)
165
+ - Visor 3D embebido (py3Dmol/Mol*) coloreado por pLDDT.
166
+ - Plantilla que junta: tabla rankeada arriba, detalle por predicción debajo.
167
+ - **Aceptación:** `foldreport <carpeta> -o informe.html` produce **un solo .html** que abre en
168
+ el navegador sin conexión y sin ficheros adyacentes, con ranking por confianza funcional.
169
+
170
+ ### Fase 4 — Segundo y tercer formato + pulido (semanas 7–8)
171
+ - Añade parser de AF3 Server (y Boltz si da tiempo). Esto valida que la abstracción de §6 aguanta.
172
+ - README con ejemplo reproducible copy-paste, datos de prueba en el repo, `pip install` de un comando.
173
+ - **Aceptación:** la misma orden funciona sobre carpetas de ≥2 herramientas distintas sin cambios,
174
+ produciendo informes equivalentes. Un usuario nuevo logra un informe en <5 min desde el README.
175
+
176
+ ---
177
+
178
+ ## 9. Qué hace que se adopte (prioridad nº1 del autor)
179
+
180
+ Lo que separa la herramienta que la gente usa de la que muere en GitHub casi nunca es el código:
181
+
182
+ 1. **Instalación de un comando** (`pip install foldreport`).
183
+ 2. **README con un ejemplo que funciona copiando y pegando**, con datos incluidos en el repo.
184
+ 3. **Resolver UN caso de uso del todo** antes que cinco a medias.
185
+ 4. Coste de probar ≈ cero: apuntar a una carpeta y obtener algo útil en segundos.
186
+ 5. Cuando esté maduro: preprint corto en bioRxiv y difusión donde está la comunidad estructural.
187
+
188
+ Regla de oro de scope: ante cualquier feature nueva, pregúntate si sirve a la frase de §1.
189
+ Si no, va al backlog, no al MVP.
190
+
191
+ ---
192
+
193
+ ## 10. Riesgo conocido
194
+
195
+ El espacio de análisis post-AlphaFold tiene varios actores (herramientas que hacen figuras de PAE,
196
+ visores de una predicción, plugins de visores pesados). Si esto es "una más que dibuja PAE", se
197
+ pierde. La defensa es la cuña: **unificar salidas de varias herramientas en un informe que se
198
+ comparte como un solo archivo.** Esa combinación es lo que hoy no existe.
199
+
200
+ ---
201
+
202
+ ## 11. Primeras instrucciones para Claude Code
203
+
204
+ 1. Crea el esqueleto del repo según §7 con `pyproject.toml` instalable.
205
+ 2. Implementa `models.py` (§6) y `parsers/base.py` con el contrato `Parser`.
206
+ 3. Implementa `parsers/colabfold.py` y un test con una fixture mínima en `tests/data/colabfold/`.
207
+ (Si no tienes una salida real a mano, genera una fixture sintética fiel al formato real de
208
+ ColabFold y deja un TODO para sustituirla por una real.)
209
+ 4. No avances a figuras hasta que el parser pase su test.
@@ -0,0 +1,129 @@
1
+ # FoldReport
2
+
3
+ **Point it at a folder of structure predictions and get one self-contained HTML report
4
+ that ranks them all by confidence.**
5
+
6
+ ### ▶ [Try the live demo report →](https://sergio-gracia.github.io/foldreport/)
7
+
8
+ See exactly what you get before installing anything: the same complex predicted by all
9
+ four supported tools (eight pooled predictions), ranked in one page. Click a row to open
10
+ its detail card — interactive 3D viewer colored by pLDDT, per-residue pLDDT plot, PAE
11
+ heatmap, and interface metrics. It is the exact file `foldreport` writes, served as-is.
12
+
13
+ FoldReport reads the outputs of modern structure-prediction tools — **ColabFold**,
14
+ the **AlphaFold 3 Server**, **Boltz**, and **OpenFold3** — as well as entries from the
15
+ **AlphaFold Protein Structure Database**, and unifies them into a single navigable
16
+ `.html` file: a confidence-ranked table on top (filterable by tool,
17
+ name, and confidence), and a detail card per prediction with an embedded 3D viewer
18
+ (colored by pLDDT), a per-residue pLDDT plot, an interactive PAE heatmap, and interface
19
+ metrics (pTM, ipTM, …).
20
+
21
+ The report is **one file**. No server, no notebook, no internet connection, and no
22
+ adjacent assets — open it in any browser and share it as a single attachment.
23
+
24
+ ## Why
25
+
26
+ Running AlphaFold is solved. The bottleneck moved *downstream*: you end up with dozens
27
+ or hundreds of output folders, in slightly different formats, and have to decide *what
28
+ to look at*. FoldReport answers "300 outputs from 3 tools — which ones matter?" in one
29
+ command.
30
+
31
+ ## Install
32
+
33
+ ```bash
34
+ pip install foldreport
35
+ ```
36
+
37
+ Or from a checkout:
38
+
39
+ ```bash
40
+ pip install .
41
+ ```
42
+
43
+ ## Quick start (copy-paste)
44
+
45
+ A ready-to-run example dataset ships in the repo: the same complex predicted by all
46
+ four supported tools (two models each, eight pooled predictions), one folder per tool.
47
+
48
+ ```bash
49
+ foldreport examples/demo/colabfold examples/demo/af3_server examples/demo/boltz examples/demo/openfold3 -o report.html
50
+ ```
51
+
52
+ Open `report.html` in your browser. That's it.
53
+
54
+ Point it at a single run the same way — the format is autodetected:
55
+
56
+ ```bash
57
+ foldreport path/to/colabfold_run -o report.html
58
+ ```
59
+
60
+ Pool several runs (even from different tools) into one ranked report:
61
+
62
+ ```bash
63
+ foldreport run_colabfold/ run_af3/ run_boltz/ run_openfold3/ -o combined.html
64
+ ```
65
+
66
+ ### Options
67
+
68
+ | Flag | Description |
69
+ |------|-------------|
70
+ | `-o, --output` | Path of the HTML report to write (default `foldreport.html`). |
71
+ | `-t, --title` | Title shown at the top of the report. |
72
+ | `--csv` | Also write the ranked metrics table as CSV. |
73
+ | `-V, --version` | Print version. |
74
+
75
+ ## Supported tools
76
+
77
+ | Tool | Detected from | pLDDT | PAE | pTM / ipTM |
78
+ |------|---------------|:-----:|:---:|:----------:|
79
+ | ColabFold | `*_scores_rank_*.json` + `*_rank_*.pdb` | ✓ | ✓ | ✓ |
80
+ | AlphaFold 3 Server | `*_summary_confidences_*.json` + `*_full_data_*.json` | ✓ | ✓ | ✓ |
81
+ | Boltz | `confidence_*_model_*.json` + `*.npz` | ✓ | ✓ | ✓ |
82
+ | OpenFold3 | `seed-*_sample-*/` + `*_confidences.json` + `*_summary_confidences.json` | ✓ | ✓ | ✓ |
83
+ | AlphaFold DB | `AF-<ACC>-F*-model_v*.cif` + `*_predicted_aligned_error_v*.json` | ✓ | ✓ | — |
84
+
85
+ Metrics a tool does not provide are shown as **N/A** — never fabricated.
86
+
87
+ ## How it works
88
+
89
+ Every parser converts a tool's on-disk output into a common internal representation
90
+ (`foldreport/models.py`). Nothing downstream — metrics, figures, report — knows the
91
+ original format, so adding a tool means writing one parser, not touching the rest.
92
+ Format detection is automatic; you never declare which tool produced a folder.
93
+
94
+ ## Development
95
+
96
+ ```bash
97
+ python -m venv .venv
98
+ .venv/Scripts/python -m pip install -e ".[dev]" # Windows
99
+ .venv/bin/python -m pip install -e ".[dev]" # macOS/Linux
100
+ .venv/Scripts/python -m pytest # run tests (Windows; on macOS/Linux use .venv/bin/python)
101
+ ```
102
+
103
+ The test fixtures are synthetic but faithful to each tool's real file layout, names,
104
+ and JSON keys; regenerate them with `python tests/make_fixtures.py`. Regenerate the
105
+ example dataset with `python examples/make_demo.py`.
106
+
107
+ ### Try it on real biological data
108
+
109
+ To validate the pipeline on genuine predictions, download a small set of real proteins
110
+ from the [AlphaFold Protein Structure Database](https://alphafold.ebi.ac.uk):
111
+
112
+ ```bash
113
+ python examples/fetch_afdb.py # default set: INS, UBC, LYZ, HBA1
114
+ python examples/fetch_afdb.py P01308 P0CG48 P00698 # or any UniProt accessions
115
+ ```
116
+
117
+ This writes the real structures + PAE into `examples/afdb_demo/` and builds
118
+ `examples/afdb_report.html`. It is the only network-touching part of the project and is
119
+ fully opt-in — the test suite never hits the network.
120
+
121
+ ## Scope
122
+
123
+ FoldReport processes *existing* predictions only. It does not run inference (no GPU, no
124
+ model), predict mutation effects, or edit structures. The deliverable is a CLI plus a
125
+ static HTML file — no backend, database, or accounts.
126
+
127
+ ## License
128
+
129
+ MIT.
@@ -0,0 +1,16 @@
1
+ """FoldReport — unify structure-prediction outputs into a single HTML report."""
2
+
3
+ from foldreport.models import (
4
+ Chain,
5
+ Prediction,
6
+ PredictionMetrics,
7
+ )
8
+
9
+ __version__ = "0.1.0"
10
+
11
+ __all__ = [
12
+ "Chain",
13
+ "Prediction",
14
+ "PredictionMetrics",
15
+ "__version__",
16
+ ]
@@ -0,0 +1,78 @@
1
+ """Command-line interface: ``foldreport <folder> [...] -o report.html``.
2
+
3
+ Point it at one or more folders of predictions. Each folder's format is autodetected;
4
+ all predictions are pooled, ranked by confidence, and written to a single HTML file.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import sys
10
+ from pathlib import Path
11
+
12
+ import click
13
+
14
+ from foldreport import __version__
15
+ from foldreport.metrics import ranked_dataframe
16
+ from foldreport.parsers import detect_parser, parse_folder
17
+ from foldreport.report import build_report
18
+
19
+
20
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.argument(
    "folders",
    type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
    required=True,
    nargs=-1,
)
@click.option(
    "-o",
    "--output",
    help="Path of the self-contained HTML report to write.",
    default=Path("foldreport.html"),
    show_default=True,
    type=click.Path(dir_okay=False, path_type=Path),
)
@click.option(
    "-t",
    "--title",
    help="Title shown at the top of the report.",
    default="FoldReport",
    show_default=True,
)
@click.option(
    "--csv",
    help="Also write the ranked metrics table to this CSV path.",
    default=None,
    type=click.Path(dir_okay=False, path_type=Path),
)
@click.version_option(__version__, "-V", "--version", prog_name="foldreport")
def main(folders: tuple[Path, ...], output: Path, title: str, csv: Path | None) -> None:
    """Build a single HTML report from prediction FOLDERS.

    Supported tools (autodetected): ColabFold, AlphaFold 3 Server, Boltz, OpenFold3,
    and AlphaFold DB downloads.
    """
    # NOTE: the docstring above is rendered by click as the ``--help`` text, so it is
    # user-facing output; implementation notes live in comments instead.
    #
    # Flow: detect a parser per folder, pool whatever each folder yields, optionally
    # dump the ranked table as CSV, then write the HTML report.
    pooled = []
    for folder in folders:
        handler = detect_parser(folder)
        if handler is not None:
            parsed = parse_folder(folder)
            click.echo(f" + {folder}: {len(parsed)} prediction(s) via '{handler.name}'.")
            pooled.extend(parsed)
        else:
            # An unrecognised folder is reported on stderr and skipped, not fatal.
            click.echo(f" ! Skipping {folder}: no supported format detected.", err=True)

    # Nothing usable in any folder: report on stderr and exit with status 1.
    if not pooled:
        click.echo("No predictions found in the given folder(s).", err=True)
        sys.exit(1)

    # The CSV export is opt-in and is written before the HTML report.
    if csv is not None:
        table = ranked_dataframe(pooled)
        table.to_csv(csv, index=False)
        click.echo(f" > Metrics table: {csv}")

    report_path = build_report(pooled, output, title=title)
    click.echo(f" > Report ({len(pooled)} predictions): {report_path}")


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()