foldreport 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
foldreport/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ """FoldReport — unify structure-prediction outputs into a single HTML report."""
2
+
3
+ from foldreport.models import (
4
+ Chain,
5
+ Prediction,
6
+ PredictionMetrics,
7
+ )
8
+
9
# Version string of the package.
__version__ = "0.1.0"

# Names exported by ``from foldreport import *``: the three model dataclasses
# imported above plus the version string.
__all__ = [
    "Chain",
    "Prediction",
    "PredictionMetrics",
    "__version__",
]
foldreport/cli.py ADDED
@@ -0,0 +1,78 @@
1
+ """Command-line interface: ``foldreport <folder> [...] -o report.html``.
2
+
3
+ Point it at one or more folders of predictions. Each folder's format is autodetected;
4
+ all predictions are pooled, ranked by confidence, and written to a single HTML file.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import sys
10
+ from pathlib import Path
11
+
12
+ import click
13
+
14
+ from foldreport import __version__
15
+ from foldreport.metrics import ranked_dataframe
16
+ from foldreport.parsers import detect_parser, parse_folder
17
+ from foldreport.report import build_report
18
+
19
+
20
@click.command(context_settings={"help_option_names": ["-h", "--help"]})
@click.argument(
    "folders",
    nargs=-1,
    required=True,
    type=click.Path(exists=True, file_okay=False, dir_okay=True, path_type=Path),
)
@click.option(
    "-o",
    "--output",
    type=click.Path(dir_okay=False, path_type=Path),
    default=Path("foldreport.html"),
    show_default=True,
    help="Path of the self-contained HTML report to write.",
)
@click.option(
    "-t",
    "--title",
    default="FoldReport",
    show_default=True,
    help="Title shown at the top of the report.",
)
@click.option(
    "--csv",
    type=click.Path(dir_okay=False, path_type=Path),
    default=None,
    help="Also write the ranked metrics table to this CSV path.",
)
@click.version_option(__version__, "-V", "--version", prog_name="foldreport")
def main(folders: tuple[Path, ...], output: Path, title: str, csv: Path | None) -> None:
    """Build a single HTML report from prediction FOLDERS.

    Supported tools (autodetected): ColabFold, AlphaFold 3 Server, Boltz, OpenFold3,
    and AlphaFold DB downloads.
    """
    # NOTE: the docstring above doubles as the command's --help text; keep it terse.

    # 1) Pool the predictions of every folder whose layout is recognized.
    pooled = []
    for folder in folders:
        handler = detect_parser(folder)
        if handler is not None:
            parsed = parse_folder(folder)
            click.echo(f" + {folder}: {len(parsed)} prediction(s) via '{handler.name}'.")
            pooled.extend(parsed)
        else:
            # Unrecognized folders are reported on stderr and skipped, not fatal.
            click.echo(f" ! Skipping {folder}: no supported format detected.", err=True)

    # 2) Nothing usable at all: exit with status 1.
    if not pooled:
        click.echo("No predictions found in the given folder(s).", err=True)
        sys.exit(1)

    # 3) Optional CSV export of the ranked metrics table.
    if csv is not None:
        table = ranked_dataframe(pooled)
        table.to_csv(csv, index=False)
        click.echo(f" > Metrics table: {csv}")

    # 4) The HTML report itself; build_report returns the path that is echoed.
    written = build_report(pooled, output, title=title)
    click.echo(f" > Report ({len(pooled)} predictions): {written}")
75
+
76
+
77
# Run the click command when this module is executed as a script.
if __name__ == "__main__":
    main()
foldreport/figures.py ADDED
@@ -0,0 +1,125 @@
1
+ """Publication-quality static figures: PAE and per-residue pLDDT.
2
+
3
+ Figures are rendered with a non-interactive matplotlib backend and returned as base64
4
+ PNG data URIs so the report can embed them with zero external files. Predictions that
5
+ lack a given metric simply produce no figure (the caller renders "N/A").
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import base64
11
+ import io
12
+
13
+ import matplotlib
14
+
15
+ matplotlib.use("Agg") # Headless: never requires a display server.
16
+
17
+ import matplotlib.pyplot as plt
18
+ import numpy as np
19
+ from matplotlib.figure import Figure
20
+
21
+ from foldreport.models import Prediction
22
+
23
# pLDDT confidence bands used by the AlphaFold family (0-100 scale).
# Each entry is (lower bound, upper bound, hex colour, label). plddt_figure shades
# the horizontal span between the two bounds with the colour; it does not use the
# label.
_PLDDT_BANDS = [
    (90, 100, "#0053D6", "Very high (90-100)"),
    (70, 90, "#65CBF3", "Confident (70-90)"),
    (50, 70, "#FFDB13", "Low (50-70)"),
    (0, 50, "#FF7D45", "Very low (0-50)"),
]
30
+
31
+
32
def _fig_to_data_uri(fig: Figure, dpi: int = 150) -> str:
    """Render ``fig`` as a PNG and return it as a base64 ``data:`` URI.

    Args:
        fig: Matplotlib figure to serialize. It is always closed before this
            function returns or raises.
        dpi: Resolution passed to ``Figure.savefig``.

    Returns:
        A string of the form ``data:image/png;base64,<payload>``.
    """
    buf = io.BytesIO()
    try:
        fig.savefig(buf, format="png", dpi=dpi, bbox_inches="tight")
    finally:
        # Close even when rendering fails; otherwise pyplot keeps the figure
        # registered and it accumulates across predictions.
        plt.close(fig)
    encoded = base64.b64encode(buf.getvalue()).decode("ascii")
    return f"data:image/png;base64,{encoded}"
38
+
39
+
40
def plddt_figure(pred: Prediction) -> str | None:
    """Plot pLDDT against residue number and return the image as a PNG data URI.

    Returns ``None`` when the prediction carries no per-residue pLDDT values.
    """
    if not pred.plddt:
        return None

    scores = np.asarray(pred.plddt, dtype=float)
    n_points = len(scores)
    positions = np.arange(1, n_points + 1)  # residues are numbered from 1

    fig, ax = plt.subplots(figsize=(7, 2.6))

    # Faint coloured background stripes, one per confidence band.
    for lower, upper, shade, _unused_label in _PLDDT_BANDS:
        ax.axhspan(lower, upper, color=shade, alpha=0.12, linewidth=0)
    ax.plot(positions, scores, color="#1a1a1a", linewidth=1.2)

    _draw_chain_boundaries(ax, pred)

    ax.set_xlim(1, n_points)
    ax.set_ylim(0, 100)
    ax.set_xlabel("Residue")
    ax.set_ylabel("pLDDT")
    ax.set_title("Per-residue pLDDT")
    ax.grid(True, axis="y", alpha=0.2)

    return _fig_to_data_uri(fig)
61
+
62
+
63
def pae_figure(pred: Prediction) -> str | None:
    """Render the PAE (Predicted Aligned Error) matrix as a heatmap.

    Args:
        pred: Prediction whose ``pae`` attribute holds the matrix (or ``None``).

    Returns:
        A PNG data URI, or ``None`` when there is no PAE, the data is not
        two-dimensional, or the matrix has no entries.
    """
    if pred.pae is None:
        return None
    pae = np.asarray(pred.pae, dtype=float)
    # An empty matrix has nothing to draw and would make ``max`` raise.
    if pae.ndim != 2 or pae.size == 0:
        return None

    # Scale the colour map on finite entries only: a single NaN would otherwise
    # turn the upper limit into NaN.
    finite = pae[np.isfinite(pae)]
    peak = float(finite.max()) if finite.size else 0.0

    fig, ax = plt.subplots(figsize=(4.2, 3.6))
    # The upper limit never drops below 1.0.
    im = ax.imshow(pae, cmap="Greens_r", vmin=0, vmax=max(peak, 1.0), origin="upper")
    ax.set_xlabel("Scored residue")
    ax.set_ylabel("Aligned residue")
    ax.set_title("Predicted Aligned Error")
    cbar = fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
    cbar.set_label("Expected position error (Å)")

    _draw_pae_chain_lines(ax, pred)
    return _fig_to_data_uri(fig)
81
+
82
+
83
+ def _chain_offsets(pred: Prediction) -> list[int]:
84
+ """Cumulative residue offsets at chain boundaries (excluding the final end)."""
85
+ offsets: list[int] = []
86
+ cumulative = 0
87
+ for chain in pred.chains[:-1]:
88
+ cumulative += chain.n_residues
89
+ offsets.append(cumulative)
90
+ return offsets
91
+
92
+
93
def _draw_chain_boundaries(ax, pred: Prediction) -> None:
    """Draw a dashed vertical line on ``ax`` at every chain boundary.

    The line sits at ``offset + 0.5``, i.e. halfway between the last residue of
    one chain and the first residue of the next on a 1-based residue axis.
    """
    dashed = {"color": "#888888", "linestyle": "--", "linewidth": 0.8}
    for offset in _chain_offsets(pred):
        ax.axvline(offset + 0.5, **dashed)
96
+
97
+
98
def _draw_pae_chain_lines(ax, pred: Prediction) -> None:
    """Overlay a horizontal and a vertical line on ``ax`` at each chain boundary.

    Both lines are placed at ``offset - 0.5`` so they separate the two chains on
    the heatmap.
    """
    solid = {"color": "#444444", "linewidth": 0.6}
    for offset in _chain_offsets(pred):
        edge = offset - 0.5
        ax.axhline(edge, **solid)
        ax.axvline(edge, **solid)
102
+
103
+
104
def pae_data_for_js(pred: Prediction) -> dict | None:
    """Return PAE matrix data for the interactive JS heatmap, or None.

    Args:
        pred: Prediction whose ``pae`` attribute holds the matrix (or ``None``).

    Returns:
        ``None`` when there is no PAE, the data is not two-dimensional, or the
        matrix has no entries. Otherwise a dict with:

        * ``matrix``: the values rounded to two decimals, as nested lists;
        * ``size``: the number of rows;
        * ``max_val``: the largest finite value, rounded to two decimals
          (``0.0`` when no entry is finite);
        * ``chain_boundaries``: the cumulative chain offsets.
    """
    if pred.pae is None:
        return None
    pae = np.asarray(pred.pae, dtype=float)
    # An empty matrix would make ``max`` raise; there is nothing to display anyway.
    if pae.ndim != 2 or pae.size == 0:
        return None

    # Ignore NaN/inf when computing the scale so one bad cell cannot poison it.
    finite = pae[np.isfinite(pae)]
    peak = float(finite.max()) if finite.size else 0.0
    return {
        "matrix": np.round(pae, 2).tolist(),
        "size": int(pae.shape[0]),
        "max_val": round(peak, 2),
        "chain_boundaries": _chain_offsets(pred),
    }
117
+
118
+
119
def make_figures(pred: Prediction) -> dict[str, str | dict | None]:
    """Collect every figure payload for one prediction.

    Returns a dict with three keys:

    * ``"plddt"``: result of ``plddt_figure`` (PNG data URI or ``None``);
    * ``"pae"``: result of ``pae_figure`` (PNG data URI or ``None``);
    * ``"pae_interactive"``: result of ``pae_data_for_js`` (a dict of matrix data
      or ``None``), which is not a data URI.
    """
    return {
        "plddt": plddt_figure(pred),
        "pae": pae_figure(pred),
        "pae_interactive": pae_data_for_js(pred),
    }
foldreport/metrics.py ADDED
@@ -0,0 +1,89 @@
1
+ """Normalize a list of predictions into a sortable/filterable metrics table.
2
+
3
+ The table has exactly one row per prediction. Missing metrics stay as ``None`` (which
4
+ pandas renders as ``NaN``); nothing here invents values. The default ranking key is a
5
+ confidence score that gracefully falls back when a tool omits a metric.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Sequence
11
+
12
+ import pandas as pd
13
+
14
+ from foldreport.models import Prediction
15
+
16
# Columns in display order. Keep human-readable; the report renders these as headers.
# metrics_dataframe passes this list as the DataFrame's ``columns``, so every name
# here must match a key of the row dicts built there.
COLUMNS = [
    "name",
    "source_tool",
    "rank",
    "mean_plddt",
    "ptm",
    "iptm",
    "mpdockq",
    "ranking_score",
    "n_chains",
    "n_residues",
]
29
+
30
+
31
def metrics_dataframe(predictions: Sequence[Prediction]) -> pd.DataFrame:
    """Tabulate the normalized metrics, one row per prediction, in input order.

    Identification fields (name, tool, rank) come from the prediction itself;
    everything else is read from its ``metrics`` object. Column order follows
    ``COLUMNS``.
    """
    records = [
        {
            "name": item.name,
            "source_tool": item.source_tool,
            "rank": item.rank,
            "mean_plddt": item.metrics.mean_plddt,
            "ptm": item.metrics.ptm,
            "iptm": item.metrics.iptm,
            "mpdockq": item.metrics.mpdockq,
            "ranking_score": item.metrics.ranking_score,
            "n_chains": item.metrics.n_chains,
            "n_residues": item.metrics.n_residues,
        }
        for item in predictions
    ]
    return pd.DataFrame(records, columns=COLUMNS)
52
+
53
+
54
def _is_usable(value: float | None) -> bool:
    """Return True when ``value`` is an actual number (neither ``None`` nor NaN)."""
    return value is not None and not pd.isna(value)


def confidence_score(pred: Prediction) -> float:
    """A single comparable confidence value used to rank predictions.

    Preference order, using whatever the tool provided:
      1. ipTM (complexes) blended with pTM: 0.8*ipTM + 0.2*pTM
      2. ipTM alone
      3. pTM alone
      4. mean pLDDT scaled to 0-1
      5. the tool's own ranking_score
      6. 0.0 as a last resort (keeps it sortable, sorts last)

    A metric that is NaN is treated like a missing one: a NaN sort key would
    make the ranking order undefined.

    Args:
        pred: Prediction whose ``metrics`` are inspected.

    Returns:
        The first available score following the order above.
    """
    m = pred.metrics
    has_iptm = _is_usable(m.iptm)
    has_ptm = _is_usable(m.ptm)

    if has_iptm and has_ptm:
        return 0.8 * m.iptm + 0.2 * m.ptm
    if has_iptm:
        return m.iptm
    if has_ptm:
        return m.ptm
    if _is_usable(m.mean_plddt):
        return m.mean_plddt / 100.0  # pLDDT is on a 0-100 scale
    if _is_usable(m.ranking_score):
        return m.ranking_score
    return 0.0
76
+
77
+
78
def rank_predictions(predictions: Sequence[Prediction]) -> list[Prediction]:
    """Return a new list of the predictions, highest ``confidence_score`` first.

    The input sequence is left untouched.
    """
    best_first = list(predictions)
    best_first.sort(key=confidence_score, reverse=True)
    return best_first
81
+
82
+
83
def ranked_dataframe(predictions: Sequence[Prediction]) -> pd.DataFrame:
    """Build the metrics table in best-first order with two extra columns.

    ``overall_rank`` (1-based position after ranking) is placed first and
    ``confidence`` (the ranking score rounded to four decimals) is appended last.
    """
    best_first = rank_predictions(predictions)
    scores = [round(confidence_score(item), 4) for item in best_first]

    table = metrics_dataframe(best_first)
    table.insert(0, "overall_rank", range(1, len(table) + 1))
    table["confidence"] = scores
    return table
foldreport/models.py ADDED
@@ -0,0 +1,75 @@
1
+ """Internal representation shared by every parser.
2
+
3
+ This is the core abstraction of FoldReport: each parser converts the on-disk output
4
+ of a prediction tool into ``list[Prediction]`` objects. Nothing downstream (metrics,
5
+ figures, report) knows the original format. Adding a new tool means writing a parser
6
+ that produces these dataclasses, not touching the rest of the codebase.
7
+
8
+ Missing values are always ``None`` — never invented. The report renders them as "N/A".
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from dataclasses import dataclass, field
14
+ from pathlib import Path
15
+
16
+ import numpy as np
17
+
18
+
19
@dataclass
class Chain:
    """One polymer chain of a predicted structure.

    Fields are positional in the order ``chain_id``, ``n_residues``, ``sequence``.
    """

    # Identifier of the chain.
    chain_id: str
    # Number of residues in this chain.
    n_residues: int
    # One-letter sequence, if known; ``None`` otherwise.
    sequence: str | None = None
26
+
27
+
28
@dataclass
class PredictionMetrics:
    """Scalar confidence metrics of one prediction, normalized across tools.

    A metric the tool did not report is left as ``None``. Consumers have to cope
    with such gaps and must never make up a value.
    """

    # Confidence scores; each is ``None`` when the tool does not provide it.
    mean_plddt: float | None = None
    ptm: float | None = None
    iptm: float | None = None
    mpdockq: float | None = None
    # Structure size; both default to zero.
    n_chains: int = 0
    n_residues: int = 0
    # Optional ranking score reported by some tools (e.g. AF3 Server "ranking_score").
    ranking_score: float | None = None
44
+
45
+
46
@dataclass
class Prediction:
    """A single predicted model, normalized across tools.

    Attributes:
        name: Human-readable identifier (usually derived from the file name).
        source_tool: One of "colabfold", "af3_server", "boltz", ...
        structure_path: Path to the ``.cif``/``.pdb`` holding coordinates.
        chains: Per-chain metadata in canonical order.
        plddt: Per-residue pLDDT in canonical residue order (0-100). Empty if unknown.
        pae: Predicted Aligned Error matrix (N_tokens x N_tokens) or ``None``.
            Whatever is passed in is stored as a float ndarray.
        metrics: Normalized scalar metrics.
        rank: Tool-reported rank (1 = best) when available, else ``None``.
        raw_files: Provenance — maps a logical role to the file it came from.
    """

    name: str
    source_tool: str
    structure_path: Path
    chains: list[Chain] = field(default_factory=list)
    plddt: list[float] = field(default_factory=list)
    pae: np.ndarray | None = None
    metrics: PredictionMetrics = field(default_factory=PredictionMetrics)
    rank: int | None = None
    raw_files: dict[str, Path] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Normalize ``pae`` to a float ndarray (nested lists and int arrays included)."""
        # Convert unconditionally: an ndarray of integers must become float too.
        # np.asarray returns the same object when the dtype already matches, so
        # float arrays are not copied.
        if self.pae is not None:
            self.pae = np.asarray(self.pae, dtype=float)
foldreport/parsers/__init__.py ADDED
@@ -0,0 +1,65 @@
1
+ """Parser registry and format autodetection.
2
+
3
+ Each tool-specific parser registers here. ``detect_parser`` asks every parser whether
4
+ it recognizes a folder, so the user never has to declare the format.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from pathlib import Path
10
+
11
+ from foldreport.models import Prediction
12
+ from foldreport.parsers.af3_server import Af3ServerParser
13
+ from foldreport.parsers.alphafold_db import AlphaFoldDBParser
14
+ from foldreport.parsers.base import Parser
15
+ from foldreport.parsers.boltz import BoltzParser
16
+ from foldreport.parsers.colabfold import ColabFoldParser
17
+ from foldreport.parsers.openfold3 import OpenFold3Parser
18
+
19
# Order matters only as a tie-breaker; can_handle checks should be mutually exclusive
# in practice. More specific layouts come first.
# detect_parser walks this list front to back and returns the first match.
ALL_PARSERS: list[Parser] = [
    ColabFoldParser(),
    Af3ServerParser(),
    OpenFold3Parser(),
    AlphaFoldDBParser(),
    BoltzParser(),
]
28
+
29
+
30
def detect_parser(path: Path) -> Parser | None:
    """Find the parser responsible for ``path``.

    Registered parsers are tried in ``ALL_PARSERS`` order and the first one whose
    ``can_handle`` accepts the path wins. Returns ``None`` when none does.
    """
    folder = Path(path)
    candidates = (parser for parser in ALL_PARSERS if parser.can_handle(folder))
    return next(candidates, None)
37
+
38
+
39
def parse_folder(path: Path) -> list[Prediction]:
    """Parse every prediction in ``path`` with the autodetected parser.

    Raises:
        ValueError: if no registered parser recognizes the folder.
    """
    folder = Path(path)
    handler = detect_parser(folder)
    if handler is not None:
        return handler.parse(folder)

    # No parser matched: list the known tools in the error message.
    supported = ", ".join(p.name for p in ALL_PARSERS)
    raise ValueError(
        f"No registered parser recognizes the folder: {folder}. "
        f"Supported tools: {supported}."
    )
53
+
54
+
55
# Public names of the parsers package: the registry list, the imported parser
# classes, and the two autodetection helpers defined above.
__all__ = [
    "ALL_PARSERS",
    "Parser",
    "ColabFoldParser",
    "Af3ServerParser",
    "BoltzParser",
    "OpenFold3Parser",
    "AlphaFoldDBParser",
    "detect_parser",
    "parse_folder",
]
foldreport/parsers/af3_server.py ADDED
@@ -0,0 +1,129 @@
1
+ """Parser for AlphaFold 3 Server output folders.
2
+
3
+ The AF3 Server download contains, per job, five ranked models plus JSON sidecars::
4
+
5
+ fold_<job>_model_0.cif ... fold_<job>_model_4.cif
6
+ fold_<job>_full_data_0.json -> atom_plddts, pae, token_chain_ids, ...
7
+ fold_<job>_summary_confidences_0.json -> ptm, iptm, ranking_score, ...
8
+ fold_<job>_job_request.json
9
+ terms_of_use.md
10
+
11
+ pLDDT is stored both per-atom in ``full_data`` and in the mmCIF B-factor column; we
12
+ read per-residue pLDDT from the B-factors for consistency with the other parsers.
13
+ The PAE matrix (per token) comes from ``full_data``.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import json
19
+ import re
20
+ from pathlib import Path
21
+
22
+ import numpy as np
23
+
24
+ from foldreport.models import Prediction, PredictionMetrics
25
+ from foldreport.parsers import base
26
+
27
# File-name patterns for the three per-model artefacts. Each captures the job
# prefix as ``job`` and the trailing model number (digits, kept as a string) as
# ``idx``; the pair is used as the key that ties the three files together.
_MODEL_RE = re.compile(r"^(?P<job>.+)_model_(?P<idx>\d+)\.(?:cif|pdb)$")
_SUMMARY_RE = re.compile(r"^(?P<job>.+)_summary_confidences_(?P<idx>\d+)\.json$")
_FULLDATA_RE = re.compile(r"^(?P<job>.+)_full_data_(?P<idx>\d+)\.json$")
30
+
31
+
32
class Af3ServerParser:
    """Parse a folder produced by the AlphaFold 3 Server."""

    name = "af3_server"

    def can_handle(self, path: Path) -> bool:
        """Return True when ``path`` is a directory with both JSON sidecar kinds.

        The folder qualifies as soon as one ``*_summary_confidences_<n>.json`` and
        one ``*_full_data_<n>.json`` file have been seen; structure files are not
        checked here.
        """
        path = Path(path)
        if not path.is_dir():
            return False
        has_summary = has_fulldata = False
        for entry in path.iterdir():
            if _SUMMARY_RE.match(entry.name):
                has_summary = True
            elif _FULLDATA_RE.match(entry.name):
                has_fulldata = True
            if has_summary and has_fulldata:
                return True
        return False

    def parse(self, path: Path) -> list[Prediction]:
        """Build one ``Prediction`` per structure file found in ``path``.

        Structure files drive the result; the matching summary and full-data JSON
        files are optional and simply leave their metrics as ``None`` when absent.

        Args:
            path: Folder to scan (not recursive).

        Returns:
            Predictions ordered by numeric model index, then by job name.
        """
        path = Path(path)
        models = self._index(path, _MODEL_RE)
        summaries = self._index(path, _SUMMARY_RE)
        fulldata = self._index(path, _FULLDATA_RE)

        predictions: list[Prediction] = []
        ordered = sorted(models.items(), key=lambda kv: self._model_sort_key(kv[0]))
        for (job, idx), struct_path in ordered:
            summary_path = summaries.get((job, idx))
            fulldata_path = fulldata.get((job, idx))
            summary = _load_json(summary_path)
            data = _load_json(fulldata_path)

            plddt = base.plddt_from_bfactors(struct_path)
            pae = _as_matrix(data.get("pae"))

            structure = base.read_structure(struct_path)
            chains = base.chains_from_structure(structure)
            n_residues = sum(c.n_residues for c in chains)

            metrics = PredictionMetrics(
                mean_plddt=float(np.mean(plddt)) if plddt else None,
                ptm=_as_opt_float(summary.get("ptm")),
                iptm=_as_opt_float(summary.get("iptm")),
                mpdockq=None,  # AF3 Server does not report mpDockQ.
                n_chains=len(chains),
                n_residues=n_residues,
                ranking_score=_as_opt_float(summary.get("ranking_score")),
            )

            # Provenance: record only the files that actually exist.
            raw_files: dict[str, Path] = {"structure": struct_path}
            if summary_path is not None:
                raw_files["summary"] = summary_path
            if fulldata_path is not None:
                raw_files["full_data"] = fulldata_path

            predictions.append(
                Prediction(
                    name=f"{_clean_job(job)}_model_{idx}",
                    source_tool=self.name,
                    structure_path=struct_path,
                    chains=chains,
                    plddt=plddt,
                    pae=pae,
                    metrics=metrics,
                    rank=int(idx) + 1,  # model_0 is the top-ranked model
                    raw_files=raw_files,
                )
            )
        return predictions

    @staticmethod
    def _model_sort_key(key: tuple[str, str]) -> tuple[int, str]:
        """Sort key for a ``(job, idx)`` pair: numeric index first, then job name.

        The index is captured as text, so it is converted to ``int`` to keep
        ``"10"`` after ``"2"``. The job name breaks ties so the order does not
        depend on directory listing order.
        """
        job, idx = key
        return int(idx), job

    @staticmethod
    def _index(path: Path, pattern: re.Pattern) -> dict[tuple[str, str], Path]:
        """Map ``(job, idx)`` to the directory entry whose name matches ``pattern``.

        ``pattern`` must define the named groups ``job`` and ``idx``. If several
        entries yield the same key, the one iterated last is kept.
        """
        out: dict[tuple[str, str], Path] = {}
        for entry in path.iterdir():
            m = pattern.match(entry.name)
            if m:
                out[(m["job"], m["idx"])] = entry
        return out
108
+
109
+
110
+ def _clean_job(job: str) -> str:
111
+ return job[len("fold_"):] if job.startswith("fold_") else job
112
+
113
+
114
+ def _load_json(path: Path | None) -> dict:
115
+ if path is None:
116
+ return {}
117
+ with open(path, "r", encoding="utf-8") as fh:
118
+ return json.load(fh)
119
+
120
+
121
+ def _as_opt_float(value) -> float | None:
122
+ return None if value is None else float(value)
123
+
124
+
125
+ def _as_matrix(value) -> np.ndarray | None:
126
+ if value is None:
127
+ return None
128
+ arr = np.asarray(value, dtype=float)
129
+ return arr if arr.ndim == 2 else None