pen-stack 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pen_stack/__init__.py +2 -0
- pen_stack/_resources.py +34 -0
- pen_stack/adapt/__init__.py +14 -0
- pen_stack/adapt/finetune.py +33 -0
- pen_stack/adapt/ingest.py +86 -0
- pen_stack/adapt/pipeline.py +101 -0
- pen_stack/adapt/recalibrate.py +58 -0
- pen_stack/adapt/report.py +130 -0
- pen_stack/agent/__init__.py +1 -0
- pen_stack/agent/guardrails.py +49 -0
- pen_stack/agent/mcp_server.py +42 -0
- pen_stack/agent/orchestrator.py +106 -0
- pen_stack/agent/pen_agent.py +169 -0
- pen_stack/agent/tools.py +130 -0
- pen_stack/atlas/__init__.py +1 -0
- pen_stack/atlas/build_wtkb.py +80 -0
- pen_stack/atlas/crosslink.py +144 -0
- pen_stack/atlas/expand.py +190 -0
- pen_stack/atlas/schema.py +59 -0
- pen_stack/atlas/scorecard.py +134 -0
- pen_stack/atlas/universe.py +75 -0
- pen_stack/atlas/variant_propose.py +155 -0
- pen_stack/bridge/__init__.py +1 -0
- pen_stack/bridge/activity.py +52 -0
- pen_stack/bridge/cli.py +65 -0
- pen_stack/bridge/fold_qc.py +53 -0
- pen_stack/bridge/guide_qc.py +84 -0
- pen_stack/bridge/ingest.py +139 -0
- pen_stack/bridge/offtarget.py +133 -0
- pen_stack/bridge/ortholog_screen.py +73 -0
- pen_stack/bridge/pipeline.py +83 -0
- pen_stack/cli.py +126 -0
- pen_stack/data/__init__.py +1 -0
- pen_stack/data/encode.py +84 -0
- pen_stack/data/genome.py +71 -0
- pen_stack/data/ingest_chromatin.py +119 -0
- pen_stack/data/ingest_integration.py +112 -0
- pen_stack/data/ingest_safety_annot.py +164 -0
- pen_stack/data/ingest_trip.py +76 -0
- pen_stack/mech/__init__.py +1 -0
- pen_stack/mech/classify_atlas.py +71 -0
- pen_stack/mech/whitelist.py +66 -0
- pen_stack/monitor/__init__.py +1 -0
- pen_stack/monitor/europepmc.py +32 -0
- pen_stack/monitor/run.py +57 -0
- pen_stack/monitor/triage.py +63 -0
- pen_stack/planner/__init__.py +1 -0
- pen_stack/planner/cargo.py +56 -0
- pen_stack/planner/cargo_polish.py +146 -0
- pen_stack/planner/delivery.py +32 -0
- pen_stack/planner/multiplex.py +110 -0
- pen_stack/planner/optimize.py +156 -0
- pen_stack/planner/pipeline.py +86 -0
- pen_stack/planner/report.py +26 -0
- pen_stack/rag/__init__.py +1 -0
- pen_stack/rag/index.py +53 -0
- pen_stack/rag/llm.py +178 -0
- pen_stack/rag/qa.py +105 -0
- pen_stack/score/__init__.py +1 -0
- pen_stack/score/recalibrate.py +77 -0
- pen_stack/score/therapeutic.py +85 -0
- pen_stack/server/__init__.py +1 -0
- pen_stack/server/api.py +142 -0
- pen_stack/ui/__init__.py +1 -0
- pen_stack/ui/app.py +518 -0
- pen_stack/validate/__init__.py +1 -0
- pen_stack/validate/adapt_demo.py +69 -0
- pen_stack/validate/agent_eval.py +117 -0
- pen_stack/validate/blind_gsh_discovery.py +165 -0
- pen_stack/validate/cargo_directionality.py +57 -0
- pen_stack/validate/durability_baselines.py +150 -0
- pen_stack/validate/forward_hypotheses.py +104 -0
- pen_stack/validate/guide_qc_demo.py +58 -0
- pen_stack/validate/intent_specification.py +82 -0
- pen_stack/validate/paper3_benchmark.py +165 -0
- pen_stack/validate/paper4_real_validation.py +144 -0
- pen_stack/validate/paper4_validation.py +82 -0
- pen_stack/validate/seq_vs_measured.py +134 -0
- pen_stack/validate/within_locus_ranking.py +74 -0
- pen_stack/validate/writer_recovery.py +86 -0
- pen_stack/wgenome/__init__.py +1 -0
- pen_stack/wgenome/chromatin_seq.py +83 -0
- pen_stack/wgenome/durability.py +108 -0
- pen_stack/wgenome/export_tracks.py +52 -0
- pen_stack/wgenome/features.py +82 -0
- pen_stack/wgenome/gsh_baseline.py +117 -0
- pen_stack/wgenome/providers.py +245 -0
- pen_stack/wgenome/safety.py +69 -0
- pen_stack/wgenome/structure3d.py +168 -0
- pen_stack/wgenome/writability.py +72 -0
- pen_stack-3.1.0.dist-info/METADATA +451 -0
- pen_stack-3.1.0.dist-info/RECORD +96 -0
- pen_stack-3.1.0.dist-info/WHEEL +5 -0
- pen_stack-3.1.0.dist-info/entry_points.txt +3 -0
- pen_stack-3.1.0.dist-info/licenses/LICENSE +21 -0
- pen_stack-3.1.0.dist-info/top_level.txt +1 -0
pen_stack/rag/qa.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Grounded, cited Q&A over the PEN-STACK platform (Phase 2, Step 2.8).
|
|
2
|
+
|
|
3
|
+
The front door for non-expert users. Contract (enforced by pen_stack.agent.guardrails):
|
|
4
|
+
* clinical-directive questions are refused;
|
|
5
|
+
* every *quantitative* claim is produced by a validated tool call (writability / atlas / cross-link),
|
|
6
|
+
never guessed by the LLM - the answer's ``provenance`` block names the tool;
|
|
7
|
+
* every factual claim carries a citation (DOIs from the curated atlas/WT-KB).
|
|
8
|
+
|
|
9
|
+
An optional LLM (Ollama/Qwen via litellm) only *phrases* the grounded facts; it is never the source of a
|
|
10
|
+
number or a citation. With no LLM available the deterministic tool+retrieval path still satisfies the
|
|
11
|
+
contract - that is the whole point.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
|
|
17
|
+
from pen_stack.agent.guardrails import DISCLAIMER, enforce_grounded, out_of_scope
|
|
18
|
+
from pen_stack.rag.index import build_cards, retrieve
|
|
19
|
+
|
|
20
|
+
_GENE_RE = re.compile(r"\b([A-Z][A-Z0-9]{2,9})\b") # crude gene-symbol cue (TRAC, CCR5, ...)
|
|
21
|
+
_FAMILY_HINTS = {
|
|
22
|
+
"bridge": "bridge_IS110", "is110": "bridge_IS110", "iscro4": "bridge_IS110",
|
|
23
|
+
"seek": "seek_IS1111", "is1111": "seek_IS1111", "cast": "CAST_VK",
|
|
24
|
+
"integrase": "serine_integrase", "bxb1": "serine_integrase", "paste": "PE_integrase",
|
|
25
|
+
"prime": "PE_integrase", "cas9": "Cas9", "cas12a": "Cas12a", "tnpb": "TnpB_Fanzor",
|
|
26
|
+
"fanzor": "TnpB_Fanzor",
|
|
27
|
+
}
|
|
28
|
+
_WRITABLE_CUES = ("where", "writable", "safe harbour", "safe harbor", "insert", "insertion site", "locus")
|
|
29
|
+
# Standing citation for tool-derived writability claims: the Phase-1 Writable Genome atlas
|
|
30
|
+
# (TRIP durability supervision + clinical-CIS safety supervision).
|
|
31
|
+
_WRITABILITY_CITATIONS = ["10.1016/j.cell.2013.07.018"] # Akhtar 2013 (TRIP) - durability supervision
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _family_in(question: str) -> str | None:
    """Return the family mapped to the first cue found in *question*, else ``None``.

    Cues are the keys of ``_FAMILY_HINTS``; they are tested in dict order as
    substrings of the lower-cased question.
    """
    lowered = question.lower()
    return next((family for cue, family in _FAMILY_HINTS.items() if cue in lowered), None)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def answer(question: str, ct: str = "k562", use_llm: bool = False) -> dict:
    """Answer *question* using tool calls and retrieved atlas cards only.

    Args:
        question: Free-text user question.
        ct: Cell-type key forwarded to ``loci_for_gene``.
        use_llm: When true, an LLM rephrasing of the grounded answer is attached
            under ``answer_phrased`` (the ``answer`` field itself is not replaced).

    Returns:
        A dict with the keys ``refused``, ``answer``, ``citations``, ``provenance``
        and ``disclaimer``. Non-refused answers are passed through
        ``enforce_grounded`` before being returned.
    """
    refusal = out_of_scope(question)
    if refusal:
        return {"refused": True, "answer": refusal, "citations": [], "provenance": [],
                "disclaimer": DISCLAIMER}

    cards = build_cards()
    retrieved = retrieve(question, cards, k=3)
    citations = sorted({d for c in retrieved for d in c.citations})
    provenance: list[dict] = []
    parts: list[str] = []

    # --- numeric route 1: "where can I write / writable loci for GENE" -> writability tool ---
    q_lower = question.lower()  # lower-case once instead of once per cue
    if any(cue in q_lower for cue in _WRITABLE_CUES):
        genes = [g for g in _GENE_RE.findall(question) if g not in {"PEN", "STACK", "DNA", "RNA"}]
        if genes:
            try:
                from pen_stack.atlas.crosslink import loci_for_gene
                g = loci_for_gene(genes[0], ct)
                if not g.empty:
                    w = float(g["writability"].max())
                    provenance.append({"tool": "crosslink.loci_for_gene",
                                       "args": {"gene": genes[0], "ct": ct},
                                       "result": {"max_writability": round(w, 3), "n_bins": int(len(g))}})
                    parts.append(f"For {genes[0]} in {ct}, the most writable bin scores "
                                 f"{w:.3f} (writability = safety x durability), across {len(g)} bins.")
                    citations = sorted(set(citations) | set(_WRITABILITY_CITATIONS))
            except FileNotFoundError:
                parts.append("(Writable-genome atlas not loaded; numeric writability unavailable.)")

    # --- numeric route 2: "which writer / tell me about FAMILY" -> atlas tool ---
    fam = _family_in(question)
    if fam:
        import pandas as pd

        from pen_stack.rag.index import _ATLAS
        try:
            adf = pd.read_parquet(_ATLAS)
        except FileNotFoundError:
            # Mirror route 1: a missing data file degrades the answer instead of raising.
            parts.append("(Writer atlas not loaded; family statistics unavailable.)")
        else:
            sub = adf[adf["family"] == fam]
            # A hinted family may have no rows in the atlas; indexing row 0 of an
            # empty frame would raise IndexError, so skip the route in that case.
            if not sub.empty:
                curated = sub[sub["entry_kind"] == "curated_core"]
                rep = curated.iloc[0] if len(curated) else sub.iloc[0]
                provenance.append({"tool": "atlas.query", "args": {"family": fam},
                                   "result": {"n_systems": int(len(sub)),
                                              "reachability_tier": rep.get("reachability_tier"),
                                              "deliv_class": rep.get("deliv_class")}})
                parts.append(f"{fam}: {len(sub):,} catalogued systems; reachability {rep.get('reachability_tier')}; "
                             f"deliverability {rep.get('deliv_class')}; representative {rep['representative_system']}.")

    # --- factual route: retrieval-grounded summary (always cited) ---
    if retrieved:
        parts.append("Relevant atlas facts: " + " | ".join(c.text for c in retrieved[:2]))

    if not parts:
        parts.append("No grounded match in the atlas. Try naming a writer family or a target gene.")

    out = {"refused": False, "answer": " ".join(parts), "citations": citations,
           "provenance": provenance, "disclaimer": DISCLAIMER}
    out = enforce_grounded(out)
    # optional LLM phrasing - presentation only; numbers/citations stay tool-derived (additive field)
    if use_llm:
        from pen_stack.rag.llm import phrase
        phrased = phrase(out["answer"])
        if phrased:
            out["answer_phrased"] = phrased
    return out
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""pen_stack.score - see PEN-STACK v3.0 program doc."""
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Re-ground the scoring axes (Phase 0, Step 0.3).
|
|
2
|
+
|
|
3
|
+
The prior `prog`/`cargo` axes were effectively hand-set flags (`s_prog=1.0` for everything) that
|
|
4
|
+
required per-enzyme overrides to pass any gate. Here each axis is a documented, continuous function
|
|
5
|
+
of a *measured* input read from ``configs/score_axes.yaml``. There are NO per-enzyme override
|
|
6
|
+
constants in this module - that invariant is checked by ``tests/unit/test_no_overrides.py``.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import pandas as pd
|
|
14
|
+
import yaml
|
|
15
|
+
|
|
16
|
+
_CFG_PATH = Path(__file__).resolve().parents[2] / "configs" / "score_axes.yaml"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def load_axes_config(path: str | Path = _CFG_PATH) -> dict:
    """Read the YAML file at *path* (UTF-8) and return the parsed content."""
    with Path(path).open(encoding="utf-8") as handle:
        return yaml.safe_load(handle)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def recalibrate_cargo(df: pd.DataFrame, cfg: dict) -> pd.DataFrame:
    """Add ``S_Cargo`` computed from measured cargo bp (monotone, log-scaled).

    Rows with a positive ``cargo_capacity_bp`` score
    ``log1p(min(bp, cap)) / log1p(cap)`` with ``cap = cfg["cargo"]["cap_bp"]``.
    All other rows keep the upstream ``s_cargo`` value; if that column is absent
    they get NaN, so ``S_Cargo`` is not filled with ``None`` objects.

    Args:
        df: Input table; it is copied, not modified.
        cfg: Axes configuration; ``cfg["cargo"]["cap_bp"]`` is required.

    Returns:
        A copy of *df* with the ``S_Cargo`` column set.
    """
    cap = float(cfg["cargo"]["cap_bp"])
    out = df.copy()

    # Upstream score used wherever no measured bp exists; NaN if there is none at all.
    if "s_cargo" in out.columns:
        fallback = out["s_cargo"]
    else:
        fallback = pd.Series(np.nan, index=out.index, dtype="float64")

    if "cargo_capacity_bp" not in out.columns:
        out["S_Cargo"] = fallback
        return out

    bp = out["cargo_capacity_bp"].astype("float64").clip(0, cap)
    recal = np.log1p(bp) / np.log1p(cap)
    # only override where we actually have a measured bp (NaN > 0 is False, so
    # missing values fall through to the upstream score as well)
    out["S_Cargo"] = np.where(bp > 0, recal, fallback)
    return out
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def recalibrate_prog(df: pd.DataFrame, cfg: dict) -> pd.DataFrame:
    """Add ``S_Prog`` derived from family and targeting modality.

    Per row, the first matching rule wins:
      1. ``family`` listed in ``bipartite_reprogrammable_families`` -> ``bipartite_bonus_to``
         (1.0 when that key is absent);
      2. ``targeting_modality`` present in ``modality_anchor`` -> that anchor value;
      3. otherwise the upstream ``s_prog`` if it is not missing, else NaN.

    All keys are read from ``cfg["programmability"]``. Returns a copy of *df*.
    """
    settings = cfg["programmability"]
    modality_scores = settings["modality_anchor"]
    reprogrammable = set(settings.get("bipartite_reprogrammable_families", []))
    reprogrammable_score = float(settings.get("bipartite_bonus_to", 1.0))

    def _score_row(record) -> float:
        if record.get("family") in reprogrammable:
            return reprogrammable_score
        mode = record.get("targeting_modality")
        if mode in modality_scores:
            return float(modality_scores[mode])
        # no modality info: degrade to the upstream value when one exists
        upstream = record.get("s_prog")
        if pd.notna(upstream):
            return float(upstream)
        return np.nan

    result = df.copy()
    result["S_Prog"] = result.apply(_score_row, axis=1)
    return result
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def backfill_length(df: pd.DataFrame, cfg: dict) -> pd.DataFrame:
    """Fill missing ``length_aa`` values from the ``length_aa_backfill`` table.

    Rows are keyed by ``entity_id`` when that column exists, otherwise by
    ``representative_system``. Existing non-missing lengths are kept; keys not
    found in the table stay missing.

    Args:
        df: Input table; it is copied, not modified.
        cfg: Axes configuration. ``cfg["length_aa_backfill"]`` must exist; a
            ``None`` value (what an empty YAML key loads as) is treated as an
            empty table.

    Returns:
        A copy of *df* with ``length_aa`` created or back-filled.
    """
    table = cfg["length_aa_backfill"] or {}
    out = df.copy()
    key = "entity_id" if "entity_id" in out.columns else "representative_system"
    if "length_aa" in out.columns:
        current = out["length_aa"]
    else:
        current = [None] * len(out)
    # pd.isna already covers None, so no separate `is None` test is needed.
    out["length_aa"] = [
        table.get(k) if pd.isna(v) else v
        for k, v in zip(out[key], current)
    ]
    return out
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def recalibrate_all(df: pd.DataFrame, cfg: dict | None = None) -> pd.DataFrame:
    """Apply length back-fill, cargo recalibration and programmability recalibration, in that order.

    A falsy *cfg* is replaced by the result of ``load_axes_config()``.
    """
    cfg = cfg or load_axes_config()
    with_lengths = backfill_length(df, cfg)
    with_cargo = recalibrate_cargo(with_lengths, cfg)
    return recalibrate_prog(with_cargo, cfg)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Therapeutic-readiness scoring across families (Phase 2, Step 2.3).
|
|
2
|
+
|
|
3
|
+
The motto's "therapeutic-ready" axis, realised and *measured*: score every Writer-Atlas system for
|
|
4
|
+
deliverability, cargo capacity, immunogenicity proxy, and human-cell compatibility - using the Phase-0
|
|
5
|
+
re-grounded axes (configs/score_axes.yaml is the single source of thresholds; no per-enzyme overrides).
|
|
6
|
+
All components are retained on the row (a transparent profile, never collapsed to one opaque number).
|
|
7
|
+
|
|
8
|
+
Inputs : pen_stack/atlas/atlas.parquet, configs/score_axes.yaml.
|
|
9
|
+
Outputs: atlas.parquet updated with deliv_class / S_Deliv / S_Cargo / S_HumanCell / readiness.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from pen_stack.score.recalibrate import load_axes_config
|
|
19
|
+
|
|
20
|
+
_ROOT = Path(__file__).resolve().parents[2]
|
|
21
|
+
_ATLAS = _ROOT / "pen_stack" / "atlas" / "atlas.parquet"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def deliverability_class(length_aa: float | None, cfg: dict) -> str:
    """Classify an effector by size into a delivery class.

    Args:
        length_aa: Effector length in amino acids; any missing marker
            (``None``, NaN of any float type, ``pd.NA``) yields ``"unknown"``.
        cfg: Axes configuration; thresholds are read from
            ``cfg["deliverability"]["aav_single_max_aa"]`` and
            ``cfg["deliverability"]["split_aav_max_aa"]``.

    Returns:
        ``"unknown"``, ``"AAV"`` (length <= single-AAV limit), ``"split-AAV"``
        (length <= split-AAV limit) or ``"mRNA-RNP"`` (anything larger).
    """
    d = cfg["deliverability"]
    # pd.isna also catches NaN values that are not Python-float subclasses
    # (e.g. np.float32), which would otherwise fail both comparisons below and
    # be reported as "mRNA-RNP".
    if pd.isna(length_aa):
        return "unknown"
    L = float(length_aa)
    if L <= d["aav_single_max_aa"]:
        return "AAV"
    if L <= d["split_aav_max_aa"]:
        return "split-AAV"
    return "mRNA-RNP"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _s_cargo(bp, cfg) -> float:
    """Log-scaled cargo score: ``log1p(min(bp, cap)) / log1p(cap)``.

    ``cap`` is ``cfg["cargo"]["cap_bp"]``. Missing (``None``, NaN, ``pd.NA``) or
    non-positive *bp* returns NaN.
    """
    cap = float(cfg["cargo"]["cap_bp"])
    # pd.isna runs first so that pd.NA never reaches the `<=` comparison,
    # whose result cannot be used as a boolean.
    if pd.isna(bp) or bp <= 0:
        return np.nan
    return float(np.log1p(min(float(bp), cap)) / np.log1p(cap))
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _s_humancell(hca: str | None) -> float:
    """Coarse human-cell compatibility score from the curated activity string.

    Matching is case-insensitive and by substring; the first rule that hits wins:
      * "not measured" or "bacterial"                                   -> 0.0
      * "low in human" or "modest"                                      -> 0.4
      * "human cell", "primary t cell", "hepatocyte" or "clinical"      -> 1.0
      * anything else, including missing or non-string input            -> NaN
    """
    # Non-string values (None, float NaN from a DataFrame column) carry no
    # evidence; treat them as empty text rather than calling .lower() on them.
    t = hca.lower() if isinstance(hca, str) else ""
    if "not measured" in t or "bacterial" in t:
        return 0.0
    if "low in human" in t or "modest" in t:
        return 0.4
    # "human cell" also matches "human cells", so one cue covers both.
    if any(cue in t for cue in ("human cell", "primary t cell", "hepatocyte", "clinical")):
        return 1.0
    return np.nan
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def therapeutic_profile(atlas_df: pd.DataFrame, cfg: dict | None = None) -> pd.DataFrame:
    """Score every atlas row on the therapeutic-readiness components.

    Adds ``deliv_class``, ``S_Deliv``, ``S_Cargo``, ``S_HumanCell``, ``S_DSBfree``
    and ``readiness`` (row-wise mean of the four ``S_*`` components, skipping NaN)
    to a copy of *atlas_df*. The input must provide the columns ``length_aa``,
    ``cargo_capacity_bp``, ``human_cell_activity`` and ``dsb_free``.

    Args:
        atlas_df: Atlas table; it is copied, not modified.
        cfg: Axes configuration; a falsy value is replaced by ``load_axes_config()``.

    Returns:
        The scored copy of the atlas table.
    """
    cfg = cfg or load_axes_config()
    df = atlas_df.copy()
    classes = cfg["deliverability"]["classes"]

    def _dsb_score(flag) -> float:
        # isinstance rather than `is True` / `is False`: identity checks do not
        # match numpy.bool_ values. Non-boolean values (None, NaN, ints) -> NaN.
        if isinstance(flag, (bool, np.bool_)):
            return 1.0 if flag else 0.0
        return np.nan

    df["deliv_class"] = df["length_aa"].apply(lambda L: deliverability_class(L, cfg))
    df["S_Deliv"] = df["deliv_class"].map(classes)  # unknown -> NaN
    df["S_Cargo"] = df["cargo_capacity_bp"].apply(lambda bp: _s_cargo(bp, cfg))
    df["S_HumanCell"] = df["human_cell_activity"].apply(_s_humancell)
    df["S_DSBfree"] = df["dsb_free"].apply(_dsb_score)

    # transparent composite (mean of available components); components remain on the row
    comp = df[["S_Deliv", "S_Cargo", "S_HumanCell", "S_DSBfree"]]
    df["readiness"] = comp.mean(axis=1, skipna=True)
    return df
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def apply_to_atlas(atlas_parquet: str | Path = _ATLAS, out: str | Path = _ATLAS) -> pd.DataFrame:
    """Read the atlas parquet, add the therapeutic profile, write it to *out* and return it.

    With the defaults the input file is overwritten in place.
    """
    profiled = therapeutic_profile(pd.read_parquet(atlas_parquet))
    profiled.to_parquet(out, index=False)
    return profiled
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
if __name__ == "__main__":  # pragma: no cover
    # Script entry: rescore the atlas in place, then print the curated-core
    # profile table and the delivery-class distribution.
    scored = apply_to_atlas()
    report_cols = ["representative_system", "length_aa", "deliv_class", "S_Deliv",
                   "S_Cargo", "S_HumanCell", "readiness"]
    curated = scored[scored["entry_kind"] == "curated_core"]
    print(curated[report_cols].to_string(index=False))
    print("\ndeliv_class distribution:\n", scored["deliv_class"].value_counts(dropna=False))
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""pen_stack.server - see PEN-STACK v3.0 program doc."""
|
pen_stack/server/api.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""PEN-STACK REST API (Phase 2, Step 2.6) - atlas + cross-link endpoints over FastAPI.
|
|
2
|
+
|
|
3
|
+
Extends the Phase-1 atlas with the Writer Atlas and the writer<->locus cross-link. Every quantitative
|
|
4
|
+
result is computed by the validated library functions (never guessed); the ``/ask`` route defers numeric
|
|
5
|
+
claims to those tools (Step 2.8). Heavy data is loaded lazily so the app boots without the Phase-1 atlas.
|
|
6
|
+
|
|
7
|
+
Run: ``uvicorn pen_stack.server.api:app --host 0.0.0.0 --port 8000`` (needs the ``server`` extra).
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import pandas as pd
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
from fastapi import FastAPI, HTTPException, Query
|
|
17
|
+
except ImportError as e: # pragma: no cover - server extra optional
|
|
18
|
+
raise ImportError("FastAPI not installed: pip install 'pen-stack[server]'") from e
|
|
19
|
+
|
|
20
|
+
from pen_stack import __version__
|
|
21
|
+
|
|
22
|
+
_ATLAS = Path(__file__).resolve().parents[1] / "atlas" / "atlas.parquet"
|
|
23
|
+
|
|
24
|
+
app = FastAPI(title="PEN-STACK API", version=__version__,
|
|
25
|
+
description="Open infrastructure for genome writing: Writer Atlas + Writable Genome cross-link.")
|
|
26
|
+
|
|
27
|
+
_DISCLAIMER = ("Decision-support only - predictions are calibrated risk/durability estimates, not "
|
|
28
|
+
"clinical directives. Tier-2/3 reachability is candidate and requires experimental validation.")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _atlas_df() -> pd.DataFrame:
    """Load the atlas parquet on each call; raise HTTP 503 when the file does not exist."""
    if _ATLAS.exists():
        return pd.read_parquet(_ATLAS)
    raise HTTPException(503, "atlas.parquet not built")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@app.get("/health")
def health():
    """Report service status, package version and whether the atlas file exists."""
    atlas_present = _ATLAS.exists()
    return dict(status="ok", version=__version__, atlas_present=atlas_present)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@app.get("/atlas/coverage")
def atlas_coverage():
    """Summarise the atlas per family.

    For each family: row count, number of rows whose ``confidence`` equals
    ``"measured"``, and the first ``reachability_tier`` / ``mechanism_bucket`` value.
    """
    df = _atlas_df()

    def _n_measured(confidence):
        return int((confidence == "measured").sum())

    per_family = df.groupby("family").agg(
        n=("representative_system", "size"),
        measured=("confidence", _n_measured),
        reachability_tier=("reachability_tier", "first"),
        mechanism=("mechanism_bucket", "first"),
    )
    per_family = per_family.reset_index()
    return {
        "families": int(df["family"].nunique()),
        "systems": int(len(df)),
        "coverage": per_family.to_dict("records"),
        "disclaimer": _DISCLAIMER,
    }
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@app.get("/atlas")
def atlas(family: str | None = None, limit: int = Query(50, ge=0, le=500)):
    """List atlas rows, optionally restricted to one family.

    Args:
        family: Exact family name to filter on; omitted or empty returns all families.
        limit: Maximum number of rows returned (0..500). The lower bound matters:
            ``DataFrame.head`` with a negative ``n`` returns all rows except the
            last ``|n|``, which would bypass the cap.

    Returns:
        ``n`` (rows matching the filter, before truncation), ``rows`` and the disclaimer.
    """
    df = _atlas_df()
    if family:
        df = df[df["family"] == family]
    wanted = ["representative_system", "family", "confidence", "mechanism_bucket",
              "deliv_class", "readiness", "cargo_capacity_bp", "reachability_tier",
              "human_cell_activity"]
    cols = [c for c in wanted if c in df.columns]
    # NaN is not valid JSON and the JSON response renderer rejects it, so
    # missing floats are emitted as null (`v != v` is true only for NaN).
    rows = [
        {k: (None if isinstance(v, float) and v != v else v) for k, v in rec.items()}
        for rec in df[cols].head(limit).to_dict("records")
    ]
    return {"n": int(len(df)), "rows": rows, "disclaimer": _DISCLAIMER}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@app.get("/crosslink/writers")
def crosslink_writers(chrom: str, bin: int, ct: str = "k562"):
    """Return the writers cross-linked to one locus bin, counted per family.

    A ``FileNotFoundError`` from the cross-link library is reported as HTTP 503.
    An empty result returns an empty ``writers`` list.
    """
    from pen_stack.atlas import crosslink as cl
    try:
        hits = cl.writers_for_locus(chrom, bin, ct)
    except FileNotFoundError as e:
        raise HTTPException(503, str(e)) from e

    locus = f"{chrom}:bin{bin}"
    if hits.empty:
        return {"locus": locus, "writers": [], "disclaimer": _DISCLAIMER}

    per_family = {fam: int(count) for fam, count in hits.groupby("family").size().items()}
    return {
        "locus": locus,
        "ct": ct,
        "locus_writability": float(hits["locus_writability"].iloc[0]),
        "families": per_family,
        "n_systems": int(len(hits)),
        "disclaimer": _DISCLAIMER,
    }
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@app.get("/crosslink/loci")
def crosslink_loci(family: str, ct: str = "k562", top: int = Query(20, le=200)):
    """Return the loci the cross-link library reports for a writer family.

    A ``FileNotFoundError`` from the library is reported as HTTP 503.
    """
    from pen_stack.atlas import crosslink as cl
    try:
        table = cl.loci_for_writer(family, ct, top=top)
    except FileNotFoundError as e:
        raise HTTPException(503, str(e)) from e
    records = table.to_dict("records")
    return {"family": family, "ct": ct, "loci": records, "disclaimer": _DISCLAIMER}
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@app.get("/writable")
def writable(gene: str, ct: str = "k562", top: int = Query(20, ge=0, le=200)):
    """Return up to *top* locus bins for *gene* with their safety/durability/writability scores.

    Args:
        gene: Gene symbol forwarded to ``loci_for_gene``.
        ct: Cell-type key forwarded to ``loci_for_gene``.
        top: Maximum number of bins returned (0..200). The lower bound matters:
            ``DataFrame.head`` with a negative ``n`` returns all rows except the
            last ``|n|``, which would bypass the cap.

    Raises:
        HTTPException: 503 when the underlying data file is missing.
    """
    from pen_stack.atlas.crosslink import loci_for_gene
    try:
        g = loci_for_gene(gene, ct)
    except FileNotFoundError as e:
        raise HTTPException(503, str(e)) from e
    if g.empty:
        return {"gene": gene, "ct": ct, "loci": [], "disclaimer": _DISCLAIMER}
    cols = ["chrom", "bin", "safety", "p_durable", "writability"]
    # NaN is not valid JSON and the JSON response renderer rejects it, so
    # missing scores are emitted as null (`v != v` is true only for NaN).
    loci = [
        {k: (None if isinstance(v, float) and v != v else v) for k, v in rec.items()}
        for rec in g[cols].head(top).to_dict("records")
    ]
    return {"gene": gene, "ct": ct, "loci": loci, "disclaimer": _DISCLAIMER}
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@app.get("/bridge/design")
def bridge_design(target: str, donor: str, scaffold: str = "ISCro4_enhanced",
                  ct: str | None = None, scan: bool = False):
    """Bridge-recombinase design + off-target/QC (Phase 1.5). scan=false by default (genome scan is heavy).

    The raw bridge sequence is not returned; when the design is marked available
    only its length is reported. A scanned off-target table is cut to its first 20 rows.
    """
    from pen_stack.bridge.pipeline import design_and_assess
    result = design_and_assess(target, donor, scaffold, ct=ct, scan=scan)

    offtargets = result["offtargets"]
    if offtargets.get("scanned") and "table" in offtargets:
        hits = offtargets["table"]
        offtargets = {
            "scanned": True,
            "n_candidates": offtargets["n_candidates"],
            "n_exact": offtargets["n_exact"],
            "top": hits.head(20).to_dict("records"),
        }

    brna = result["brna"]
    brna_public = {key: value for key, value in brna.items() if key != "bridge_sequence"}
    if brna.get("available"):
        brna_public["bridge_sequence_len"] = len(brna["bridge_sequence"])

    return {"brna": brna_public, "qc": result["qc"], "offtargets": offtargets,
            "disclaimer": result["disclaimer"]}
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@app.get("/ask")
def ask(q: str):
    """Grounded, cited Q&A (Step 2.8): forwards *q* to ``pen_stack.rag.qa.answer`` with its defaults."""
    from pen_stack.rag import qa
    return qa.answer(q)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@app.get("/plan")
def plan(gene: str, intent: str, cargo_bp: int = 2000, ct: str = "k562", k: int = Query(5, le=20)):
    """Write Planner (Step 3.4): goal + edit_intent -> ranked, traceable plans.

    An *intent* that ``EditIntent`` rejects gives HTTP 422; a ``FileNotFoundError``
    from the planner gives HTTP 503.
    """
    from pen_stack.planner.optimize import EditIntent
    from pen_stack.planner.pipeline import plan_write

    try:
        parsed_intent = EditIntent(intent)
    except ValueError as e:
        raise HTTPException(422, f"unknown edit_intent: {intent}") from e

    try:
        ranked = plan_write(gene, parsed_intent, cargo_bp, ct, k=k)
    except FileNotFoundError as e:
        raise HTTPException(503, str(e)) from e

    return {
        "gene": gene,
        "intent": intent,
        "ct": ct,
        "n": len(ranked),
        "plans": ranked,
        "disclaimer": _DISCLAIMER,
    }
|
pen_stack/ui/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""pen_stack.ui - see PEN-STACK v3.0 program doc."""
|