pen-stack 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pen_stack/__init__.py +2 -0
- pen_stack/_resources.py +34 -0
- pen_stack/adapt/__init__.py +14 -0
- pen_stack/adapt/finetune.py +33 -0
- pen_stack/adapt/ingest.py +86 -0
- pen_stack/adapt/pipeline.py +101 -0
- pen_stack/adapt/recalibrate.py +58 -0
- pen_stack/adapt/report.py +130 -0
- pen_stack/agent/__init__.py +1 -0
- pen_stack/agent/guardrails.py +49 -0
- pen_stack/agent/mcp_server.py +42 -0
- pen_stack/agent/orchestrator.py +106 -0
- pen_stack/agent/pen_agent.py +169 -0
- pen_stack/agent/tools.py +130 -0
- pen_stack/atlas/__init__.py +1 -0
- pen_stack/atlas/build_wtkb.py +80 -0
- pen_stack/atlas/crosslink.py +144 -0
- pen_stack/atlas/expand.py +190 -0
- pen_stack/atlas/schema.py +59 -0
- pen_stack/atlas/scorecard.py +134 -0
- pen_stack/atlas/universe.py +75 -0
- pen_stack/atlas/variant_propose.py +155 -0
- pen_stack/bridge/__init__.py +1 -0
- pen_stack/bridge/activity.py +52 -0
- pen_stack/bridge/cli.py +65 -0
- pen_stack/bridge/fold_qc.py +53 -0
- pen_stack/bridge/guide_qc.py +84 -0
- pen_stack/bridge/ingest.py +139 -0
- pen_stack/bridge/offtarget.py +133 -0
- pen_stack/bridge/ortholog_screen.py +73 -0
- pen_stack/bridge/pipeline.py +83 -0
- pen_stack/cli.py +126 -0
- pen_stack/data/__init__.py +1 -0
- pen_stack/data/encode.py +84 -0
- pen_stack/data/genome.py +71 -0
- pen_stack/data/ingest_chromatin.py +119 -0
- pen_stack/data/ingest_integration.py +112 -0
- pen_stack/data/ingest_safety_annot.py +164 -0
- pen_stack/data/ingest_trip.py +76 -0
- pen_stack/mech/__init__.py +1 -0
- pen_stack/mech/classify_atlas.py +71 -0
- pen_stack/mech/whitelist.py +66 -0
- pen_stack/monitor/__init__.py +1 -0
- pen_stack/monitor/europepmc.py +32 -0
- pen_stack/monitor/run.py +57 -0
- pen_stack/monitor/triage.py +63 -0
- pen_stack/planner/__init__.py +1 -0
- pen_stack/planner/cargo.py +56 -0
- pen_stack/planner/cargo_polish.py +146 -0
- pen_stack/planner/delivery.py +32 -0
- pen_stack/planner/multiplex.py +110 -0
- pen_stack/planner/optimize.py +156 -0
- pen_stack/planner/pipeline.py +86 -0
- pen_stack/planner/report.py +26 -0
- pen_stack/rag/__init__.py +1 -0
- pen_stack/rag/index.py +53 -0
- pen_stack/rag/llm.py +178 -0
- pen_stack/rag/qa.py +105 -0
- pen_stack/score/__init__.py +1 -0
- pen_stack/score/recalibrate.py +77 -0
- pen_stack/score/therapeutic.py +85 -0
- pen_stack/server/__init__.py +1 -0
- pen_stack/server/api.py +142 -0
- pen_stack/ui/__init__.py +1 -0
- pen_stack/ui/app.py +518 -0
- pen_stack/validate/__init__.py +1 -0
- pen_stack/validate/adapt_demo.py +69 -0
- pen_stack/validate/agent_eval.py +117 -0
- pen_stack/validate/blind_gsh_discovery.py +165 -0
- pen_stack/validate/cargo_directionality.py +57 -0
- pen_stack/validate/durability_baselines.py +150 -0
- pen_stack/validate/forward_hypotheses.py +104 -0
- pen_stack/validate/guide_qc_demo.py +58 -0
- pen_stack/validate/intent_specification.py +82 -0
- pen_stack/validate/paper3_benchmark.py +165 -0
- pen_stack/validate/paper4_real_validation.py +144 -0
- pen_stack/validate/paper4_validation.py +82 -0
- pen_stack/validate/seq_vs_measured.py +134 -0
- pen_stack/validate/within_locus_ranking.py +74 -0
- pen_stack/validate/writer_recovery.py +86 -0
- pen_stack/wgenome/__init__.py +1 -0
- pen_stack/wgenome/chromatin_seq.py +83 -0
- pen_stack/wgenome/durability.py +108 -0
- pen_stack/wgenome/export_tracks.py +52 -0
- pen_stack/wgenome/features.py +82 -0
- pen_stack/wgenome/gsh_baseline.py +117 -0
- pen_stack/wgenome/providers.py +245 -0
- pen_stack/wgenome/safety.py +69 -0
- pen_stack/wgenome/structure3d.py +168 -0
- pen_stack/wgenome/writability.py +72 -0
- pen_stack-3.1.0.dist-info/METADATA +451 -0
- pen_stack-3.1.0.dist-info/RECORD +96 -0
- pen_stack-3.1.0.dist-info/WHEEL +5 -0
- pen_stack-3.1.0.dist-info/entry_points.txt +3 -0
- pen_stack-3.1.0.dist-info/licenses/LICENSE +21 -0
- pen_stack-3.1.0.dist-info/top_level.txt +1 -0
pen_stack/ui/app.py
ADDED
|
@@ -0,0 +1,518 @@
|
|
|
1
|
+
"""PEN-STACK - The Writable Genome | Streamlit atlas browser.
|
|
2
|
+
|
|
3
|
+
A scientific front-end over the 3M-locus Writable Genome atlas. Two core queries:
|
|
4
|
+
- Forward: a gene/coordinate -> is it safe + durable to WRITE here? (decomposed verdict)
|
|
5
|
+
- Inverse: a disease gene -> the top-N safest, most durable writable loci within a window.
|
|
6
|
+
Plus an atlas genome browser, the blind-validation dashboard, and cross-cell-type comparison.
|
|
7
|
+
|
|
8
|
+
Run: streamlit run pen_stack/ui/app.py
|
|
9
|
+
Data: set PEN_ATLAS_DIR (default: ./data or Final_Part_v3.0/phase_1/out). Needs atlas_<ct>.parquet,
|
|
10
|
+
gene_coords.parquet, and (optional) validation_report.json.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
import os
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
import pandas as pd
|
|
19
|
+
import plotly.graph_objects as go
|
|
20
|
+
import streamlit as st
|
|
21
|
+
|
|
22
|
+
# ----------------------------------------------------------------------------- config / theme
|
|
23
|
+
# Browser-tab title and page layout.
st.set_page_config(
    page_title="PEN-STACK | The Writable Genome",
    page_icon="",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Dark theme stylesheet; the class names (.card, .kpi, .verdict, .v-go, ...) are
# referenced by the HTML snippets rendered on the individual pages.
CSS = """
<style>
:root { --ink:#05080f; --panel:#0c1322; --line:#1c2840; --cyan:#37e6e0; --green:#3dffa2;
--amber:#ffc857; --red:#ff5d6c; --txt:#dfe9ff; --mut:#7e8db5; }
.stApp { background: radial-gradient(1200px 700px at 80% -10%, #10203f 0%, var(--ink) 55%); color:var(--txt); }
section[data-testid="stSidebar"] { background:#070c17; border-right:1px solid var(--line); }
h1,h2,h3,h4 { color:var(--txt); font-family:'Segoe UI',system-ui,sans-serif; letter-spacing:.2px; }
.mono { font-family:'JetBrains Mono','Consolas',monospace; }
.hero { font-size:2.5rem; font-weight:800; line-height:1.05;
background:linear-gradient(90deg,var(--cyan),var(--green)); -webkit-background-clip:text;
-webkit-text-fill-color:transparent; }
.sub { color:var(--mut); font-size:1.02rem; }
.card { background:linear-gradient(180deg,var(--panel),#0a1020); border:1px solid var(--line);
border-radius:16px; padding:18px 20px; box-shadow:0 0 0 1px rgba(55,230,224,.04), 0 18px 40px -28px #000; }
.kpi { font-size:2.1rem; font-weight:800; }
.kpi-l { color:var(--mut); font-size:.78rem; text-transform:uppercase; letter-spacing:.16em; }
.verdict { border-radius:16px; padding:18px 24px; font-weight:800; font-size:1.5rem; border:1px solid; }
.v-go { background:rgba(61,255,162,.08); border-color:var(--green); color:var(--green); }
.v-cau { background:rgba(255,200,87,.08); border-color:var(--amber); color:var(--amber); }
.v-no { background:rgba(255,93,108,.08); border-color:var(--red); color:var(--red); }
.badge { display:inline-block; padding:2px 10px; border:1px solid var(--line); border-radius:999px;
color:var(--cyan); font-size:.72rem; margin-right:6px; }
.stDataFrame { border:1px solid var(--line); border-radius:12px; }
hr { border-color:var(--line); }
</style>
"""
st.markdown(CSS, unsafe_allow_html=True)

# Layout keyword arguments splatted into every figure's update_layout() call.
PLOTLY = {
    "template": "plotly_dark",
    "paper_bgcolor": "rgba(0,0,0,0)",
    "plot_bgcolor": "rgba(0,0,0,0)",
    "font": {"color": "#dfe9ff"},
    "margin": {"l": 10, "r": 10, "t": 30, "b": 10},
}

# Display labels for the cell types the sidebar is allowed to offer.
CT_LABEL = {
    "k562": "K562 (erythroleukemia)",
    "hepg2": "HepG2 (hepatocellular)",
    "hspc": "HSPC (CD34+ progenitor)",
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ----------------------------------------------------------------------------- data loading
|
|
60
|
+
def _data_dir() -> Path:
    """Locate the directory that holds the atlas parquet files.

    Candidates are tried in order: ``$PEN_ATLAS_DIR``, ``data``,
    ``Final_Part_v3.0/phase_1/out`` and a ``phase_1/out`` directory resolved
    relative to this file. The first one containing ``atlas_k562.parquet``
    wins.

    Returns:
        The first matching candidate, otherwise ``$PEN_ATLAS_DIR`` when it is
        set to a non-empty value, otherwise ``Path("data")``.
    """
    env_dir = os.environ.get("PEN_ATLAS_DIR")
    candidates = [env_dir, "data", "Final_Part_v3.0/phase_1/out"]

    # Only add the file-relative candidate when the path is deep enough;
    # indexing parents[2] on a shallow path would raise IndexError.
    ancestors = Path(__file__).resolve().parents
    if len(ancestors) > 2:
        candidates.append(str(ancestors[2] / ".." / "phase_1" / "out"))

    for candidate in candidates:
        if candidate and (Path(candidate) / "atlas_k562.parquet").exists():
            return Path(candidate)

    # An empty PEN_ATLAS_DIR is treated as unset, matching the loop above,
    # instead of silently resolving to the current directory.
    return Path(env_dir or "data")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Resolved once at import; every loader below reads from this directory.
DATA = _data_dir()
# Multiplier that converts an atlas ``bin`` index into a base-pair position.
BIN_BP = 1_000
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@st.cache_data(show_spinner=False)
def load_atlas(ct: str) -> pd.DataFrame:
    """Read ``atlas_<ct>.parquet`` from DATA and add percentile-rank columns.

    For each of ``writability``, ``safety`` and ``p_durable`` a ``<name>_pct``
    column holding the fractional rank (``rank(pct=True)``) is appended.
    """
    atlas = pd.read_parquet(DATA / f"atlas_{ct}.parquet")
    score_columns = ("writability", "safety", "p_durable")
    for score in score_columns:
        atlas[score + "_pct"] = atlas[score].rank(pct=True)
    return atlas
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@st.cache_data(show_spinner=False)
def load_genes() -> pd.DataFrame:
    """Load the gene coordinate table.

    Looks for ``gene_coords.parquet`` in DATA first, then in a sibling
    ``app_data`` directory. When neither exists an empty frame with the
    columns ``chrom, start, end, strand, gene`` is returned.
    """
    search_paths = (
        DATA / "gene_coords.parquet",
        DATA.parent / "app_data" / "gene_coords.parquet",
    )
    found = next((path for path in search_paths if path.exists()), None)
    if found is None:
        return pd.DataFrame(columns=["chrom", "start", "end", "strand", "gene"])
    return pd.read_parquet(found)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@st.cache_data(show_spinner=False)
def load_validation() -> dict | None:
    """Return the parsed ``validation_report.json`` from DATA, or None if absent."""
    report_path = DATA / "validation_report.json"
    if not report_path.exists():
        return None
    # Decode explicitly as UTF-8: the default for read_text() is the platform
    # locale encoding, which can mis-decode non-ASCII JSON on some hosts.
    return json.loads(report_path.read_text(encoding="utf-8"))
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@st.cache_data(show_spinner=False)
def load_writer_atlas() -> pd.DataFrame:
    """Phase-2 Writer Atlas (33k systems x measured axes). Ships inside the package.

    Read from ``atlas/atlas.parquet`` one directory above this file's folder;
    an empty DataFrame is returned when that file does not exist.
    """
    package_root = Path(__file__).resolve().parents[1]
    parquet_path = package_root / "atlas" / "atlas.parquet"
    if parquet_path.exists():
        return pd.read_parquet(parquet_path)
    return pd.DataFrame()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def region_bins(df, chrom, start, end):
    """Return the rows of *df* on *chrom* whose bin position lies in [start, end].

    The bin position is ``bin * BIN_BP``; both bounds are inclusive.
    """
    on_chrom = df.chrom == chrom
    not_before = df.bin * BIN_BP >= start
    not_after = df.bin * BIN_BP <= end
    return df[on_chrom & not_before & not_after]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def verdict(writ_pct, safety_pct):
    """Map writability / safety percentiles (fractions in 0-1) to a verdict.

    Args:
        writ_pct: Writability percentile of the queried region.
        safety_pct: Safety percentile of the queried region.

    Returns:
        A ``(message, css_class)`` tuple where ``css_class`` is one of
        ``"v-no"``, ``"v-cau"`` or ``"v-go"``.
    """
    # Phrased as "not (value >= cut-off)" so a NaN percentile fails closed:
    # every comparison with NaN is False, and the original "<" tests let NaN
    # fall straight through to the WRITABLE verdict.
    if not (safety_pct >= 0.15 and writ_pct >= 0.20):
        return "AVOID - high genotoxic risk / poor durability", "v-no"
    if writ_pct < 0.55:
        return "CAUTION - sub-optimal; consider nearby alternatives", "v-cau"
    return "WRITABLE - safe & durable insertion locus", "v-go"
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
# ----------------------------------------------------------------------------- viz helpers
|
|
114
|
+
def gauge(value, title, color):
    """Build a dial figure showing ``value * 100`` as a percentage.

    The dial background is banded at 0-20, 20-55 and 55-100; *color* is used
    for the bar and *title* for the caption above the number.
    """
    bands = [
        {"range": [0, 20], "color": "rgba(255,93,108,.18)"},
        {"range": [20, 55], "color": "rgba(255,200,87,.14)"},
        {"range": [55, 100], "color": "rgba(61,255,162,.14)"},
    ]
    dial = {
        "axis": {"range": [0, 100], "tickcolor": "#7e8db5"},
        "bar": {"color": color},
        "bgcolor": "rgba(0,0,0,0)",
        "borderwidth": 1,
        "bordercolor": "#1c2840",
        "steps": bands,
    }
    indicator = go.Indicator(
        mode="gauge+number",
        value=value * 100,
        number={"suffix": "%", "font": {"size": 34}},
        title={"text": title, "font": {"size": 14}},
        gauge=dial,
    )
    figure = go.Figure(indicator)
    figure.update_layout(height=230, **PLOTLY)
    return figure
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def track_fig(sub, center=None):
    """Plot writability, safety and durability of *sub* against position (Mb).

    When *center* (in bp) is given, a dotted vertical marker is drawn there.
    """
    positions_mb = sub.bin * BIN_BP / 1e6
    # (column, legend name, extra trace options) in drawing order.
    tracks = [
        ("writability", "writability",
         dict(line=dict(color="#3dffa2", width=1.5), fill="tozeroy",
              fillcolor="rgba(61,255,162,.12)")),
        ("safety", "safety", dict(line=dict(color="#37e6e0", width=1))),
        ("p_durable", "durability", dict(line=dict(color="#ffc857", width=1))),
    ]
    figure = go.Figure()
    for column, label, options in tracks:
        figure.add_trace(go.Scatter(x=positions_mb, y=sub[column], name=label,
                                    mode="lines", **options))
    if center is not None:
        figure.add_vline(x=center / 1e6, line_color="#ff5d6c", line_dash="dot")
    figure.update_layout(height=300, xaxis_title="position (Mb)", yaxis_title="score",
                         legend=dict(orientation="h", y=1.15), **PLOTLY)
    return figure
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# ----------------------------------------------------------------------------- sidebar
|
|
146
|
+
st.sidebar.markdown("## PEN-STACK")
st.sidebar.caption("The Writable Genome | v3.0")
page = st.sidebar.radio("Navigate", ["Overview", "Forward query", "Site finder (inverse)",
                                     "Atlas browser", "Validation", "Cross-cell-type",
                                     "Writer Atlas", "Bridge design", "Write Planner", "Ask (RAG)",
                                     "Agent"])
# Offer only cell types that have both an atlas file in DATA and a label;
# fall back to k562 so the selectbox is never empty.
_available_cts = sorted(p.stem.replace("atlas_", "") for p in DATA.glob("atlas_*.parquet")
                        if p.stem.replace("atlas_", "") in CT_LABEL) or ["k562"]
ct = st.sidebar.selectbox("Cell type", _available_cts, format_func=lambda c: CT_LABEL.get(c, c.upper()))
st.sidebar.markdown("---")
st.sidebar.caption("Writability = **safety x durability x reachability**, learned blind on public data.")
# Guard on the atlas of the *selected* cell type. The previous hard-coded
# k562 check rejected data directories that only ship another atlas.
if not (DATA / f"atlas_{ct}.parquet").exists():
    st.sidebar.error(f"Atlas not found in {DATA}. Set PEN_ATLAS_DIR.")
    st.stop()

genes = load_genes()
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def gene_row(name):
    """Return the first ``genes`` row whose symbol equals *name*, else None.

    The comparison ignores case and surrounding whitespace in *name*.
    """
    wanted = name.strip().upper()
    matches = genes[genes.gene.str.upper() == wanted]
    if matches.empty:
        return None
    return matches.iloc[0]
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# ----------------------------------------------------------------------------- pages
|
|
170
|
+
if page == "Overview":
    # Hero banner and tagline.
    st.markdown('<div class="hero">The Writable Genome</div>', unsafe_allow_html=True)
    tagline = ('<p class="sub">A predictive, writer-aware atlas of <b>where in the genome you can '
               'safely and durably write new DNA</b> - and which enzyme can write it there.</p>')
    st.markdown(tagline, unsafe_allow_html=True)

    # Four headline KPI cards for the selected cell type.
    atlas = load_atlas(ct)
    kpi_slots = st.columns(4)
    headline = [
        ("loci scored", f"{len(atlas):,}"),
        ("cell type", ct.upper()),
        ("mean writability", f"{atlas.writability.mean():.2f}"),
        ("median safety", f"{atlas.safety.median():.2f}"),
    ]
    for slot, (label, value) in zip(kpi_slots, headline):
        slot.markdown(f'<div class="card"><div class="kpi-l">{label}</div>'
                      f'<div class="kpi mono">{value}</div></div>', unsafe_allow_html=True)
    st.markdown("####")

    left, right = st.columns([2, 1])
    with left:
        st.markdown("##### Genome-wide writability distribution")
        histogram = go.Figure(go.Histogram(x=atlas.writability, nbinsx=60, marker_color="#37e6e0"))
        histogram.update_layout(height=320, xaxis_title="writability score", yaxis_title="loci", **PLOTLY)
        st.plotly_chart(histogram, use_container_width=True)
    with right:
        st.markdown("##### Three learned layers")
        layers_card = ('<div class="card mono">'
                       '<span class="badge">SAFETY</span> genotoxicity risk<br>'
                       '<span style="color:#7e8db5">COSMIC | DepMap | 3.7M MLV sites</span><br><br>'
                       '<span class="badge">DURABILITY</span> will it stay expressed<br>'
                       '<span style="color:#7e8db5">TRIP position-effect model</span><br><br>'
                       '<span class="badge">REACHABILITY</span> which writer reaches it<br>'
                       '<span style="color:#7e8db5">Writer-Targeting KB (8 families)</span></div>')
        st.markdown(layers_card, unsafe_allow_html=True)

    # One PASS/FAIL card per pre-registered check, when a report is available.
    report = load_validation()
    if report:
        st.markdown("##### Blind validation - all pre-registered checks")
        check_slots = st.columns(len(report.get("prereg_checks", {})) or 1)
        for slot, (check, passed) in zip(check_slots, report.get("prereg_checks", {}).items()):
            colour = "#3dffa2" if passed else "#ff5d6c"
            status = "PASS" if passed else "FAIL"
            slot.markdown(f'<div class="card"><div class="kpi-l">{check}</div>'
                          f'<div class="kpi" style="color:{colour}">'
                          f'{status}</div></div>', unsafe_allow_html=True)
|
|
208
|
+
|
|
209
|
+
elif page == "Forward query":
|
|
210
|
+
st.markdown("### Forward query - *is it safe to write here?*")
|
|
211
|
+
df = load_atlas(ct)
|
|
212
|
+
c1, c2, c3 = st.columns([2, 1, 1])
|
|
213
|
+
q = c1.text_input("Gene symbol or coordinate (chr:pos)", "AAVS1")
|
|
214
|
+
win = c2.number_input("window (kb)", 1, 200, 20)
|
|
215
|
+
go_btn = c3.button("Evaluate", type="primary", use_container_width=True)
|
|
216
|
+
alias = {"AAVS1": "PPP1R12C"}
|
|
217
|
+
if go_btn or q:
|
|
218
|
+
chrom = start = end = None
|
|
219
|
+
if ":" in q:
|
|
220
|
+
chrom, pos = q.split(":")[0], int(q.split(":")[1].replace(",", ""))
|
|
221
|
+
start, end = pos - win * 1000, pos + win * 1000
|
|
222
|
+
else:
|
|
223
|
+
gr = gene_row(alias.get(q.strip().upper(), q))
|
|
224
|
+
if gr is not None:
|
|
225
|
+
chrom, start, end = gr.chrom, gr.start - win * 1000, gr.end + win * 1000
|
|
226
|
+
if chrom is None:
|
|
227
|
+
st.warning("Gene/coordinate not found.")
|
|
228
|
+
else:
|
|
229
|
+
sub = region_bins(df, chrom, max(0, start), end)
|
|
230
|
+
if sub.empty:
|
|
231
|
+
st.warning("No atlas bins in that region.")
|
|
232
|
+
else:
|
|
233
|
+
wr, sf, du = sub.writability.mean(), sub.safety.mean(), sub.p_durable.mean()
|
|
234
|
+
wrp = float((df.writability < wr).mean())
|
|
235
|
+
sfp = float((df.safety < sf).mean())
|
|
236
|
+
msg, cls = verdict(wrp, sfp)
|
|
237
|
+
st.markdown(f'<div class="verdict {cls}">{msg}</div>', unsafe_allow_html=True)
|
|
238
|
+
st.caption(f"{chrom}:{max(0,start):,}-{end:,} | {CT_LABEL[ct]} | {len(sub)} loci")
|
|
239
|
+
g = st.columns(3)
|
|
240
|
+
g[0].plotly_chart(gauge(wr, "Writability", "#3dffa2"), use_container_width=True)
|
|
241
|
+
g[1].plotly_chart(gauge(sf, "Safety", "#37e6e0"), use_container_width=True)
|
|
242
|
+
g[2].plotly_chart(gauge(du, "Durability", "#ffc857"), use_container_width=True)
|
|
243
|
+
st.markdown("##### Local writability landscape")
|
|
244
|
+
st.plotly_chart(track_fig(sub, center=(start + end) // 2), use_container_width=True)
|
|
245
|
+
st.markdown('<span class="badge">reachable writers</span> '
|
|
246
|
+
f'<span class="mono">{sub.reachable_tier1.iloc[0]}</span> '
|
|
247
|
+
'<span style="color:#7e8db5">(Tier-1, locus-level)</span>',
|
|
248
|
+
unsafe_allow_html=True)
|
|
249
|
+
|
|
250
|
+
elif page == "Site finder (inverse)":
|
|
251
|
+
st.markdown("### Site finder - *the safest writable loci near a target*")
|
|
252
|
+
df = load_atlas(ct)
|
|
253
|
+
c1, c2, c3, c4 = st.columns([2, 1, 1, 1])
|
|
254
|
+
gname = c1.text_input("Disease / target gene", "HBB")
|
|
255
|
+
span = c2.number_input("search +/- (Mb)", 0.1, 5.0, 1.0)
|
|
256
|
+
topn = c3.number_input("top N", 5, 200, 50)
|
|
257
|
+
find = c4.button("Find sites", type="primary", use_container_width=True)
|
|
258
|
+
if find or gname:
|
|
259
|
+
gr = gene_row(gname)
|
|
260
|
+
if gr is None:
|
|
261
|
+
st.warning("Gene not found.")
|
|
262
|
+
else:
|
|
263
|
+
lo, hi = gr.start - int(span * 1e6), gr.end + int(span * 1e6)
|
|
264
|
+
sub = region_bins(df, gr.chrom, max(0, lo), hi).copy()
|
|
265
|
+
top = sub.nlargest(int(topn), "writability")
|
|
266
|
+
st.caption(f"{gname} ({gr.chrom}:{gr.start:,}) | searching +/- {span} Mb | {len(sub)} loci scanned")
|
|
267
|
+
k = st.columns(3)
|
|
268
|
+
k[0].markdown(f'<div class="card"><div class="kpi-l">candidate loci</div>'
|
|
269
|
+
f'<div class="kpi mono">{len(sub):,}</div></div>', unsafe_allow_html=True)
|
|
270
|
+
k[1].markdown(f'<div class="card"><div class="kpi-l">best writability</div>'
|
|
271
|
+
f'<div class="kpi mono" style="color:#3dffa2">{top.writability.max():.2f}</div></div>',
|
|
272
|
+
unsafe_allow_html=True)
|
|
273
|
+
k[2].markdown(f'<div class="card"><div class="kpi-l">target locus writability</div>'
|
|
274
|
+
f'<div class="kpi mono">{sub[(sub.bin*BIN_BP>=gr.start)&(sub.bin*BIN_BP<=gr.end)].writability.mean():.2f}'
|
|
275
|
+
'</div></div>', unsafe_allow_html=True)
|
|
276
|
+
fig = go.Figure()
|
|
277
|
+
fig.add_trace(go.Scatter(x=sub.bin * BIN_BP / 1e6, y=sub.writability, mode="markers",
|
|
278
|
+
marker=dict(size=4, color=sub.writability, colorscale="Tealgrn",
|
|
279
|
+
showscale=False), name="loci"))
|
|
280
|
+
fig.add_trace(go.Scatter(x=top.bin * BIN_BP / 1e6, y=top.writability, mode="markers",
|
|
281
|
+
marker=dict(size=9, color="#3dffa2", line=dict(color="#fff", width=.5)),
|
|
282
|
+
name=f"top {topn}"))
|
|
283
|
+
fig.add_vrect(x0=gr.start / 1e6, x1=gr.end / 1e6, fillcolor="rgba(255,93,108,.25)",
|
|
284
|
+
line_width=0, annotation_text=gname)
|
|
285
|
+
fig.update_layout(height=320, xaxis_title="position (Mb)", yaxis_title="writability",
|
|
286
|
+
legend=dict(orientation="h", y=1.15), **PLOTLY)
|
|
287
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
288
|
+
out = top[["chrom", "bin", "writability", "safety", "p_durable", "reachable_tier1"]].copy()
|
|
289
|
+
out["position"] = out.bin * BIN_BP
|
|
290
|
+
out = out[["chrom", "position", "writability", "safety", "p_durable", "reachable_tier1"]]
|
|
291
|
+
st.markdown(f"##### Top {topn} writable loci")
|
|
292
|
+
st.dataframe(out.round(3), use_container_width=True, height=360)
|
|
293
|
+
st.download_button("v Download ranked loci (CSV)", out.to_csv(index=False),
|
|
294
|
+
f"writable_loci_{gname}_{ct}.csv", "text/csv")
|
|
295
|
+
|
|
296
|
+
elif page == "Atlas browser":
|
|
297
|
+
st.markdown("### Atlas browser - *genome-wide tracks*")
|
|
298
|
+
df = load_atlas(ct)
|
|
299
|
+
c1, c2, c3 = st.columns([1, 2, 2])
|
|
300
|
+
chrom = c1.selectbox("chromosome", sorted(df.chrom.unique(), key=lambda x: (len(x), x)))
|
|
301
|
+
cmax = int(df[df.chrom == chrom].bin.max() * BIN_BP)
|
|
302
|
+
rng = c2.slider("region (Mb)", 0.0, cmax / 1e6, (0.0, min(5.0, cmax / 1e6)), step=0.5)
|
|
303
|
+
c3.markdown("####")
|
|
304
|
+
sub = region_bins(df, chrom, int(rng[0] * 1e6), int(rng[1] * 1e6))
|
|
305
|
+
if len(sub) > 8000:
|
|
306
|
+
sub = sub.iloc[:: len(sub) // 8000]
|
|
307
|
+
st.plotly_chart(track_fig(sub), use_container_width=True)
|
|
308
|
+
st.caption(f"{chrom}:{int(rng[0]*1e6):,}-{int(rng[1]*1e6):,} | {len(sub):,} bins shown | {CT_LABEL[ct]}")
|
|
309
|
+
|
|
310
|
+
elif page == "Validation":
|
|
311
|
+
st.markdown("### Blind validation - *recovering known truth*")
|
|
312
|
+
v = load_validation()
|
|
313
|
+
if not v:
|
|
314
|
+
st.info("validation_report.json not found in the data directory.")
|
|
315
|
+
else:
|
|
316
|
+
d = v.get("durability") or {}
|
|
317
|
+
a = v.get("atlas", {})
|
|
318
|
+
c = st.columns(3)
|
|
319
|
+
c[0].markdown(f'<div class="card"><div class="kpi-l">durability Spearman rho</div>'
|
|
320
|
+
f'<div class="kpi mono" style="color:#3dffa2">{d.get("expr_spearman",0):.2f}</div></div>',
|
|
321
|
+
unsafe_allow_html=True)
|
|
322
|
+
c[1].markdown(f'<div class="card"><div class="kpi-l">silenced/stable AUROC</div>'
|
|
323
|
+
f'<div class="kpi mono">{d.get("silenced_auroc",0):.2f}</div>'
|
|
324
|
+
f'<div class="kpi-l">baseline {d.get("silenced_baseline_h3k9me3_auroc",0):.2f}</div></div>',
|
|
325
|
+
unsafe_allow_html=True)
|
|
326
|
+
allok = v.get("all_prereg_checks_pass")
|
|
327
|
+
c[2].markdown(f'<div class="card"><div class="kpi-l">pre-registered checks</div>'
|
|
328
|
+
f'<div class="kpi" style="color:{"#3dffa2" if allok else "#ff5d6c"}">'
|
|
329
|
+
f'{"ALL PASS" if allok else "REVIEW"}</div></div>', unsafe_allow_html=True)
|
|
330
|
+
st.markdown("##### Safe harbours vs genotoxic CIS - writability percentile")
|
|
331
|
+
rows = []
|
|
332
|
+
for cell, av in a.items():
|
|
333
|
+
for name, (cls, pct) in av.get("loci", {}).items():
|
|
334
|
+
rows.append({"cell": cell.upper(), "locus": name, "class": cls, "pct": pct})
|
|
335
|
+
if rows:
|
|
336
|
+
rdf = pd.DataFrame(rows)
|
|
337
|
+
fig = go.Figure()
|
|
338
|
+
for cls, color in [("SAFE", "#3dffa2"), ("GTOX", "#ff5d6c")]:
|
|
339
|
+
s = rdf[rdf["class"] == cls]
|
|
340
|
+
fig.add_trace(go.Bar(x=s.locus + " | " + s.cell, y=s.pct, name=cls, marker_color=color))
|
|
341
|
+
fig.update_layout(height=360, yaxis_title="writability percentile",
|
|
342
|
+
barmode="group", **PLOTLY)
|
|
343
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
344
|
+
st.caption("Validated safe harbours (green) score high; clinical genotoxic loci (red) score "
|
|
345
|
+
"near zero - recovered blind, never trained on these labels.")
|
|
346
|
+
|
|
347
|
+
elif page == "Cross-cell-type":
|
|
348
|
+
st.markdown("### Cross-cell-type - *function transfer, reported honestly*")
|
|
349
|
+
a = load_atlas("k562")[["chrom", "bin", "writability"]].rename(columns={"writability": "k562"})
|
|
350
|
+
b = load_atlas("hepg2")[["chrom", "bin", "writability"]].rename(columns={"writability": "hepg2"})
|
|
351
|
+
m = a.merge(b, on=["chrom", "bin"]).sample(min(40000, len(a)), random_state=0)
|
|
352
|
+
rho = float(pd.Series(m.k562).corr(pd.Series(m.hepg2), method="spearman"))
|
|
353
|
+
st.markdown(f'<div class="card"><div class="kpi-l">K562 <-> HepG2 writability Spearman</div>'
|
|
354
|
+
f'<div class="kpi mono" style="color:#37e6e0">{rho:.2f}</div></div>', unsafe_allow_html=True)
|
|
355
|
+
fig = go.Figure(go.Histogram2d(x=m.k562, y=m.hepg2, colorscale="Tealgrn", nbinsx=50, nbinsy=50))
|
|
356
|
+
fig.update_layout(height=420, xaxis_title="K562 writability", yaxis_title="HepG2 writability", **PLOTLY)
|
|
357
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
358
|
+
st.caption("The model is cell-type-specific in inputs, agnostic in function: writability correlates "
|
|
359
|
+
"across cell types yet differs locus-by-locus - the quantified transfer, not a footnote.")
|
|
360
|
+
|
|
361
|
+
elif page == "Writer Atlas":
|
|
362
|
+
st.markdown("### Writer Atlas - *every genome-writing family on common, measured axes*")
|
|
363
|
+
wa = load_writer_atlas()
|
|
364
|
+
if wa.empty:
|
|
365
|
+
st.info("atlas.parquet not found - run `python scripts/p2_build_atlas.py`.")
|
|
366
|
+
else:
|
|
367
|
+
cov = (wa.groupby("family")
|
|
368
|
+
.agg(systems=("representative_system", "size"),
|
|
369
|
+
measured=("confidence", lambda s: int((s == "measured").sum())),
|
|
370
|
+
tier=("reachability_tier", "first"),
|
|
371
|
+
mechanism=("mechanism_bucket", "first"),
|
|
372
|
+
deliv=("deliv_class", "first"),
|
|
373
|
+
cargo_bp=("cargo_capacity_bp", "max"))
|
|
374
|
+
.reset_index().sort_values("systems", ascending=False))
|
|
375
|
+
k = st.columns(3)
|
|
376
|
+
k[0].markdown(f'<div class="card"><div class="kpi-l">writer families</div>'
|
|
377
|
+
f'<div class="kpi mono">{wa.family.nunique()}</div></div>', unsafe_allow_html=True)
|
|
378
|
+
k[1].markdown(f'<div class="card"><div class="kpi-l">catalogued systems</div>'
|
|
379
|
+
f'<div class="kpi mono">{len(wa):,}</div></div>', unsafe_allow_html=True)
|
|
380
|
+
k[2].markdown(f'<div class="card"><div class="kpi-l">IS110 orthologs</div>'
|
|
381
|
+
f'<div class="kpi mono" style="color:#3dffa2">{int((wa.family=="bridge_IS110").sum()):,}</div></div>',
|
|
382
|
+
unsafe_allow_html=True)
|
|
383
|
+
st.markdown("##### Family coverage (measured axes + reachability tier)")
|
|
384
|
+
st.dataframe(cov, use_container_width=True, height=320)
|
|
385
|
+
fams = st.multiselect("Compare families", sorted(wa.family.unique()),
|
|
386
|
+
default=["bridge_IS110", "CAST_VK", "serine_integrase", "PE_integrase"])
|
|
387
|
+
comp = wa[wa.family.isin(fams) & wa.entry_kind.eq("curated_core")] if "entry_kind" in wa else wa[wa.family.isin(fams)]
|
|
388
|
+
if not comp.empty and "readiness" in comp:
|
|
389
|
+
fig = go.Figure(go.Bar(x=comp.representative_system, y=comp.readiness,
|
|
390
|
+
marker_color="#37e6e0", text=comp.deliv_class))
|
|
391
|
+
fig.update_layout(height=320, yaxis_title="therapeutic readiness",
|
|
392
|
+
xaxis_title="representative system", **PLOTLY)
|
|
393
|
+
st.plotly_chart(fig, use_container_width=True)
|
|
394
|
+
st.caption("Reachability tiers: Tier-1 directly scannable | Tier-2 candidate (requires validation) "
|
|
395
|
+
". Tier-3 not yet predictable. Every system carries a confidence tag + source DOI.")
|
|
396
|
+
|
|
397
|
+
elif page == "Bridge design":
|
|
398
|
+
st.markdown("### Bridge design + off-target - *the first instrument of PEN-STACK*")
|
|
399
|
+
st.caption("Design a bridge RNA (wraps the Arc BridgeRNADesigner) and assess fold + cross-loop QC and "
|
|
400
|
+
"genome-wide off-target risk (position-weight model; measured profile from Perry 2025).")
|
|
401
|
+
c1, c2 = st.columns(2)
|
|
402
|
+
target = c1.text_input("Target core (14 nt)", "ACGTGTCTACGTGA")
|
|
403
|
+
donor = c2.text_input("Donor core (14 nt)", "TTGCATCTAGGCAC")
|
|
404
|
+
scaffold = st.selectbox("Scaffold", ["ISCro4_enhanced", "ISCro4_WT", "IS621"])
|
|
405
|
+
scan_chrom = st.selectbox("Off-target scan", ["none (QC only)", "chr22", "chr21", "chrX"])
|
|
406
|
+
if st.button("Design + assess", type="primary"):
|
|
407
|
+
from pen_stack.bridge.fold_qc import qc_verdict
|
|
408
|
+
from pen_stack.bridge.ingest import load_measured_profile
|
|
409
|
+
from pen_stack.bridge.pipeline import design_brna
|
|
410
|
+
brna = design_brna(target, donor, scaffold)
|
|
411
|
+
st.markdown(f'<div class="card"><b>Bridge RNA</b> ({scaffold}) - target {brna["target"]} | '
|
|
412
|
+
f'donor {brna["donor"]}' +
|
|
413
|
+
(f' | scaffold {len(brna["bridge_sequence"])} nt' if brna.get("available")
|
|
414
|
+
else f' | <i>{brna["note"]}</i>') + '</div>', unsafe_allow_html=True)
|
|
415
|
+
qc = qc_verdict(brna["target"], brna["donor"], brna.get("bridge_sequence"))
|
|
416
|
+
vclass = "v-yes" if qc["pass"] else "v-cau"
|
|
417
|
+
st.markdown(f'<div class="verdict {vclass}">QC {"PASS" if qc["pass"] else "REVIEW"} - '
|
|
418
|
+
f'cross-loop {qc["cross_loop"]}' +
|
|
419
|
+
(f' | fold MFE {qc["fold"]["mfe"]}' if qc.get("fold", {}).get("available") else "") +
|
|
420
|
+
'</div>', unsafe_allow_html=True)
|
|
421
|
+
mp = load_measured_profile()
|
|
422
|
+
if not mp.empty:
|
|
423
|
+
st.caption("Measured off-target position profile (Perry 2025, 6,856 real off-targets) - "
|
|
424
|
+
"central core (7-9) is the specificity determinant:")
|
|
425
|
+
st.bar_chart(mp.set_index("position")["protective_weight"])
|
|
426
|
+
if scan_chrom != "none (QC only)":
|
|
427
|
+
from pen_stack.bridge.pipeline import _hg38
|
|
428
|
+
fa = _hg38()
|
|
429
|
+
if fa is None:
|
|
430
|
+
st.warning("hg38 fasta not found on this host (set PEN_HG38); QC shown above.")
|
|
431
|
+
else:
|
|
432
|
+
from pen_stack.bridge.offtarget import scan_offtargets
|
|
433
|
+
with st.spinner(f"scanning {scan_chrom} for off-target pseudosites..."):
|
|
434
|
+
df = scan_offtargets(fa, brna["target"], [scan_chrom])
|
|
435
|
+
st.caption(f"{len(df)} off-target pseudosites on {scan_chrom} "
|
|
436
|
+
f"({int((df.risk>0.5).sum()) if len(df) else 0} high-risk):")
|
|
437
|
+
if len(df):
|
|
438
|
+
st.dataframe(df.head(15)[["chrom", "pos", "site", "n_mm", "risk"]].round(3),
|
|
439
|
+
use_container_width=True)
|
|
440
|
+
st.caption("Decision-support only; predicted off-targets require experimental validation.")
|
|
441
|
+
|
|
442
|
+
elif page == "Write Planner":
|
|
443
|
+
st.markdown("### Write Planner - *inverse design (Phase 3 capstone)*")
|
|
444
|
+
st.caption("goal + edit_intent -> ranked, traceable site x writer x cargo x delivery plans. "
|
|
445
|
+
"edit_intent is load-bearing (an in-gene site ranks high for knock-in, low for safe-harbour).")
|
|
446
|
+
gene = st.text_input("Target gene", "TRAC")
|
|
447
|
+
intent = st.selectbox("Edit intent", ["knock_in_with_disruption", "safe_harbour_insertion",
|
|
448
|
+
"high_durability_insertion", "regulatory_excision", "repeat_excision"])
|
|
449
|
+
cargo_bp = int(st.number_input("Cargo size (bp)", 100, 40000, 2000))
|
|
450
|
+
if st.button("Plan", type="primary"):
|
|
451
|
+
from pen_stack.planner.optimize import EditIntent
|
|
452
|
+
from pen_stack.planner.pipeline import plan_write
|
|
453
|
+
try:
|
|
454
|
+
with st.spinner("optimising destination x writer x cargo x delivery..."):
|
|
455
|
+
plans = plan_write(gene, EditIntent(intent), cargo_bp, ct, k=5)
|
|
456
|
+
except FileNotFoundError as e:
|
|
457
|
+
st.error(str(e))
|
|
458
|
+
plans = []
|
|
459
|
+
if not plans:
|
|
460
|
+
st.warning("No plan found (gene not in the atlas, or no reachable site).")
|
|
461
|
+
for i, p in enumerate(plans, 1):
|
|
462
|
+
s = p["site"]
|
|
463
|
+
st.markdown(f'<div class="card"><b>Plan {i}</b> - {s["chrom"]}:{s["pos"]:,} '
|
|
464
|
+
f'(on_target={p["on_target"]}) | writer <b>{p["writer"]}</b> '
|
|
465
|
+
f'[{p["reachability_tier"]}]<br>safety {p["safety"]} | durability {p["durability"]} '
|
|
466
|
+
f'. writer-activity {p["writer_activity"]} | score {p["score"]}<br>'
|
|
467
|
+
f'cargo {p["cargo"]["payload_bp"]}bp->{p["cargo"]["assembled_bp"]}bp '
|
|
468
|
+
f'(size_ok={p["cargo"]["size_ok"]}) | delivery <b>{p["delivery"]["delivery"]}</b> | '
|
|
469
|
+
f'off-target {p["cargo"].get("offtargets",{}).get("status","n/a")}</div>',
|
|
470
|
+
unsafe_allow_html=True)
|
|
471
|
+
if plans:
|
|
472
|
+
st.caption(plans[0]["disclaimer"])
|
|
473
|
+
|
|
474
|
+
elif page == "Ask (RAG)":
|
|
475
|
+
st.markdown("### Ask - *grounded, cited Q&A over the platform*")
|
|
476
|
+
st.caption("Numbers come from validated tool calls (never guessed); clinical-directive questions are refused.")
|
|
477
|
+
q = st.text_input("Ask a question",
|
|
478
|
+
"Which bridge recombinase works in human cells, and where can I write into CCR5?")
|
|
479
|
+
if st.button("Ask", type="primary") or q:
|
|
480
|
+
from pen_stack.rag.qa import answer as rag_answer
|
|
481
|
+
a = rag_answer(q, ct=ct)
|
|
482
|
+
if a.get("refused"):
|
|
483
|
+
st.markdown(f'<div class="verdict v-no">{a["answer"]}</div>', unsafe_allow_html=True)
|
|
484
|
+
else:
|
|
485
|
+
st.markdown(f'<div class="card">{a["answer"]}</div>', unsafe_allow_html=True)
|
|
486
|
+
if a.get("provenance"):
|
|
487
|
+
st.markdown("##### Tool provenance (every number traces here)")
|
|
488
|
+
st.json(a["provenance"])
|
|
489
|
+
if a.get("citations"):
|
|
490
|
+
st.markdown("##### Citations")
|
|
491
|
+
st.write(", ".join(a["citations"]))
|
|
492
|
+
st.caption(a.get("disclaimer", ""))
|
|
493
|
+
|
|
494
|
+
elif page == "Agent":
|
|
495
|
+
st.markdown("### Agent - *natural-language goal -> cited, auditable write plan*")
|
|
496
|
+
st.caption("The PEN-STACK agent orchestrates every validated tool. It obtains numbers ONLY from tool "
|
|
497
|
+
"calls (no fabrication), refuses clinical directives, and logs an auditable trace.")
|
|
498
|
+
goal = st.text_input("Goal", "Knock a CAR into TRAC, disrupting the TCR for allogeneic CAR-T.")
|
|
499
|
+
if st.button("Plan with agent", type="primary"):
|
|
500
|
+
from pen_stack.agent.orchestrator import run_agent
|
|
501
|
+
with st.spinner("Agent calling validated tools..."):
|
|
502
|
+
res = run_agent(goal)
|
|
503
|
+
if res.get("refused"):
|
|
504
|
+
st.markdown(f'<div class="verdict v-no">{res["plan"]}</div>', unsafe_allow_html=True)
|
|
505
|
+
else:
|
|
506
|
+
mode = "LLM tool-calling" if res.get("llm") else "deterministic fallback (no LLM reachable)"
|
|
507
|
+
st.caption(f"mode: {mode}")
|
|
508
|
+
st.markdown(f'<div class="card">{res["plan"]}</div>', unsafe_allow_html=True)
|
|
509
|
+
if res.get("trace"):
|
|
510
|
+
st.markdown("##### Auditable trace (every number traces to a tool call)")
|
|
511
|
+
for i, step in enumerate(res["trace"], 1):
|
|
512
|
+
with st.expander(f"step {i}: {step['tool']}({step['args']})"):
|
|
513
|
+
st.json(step["result"])
|
|
514
|
+
st.caption(res.get("disclaimer", ""))
|
|
515
|
+
|
|
516
|
+
st.markdown("---")
|
|
517
|
+
st.caption("PEN-STACK v3.0 | The Writable Genome + Writer Atlas + Write Planner + agent | decision-support, "
|
|
518
|
+
"not a clinical directive | every score traceable to public data + a pre-registered model.")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""pen_stack.validate - see PEN-STACK v3.0 program doc."""
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""WS-F acceptance demo - the adaptation gate on a held-out dataset (deterministic, CI-safe, synthetic).
|
|
2
|
+
|
|
3
|
+
Demonstrates all WS-F acceptance points without any private data or atlas:
|
|
4
|
+
1. ACTIVATE case: a miscalibrated released score + informative labels -> isotonic recalibration improves
|
|
5
|
+
held-out Brier/ECE -> the gate ACTIVATES the adapted model.
|
|
6
|
+
2. REJECT case: labels independent of the score -> no held-out improvement -> the gate REJECTS (the
|
|
7
|
+
released model is kept). This proves the gate actually guards quality, not just rubber-stamps.
|
|
8
|
+
3. The released model is provably unchanged in both cases (fingerprint identical), and a before/after
|
|
9
|
+
report + model card are written under models/local_<id>/.
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
import pandas as pd
|
|
18
|
+
|
|
19
|
+
from pen_stack.adapt.pipeline import adapt
|
|
20
|
+
|
|
21
|
+
# Default destination for the JSON summary written by run(): "out/adapt_demo.json"
# under the directory two levels above the one that contains this file.
_OUT = Path(__file__).resolve().parents[2] / "out" / "adapt_demo.json"
|
|
22
|
+
# Chromosome labels that _synth() samples from to fill the "chrom" column.
_CHROMS = ["chr1", "chr2", "chr3", "chr4", "chr5", "chr6"]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _synth(n: int, seed: int, informative: bool) -> pd.DataFrame:
    """Build a seeded synthetic table with columns chrom, bin, score and label.

    Args:
        n: Number of rows to generate.
        seed: Seed for the NumPy random generator, so output is reproducible.
        informative: If true, each label is drawn with probability equal to a
            latent risk and the score is that risk squared (same ordering,
            different scale). If false, labels are drawn with probability 0.5
            and the score is an independent uniform draw.

    Returns:
        A DataFrame of ``n`` rows: ``chrom`` sampled from ``_CHROMS``, ``bin``
        an integer in [0, 10000), ``score`` and ``label`` as floats.
    """
    gen = np.random.default_rng(seed)
    # NOTE: the order of the generator calls below fixes the output for a given
    # seed; do not reorder them.
    true_risk = gen.random(n)  # the true risk in [0,1]
    coin = gen.random(n)
    if informative:
        outcome = coin < true_risk  # label tracks latent
        released = np.clip(true_risk ** 2, 0, 1)  # SAME ranking, miscalibrated (under-predicts)
    else:
        outcome = coin < 0.5  # label independent of score
        released = gen.random(n)
    chrom_col = gen.choice(_CHROMS, size=n)
    bin_col = gen.integers(0, 10_000, n)
    columns = {
        "chrom": chrom_col,
        "bin": bin_col,
        "score": released,
        "label": outcome.astype(float),
    }
    return pd.DataFrame(columns)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def run(out: str | Path = _OUT) -> dict:
    """Run the two demo adaptations and write a JSON acceptance summary.

    ``adapt`` is called twice on synthetic data from ``_synth``: first with
    informative labels (local id ``demo_activate``, margin 0.0), then with
    labels independent of the score (local id ``demo_reject``, margin 0.01).
    Both calls use target ``safety``, method ``isotonic`` and primary metric
    ``brier``.

    Args:
        out: Path of the JSON summary to write; missing parent directories
            are created.

    Returns:
        The summary dict that was serialised to ``out``.
    """
    shared = {"target": "safety", "method": "isotonic", "primary": "brier"}
    # The activate case must run before the reject case, as in the original order.
    act = adapt(_synth(400, 1, informative=True), local_id="demo_activate", margin=0.0, **shared)
    rej = adapt(_synth(400, 2, informative=False), local_id="demo_reject", margin=0.01, **shared)

    act_before = act["held_out_before"]
    act_after = act["held_out_after"]
    auroc_shift = abs(act_before["auroc"] - act_after["auroc"])

    activate_case = {
        "gate": act["gate"]["decision"],
        "activated": act["activated"],
        "brier_released": act_before["brier"],
        "brier_adapted": act_after["brier"],
        # Recalibration should leave the ranking (AUROC) essentially untouched.
        "auroc_preserved": bool(auroc_shift < 0.05),
        "released_model_unchanged": act["released_model_unchanged"],
    }
    reject_case = {
        "gate": rej["gate"]["decision"],
        "activated": rej["activated"],
        "brier_released": rej["held_out_before"]["brier"],
        "brier_adapted": rej["held_out_after"]["brier"],
        "released_model_unchanged": rej["released_model_unchanged"],
    }
    acceptance = {
        "adaptation_improves_or_is_rejected": bool(act["activated"] and not rej["activated"]),
        "released_model_provably_unchanged": bool(
            act["released_model_unchanged"] and rej["released_model_unchanged"]
        ),
        # Only the activate case's artefacts are checked on disk.
        "before_after_report_produced": bool(
            Path(act["paths"]["report"]).exists() and Path(act["paths"]["model_card"]).exists()
        ),
    }
    report = {
        "activate_case": activate_case,
        "reject_case": reject_case,
        "acceptance": acceptance,
        "scope": "recalibration / light fine-tuning behind a held-out gate; not unsupervised learning.",
    }

    destination = Path(out)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(json.dumps(report, indent=2, default=str), encoding="utf-8")
    return report
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
if __name__ == "__main__":  # pragma: no cover
    # Script entry point: run the demo with the default output path and echo the summary.
    summary = run()
    print(json.dumps(summary, indent=2, default=str))
|