pen-stack 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pen_stack-3.1.0/CHANGELOG.md +202 -0
- pen_stack-3.1.0/CITATION.cff +19 -0
- pen_stack-3.1.0/LICENSE +21 -0
- pen_stack-3.1.0/MANIFEST.in +16 -0
- pen_stack-3.1.0/PKG-INFO +451 -0
- pen_stack-3.1.0/README.md +378 -0
- pen_stack-3.1.0/bench/run.py +151 -0
- pen_stack-3.1.0/benchmarks/genome_writing_bench/LEADERBOARD.md +22 -0
- pen_stack-3.1.0/benchmarks/genome_writing_bench/README.md +75 -0
- pen_stack-3.1.0/benchmarks/genome_writing_bench/SHA256SUMS +4 -0
- pen_stack-3.1.0/benchmarks/genome_writing_bench/SUBMISSIONS.md +53 -0
- pen_stack-3.1.0/benchmarks/genome_writing_bench/tasks.yaml +81 -0
- pen_stack-3.1.0/configs/atlas_families.yaml +81 -0
- pen_stack-3.1.0/configs/bridge_offtarget_profile.yaml +51 -0
- pen_stack-3.1.0/configs/cargo_polish.yaml +45 -0
- pen_stack-3.1.0/configs/datasets.yaml +99 -0
- pen_stack-3.1.0/configs/delivery_rules.yaml +15 -0
- pen_stack-3.1.0/configs/gates_v3.yaml +208 -0
- pen_stack-3.1.0/configs/gsh_validated_heldout.yaml +39 -0
- pen_stack-3.1.0/configs/intent_weights.yaml +42 -0
- pen_stack-3.1.0/configs/llm.yaml +59 -0
- pen_stack-3.1.0/configs/monitor_queries.yaml +33 -0
- pen_stack-3.1.0/configs/score_axes.yaml +68 -0
- pen_stack-3.1.0/configs/universe_crosswalk.yaml +36 -0
- pen_stack-3.1.0/configs/wtkb_curated.yaml +165 -0
- pen_stack-3.1.0/data/curated/bridge_offtarget_profile_measured.parquet +0 -0
- pen_stack-3.1.0/data/curated/gene_coords.parquet +0 -0
- pen_stack-3.1.0/data/curated/unified_editor_universe.parquet +0 -0
- pen_stack-3.1.0/docs/DEPLOY.md +39 -0
- pen_stack-3.1.0/docs/INFRA.md +64 -0
- pen_stack-3.1.0/docs/MCP.md +24 -0
- pen_stack-3.1.0/docs/RELEASING.md +58 -0
- pen_stack-3.1.0/docs/REPRO.md +46 -0
- pen_stack-3.1.0/docs/agent.md +78 -0
- pen_stack-3.1.0/docs/alphagenome_feasibility.md +49 -0
- pen_stack-3.1.0/docs/benchmark_circularity.md +64 -0
- pen_stack-3.1.0/docs/cards/atlas.md +41 -0
- pen_stack-3.1.0/docs/cards/durability.md +43 -0
- pen_stack-3.1.0/docs/cards/safety.md +39 -0
- pen_stack-3.1.0/docs/dissemination.md +34 -0
- pen_stack-3.1.0/docs/index.md +47 -0
- pen_stack-3.1.0/docs/positioning.md +55 -0
- pen_stack-3.1.0/docs/private_data_formats.md +61 -0
- pen_stack-3.1.0/docs/quickstart.md +64 -0
- pen_stack-3.1.0/docs/scorecard.md +54 -0
- pen_stack-3.1.0/docs/tutorials/compare-families.md +43 -0
- pen_stack-3.1.0/docs/tutorials/score-deliverability.md +36 -0
- pen_stack-3.1.0/docs/tutorials/where-can-i-write.md +43 -0
- pen_stack-3.1.0/docs/tutorials/which-writer-reaches-locus.md +41 -0
- pen_stack-3.1.0/docs/wtkb.md +25 -0
- pen_stack-3.1.0/pen_stack/__init__.py +2 -0
- pen_stack-3.1.0/pen_stack/_resources.py +34 -0
- pen_stack-3.1.0/pen_stack/adapt/__init__.py +14 -0
- pen_stack-3.1.0/pen_stack/adapt/finetune.py +33 -0
- pen_stack-3.1.0/pen_stack/adapt/ingest.py +86 -0
- pen_stack-3.1.0/pen_stack/adapt/pipeline.py +101 -0
- pen_stack-3.1.0/pen_stack/adapt/recalibrate.py +58 -0
- pen_stack-3.1.0/pen_stack/adapt/report.py +130 -0
- pen_stack-3.1.0/pen_stack/agent/__init__.py +1 -0
- pen_stack-3.1.0/pen_stack/agent/guardrails.py +49 -0
- pen_stack-3.1.0/pen_stack/agent/mcp_server.py +42 -0
- pen_stack-3.1.0/pen_stack/agent/orchestrator.py +106 -0
- pen_stack-3.1.0/pen_stack/agent/pen_agent.py +169 -0
- pen_stack-3.1.0/pen_stack/agent/tools.py +130 -0
- pen_stack-3.1.0/pen_stack/atlas/__init__.py +1 -0
- pen_stack-3.1.0/pen_stack/atlas/build_wtkb.py +80 -0
- pen_stack-3.1.0/pen_stack/atlas/crosslink.py +144 -0
- pen_stack-3.1.0/pen_stack/atlas/expand.py +190 -0
- pen_stack-3.1.0/pen_stack/atlas/schema.py +59 -0
- pen_stack-3.1.0/pen_stack/atlas/scorecard.py +134 -0
- pen_stack-3.1.0/pen_stack/atlas/universe.py +75 -0
- pen_stack-3.1.0/pen_stack/atlas/variant_propose.py +155 -0
- pen_stack-3.1.0/pen_stack/bridge/__init__.py +1 -0
- pen_stack-3.1.0/pen_stack/bridge/activity.py +52 -0
- pen_stack-3.1.0/pen_stack/bridge/cli.py +65 -0
- pen_stack-3.1.0/pen_stack/bridge/fold_qc.py +53 -0
- pen_stack-3.1.0/pen_stack/bridge/guide_qc.py +84 -0
- pen_stack-3.1.0/pen_stack/bridge/ingest.py +139 -0
- pen_stack-3.1.0/pen_stack/bridge/offtarget.py +133 -0
- pen_stack-3.1.0/pen_stack/bridge/ortholog_screen.py +73 -0
- pen_stack-3.1.0/pen_stack/bridge/pipeline.py +83 -0
- pen_stack-3.1.0/pen_stack/cli.py +126 -0
- pen_stack-3.1.0/pen_stack/data/__init__.py +1 -0
- pen_stack-3.1.0/pen_stack/data/encode.py +84 -0
- pen_stack-3.1.0/pen_stack/data/genome.py +71 -0
- pen_stack-3.1.0/pen_stack/data/ingest_chromatin.py +119 -0
- pen_stack-3.1.0/pen_stack/data/ingest_integration.py +112 -0
- pen_stack-3.1.0/pen_stack/data/ingest_safety_annot.py +164 -0
- pen_stack-3.1.0/pen_stack/data/ingest_trip.py +76 -0
- pen_stack-3.1.0/pen_stack/mech/__init__.py +1 -0
- pen_stack-3.1.0/pen_stack/mech/classify_atlas.py +71 -0
- pen_stack-3.1.0/pen_stack/mech/whitelist.py +66 -0
- pen_stack-3.1.0/pen_stack/monitor/__init__.py +1 -0
- pen_stack-3.1.0/pen_stack/monitor/europepmc.py +32 -0
- pen_stack-3.1.0/pen_stack/monitor/run.py +57 -0
- pen_stack-3.1.0/pen_stack/monitor/triage.py +63 -0
- pen_stack-3.1.0/pen_stack/planner/__init__.py +1 -0
- pen_stack-3.1.0/pen_stack/planner/cargo.py +56 -0
- pen_stack-3.1.0/pen_stack/planner/cargo_polish.py +146 -0
- pen_stack-3.1.0/pen_stack/planner/delivery.py +32 -0
- pen_stack-3.1.0/pen_stack/planner/multiplex.py +110 -0
- pen_stack-3.1.0/pen_stack/planner/optimize.py +156 -0
- pen_stack-3.1.0/pen_stack/planner/pipeline.py +86 -0
- pen_stack-3.1.0/pen_stack/planner/report.py +26 -0
- pen_stack-3.1.0/pen_stack/rag/__init__.py +1 -0
- pen_stack-3.1.0/pen_stack/rag/index.py +53 -0
- pen_stack-3.1.0/pen_stack/rag/llm.py +178 -0
- pen_stack-3.1.0/pen_stack/rag/qa.py +105 -0
- pen_stack-3.1.0/pen_stack/score/__init__.py +1 -0
- pen_stack-3.1.0/pen_stack/score/recalibrate.py +77 -0
- pen_stack-3.1.0/pen_stack/score/therapeutic.py +85 -0
- pen_stack-3.1.0/pen_stack/server/__init__.py +1 -0
- pen_stack-3.1.0/pen_stack/server/api.py +142 -0
- pen_stack-3.1.0/pen_stack/ui/__init__.py +1 -0
- pen_stack-3.1.0/pen_stack/ui/app.py +518 -0
- pen_stack-3.1.0/pen_stack/validate/__init__.py +1 -0
- pen_stack-3.1.0/pen_stack/validate/adapt_demo.py +69 -0
- pen_stack-3.1.0/pen_stack/validate/agent_eval.py +117 -0
- pen_stack-3.1.0/pen_stack/validate/blind_gsh_discovery.py +165 -0
- pen_stack-3.1.0/pen_stack/validate/cargo_directionality.py +57 -0
- pen_stack-3.1.0/pen_stack/validate/durability_baselines.py +150 -0
- pen_stack-3.1.0/pen_stack/validate/forward_hypotheses.py +104 -0
- pen_stack-3.1.0/pen_stack/validate/guide_qc_demo.py +58 -0
- pen_stack-3.1.0/pen_stack/validate/intent_specification.py +82 -0
- pen_stack-3.1.0/pen_stack/validate/paper3_benchmark.py +165 -0
- pen_stack-3.1.0/pen_stack/validate/paper4_real_validation.py +144 -0
- pen_stack-3.1.0/pen_stack/validate/paper4_validation.py +82 -0
- pen_stack-3.1.0/pen_stack/validate/seq_vs_measured.py +134 -0
- pen_stack-3.1.0/pen_stack/validate/within_locus_ranking.py +74 -0
- pen_stack-3.1.0/pen_stack/validate/writer_recovery.py +86 -0
- pen_stack-3.1.0/pen_stack/wgenome/__init__.py +1 -0
- pen_stack-3.1.0/pen_stack/wgenome/chromatin_seq.py +83 -0
- pen_stack-3.1.0/pen_stack/wgenome/durability.py +108 -0
- pen_stack-3.1.0/pen_stack/wgenome/export_tracks.py +52 -0
- pen_stack-3.1.0/pen_stack/wgenome/features.py +82 -0
- pen_stack-3.1.0/pen_stack/wgenome/gsh_baseline.py +117 -0
- pen_stack-3.1.0/pen_stack/wgenome/providers.py +245 -0
- pen_stack-3.1.0/pen_stack/wgenome/safety.py +69 -0
- pen_stack-3.1.0/pen_stack/wgenome/structure3d.py +168 -0
- pen_stack-3.1.0/pen_stack/wgenome/writability.py +72 -0
- pen_stack-3.1.0/pen_stack.egg-info/PKG-INFO +451 -0
- pen_stack-3.1.0/pen_stack.egg-info/SOURCES.txt +182 -0
- pen_stack-3.1.0/pen_stack.egg-info/dependency_links.txt +1 -0
- pen_stack-3.1.0/pen_stack.egg-info/entry_points.txt +3 -0
- pen_stack-3.1.0/pen_stack.egg-info/requires.txt +54 -0
- pen_stack-3.1.0/pen_stack.egg-info/top_level.txt +1 -0
- pen_stack-3.1.0/prereg/SHA256_LOCK_phase0.json +9 -0
- pen_stack-3.1.0/prereg/SHA256_LOCK_phase1_5.json +11 -0
- pen_stack-3.1.0/prereg/SHA256_LOCK_phase2.json +12 -0
- pen_stack-3.1.0/prereg/SHA256_LOCK_phase3.json +11 -0
- pen_stack-3.1.0/prereg/SHA256_LOCK_ws_a.json +11 -0
- pen_stack-3.1.0/prereg/SHA256_LOCK_ws_b.json +11 -0
- pen_stack-3.1.0/prereg/SHA256_LOCK_ws_c.json +9 -0
- pen_stack-3.1.0/prereg/SHA256_LOCK_ws_d.json +9 -0
- pen_stack-3.1.0/prereg/SHA256_LOCK_ws_e.json +9 -0
- pen_stack-3.1.0/prereg/SHA256_LOCK_ws_f.json +8 -0
- pen_stack-3.1.0/prereg/SHA256_LOCK_ws_g.json +8 -0
- pen_stack-3.1.0/prereg/SHA256_LOCK_ws_h.json +8 -0
- pen_stack-3.1.0/prereg/paper1.yaml +60 -0
- pen_stack-3.1.0/prereg/paper2.yaml +74 -0
- pen_stack-3.1.0/prereg/paper3.yaml +63 -0
- pen_stack-3.1.0/prereg/paper4.yaml +71 -0
- pen_stack-3.1.0/prereg/phase0.yaml +28 -0
- pen_stack-3.1.0/prereg/ws_a.yaml +52 -0
- pen_stack-3.1.0/prereg/ws_b.yaml +50 -0
- pen_stack-3.1.0/prereg/ws_c.yaml +47 -0
- pen_stack-3.1.0/prereg/ws_d.yaml +29 -0
- pen_stack-3.1.0/prereg/ws_e.yaml +36 -0
- pen_stack-3.1.0/prereg/ws_f.yaml +38 -0
- pen_stack-3.1.0/prereg/ws_g.yaml +33 -0
- pen_stack-3.1.0/prereg/ws_h.yaml +32 -0
- pen_stack-3.1.0/pyproject.toml +108 -0
- pen_stack-3.1.0/scripts/p1_build_atlas.py +87 -0
- pen_stack-3.1.0/scripts/p1_build_durability.py +61 -0
- pen_stack-3.1.0/scripts/p1_export_tracks.py +22 -0
- pen_stack-3.1.0/scripts/p1_safety_concordance.py +82 -0
- pen_stack-3.1.0/scripts/p1_train_safety.py +48 -0
- pen_stack-3.1.0/scripts/p1_validation_report.py +80 -0
- pen_stack-3.1.0/scripts/p2_build_atlas.py +36 -0
- pen_stack-3.1.0/scripts/p3_benchmark_report.py +78 -0
- pen_stack-3.1.0/scripts/p4_genome_scan.py +43 -0
- pen_stack-3.1.0/scripts/ws_b_report.py +91 -0
- pen_stack-3.1.0/scripts/ws_c_report.py +75 -0
- pen_stack-3.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to PEN-STACK are documented here. This file follows
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/) and the program's phase structure.
|
|
5
|
+
|
|
6
|
+
## [3.1.0] - 2026-06-04 - v3.1 release: publishable contributions + an adopted benchmark
|
|
7
|
+
|
|
8
|
+
The v3.1 cycle (workstreams A-H) is complete. It hardens the honesty of the planning benchmark, surrounds the
|
|
9
|
+
models with strong baselines, adds a predicted-structure safety axis, and ships the first benchmark and
|
|
10
|
+
grounded agent for the genome-writing side. Every workstream is pre-registered (`prereg/ws_*.yaml`,
|
|
11
|
+
SHA-locked) and reports its honest negatives.
|
|
12
|
+
|
|
13
|
+
### Added
|
|
14
|
+
- **WS-B - strong baselines + safety primary-metric switch.** Endogenous-expression baseline (TRIP-trained
|
|
15
|
+
Spearman 0.51 vs AlphaGenome ES-Bruce4 proxy 0.43), multi-mark ablation (all-marks >= best single), and a
|
|
16
|
+
published GSH rule-set: safe-harbour discrimination (learned 0.92, 95% CI [0.82, 0.98] vs distance-rule
|
|
17
|
+
0.38, delta CI excludes zero) is now the primary safety metric; the circular `genotoxic_cis` AUROC is a
|
|
18
|
+
labeled diagnostic. (`pen_stack.wgenome.gsh_baseline`, `pen_stack.validate.durability_baselines`.)
|
|
19
|
+
- **WS-C - AlphaGenome integration.** Hosted-API provider with an offline cache; predicted-vs-measured track
|
|
20
|
+
validation (HepG2 ATAC Pearson 0.85) with an honest score-level low-confidence flag; a 3D structural-risk
|
|
21
|
+
axis from contact-map deltas (`pen_stack.wgenome.{providers,chromatin_seq,structure3d}`,
|
|
22
|
+
`pen_stack.validate.seq_vs_measured`).
|
|
23
|
+
- **WS-D - Cargo Polish.** Cargo-sequence silencing-risk scan (`pen_stack.planner.cargo_polish`).
|
|
24
|
+
- **WS-E - Genome-Writing Bench v0.1 + PEN-Agent.** The first writing-side benchmark (`benchmarks/`,
|
|
25
|
+
`bench/run.py`) with deterministic scorers, a leaderboard, and a real LLM-agent baseline; a grounded
|
|
26
|
+
write-planning state machine with a no-fabrication hard gate (`pen_stack.agent.pen_agent`).
|
|
27
|
+
- **WS-F - local recalibration / private-data adaptation.** Gated recalibration / fine-tuning on private
|
|
28
|
+
data, in-container; the adapted model activates only if it beats the released model AND a no-skill
|
|
29
|
+
baseline; the released model is provably unchanged (`pen_stack.adapt`).
|
|
30
|
+
- **WS-G - multiplex + guide QC.** A pairwise translocation-risk screen (`pen_stack.planner.multiplex`,
|
|
31
|
+
surfaced in PEN-Agent) and a bridge-RNA guide ranker (`pen_stack.bridge.guide_qc`).
|
|
32
|
+
- **WS-H - release + dissemination.** README/badges updated for v3.1, `docs/quickstart.md`,
|
|
33
|
+
`docs/positioning.md`, the leaderboard submission guide, the dissemination log, and version 3.1.0.
|
|
34
|
+
|
|
35
|
+
### Changed (honesty)
|
|
36
|
+
- The planning benchmark's `recovery_at_k` ranking is now deterministic (stable sort + tie-breakers).
|
|
37
|
+
- The LLM stack defaults to the local Ollama model on the compute tier with an automatic hosted-Nemotron
|
|
38
|
+
fallback, a cooldown cache, and bounded timeouts (no more multi-minute stalls when a provider is absent).
|
|
39
|
+
|
|
40
|
+
## [3.1.0a0] - 2026-06-04 - v3.1 WS-A: de-circularize the planning benchmark (gate)
|
|
41
|
+
|
|
42
|
+
The v3.1 cycle (publishable contributions + an adopted benchmark) opens with its gate: de-circularizing the
|
|
43
|
+
Phase-3 planning benchmark before anything builds on it.
|
|
44
|
+
|
|
45
|
+
### Changed (honesty)
|
|
46
|
+
- **The Phase-3 "discriminating-stratum recovery@10 = 1.00 vs 0.00 (McNemar p, CI)" is now labeled
|
|
47
|
+
definitional, not predictive,** everywhere (README, manuscript abstract, `prereg/paper3.yaml`,
|
|
48
|
+
`validate/paper3_benchmark.py` docstring). An on-target identity term dominates the score, so the planner
|
|
49
|
+
ranks the goal's own gene first by construction. Documented in `docs/benchmark_circularity.md` (WS-A1).
|
|
50
|
+
- The intent result is reframed as a **specification-compliance correctness table** (`validate/intent_specification.py`,
|
|
51
|
+
7/7), with no recovery/p-value/CI language (WS-A2).
|
|
52
|
+
|
|
53
|
+
### Added (the honest, non-circular replacements)
|
|
54
|
+
- **Blind safe-harbour site discovery (the new headline)**: `validate/blind_gsh_discovery.py` +
|
|
55
|
+
`configs/gsh_validated_heldout.yaml` (5 DOI-validated held-out GSH, gene-anchored to hg38) +
|
|
56
|
+
frozen/SHA-locked `data/gsh_matched_controls.parquet`. Run genome-wide (no on-target term), the planner's
|
|
57
|
+
writability separates validated GSH from matched-context controls at **AUROC 0.92** (safety-only 0.50)
|
|
58
|
+
(WS-A3).
|
|
59
|
+
- **Diversified writer-family recovery**: `validate/writer_recovery.py` + `data/writer_panel.csv` (8 writes,
|
|
60
|
+
4 families, DOIs). recovery@1 = **1.0** vs prevalence 0.25 (smallest-capacity DSB-free writer that fits
|
|
61
|
+
the cargo) (WS-A4).
|
|
62
|
+
- **Within-locus ranking** (descriptive): `validate/within_locus_ranking.py` - AAVS1 documented bin at the
|
|
63
|
+
93rd within-locus percentile (top quartile); CLYBL at the 34th (honest negative) (WS-A5).
|
|
64
|
+
- **Consolidated report** `scripts/p3_benchmark_report.py` -> `out/ws_a_report.md`; `prereg/ws_a.yaml` +
|
|
65
|
+
SHA lock. Gate G-A is met: blind AUROC reported, no circular claims remain (WS-A6).
|
|
66
|
+
|
|
67
|
+
## [Unreleased] - 2026-06-03 - honest reframing, repository polish, coverage, hybrid LLM
|
|
68
|
+
|
|
69
|
+
### Added
|
|
70
|
+
- **Hybrid LLM backend** (`pen_stack/rag/llm.py`, `configs/llm.yaml`): a strong hosted model for
|
|
71
|
+
reasoning/agent/Q&A (NVIDIA Nemotron, OpenAI-compatible, free) with **automatic fallback** to the local
|
|
72
|
+
Ollama model, then to the deterministic no-LLM path. One `provider` switch. The agent and RAG were
|
|
73
|
+
refactored onto a single provider-agnostic `chat()` (NVIDIA tool-call IDs and Ollama native message
|
|
74
|
+
threading both handled). The LLM stays non-load-bearing - every number/citation still comes from
|
|
75
|
+
validated tools - so the model choice does not affect scientific reproducibility; it only improves
|
|
76
|
+
orchestration (Nemotron planned a goal in 2 tool calls vs the local 7B's 8-call loop). Core scientific
|
|
77
|
+
compute stays local/VM and uses no LLM. API keys are read from an env var or a **gitignored** file and
|
|
78
|
+
are never committed.
|
|
79
|
+
|
|
80
|
+
### Changed
|
|
81
|
+
- **Paper 4 reframed to its honest scope.** `pen-bridge` is positioned as the first measured-data-validated
|
|
82
|
+
tool that **nominates and ranks candidate off-target *locations*** for bridge recombinases - a
|
|
83
|
+
**screening tool, not a quantitative safety calculator**. The AUROC 0.77 vs 0.62 result is stated with
|
|
84
|
+
its caveat (favourable negative set; mostly tests core integrity), and the magnitude limitation
|
|
85
|
+
(sequence-risk does not rank recombination amount, rho ~0.30) is named as the single most important
|
|
86
|
+
limitation. Application-Note tier, first-of-its-kind for an unoccupied gap; the Writable Genome remains
|
|
87
|
+
the flagship. Manuscript + `prereg/paper4.yaml` + summaries updated.
|
|
88
|
+
- **Variant-effect reframed:** the DMS recovers KNOWN enhancers (a catalogue feature); it is not a novel
|
|
89
|
+
variant-design method; EVOLVEpro is the engine to wrap when generating new variants.
|
|
90
|
+
- **Repository made clean ASCII:** removed all decorative emojis and em/en dashes and other non-ASCII
|
|
91
|
+
punctuation across code, docs, configs, and manuscripts (box-drawing tree characters kept).
|
|
92
|
+
|
|
93
|
+
### Added
|
|
94
|
+
- 72-system ortholog characterisation (`bridge/ortholog_screen.py`) - explicitly DESCRIPTIVE (Table S1 has
|
|
95
|
+
no activity label): sequence-similarity organisation vs the validated standout ISCro4 (IS621 ranks most
|
|
96
|
+
similar, a sanity check). Exploratory secondary result, N ~72.
|
|
97
|
+
- Coverage: CI runs `pytest --cov`, uploads to Codecov, and publishes a self-hosted coverage badge
|
|
98
|
+
(`tools/make_coverage_badge.py` -> `.github/badges/coverage.svg`). Unit-test coverage of the core logic
|
|
99
|
+
is **69%** (integration-only modules that need GPU/VM/network/LLM are excluded via `[tool.coverage.run]`).
|
|
100
|
+
- Professional, emoji-free README with connected-repo badges (genome-atlas / mech-class / pen-score /
|
|
101
|
+
pen-assemble / pen-compare), an architecture diagram, and the problem/gaps explanation.
|
|
102
|
+
|
|
103
|
+
## [3.0.0a5] - 2026-06-02 - Phase 1.5 (Bridge-recombinase off-target engine -> Paper 4, BEACHHEAD)
|
|
104
|
+
|
|
105
|
+
The first public instrument: a bridge-recombinase off-target screening tool.
|
|
106
|
+
|
|
107
|
+
### Added
|
|
108
|
+
- **Off-target engine** (`pen_stack/bridge/offtarget.py` + `configs/bridge_offtarget_profile.yaml`):
|
|
109
|
+
genome-wide hg38 pseudosite scan (CT-core seed, per-chromosome, memory-bounded) + a position-weight
|
|
110
|
+
risk model grounded in the published mechanism. **Beats naive Hamming: AUROC 1.00 vs 0.59** at
|
|
111
|
+
separating core-preserving (real-risk) from core-disrupting (abolished) sites. Exposes
|
|
112
|
+
`predict_offtargets(family, site)` - completes the Phase-3 Planner cargo hook.
|
|
113
|
+
- **Fold / cross-loop QC** (`bridge/fold_qc.py`): ViennaRNA fold (verified MFE on a 190-nt design) +
|
|
114
|
+
TBL/DBL cross-loop complementarity.
|
|
115
|
+
- **Activity framework** (`bridge/activity.py`): exploratory DMS + 72-system trainer (deferred; data paywalled).
|
|
116
|
+
- **`pen-bridge`** (`bridge/pipeline.py`, `bridge/cli.py`, `/bridge/design` API): **wraps** the Arc
|
|
117
|
+
BridgeRNADesigner (verified) and adds the off-target + QC layer.
|
|
118
|
+
- `validate/paper4_validation.py` + `scripts/p4_genome_scan.py`; `prereg/paper4.yaml` + SHA lock.
|
|
119
|
+
|
|
120
|
+
### Notes
|
|
121
|
+
- **Phase 1.5 COMPLETE** - pre-registered criteria met (or honestly gated): the off-target engine,
|
|
122
|
+
ViennaRNA fold, and designer wrap are verified on the VM (real hg38 scan: chr22 in ~21 s). The *blind
|
|
123
|
+
recall of Perry 2025's measured off-targets* and the DMS/activity model are gated on the paywalled
|
|
124
|
+
Perry 2025 supplementary (drop in via `ingest.load_offtarget_profile`). Completes the deferred Phase-2
|
|
125
|
+
Section 2.4 and Phase-3 Section 3.2 hooks. 68 tests green; ruff clean. **All program phases (0,1,1.5,2,3) now done.**
|
|
126
|
+
|
|
127
|
+
## [3.0.0a4] - 2026-06-02 - Phase 3 (The Write Planner + agentic platform -> Paper 3, CAPSTONE)
|
|
128
|
+
|
|
129
|
+
Inverse design + the paper-defining recovery@k benchmark + the agentic platform.
|
|
130
|
+
|
|
131
|
+
### Added
|
|
132
|
+
- **Inverse-design optimiser** (`pen_stack/planner/optimize.py`, `configs/intent_weights.yaml`): an
|
|
133
|
+
`edit_intent`-conditioned objective whose `target_gene_sign` flips whether hitting the target gene is
|
|
134
|
+
penalised or rewarded - the same TRAC site ranks #1 (knock-in) vs #101 (safe-harbour).
|
|
135
|
+
- **Cargo/delivery** (`planner/cargo.py`, `planner/delivery.py`): donor spec + size check + delivery rule
|
|
136
|
+
table; bridge/seek off-target via an optional Phase-1.5 hook (pending until 1.5).
|
|
137
|
+
- **End-to-end Planner** (`planner/pipeline.py`, `report.py`, `/plan` API, `pen-stack plan` CLI): ranked,
|
|
138
|
+
fully traceable plans with per-field provenance.
|
|
139
|
+
- **Two-stratum recovery@k benchmark** (`validate/paper3_benchmark.py`, `data/benchmark_panel.csv`,
|
|
140
|
+
`prereg/paper3.yaml`): **discriminating stratum planner 1.00 vs baseline 0.00, McNemar p=0.0156, gap CI
|
|
141
|
+
[1.0,1.0] excludes zero; control tie 0.67=0.67**. Panel cited to Europe-PMC-verified sources.
|
|
142
|
+
- **Forward hypotheses** (`validate/forward_hypotheses.py`): date-stamped novel F8/SERPINA1/CISH/HBA1
|
|
143
|
+
proposals + grounded cited ranking.
|
|
144
|
+
- **Agentic platform**: `agent/tools.py` + `agent/orchestrator.py` (Ollama tool-calling, auditable trace,
|
|
145
|
+
no-fabrication, refusals), `agent/mcp_server.py` (fastmcp), `docker-compose.yml` + `docker/ui.Dockerfile`
|
|
146
|
+
+ Streamlit **Agent** page + `docs/DEPLOY.md`/`docs/MCP.md`, `validate/agent_eval.py`.
|
|
147
|
+
- Shipped `data/curated/gene_coords.parquet` (GENCODE-derived) so tools work in any container.
|
|
148
|
+
|
|
149
|
+
### Notes
|
|
150
|
+
- **Phase 3 COMPLETE** - pre-registered criteria met (`prereg/paper3.yaml` + `SHA256_LOCK_phase3.json`).
|
|
151
|
+
Agent verified on the VM in LLM mode (no-fabrication + plan-equivalence + refusals all pass). 63 tests
|
|
152
|
+
green; ruff clean. Wet-lab (3.7) skipped - non-gating. Bridge off-target hook completes with Phase 1.5.
|
|
153
|
+
|
|
154
|
+
## [3.0.0a3] - 2026-06-02 - Phase 2 (Writer Atlas + Unified Stack -> Paper 2)
|
|
155
|
+
|
|
156
|
+
The broad, cross-family Writer Atlas, the writer<->locus cross-link, and the installable platform.
|
|
157
|
+
|
|
158
|
+
### Added
|
|
159
|
+
- **Writer Atlas** (`pen_stack/atlas/expand.py`, `atlas.parquet`): **33,370 systems across 8 families**
|
|
160
|
+
(31,885 IS110/IS1111 orthologs + curated cores/reps), every row confidence-tagged + >=1 source DOI,
|
|
161
|
+
targeting metadata inherited from the WT-KB. `configs/atlas_families.yaml` drives the UniProt queries.
|
|
162
|
+
- **Mechanism at scale** (`pen_stack/mech/`): ported audited 18-family Pfam whitelist v1.2.1; composite
|
|
163
|
+
co-occurrence rules; **core agreement 1.00** vs audited labels; conflicting calls -> review queue.
|
|
164
|
+
- **Therapeutic readiness** (`pen_stack/score/therapeutic.py`): deliverability/cargo/human-cell axes,
|
|
165
|
+
components retained (ISCro4 326aa->AAV).
|
|
166
|
+
- **Cross-link** (`pen_stack/atlas/crosslink.py`): bidirectional writer<->locus queries; AAVS1 held-out
|
|
167
|
+
check passes (0.90 writability + bridge-reachable). Per-family caches for k562/hepg2/hspc.
|
|
168
|
+
- **Variant proposal** (`pen_stack/atlas/variant_propose.py`): point-mutation framework + retrospective
|
|
169
|
+
harness, no chimeras; DMS model pluggable (Phase 1.5).
|
|
170
|
+
- **PEN-MONITOR** (`pen_stack/monitor/`): Europe PMC living-database engine; back-test surfaces ISPpu10;
|
|
171
|
+
never auto-edits the atlas; every candidate cited.
|
|
172
|
+
- **Grounded RAG** (`pen_stack/rag/`, `pen_stack/agent/guardrails.py`): numbers from tool calls, claims
|
|
173
|
+
cited, clinical directives refused; optional Ollama/Qwen phrasing layer (presentation only).
|
|
174
|
+
- **Stack**: unified CLI subcommands, FastAPI server (`pen_stack/server/api.py`), Streamlit platform UI
|
|
175
|
+
(Writer Atlas + Ask pages), mkdocs site + 4 use-case tutorials. 46 tests green; ruff clean.
|
|
176
|
+
|
|
177
|
+
### Notes
|
|
178
|
+
- **Phase 2 COMPLETE** - pre-registered criteria met (`prereg/paper2.yaml` + `SHA256_LOCK_phase2.json`);
|
|
179
|
+
atlas Zenodo DOI pending author upload. Verified on the VM (Docker): API, UI (:8501), RAG with Qwen.
|
|
180
|
+
|
|
181
|
+
## [3.0.0a0] - 2026-06-01 - Phase 0 (complete)
|
|
182
|
+
|
|
183
|
+
Fresh v3.0 monorepo. Supersedes the v1.0 platform repository (archived); consolidates the five prior
|
|
184
|
+
repositories (`genome-atlas`, `mech-class`, `pen-score`, `pen-assemble`, `pen-compare`) as provenance.
|
|
185
|
+
|
|
186
|
+
### Added
|
|
187
|
+
- Monorepo scaffold: 13 modules (`atlas`, `mech`, `score`, `wgenome`, `planner`, `bridge`, `monitor`,
|
|
188
|
+
`rag`, `agent`, `ui`, `data`, `validate`, `server`), `pyproject.toml`, Docker image spec, `penctl`
|
|
189
|
+
laptop<->VM orchestrator, CI, `configs/`, `prereg/`.
|
|
190
|
+
- `docs/INFRA.md` - three-tier (laptop / VM / Drive) Docker-only, SFTP-only workflow.
|
|
191
|
+
- `configs/llm.yaml` - single LLM switch (Ollama + Qwen2.5-7B-Instruct, Apache-2.0).
|
|
192
|
+
- `configs/datasets.yaml` - pinned dataset accessions + verified IDs (see VERIFICATION_REPORT_v3.0).
|
|
193
|
+
|
|
194
|
+
- **WT-KB** (`pen_stack/atlas/`): 8 fully-sourced writer families with reachability tiers; schema enforces the >=1-DOI sourcing rule.
|
|
195
|
+
- **Re-grounded axes** (`pen_stack/score/recalibrate.py`, `configs/score_axes.yaml`): `S_Cargo` from measured bp, `S_Prog` from targeting modality, `length_aa` backfilled - no per-enzyme overrides.
|
|
196
|
+
- **Canonical universe** (`pen_stack/atlas/universe.py::assemble`): one path joining the 1,058-entity universe + WT-KB + crosswalk; cross-module consistency test.
|
|
197
|
+
- **Descriptive scorecard** (`pen_stack/atlas/scorecard.py`): reframed from the circular certification; blind concordance recovers ISCro4 as the bridge standout without naming it. 21 tests green.
|
|
198
|
+
|
|
199
|
+
### Notes
|
|
200
|
+
- Independent verification of all datasets/IDs/DOIs/tools completed: no critical errors in the v3.0 plan
|
|
201
|
+
(full report in `Final_Part_v3.0/VERIFICATION_REPORT_v3.0.md`).
|
|
202
|
+
- **Phase 0 COMPLETE** - all pre-registered success criteria met (`prereg/phase0.yaml` + SHA lock).
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
cff-version: 1.2.0
|
|
2
|
+
message: "If you use PEN-STACK, please cite it as below."
|
|
3
|
+
title: "PEN-STACK: open infrastructure for genome writing"
|
|
4
|
+
version: 3.1.0
|
|
5
|
+
date-released: 2026-06-04
|
|
6
|
+
authors:
|
|
7
|
+
- family-names: "Mahaboob Ali"
|
|
8
|
+
given-names: "Anees Ahmed"
|
|
9
|
+
affiliation: "VIT University, Vellore"
|
|
10
|
+
email: ahmedaneesm@gmail.com
|
|
11
|
+
repository-code: "https://github.com/ahmedanees-m/pen-stack"
|
|
12
|
+
license: MIT
|
|
13
|
+
keywords:
|
|
14
|
+
- genome writing
|
|
15
|
+
- writable genome
|
|
16
|
+
- writer atlas
|
|
17
|
+
- bridge recombinase
|
|
18
|
+
- safe harbor
|
|
19
|
+
- write planner
|
pen_stack-3.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Anees Ahmed Mahaboob Ali
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Source-distribution completeness: ship the small committed config/data/spec files and the benchmark so a
|
|
2
|
+
# source build is reproducible. Large artifacts (atlases, BigWig, models) are NOT shipped - they are on
|
|
3
|
+
# Zenodo (DOI) per the data policy; clone the repo + fetch Zenodo for the full pipeline.
|
|
4
|
+
include README.md LICENSE CHANGELOG.md CITATION.cff pyproject.toml
|
|
5
|
+
recursive-include configs *.yaml *.yml *.txt.example
|
|
6
|
+
recursive-include prereg *.yaml *.json
|
|
7
|
+
recursive-include data/curated *
|
|
8
|
+
recursive-include benchmarks *.yaml *.md SHA256SUMS
|
|
9
|
+
recursive-include bench *.py
|
|
10
|
+
recursive-include docs *.md
|
|
11
|
+
graft scripts
|
|
12
|
+
global-exclude *.pyc __pycache__ *.so
|
|
13
|
+
# never ship secrets or large caches
|
|
14
|
+
exclude configs/nvidia_api_key.txt configs/alphagenome_api_key.txt
|
|
15
|
+
prune data/alphagenome_cache
|
|
16
|
+
prune models
|