pen-stack 3.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. pen_stack-3.1.0/CHANGELOG.md +202 -0
  2. pen_stack-3.1.0/CITATION.cff +19 -0
  3. pen_stack-3.1.0/LICENSE +21 -0
  4. pen_stack-3.1.0/MANIFEST.in +16 -0
  5. pen_stack-3.1.0/PKG-INFO +451 -0
  6. pen_stack-3.1.0/README.md +378 -0
  7. pen_stack-3.1.0/bench/run.py +151 -0
  8. pen_stack-3.1.0/benchmarks/genome_writing_bench/LEADERBOARD.md +22 -0
  9. pen_stack-3.1.0/benchmarks/genome_writing_bench/README.md +75 -0
  10. pen_stack-3.1.0/benchmarks/genome_writing_bench/SHA256SUMS +4 -0
  11. pen_stack-3.1.0/benchmarks/genome_writing_bench/SUBMISSIONS.md +53 -0
  12. pen_stack-3.1.0/benchmarks/genome_writing_bench/tasks.yaml +81 -0
  13. pen_stack-3.1.0/configs/atlas_families.yaml +81 -0
  14. pen_stack-3.1.0/configs/bridge_offtarget_profile.yaml +51 -0
  15. pen_stack-3.1.0/configs/cargo_polish.yaml +45 -0
  16. pen_stack-3.1.0/configs/datasets.yaml +99 -0
  17. pen_stack-3.1.0/configs/delivery_rules.yaml +15 -0
  18. pen_stack-3.1.0/configs/gates_v3.yaml +208 -0
  19. pen_stack-3.1.0/configs/gsh_validated_heldout.yaml +39 -0
  20. pen_stack-3.1.0/configs/intent_weights.yaml +42 -0
  21. pen_stack-3.1.0/configs/llm.yaml +59 -0
  22. pen_stack-3.1.0/configs/monitor_queries.yaml +33 -0
  23. pen_stack-3.1.0/configs/score_axes.yaml +68 -0
  24. pen_stack-3.1.0/configs/universe_crosswalk.yaml +36 -0
  25. pen_stack-3.1.0/configs/wtkb_curated.yaml +165 -0
  26. pen_stack-3.1.0/data/curated/bridge_offtarget_profile_measured.parquet +0 -0
  27. pen_stack-3.1.0/data/curated/gene_coords.parquet +0 -0
  28. pen_stack-3.1.0/data/curated/unified_editor_universe.parquet +0 -0
  29. pen_stack-3.1.0/docs/DEPLOY.md +39 -0
  30. pen_stack-3.1.0/docs/INFRA.md +64 -0
  31. pen_stack-3.1.0/docs/MCP.md +24 -0
  32. pen_stack-3.1.0/docs/RELEASING.md +58 -0
  33. pen_stack-3.1.0/docs/REPRO.md +46 -0
  34. pen_stack-3.1.0/docs/agent.md +78 -0
  35. pen_stack-3.1.0/docs/alphagenome_feasibility.md +49 -0
  36. pen_stack-3.1.0/docs/benchmark_circularity.md +64 -0
  37. pen_stack-3.1.0/docs/cards/atlas.md +41 -0
  38. pen_stack-3.1.0/docs/cards/durability.md +43 -0
  39. pen_stack-3.1.0/docs/cards/safety.md +39 -0
  40. pen_stack-3.1.0/docs/dissemination.md +34 -0
  41. pen_stack-3.1.0/docs/index.md +47 -0
  42. pen_stack-3.1.0/docs/positioning.md +55 -0
  43. pen_stack-3.1.0/docs/private_data_formats.md +61 -0
  44. pen_stack-3.1.0/docs/quickstart.md +64 -0
  45. pen_stack-3.1.0/docs/scorecard.md +54 -0
  46. pen_stack-3.1.0/docs/tutorials/compare-families.md +43 -0
  47. pen_stack-3.1.0/docs/tutorials/score-deliverability.md +36 -0
  48. pen_stack-3.1.0/docs/tutorials/where-can-i-write.md +43 -0
  49. pen_stack-3.1.0/docs/tutorials/which-writer-reaches-locus.md +41 -0
  50. pen_stack-3.1.0/docs/wtkb.md +25 -0
  51. pen_stack-3.1.0/pen_stack/__init__.py +2 -0
  52. pen_stack-3.1.0/pen_stack/_resources.py +34 -0
  53. pen_stack-3.1.0/pen_stack/adapt/__init__.py +14 -0
  54. pen_stack-3.1.0/pen_stack/adapt/finetune.py +33 -0
  55. pen_stack-3.1.0/pen_stack/adapt/ingest.py +86 -0
  56. pen_stack-3.1.0/pen_stack/adapt/pipeline.py +101 -0
  57. pen_stack-3.1.0/pen_stack/adapt/recalibrate.py +58 -0
  58. pen_stack-3.1.0/pen_stack/adapt/report.py +130 -0
  59. pen_stack-3.1.0/pen_stack/agent/__init__.py +1 -0
  60. pen_stack-3.1.0/pen_stack/agent/guardrails.py +49 -0
  61. pen_stack-3.1.0/pen_stack/agent/mcp_server.py +42 -0
  62. pen_stack-3.1.0/pen_stack/agent/orchestrator.py +106 -0
  63. pen_stack-3.1.0/pen_stack/agent/pen_agent.py +169 -0
  64. pen_stack-3.1.0/pen_stack/agent/tools.py +130 -0
  65. pen_stack-3.1.0/pen_stack/atlas/__init__.py +1 -0
  66. pen_stack-3.1.0/pen_stack/atlas/build_wtkb.py +80 -0
  67. pen_stack-3.1.0/pen_stack/atlas/crosslink.py +144 -0
  68. pen_stack-3.1.0/pen_stack/atlas/expand.py +190 -0
  69. pen_stack-3.1.0/pen_stack/atlas/schema.py +59 -0
  70. pen_stack-3.1.0/pen_stack/atlas/scorecard.py +134 -0
  71. pen_stack-3.1.0/pen_stack/atlas/universe.py +75 -0
  72. pen_stack-3.1.0/pen_stack/atlas/variant_propose.py +155 -0
  73. pen_stack-3.1.0/pen_stack/bridge/__init__.py +1 -0
  74. pen_stack-3.1.0/pen_stack/bridge/activity.py +52 -0
  75. pen_stack-3.1.0/pen_stack/bridge/cli.py +65 -0
  76. pen_stack-3.1.0/pen_stack/bridge/fold_qc.py +53 -0
  77. pen_stack-3.1.0/pen_stack/bridge/guide_qc.py +84 -0
  78. pen_stack-3.1.0/pen_stack/bridge/ingest.py +139 -0
  79. pen_stack-3.1.0/pen_stack/bridge/offtarget.py +133 -0
  80. pen_stack-3.1.0/pen_stack/bridge/ortholog_screen.py +73 -0
  81. pen_stack-3.1.0/pen_stack/bridge/pipeline.py +83 -0
  82. pen_stack-3.1.0/pen_stack/cli.py +126 -0
  83. pen_stack-3.1.0/pen_stack/data/__init__.py +1 -0
  84. pen_stack-3.1.0/pen_stack/data/encode.py +84 -0
  85. pen_stack-3.1.0/pen_stack/data/genome.py +71 -0
  86. pen_stack-3.1.0/pen_stack/data/ingest_chromatin.py +119 -0
  87. pen_stack-3.1.0/pen_stack/data/ingest_integration.py +112 -0
  88. pen_stack-3.1.0/pen_stack/data/ingest_safety_annot.py +164 -0
  89. pen_stack-3.1.0/pen_stack/data/ingest_trip.py +76 -0
  90. pen_stack-3.1.0/pen_stack/mech/__init__.py +1 -0
  91. pen_stack-3.1.0/pen_stack/mech/classify_atlas.py +71 -0
  92. pen_stack-3.1.0/pen_stack/mech/whitelist.py +66 -0
  93. pen_stack-3.1.0/pen_stack/monitor/__init__.py +1 -0
  94. pen_stack-3.1.0/pen_stack/monitor/europepmc.py +32 -0
  95. pen_stack-3.1.0/pen_stack/monitor/run.py +57 -0
  96. pen_stack-3.1.0/pen_stack/monitor/triage.py +63 -0
  97. pen_stack-3.1.0/pen_stack/planner/__init__.py +1 -0
  98. pen_stack-3.1.0/pen_stack/planner/cargo.py +56 -0
  99. pen_stack-3.1.0/pen_stack/planner/cargo_polish.py +146 -0
  100. pen_stack-3.1.0/pen_stack/planner/delivery.py +32 -0
  101. pen_stack-3.1.0/pen_stack/planner/multiplex.py +110 -0
  102. pen_stack-3.1.0/pen_stack/planner/optimize.py +156 -0
  103. pen_stack-3.1.0/pen_stack/planner/pipeline.py +86 -0
  104. pen_stack-3.1.0/pen_stack/planner/report.py +26 -0
  105. pen_stack-3.1.0/pen_stack/rag/__init__.py +1 -0
  106. pen_stack-3.1.0/pen_stack/rag/index.py +53 -0
  107. pen_stack-3.1.0/pen_stack/rag/llm.py +178 -0
  108. pen_stack-3.1.0/pen_stack/rag/qa.py +105 -0
  109. pen_stack-3.1.0/pen_stack/score/__init__.py +1 -0
  110. pen_stack-3.1.0/pen_stack/score/recalibrate.py +77 -0
  111. pen_stack-3.1.0/pen_stack/score/therapeutic.py +85 -0
  112. pen_stack-3.1.0/pen_stack/server/__init__.py +1 -0
  113. pen_stack-3.1.0/pen_stack/server/api.py +142 -0
  114. pen_stack-3.1.0/pen_stack/ui/__init__.py +1 -0
  115. pen_stack-3.1.0/pen_stack/ui/app.py +518 -0
  116. pen_stack-3.1.0/pen_stack/validate/__init__.py +1 -0
  117. pen_stack-3.1.0/pen_stack/validate/adapt_demo.py +69 -0
  118. pen_stack-3.1.0/pen_stack/validate/agent_eval.py +117 -0
  119. pen_stack-3.1.0/pen_stack/validate/blind_gsh_discovery.py +165 -0
  120. pen_stack-3.1.0/pen_stack/validate/cargo_directionality.py +57 -0
  121. pen_stack-3.1.0/pen_stack/validate/durability_baselines.py +150 -0
  122. pen_stack-3.1.0/pen_stack/validate/forward_hypotheses.py +104 -0
  123. pen_stack-3.1.0/pen_stack/validate/guide_qc_demo.py +58 -0
  124. pen_stack-3.1.0/pen_stack/validate/intent_specification.py +82 -0
  125. pen_stack-3.1.0/pen_stack/validate/paper3_benchmark.py +165 -0
  126. pen_stack-3.1.0/pen_stack/validate/paper4_real_validation.py +144 -0
  127. pen_stack-3.1.0/pen_stack/validate/paper4_validation.py +82 -0
  128. pen_stack-3.1.0/pen_stack/validate/seq_vs_measured.py +134 -0
  129. pen_stack-3.1.0/pen_stack/validate/within_locus_ranking.py +74 -0
  130. pen_stack-3.1.0/pen_stack/validate/writer_recovery.py +86 -0
  131. pen_stack-3.1.0/pen_stack/wgenome/__init__.py +1 -0
  132. pen_stack-3.1.0/pen_stack/wgenome/chromatin_seq.py +83 -0
  133. pen_stack-3.1.0/pen_stack/wgenome/durability.py +108 -0
  134. pen_stack-3.1.0/pen_stack/wgenome/export_tracks.py +52 -0
  135. pen_stack-3.1.0/pen_stack/wgenome/features.py +82 -0
  136. pen_stack-3.1.0/pen_stack/wgenome/gsh_baseline.py +117 -0
  137. pen_stack-3.1.0/pen_stack/wgenome/providers.py +245 -0
  138. pen_stack-3.1.0/pen_stack/wgenome/safety.py +69 -0
  139. pen_stack-3.1.0/pen_stack/wgenome/structure3d.py +168 -0
  140. pen_stack-3.1.0/pen_stack/wgenome/writability.py +72 -0
  141. pen_stack-3.1.0/pen_stack.egg-info/PKG-INFO +451 -0
  142. pen_stack-3.1.0/pen_stack.egg-info/SOURCES.txt +182 -0
  143. pen_stack-3.1.0/pen_stack.egg-info/dependency_links.txt +1 -0
  144. pen_stack-3.1.0/pen_stack.egg-info/entry_points.txt +3 -0
  145. pen_stack-3.1.0/pen_stack.egg-info/requires.txt +54 -0
  146. pen_stack-3.1.0/pen_stack.egg-info/top_level.txt +1 -0
  147. pen_stack-3.1.0/prereg/SHA256_LOCK_phase0.json +9 -0
  148. pen_stack-3.1.0/prereg/SHA256_LOCK_phase1_5.json +11 -0
  149. pen_stack-3.1.0/prereg/SHA256_LOCK_phase2.json +12 -0
  150. pen_stack-3.1.0/prereg/SHA256_LOCK_phase3.json +11 -0
  151. pen_stack-3.1.0/prereg/SHA256_LOCK_ws_a.json +11 -0
  152. pen_stack-3.1.0/prereg/SHA256_LOCK_ws_b.json +11 -0
  153. pen_stack-3.1.0/prereg/SHA256_LOCK_ws_c.json +9 -0
  154. pen_stack-3.1.0/prereg/SHA256_LOCK_ws_d.json +9 -0
  155. pen_stack-3.1.0/prereg/SHA256_LOCK_ws_e.json +9 -0
  156. pen_stack-3.1.0/prereg/SHA256_LOCK_ws_f.json +8 -0
  157. pen_stack-3.1.0/prereg/SHA256_LOCK_ws_g.json +8 -0
  158. pen_stack-3.1.0/prereg/SHA256_LOCK_ws_h.json +8 -0
  159. pen_stack-3.1.0/prereg/paper1.yaml +60 -0
  160. pen_stack-3.1.0/prereg/paper2.yaml +74 -0
  161. pen_stack-3.1.0/prereg/paper3.yaml +63 -0
  162. pen_stack-3.1.0/prereg/paper4.yaml +71 -0
  163. pen_stack-3.1.0/prereg/phase0.yaml +28 -0
  164. pen_stack-3.1.0/prereg/ws_a.yaml +52 -0
  165. pen_stack-3.1.0/prereg/ws_b.yaml +50 -0
  166. pen_stack-3.1.0/prereg/ws_c.yaml +47 -0
  167. pen_stack-3.1.0/prereg/ws_d.yaml +29 -0
  168. pen_stack-3.1.0/prereg/ws_e.yaml +36 -0
  169. pen_stack-3.1.0/prereg/ws_f.yaml +38 -0
  170. pen_stack-3.1.0/prereg/ws_g.yaml +33 -0
  171. pen_stack-3.1.0/prereg/ws_h.yaml +32 -0
  172. pen_stack-3.1.0/pyproject.toml +108 -0
  173. pen_stack-3.1.0/scripts/p1_build_atlas.py +87 -0
  174. pen_stack-3.1.0/scripts/p1_build_durability.py +61 -0
  175. pen_stack-3.1.0/scripts/p1_export_tracks.py +22 -0
  176. pen_stack-3.1.0/scripts/p1_safety_concordance.py +82 -0
  177. pen_stack-3.1.0/scripts/p1_train_safety.py +48 -0
  178. pen_stack-3.1.0/scripts/p1_validation_report.py +80 -0
  179. pen_stack-3.1.0/scripts/p2_build_atlas.py +36 -0
  180. pen_stack-3.1.0/scripts/p3_benchmark_report.py +78 -0
  181. pen_stack-3.1.0/scripts/p4_genome_scan.py +43 -0
  182. pen_stack-3.1.0/scripts/ws_b_report.py +91 -0
  183. pen_stack-3.1.0/scripts/ws_c_report.py +75 -0
  184. pen_stack-3.1.0/setup.cfg +4 -0
@@ -0,0 +1,202 @@
1
+ # Changelog
2
+
3
+ All notable changes to PEN-STACK are documented here. This file follows
4
+ [Keep a Changelog](https://keepachangelog.com/) and the program's phase structure.
5
+
6
+ ## [3.1.0] - 2026-06-04 - v3.1 release: publishable contributions + an adopted benchmark
7
+
8
+ The v3.1 cycle is complete (workstreams A-H). It hardens the honesty of the planning benchmark, surrounds the
9
+ models with strong baselines, adds a predicted-structure safety axis, and ships the first benchmark and
10
+ grounded agent for the genome-writing side. Every workstream is pre-registered (`prereg/ws_*.yaml`,
11
+ SHA-locked) and reports its honest negatives.
12
+
13
+ ### Added
14
+ - **WS-B - strong baselines + safety primary-metric switch.** Endogenous-expression baseline (TRIP-trained
15
+ Spearman 0.51 vs AlphaGenome ES-Bruce4 proxy 0.43), multi-mark ablation (all-marks >= best single), and a
16
+ published GSH rule-set: safe-harbour discrimination (learned 0.92, 95% CI [0.82, 0.98] vs distance-rule
17
+ 0.38, delta CI excludes zero) is now the primary safety metric; the circular `genotoxic_cis` AUROC is a
18
+ labeled diagnostic. (`pen_stack.wgenome.gsh_baseline`, `pen_stack.validate.durability_baselines`.)
19
+ - **WS-C - AlphaGenome integration.** Hosted-API provider with an offline cache; predicted-vs-measured track
20
+ validation (HepG2 ATAC Pearson 0.85) with an honest score-level low-confidence flag; a 3D structural-risk
21
+ axis from contact-map deltas (`pen_stack.wgenome.{providers,chromatin_seq,structure3d}`,
22
+ `pen_stack.validate.seq_vs_measured`).
23
+ - **WS-D - Cargo Polish.** Cargo-sequence silencing-risk scan (`pen_stack.planner.cargo_polish`).
24
+ - **WS-E - Genome-Writing Bench v0.1 + PEN-Agent.** The first writing-side benchmark (`benchmarks/`,
25
+ `bench/run.py`) with deterministic scorers, a leaderboard, and a real LLM-agent baseline; a grounded
26
+ write-planning state machine with a no-fabrication hard gate (`pen_stack.agent.pen_agent`).
27
+ - **WS-F - local recalibration / private-data adaptation.** Gated recalibration / fine-tuning on private
28
+ data, in-container; the adapted model activates only if it beats the released model AND a no-skill
29
+ baseline; the released model is provably unchanged (`pen_stack.adapt`).
30
+ - **WS-G - multiplex + guide QC.** A pairwise translocation-risk screen (`pen_stack.planner.multiplex`,
31
+ surfaced in PEN-Agent) and a bridge-RNA guide ranker (`pen_stack.bridge.guide_qc`).
32
+ - **WS-H - release + dissemination.** README/badges updated for v3.1, `docs/quickstart.md`,
33
+ `docs/positioning.md`, the leaderboard submission guide, the dissemination log, and version 3.1.0.
34
+
35
+ ### Changed (honesty)
36
+ - The planning benchmark's `recovery_at_k` ranking is now deterministic (stable sort + tie-breakers).
37
+ - The LLM stack defaults to the local Ollama model on the compute tier with an automatic hosted-Nemotron
38
+ fallback, a cooldown cache, and bounded timeouts (no more multi-minute stalls when a provider is absent).
39
+
40
+ ## [3.1.0a0] - 2026-06-04 - v3.1 WS-A: de-circularize the planning benchmark (gate)
41
+
42
+ The v3.1 cycle (publishable contributions + an adopted benchmark) opens with its gate: de-circularizing the
43
+ Phase-3 planning benchmark before anything builds on it.
44
+
45
+ ### Changed (honesty)
46
+ - **The Phase-3 "discriminating-stratum recovery@10 = 1.00 vs 0.00 (McNemar p, CI)" is now labeled
47
+ definitional, not predictive,** everywhere (README, manuscript abstract, `prereg/paper3.yaml`,
48
+ `validate/paper3_benchmark.py` docstring). An on-target identity term dominates the score, so the planner
49
+ ranks the goal's own gene first by construction. Documented in `docs/benchmark_circularity.md` (WS-A1).
50
+ - The intent result is reframed as a **specification-compliance correctness table** (`validate/intent_specification.py`,
51
+ 7/7), with no recovery/p-value/CI language (WS-A2).
52
+
53
+ ### Added (the honest, non-circular replacements)
54
+ - **Blind safe-harbour site discovery (the new headline)**: `validate/blind_gsh_discovery.py` +
55
+ `configs/gsh_validated_heldout.yaml` (5 DOI-validated held-out GSH, gene-anchored to hg38) +
56
+ frozen/SHA-locked `data/gsh_matched_controls.parquet`. Run genome-wide (no on-target term), the planner's
57
+ writability separates validated GSH from matched-context controls at **AUROC 0.92** (safety-only 0.50)
58
+ (WS-A3).
59
+ - **Diversified writer-family recovery**: `validate/writer_recovery.py` + `data/writer_panel.csv` (8 writes,
60
+ 4 families, DOIs). recovery@1 = **1.0** vs prevalence 0.25 (smallest-capacity DSB-free writer that fits
61
+ the cargo) (WS-A4).
62
+ - **Within-locus ranking** (descriptive): `validate/within_locus_ranking.py` - AAVS1 documented bin at the
63
+ 93rd within-locus percentile (top quartile); CLYBL at the 34th (honest negative) (WS-A5).
64
+ - **Consolidated report** `scripts/p3_benchmark_report.py` -> `out/ws_a_report.md`; `prereg/ws_a.yaml` +
65
+ SHA lock. Gate G-A is met: blind AUROC reported, no circular claims remain (WS-A6).
66
+
67
+ ## [Unreleased] - 2026-06-03 - honest reframing, repository polish, coverage, hybrid LLM
68
+
69
+ ### Added
70
+ - **Hybrid LLM backend** (`pen_stack/rag/llm.py`, `configs/llm.yaml`): a strong hosted model for
71
+ reasoning/agent/Q&A (NVIDIA Nemotron, OpenAI-compatible, free) with **automatic fallback** to the local
72
+ Ollama model, then to the deterministic no-LLM path. One `provider` switch. The agent and RAG were
73
+ refactored onto a single provider-agnostic `chat()` (NVIDIA tool-call IDs and Ollama native message
74
+ threading both handled). The LLM stays non-load-bearing - every number/citation still comes from
75
+ validated tools - so the model choice does not affect scientific reproducibility; it only improves
76
+ orchestration (Nemotron planned a goal in 2 tool calls vs the local 7B's 8-call loop). Core scientific
77
+ compute stays local/VM and uses no LLM. API keys are read from an env var or a **gitignored** file and
78
+ are never committed.
79
+
80
+ ### Changed
81
+ - **Paper 4 reframed to its honest scope.** `pen-bridge` is positioned as the first measured-data-validated
82
+ tool that **nominates and ranks candidate off-target *locations*** for bridge recombinases - a
83
+ **screening tool, not a quantitative safety calculator**. The AUROC 0.77 vs 0.62 result is stated with
84
+ its caveat (favourable negative set; mostly tests core integrity), and the magnitude limitation
85
+ (sequence-risk does not rank recombination amount, rho ~0.30) is named as the single most important
86
+ limitation. Application-Note tier, first-of-its-kind for an unoccupied gap; the Writable Genome remains
87
+ the flagship. Manuscript + `prereg/paper4.yaml` + summaries updated.
88
+ - **Variant-effect reframed:** the DMS recovers KNOWN enhancers (a catalogue feature); it is not a novel
89
+ variant-design method; EVOLVEpro is the engine to wrap when generating new variants.
90
+ - **Repository made clean ASCII:** removed all decorative emojis and em/en dashes and other non-ASCII
91
+ punctuation across code, docs, configs, and manuscripts (box-drawing tree characters kept).
92
+
93
+ ### Added
94
+ - 72-system ortholog characterisation (`bridge/ortholog_screen.py`) - explicitly DESCRIPTIVE (Table S1 has
95
+ no activity label): sequence-similarity organisation vs the validated standout ISCro4 (IS621 ranks most
96
+ similar, a sanity check). Exploratory secondary result, N ~72.
97
+ - Coverage: CI runs `pytest --cov`, uploads to Codecov, and publishes a self-hosted coverage badge
98
+ (`tools/make_coverage_badge.py` -> `.github/badges/coverage.svg`). Unit-test coverage of the core logic
99
+ is **69%** (integration-only modules that need GPU/VM/network/LLM are excluded via `[tool.coverage.run]`).
100
+ - Professional, emoji-free README with connected-repo badges (genome-atlas / mech-class / pen-score /
101
+ pen-assemble / pen-compare), an architecture diagram, and the problem/gaps explanation.
102
+
103
+ ## [3.0.0a5] - 2026-06-02 - Phase 1.5 (Bridge-recombinase off-target engine -> Paper 4, BEACHHEAD)
104
+
105
+ The first public instrument: a bridge-recombinase off-target screening tool.
106
+
107
+ ### Added
108
+ - **Off-target engine** (`pen_stack/bridge/offtarget.py` + `configs/bridge_offtarget_profile.yaml`):
109
+ genome-wide hg38 pseudosite scan (CT-core seed, per-chromosome, memory-bounded) + a position-weight
110
+ risk model grounded in the published mechanism. **Beats naive Hamming: AUROC 1.00 vs 0.59** at
111
+ separating core-preserving (real-risk) from core-disrupting (abolished) sites. Exposes
112
+ `predict_offtargets(family, site)` - completes the Phase-3 Planner cargo hook.
113
+ - **Fold / cross-loop QC** (`bridge/fold_qc.py`): ViennaRNA fold (verified MFE on a 190-nt design) +
114
+ TBL/DBL cross-loop complementarity.
115
+ - **Activity framework** (`bridge/activity.py`): exploratory DMS + 72-system trainer (deferred; data paywalled).
116
+ - **`pen-bridge`** (`bridge/pipeline.py`, `bridge/cli.py`, `/bridge/design` API): **wraps** the Arc
117
+ BridgeRNADesigner (verified) and adds the off-target + QC layer.
118
+ - `validate/paper4_validation.py` + `scripts/p4_genome_scan.py`; `prereg/paper4.yaml` + SHA lock.
119
+
120
+ ### Notes
121
+ - **Phase 1.5 COMPLETE** - pre-registered criteria met (or honestly gated): the off-target engine,
122
+ ViennaRNA fold, and designer wrap are verified on the VM (real hg38 scan: chr22 in ~21 s). The *blind
123
+ recall of Perry 2025's measured off-targets* and the DMS/activity model are gated on the paywalled
124
+ Perry 2025 supplementary data (drop in via `ingest.load_offtarget_profile`). Completes the deferred Phase-2
125
+ Section 2.4 and Phase-3 Section 3.2 hooks. 68 tests green; ruff clean. **All program phases (0,1,1.5,2,3) now done.**
126
+
127
+ ## [3.0.0a4] - 2026-06-02 - Phase 3 (The Write Planner + agentic platform -> Paper 3, CAPSTONE)
128
+
129
+ Inverse design + the paper-defining recovery@k benchmark + the agentic platform.
130
+
131
+ ### Added
132
+ - **Inverse-design optimiser** (`pen_stack/planner/optimize.py`, `configs/intent_weights.yaml`): an
133
+ `edit_intent`-conditioned objective whose `target_gene_sign` flips whether hitting the target gene is
134
+ penalised or rewarded - the same TRAC site ranks #1 (knock-in) vs #101 (safe-harbour).
135
+ - **Cargo/delivery** (`planner/cargo.py`, `planner/delivery.py`): donor spec + size check + delivery rule
136
+ table; bridge/seek off-target via an optional Phase-1.5 hook (pending until 1.5).
137
+ - **End-to-end Planner** (`planner/pipeline.py`, `report.py`, `/plan` API, `pen-stack plan` CLI): ranked,
138
+ fully traceable plans with per-field provenance.
139
+ - **Two-stratum recovery@k benchmark** (`validate/paper3_benchmark.py`, `data/benchmark_panel.csv`,
140
+ `prereg/paper3.yaml`): **discriminating stratum planner 1.00 vs baseline 0.00, McNemar p=0.0156, gap CI
141
+ [1.0,1.0] excludes zero; control tie 0.67=0.67**. Panel cited to Europe-PMC-verified sources.
142
+ - **Forward hypotheses** (`validate/forward_hypotheses.py`): date-stamped novel F8/SERPINA1/CISH/HBA1
143
+ proposals + grounded cited ranking.
144
+ - **Agentic platform**: `agent/tools.py` + `agent/orchestrator.py` (Ollama tool-calling, auditable trace,
145
+ no-fabrication, refusals), `agent/mcp_server.py` (fastmcp), `docker-compose.yml` + `docker/ui.Dockerfile`
146
+ + Streamlit **Agent** page + `docs/DEPLOY.md`/`docs/MCP.md`, `validate/agent_eval.py`.
147
+ - Shipped `data/curated/gene_coords.parquet` (GENCODE-derived) so tools work in any container.
148
+
149
+ ### Notes
150
+ - **Phase 3 COMPLETE** - pre-registered criteria met (`prereg/paper3.yaml` + `SHA256_LOCK_phase3.json`).
151
+ Agent verified on the VM in LLM mode (no-fabrication + plan-equivalence + refusals all pass). 63 tests
152
+ green; ruff clean. Wet-lab (3.7) skipped - non-gating. Bridge off-target hook completes with Phase 1.5.
153
+
154
+ ## [3.0.0a3] - 2026-06-02 - Phase 2 (Writer Atlas + Unified Stack -> Paper 2)
155
+
156
+ The broad, cross-family Writer Atlas, the writer<->locus cross-link, and the installable platform.
157
+
158
+ ### Added
159
+ - **Writer Atlas** (`pen_stack/atlas/expand.py`, `atlas.parquet`): **33,370 systems across 8 families**
160
+ (31,885 IS110/IS1111 orthologs + curated cores/reps), every row confidence-tagged + >=1 source DOI,
161
+ targeting metadata inherited from the WT-KB. `configs/atlas_families.yaml` drives the UniProt queries.
162
+ - **Mechanism at scale** (`pen_stack/mech/`): ported audited 18-family Pfam whitelist v1.2.1; composite
163
+ co-occurrence rules; **core agreement 1.00** vs audited labels; conflicting calls -> review queue.
164
+ - **Therapeutic readiness** (`pen_stack/score/therapeutic.py`): deliverability/cargo/human-cell axes,
165
+ components retained (ISCro4 326aa->AAV).
166
+ - **Cross-link** (`pen_stack/atlas/crosslink.py`): bidirectional writer<->locus queries; AAVS1 held-out
167
+ check passes (0.90 writability + bridge-reachable). Per-family caches for k562/hepg2/hspc.
168
+ - **Variant proposal** (`pen_stack/atlas/variant_propose.py`): point-mutation framework + retrospective
169
+ harness, no chimeras; DMS model pluggable (Phase 1.5).
170
+ - **PEN-MONITOR** (`pen_stack/monitor/`): Europe PMC living-database engine; back-test surfaces ISPpu10;
171
+ never auto-edits the atlas; every candidate cited.
172
+ - **Grounded RAG** (`pen_stack/rag/`, `pen_stack/agent/guardrails.py`): numbers from tool calls, claims
173
+ cited, clinical directives refused; optional Ollama/Qwen phrasing layer (presentation only).
174
+ - **Stack**: unified CLI subcommands, FastAPI server (`pen_stack/server/api.py`), Streamlit platform UI
175
+ (Writer Atlas + Ask pages), mkdocs site + 4 use-case tutorials. 46 tests green; ruff clean.
176
+
177
+ ### Notes
178
+ - **Phase 2 COMPLETE** - pre-registered criteria met (`prereg/paper2.yaml` + `SHA256_LOCK_phase2.json`);
179
+ atlas Zenodo DOI pending author upload. Verified on the VM (Docker): API, UI (:8501), RAG with Qwen.
180
+
181
+ ## [3.0.0a0] - 2026-06-01 - Phase 0 (complete)
182
+
183
+ Fresh v3.0 monorepo. Supersedes the v1.0 platform repository (archived); consolidates the five prior
184
+ repositories (`genome-atlas`, `mech-class`, `pen-score`, `pen-assemble`, `pen-compare`) as provenance.
185
+
186
+ ### Added
187
+ - Monorepo scaffold: 13 modules (`atlas`, `mech`, `score`, `wgenome`, `planner`, `bridge`, `monitor`,
188
+ `rag`, `agent`, `ui`, `data`, `validate`, `server`), `pyproject.toml`, Docker image spec, `penctl`
189
+ laptop<->VM orchestrator, CI, `configs/`, `prereg/`.
190
+ - `docs/INFRA.md` - three-tier (laptop / VM / Drive) Docker-only, SFTP-only workflow.
191
+ - `configs/llm.yaml` - single LLM switch (Ollama + Qwen2.5-7B-Instruct, Apache-2.0).
192
+ - `configs/datasets.yaml` - pinned dataset accessions + verified IDs (see VERIFICATION_REPORT_v3.0).
193
+
194
+ - **WT-KB** (`pen_stack/atlas/`): 8 fully-sourced writer families with reachability tiers; schema enforces the >=1-DOI sourcing rule.
195
+ - **Re-grounded axes** (`pen_stack/score/recalibrate.py`, `configs/score_axes.yaml`): `S_Cargo` from measured bp, `S_Prog` from targeting modality, `length_aa` backfilled - no per-enzyme overrides.
196
+ - **Canonical universe** (`pen_stack/atlas/universe.py::assemble`): one path joining the 1,058-entity universe + WT-KB + crosswalk; cross-module consistency test.
197
+ - **Descriptive scorecard** (`pen_stack/atlas/scorecard.py`): reframed from the circular certification; blind concordance recovers ISCro4 as the bridge standout without naming it. 21 tests green.
198
+
199
+ ### Notes
200
+ - Independent verification of all datasets/IDs/DOIs/tools completed: no critical errors in the v3.0 plan
201
+ (full report in `Final_Part_v3.0/VERIFICATION_REPORT_v3.0.md`).
202
+ - **Phase 0 COMPLETE** - all pre-registered success criteria met (`prereg/phase0.yaml` + SHA lock).
@@ -0,0 +1,19 @@
1
+ cff-version: 1.2.0
2
+ message: "If you use PEN-STACK, please cite it as below."
3
+ title: "PEN-STACK: open infrastructure for genome writing"
4
+ version: 3.1.0
5
+ date-released: 2026-06-04
6
+ authors:
7
+ - family-names: "Mahaboob Ali"
8
+ given-names: "Anees Ahmed"
9
+ affiliation: "VIT University, Vellore"
10
+ email: ahmedaneesm@gmail.com
11
+ repository-code: "https://github.com/ahmedanees-m/pen-stack"
12
+ license: MIT
13
+ keywords:
14
+ - genome writing
15
+ - writable genome
16
+ - writer atlas
17
+ - bridge recombinase
18
+ - safe harbor
19
+ - write planner
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Anees Ahmed Mahaboob Ali
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,16 @@
1
+ # Source-distribution completeness: ship the small committed config/data/spec files and the benchmark so a
2
+ # source build is reproducible. Large artifacts (atlases, BigWig, models) are NOT shipped - they are on
3
+ # Zenodo (DOI) per the data policy; clone the repo + fetch Zenodo for the full pipeline.
4
+ include README.md LICENSE CHANGELOG.md CITATION.cff pyproject.toml
5
+ recursive-include configs *.yaml *.yml *.txt.example
6
+ recursive-include prereg *.yaml *.json
7
+ recursive-include data/curated *
8
+ recursive-include benchmarks *.yaml *.md SHA256SUMS
9
+ recursive-include bench *.py
10
+ recursive-include docs *.md
11
+ graft scripts
12
+ global-exclude *.pyc __pycache__ *.so
13
+ # never ship secrets or large caches
14
+ exclude configs/nvidia_api_key.txt configs/alphagenome_api_key.txt
15
+ prune data/alphagenome_cache
16
+ prune models