pen-stack 3.1.0__tar.gz → 3.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256)
  1. {pen_stack-3.1.0 → pen_stack-3.3.0}/CHANGELOG.md +266 -202
  2. {pen_stack-3.1.0 → pen_stack-3.3.0}/CITATION.cff +19 -19
  3. {pen_stack-3.1.0 → pen_stack-3.3.0}/PKG-INFO +529 -451
  4. {pen_stack-3.1.0 → pen_stack-3.3.0}/README.md +454 -378
  5. {pen_stack-3.1.0 → pen_stack-3.3.0}/bench/run.py +14 -4
  6. pen_stack-3.3.0/benchmarks/genome_writing_bench/LEADERBOARD.md +52 -0
  7. {pen_stack-3.1.0 → pen_stack-3.3.0}/benchmarks/genome_writing_bench/README.md +17 -5
  8. pen_stack-3.3.0/benchmarks/genome_writing_bench/SHA256SUMS +4 -0
  9. {pen_stack-3.1.0 → pen_stack-3.3.0}/benchmarks/genome_writing_bench/SUBMISSIONS.md +53 -53
  10. pen_stack-3.3.0/benchmarks/genome_writing_bench/tasks.yaml +160 -0
  11. {pen_stack-3.1.0 → pen_stack-3.3.0}/configs/atlas_families.yaml +81 -81
  12. {pen_stack-3.1.0 → pen_stack-3.3.0}/configs/bridge_offtarget_profile.yaml +51 -51
  13. {pen_stack-3.1.0 → pen_stack-3.3.0}/configs/datasets.yaml +99 -99
  14. pen_stack-3.3.0/configs/delivery_constraints.yaml +50 -0
  15. {pen_stack-3.1.0 → pen_stack-3.3.0}/configs/delivery_rules.yaml +15 -15
  16. pen_stack-3.3.0/configs/delivery_vehicles.yaml +105 -0
  17. {pen_stack-3.1.0 → pen_stack-3.3.0}/configs/gates_v3.yaml +208 -208
  18. pen_stack-3.3.0/configs/gsh_validated_heldout.yaml +87 -0
  19. {pen_stack-3.1.0 → pen_stack-3.3.0}/configs/intent_weights.yaml +42 -42
  20. pen_stack-3.3.0/configs/known_unknowns.yaml +67 -0
  21. {pen_stack-3.1.0 → pen_stack-3.3.0}/configs/monitor_queries.yaml +33 -33
  22. pen_stack-3.3.0/configs/rules/delivery.yaml +40 -0
  23. pen_stack-3.3.0/configs/rules/fold.yaml +15 -0
  24. pen_stack-3.3.0/configs/rules/multiplex.yaml +13 -0
  25. pen_stack-3.3.0/configs/rules/payload.yaml +22 -0
  26. pen_stack-3.3.0/configs/rules/reachability.yaml +16 -0
  27. {pen_stack-3.1.0 → pen_stack-3.3.0}/configs/score_axes.yaml +68 -68
  28. pen_stack-3.3.0/configs/target_sites.yaml +66 -0
  29. {pen_stack-3.1.0 → pen_stack-3.3.0}/configs/universe_crosswalk.yaml +36 -36
  30. pen_stack-3.3.0/configs/write_types.yaml +57 -0
  31. {pen_stack-3.1.0 → pen_stack-3.3.0}/configs/wtkb_curated.yaml +165 -165
  32. pen_stack-3.3.0/data/curated/bridge_offtarget_energetics.json +202 -0
  33. pen_stack-3.3.0/docs/BACKLOG.md +20 -0
  34. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/DEPLOY.md +39 -39
  35. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/INFRA.md +64 -64
  36. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/MCP.md +24 -24
  37. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/RELEASING.md +18 -16
  38. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/REPRO.md +46 -46
  39. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/cards/atlas.md +41 -41
  40. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/cards/durability.md +43 -43
  41. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/cards/safety.md +39 -39
  42. pen_stack-3.3.0/docs/delivery.md +22 -0
  43. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/dissemination.md +34 -34
  44. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/index.md +47 -47
  45. pen_stack-3.3.0/docs/mechanistic_constraints.md +59 -0
  46. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/positioning.md +55 -55
  47. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/private_data_formats.md +61 -61
  48. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/quickstart.md +64 -64
  49. pen_stack-3.3.0/docs/rules.md +19 -0
  50. pen_stack-3.3.0/docs/scope.md +50 -0
  51. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/scorecard.md +54 -54
  52. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/tutorials/score-deliverability.md +36 -36
  53. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/tutorials/where-can-i-write.md +43 -43
  54. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/tutorials/which-writer-reaches-locus.md +41 -41
  55. pen_stack-3.3.0/docs/uncertainty.md +84 -0
  56. pen_stack-3.3.0/docs/verify.md +60 -0
  57. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/wtkb.md +25 -25
  58. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/__init__.py +2 -2
  59. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/adapt/__init__.py +14 -14
  60. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/adapt/finetune.py +33 -33
  61. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/adapt/ingest.py +86 -86
  62. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/adapt/pipeline.py +101 -101
  63. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/adapt/recalibrate.py +58 -58
  64. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/adapt/report.py +130 -130
  65. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/agent/__init__.py +1 -1
  66. pen_stack-3.3.0/pen_stack/agent/epistemic.py +100 -0
  67. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/agent/guardrails.py +67 -49
  68. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/agent/mcp_server.py +53 -42
  69. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/agent/orchestrator.py +106 -106
  70. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/agent/pen_agent.py +77 -4
  71. pen_stack-3.3.0/pen_stack/agent/scope.py +60 -0
  72. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/agent/tools.py +130 -130
  73. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/atlas/__init__.py +1 -1
  74. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/atlas/build_wtkb.py +80 -80
  75. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/atlas/crosslink.py +144 -144
  76. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/atlas/expand.py +190 -190
  77. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/atlas/schema.py +59 -59
  78. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/atlas/scorecard.py +134 -134
  79. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/atlas/universe.py +75 -75
  80. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/atlas/variant_propose.py +155 -155
  81. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/bridge/__init__.py +1 -1
  82. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/bridge/activity.py +52 -52
  83. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/bridge/cli.py +65 -65
  84. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/bridge/fold_qc.py +53 -53
  85. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/bridge/guide_qc.py +87 -84
  86. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/bridge/ingest.py +139 -139
  87. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/bridge/offtarget.py +191 -133
  88. pen_stack-3.3.0/pen_stack/bridge/offtarget_energetics.py +105 -0
  89. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/bridge/pipeline.py +83 -83
  90. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/cli.py +126 -126
  91. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/data/__init__.py +1 -1
  92. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/data/encode.py +84 -84
  93. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/data/ingest_integration.py +112 -112
  94. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/data/ingest_trip.py +76 -76
  95. pen_stack-3.3.0/pen_stack/env/__init__.py +1 -0
  96. pen_stack-3.3.0/pen_stack/env/genome_writing_env.py +192 -0
  97. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/mech/__init__.py +1 -1
  98. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/mech/classify_atlas.py +71 -71
  99. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/mech/whitelist.py +66 -66
  100. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/monitor/__init__.py +1 -1
  101. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/monitor/europepmc.py +32 -32
  102. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/monitor/run.py +57 -57
  103. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/monitor/triage.py +63 -63
  104. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/planner/__init__.py +1 -1
  105. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/planner/cargo.py +56 -56
  106. pen_stack-3.3.0/pen_stack/planner/delivery.py +55 -0
  107. pen_stack-3.3.0/pen_stack/planner/delivery_constraints.py +110 -0
  108. pen_stack-3.3.0/pen_stack/planner/delivery_vehicles.py +37 -0
  109. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/planner/multiplex.py +110 -110
  110. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/planner/optimize.py +240 -156
  111. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/planner/pipeline.py +86 -86
  112. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/planner/report.py +26 -26
  113. pen_stack-3.3.0/pen_stack/planner/router.py +57 -0
  114. pen_stack-3.3.0/pen_stack/planner/target_site.py +118 -0
  115. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/rag/__init__.py +1 -1
  116. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/rag/llm.py +178 -178
  117. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/rag/qa.py +105 -105
  118. pen_stack-3.3.0/pen_stack/rules/__init__.py +9 -0
  119. pen_stack-3.3.0/pen_stack/rules/evaluators.py +192 -0
  120. pen_stack-3.3.0/pen_stack/rules/loader.py +31 -0
  121. pen_stack-3.3.0/pen_stack/rules/schema.py +82 -0
  122. pen_stack-3.3.0/pen_stack/rules/solver.py +43 -0
  123. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/score/__init__.py +1 -1
  124. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/score/recalibrate.py +77 -77
  125. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/score/therapeutic.py +85 -85
  126. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/server/__init__.py +1 -1
  127. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/server/api.py +151 -142
  128. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/ui/__init__.py +1 -1
  129. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/ui/app.py +713 -518
  130. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/validate/__init__.py +1 -1
  131. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/validate/adapt_demo.py +69 -69
  132. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/validate/agent_eval.py +117 -117
  133. pen_stack-3.3.0/pen_stack/validate/bench_rule_tasks.py +84 -0
  134. pen_stack-3.3.0/pen_stack/validate/bench_trust_tasks.py +92 -0
  135. pen_stack-3.3.0/pen_stack/validate/blind_gsh_discovery.py +261 -0
  136. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/validate/durability_baselines.py +36 -1
  137. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/validate/forward_hypotheses.py +104 -104
  138. pen_stack-3.3.0/pen_stack/validate/guide_qc_demo.py +69 -0
  139. pen_stack-3.3.0/pen_stack/validate/offtarget_energetics_eval.py +144 -0
  140. pen_stack-3.3.0/pen_stack/validate/out_of_scope_refusal.py +82 -0
  141. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/validate/paper3_benchmark.py +165 -165
  142. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/validate/paper4_real_validation.py +144 -144
  143. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/validate/paper4_validation.py +82 -82
  144. pen_stack-3.3.0/pen_stack/validate/selective_prediction.py +104 -0
  145. pen_stack-3.3.0/pen_stack/validate/target_site_controls.py +65 -0
  146. pen_stack-3.3.0/pen_stack/validate/uncertainty_eval.py +244 -0
  147. pen_stack-3.3.0/pen_stack/validate/ungrounded_baseline.py +234 -0
  148. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/validate/within_locus_ranking.py +13 -3
  149. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/validate/writer_recovery.py +6 -1
  150. pen_stack-3.3.0/pen_stack/verify/__init__.py +5 -0
  151. pen_stack-3.3.0/pen_stack/verify/schema.py +34 -0
  152. pen_stack-3.3.0/pen_stack/verify/service.py +90 -0
  153. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/wgenome/__init__.py +1 -1
  154. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/wgenome/durability.py +108 -108
  155. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/wgenome/features.py +82 -82
  156. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/wgenome/gsh_baseline.py +44 -7
  157. pen_stack-3.3.0/pen_stack/wgenome/ood.py +135 -0
  158. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/wgenome/safety.py +69 -69
  159. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/wgenome/structure3d.py +54 -10
  160. pen_stack-3.3.0/pen_stack/wgenome/uncertainty.py +260 -0
  161. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack.egg-info/PKG-INFO +529 -451
  162. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack.egg-info/SOURCES.txt +61 -0
  163. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack.egg-info/requires.txt +3 -0
  164. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_phase0.json +8 -8
  165. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_phase1_5.json +10 -10
  166. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_phase2.json +11 -11
  167. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_phase3.json +10 -10
  168. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_a.json +11 -0
  169. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_b.json +11 -0
  170. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_ba.json +9 -0
  171. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_ba_v33.json +9 -0
  172. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_c.json +9 -9
  173. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_d.json +10 -0
  174. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_e.json +9 -9
  175. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_ep.json +9 -0
  176. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_f.json +8 -8
  177. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_g.json +8 -8
  178. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_h.json +8 -8
  179. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_mc.json +10 -0
  180. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_r.json +14 -0
  181. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_route.json +9 -0
  182. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_uq.json +9 -0
  183. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_v.json +8 -0
  184. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/paper1.yaml +60 -60
  185. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/paper2.yaml +74 -74
  186. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/paper3.yaml +64 -63
  187. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/paper4.yaml +71 -71
  188. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/phase0.yaml +28 -28
  189. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/ws_a.yaml +11 -6
  190. pen_stack-3.3.0/prereg/ws_ba.yaml +45 -0
  191. pen_stack-3.3.0/prereg/ws_ba_v33.yaml +12 -0
  192. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/ws_c.yaml +2 -1
  193. pen_stack-3.3.0/prereg/ws_d.yaml +11 -0
  194. pen_stack-3.3.0/prereg/ws_ep.yaml +40 -0
  195. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/ws_f.yaml +38 -38
  196. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/ws_g.yaml +37 -33
  197. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/ws_h.yaml +32 -32
  198. pen_stack-3.3.0/prereg/ws_mc.yaml +51 -0
  199. pen_stack-3.3.0/prereg/ws_r.yaml +51 -0
  200. pen_stack-3.3.0/prereg/ws_route.yaml +9 -0
  201. pen_stack-3.3.0/prereg/ws_uq.yaml +62 -0
  202. pen_stack-3.3.0/prereg/ws_v.yaml +15 -0
  203. {pen_stack-3.1.0 → pen_stack-3.3.0}/pyproject.toml +111 -108
  204. {pen_stack-3.1.0 → pen_stack-3.3.0}/scripts/p1_safety_concordance.py +82 -82
  205. {pen_stack-3.1.0 → pen_stack-3.3.0}/scripts/p3_benchmark_report.py +8 -4
  206. {pen_stack-3.1.0 → pen_stack-3.3.0}/scripts/p4_genome_scan.py +43 -43
  207. {pen_stack-3.1.0 → pen_stack-3.3.0}/scripts/ws_b_report.py +28 -15
  208. {pen_stack-3.1.0 → pen_stack-3.3.0}/setup.cfg +4 -4
  209. pen_stack-3.1.0/benchmarks/genome_writing_bench/LEADERBOARD.md +0 -22
  210. pen_stack-3.1.0/benchmarks/genome_writing_bench/SHA256SUMS +0 -4
  211. pen_stack-3.1.0/benchmarks/genome_writing_bench/tasks.yaml +0 -81
  212. pen_stack-3.1.0/configs/gsh_validated_heldout.yaml +0 -39
  213. pen_stack-3.1.0/pen_stack/planner/delivery.py +0 -32
  214. pen_stack-3.1.0/pen_stack/validate/blind_gsh_discovery.py +0 -165
  215. pen_stack-3.1.0/pen_stack/validate/guide_qc_demo.py +0 -58
  216. pen_stack-3.1.0/prereg/SHA256_LOCK_ws_a.json +0 -11
  217. pen_stack-3.1.0/prereg/SHA256_LOCK_ws_b.json +0 -11
  218. pen_stack-3.1.0/prereg/SHA256_LOCK_ws_d.json +0 -9
  219. pen_stack-3.1.0/prereg/ws_d.yaml +0 -29
  220. {pen_stack-3.1.0 → pen_stack-3.3.0}/LICENSE +0 -0
  221. {pen_stack-3.1.0 → pen_stack-3.3.0}/MANIFEST.in +0 -0
  222. {pen_stack-3.1.0 → pen_stack-3.3.0}/configs/cargo_polish.yaml +0 -0
  223. {pen_stack-3.1.0 → pen_stack-3.3.0}/configs/llm.yaml +0 -0
  224. {pen_stack-3.1.0 → pen_stack-3.3.0}/data/curated/bridge_offtarget_profile_measured.parquet +0 -0
  225. {pen_stack-3.1.0 → pen_stack-3.3.0}/data/curated/gene_coords.parquet +0 -0
  226. {pen_stack-3.1.0 → pen_stack-3.3.0}/data/curated/unified_editor_universe.parquet +0 -0
  227. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/agent.md +0 -0
  228. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/alphagenome_feasibility.md +0 -0
  229. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/benchmark_circularity.md +0 -0
  230. {pen_stack-3.1.0 → pen_stack-3.3.0}/docs/tutorials/compare-families.md +0 -0
  231. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/_resources.py +0 -0
  232. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/bridge/ortholog_screen.py +0 -0
  233. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/data/genome.py +0 -0
  234. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/data/ingest_chromatin.py +0 -0
  235. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/data/ingest_safety_annot.py +0 -0
  236. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/planner/cargo_polish.py +0 -0
  237. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/rag/index.py +0 -0
  238. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/validate/cargo_directionality.py +0 -0
  239. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/validate/intent_specification.py +0 -0
  240. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/validate/seq_vs_measured.py +0 -0
  241. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/wgenome/chromatin_seq.py +0 -0
  242. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/wgenome/export_tracks.py +0 -0
  243. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/wgenome/providers.py +0 -0
  244. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack/wgenome/writability.py +0 -0
  245. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack.egg-info/dependency_links.txt +0 -0
  246. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack.egg-info/entry_points.txt +0 -0
  247. {pen_stack-3.1.0 → pen_stack-3.3.0}/pen_stack.egg-info/top_level.txt +0 -0
  248. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/ws_b.yaml +0 -0
  249. {pen_stack-3.1.0 → pen_stack-3.3.0}/prereg/ws_e.yaml +0 -0
  250. {pen_stack-3.1.0 → pen_stack-3.3.0}/scripts/p1_build_atlas.py +0 -0
  251. {pen_stack-3.1.0 → pen_stack-3.3.0}/scripts/p1_build_durability.py +0 -0
  252. {pen_stack-3.1.0 → pen_stack-3.3.0}/scripts/p1_export_tracks.py +0 -0
  253. {pen_stack-3.1.0 → pen_stack-3.3.0}/scripts/p1_train_safety.py +0 -0
  254. {pen_stack-3.1.0 → pen_stack-3.3.0}/scripts/p1_validation_report.py +0 -0
  255. {pen_stack-3.1.0 → pen_stack-3.3.0}/scripts/p2_build_atlas.py +0 -0
  256. {pen_stack-3.1.0 → pen_stack-3.3.0}/scripts/ws_c_report.py +0 -0
@@ -1,202 +1,266 @@
1
- # Changelog
2
-
3
- All notable changes to PEN-STACK are documented here. This file follows
4
- [Keep a Changelog](https://keepachangelog.com/) and the program's phase structure.
5
-
6
- ## [3.1.0] - 2026-06-04 - v3.1 release: publishable contributions + an adopted benchmark
7
-
8
- The v3.1 cycle is complete (workstreams A-H). It hardens the honesty of the planning benchmark, surrounds the
9
- models with strong baselines, adds a predicted-structure safety axis, and ships the first benchmark and
10
- grounded agent for the genome-writing side. Every workstream is pre-registered (`prereg/ws_*.yaml`,
11
- SHA-locked) and reports its honest negatives.
12
-
13
- ### Added
14
- - **WS-B - strong baselines + safety primary-metric switch.** Endogenous-expression baseline (TRIP-trained
15
- Spearman 0.51 vs AlphaGenome ES-Bruce4 proxy 0.43), multi-mark ablation (all-marks >= best single), and a
16
- published GSH rule-set: safe-harbour discrimination (learned 0.92, 95% CI [0.82, 0.98] vs distance-rule
17
- 0.38, delta CI excludes zero) is now the primary safety metric; the circular `genotoxic_cis` AUROC is a
18
- labeled diagnostic. (`pen_stack.wgenome.gsh_baseline`, `pen_stack.validate.durability_baselines`.)
19
- - **WS-C - AlphaGenome integration.** Hosted-API provider with an offline cache; predicted-vs-measured track
20
- validation (HepG2 ATAC Pearson 0.85) with an honest score-level low-confidence flag; a 3D structural-risk
21
- axis from contact-map deltas (`pen_stack.wgenome.{providers,chromatin_seq,structure3d}`,
22
- `pen_stack.validate.seq_vs_measured`).
23
- - **WS-D - Cargo Polish.** Cargo-sequence silencing-risk scan (`pen_stack.planner.cargo_polish`).
24
- - **WS-E - Genome-Writing Bench v0.1 + PEN-Agent.** The first writing-side benchmark (`benchmarks/`,
25
- `bench/run.py`) with deterministic scorers, a leaderboard, and a real LLM-agent baseline; a grounded
26
- write-planning state machine with a no-fabrication hard gate (`pen_stack.agent.pen_agent`).
27
- - **WS-F - local recalibration / private-data adaptation.** Gated recalibration / fine-tuning on private
28
- data, in-container; the adapted model activates only if it beats the released model AND a no-skill
29
- baseline; the released model is provably unchanged (`pen_stack.adapt`).
30
- - **WS-G - multiplex + guide QC.** A pairwise translocation-risk screen (`pen_stack.planner.multiplex`,
31
- surfaced in PEN-Agent) and a bridge-RNA guide ranker (`pen_stack.bridge.guide_qc`).
32
- - **WS-H - release + dissemination.** README/badges updated for v3.1, `docs/quickstart.md`,
33
- `docs/positioning.md`, the leaderboard submission guide, the dissemination log, and version 3.1.0.
34
-
35
- ### Changed (honesty)
36
- - The planning benchmark's `recovery_at_k` ranking is now deterministic (stable sort + tie-breakers).
37
- - The LLM stack defaults to the local Ollama model on the compute tier with an automatic hosted-Nemotron
38
- fallback, a cooldown cache, and bounded timeouts (no more multi-minute stalls when a provider is absent).
39
-
40
- ## [3.1.0a0] - 2026-06-04 - v3.1 WS-A: de-circularize the planning benchmark (gate)
41
-
42
- The v3.1 cycle (publishable contributions + an adopted benchmark) opens with its gate: de-circularizing the
43
- Phase-3 planning benchmark before anything builds on it.
44
-
45
- ### Changed (honesty)
46
- - **The Phase-3 "discriminating-stratum recovery@10 = 1.00 vs 0.00 (McNemar p, CI)" is now labeled
47
- definitional, not predictive,** everywhere (README, manuscript abstract, `prereg/paper3.yaml`,
48
- `validate/paper3_benchmark.py` docstring). An on-target identity term dominates the score, so the planner
49
- ranks the goal's own gene first by construction. Documented in `docs/benchmark_circularity.md` (WS-A1).
50
- - The intent result is reframed as a **specification-compliance correctness table** (`validate/intent_specification.py`,
51
- 7/7), with no recovery/p-value/CI language (WS-A2).
52
-
53
- ### Added (the honest, non-circular replacements)
54
- - **Blind safe-harbour site discovery (the new headline)**: `validate/blind_gsh_discovery.py` +
55
- `configs/gsh_validated_heldout.yaml` (5 DOI-validated held-out GSH, gene-anchored to hg38) +
56
- frozen/SHA-locked `data/gsh_matched_controls.parquet`. Run genome-wide (no on-target term), the planner's
57
- writability separates validated GSH from matched-context controls at **AUROC 0.92** (safety-only 0.50)
58
- (WS-A3).
59
- - **Diversified writer-family recovery**: `validate/writer_recovery.py` + `data/writer_panel.csv` (8 writes,
60
- 4 families, DOIs). recovery@1 = **1.0** vs prevalence 0.25 (smallest-capacity DSB-free writer that fits
61
- the cargo) (WS-A4).
62
- - **Within-locus ranking** (descriptive): `validate/within_locus_ranking.py` - AAVS1 documented bin at the
63
- 93rd within-locus percentile (top quartile); CLYBL at the 34th (honest negative) (WS-A5).
64
- - **Consolidated report** `scripts/p3_benchmark_report.py` -> `out/ws_a_report.md`; `prereg/ws_a.yaml` +
65
- SHA lock. Gate G-A is met: blind AUROC reported, no circular claims remain (WS-A6).
66
-
67
- ## [Unreleased] - 2026-06-03 - honest reframing, repository polish, coverage, hybrid LLM
68
-
69
- ### Added
70
- - **Hybrid LLM backend** (`pen_stack/rag/llm.py`, `configs/llm.yaml`): a strong hosted model for
71
- reasoning/agent/Q&A (NVIDIA Nemotron, OpenAI-compatible, free) with **automatic fallback** to the local
72
- Ollama model, then to the deterministic no-LLM path. One `provider` switch. The agent and RAG were
73
- refactored onto a single provider-agnostic `chat()` (NVIDIA tool-call IDs and Ollama native message
74
- threading both handled). The LLM stays non-load-bearing - every number/citation still comes from
75
- validated tools - so the model choice does not affect scientific reproducibility; it only improves
76
- orchestration (Nemotron planned a goal in 2 tool calls vs the local 7B's 8-call loop). Core scientific
77
- compute stays local/VM and uses no LLM. API keys are read from an env var or a **gitignored** file and
78
- are never committed.
79
-
80
- ### Changed
81
- - **Paper 4 reframed to its honest scope.** `pen-bridge` is positioned as the first measured-data-validated
82
- tool that **nominates and ranks candidate off-target *locations*** for bridge recombinases - a
83
- **screening tool, not a quantitative safety calculator**. The AUROC 0.77 vs 0.62 result is stated with
84
- its caveat (favourable negative set; mostly tests core integrity), and the magnitude limitation
85
- (sequence-risk does not rank recombination amount, rho ~0.30) is named as the single most important
86
- limitation. Application-Note tier, first-of-its-kind for an unoccupied gap; the Writable Genome remains
87
- the flagship. Manuscript + `prereg/paper4.yaml` + summaries updated.
88
- - **Variant-effect reframed:** the DMS recovers KNOWN enhancers (a catalogue feature); it is not a novel
89
- variant-design method; EVOLVEpro is the engine to wrap when generating new variants.
90
- - **Repository made clean ASCII:** removed all decorative emojis and em/en dashes and other non-ASCII
91
- punctuation across code, docs, configs, and manuscripts (box-drawing tree characters kept).
92
-
93
- ### Added
94
- - 72-system ortholog characterisation (`bridge/ortholog_screen.py`) - explicitly DESCRIPTIVE (Table S1 has
95
- no activity label): sequence-similarity organisation vs the validated standout ISCro4 (IS621 ranks most
96
- similar, a sanity check). Exploratory secondary result, N ~72.
97
- - Coverage: CI runs `pytest --cov`, uploads to Codecov, and publishes a self-hosted coverage badge
98
- (`tools/make_coverage_badge.py` -> `.github/badges/coverage.svg`). Unit-test coverage of the core logic
99
- is **69%** (integration-only modules that need GPU/VM/network/LLM are excluded via `[tool.coverage.run]`).
100
- - Professional, emoji-free README with connected-repo badges (genome-atlas / mech-class / pen-score /
101
- pen-assemble / pen-compare), an architecture diagram, and the problem/gaps explanation.
102
-
103
- ## [3.0.0a5] - 2026-06-02 - Phase 1.5 (Bridge-recombinase off-target engine -> Paper 4, BEACHHEAD)
104
-
105
- The first public instrument: a bridge-recombinase off-target screening tool.
106
-
107
- ### Added
108
- - **Off-target engine** (`pen_stack/bridge/offtarget.py` + `configs/bridge_offtarget_profile.yaml`):
109
- genome-wide hg38 pseudosite scan (CT-core seed, per-chromosome, memory-bounded) + a position-weight
110
- risk model grounded in the published mechanism. **Beats naive Hamming: AUROC 1.00 vs 0.59** at
111
- separating core-preserving (real-risk) from core-disrupting (abolished) sites. Exposes
112
- `predict_offtargets(family, site)` - completes the Phase-3 Planner cargo hook.
113
- - **Fold / cross-loop QC** (`bridge/fold_qc.py`): ViennaRNA fold (verified MFE on a 190-nt design) +
114
- TBL/DBL cross-loop complementarity.
115
- - **Activity framework** (`bridge/activity.py`): exploratory DMS + 72-system trainer (deferred; data paywalled).
116
- - **`pen-bridge`** (`bridge/pipeline.py`, `bridge/cli.py`, `/bridge/design` API): **wraps** the Arc
117
- BridgeRNADesigner (verified) and adds the off-target + QC layer.
118
- - `validate/paper4_validation.py` + `scripts/p4_genome_scan.py`; `prereg/paper4.yaml` + SHA lock.
119
-
120
- ### Notes
121
- - **Phase 1.5 COMPLETE** - pre-registered criteria met (or honestly gated): the off-target engine,
122
- ViennaRNA fold, and designer wrap are verified on the VM (real hg38 scan: chr22 in ~21 s). The *blind
123
- recall of Perry 2025's measured off-targets* and the DMS/activity model are gated on the paywalled
124
- Perry 2025 supplementary (drop in via `ingest.load_offtarget_profile`). Completes the deferred Phase-2
125
- Section 2.4 and Phase-3 Section 3.2 hooks. 68 tests green; ruff clean. **All program phases (0,1,1.5,2,3) now done.**
126
-
127
- ## [3.0.0a4] - 2026-06-02 - Phase 3 (The Write Planner + agentic platform -> Paper 3, CAPSTONE)
128
-
129
- Inverse design + the paper-defining recovery@k benchmark + the agentic platform.
130
-
131
- ### Added
132
- - **Inverse-design optimiser** (`pen_stack/planner/optimize.py`, `configs/intent_weights.yaml`): an
133
- `edit_intent`-conditioned objective whose `target_gene_sign` flips whether hitting the target gene is
134
- penalised or rewarded - the same TRAC site ranks #1 (knock-in) vs #101 (safe-harbour).
135
- - **Cargo/delivery** (`planner/cargo.py`, `planner/delivery.py`): donor spec + size check + delivery rule
136
- table; bridge/seek off-target via an optional Phase-1.5 hook (pending until 1.5).
137
- - **End-to-end Planner** (`planner/pipeline.py`, `report.py`, `/plan` API, `pen-stack plan` CLI): ranked,
138
- fully traceable plans with per-field provenance.
139
- - **Two-stratum recovery@k benchmark** (`validate/paper3_benchmark.py`, `data/benchmark_panel.csv`,
140
- `prereg/paper3.yaml`): **discriminating stratum planner 1.00 vs baseline 0.00, McNemar p=0.0156, gap CI
141
- [1.0,1.0] excludes zero; control tie 0.67=0.67**. Panel cited to Europe-PMC-verified sources.
142
- - **Forward hypotheses** (`validate/forward_hypotheses.py`): date-stamped novel F8/SERPINA1/CISH/HBA1
143
- proposals + grounded cited ranking.
144
- - **Agentic platform**: `agent/tools.py` + `agent/orchestrator.py` (Ollama tool-calling, auditable trace,
145
- no-fabrication, refusals), `agent/mcp_server.py` (fastmcp), `docker-compose.yml` + `docker/ui.Dockerfile`
146
- + Streamlit **Agent** page + `docs/DEPLOY.md`/`docs/MCP.md`, `validate/agent_eval.py`.
147
- - Shipped `data/curated/gene_coords.parquet` (GENCODE-derived) so tools work in any container.
148
-
149
- ### Notes
150
- - **Phase 3 COMPLETE** - pre-registered criteria met (`prereg/paper3.yaml` + `SHA256_LOCK_phase3.json`).
151
- Agent verified on the VM in LLM mode (no-fabrication + plan-equivalence + refusals all pass). 63 tests
152
- green; ruff clean. Wet-lab (3.7) skipped - non-gating. Bridge off-target hook completes with Phase 1.5.
153
-
154
- ## [3.0.0a3] - 2026-06-02 - Phase 2 (Writer Atlas + Unified Stack -> Paper 2)
155
-
156
- The broad, cross-family Writer Atlas, the writer<->locus cross-link, and the installable platform.
157
-
158
- ### Added
159
- - **Writer Atlas** (`pen_stack/atlas/expand.py`, `atlas.parquet`): **33,370 systems across 8 families**
160
- (31,885 IS110/IS1111 orthologs + curated cores/reps), every row confidence-tagged + >=1 source DOI,
161
- targeting metadata inherited from the WT-KB. `configs/atlas_families.yaml` drives the UniProt queries.
162
- - **Mechanism at scale** (`pen_stack/mech/`): ported audited 18-family Pfam whitelist v1.2.1; composite
163
- co-occurrence rules; **core agreement 1.00** vs audited labels; conflicting calls -> review queue.
164
- - **Therapeutic readiness** (`pen_stack/score/therapeutic.py`): deliverability/cargo/human-cell axes,
165
- components retained (ISCro4 326aa->AAV).
166
- - **Cross-link** (`pen_stack/atlas/crosslink.py`): bidirectional writer<->locus queries; AAVS1 held-out
167
- check passes (0.90 writability + bridge-reachable). Per-family caches for k562/hepg2/hspc.
168
- - **Variant proposal** (`pen_stack/atlas/variant_propose.py`): point-mutation framework + retrospective
169
- harness, no chimeras; DMS model pluggable (Phase 1.5).
170
- - **PEN-MONITOR** (`pen_stack/monitor/`): Europe PMC living-database engine; back-test surfaces ISPpu10;
171
- never auto-edits the atlas; every candidate cited.
172
- - **Grounded RAG** (`pen_stack/rag/`, `pen_stack/agent/guardrails.py`): numbers from tool calls, claims
173
- cited, clinical directives refused; optional Ollama/Qwen phrasing layer (presentation only).
174
- - **Stack**: unified CLI subcommands, FastAPI server (`pen_stack/server/api.py`), Streamlit platform UI
175
- (Writer Atlas + Ask pages), mkdocs site + 4 use-case tutorials. 46 tests green; ruff clean.
176
-
177
- ### Notes
178
- - **Phase 2 COMPLETE** - pre-registered criteria met (`prereg/paper2.yaml` + `SHA256_LOCK_phase2.json`);
179
- atlas Zenodo DOI pending author upload. Verified on the VM (Docker): API, UI (:8501), RAG with Qwen.
180
-
181
- ## [3.0.0a0] - 2026-06-01 - Phase 0 (in progress)
182
-
183
- Fresh v3.0 monorepo. Supersedes the v1.0 platform repository (archived); consolidates the five prior
184
- repositories (`genome-atlas`, `mech-class`, `pen-score`, `pen-assemble`, `pen-compare`) as provenance.
185
-
186
- ### Added
187
- - Monorepo scaffold: 13 modules (`atlas`, `mech`, `score`, `wgenome`, `planner`, `bridge`, `monitor`,
188
- `rag`, `agent`, `ui`, `data`, `validate`, `server`), `pyproject.toml`, Docker image spec, `penctl`
189
- laptop<->VM orchestrator, CI, `configs/`, `prereg/`.
190
- - `docs/INFRA.md` - three-tier (laptop / VM / Drive) Docker-only, SFTP-only workflow.
191
- - `configs/llm.yaml` - single LLM switch (Ollama + Qwen2.5-7B-Instruct, Apache-2.0).
192
- - `configs/datasets.yaml` - pinned dataset accessions + verified IDs (see VERIFICATION_REPORT_v3.0).
193
-
194
- - **WT-KB** (`pen_stack/atlas/`): 8 fully-sourced writer families with reachability tiers; schema enforces the >=1-DOI sourcing rule.
195
- - **Re-grounded axes** (`pen_stack/score/recalibrate.py`, `configs/score_axes.yaml`): `S_Cargo` from measured bp, `S_Prog` from targeting modality, `length_aa` backfilled - no per-enzyme overrides.
196
- - **Canonical universe** (`pen_stack/atlas/universe.py::assemble`): one path joining the 1,058-entity universe + WT-KB + crosswalk; cross-module consistency test.
197
- - **Descriptive scorecard** (`pen_stack/atlas/scorecard.py`): reframed from the circular certification; blind concordance recovers ISCro4 as the bridge standout without naming it. 21 tests green.
198
-
199
- ### Notes
200
- - Independent verification of all datasets/IDs/DOIs/tools completed: no critical errors in the v3.0 plan
201
- (full report in `Final_Part_v3.0/VERIFICATION_REPORT_v3.0.md`).
202
- - **Phase 0 COMPLETE** - all pre-registered success criteria met (`prereg/phase0.yaml` + SHA lock).
1
+ # Changelog
2
+
3
+ All notable changes to PEN-STACK are documented here. This file follows
4
+ [Keep a Changelog](https://keepachangelog.com/) and the program's phase structure.
5
+
6
+ ## [3.3.0] - 2026-06-09 - v3.3 release: the Verifier (a type checker for genome writes)
7
+
8
+ v3.3 lifts the laws of genome writing into a versioned, machine-readable rule base and exposes a single
9
+ `verify(design) -> Verdict` call (legal/illegal + named rule + calibrated confidence + scope) over Python,
10
+ REST, and MCP. Workstreams WS-{R,D,ROUTE,V,BA}, each SHA-locked.
11
+
12
+ ### Added
13
+ - **WS-R - rule base + solver.** `pen_stack/rules/{schema,evaluators,loader,solver}.py` + `configs/rules/*.yaml`
14
+ (9 rules across reachability/fold/payload/multiplex/delivery), each with id/kind/mechanism/param/provenance(DOI)/
15
+ test. Evaluators delegate to the existing validated functions; a parity test proves no decision changed.
16
+ Legality and confidence are kept as distinct axes.
17
+ - **WS-D - delivery palette.** `configs/delivery_vehicles.yaml` + `planner/delivery_vehicles.py`: 8 vehicles
18
+ (AAV single/dual, lentivirus, HDAd, HSV amplicon, LNP-mRNA, eVLP, electroporation) with capacity/integration/
19
+ cargo-form/DOIs; delivery rules (hard rejects + soft penalties + an immunogenicity-magnitude scope flag).
20
+ - **WS-ROUTE - write-type router.** `planner/router.py` + `configs/write_types.yaml`: dispatches insertion/
21
+ excision/inversion/replacement/regulatory_rewrite/landing_pad_install/multiplex; unsupported types defer.
22
+ - **WS-V - verification service.** `pen_stack/verify/{service,schema}.py`: `verify(design) -> Verdict`; `POST
23
+ /verify` + MCP `verify_write`; `docs/verify.md`. No fabrication (every number tool-sourced).
24
+ - **WS-BA - bench v0.2.1 + agent.** T12 rule-grounded legality-with-explanation (verifier reason accuracy 1.0
25
+ vs ungrounded 0.0); the agent submits its plan to the verifier. Bench 12/12 available, planner beats baseline
26
+ 8/8.
27
+ - **Docs:** `docs/verify.md`, `docs/rules.md`, `docs/delivery.md`.
28
+
29
+ ### Changed
30
+ - Version 3.2.0 -> 3.3.0 (pyproject, `__init__`, CITATION.cff). README "what is new in v3.3"; bench badge v0.2.1.
31
+
32
+ ## [3.2.0] - 2026-06-08 - v3.2 release: a calibrated, self-aware co-scientist
33
+
34
+ The v3.2 cycle makes the genome-writing funnel **trustworthy**: every value carries a calibrated confidence,
35
+ an extrapolation flag, and - where the biology is beyond any tool here - an explicit out-of-scope deferral.
36
+ Workstreams UQ/EP/MC/BA, each pre-registered (`prereg/ws_{uq,ep,mc,ba}.yaml`, SHA-locked) and reporting its
37
+ honest negatives. The Genome-Writing Bench bumps to **v0.2**.
38
+
39
+ ### Added
40
+ - **WS-UQ - calibrated uncertainty + OOD.** Conformal prediction intervals (durability expression) and APS /
41
+ Mondrian prediction sets (safety, silenced) wrapping the existing heads with no retraining
42
+ (`pen_stack.wgenome.uncertainty`); an OOD detector that widens intervals out-of-distribution
43
+ (`pen_stack.wgenome.ood`); selective prediction + plan-level confidence
44
+ (`pen_stack.validate.selective_prediction`). Held-out coverage 0.895 vs 0.90 nominal; risk-coverage accuracy
45
+ 0.739->0.930 under abstention. OOD across human cell types is weak (0.65-0.73) - reported as a heuristic.
46
+ - **WS-EP - epistemic scoping.** A three-tier status (grounded-confident / grounded-extrapolating /
47
+ not-computable) on every agent output (`pen_stack.agent.epistemic`); a known-unknowns registry + scope
48
+ matcher (`configs/known_unknowns.yaml`, `pen_stack.agent.scope`, `docs/scope.md`) that defers out-of-scope
49
+ questions (deferral 1.0, false-defer 0.0); abstention in the agent. No-fabrication gate intact.
50
+ - **WS-MC - mechanistic filters.** A hard target-site/PAM/att-site reachability reject
51
+ (`pen_stack.planner.target_site`, `configs/target_sites.yaml`; controls 9/9); vehicle-specific
52
+ delivery-sequence penalties (`pen_stack.planner.delivery_constraints`); and an off-target **energetics**
53
+ model (`pen_stack.bridge.offtarget_energetics`) that beats the 0.77 baseline at held-out AUROC 0.88 on the
54
+ comparable (core-disrupted) construction and ships as the default ranker. A reviewer-driven re-run
55
+ (`by_negative_construction`) shows that gap is mostly the core-penalisation artifact; with the core held
56
+ matched, the non-core substitution-identity gain is real but modest (delta ~0.04, 0.687 vs 0.646). Both AUROCs
57
+ carry a favourable-negative-set caveat (decoys derived from real off-targets; no non-recombining background).
58
+ - **WS-BA - bench v0.2 + uncertainty-aware agent.** Four trust tasks (T8 calibration, T9 selective prediction,
59
+ T10 OOD honesty, T11 out-of-scope refusal) contrasting the uncertainty-aware agent with an over-confident
60
+ baseline (4/4); PEN-Agent emits confidence + epistemic status + abstains; UI surfaces them. Bench re-SHA-locked.
61
+ - **WS-OPT1 (optional) - Gymnasium interface.** A thin `gymnasium.Env` over the planner (`pen_stack.env`,
62
+ `[env]` extra) for agent-developer interoperability - interface only, no RL superiority claimed.
63
+ - **Docs:** `docs/uncertainty.md`, `docs/scope.md`, `docs/mechanistic_constraints.md`; M-UQ methods note +
64
+ M1/M2 manuscript updates. WS-OPT2 (Opentrons) deferred to `docs/BACKLOG.md`.
65
+
66
+ ### Changed
67
+ - Version 3.1.0 -> 3.2.0 (pyproject, `__init__`, CITATION.cff). README "what is new in v3.2"; badges + bench
68
+ v0.2. The bridge off-target default ranker is now the energetics model when its penalty table is present.
69
+
70
+ ## [3.1.0] - 2026-06-04 - v3.1 release: publishable contributions + an adopted benchmark
71
+
72
+ The v3.1 cycle is complete (workstreams A-H). It hardens the honesty of the planning benchmark, surrounds the
73
+ models with strong baselines, adds a predicted-structure safety axis, and ships the first benchmark and
74
+ grounded agent for the genome-writing side. Every workstream is pre-registered (`prereg/ws_*.yaml`,
75
+ SHA-locked) and reports its honest negatives.
76
+
77
+ ### Added
78
+ - **WS-B - strong baselines + safety primary-metric switch.** Endogenous-expression baseline (TRIP-trained
79
+ Spearman 0.51 vs AlphaGenome ES-Bruce4 proxy 0.43), multi-mark ablation (all-marks >= best single), and a
80
+ published GSH rule-set: safe-harbour discrimination (learned 0.92, 95% CI [0.82, 0.98] vs distance-rule
81
+ 0.38, delta CI excludes zero) is now the primary safety metric; the circular `genotoxic_cis` AUROC is a
82
+ labeled diagnostic. (`pen_stack.wgenome.gsh_baseline`, `pen_stack.validate.durability_baselines`.)
83
+ - **WS-C - AlphaGenome integration.** Hosted-API provider with an offline cache; predicted-vs-measured track
84
+ validation (HepG2 ATAC Pearson 0.85) with an honest score-level low-confidence flag; a 3D structural-risk
85
+ axis from contact-map deltas (`pen_stack.wgenome.{providers,chromatin_seq,structure3d}`,
86
+ `pen_stack.validate.seq_vs_measured`).
87
+ - **WS-D - Cargo Polish.** Cargo-sequence silencing-risk scan (`pen_stack.planner.cargo_polish`).
88
+ - **WS-E - Genome-Writing Bench v0.1 + PEN-Agent.** The first writing-side benchmark (`benchmarks/`,
89
+ `bench/run.py`) with deterministic scorers, a leaderboard, and a real LLM-agent baseline; a grounded
90
+ write-planning state machine with a no-fabrication hard gate (`pen_stack.agent.pen_agent`).
91
+ - **WS-F - local recalibration / private-data adaptation.** Gated recalibration / fine-tuning on private
92
+ data, in-container; the adapted model activates only if it beats the released model AND a no-skill
93
+ baseline; the released model is provably unchanged (`pen_stack.adapt`).
94
+ - **WS-G - multiplex + guide QC.** A pairwise translocation-risk screen (`pen_stack.planner.multiplex`,
95
+ surfaced in PEN-Agent) and a bridge-RNA guide ranker (`pen_stack.bridge.guide_qc`).
96
+ - **WS-H - release + dissemination.** README/badges updated for v3.1, `docs/quickstart.md`,
97
+ `docs/positioning.md`, the leaderboard submission guide, the dissemination log, and version 3.1.0.
98
+
99
+ ### Changed (honesty)
100
+ - The planning benchmark's `recovery_at_k` ranking is now deterministic (stable sort + tie-breakers).
101
+ - The LLM stack defaults to the local Ollama model on the compute tier with an automatic hosted-Nemotron
102
+ fallback, a cooldown cache, and bounded timeouts (no more multi-minute stalls when a provider is absent).
103
+
104
+ ## [3.1.0a0] - 2026-06-04 - v3.1 WS-A: de-circularize the planning benchmark (gate)
105
+
106
+ The v3.1 cycle (publishable contributions + an adopted benchmark) opens with its gate: de-circularizing the
107
+ Phase-3 planning benchmark before anything builds on it.
108
+
109
+ ### Changed (honesty)
110
+ - **The Phase-3 "discriminating-stratum recovery@10 = 1.00 vs 0.00 (McNemar p, CI)" is now labeled
111
+ definitional, not predictive,** everywhere (README, manuscript abstract, `prereg/paper3.yaml`,
112
+ `validate/paper3_benchmark.py` docstring). An on-target identity term dominates the score, so the planner
113
+ ranks the goal's own gene first by construction. Documented in `docs/benchmark_circularity.md` (WS-A1).
114
+ - The intent result is reframed as a **specification-compliance correctness table** (`validate/intent_specification.py`,
115
+ 7/7), with no recovery/p-value/CI language (WS-A2).
116
+
117
+ ### Added (the honest, non-circular replacements)
118
+ - **Blind safe-harbour site discovery (the new headline)**: `validate/blind_gsh_discovery.py` +
119
+ `configs/gsh_validated_heldout.yaml` (5 DOI-validated held-out GSH, gene-anchored to hg38) +
120
+ frozen/SHA-locked `data/gsh_matched_controls.parquet`. Run genome-wide (no on-target term), the planner's
121
+ writability separates validated GSH from matched-context controls at **AUROC 0.92** (safety-only 0.50)
122
+ (WS-A3).
123
+ - **Diversified writer-family recovery**: `validate/writer_recovery.py` + `data/writer_panel.csv` (8 writes,
124
+ 4 families, DOIs). recovery@1 = **1.0** vs prevalence 0.25 (smallest-capacity DSB-free writer that fits
125
+ the cargo) (WS-A4).
126
+ - **Within-locus ranking** (descriptive): `validate/within_locus_ranking.py` - AAVS1 documented bin at the
127
+ 93rd within-locus percentile (top quartile); CLYBL at the 34th (honest negative) (WS-A5).
128
+ - **Consolidated report** `scripts/p3_benchmark_report.py` -> `out/ws_a_report.md`; `prereg/ws_a.yaml` +
129
+ SHA lock. Gate G-A is met: blind AUROC reported, no circular claims remain (WS-A6).
130
+
131
+ ## [Unreleased] - 2026-06-03 - honest reframing, repository polish, coverage, hybrid LLM
132
+
133
+ ### Added
134
+ - **Hybrid LLM backend** (`pen_stack/rag/llm.py`, `configs/llm.yaml`): a strong hosted model for
135
+ reasoning/agent/Q&A (NVIDIA Nemotron, OpenAI-compatible, free) with **automatic fallback** to the local
136
+ Ollama model, then to the deterministic no-LLM path. One `provider` switch. The agent and RAG were
137
+ refactored onto a single provider-agnostic `chat()` (NVIDIA tool-call IDs and Ollama native message
138
+ threading both handled). The LLM stays non-load-bearing - every number/citation still comes from
139
+ validated tools - so the model choice does not affect scientific reproducibility; it only improves
140
+ orchestration (Nemotron planned a goal in 2 tool calls vs the local 7B's 8-call loop). Core scientific
141
+ compute stays local/VM and uses no LLM. API keys are read from an env var or a **gitignored** file and
142
+ are never committed.
143
+
144
+ ### Changed
145
+ - **Paper 4 reframed to its honest scope.** `pen-bridge` is positioned as the first measured-data-validated
146
+ tool that **nominates and ranks candidate off-target *locations*** for bridge recombinases - a
147
+ **screening tool, not a quantitative safety calculator**. The AUROC 0.77 vs 0.62 result is stated with
148
+ its caveat (favourable negative set; mostly tests core integrity), and the magnitude limitation
149
+ (sequence-risk does not rank recombination amount, rho ~0.30) is named as the single most important
150
+ limitation. Application-Note tier, first-of-its-kind for an unoccupied gap; the Writable Genome remains
151
+ the flagship. Manuscript + `prereg/paper4.yaml` + summaries updated.
152
+ - **Variant-effect reframed:** the DMS recovers KNOWN enhancers (a catalogue feature), it is not a novel
153
+ variant-design method; EVOLVEpro is the engine to wrap when generating new variants.
154
+ - **Repository made clean ASCII:** removed all decorative emojis and em/en dashes and other non-ASCII
155
+ punctuation across code, docs, configs, and manuscripts (box-drawing tree characters kept).
156
+
157
+ ### Added
158
+ - 72-system ortholog characterisation (`bridge/ortholog_screen.py`) - explicitly DESCRIPTIVE (Table S1 has
159
+ no activity label): sequence-similarity organisation vs the validated standout ISCro4 (IS621 ranks most
160
+ similar, a sanity check). Exploratory secondary result, N ~72.
161
+ - Coverage: CI runs `pytest --cov`, uploads to Codecov, and publishes a self-hosted coverage badge
162
+ (`tools/make_coverage_badge.py` -> `.github/badges/coverage.svg`). Unit-test coverage of the core logic
163
+ is **69%** (integration-only modules that need GPU/VM/network/LLM are excluded via `[tool.coverage.run]`).
164
+ - Professional, emoji-free README with connected-repo badges (genome-atlas / mech-class / pen-score /
165
+ pen-assemble / pen-compare), an architecture diagram, and the problem/gaps explanation.
166
+
167
+ ## [3.0.0a5] - 2026-06-02 - Phase 1.5 (Bridge-recombinase off-target engine -> Paper 4, BEACHHEAD)
168
+
169
+ The first public instrument: a bridge-recombinase off-target screening tool.
170
+
171
+ ### Added
172
+ - **Off-target engine** (`pen_stack/bridge/offtarget.py` + `configs/bridge_offtarget_profile.yaml`):
173
+ genome-wide hg38 pseudosite scan (CT-core seed, per-chromosome, memory-bounded) + a position-weight
174
+ risk model grounded in the published mechanism. **Beats naive Hamming: AUROC 1.00 vs 0.59** at
175
+ separating core-preserving (real-risk) from core-disrupting (abolished) sites. Exposes
176
+ `predict_offtargets(family, site)` - completes the Phase-3 Planner cargo hook.
177
+ - **Fold / cross-loop QC** (`bridge/fold_qc.py`): ViennaRNA fold (verified MFE on a 190-nt design) +
178
+ TBL/DBL cross-loop complementarity.
179
+ - **Activity framework** (`bridge/activity.py`): exploratory DMS + 72-system trainer (deferred; data paywalled).
180
+ - **`pen-bridge`** (`bridge/pipeline.py`, `bridge/cli.py`, `/bridge/design` API): **wraps** the Arc
181
+ BridgeRNADesigner (verified) and adds the off-target + QC layer.
182
+ - `validate/paper4_validation.py` + `scripts/p4_genome_scan.py`; `prereg/paper4.yaml` + SHA lock.
183
+
184
+ ### Notes
185
+ - **Phase 1.5 COMPLETE** - pre-registered criteria met (or honestly gated): the off-target engine,
186
+ ViennaRNA fold, and designer wrap are verified on the VM (real hg38 scan: chr22 in ~21 s). The *blind
187
+ recall of Perry 2025's measured off-targets* and the DMS/activity model are gated on the paywalled
188
+ Perry 2025 supplementary (drop in via `ingest.load_offtarget_profile`). Completes the deferred Phase-2
189
+ Section 2.4 and Phase-3 Section 3.2 hooks. 68 tests green; ruff clean. **All program phases (0,1,1.5,2,3) now done.**
190
+
191
+ ## [3.0.0a4] - 2026-06-02 - Phase 3 (The Write Planner + agentic platform -> Paper 3, CAPSTONE)
192
+
193
+ Inverse design + the paper-defining recovery@k benchmark + the agentic platform.
194
+
195
+ ### Added
196
+ - **Inverse-design optimiser** (`pen_stack/planner/optimize.py`, `configs/intent_weights.yaml`): an
197
+ `edit_intent`-conditioned objective whose `target_gene_sign` flips whether hitting the target gene is
198
+ penalised or rewarded - the same TRAC site ranks #1 (knock-in) vs #101 (safe-harbour).
199
+ - **Cargo/delivery** (`planner/cargo.py`, `planner/delivery.py`): donor spec + size check + delivery rule
200
+ table; bridge/seek off-target via an optional Phase-1.5 hook (pending until 1.5).
201
+ - **End-to-end Planner** (`planner/pipeline.py`, `report.py`, `/plan` API, `pen-stack plan` CLI): ranked,
202
+ fully traceable plans with per-field provenance.
203
+ - **Two-stratum recovery@k benchmark** (`validate/paper3_benchmark.py`, `data/benchmark_panel.csv`,
204
+ `prereg/paper3.yaml`): **discriminating stratum planner 1.00 vs baseline 0.00, McNemar p=0.0156, gap CI
205
+ [1.0,1.0] excludes zero; control tie 0.67=0.67**. Panel cited to Europe-PMC-verified sources.
206
+ - **Forward hypotheses** (`validate/forward_hypotheses.py`): date-stamped novel F8/SERPINA1/CISH/HBA1
207
+ proposals + grounded cited ranking.
208
+ - **Agentic platform**: `agent/tools.py` + `agent/orchestrator.py` (Ollama tool-calling, auditable trace,
209
+ no-fabrication, refusals), `agent/mcp_server.py` (fastmcp), `docker-compose.yml` + `docker/ui.Dockerfile`
210
+ + Streamlit **Agent** page + `docs/DEPLOY.md`/`docs/MCP.md`, `validate/agent_eval.py`.
211
+ - Shipped `data/curated/gene_coords.parquet` (GENCODE-derived) so tools work in any container.
212
+
213
+ ### Notes
214
+ - **Phase 3 COMPLETE** - pre-registered criteria met (`prereg/paper3.yaml` + `SHA256_LOCK_phase3.json`).
215
+ Agent verified on the VM in LLM mode (no-fabrication + plan-equivalence + refusals all pass). 63 tests
216
+ green; ruff clean. Wet-lab (3.7) skipped - non-gating. Bridge off-target hook completes with Phase 1.5.
217
+
218
+ ## [3.0.0a3] - 2026-06-02 - Phase 2 (Writer Atlas + Unified Stack -> Paper 2)
219
+
220
+ The broad, cross-family Writer Atlas, the writer<->locus cross-link, and the installable platform.
221
+
222
+ ### Added
223
+ - **Writer Atlas** (`pen_stack/atlas/expand.py`, `atlas.parquet`): **33,370 systems across 8 families**
224
+ (31,885 IS110/IS1111 orthologs + curated cores/reps), every row confidence-tagged + >=1 source DOI,
225
+ targeting metadata inherited from the WT-KB. `configs/atlas_families.yaml` drives the UniProt queries.
226
+ - **Mechanism at scale** (`pen_stack/mech/`): ported audited 18-family Pfam whitelist v1.2.1; composite
227
+ co-occurrence rules; **core agreement 1.00** vs audited labels; conflicting calls -> review queue.
228
+ - **Therapeutic readiness** (`pen_stack/score/therapeutic.py`): deliverability/cargo/human-cell axes,
229
+ components retained (ISCro4 326aa->AAV).
230
+ - **Cross-link** (`pen_stack/atlas/crosslink.py`): bidirectional writer<->locus queries; AAVS1 held-out
231
+ check passes (0.90 writability + bridge-reachable). Per-family caches for k562/hepg2/hspc.
232
+ - **Variant proposal** (`pen_stack/atlas/variant_propose.py`): point-mutation framework + retrospective
233
+ harness, no chimeras; DMS model pluggable (Phase 1.5).
234
+ - **PEN-MONITOR** (`pen_stack/monitor/`): Europe PMC living-database engine; back-test surfaces ISPpu10;
235
+ never auto-edits the atlas; every candidate cited.
236
+ - **Grounded RAG** (`pen_stack/rag/`, `pen_stack/agent/guardrails.py`): numbers from tool calls, claims
237
+ cited, clinical directives refused; optional Ollama/Qwen phrasing layer (presentation only).
238
+ - **Stack**: unified CLI subcommands, FastAPI server (`pen_stack/server/api.py`), Streamlit platform UI
239
+ (Writer Atlas + Ask pages), mkdocs site + 4 use-case tutorials. 46 tests green; ruff clean.
240
+
241
+ ### Notes
242
+ - **Phase 2 COMPLETE** - pre-registered criteria met (`prereg/paper2.yaml` + `SHA256_LOCK_phase2.json`);
243
+ atlas Zenodo DOI pending author upload. Verified on the VM (Docker): API, UI (:8501), RAG with Qwen.
244
+
245
+ ## [3.0.0a0] - 2026-06-01 - Phase 0 (in progress)
246
+
247
+ Fresh v3.0 monorepo. Supersedes the v1.0 platform repository (archived); consolidates the five prior
248
+ repositories (`genome-atlas`, `mech-class`, `pen-score`, `pen-assemble`, `pen-compare`) as provenance.
249
+
250
+ ### Added
251
+ - Monorepo scaffold: 13 modules (`atlas`, `mech`, `score`, `wgenome`, `planner`, `bridge`, `monitor`,
252
+ `rag`, `agent`, `ui`, `data`, `validate`, `server`), `pyproject.toml`, Docker image spec, `penctl`
253
+ laptop<->VM orchestrator, CI, `configs/`, `prereg/`.
254
+ - `docs/INFRA.md` - three-tier (laptop / VM / Drive) Docker-only, SFTP-only workflow.
255
+ - `configs/llm.yaml` - single LLM switch (Ollama + Qwen2.5-7B-Instruct, Apache-2.0).
256
+ - `configs/datasets.yaml` - pinned dataset accessions + verified IDs (see VERIFICATION_REPORT_v3.0).
257
+
258
+ - **WT-KB** (`pen_stack/atlas/`): 8 fully-sourced writer families with reachability tiers; schema enforces the >=1-DOI sourcing rule.
259
+ - **Re-grounded axes** (`pen_stack/score/recalibrate.py`, `configs/score_axes.yaml`): `S_Cargo` from measured bp, `S_Prog` from targeting modality, `length_aa` backfilled - no per-enzyme overrides.
260
+ - **Canonical universe** (`pen_stack/atlas/universe.py::assemble`): one path joining the 1,058-entity universe + WT-KB + crosswalk; cross-module consistency test.
261
+ - **Descriptive scorecard** (`pen_stack/atlas/scorecard.py`): reframed from the circular certification; blind concordance recovers ISCro4 as the bridge standout without naming it. 21 tests green.
262
+
263
+ ### Notes
264
+ - Independent verification of all datasets/IDs/DOIs/tools completed: no critical errors in the v3.0 plan
265
+ (full report in `Final_Part_v3.0/VERIFICATION_REPORT_v3.0.md`).
266
+ - **Phase 0 COMPLETE** - all pre-registered success criteria met (`prereg/phase0.yaml` + SHA lock).
@@ -1,19 +1,19 @@
1
- cff-version: 1.2.0
2
- message: "If you use PEN-STACK, please cite it as below."
3
- title: "PEN-STACK: open infrastructure for genome writing"
4
- version: 3.1.0
5
- date-released: 2026-06-01
6
- authors:
7
- - family-names: "Mahaboob Ali"
8
- given-names: "Anees Ahmed"
9
- affiliation: "VIT University, Vellore"
10
- email: ahmedaneesm@gmail.com
11
- repository-code: "https://github.com/ahmedanees-m/pen-stack"
12
- license: MIT
13
- keywords:
14
- - genome writing
15
- - writable genome
16
- - writer atlas
17
- - bridge recombinase
18
- - safe harbor
19
- - write planner
1
+ cff-version: 1.2.0
2
+ message: "If you use PEN-STACK, please cite it as below."
3
+ title: "PEN-STACK: open infrastructure for genome writing"
4
+ version: 3.3.0
5
+ date-released: 2026-06-09
6
+ authors:
7
+ - family-names: "Mahaboob Ali"
8
+ given-names: "Anees Ahmed"
9
+ affiliation: "VIT University, Vellore"
10
+ email: ahmedaneesm@gmail.com
11
+ repository-code: "https://github.com/ahmedanees-m/pen-stack"
12
+ license: MIT
13
+ keywords:
14
+ - genome writing
15
+ - writable genome
16
+ - writer atlas
17
+ - bridge recombinase
18
+ - safe harbor
19
+ - write planner