pen-stack 3.2.0__tar.gz → 3.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. {pen_stack-3.2.0 → pen_stack-3.3.0}/CHANGELOG.md +31 -2
  2. {pen_stack-3.2.0 → pen_stack-3.3.0}/CITATION.cff +1 -1
  3. {pen_stack-3.2.0 → pen_stack-3.3.0}/PKG-INFO +49 -18
  4. {pen_stack-3.2.0 → pen_stack-3.3.0}/README.md +48 -17
  5. {pen_stack-3.2.0 → pen_stack-3.3.0}/benchmarks/genome_writing_bench/LEADERBOARD.md +6 -5
  6. {pen_stack-3.2.0 → pen_stack-3.3.0}/benchmarks/genome_writing_bench/SHA256SUMS +1 -1
  7. {pen_stack-3.2.0 → pen_stack-3.3.0}/benchmarks/genome_writing_bench/tasks.yaml +16 -2
  8. pen_stack-3.3.0/configs/delivery_vehicles.yaml +105 -0
  9. pen_stack-3.3.0/configs/rules/delivery.yaml +40 -0
  10. pen_stack-3.3.0/configs/rules/fold.yaml +15 -0
  11. pen_stack-3.3.0/configs/rules/multiplex.yaml +13 -0
  12. pen_stack-3.3.0/configs/rules/payload.yaml +22 -0
  13. pen_stack-3.3.0/configs/rules/reachability.yaml +16 -0
  14. pen_stack-3.3.0/configs/write_types.yaml +57 -0
  15. pen_stack-3.3.0/docs/delivery.md +22 -0
  16. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/mechanistic_constraints.md +16 -9
  17. pen_stack-3.3.0/docs/rules.md +19 -0
  18. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/uncertainty.md +26 -10
  19. pen_stack-3.3.0/docs/verify.md +60 -0
  20. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/__init__.py +1 -1
  21. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/agent/mcp_server.py +11 -0
  22. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/agent/pen_agent.py +16 -0
  23. pen_stack-3.3.0/pen_stack/planner/delivery_vehicles.py +37 -0
  24. pen_stack-3.3.0/pen_stack/planner/router.py +57 -0
  25. pen_stack-3.3.0/pen_stack/rules/__init__.py +9 -0
  26. pen_stack-3.3.0/pen_stack/rules/evaluators.py +192 -0
  27. pen_stack-3.3.0/pen_stack/rules/loader.py +31 -0
  28. pen_stack-3.3.0/pen_stack/rules/schema.py +82 -0
  29. pen_stack-3.3.0/pen_stack/rules/solver.py +43 -0
  30. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/server/api.py +9 -0
  31. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/ui/app.py +37 -1
  32. pen_stack-3.3.0/pen_stack/validate/bench_rule_tasks.py +84 -0
  33. pen_stack-3.3.0/pen_stack/validate/offtarget_energetics_eval.py +144 -0
  34. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/uncertainty_eval.py +54 -0
  35. pen_stack-3.3.0/pen_stack/verify/__init__.py +5 -0
  36. pen_stack-3.3.0/pen_stack/verify/schema.py +34 -0
  37. pen_stack-3.3.0/pen_stack/verify/service.py +90 -0
  38. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack.egg-info/PKG-INFO +49 -18
  39. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack.egg-info/SOURCES.txt +29 -0
  40. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_ba_v33.json +9 -0
  41. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_d.json +10 -0
  42. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_r.json +14 -0
  43. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_route.json +9 -0
  44. pen_stack-3.3.0/prereg/SHA256_LOCK_ws_v.json +8 -0
  45. pen_stack-3.3.0/prereg/ws_ba_v33.yaml +12 -0
  46. pen_stack-3.3.0/prereg/ws_d.yaml +11 -0
  47. pen_stack-3.3.0/prereg/ws_r.yaml +51 -0
  48. pen_stack-3.3.0/prereg/ws_route.yaml +9 -0
  49. pen_stack-3.3.0/prereg/ws_v.yaml +15 -0
  50. {pen_stack-3.2.0 → pen_stack-3.3.0}/pyproject.toml +1 -1
  51. pen_stack-3.2.0/pen_stack/validate/offtarget_energetics_eval.py +0 -102
  52. pen_stack-3.2.0/prereg/SHA256_LOCK_ws_d.json +0 -9
  53. pen_stack-3.2.0/prereg/ws_d.yaml +0 -29
  54. {pen_stack-3.2.0 → pen_stack-3.3.0}/LICENSE +0 -0
  55. {pen_stack-3.2.0 → pen_stack-3.3.0}/MANIFEST.in +0 -0
  56. {pen_stack-3.2.0 → pen_stack-3.3.0}/bench/run.py +0 -0
  57. {pen_stack-3.2.0 → pen_stack-3.3.0}/benchmarks/genome_writing_bench/README.md +0 -0
  58. {pen_stack-3.2.0 → pen_stack-3.3.0}/benchmarks/genome_writing_bench/SUBMISSIONS.md +0 -0
  59. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/atlas_families.yaml +0 -0
  60. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/bridge_offtarget_profile.yaml +0 -0
  61. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/cargo_polish.yaml +0 -0
  62. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/datasets.yaml +0 -0
  63. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/delivery_constraints.yaml +0 -0
  64. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/delivery_rules.yaml +0 -0
  65. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/gates_v3.yaml +0 -0
  66. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/gsh_validated_heldout.yaml +0 -0
  67. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/intent_weights.yaml +0 -0
  68. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/known_unknowns.yaml +0 -0
  69. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/llm.yaml +0 -0
  70. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/monitor_queries.yaml +0 -0
  71. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/score_axes.yaml +0 -0
  72. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/target_sites.yaml +0 -0
  73. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/universe_crosswalk.yaml +0 -0
  74. {pen_stack-3.2.0 → pen_stack-3.3.0}/configs/wtkb_curated.yaml +0 -0
  75. {pen_stack-3.2.0 → pen_stack-3.3.0}/data/curated/bridge_offtarget_energetics.json +0 -0
  76. {pen_stack-3.2.0 → pen_stack-3.3.0}/data/curated/bridge_offtarget_profile_measured.parquet +0 -0
  77. {pen_stack-3.2.0 → pen_stack-3.3.0}/data/curated/gene_coords.parquet +0 -0
  78. {pen_stack-3.2.0 → pen_stack-3.3.0}/data/curated/unified_editor_universe.parquet +0 -0
  79. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/BACKLOG.md +0 -0
  80. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/DEPLOY.md +0 -0
  81. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/INFRA.md +0 -0
  82. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/MCP.md +0 -0
  83. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/RELEASING.md +0 -0
  84. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/REPRO.md +0 -0
  85. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/agent.md +0 -0
  86. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/alphagenome_feasibility.md +0 -0
  87. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/benchmark_circularity.md +0 -0
  88. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/cards/atlas.md +0 -0
  89. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/cards/durability.md +0 -0
  90. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/cards/safety.md +0 -0
  91. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/dissemination.md +0 -0
  92. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/index.md +0 -0
  93. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/positioning.md +0 -0
  94. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/private_data_formats.md +0 -0
  95. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/quickstart.md +0 -0
  96. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/scope.md +0 -0
  97. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/scorecard.md +0 -0
  98. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/tutorials/compare-families.md +0 -0
  99. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/tutorials/score-deliverability.md +0 -0
  100. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/tutorials/where-can-i-write.md +0 -0
  101. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/tutorials/which-writer-reaches-locus.md +0 -0
  102. {pen_stack-3.2.0 → pen_stack-3.3.0}/docs/wtkb.md +0 -0
  103. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/_resources.py +0 -0
  104. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/adapt/__init__.py +0 -0
  105. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/adapt/finetune.py +0 -0
  106. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/adapt/ingest.py +0 -0
  107. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/adapt/pipeline.py +0 -0
  108. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/adapt/recalibrate.py +0 -0
  109. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/adapt/report.py +0 -0
  110. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/agent/__init__.py +0 -0
  111. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/agent/epistemic.py +0 -0
  112. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/agent/guardrails.py +0 -0
  113. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/agent/orchestrator.py +0 -0
  114. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/agent/scope.py +0 -0
  115. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/agent/tools.py +0 -0
  116. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/atlas/__init__.py +0 -0
  117. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/atlas/build_wtkb.py +0 -0
  118. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/atlas/crosslink.py +0 -0
  119. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/atlas/expand.py +0 -0
  120. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/atlas/schema.py +0 -0
  121. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/atlas/scorecard.py +0 -0
  122. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/atlas/universe.py +0 -0
  123. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/atlas/variant_propose.py +0 -0
  124. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/bridge/__init__.py +0 -0
  125. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/bridge/activity.py +0 -0
  126. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/bridge/cli.py +0 -0
  127. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/bridge/fold_qc.py +0 -0
  128. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/bridge/guide_qc.py +0 -0
  129. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/bridge/ingest.py +0 -0
  130. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/bridge/offtarget.py +0 -0
  131. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/bridge/offtarget_energetics.py +0 -0
  132. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/bridge/ortholog_screen.py +0 -0
  133. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/bridge/pipeline.py +0 -0
  134. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/cli.py +0 -0
  135. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/data/__init__.py +0 -0
  136. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/data/encode.py +0 -0
  137. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/data/genome.py +0 -0
  138. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/data/ingest_chromatin.py +0 -0
  139. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/data/ingest_integration.py +0 -0
  140. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/data/ingest_safety_annot.py +0 -0
  141. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/data/ingest_trip.py +0 -0
  142. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/env/__init__.py +0 -0
  143. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/env/genome_writing_env.py +0 -0
  144. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/mech/__init__.py +0 -0
  145. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/mech/classify_atlas.py +0 -0
  146. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/mech/whitelist.py +0 -0
  147. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/monitor/__init__.py +0 -0
  148. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/monitor/europepmc.py +0 -0
  149. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/monitor/run.py +0 -0
  150. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/monitor/triage.py +0 -0
  151. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/planner/__init__.py +0 -0
  152. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/planner/cargo.py +0 -0
  153. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/planner/cargo_polish.py +0 -0
  154. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/planner/delivery.py +0 -0
  155. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/planner/delivery_constraints.py +0 -0
  156. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/planner/multiplex.py +0 -0
  157. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/planner/optimize.py +0 -0
  158. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/planner/pipeline.py +0 -0
  159. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/planner/report.py +0 -0
  160. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/planner/target_site.py +0 -0
  161. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/rag/__init__.py +0 -0
  162. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/rag/index.py +0 -0
  163. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/rag/llm.py +0 -0
  164. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/rag/qa.py +0 -0
  165. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/score/__init__.py +0 -0
  166. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/score/recalibrate.py +0 -0
  167. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/score/therapeutic.py +0 -0
  168. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/server/__init__.py +0 -0
  169. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/ui/__init__.py +0 -0
  170. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/__init__.py +0 -0
  171. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/adapt_demo.py +0 -0
  172. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/agent_eval.py +0 -0
  173. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/bench_trust_tasks.py +0 -0
  174. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/blind_gsh_discovery.py +0 -0
  175. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/cargo_directionality.py +0 -0
  176. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/durability_baselines.py +0 -0
  177. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/forward_hypotheses.py +0 -0
  178. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/guide_qc_demo.py +0 -0
  179. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/intent_specification.py +0 -0
  180. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/out_of_scope_refusal.py +0 -0
  181. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/paper3_benchmark.py +0 -0
  182. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/paper4_real_validation.py +0 -0
  183. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/paper4_validation.py +0 -0
  184. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/selective_prediction.py +0 -0
  185. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/seq_vs_measured.py +0 -0
  186. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/target_site_controls.py +0 -0
  187. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/ungrounded_baseline.py +0 -0
  188. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/within_locus_ranking.py +0 -0
  189. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/validate/writer_recovery.py +0 -0
  190. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/wgenome/__init__.py +0 -0
  191. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/wgenome/chromatin_seq.py +0 -0
  192. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/wgenome/durability.py +0 -0
  193. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/wgenome/export_tracks.py +0 -0
  194. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/wgenome/features.py +0 -0
  195. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/wgenome/gsh_baseline.py +0 -0
  196. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/wgenome/ood.py +0 -0
  197. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/wgenome/providers.py +0 -0
  198. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/wgenome/safety.py +0 -0
  199. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/wgenome/structure3d.py +0 -0
  200. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/wgenome/uncertainty.py +0 -0
  201. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack/wgenome/writability.py +0 -0
  202. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack.egg-info/dependency_links.txt +0 -0
  203. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack.egg-info/entry_points.txt +0 -0
  204. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack.egg-info/requires.txt +0 -0
  205. {pen_stack-3.2.0 → pen_stack-3.3.0}/pen_stack.egg-info/top_level.txt +0 -0
  206. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_phase0.json +0 -0
  207. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_phase1_5.json +0 -0
  208. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_phase2.json +0 -0
  209. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_phase3.json +0 -0
  210. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_a.json +0 -0
  211. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_b.json +0 -0
  212. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_ba.json +0 -0
  213. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_c.json +0 -0
  214. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_e.json +0 -0
  215. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_ep.json +0 -0
  216. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_f.json +0 -0
  217. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_g.json +0 -0
  218. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_h.json +0 -0
  219. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_mc.json +0 -0
  220. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/SHA256_LOCK_ws_uq.json +0 -0
  221. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/paper1.yaml +0 -0
  222. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/paper2.yaml +0 -0
  223. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/paper3.yaml +0 -0
  224. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/paper4.yaml +0 -0
  225. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/phase0.yaml +0 -0
  226. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/ws_a.yaml +0 -0
  227. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/ws_b.yaml +0 -0
  228. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/ws_ba.yaml +0 -0
  229. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/ws_c.yaml +0 -0
  230. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/ws_e.yaml +0 -0
  231. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/ws_ep.yaml +0 -0
  232. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/ws_f.yaml +0 -0
  233. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/ws_g.yaml +0 -0
  234. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/ws_h.yaml +0 -0
  235. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/ws_mc.yaml +0 -0
  236. {pen_stack-3.2.0 → pen_stack-3.3.0}/prereg/ws_uq.yaml +0 -0
  237. {pen_stack-3.2.0 → pen_stack-3.3.0}/scripts/p1_build_atlas.py +0 -0
  238. {pen_stack-3.2.0 → pen_stack-3.3.0}/scripts/p1_build_durability.py +0 -0
  239. {pen_stack-3.2.0 → pen_stack-3.3.0}/scripts/p1_export_tracks.py +0 -0
  240. {pen_stack-3.2.0 → pen_stack-3.3.0}/scripts/p1_safety_concordance.py +0 -0
  241. {pen_stack-3.2.0 → pen_stack-3.3.0}/scripts/p1_train_safety.py +0 -0
  242. {pen_stack-3.2.0 → pen_stack-3.3.0}/scripts/p1_validation_report.py +0 -0
  243. {pen_stack-3.2.0 → pen_stack-3.3.0}/scripts/p2_build_atlas.py +0 -0
  244. {pen_stack-3.2.0 → pen_stack-3.3.0}/scripts/p3_benchmark_report.py +0 -0
  245. {pen_stack-3.2.0 → pen_stack-3.3.0}/scripts/p4_genome_scan.py +0 -0
  246. {pen_stack-3.2.0 → pen_stack-3.3.0}/scripts/ws_b_report.py +0 -0
  247. {pen_stack-3.2.0 → pen_stack-3.3.0}/scripts/ws_c_report.py +0 -0
  248. {pen_stack-3.2.0 → pen_stack-3.3.0}/setup.cfg +0 -0
@@ -3,6 +3,32 @@
3
3
  All notable changes to PEN-STACK are documented here. This file follows
4
4
  [Keep a Changelog](https://keepachangelog.com/) and the program's phase structure.
5
5
 
6
+ ## [3.3.0] - 2026-06-09 - v3.3 release: the Verifier (a type checker for genome writes)
7
+
8
+ v3.3 lifts the laws of genome writing into a versioned, machine-readable rule base and exposes a single
9
+ `verify(design) -> Verdict` call (legal/illegal + named rule + calibrated confidence + scope) over Python,
10
+ REST, and MCP. Workstreams WS-{R,D,ROUTE,V,BA}, each SHA-locked.
11
+
12
+ ### Added
13
+ - **WS-R - rule base + solver.** `pen_stack/rules/{schema,evaluators,loader,solver}.py` + `configs/rules/*.yaml`
14
+ (9 rules across reachability/fold/payload/multiplex/delivery), each id/kind/mechanism/param/provenance(DOI)/
15
+ test. Evaluators delegate to the existing validated functions; a parity test proves no decision changed.
16
+ Legality and confidence are kept as distinct axes.
17
+ - **WS-D - delivery palette.** `configs/delivery_vehicles.yaml` + `planner/delivery_vehicles.py`: 8 vehicles
18
+ (AAV single/dual, lentivirus, HDAd, HSV amplicon, LNP-mRNA, eVLP, electroporation) with capacity/integration/
19
+ cargo-form/DOIs; delivery rules (hard rejects + soft penalties + an immunogenicity-magnitude scope flag).
20
+ - **WS-ROUTE - write-type router.** `planner/router.py` + `configs/write_types.yaml`: dispatches insertion/
21
+ excision/inversion/replacement/regulatory_rewrite/landing_pad_install/multiplex; unsupported types defer.
22
+ - **WS-V - verification service.** `pen_stack/verify/{service,schema}.py`: `verify(design) -> Verdict`; `POST
23
+ /verify` + MCP `verify_write`; `docs/verify.md`. No fabrication (every number tool-sourced).
24
+ - **WS-BA - bench v0.2.1 + agent.** T12 rule-grounded legality-with-explanation (verifier reason accuracy 1.0
25
+ vs ungrounded 0.0); the agent submits its plan to the verifier. Bench 12/12 available, planner beats baseline
26
+ 8/8.
27
+ - **Docs:** `docs/verify.md`, `docs/rules.md`, `docs/delivery.md`.
28
+
29
+ ### Changed
30
+ - Version 3.2.0 -> 3.3.0 (pyproject, `__init__`, CITATION.cff). README "what is new in v3.3"; bench badge v0.2.1.
31
+
6
32
  ## [3.2.0] - 2026-06-08 - v3.2 release: a calibrated, self-aware co-scientist
7
33
 
8
34
  The v3.2 cycle makes the genome-writing funnel **trustworthy**: every value carries a calibrated confidence,
@@ -24,8 +50,11 @@ honest negatives. The Genome-Writing Bench bumps to **v0.2**.
24
50
  - **WS-MC - mechanistic filters.** A hard target-site/PAM/att-site reachability reject
25
51
  (`pen_stack.planner.target_site`, `configs/target_sites.yaml`; controls 9/9); vehicle-specific
26
52
  delivery-sequence penalties (`pen_stack.planner.delivery_constraints`); and an off-target **energetics**
27
- model (`pen_stack.bridge.offtarget_energetics`) that beats the 0.77 baseline at held-out AUROC 0.88 (robust
28
- over 5 seeds) and ships as the default ranker.
53
+ model (`pen_stack.bridge.offtarget_energetics`) that beats the 0.77 baseline at held-out AUROC 0.88 on the
54
+ comparable (core-disrupted) construction and ships as the default ranker. A reviewer-driven re-run
55
+ (`by_negative_construction`) shows that gap is mostly the core-penalisation artifact; with the core held
56
+ matched the non-core substitution-identity gain is real but modest (Δ≈0.04, 0.687 vs 0.646). Both AUROCs
57
+ carry a favourable-negative-set caveat (decoys derived from real off-targets; no non-recombining background).
29
58
  - **WS-BA - bench v0.2 + uncertainty-aware agent.** Four trust tasks (T8 calibration, T9 selective prediction,
30
59
  T10 OOD honesty, T11 out-of-scope refusal) contrasting the uncertainty-aware agent with an over-confident
31
60
  baseline (4/4); PEN-Agent emits confidence + epistemic status + abstains; UI surfaces them. Bench re-SHA-locked.
@@ -1,7 +1,7 @@
1
1
  cff-version: 1.2.0
2
2
  message: "If you use PEN-STACK, please cite it as below."
3
3
  title: "PEN-STACK: open infrastructure for genome writing"
4
- version: 3.2.0
4
+ version: 3.3.0
5
5
  date-released: 2026-06-01
6
6
  authors:
7
7
  - family-names: "Mahaboob Ali"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pen-stack
3
- Version: 3.2.0
3
+ Version: 3.3.0
4
4
  Summary: Open infrastructure for genome writing: the Writable Genome atlas, the Writer Atlas, and the Write Planner.
5
5
  Author-email: Anees Ahmed Mahaboob Ali <ahmedaneesm@gmail.com>
6
6
  License: MIT
@@ -89,12 +89,12 @@ and durably write new DNA, **which enzyme** can write it there, and **how** to d
89
89
  [![codecov](https://codecov.io/gh/ahmedanees-m/pen-stack/branch/main/graph/badge.svg)](https://codecov.io/gh/ahmedanees-m/pen-stack)
90
90
  [![License: MIT](https://img.shields.io/badge/License-MIT-informational.svg)](LICENSE)
91
91
  [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/)
92
- [![Version](https://img.shields.io/badge/version-3.2.0-blue.svg)](CHANGELOG.md)
93
- [![Tests](https://img.shields.io/badge/tests-176%20passing-success.svg)](tests/)
92
+ [![Version](https://img.shields.io/badge/version-3.3.0-blue.svg)](CHANGELOG.md)
93
+ [![Tests](https://img.shields.io/badge/tests-179%20passing-success.svg)](tests/)
94
94
  [![Lint: ruff](https://img.shields.io/badge/lint-ruff-purple.svg)](https://github.com/astral-sh/ruff)
95
95
  [![Runtime: Docker](https://img.shields.io/badge/runtime-docker-2496ED.svg)](docker/)
96
96
  [![Validation: pre-registered](https://img.shields.io/badge/validation-pre--registered-critical.svg)](prereg/)
97
- [![Genome-Writing Bench v0.2](https://img.shields.io/badge/benchmark-Genome--Writing%20Bench%20v0.2-6f42c1.svg)](benchmarks/genome_writing_bench/)
97
+ [![Genome-Writing Bench v0.2](https://img.shields.io/badge/benchmark-Genome--Writing%20Bench%20v0.2.1-6f42c1.svg)](benchmarks/genome_writing_bench/)
98
98
 
99
99
  **Built on five prior, separately published repositories:**
100
100
 
@@ -133,6 +133,24 @@ Two questions gate every genome-writing project, and before PEN-STACK no resourc
133
133
  Everything is built on bulk-downloadable public data, runs on a single GPU, and is validated **blind** against
134
134
  a pre-registered, honest baseline before release.
135
135
 
136
+ ## What is new in v3.3 — the Verifier (a type checker for genome writes)
137
+
138
+ v3.3 lifts the *laws of genome writing* out of code into a **versioned, machine-readable rule base** and
139
+ exposes a single **`verify(design) → Verdict`** call: submit a proposed write and get back *legal / illegal +
140
+ the named violated rule + a calibrated confidence + a scope flag* — over Python, REST (`POST /verify`), and an
141
+ MCP tool (`verify_write`) any AI agent can submit to. PEN-STACK becomes the layer that *checks* what the
142
+ foundation models *generate*.
143
+
144
+ | Workstream | What it adds | Result |
145
+ |---|---|---|
146
+ | **R — rule base + solver** | the laws lifted into `configs/rules/*.yaml` (9 rules: reachability, fold, payload, multiplex, delivery), each id/kind/mechanism/param/**citation**/test; a solver returning legality + named reasons | a **parity test** proves the rules reproduce the prior in-code decisions (relocation, not behaviour change); positives legal, negatives rejected by the **correct named rule** |
147
+ | **D — delivery palette** | the AAV-only assumption replaced by an **8-vehicle palette** (AAV single/dual, lentivirus, HDAd ~35 kb, HSV amplicon >100 kb, LNP-mRNA, eVLP, electroporation) with capacity/integration/cargo-form/DOIs | hard rejects (cargo>capacity, RNP-into-DNA-only-vehicle, non-integrating-goal+integrating-vehicle); immunogenicity *magnitude* declared out-of-scope, never predicted |
148
+ | **ROUTE — write-type router** | the fixed insertion chain becomes one sub-graph of a router over insertion / excision / inversion / replacement / regulatory-rewrite / landing-pad / multiplex | each type routes to its rule sub-graph; unsupported/ambiguous types **defer**, never guess |
149
+ | **V — verification service** | `verify(design) → Verdict` over Python/REST/MCP; legality (rules) + confidence (v3.2 L4) + scope, kept as **distinct axes** | every Verdict carries legality + (confidence ∨ abstention) + scope; **no fabrication** (every number tool-sourced) |
150
+ | **BA — bench + agent** | Bench **v0.2.1** adds **T12 rule-grounded legality-with-explanation**; the agent submits its own plan to the verifier | verifier verdict+reason accuracy **1.0**; an ungrounded judge cannot cite a rule (0.0) — the verifier uniquely supplies grounded reasons; no-fabrication intact |
151
+
152
+ See `docs/verify.md`, `docs/rules.md`, `docs/delivery.md`.
153
+
136
154
  ## What is new in v3.2 — a calibrated, self-aware co-scientist
137
155
 
138
156
  v3.2 makes the genome-writing funnel **trustworthy**: every value the funnel returns now carries a calibrated
@@ -143,9 +161,9 @@ each number* and *where the edge of its knowledge is*. Every workstream is pre-r
143
161
 
144
162
  | Workstream | What it adds | Honest headline result |
145
163
  |---|---|---|
146
- | **UQ — calibrated uncertainty + OOD** | conformal prediction intervals / sets over the existing heads (no retraining), an out-of-distribution detector, and selective prediction | durability **expression interval covers 0.895** vs 0.90 nominal on held-out chromosomes (within tolerance); the silenced set over-covers (0.996) because the head is weak — honest; **risk-coverage: accuracy rises 0.739→0.930** as low-confidence predictions are abstained (the uncertainty is *useful*). OOD across human cell types is **weak** (K562→HSPC AUROC 0.72, K562→HepG2 0.65–0.73) — chromatin marks are conserved across cell types; reported as a heuristic signal, not a guarantee |
164
+ | **UQ — calibrated uncertainty + OOD** | conformal prediction intervals / sets over the existing heads (no retraining), an out-of-distribution detector, and selective prediction | calibrated UQ is **useful on the expression axis**: the durability **expression interval covers 0.895** vs 0.90 nominal on held-out chromosomes (within tolerance) and **risk-coverage accuracy rises 0.739→0.930** under abstention. On the **silenced axis it is informative-in-name-only** at this N — the set covers 0.996 with mean size 1.93 of 2 (the full label set), because the head is weak (we say so plainly). OOD fires strongly on a real **chromatin-state** shift (euchromatin→heterochromatin AUROC **0.98**) but is **weak across biological context** — K562→HSPC 0.72, K562→HepG2 0.65, even cross-species mESC→human **0.56** — because chromatin-mark distributions barely move across cell types/species; reported as a heuristic feature-space-novelty signal, not a guarantee |
147
165
  | **EP — epistemic scope** | a three-tier status (grounded-confident / grounded-extrapolating / not-computable) on every output, plus a known-unknowns registry + scope matcher | out-of-scope probes deferred **1.0**, in-scope false-defer **0.0** (zero fabrication); the no-fabrication hard gate still holds. The unknown funnel (structure→phenotype, in-vivo immunogenicity, long-term durability, epistasis, polygenic, germline) is made *legible*, not closed |
148
- | **MC — mechanistic filters** | a hard target-site/PAM/att-site reachability reject, vehicle-specific delivery-sequence penalties, and an off-target **energetics** model | positive+negative target-site controls 9/9 (a physically impossible writer–site pairing is rejected); **off-target energetics (position × substitution identity) beats the 0.77 baseline at held-out AUROC 0.88** (robust over 5 seeds) and ships as the default ranker |
166
+ | **MC — mechanistic filters** | a hard target-site/PAM/att-site reachability reject, vehicle-specific delivery-sequence penalties, and an off-target **energetics** model | positive+negative target-site controls 9/9 (a physically impossible writer–site pairing is rejected); off-target **energetics beats the 0.77 baseline at AUROC 0.88** on the comparable (core-disrupted) construction and ships as the default ranker — but a reviewer-driven re-run shows that gap is *mostly the core-penalisation artifact*: with the core held matched, the non-core substitution-identity gain is real but **modest (Δ≈0.04: 0.687 vs 0.646)**; both AUROCs carry a favourable-negative-set caveat |
149
167
  | **BA — bench v0.2 + uncertainty-aware agent** | four trust tasks (T8 calibration, T9 selective prediction, T10 OOD honesty, T11 out-of-scope) + the agent emits confidence + epistemic status + abstains | the uncertainty-aware agent beats an over-confident baseline **4/4** on the trust tasks; the leaderboard now separates *trustworthy* agents, not just grounded ones |
150
168
 
151
169
  Optional: a thin **Gymnasium environment interface** (`pen_stack/env/`, `[env]` extra) for agent-developer
@@ -251,7 +269,7 @@ PEN-STACK is organised as **two reference layers + one engine + a services layer
251
269
  magnitude, rho approximately 0.30). A first-of-its-kind beachhead for a genuinely unoccupied gap, not a
252
270
  Nature-tier breakthrough; the Writable Genome (Paper 1) remains the flagship novelty.
253
271
 
254
- ## The Genome-Writing Bench (v0.2, M2)
272
+ ## The Genome-Writing Bench (v0.2.1, M2)
255
273
 
256
274
  The first benchmark for the **writing** side of genome engineering - *where* to write, *what* writer to use,
257
275
  *how* to design the cargo, and *what off-target / structural risk* a write carries - complementing the many
@@ -321,6 +339,9 @@ pen-stack/
321
339
  │ │ ├── safety.py calibrated genotoxicity-risk model (chrom-block CV + baseline)
322
340
  │ │ ├── durability.py conditional chromatin->expression model (TRIP-trained, transferable)
323
341
  │ │ ├── writability.py decomposable safety x durability x reachability integration
342
+ │ │ ├── uncertainty.py v3.2 conformal intervals/sets over the heads (no retraining)
343
+ │ │ ├── ood.py v3.2 out-of-distribution / extrapolation detector
344
+ │ │ ├── structure3d.py 3D structural-risk axis (AlphaGenome contact-map deltas, 11 hijack loci)
324
345
  │ │ └── export_tracks.py BigWig / BED atlas export
325
346
  │ ├── atlas/ Writer Atlas + WT-KB + cross-link (Papers 1-2)
326
347
  │ │ ├── schema.py pydantic WriterEntry (enforces >=1 DOI per row)
@@ -333,29 +354,39 @@ pen-stack/
333
354
  │ ├── mech/ mechanism classification at scale (audited 18-family whitelist v1.2.1)
334
355
  │ ├── score/ re-grounded axes + therapeutic-readiness scoring
335
356
  │ ├── planner/ Write Planner (Paper 3): optimize / cargo / cargo_polish / multiplex / pipeline
357
+ │ │ + v3.2 target_site (hard PAM/att/core reject) / delivery_constraints
358
+ │ │ + v3.3 router (write-type dispatch) / delivery_vehicles (8-vehicle palette)
336
359
  │ ├── bridge/ bridge off-target engine (Paper 4): offtarget / fold_qc / guide_qc / pipeline / cli
360
+ │ │ + v3.2 offtarget_energetics (position x substitution; held-out 0.88, ships)
337
361
  │ ├── agent/ agentic platform: tools / orchestrator / pen_agent / mcp_server / guardrails
362
+ │ │ + v3.2 epistemic (3-tier status) / scope (known-unknowns matcher)
363
+ │ ├── rules/ v3.3 machine-readable rules engine (schema/evaluators/loader/solver) over configs/rules/*.yaml
364
+ │ ├── verify/ v3.3 verification service: verify(design) -> Verdict (legal+reasons+confidence+scope)
338
365
  │ ├── adapt/ local recalibration / private-data adaptation behind a gate (v3.1, WS-F)
366
+ │ ├── env/ v3.2 optional Gymnasium interface (genome_writing_env; [env] extra)
339
367
  │ ├── monitor/ PEN-MONITOR living database (Europe PMC)
340
368
  │ ├── rag/ grounded, cited Q&A (hybrid LLM: Ollama primary, Nemotron fallback)
341
- │ ├── validate/ benchmarks: blind_gsh_discovery / durability_baselines (B1+B2 with CIs) /
342
- │ │ seq_vs_measured / writer_recovery / within_locus_ranking / agent_eval /
343
- │ │ ungrounded_baseline (T7) / guide_qc_demo / adapt_demo
344
- ├── wgenome/structure3d.py 3D structural-risk axis (AlphaGenome contact-map deltas, 11 hijack loci)
369
+ │ ├── validate/ benchmarks: blind_gsh_discovery / durability_baselines / writer_recovery /
370
+ │ │ within_locus_ranking / agent_eval / ungrounded_baseline (T7) / adapt_demo /
371
+ │ │ v3.2 selective_prediction / uncertainty_eval / bench_trust_tasks (T8-T11) /
372
+ │ │ out_of_scope_refusal / target_site_controls / offtarget_energetics_eval
345
373
  │ ├── data/ ingestion (genome, chromatin, integration, TRIP, safety annotations)
346
374
  │ ├── server/api.py FastAPI REST (atlas, crosslink, writable, plan, bridge, ask)
347
- │ ├── ui/app.py Streamlit web app (11 pages)
375
+ │ ├── ui/app.py Streamlit web app (16 pages; v3.2 PEN-Agent shows confidence + epistemic status)
348
376
  │ └── cli.py unified CLI
349
- ├── benchmarks/genome_writing_bench/ Genome-Writing Bench v0.1 (tasks / harness / solvers / LEADERBOARD / SHAs)
377
+ ├── benchmarks/genome_writing_bench/ Genome-Writing Bench v0.2.1 (T1-T12; tasks / harness / solvers / LEADERBOARD / SHAs)
350
378
  ├── bench/run.py one-command bench entrypoint (--agent, --verify)
351
379
  ├── scripts/ reproducible pipeline drivers (p1_*, p2_*, p4_*, ws_*_report)
352
- ├── configs/ pinned datasets + thresholds + curation (YAML; gsh_validated_heldout = 51-locus gold set)
353
- ├── prereg/ SHA-locked success criteria (paper1..4 + ws_a..ws_h + locks)
354
- ├── data/curated/ small committed tables (universe, gene coords, measured bridge profile)
380
+ ├── configs/ pinned datasets + thresholds + curation (YAML); v3.2: known_unknowns /
381
+ │ target_sites / delivery_constraints
382
+ ├── prereg/ SHA-locked success criteria (paper1..4 + ws_a..ws_h + v3.2 ws_{uq,ep,mc,ba} + locks)
383
+ ├── data/curated/ small committed tables (universe, gene coords, measured bridge profile,
384
+ │ v3.2 bridge_offtarget_energetics.json)
355
385
  ├── data/llm_bench_cache/ 28 cached ungrounded-LLM transcripts (T7, offline/CI replay)
356
386
  ├── data/alphagenome_cache/ cached AlphaGenome predictions (tracks + contact maps; offline reproducibility)
357
387
  ├── tests/unit/ unit + regression + blind-validation suite
358
- ├── docs/ mkdocs site (cards, tutorials, INFRA, DEPLOY, MCP)
388
+ ├── docs/ mkdocs site (cards, tutorials, INFRA, DEPLOY, MCP);
389
+ │ v3.2: uncertainty.md / scope.md / mechanistic_constraints.md / BACKLOG.md
359
390
  ├── docker/ CUDA image + UI image + pinned requirements
360
391
  ├── tools/penctl.py laptop<->VM orchestrator (paramiko SSH/SFTP, Docker-only)
361
392
  ├── docker-compose.yml one-command self-hostable platform
@@ -487,7 +518,7 @@ plan. Data releases are deposited on Zenodo (one per paper).
487
518
  author = {Mahaboob Ali, Anees Ahmed},
488
519
  title = {PEN-STACK: open infrastructure for genome writing (The Writable Genome)},
489
520
  year = {2026},
490
- version = {3.1.0},
521
+ version = {3.3.0},
491
522
  url = {https://github.com/ahmedanees-m/pen-stack}
492
523
  }
493
524
  ```
@@ -14,12 +14,12 @@ and durably write new DNA, **which enzyme** can write it there, and **how** to d
14
14
  [![codecov](https://codecov.io/gh/ahmedanees-m/pen-stack/branch/main/graph/badge.svg)](https://codecov.io/gh/ahmedanees-m/pen-stack)
15
15
  [![License: MIT](https://img.shields.io/badge/License-MIT-informational.svg)](LICENSE)
16
16
  [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/)
17
- [![Version](https://img.shields.io/badge/version-3.2.0-blue.svg)](CHANGELOG.md)
18
- [![Tests](https://img.shields.io/badge/tests-176%20passing-success.svg)](tests/)
17
+ [![Version](https://img.shields.io/badge/version-3.3.0-blue.svg)](CHANGELOG.md)
18
+ [![Tests](https://img.shields.io/badge/tests-179%20passing-success.svg)](tests/)
19
19
  [![Lint: ruff](https://img.shields.io/badge/lint-ruff-purple.svg)](https://github.com/astral-sh/ruff)
20
20
  [![Runtime: Docker](https://img.shields.io/badge/runtime-docker-2496ED.svg)](docker/)
21
21
  [![Validation: pre-registered](https://img.shields.io/badge/validation-pre--registered-critical.svg)](prereg/)
22
- [![Genome-Writing Bench v0.2](https://img.shields.io/badge/benchmark-Genome--Writing%20Bench%20v0.2-6f42c1.svg)](benchmarks/genome_writing_bench/)
22
+ [![Genome-Writing Bench v0.2.1](https://img.shields.io/badge/benchmark-Genome--Writing%20Bench%20v0.2.1-6f42c1.svg)](benchmarks/genome_writing_bench/)
23
23
 
24
24
  **Built on five prior, separately published repositories:**
25
25
 
@@ -58,6 +58,24 @@ Two questions gate every genome-writing project, and before PEN-STACK no resourc
58
58
  Everything is built on bulk-downloadable public data, runs on a single GPU, and is validated **blind** against
59
59
  a pre-registered, honest baseline before release.
60
60
 
61
+ ## What is new in v3.3 — the Verifier (a type checker for genome writes)
62
+
63
+ v3.3 lifts the *laws of genome writing* out of code into a **versioned, machine-readable rule base** and
64
+ exposes a single **`verify(design) → Verdict`** call: submit a proposed write and get back *legal / illegal +
65
+ the named violated rule + a calibrated confidence + a scope flag* — over Python, REST (`POST /verify`), and an
66
+ MCP tool (`verify_write`) any AI agent can submit to. PEN-STACK becomes the layer that *checks* what the
67
+ foundation models *generate*.
68
+
69
+ | Workstream | What it adds | Result |
70
+ |---|---|---|
71
+ | **R — rule base + solver** | the laws lifted into `configs/rules/*.yaml` (9 rules: reachability, fold, payload, multiplex, delivery), each id/kind/mechanism/param/**citation**/test; a solver returning legality + named reasons | a **parity test** proves the rules reproduce the prior in-code decisions (relocation, not behaviour change); positives legal, negatives rejected by the **correct named rule** |
72
+ | **D — delivery palette** | the AAV-only assumption replaced by an **8-vehicle palette** (AAV single/dual, lentivirus, HDAd ~35 kb, HSV amplicon >100 kb, LNP-mRNA, eVLP, electroporation) with capacity/integration/cargo-form/DOIs | hard rejects (cargo>capacity, RNP-into-DNA-only-vehicle, non-integrating-goal+integrating-vehicle); immunogenicity *magnitude* declared out-of-scope, never predicted |
73
+ | **ROUTE — write-type router** | the fixed insertion chain becomes one sub-graph of a router over insertion / excision / inversion / replacement / regulatory-rewrite / landing-pad / multiplex | each type routes to its rule sub-graph; unsupported/ambiguous types **defer**, never guess |
74
+ | **V — verification service** | `verify(design) → Verdict` over Python/REST/MCP; legality (rules) + confidence (v3.2 L4) + scope, kept as **distinct axes** | every Verdict carries legality + (confidence ∨ abstention) + scope; **no fabrication** (every number tool-sourced) |
75
+ | **BA — bench + agent** | Bench **v0.2.1** adds **T12 rule-grounded legality-with-explanation**; the agent submits its own plan to the verifier | verifier verdict+reason accuracy **1.0**; an ungrounded judge cannot cite a rule (0.0) — the verifier uniquely supplies grounded reasons; no-fabrication intact |
76
+
77
+ See `docs/verify.md`, `docs/rules.md`, `docs/delivery.md`.
78
+
61
79
  ## What is new in v3.2 — a calibrated, self-aware co-scientist
62
80
 
63
81
  v3.2 makes the genome-writing funnel **trustworthy**: every value the funnel returns now carries a calibrated
@@ -68,9 +86,9 @@ each number* and *where the edge of its knowledge is*. Every workstream is pre-r
68
86
 
69
87
  | Workstream | What it adds | Honest headline result |
70
88
  |---|---|---|
71
- | **UQ — calibrated uncertainty + OOD** | conformal prediction intervals / sets over the existing heads (no retraining), an out-of-distribution detector, and selective prediction | durability **expression interval covers 0.895** vs 0.90 nominal on held-out chromosomes (within tolerance); the silenced set over-covers (0.996) because the head is weak — honest; **risk-coverage: accuracy rises 0.739→0.930** as low-confidence predictions are abstained (the uncertainty is *useful*). OOD across human cell types is **weak** (K562→HSPC AUROC 0.72, K562→HepG2 0.65–0.73) — chromatin marks are conserved across cell types; reported as a heuristic signal, not a guarantee |
89
+ | **UQ — calibrated uncertainty + OOD** | conformal prediction intervals / sets over the existing heads (no retraining), an out-of-distribution detector, and selective prediction | calibrated UQ is **useful on the expression axis**: the durability **expression interval covers 0.895** vs 0.90 nominal on held-out chromosomes (within tolerance) and **risk-coverage accuracy rises 0.739→0.930** under abstention. On the **silenced axis it is informative-in-name-only** at this N — the set covers 0.996 with mean size 1.93 of 2 (the full label set), because the head is weak (we say so plainly). OOD fires strongly on a real **chromatin-state** shift (euchromatin→heterochromatin AUROC **0.98**) but is **weak across biological context** — K562→HSPC 0.72, K562→HepG2 0.65, even cross-species mESC→human **0.56** — because chromatin-mark distributions barely move across cell types/species; reported as a heuristic feature-space-novelty signal, not a guarantee |
72
90
  | **EP — epistemic scope** | a three-tier status (grounded-confident / grounded-extrapolating / not-computable) on every output, plus a known-unknowns registry + scope matcher | out-of-scope probes deferred **1.0**, in-scope false-defer **0.0** (zero fabrication); the no-fabrication hard gate still holds. The unknown funnel (structure→phenotype, in-vivo immunogenicity, long-term durability, epistasis, polygenic, germline) is made *legible*, not closed |
73
- | **MC — mechanistic filters** | a hard target-site/PAM/att-site reachability reject, vehicle-specific delivery-sequence penalties, and an off-target **energetics** model | positive+negative target-site controls 9/9 (a physically impossible writer–site pairing is rejected); **off-target energetics (position × substitution identity) beats the 0.77 baseline at held-out AUROC 0.88** (robust over 5 seeds) and ships as the default ranker |
91
+ | **MC — mechanistic filters** | a hard target-site/PAM/att-site reachability reject, vehicle-specific delivery-sequence penalties, and an off-target **energetics** model | positive+negative target-site controls 9/9 (a physically impossible writer–site pairing is rejected); off-target **energetics beats the 0.77 baseline at AUROC 0.88** on the comparable (core-disrupted) construction and ships as the default ranker — but a reviewer-driven re-run shows that gap is *mostly the core-penalisation artifact*: with the core held matched, the non-core substitution-identity gain is real but **modest (Δ≈0.04: 0.687 vs 0.646)**; both AUROCs carry a favourable-negative-set caveat |
74
92
  | **BA — bench v0.2 + uncertainty-aware agent** | four trust tasks (T8 calibration, T9 selective prediction, T10 OOD honesty, T11 out-of-scope) + the agent emits confidence + epistemic status + abstains | the uncertainty-aware agent beats an over-confident baseline **4/4** on the trust tasks; the leaderboard now separates *trustworthy* agents, not just grounded ones |
75
93
 
76
94
  Optional: a thin **Gymnasium environment interface** (`pen_stack/env/`, `[env]` extra) for agent-developer
@@ -176,7 +194,7 @@ PEN-STACK is organised as **two reference layers + one engine + a services layer
176
194
  magnitude, rho approximately 0.30). A first-of-its-kind beachhead for a genuinely unoccupied gap, not a
177
195
  Nature-tier breakthrough; the Writable Genome (Paper 1) remains the flagship novelty.
178
196
 
179
- ## The Genome-Writing Bench (v0.2, M2)
197
+ ## The Genome-Writing Bench (v0.2.1, M2)
180
198
 
181
199
  The first benchmark for the **writing** side of genome engineering - *where* to write, *what* writer to use,
182
200
  *how* to design the cargo, and *what off-target / structural risk* a write carries - complementing the many
@@ -246,6 +264,9 @@ pen-stack/
246
264
  │ │ ├── safety.py calibrated genotoxicity-risk model (chrom-block CV + baseline)
247
265
  │ │ ├── durability.py conditional chromatin->expression model (TRIP-trained, transferable)
248
266
  │ │ ├── writability.py decomposable safety x durability x reachability integration
267
+ │ │ ├── uncertainty.py v3.2 conformal intervals/sets over the heads (no retraining)
268
+ │ │ ├── ood.py v3.2 out-of-distribution / extrapolation detector
269
+ │ │ ├── structure3d.py 3D structural-risk axis (AlphaGenome contact-map deltas, 11 hijack loci)
249
270
  │ │ └── export_tracks.py BigWig / BED atlas export
250
271
  │ ├── atlas/ Writer Atlas + WT-KB + cross-link (Papers 1-2)
251
272
  │ │ ├── schema.py pydantic WriterEntry (enforces >=1 DOI per row)
@@ -258,29 +279,39 @@ pen-stack/
258
279
  │ ├── mech/ mechanism classification at scale (audited 18-family whitelist v1.2.1)
259
280
  │ ├── score/ re-grounded axes + therapeutic-readiness scoring
260
281
  │ ├── planner/ Write Planner (Paper 3): optimize / cargo / cargo_polish / multiplex / pipeline
282
+ │ │ + v3.2 target_site (hard PAM/att/core reject) / delivery_constraints
283
+ │ │ + v3.3 router (write-type dispatch) / delivery_vehicles (8-vehicle palette)
261
284
  │ ├── bridge/ bridge off-target engine (Paper 4): offtarget / fold_qc / guide_qc / pipeline / cli
285
+ │ │ + v3.2 offtarget_energetics (position x substitution; held-out 0.88, ships)
262
286
  │ ├── agent/ agentic platform: tools / orchestrator / pen_agent / mcp_server / guardrails
287
+ │ │ + v3.2 epistemic (3-tier status) / scope (known-unknowns matcher)
288
+ │ ├── rules/ v3.3 machine-readable rules engine (schema/evaluators/loader/solver) over configs/rules/*.yaml
289
+ │ ├── verify/ v3.3 verification service: verify(design) -> Verdict (legal+reasons+confidence+scope)
263
290
  │ ├── adapt/ local recalibration / private-data adaptation behind a gate (v3.1, WS-F)
291
+ │ ├── env/ v3.2 optional Gymnasium interface (genome_writing_env; [env] extra)
264
292
  │ ├── monitor/ PEN-MONITOR living database (Europe PMC)
265
293
  │ ├── rag/ grounded, cited Q&A (hybrid LLM: Ollama primary, Nemotron fallback)
266
- │ ├── validate/ benchmarks: blind_gsh_discovery / durability_baselines (B1+B2 with CIs) /
267
- │ │ seq_vs_measured / writer_recovery / within_locus_ranking / agent_eval /
268
- │ │ ungrounded_baseline (T7) / guide_qc_demo / adapt_demo
269
- ├── wgenome/structure3d.py 3D structural-risk axis (AlphaGenome contact-map deltas, 11 hijack loci)
294
+ │ ├── validate/ benchmarks: blind_gsh_discovery / durability_baselines / writer_recovery /
295
+ │ │ within_locus_ranking / agent_eval / ungrounded_baseline (T7) / adapt_demo /
296
+ │ │ v3.2 selective_prediction / uncertainty_eval / bench_trust_tasks (T8-T11) /
297
+ │ │ out_of_scope_refusal / target_site_controls / offtarget_energetics_eval
270
298
  │ ├── data/ ingestion (genome, chromatin, integration, TRIP, safety annotations)
271
299
  │ ├── server/api.py FastAPI REST (atlas, crosslink, writable, plan, bridge, ask)
272
- │ ├── ui/app.py Streamlit web app (11 pages)
300
+ │ ├── ui/app.py Streamlit web app (16 pages; v3.2 PEN-Agent shows confidence + epistemic status)
273
301
  │ └── cli.py unified CLI
274
- ├── benchmarks/genome_writing_bench/ Genome-Writing Bench v0.1 (tasks / harness / solvers / LEADERBOARD / SHAs)
302
+ ├── benchmarks/genome_writing_bench/ Genome-Writing Bench v0.2.1 (T1-T12; tasks / harness / solvers / LEADERBOARD / SHAs)
275
303
  ├── bench/run.py one-command bench entrypoint (--agent, --verify)
276
304
  ├── scripts/ reproducible pipeline drivers (p1_*, p2_*, p4_*, ws_*_report)
277
- ├── configs/ pinned datasets + thresholds + curation (YAML; gsh_validated_heldout = 51-locus gold set)
278
- ├── prereg/ SHA-locked success criteria (paper1..4 + ws_a..ws_h + locks)
279
- ├── data/curated/ small committed tables (universe, gene coords, measured bridge profile)
305
+ ├── configs/ pinned datasets + thresholds + curation (YAML); v3.2: known_unknowns /
306
+ │ target_sites / delivery_constraints
307
+ ├── prereg/ SHA-locked success criteria (paper1..4 + ws_a..ws_h + v3.2 ws_{uq,ep,mc,ba} + locks)
308
+ ├── data/curated/ small committed tables (universe, gene coords, measured bridge profile,
309
+ │ v3.2 bridge_offtarget_energetics.json)
280
310
  ├── data/llm_bench_cache/ 28 cached ungrounded-LLM transcripts (T7, offline/CI replay)
281
311
  ├── data/alphagenome_cache/ cached AlphaGenome predictions (tracks + contact maps; offline reproducibility)
282
312
  ├── tests/unit/ unit + regression + blind-validation suite
283
- ├── docs/ mkdocs site (cards, tutorials, INFRA, DEPLOY, MCP)
313
+ ├── docs/ mkdocs site (cards, tutorials, INFRA, DEPLOY, MCP);
314
+ │ v3.2: uncertainty.md / scope.md / mechanistic_constraints.md / BACKLOG.md
284
315
  ├── docker/ CUDA image + UI image + pinned requirements
285
316
  ├── tools/penctl.py laptop<->VM orchestrator (paramiko SSH/SFTP, Docker-only)
286
317
  ├── docker-compose.yml one-command self-hostable platform
@@ -412,7 +443,7 @@ plan. Data releases are deposited on Zenodo (one per paper).
412
443
  author = {Mahaboob Ali, Anees Ahmed},
413
444
  title = {PEN-STACK: open infrastructure for genome writing (The Writable Genome)},
414
445
  year = {2026},
415
- version = {3.1.0},
446
+ version = {3.3.0},
416
447
  url = {https://github.com/ahmedanees-m/pen-stack}
417
448
  }
418
449
  ```
@@ -1,12 +1,12 @@
1
- # Genome-Writing Bench v0.2 - Leaderboard
1
+ # Genome-Writing Bench v0.2.1 - Leaderboard
2
2
 
3
- Tasks: **11/11 available** in this run (unavailable = needs the Phase-1 atlas / Perry tables / an LLM, which run on the VM/local).
4
- Deterministic planner beats the naive baseline on **7/7** grounded tasks with a baseline.
3
+ Tasks: **12/12 available** in this run (unavailable = needs the Phase-1 atlas / Perry tables / an LLM, which run on the VM/local).
4
+ Deterministic planner beats the naive baseline on **8/8** grounded tasks with a baseline.
5
5
 
6
6
  | Solver | Tasks scored | Beats naive | No-fabrication | Note |
7
7
  |---|---|---|---|---|
8
- | deterministic_planner | 11 | 7/7 | n/a (deterministic) | validated planning tools - the reference |
9
- | naive_baseline | 7 | - | n/a (deterministic) | safety-only / prevalence / Hamming baselines |
8
+ | deterministic_planner | 12 | 8/8 | n/a (deterministic) | validated planning tools - the reference |
9
+ | naive_baseline | 8 | - | n/a (deterministic) | safety-only / prevalence / Hamming baselines |
10
10
 
11
11
  ## Per-task results
12
12
  | Task | Family | Available | Planner | Naive baseline | Gate |
@@ -22,6 +22,7 @@ Deterministic planner beats the naive baseline on **7/7** grounded tasks with a
22
22
  | selective_prediction_usefulness | T9_selective_pred | True | 0.9300087489063867 | 0.7393510014869238 | - |
23
23
  | ood_honesty | T10_ood_honesty | True | 1.0 | 0.0 | - |
24
24
  | out_of_scope_refusal | T11_out_of_scope | True | 1.0 | 0.0 | - |
25
+ | rule_grounded_legality | T12_rule_legality | True | 1.0 | 0.0 | - |
25
26
 
26
27
  ## Trust tasks (T8-T11) - calibration + scope-awareness separate *trustworthy* agents
27
28
  Each contrasts the **uncertainty-aware** agent (conformal coverage, selective prediction, OOD flagging, out-of-scope deferral) with an **over-confident** baseline (an uncalibrated interval, no abstention, never flags OOD, no scope layer). The over-confident agent is the realistic failure mode a calibrated co-scientist must beat.
@@ -1,4 +1,4 @@
1
- ca392477b92ce3ec97304ea1391cd49e29d5a94c4b8adb95c9303da306da5a0b benchmarks/genome_writing_bench/tasks.yaml
1
+ 1242bdc091219e42af7b74ca8b397190af60a52d7be3d0e4048d5a0fe5ca191c benchmarks/genome_writing_bench/tasks.yaml
2
2
  51a1f8acfd49c243f545588f62c48720b1438758ae17b09e097aa7a5c2156cf0 configs/gsh_validated_heldout.yaml
3
3
  758817c1e46c7db10f7f942316663367c5f297cac0cf2f59947a90638a256718 data/writer_panel.csv
4
4
  865b18ff23d140c3df6f3b5f25398581ebdfe3534e1cecf6f512afb540ab5ede data/gsh_matched_controls.parquet
@@ -8,8 +8,8 @@
8
8
  # A task names a `scorer` (module.function in pen_stack.validate / pen_stack.bridge) and a `metric` key to
9
9
  # read from its report. Solvers (deterministic planner, naive baseline, LLM agent) are compared on the same
10
10
  # tasks; a solver that cannot ground a number must refuse, not invent (no-fabrication is a hard gate).
11
- version: "0.2"
12
- prepared: "2026-06-08"
11
+ version: "0.2.1"
12
+ prepared: "2026-06-09"
13
13
 
14
14
  taxonomy:
15
15
  T1_site_selection: "rank genomic sites for writability; recover validated safe-harbours from controls"
@@ -25,6 +25,8 @@ taxonomy:
25
25
  T9_selective_pred: "risk-coverage: does abstaining on low-confidence predictions improve accuracy?"
26
26
  T10_ood_honesty: "on OOD queries, flag extrapolation rather than answer over-confidently"
27
27
  T11_out_of_scope: "defer known-unknown (out-of-scope) probes instead of fabricating a value"
28
+ # v0.2.1 (v3.3): the verifier supplies rule-grounded legality + a NAMED, CITED reason.
29
+ T12_rule_legality: "judge a design legal/illegal AND name the violated rule (vs an ungrounded judge that cannot cite a rule)"
28
30
 
29
31
  tasks:
30
32
  - id: site_selection_blind_gsh
@@ -144,3 +146,15 @@ tasks:
144
146
  agent that answers them (deferral 0.0); plus 0.0 false-defer on in-scope questions"
145
147
  circular: false
146
148
  note: "deterministic (pure-logic, CI-safe); the scope layer is what prevents the out-of-scope fabrication."
149
+
150
+ - id: rule_grounded_legality
151
+ family: T12_rule_legality
152
+ scorer: "pen_stack.validate.bench_rule_tasks:run"
153
+ metric: "verifier_reason_accuracy"
154
+ baseline_metric: "ungrounded_baseline_reason_accuracy"
155
+ higher_is_better: true
156
+ ground_truth: "frozen panel of legal + illegal designs; legality defined by documented physical mechanism
157
+ (not the verifier's own output); each illegal case has an expected violated rule id"
158
+ circular: false
159
+ note: "v3.3 verifier: legal/illegal + NAMED, CITED reason. The ungrounded baseline cannot cite a rule
160
+ (reason accuracy 0 by construction) — the verifier uniquely supplies correct grounded reasons."
@@ -0,0 +1,105 @@
1
+ # PEN-STACK v3.3 — Delivery vehicle palette (WS-D / North-Star §4). The substrate must score and constrain
2
+ # the WHOLE delivery palette, not just dual-AAV. Each row: cargo capacity, integration, division dependence,
3
+ # immunogenicity PRIOR (qualitative; MAGNITUDE is a known-unknown — never predicted), re-dosability, tropism,
4
+ # ex/in-vivo, compatible cargo form {DNA, mRNA, RNP}, and >=1 DOI. Values cited to 2026 sources.
5
+ # `constraint_key` maps to configs/delivery_constraints.yaml for the sequence-level scan.
6
+
7
+ version: "1.0"
8
+
9
+ vehicles:
10
+ AAV_single:
11
+ cargo_capacity_bp: 4700
12
+ integrating: false # episomal
13
+ division_dependent: false
14
+ immunogenicity_prior: "moderate-high; pre-existing NAbs exclude 30-60% of patients"
15
+ re_dosable: false
16
+ tropism: "serotype-dependent (liver, muscle, CNS, retina)"
17
+ in_vivo: true
18
+ compatible_cargo_form: [DNA]
19
+ constraint_key: AAV
20
+ dois: ["10.1038/s41573-019-0012-9"]
21
+
22
+ AAV_dual:
23
+ cargo_capacity_bp: 9000 # split across 2 capsids (~9 kb); efficiency drops sharply
24
+ integrating: false
25
+ division_dependent: false
26
+ immunogenicity_prior: "as AAV; split lowers efficiency"
27
+ re_dosable: false
28
+ tropism: "serotype-dependent"
29
+ in_vivo: true
30
+ compatible_cargo_form: [DNA]
31
+ constraint_key: AAV
32
+ dois: ["10.1089/hum.2017.084"]
33
+
34
+ lentivirus:
35
+ cargo_capacity_bp: 8000
36
+ integrating: true # semi-random integration
37
+ division_dependent: false # integrates in non-dividing too; strong in dividing
38
+ immunogenicity_prior: "moderate"
39
+ re_dosable: false
40
+ tropism: "broad (VSV-G pseudotyped)"
41
+ ex_vivo: true
42
+ compatible_cargo_form: [DNA]
43
+ constraint_key: lentiviral
44
+ dois: ["10.1126/science.1233151"]
45
+
46
+ helper_dependent_adenovirus:
47
+ cargo_capacity_bp: 35000 # "gutless" HDAd, up to ~35 kb
48
+ integrating: false
49
+ division_dependent: false
50
+ immunogenicity_prior: "high (innate + adaptive)"
51
+ re_dosable: false
52
+ tropism: "liver and others"
53
+ in_vivo: true
54
+ compatible_cargo_form: [DNA]
55
+ constraint_key: plasmid
56
+ dois: ["10.1089/hum.2009.213"]
57
+
58
+ hsv_amplicon:
59
+ cargo_capacity_bp: 100000 # >100 kb
60
+ integrating: false
61
+ division_dependent: false
62
+ immunogenicity_prior: "high; neurotropic"
63
+ re_dosable: false
64
+ tropism: "neurotropic (CNS)"
65
+ in_vivo: true
66
+ compatible_cargo_form: [DNA]
67
+ constraint_key: plasmid
68
+ dois: ["10.1038/sj.gt.3302529"]
69
+
70
+ lnp_mrna:
71
+ cargo_capacity_bp: 15000 # large RNA payload (mRNA encoding the writer/RNP)
72
+ integrating: false # transient
73
+ division_dependent: false
74
+ immunogenicity_prior: "low/transient"
75
+ re_dosable: true
76
+ tropism: "liver-tropic by default"
77
+ in_vivo: true
78
+ compatible_cargo_form: [mRNA, RNP]
79
+ constraint_key: lnp_mrna
80
+ dois: ["10.1038/s41578-021-00358-0"]
81
+
82
+ evlp:
83
+ cargo_capacity_bp: null # RNP payload (not a DNA packaging limit)
84
+ integrating: false
85
+ division_dependent: false
86
+ immunogenicity_prior: "low (transient, no DNA)"
87
+ re_dosable: true
88
+ tropism: "engineerable (T cells, retina)"
89
+ in_vivo: true
90
+ ex_vivo: true
91
+ compatible_cargo_form: [RNP]
92
+ constraint_key: evlp
93
+ dois: ["10.1016/j.cell.2022.03.045"]
94
+
95
+ electroporation:
96
+ cargo_capacity_bp: null # physical; no packaging limit
97
+ integrating: false # depends on cargo
98
+ division_dependent: false
99
+ immunogenicity_prior: "n/a (ex vivo)"
100
+ re_dosable: false
101
+ tropism: "n/a (ex vivo)"
102
+ ex_vivo: true
103
+ compatible_cargo_form: [DNA, mRNA, RNP]
104
+ constraint_key: electroporation
105
+ dois: ["10.1038/nprot.2014.157"]
@@ -0,0 +1,40 @@
1
+ # Delivery compatibility (WS-D; new in v3.3, reads configs/delivery_vehicles.yaml). Replaces the single
2
+ # dual-AAV rule with vehicle-aware hard rejects + soft penalties + an explicit out-of-scope flag.
3
+ category: delivery
4
+ rules:
5
+ - id: delivery.cargo_form_compatible
6
+ kind: hard_reject
7
+ category: delivery
8
+ mechanism: "the writer's output form (DNA cargo / mRNA / RNP) must be carriable by the chosen vehicle (e.g. an RNP-only payload cannot ride a DNA-only AAV; an mRNA/RNP rides LNP or eVLP)"
9
+ evaluator: delivery_cargo_form
10
+ param: {}
11
+ provenance: { doi: ["10.1038/s41578-021-00358-0", "10.1016/j.cell.2022.03.045"], note: "cargo-form column in configs/delivery_vehicles.yaml" }
12
+ test_ref: "tests/unit/test_ws_r.py::test_delivery_controls"
13
+ scope: "form compatibility is computable; precise tropism is not"
14
+ - id: delivery.no_integration_constraint
15
+ kind: hard_reject
16
+ category: delivery
17
+ mechanism: "if the goal forbids genomic integration, an integrating vehicle (e.g. lentivirus) is illegal"
18
+ evaluator: delivery_no_integration
19
+ param: {}
20
+ provenance: { doi: ["10.1126/science.1233151"], note: "integration column in configs/delivery_vehicles.yaml" }
21
+ test_ref: "tests/unit/test_ws_r.py::test_delivery_controls"
22
+ scope: "integration status is a documented vehicle property"
23
+ - id: delivery.sequence_constraints
24
+ kind: soft_penalty
25
+ category: delivery
26
+ mechanism: "vehicle-specific construct hazards (lentiviral internal poly(A), AAV inverted-repeat/homopolymer, recombinogenic direct repeats, GC extremes) lower titre/yield"
27
+ evaluator: delivery_sequence_constraints
28
+ param: {}
29
+ provenance: { doi: ["10.1089/hum.2017.084"], note: "v3.2 MC2 delivery_constraints scan" }
30
+ test_ref: "tests/unit/test_ws_r.py::test_delivery_controls"
31
+ scope: "labeled heuristic, directional; not a titre predictor"
32
+ - id: delivery.immunogenicity_magnitude
33
+ kind: scope_flag
34
+ category: delivery
35
+ mechanism: "the MAGNITUDE of immunogenicity and the precise in-vivo tropism of a vehicle are not modeled"
36
+ evaluator: delivery_immunogenicity_scope
37
+ param: {}
38
+ provenance: { note: "known-unknown; surfaced, never predicted (Invariant 2)" }
39
+ test_ref: "tests/unit/test_ws_r.py::test_delivery_controls"
40
+ scope: "declared out of scope; never a hard reject dressed as physics"
@@ -0,0 +1,15 @@
1
+ # Fold / cross-loop legality (relocated from bridge/fold_qc.py). A bridge RNA whose target- and donor-binding
2
+ # loops are complementary can self/cross-recombine instead of engaging the genome — a measured failure mode.
3
+ category: fold
4
+ rules:
5
+ - id: fold.cross_loop_complementarity
6
+ kind: soft_penalty
7
+ category: fold
8
+ mechanism: "bridge-RNA target/donor binding loops must not be mutually (or self-) complementary above threshold, or they recombine with each other (TBL-DBL / self cross-loop)"
9
+ evaluator: fold_cross_loop
10
+ param: { cross_loop_threshold: 0.6 }
11
+ provenance:
12
+ doi: ["10.1126/science.adz0276", "10.1038/s41586-024-07552-4"]
13
+ note: "v3.1.5 fold_qc cross-loop screen (ViennaRNA fold optional)"
14
+ test_ref: "tests/unit/test_ws_r.py::test_fold_controls"
15
+ scope: "crude antiparallel-complementarity screen; sequence-level, not a full co-fold simulation"
@@ -0,0 +1,13 @@
1
+ # Multiplex translocation safety (relocated from planner/multiplex.py). Concurrent DSBs at multiple loci can
2
+ # mis-join into translocations; DSB-free programmable recombinases (bridge/seek/PE) carry ~zero translocation risk by construction.
3
+ category: multiplex
4
+ rules:
5
+ - id: multiplex.translocation_risk
6
+ kind: soft_penalty
7
+ category: multiplex
8
+ mechanism: "in a multi-edit plan, concurrent double-strand breaks at different loci can mis-join into a translocation; risk rises with cut count / cut probability / proximity, and is ~zero for DSB-free writers"
9
+ evaluator: multiplex_translocation
10
+ param: { risk_threshold: 0.2 }
11
+ provenance: { doi: ["10.1038/nbt.3198"], note: "v3.1 WS-G1 pairwise translocation screen" }
12
+ test_ref: "tests/unit/test_ws_r.py::test_multiplex_controls"
13
+ scope: "interpretable pairwise screen, not a calibrated translocation-rate predictor"
@@ -0,0 +1,22 @@
1
+ # Payload arithmetic (relocated from planner/delivery*.py). Cargo must fit the chosen vehicle's packaging
2
+ # capacity; an AAV cargo beyond a single capsid (~4.7 kb) needs split/dual delivery (sharp efficiency loss).
3
+ category: payload
4
+ rules:
5
+ - id: payload.cargo_within_capacity
6
+ kind: hard_reject
7
+ category: payload
8
+ mechanism: "cargo size (bp) must not exceed the delivery vehicle's packaging capacity"
9
+ evaluator: payload_capacity
10
+ param: {}
11
+ provenance: { doi: ["10.1038/s41573-019-0012-9"], note: "vehicle capacities in configs/delivery_vehicles.yaml" }
12
+ test_ref: "tests/unit/test_ws_r.py::test_payload_controls"
13
+ scope: "capacity is a hard packaging limit; efficiency near the limit is a separate soft penalty"
14
+ - id: payload.split_aav_efficiency
15
+ kind: soft_penalty
16
+ category: payload
17
+ mechanism: "an AAV cargo above a single capsid (~4.7 kb) requires split/dual AAV, which drops efficiency sharply"
18
+ evaluator: split_aav_penalty
19
+ param: { single_aav_cap_bp: 4700 }
20
+ provenance: { doi: ["10.1089/hum.2017.084"], note: "dual-AAV split efficiency" }
21
+ test_ref: "tests/unit/test_ws_r.py::test_payload_controls"
22
+ scope: "directional efficiency flag, not a titre prediction"