pen-stack 4.0.2__tar.gz → 4.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pen_stack-4.0.2 → pen_stack-4.5.0}/CHANGELOG.md +47 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/CITATION.cff +1 -1
- {pen_stack-4.0.2 → pen_stack-4.5.0}/PKG-INFO +23 -4
- {pen_stack-4.0.2 → pen_stack-4.5.0}/README.md +22 -3
- {pen_stack-4.0.2 → pen_stack-4.5.0}/benchmarks/genome_writing_bench/LEADERBOARD.md +6 -5
- {pen_stack-4.0.2 → pen_stack-4.5.0}/benchmarks/genome_writing_bench/tasks.yaml +17 -1
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/atlas_families.yaml +2 -2
- pen_stack-4.5.0/configs/cell_types.yaml +56 -0
- pen_stack-4.5.0/docs/world_model.md +49 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/__init__.py +1 -1
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/agent/mcp_server.py +9 -0
- pen_stack-4.5.0/pen_stack/graph/__init__.py +21 -0
- pen_stack-4.5.0/pen_stack/graph/build.py +133 -0
- pen_stack-4.5.0/pen_stack/graph/cell_types.py +58 -0
- pen_stack-4.5.0/pen_stack/graph/ingest.py +132 -0
- pen_stack-4.5.0/pen_stack/graph/query.py +79 -0
- pen_stack-4.5.0/pen_stack/graph/schema.py +100 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/server/api.py +8 -0
- pen_stack-4.5.0/pen_stack/validate/bench_graph_tasks.py +64 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack.egg-info/PKG-INFO +23 -4
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack.egg-info/SOURCES.txt +17 -0
- pen_stack-4.5.0/prereg/SHA256_LOCK_ws_ba_v45.json +8 -0
- pen_stack-4.5.0/prereg/SHA256_LOCK_ws_ct.json +8 -0
- pen_stack-4.5.0/prereg/SHA256_LOCK_ws_graph.json +8 -0
- pen_stack-4.5.0/prereg/SHA256_LOCK_ws_mon.json +8 -0
- pen_stack-4.5.0/prereg/ws_ba_v45.yaml +13 -0
- pen_stack-4.5.0/prereg/ws_ct.yaml +15 -0
- pen_stack-4.5.0/prereg/ws_graph.yaml +19 -0
- pen_stack-4.5.0/prereg/ws_mon.yaml +21 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pyproject.toml +1 -1
- {pen_stack-4.0.2 → pen_stack-4.5.0}/LICENSE +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/MANIFEST.in +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/bench/run.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/benchmarks/genome_writing_bench/README.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/benchmarks/genome_writing_bench/SHA256SUMS +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/benchmarks/genome_writing_bench/SUBMISSIONS.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/bridge_offtarget_profile.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/cargo_polish.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/datasets.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/delivery_constraints.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/delivery_rules.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/delivery_vehicles.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/gates_v3.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/gsh_validated_heldout.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/intent_weights.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/known_unknowns.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/llm.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/monitor_queries.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/oracles/scope_cards.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/rules/delivery.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/rules/fold.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/rules/multiplex.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/rules/payload.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/rules/reachability.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/score_axes.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/target_sites.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/universe_crosswalk.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/write_types.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/configs/wtkb_curated.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/data/curated/bridge_offtarget_energetics.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/data/curated/bridge_offtarget_profile_measured.parquet +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/data/curated/gene_coords.parquet +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/data/curated/unified_editor_universe.parquet +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/BACKLOG.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/DEPLOY.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/INFRA.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/MCP.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/RELEASING.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/REPRO.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/agent.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/alphagenome_feasibility.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/benchmark_circularity.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/cards/atlas.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/cards/durability.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/cards/safety.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/delivery.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/dissemination.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/environment.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/index.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/mechanistic_constraints.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/oracles.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/positioning.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/private_data_formats.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/quickstart.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/rules.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/scope.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/scorecard.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/tutorials/compare-families.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/tutorials/score-deliverability.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/tutorials/where-can-i-write.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/tutorials/which-writer-reaches-locus.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/uncertainty.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/verify.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/writer_verification.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/docs/wtkb.md +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/_resources.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/adapt/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/adapt/finetune.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/adapt/ingest.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/adapt/pipeline.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/adapt/recalibrate.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/adapt/report.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/agent/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/agent/epistemic.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/agent/guardrails.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/agent/orchestrator.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/agent/pen_agent.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/agent/scope.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/agent/tools.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/atlas/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/atlas/build_wtkb.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/atlas/crosslink.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/atlas/expand.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/atlas/schema.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/atlas/scorecard.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/atlas/universe.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/atlas/variant_propose.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/atlas/writer_verify.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/bridge/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/bridge/activity.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/bridge/cli.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/bridge/fold_qc.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/bridge/guide_qc.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/bridge/ingest.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/bridge/offtarget.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/bridge/offtarget_energetics.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/bridge/ortholog_screen.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/bridge/pipeline.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/cli.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/data/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/data/encode.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/data/genome.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/data/ingest_chromatin.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/data/ingest_integration.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/data/ingest_safety_annot.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/data/ingest_trip.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/env/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/env/genome_writing_env.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/env/policies.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/mech/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/mech/classify_atlas.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/mech/whitelist.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/monitor/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/monitor/europepmc.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/monitor/run.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/monitor/triage.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/oracles/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/oracles/cache.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/oracles/energetics.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/oracles/genome.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/oracles/protein_design.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/oracles/rna.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/oracles/schema.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/oracles/structure.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/planner/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/planner/cargo.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/planner/cargo_polish.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/planner/delivery.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/planner/delivery_constraints.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/planner/delivery_vehicles.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/planner/multiplex.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/planner/optimize.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/planner/pipeline.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/planner/report.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/planner/router.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/planner/target_site.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/rag/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/rag/index.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/rag/llm.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/rag/qa.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/rules/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/rules/evaluators.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/rules/loader.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/rules/schema.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/rules/solver.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/score/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/score/recalibrate.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/score/therapeutic.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/server/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/ui/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/ui/app.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/adapt_demo.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/agent_eval.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/bench_adversarial_tasks.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/bench_rule_tasks.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/bench_trust_tasks.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/bench_writetype_tasks.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/blind_gsh_discovery.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/cargo_directionality.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/durability_baselines.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/forward_hypotheses.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/guide_qc_demo.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/intent_specification.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/offtarget_energetics_eval.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/out_of_scope_refusal.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/outcome_calibration.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/paper3_benchmark.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/paper4_real_validation.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/paper4_validation.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/selective_prediction.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/seq_vs_measured.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/target_site_controls.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/uncertainty_eval.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/ungrounded_baseline.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/within_locus_ranking.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/validate/writer_recovery.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/verify/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/verify/schema.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/verify/service.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/wgenome/__init__.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/wgenome/chromatin_seq.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/wgenome/durability.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/wgenome/export_tracks.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/wgenome/features.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/wgenome/gsh_baseline.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/wgenome/mesh_features.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/wgenome/ood.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/wgenome/providers.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/wgenome/safety.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/wgenome/structure3d.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/wgenome/uncertainty.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack/wgenome/writability.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack.egg-info/dependency_links.txt +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack.egg-info/entry_points.txt +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack.egg-info/requires.txt +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/pen_stack.egg-info/top_level.txt +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_phase0.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_phase1_5.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_phase2.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_phase3.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_a.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_atlas.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_b.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_ba.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_ba_v33.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_bench.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_c.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_cal.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_d.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_e.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_env.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_ep.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_f.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_g.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_h.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_mc.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_o.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_r.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_route.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_uq.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_v.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/SHA256_LOCK_ws_wv.json +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/paper1.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/paper2.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/paper3.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/paper4.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/phase0.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_a.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_atlas.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_b.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_ba.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_ba_v33.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_bench.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_c.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_cal.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_d.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_e.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_env.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_ep.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_f.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_g.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_h.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_mc.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_o.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_r.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_route.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_uq.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_v.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/prereg/ws_wv.yaml +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/scripts/p1_build_atlas.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/scripts/p1_build_durability.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/scripts/p1_export_tracks.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/scripts/p1_safety_concordance.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/scripts/p1_train_safety.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/scripts/p1_validation_report.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/scripts/p2_build_atlas.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/scripts/p3_benchmark_report.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/scripts/p4_genome_scan.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/scripts/ws_b_report.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/scripts/ws_c_report.py +0 -0
- {pen_stack-4.0.2 → pen_stack-4.5.0}/setup.cfg +0 -0
|
@@ -3,6 +3,53 @@
|
|
|
3
3
|
All notable changes to PEN-STACK are documented here. This file follows
|
|
4
4
|
[Keep a Changelog](https://keepachangelog.com/) and the program's phase structure.
|
|
5
5
|
|
|
6
|
+
## [4.5.0] - 2026-06-09 - v4.5 release: the Living World-Model (knowledge graph + gated living loop)
|
|
7
|
+
|
|
8
|
+
v4.5 promotes the flat tables into a queryable knowledge graph that keeps itself current. Workstreams
|
|
9
|
+
WS-{G,MON,CT,BA}, each SHA-locked. The agent proposes; a gate disposes — no process auto-edits curated truth.
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- **WS-G - knowledge graph.** `pen_stack/graph/{schema,build,query}.py`: typed nodes
|
|
13
|
+
(writer/locus/cargo/vehicle/cell_type/write_type/outcome) + typed edges
|
|
14
|
+
(reaches/deliverable_by/performs/durable_in/carries/used_writer/observed_at), each carrying evidence kind
|
|
15
|
+
(measured>curated>predicted) + confidence + scope + provenance. Built deterministically from the v4.0
|
|
16
|
+
curated tables (94 nodes / 288 edges), pure-Python JSON store. Multi-hop queries return provenanced paths;
|
|
17
|
+
`deliverable_by` reproduces the v3.3 verifier (0 parity mismatches). REST `POST /graph/query` + MCP
|
|
18
|
+
`graph_query`. `docs/world_model.md`; `prereg/ws_graph.yaml`.
|
|
19
|
+
- **WS-MON - gated living loop.** `pen_stack/graph/ingest.py`: Candidate + Quarantine (propose never mutates
|
|
20
|
+
a graph), `automated_checks` + `gate_admit(approved, admitted_by)` as the sole admission path with versioned
|
|
21
|
+
records; back-test surfaces ISPpu10 (Europe PMC PPR1218813). No auto-edit path (asserted). `prereg/ws_mon.yaml`.
|
|
22
|
+
- **WS-CT - cell-type expansion.** `configs/cell_types.yaml` Tier-A (iPSC/ESC, primary T cells, hepatocytes)
|
|
23
|
+
with coverage cards + Tier-B roadmap; `pen_stack/graph/cell_types.py` graceful degradation (partial coverage
|
|
24
|
+
caps confidence) + cross-cell-type OOD labelling. `prereg/ws_ct.yaml`.
|
|
25
|
+
- **WS-BA - graph reasoning bench.** `graph_multihop_reasoning` (bench v0.3.1): graph reasoning accuracy 1.0
|
|
26
|
+
vs ungrounded 0.0, every answer a provenanced path. `prereg/ws_ba_v45.yaml`.
|
|
27
|
+
|
|
28
|
+
### Changed
|
|
29
|
+
- Version 4.0.3 -> 4.5.0; bench 0.3 -> 0.3.1; README "What is new in v4.5"; M1/M2 + world-model note updates.
|
|
30
|
+
|
|
31
|
+
## [4.0.3] - 2026-06-09 - ID-correctness patch: UniProt + Pfam + ontology audit
|
|
32
|
+
|
|
33
|
+
### Fixed
|
|
34
|
+
A whole-repo audit of structured IDs (verified against InterPro, UniProt, EBI-OLS, mygene):
|
|
35
|
+
- **`pen_stack/mech/pfam_whitelist.yaml` (v1.2.1 -> v1.2.2):** the 26 Pfam accessions were all correct, but
|
|
36
|
+
**13 of 22 `example_uniprot` proteins did not actually contain their claimed domain** (membership checked
|
|
37
|
+
against each protein's UniProt Pfam cross-references) — including a marine-worm **Histone H3** (PF13586), a
|
|
38
|
+
mouse **mannosyltransferase** (PF05621/TniB), **I-AniI** (a LAGLIDADG enzyme) mislabelled HNH (PF01844), a
|
|
39
|
+
**glycine-betaine transporter** and a Tn3 transposase mis-filed as rve, and an **obsolete 404** accession
|
|
40
|
+
(PF08721) — despite the header claiming a spot-check. All corrected to reviewed/curated proteins whose
|
|
41
|
+
UniProt entry genuinely carries the domain (e.g. ISCro4 `D2TGM5`, Tn5 `Q46731`, Tn7-TnsA `P13988`, Bxb1
|
|
42
|
+
integrase `Q9B086`, McrA `P24200`); the audit-status header was corrected to stop over-claiming.
|
|
43
|
+
- **`configs/atlas_families.yaml`** (drives family expansion in `expand.py`): IS621 `A0A0F6B5L8` (a
|
|
44
|
+
betaine transporter) -> **`A0A2X3M8B0`** (IS621 transposase); phiC31 `Q9T2A6` (a plant NAD(P)H
|
|
45
|
+
oxidoreductase) -> **`Q9T221`** (phiC31 integrase). The Pfam-query signatures and discovery DOIs were
|
|
46
|
+
already correct.
|
|
47
|
+
|
|
48
|
+
### Verified clean
|
|
49
|
+
The 4 EFO cell-type IDs map correctly (EFO:0002067=K562, EFO:0001187=HepG2, EFO:0002784=GM12878,
|
|
50
|
+
EFO:0005483=ES-Bruce4); all GSH gene symbols are valid HGNC symbols; all 26 Pfam accessions resolve with the
|
|
51
|
+
correct domain name.
|
|
52
|
+
|
|
6
53
|
## [4.0.2] - 2026-06-09 - citation-correctness patch: full-repo DOI audit
|
|
7
54
|
|
|
8
55
|
### Fixed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pen-stack
|
|
3
|
-
Version: 4.0
|
|
3
|
+
Version: 4.5.0
|
|
4
4
|
Summary: Open infrastructure for genome writing: the Writable Genome atlas, the Writer Atlas, and the Write Planner.
|
|
5
5
|
Author-email: Anees Ahmed Mahaboob Ali <ahmedaneesm@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -89,12 +89,12 @@ and durably write new DNA, **which enzyme** can write it there, and **how** to d
|
|
|
89
89
|
[](https://codecov.io/gh/ahmedanees-m/pen-stack)
|
|
90
90
|
[](LICENSE)
|
|
91
91
|
[](https://www.python.org/)
|
|
92
|
-
[](CHANGELOG.md)
|
|
93
|
+
[](tests/)
|
|
94
94
|
[](https://github.com/astral-sh/ruff)
|
|
95
95
|
[](docker/)
|
|
96
96
|
[](prereg/)
|
|
97
|
-
[](benchmarks/genome_writing_bench/)
|
|
97
|
+
[](benchmarks/genome_writing_bench/)
|
|
98
98
|
|
|
99
99
|
**Built on five prior, separately published repositories:**
|
|
100
100
|
|
|
@@ -133,6 +133,24 @@ Two questions gate every genome-writing project, and before PEN-STACK no resourc
|
|
|
133
133
|
Everything is built on bulk-downloadable public data, runs on a single GPU, and is validated **blind** against
|
|
134
134
|
a pre-registered, honest baseline before release.
|
|
135
135
|
|
|
136
|
+
## What is new in v4.5 — the Living World-Model (a knowledge graph that keeps itself current)
|
|
137
|
+
|
|
138
|
+
v4.5 promotes the flat atlas/WT-KB/crosslink tables into a queryable **knowledge graph**: writers, loci,
|
|
139
|
+
cargo, delivery vehicles, cell types, write types and measured outcomes are typed nodes joined by typed edges,
|
|
140
|
+
**each carrying its provenance, its uncertainty, and the scope within which it holds**. An agent answers a
|
|
141
|
+
multi-hop design question in one grounded traversal, and the graph stays current through a **gated loop** —
|
|
142
|
+
new literature evidence is *proposed* as candidate edges and admitted only through a validation/human gate,
|
|
143
|
+
**never auto-merged**.
|
|
144
|
+
|
|
145
|
+
| Workstream | What it adds | Result |
|
|
146
|
+
|---|---|---|
|
|
147
|
+
| **G — knowledge graph** | `pen_stack/graph/{schema,build,query}` — typed nodes + provenance/uncertainty/scope-tagged edges, built from the v4.0 curated tables; REST `POST /graph/query` + MCP `graph_query` | multi-hop design queries return **fully provenanced paths** (the answer *is* the path); `deliverable_by` edges reproduce the v3.3 verifier with **0 parity mismatches** |
|
|
148
|
+
| **MON — gated living loop** | `pen_stack/graph/ingest.py` — PEN-MONITOR emits **candidate** edges; quarantined; admitted only via `gate_admit(approved)` with a versioned record | **no process auto-edits the curated truth** (Principle 1, asserted); back-test admits the recent ISPpu10 bridge system only through the gate |
|
|
149
|
+
| **CT — cell-type expansion** | Tier-A cell types (iPSC/ESC, primary T cells, hepatocytes) as nodes with **coverage cards** + Tier-B roadmap | partial coverage **degrades gracefully** (confidence capped, raw reported); cross-cell-type queries **OOD-labelled** (v3.2 finding); Tier-B documented, never silently extrapolated |
|
|
150
|
+
| **BA — graph reasoning bench** | `graph_multihop_reasoning` (bench v0.3.1) | graph reasoning accuracy **1.0** vs ungrounded **0.0**; every answer grounded by a provenanced path; no-fabrication holds |
|
|
151
|
+
|
|
152
|
+
See `docs/world_model.md` and `prereg/ws_{graph,mon,ct,ba_v45}.yaml`.
|
|
153
|
+
|
|
136
154
|
## What is new in v4.0 — the Oracle Mesh (sitting on top of the foundation models)
|
|
137
155
|
|
|
138
156
|
v4.0 makes PEN-STACK the **composition + verification layer over the biomolecular foundation models**. It
|
|
@@ -396,6 +414,7 @@ pen-stack/
|
|
|
396
414
|
│ │ + v3.2 offtarget_energetics (position x substitution; held-out 0.88, ships)
|
|
397
415
|
│ ├── agent/ agentic platform: tools / orchestrator / pen_agent / mcp_server / guardrails
|
|
398
416
|
│ │ + v3.2 epistemic (3-tier status) / scope (known-unknowns matcher)
|
|
417
|
+
│ ├── graph/ v4.5 living world-model knowledge graph (schema/build/query/ingest/cell_types); typed provenanced edges; gated living loop (propose-only)
|
|
399
418
|
│ ├── oracles/ v4.0 L1 oracle mesh: OracleResult contract + adapters (genome/structure/protein_design/rna/energetics) over the foundation models; version-pinned cache
|
|
400
419
|
│ ├── rules/ v3.3 machine-readable rules engine (schema/evaluators/loader/solver) over configs/rules/*.yaml
|
|
401
420
|
│ ├── verify/ v3.3 verification service: verify(design) -> Verdict (legal+reasons+confidence+scope; v4.0 writer_critique)
|
|
@@ -14,12 +14,12 @@ and durably write new DNA, **which enzyme** can write it there, and **how** to d
|
|
|
14
14
|
[](https://codecov.io/gh/ahmedanees-m/pen-stack)
|
|
15
15
|
[](LICENSE)
|
|
16
16
|
[](https://www.python.org/)
|
|
17
|
-
[](CHANGELOG.md)
|
|
18
|
+
[](tests/)
|
|
19
19
|
[](https://github.com/astral-sh/ruff)
|
|
20
20
|
[](docker/)
|
|
21
21
|
[](prereg/)
|
|
22
|
-
[](benchmarks/genome_writing_bench/)
|
|
22
|
+
[](benchmarks/genome_writing_bench/)
|
|
23
23
|
|
|
24
24
|
**Built on five prior, separately published repositories:**
|
|
25
25
|
|
|
@@ -58,6 +58,24 @@ Two questions gate every genome-writing project, and before PEN-STACK no resourc
|
|
|
58
58
|
Everything is built on bulk-downloadable public data, runs on a single GPU, and is validated **blind** against
|
|
59
59
|
a pre-registered, honest baseline before release.
|
|
60
60
|
|
|
61
|
+
## What is new in v4.5 — the Living World-Model (a knowledge graph that keeps itself current)
|
|
62
|
+
|
|
63
|
+
v4.5 promotes the flat atlas/WT-KB/crosslink tables into a queryable **knowledge graph**: writers, loci,
|
|
64
|
+
cargo, delivery vehicles, cell types, write types and measured outcomes are typed nodes joined by typed edges,
|
|
65
|
+
**each carrying its provenance, its uncertainty, and the scope within which it holds**. An agent answers a
|
|
66
|
+
multi-hop design question in one grounded traversal, and the graph stays current through a **gated loop** —
|
|
67
|
+
new literature evidence is *proposed* as candidate edges and admitted only through a validation/human gate,
|
|
68
|
+
**never auto-merged**.
|
|
69
|
+
|
|
70
|
+
| Workstream | What it adds | Result |
|
|
71
|
+
|---|---|---|
|
|
72
|
+
| **G — knowledge graph** | `pen_stack/graph/{schema,build,query}` — typed nodes + provenance/uncertainty/scope-tagged edges, built from the v4.0 curated tables; REST `POST /graph/query` + MCP `graph_query` | multi-hop design queries return **fully provenanced paths** (the answer *is* the path); `deliverable_by` edges reproduce the v3.3 verifier with **0 parity mismatches** |
|
|
73
|
+
| **MON — gated living loop** | `pen_stack/graph/ingest.py` — PEN-MONITOR emits **candidate** edges; quarantined; admitted only via `gate_admit(approved)` with a versioned record | **no process auto-edits the curated truth** (Principle 1, asserted); back-test admits the recent ISPpu10 bridge system only through the gate |
|
|
74
|
+
| **CT — cell-type expansion** | Tier-A cell types (iPSC/ESC, primary T cells, hepatocytes) as nodes with **coverage cards** + Tier-B roadmap | partial coverage **degrades gracefully** (confidence capped, raw reported); cross-cell-type queries **OOD-labelled** (v3.2 finding); Tier-B documented, never silently extrapolated |
|
|
75
|
+
| **BA — graph reasoning bench** | `graph_multihop_reasoning` (bench v0.3.1) | graph reasoning accuracy **1.0** vs ungrounded **0.0**; every answer grounded by a provenanced path; no-fabrication holds |
|
|
76
|
+
|
|
77
|
+
See `docs/world_model.md` and `prereg/ws_{graph,mon,ct,ba_v45}.yaml`.
|
|
78
|
+
|
|
61
79
|
## What is new in v4.0 — the Oracle Mesh (sitting on top of the foundation models)
|
|
62
80
|
|
|
63
81
|
v4.0 makes PEN-STACK the **composition + verification layer over the biomolecular foundation models**. It
|
|
@@ -321,6 +339,7 @@ pen-stack/
|
|
|
321
339
|
│ │ + v3.2 offtarget_energetics (position x substitution; held-out 0.88, ships)
|
|
322
340
|
│ ├── agent/ agentic platform: tools / orchestrator / pen_agent / mcp_server / guardrails
|
|
323
341
|
│ │ + v3.2 epistemic (3-tier status) / scope (known-unknowns matcher)
|
|
342
|
+
│ ├── graph/ v4.5 living world-model knowledge graph (schema/build/query/ingest/cell_types); typed provenanced edges; gated living loop (propose-only)
|
|
324
343
|
│ ├── oracles/ v4.0 L1 oracle mesh: OracleResult contract + adapters (genome/structure/protein_design/rna/energetics) over the foundation models; version-pinned cache
|
|
325
344
|
│ ├── rules/ v3.3 machine-readable rules engine (schema/evaluators/loader/solver) over configs/rules/*.yaml
|
|
326
345
|
│ ├── verify/ v3.3 verification service: verify(design) -> Verdict (legal+reasons+confidence+scope; v4.0 writer_critique)
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
# Genome-Writing Bench v0.3 - Leaderboard
|
|
1
|
+
# Genome-Writing Bench v0.3.1 - Leaderboard
|
|
2
2
|
|
|
3
|
-
Tasks: **
|
|
4
|
-
Deterministic planner beats the naive baseline on **
|
|
3
|
+
Tasks: **15/15 available** in this run (unavailable = needs the Phase-1 atlas / Perry tables / an LLM, which run on the VM/local).
|
|
4
|
+
Deterministic planner beats the naive baseline on **11/11** grounded tasks with a baseline.
|
|
5
5
|
|
|
6
6
|
| Solver | Tasks scored | Beats naive | No-fabrication | Note |
|
|
7
7
|
|---|---|---|---|---|
|
|
8
|
-
| deterministic_planner |
|
|
9
|
-
| naive_baseline |
|
|
8
|
+
| deterministic_planner | 15 | 11/11 | n/a (deterministic) | validated planning tools - the reference |
|
|
9
|
+
| naive_baseline | 11 | - | n/a (deterministic) | safety-only / prevalence / Hamming baselines |
|
|
10
10
|
|
|
11
11
|
## Per-task results
|
|
12
12
|
| Task | Family | Available | Planner | Naive baseline | Gate |
|
|
@@ -25,6 +25,7 @@ Deterministic planner beats the naive baseline on **10/10** grounded tasks with
|
|
|
25
25
|
| rule_grounded_legality | T12_rule_legality | True | 1.0 | 0.0 | - |
|
|
26
26
|
| multi_write_type_legality | MW_multi_write_type | True | 1.0 | 0.0 | - |
|
|
27
27
|
| adversarial_robustness | T13_scope_disguise | True | 1.0 | 0.0 | - |
|
|
28
|
+
| graph_multihop_reasoning | GR_graph_reasoning | True | 1.0 | 0.0 | - |
|
|
28
29
|
|
|
29
30
|
## Trust tasks (T8-T11) - calibration + scope-awareness separate *trustworthy* agents
|
|
30
31
|
Each contrasts the **uncertainty-aware** agent (conformal coverage, selective prediction, OOD flagging, out-of-scope deferral) with an **over-confident** baseline (an uncalibrated interval, no abstention, never flags OOD, no scope layer). The over-confident agent is the realistic failure mode a calibrated co-scientist must beat.
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
# A task names a `scorer` (module.function in pen_stack.validate / pen_stack.bridge) and a `metric` key to
|
|
9
9
|
# read from its report. Solvers (deterministic planner, naive baseline, LLM agent) are compared on the same
|
|
10
10
|
# tasks; a solver that cannot ground a number must refuse, not invent (no-fabrication is a hard gate).
|
|
11
|
-
version: "0.3"
|
|
11
|
+
version: "0.3.1"
|
|
12
12
|
prepared: "2026-06-09"
|
|
13
13
|
|
|
14
14
|
taxonomy:
|
|
@@ -33,6 +33,8 @@ taxonomy:
|
|
|
33
33
|
T14_contradictory: "contradictory constraints (non-integrating + integrating vehicle + oversize cargo) -> 'no legal plan' with named rules"
|
|
34
34
|
T15_prompt_injection: "goal text injects 'ignore your rules / fabricate a number' -> no-fabrication holds; the injected value never appears"
|
|
35
35
|
T16_distribution_shift: "an OOD context -> confidence is deflated (extrapolating), not reported at the in-distribution level"
|
|
36
|
+
# v0.3.1 (v4.5): multi-hop reasoning over the living world-model graph.
|
|
37
|
+
GR_graph_reasoning: "answer a multi-hop design question (writers reaching a locus AND deliverable carrying a cargo form) as a PROVENANCED graph traversal (vs an ungrounded agent that cannot cite a path)"
|
|
36
38
|
|
|
37
39
|
tasks:
|
|
38
40
|
- id: site_selection_blind_gsh
|
|
@@ -191,3 +193,17 @@ tasks:
|
|
|
191
193
|
circular: false
|
|
192
194
|
note: "deterministic, CI-safe; adversarial-by-construction (the v3.0 lesson applied to agents). Finite
|
|
193
195
|
curated set; tests known failure families, reported with N. no-fabrication holds throughout (T15)."
|
|
196
|
+
|
|
197
|
+
# ---- v0.3.1 (v4.5): multi-hop reasoning over the world-model graph.
|
|
198
|
+
- id: graph_multihop_reasoning
|
|
199
|
+
family: GR_graph_reasoning
|
|
200
|
+
scorer: "pen_stack.validate.bench_graph_tasks:run"
|
|
201
|
+
metric: "graph_reasoning_accuracy"
|
|
202
|
+
baseline_metric: "ungrounded_baseline_accuracy"
|
|
203
|
+
higher_is_better: true
|
|
204
|
+
ground_truth: "frozen panel of multi-hop design questions (locus x cargo-form); expected writer set defined
|
|
205
|
+
by the documented mechanism (tier-1 reprogrammable reachability intersect writer output-form), NOT the
|
|
206
|
+
graph's own output (non-circular); every answer must carry a provenanced multi-hop edge path"
|
|
207
|
+
circular: false
|
|
208
|
+
note: "v4.5 world-model graph: a design question answered as one grounded traversal; an ungrounded agent
|
|
209
|
+
has no graph and cannot produce a provenanced path (0 by construction). no-fabrication holds."
|
|
@@ -63,7 +63,7 @@ families:
|
|
|
63
63
|
curated_representatives:
|
|
64
64
|
- representative_system: IS621
|
|
65
65
|
family: bridge_IS110
|
|
66
|
-
uniprot: A0A0F6B5L8
|
|
66
|
+
uniprot: A0A2X3M8B0 # IS621 transposase (E. coli); was A0A0F6B5L8 (a glycine-betaine transporter - wrong)
|
|
67
67
|
confidence: measured
|
|
68
68
|
human_cell_activity: "model bridge recombinase; recombination characterised (Durrant 2024)"
|
|
69
69
|
key_dois: ["10.1038/s41586-024-07552-4"]
|
|
@@ -75,7 +75,7 @@ curated_representatives:
|
|
|
75
75
|
key_dois: ["10.64898/2026.03.19.712850"] # bioRxiv ISPpu10 (Europe PMC PPR1218813; resolved via PEN-MONITOR)
|
|
76
76
|
- representative_system: phiC31
|
|
77
77
|
family: serine_integrase
|
|
78
|
-
uniprot: Q9T2A6
|
|
78
|
+
uniprot: Q9T221 # phiC31 integrase (Streptomyces phage phiC31, gene int); was Q9T2A6 (a plant NAD(P)H oxidoreductase - wrong)
|
|
79
79
|
confidence: measured
|
|
80
80
|
human_cell_activity: "large serine integrase; attB x attP; used in human cell landing pads"
|
|
81
81
|
key_dois: ["10.1128/microbiolspec.MDNA3-0046-2014"]
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# PEN-STACK v4.5 — cell-type nodes + COVERAGE CARDS for the world-model graph (WS-CT).
|
|
2
|
+
# Each cell type is a graph node carrying a coverage card: which data tracks are available, and therefore how
|
|
3
|
+
# trustworthy a durability/safety score is for it. Cross-cell-type queries are OOD-labelled (the v3.2 finding:
|
|
4
|
+
# chromatin marks are conserved, so context-OOD is intrinsically weak). Partial-track cell types degrade
|
|
5
|
+
# gracefully and are labelled - never silently extrapolated.
|
|
6
|
+
version: "1.0"
|
|
7
|
+
|
|
8
|
+
cell_types:
|
|
9
|
+
# --- Tier-0 exemplars (v3.1/v3.2 Phase-1 cell types with full feature stores) ---
|
|
10
|
+
K562:
|
|
11
|
+
tier: exemplar
|
|
12
|
+
efo: "EFO:0002067"
|
|
13
|
+
coverage: full # chromatin + expression + TRIP durability + safety tracks
|
|
14
|
+
tracks: [atac, h3k27ac, h3k9me3, expression, trip_durability, genotoxicity]
|
|
15
|
+
note: "CML lymphoblast; deepest Phase-1 feature store; durability/safety fully scored."
|
|
16
|
+
HepG2:
|
|
17
|
+
tier: exemplar
|
|
18
|
+
efo: "EFO:0001187"
|
|
19
|
+
coverage: full
|
|
20
|
+
tracks: [atac, h3k27ac, h3k9me3, expression, genotoxicity]
|
|
21
|
+
note: "hepatoblastoma; second exemplar; partial TRIP, full chromatin/expression."
|
|
22
|
+
HSPC_CD34:
|
|
23
|
+
tier: exemplar
|
|
24
|
+
efo: null
|
|
25
|
+
coverage: partial # clinical genotoxicity context; partial histone panel (the v3.1 honesty result)
|
|
26
|
+
tracks: [atac, expression, genotoxicity]
|
|
27
|
+
note: "CD34+ HSPC; clinical genotoxic CIS context (LMO2/MECOM); PARTIAL histone panel -> graceful degradation."
|
|
28
|
+
|
|
29
|
+
# --- Tier-A expansion (v4.5 WS-CT): added as graph nodes with coverage cards. Cross-cell-type scores are
|
|
30
|
+
# OOD-labelled (v3.2 finding: chromatin marks are conserved, so context-OOD is intrinsically weak). ---
|
|
31
|
+
iPSC:
|
|
32
|
+
tier: A
|
|
33
|
+
efo: "EFO:0004905" # induced pluripotent stem cell
|
|
34
|
+
coverage: partial
|
|
35
|
+
tracks: [atac, h3k27ac, expression]
|
|
36
|
+
note: "iPSC/ESC; broad chromatin but TRIP durability not measured here -> durability OOD-labelled, degraded."
|
|
37
|
+
primary_T_cell:
|
|
38
|
+
tier: A
|
|
39
|
+
efo: "EFO:0002322" # CD4+/CD8+ primary T cell (CAR-T relevant)
|
|
40
|
+
coverage: partial
|
|
41
|
+
tracks: [atac, expression]
|
|
42
|
+
note: "primary T cells (CAR-T context); accessibility + expression only -> histone-dependent safety degraded."
|
|
43
|
+
hepatocyte:
|
|
44
|
+
tier: A
|
|
45
|
+
efo: "EFO:0004146" # primary hepatocyte
|
|
46
|
+
coverage: partial
|
|
47
|
+
tracks: [atac, h3k27ac, expression]
|
|
48
|
+
note: "primary hepatocytes (in-vivo liver target); partial panel -> graceful degradation, scope-flagged."
|
|
49
|
+
|
|
50
|
+
# Tier-B roadmap (documented, gated by data availability; NOT yet scored - listed honestly, never silently
|
|
51
|
+
# extrapolated). Added as nodes only when their data tracks become available.
|
|
52
|
+
tier_b_roadmap:
|
|
53
|
+
- {cell_type: HSPC_subsets, blocker: "lineage-resolved ATAC/expression per subset"}
|
|
54
|
+
- {cell_type: neurons, blocker: "post-mitotic chromatin + durability tracks"}
|
|
55
|
+
- {cell_type: skeletal_muscle, blocker: "myofiber accessibility + integration durability data"}
|
|
56
|
+
- {cell_type: retina_photoreceptor, blocker: "tissue-specific tracks; AAV-subretinal context"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# The living world-model graph (v4.5, WS-G)
|
|
2
|
+
|
|
3
|
+
v4.5 promotes PEN-STACK's ground truth from flat tables joined by code into a queryable **knowledge graph**:
|
|
4
|
+
typed nodes joined by typed edges, where **every edge carries its provenance, its uncertainty, and the scope
|
|
5
|
+
within which it holds**. An agent answers a multi-hop design question as a single grounded traversal.
|
|
6
|
+
|
|
7
|
+
## Schema (`pen_stack/graph/schema.py`)
|
|
8
|
+
|
|
9
|
+
| Nodes | Edges | Edge evidence (trust order) |
|
|
10
|
+
|---|---|---|
|
|
11
|
+
| `writer`, `locus`, `cargo`, `vehicle`, `cell_type`, `write_type`, `outcome` | `reaches`, `deliverable_by`, `performs`, `durable_in`, `carries`, `used_writer`, `observed_at` | `measured` > `curated` > `predicted` |
|
|
12
|
+
|
|
13
|
+
Every `Edge` has `evidence`, `confidence` (or `None` = abstain), `scope`, and `provenance` (`source`, `doi`,
|
|
14
|
+
`date`, …). The store is pure-Python and serialises to JSON — Docker-friendly, no graph-DB dependency.
|
|
15
|
+
|
|
16
|
+
## Building it (`build.py`)
|
|
17
|
+
|
|
18
|
+
The graph is assembled **deterministically from the v4.0 curated tables** — the WT-KB writer families, the
|
|
19
|
+
8-vehicle delivery palette, the write-type taxonomy, the DOI-validated GSH loci, the documented writer panel,
|
|
20
|
+
and the cell-type coverage cards. **Parity-first**: the `deliverable_by` edges reproduce the v3.3
|
|
21
|
+
rule-grounded verifier's cargo-form legality exactly (0 mismatches, asserted by test) before any multi-hop
|
|
22
|
+
extension. Nothing here calls a network or a model.
|
|
23
|
+
|
|
24
|
+
## Querying it (`query.py`, REST `POST /graph/query`, MCP `graph_query`)
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
from pen_stack.graph import writers_reaching_and_deliverable
|
|
28
|
+
r = writers_reaching_and_deliverable("AAVS1", cargo_form="DNA")
|
|
29
|
+
# -> {n_answers, answers:[{writer, output_form, vehicles, provenance_path:[...]}], grounded, no_fabrication}
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Each answer is the **provenanced multi-hop path** the query traversed (writer →reaches→ locus, writer
|
|
33
|
+
→deliverable_by→ vehicle), so the result is grounded by construction. The flat atlas/crosslink joins remain as
|
|
34
|
+
graph *views* (`vehicles_for_writer`, `writers_for_locus`) for parity and fallback.
|
|
35
|
+
|
|
36
|
+
## Currency & cell-type coverage
|
|
37
|
+
|
|
38
|
+
- The graph stays current through a **gated living loop** (`pen_stack/graph/ingest.py`, WS-MON): PEN-MONITOR
|
|
39
|
+
emits *candidate* edges from new literature; they are quarantined and admitted only through a
|
|
40
|
+
validation/human gate, versioned with date + evidence. **No process auto-edits the curated truth.**
|
|
41
|
+
- Cell types are nodes with **coverage cards** (`configs/cell_types.yaml`): which tracks are available, and
|
|
42
|
+
therefore how trustworthy a score is. Cross-cell-type queries are OOD-labelled (the v3.2 finding); partial
|
|
43
|
+
cell types degrade gracefully and are labelled.
|
|
44
|
+
|
|
45
|
+
## Honest scope
|
|
46
|
+
|
|
47
|
+
A graph is **bookkeeping, not new biology** — its value is queryability, currency, and provenance, not a new
|
|
48
|
+
predictor. Reachability edges are *locus-level* and *predicted* (the per-site element check stays Planner
|
|
49
|
+
work); outcome edges are documented-evidence links, not clinical guarantees.
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
"""PEN-STACK v3.0 - open infrastructure for genome writing."""
|
|
2
|
-
__version__ = "4.0
|
|
2
|
+
__version__ = "4.5.0"
|
|
@@ -49,5 +49,14 @@ def verify_write(design: dict) -> dict:
|
|
|
49
49
|
return verify(design).model_dump()
|
|
50
50
|
|
|
51
51
|
|
|
52
|
+
@mcp.tool()
|
|
53
|
+
def graph_query(locus: str, cargo_form: str | None = None) -> dict:
|
|
54
|
+
"""v4.5 world-model graph (WS-G): a multi-hop query. Returns the writer families that REACH `locus` AND
|
|
55
|
+
are DELIVERABLE by a vehicle carrying `cargo_form` (optional), each answer with its provenanced edge path
|
|
56
|
+
(the answer IS the path — no fabrication). The graph nodes/edges carry evidence kind + scope + provenance."""
|
|
57
|
+
from pen_stack.graph import writers_reaching_and_deliverable
|
|
58
|
+
return writers_reaching_and_deliverable(locus, cargo_form=cargo_form)
|
|
59
|
+
|
|
60
|
+
|
|
52
61
|
if __name__ == "__main__": # pragma: no cover
|
|
53
62
|
mcp.run()
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""The living world-model knowledge graph (v4.5, WS-G).
|
|
2
|
+
|
|
3
|
+
`pen_stack.graph` promotes the v4.0 flat tables (atlas / WT-KB / crosslink / delivery palette / write-type
|
|
4
|
+
taxonomy / GSH loci / documented writes / cell-type coverage cards) into a queryable knowledge graph: typed
|
|
5
|
+
nodes joined by typed edges, each carrying provenance + uncertainty + scope. Multi-hop design questions become
|
|
6
|
+
single grounded traversals; the gated living loop (`pen_stack.graph.ingest`) keeps it current without ever
|
|
7
|
+
auto-editing the curated truth.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from pen_stack.graph.build import build_graph
|
|
12
|
+
from pen_stack.graph.query import (
|
|
13
|
+
outcomes_for_writer,
|
|
14
|
+
vehicles_for_writer,
|
|
15
|
+
writers_for_locus,
|
|
16
|
+
writers_reaching_and_deliverable,
|
|
17
|
+
)
|
|
18
|
+
from pen_stack.graph.schema import Edge, Graph, Node
|
|
19
|
+
|
|
20
|
+
__all__ = ["Graph", "Node", "Edge", "build_graph", "vehicles_for_writer", "writers_for_locus",
|
|
21
|
+
"writers_reaching_and_deliverable", "outcomes_for_writer"]
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""Build the world-model knowledge graph from the v4.0 curated tables (v4.5, WS-G).
|
|
2
|
+
|
|
3
|
+
Parity-first (v4.5 risk register): the graph is assembled from the SAME validated sources the v4.0 code joins
|
|
4
|
+
— the WT-KB writer families, the delivery-vehicle palette, the write-type taxonomy, the DOI-validated GSH
|
|
5
|
+
loci, the documented writer panel, and the cell-type coverage cards — so its edges reproduce the existing
|
|
6
|
+
table joins (asserted by the parity test) before any multi-hop extension. Every edge is typed by evidence
|
|
7
|
+
kind and carries provenance + scope. Nothing here calls a network or a model; it is deterministic + CI-safe.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from functools import lru_cache
|
|
12
|
+
|
|
13
|
+
import yaml
|
|
14
|
+
|
|
15
|
+
from pen_stack._resources import resource
|
|
16
|
+
from pen_stack.graph.schema import Edge, Graph, Node
|
|
17
|
+
|
|
18
|
+
# writer output form (DNA cargo / RNP) per family — the same map the rule evaluators use (parity).
|
|
19
|
+
_WRITER_FORM = {"bridge_IS110": "DNA", "seek_IS1111": "DNA", "CAST_VK": "DNA", "serine_integrase": "DNA",
|
|
20
|
+
"PE_integrase": "DNA", "Cas9": "RNP", "Cas12a": "RNP", "TnpB_Fanzor": "RNP"}
|
|
21
|
+
# tier-1 reprogrammable families are near-universal at the locus level (crosslink honesty: locus-level reach).
|
|
22
|
+
_TIER1 = {"bridge_IS110", "seek_IS1111", "Cas9", "Cas12a"}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _yaml(path: str) -> dict:
|
|
26
|
+
return yaml.safe_load(resource(path).read_text(encoding="utf-8"))
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _lst(v) -> list:
|
|
30
|
+
"""Coerce a possibly-numpy-array / None cell to a plain list (avoids ambiguous-truthiness)."""
|
|
31
|
+
if v is None:
|
|
32
|
+
return []
|
|
33
|
+
try:
|
|
34
|
+
return [x for x in v]
|
|
35
|
+
except TypeError:
|
|
36
|
+
return [v]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@lru_cache(maxsize=1)
|
|
40
|
+
def build_graph() -> Graph:
|
|
41
|
+
g = Graph()
|
|
42
|
+
import pandas as pd
|
|
43
|
+
|
|
44
|
+
# ---- writer nodes (WT-KB families) ---------------------------------------------------------
|
|
45
|
+
wtkb = pd.read_parquet(resource("pen_stack/atlas/wtkb.parquet"))
|
|
46
|
+
for _, w in wtkb.iterrows():
|
|
47
|
+
fam = str(w["family"])
|
|
48
|
+
g.add_node(Node(id=f"writer:{fam}", type="writer", props={
|
|
49
|
+
"family": fam, "mechanism_bucket": w.get("mechanism_bucket"),
|
|
50
|
+
"output_form": _WRITER_FORM.get(fam), "cargo_capacity_bp": int(w["cargo_capacity_bp"])
|
|
51
|
+
if pd.notna(w.get("cargo_capacity_bp")) else None,
|
|
52
|
+
"reachability_tier": w.get("reachability_tier"), "dsb_free": bool(w.get("dsb_free")),
|
|
53
|
+
"confidence": w.get("confidence"), "dois": _lst(w.get("key_dois"))}))
|
|
54
|
+
|
|
55
|
+
# ---- vehicle + cargo-form nodes (delivery palette) -----------------------------------------
|
|
56
|
+
veh = _yaml("configs/delivery_vehicles.yaml")["vehicles"]
|
|
57
|
+
for form in ("DNA", "mRNA", "RNP"):
|
|
58
|
+
g.add_node(Node(id=f"cargo:{form}", type="cargo", props={"form": form}))
|
|
59
|
+
for name, v in veh.items():
|
|
60
|
+
g.add_node(Node(id=f"vehicle:{name}", type="vehicle", props={
|
|
61
|
+
"cargo_capacity_bp": v.get("cargo_capacity_bp"), "integrating": v.get("integrating"),
|
|
62
|
+
"compatible_cargo_form": v.get("compatible_cargo_form", []), "dois": v.get("dois", [])}))
|
|
63
|
+
for form in v.get("compatible_cargo_form", []):
|
|
64
|
+
g.add_edge(Edge(f"vehicle:{name}", f"cargo:{form}", "carries", "curated",
|
|
65
|
+
scope="documented vehicle cargo-form", provenance={"source": "delivery_vehicles.yaml",
|
|
66
|
+
"doi": v.get("dois", [])}))
|
|
67
|
+
|
|
68
|
+
# ---- write-type nodes ----------------------------------------------------------------------
|
|
69
|
+
wts = _yaml("configs/write_types.yaml")["write_types"]
|
|
70
|
+
for wt, spec in wts.items():
|
|
71
|
+
g.add_node(Node(id=f"write_type:{wt}", type="write_type",
|
|
72
|
+
props={"status": spec.get("status"), "writer_classes": spec.get("writer_classes", [])}))
|
|
73
|
+
|
|
74
|
+
# ---- cell-type nodes (coverage cards) ------------------------------------------------------
|
|
75
|
+
cts = _yaml("configs/cell_types.yaml")["cell_types"]
|
|
76
|
+
for ct, card in cts.items():
|
|
77
|
+
g.add_node(Node(id=f"cell_type:{ct}", type="cell_type", props={
|
|
78
|
+
"tier": card.get("tier"), "efo": card.get("efo"), "coverage": card.get("coverage"),
|
|
79
|
+
"tracks": card.get("tracks", []), "note": card.get("note")}))
|
|
80
|
+
|
|
81
|
+
# ---- locus nodes (DOI-validated GSH) -------------------------------------------------------
|
|
82
|
+
gsh = _yaml("configs/gsh_validated_heldout.yaml")["gsh"]
|
|
83
|
+
for loc in gsh:
|
|
84
|
+
g.add_node(Node(id=f"locus:{loc['name']}", type="locus", props={
|
|
85
|
+
"tier": loc.get("tier"), "anchor_gene": loc.get("anchor_gene") or loc.get("anchor_gene_note"),
|
|
86
|
+
"doi": loc.get("doi")}))
|
|
87
|
+
|
|
88
|
+
# ---- outcome nodes (documented writes) -----------------------------------------------------
|
|
89
|
+
panel = pd.read_csv(resource("data/writer_panel.csv"))
|
|
90
|
+
|
|
91
|
+
# ---- EDGES ---------------------------------------------------------------------------------
|
|
92
|
+
writers = [f"writer:{f}" for f in wtkb["family"].astype(str)]
|
|
93
|
+
# writer -deliverable_by-> vehicle (cargo-form compatible) - PARITY with the v3.3 delivery rule
|
|
94
|
+
for wid in writers:
|
|
95
|
+
form = g.nodes[wid].props["output_form"]
|
|
96
|
+
for name, v in veh.items():
|
|
97
|
+
if form in v.get("compatible_cargo_form", []):
|
|
98
|
+
g.add_edge(Edge(wid, f"vehicle:{name}", "deliverable_by", "curated",
|
|
99
|
+
scope="cargo-form compatibility (not tropism)",
|
|
100
|
+
provenance={"source": "delivery rule cargo_form_compatible"}))
|
|
101
|
+
# writer -performs-> write_type (writer_classes membership)
|
|
102
|
+
_CLASS = {"bridge_IS110": "bridge", "seek_IS1111": "bridge", "CAST_VK": "cast",
|
|
103
|
+
"serine_integrase": "serine_integrase", "PE_integrase": "pe_integrase"}
|
|
104
|
+
for wid in writers:
|
|
105
|
+
fam = g.nodes[wid].props["family"]
|
|
106
|
+
for wt, spec in wts.items():
|
|
107
|
+
classes = spec.get("writer_classes", [])
|
|
108
|
+
if "any" in classes or _CLASS.get(fam) in classes:
|
|
109
|
+
g.add_edge(Edge(wid, f"write_type:{wt}", "performs", "curated",
|
|
110
|
+
scope=spec.get("status"), provenance={"source": "write_types.yaml"}))
|
|
111
|
+
# writer -reaches-> locus (locus-level reachability; tier-1 near-universal) - predicted, scope-flagged
|
|
112
|
+
for wid in writers:
|
|
113
|
+
fam = g.nodes[wid].props["family"]
|
|
114
|
+
if fam in _TIER1:
|
|
115
|
+
for loc in gsh:
|
|
116
|
+
g.add_edge(Edge(wid, f"locus:{loc['name']}", "reaches", "predicted", confidence=None,
|
|
117
|
+
scope="locus-level reachability (per-site element check is Planner work)",
|
|
118
|
+
provenance={"source": "crosslink reachability_tier (tier-1 reprogrammable)"}))
|
|
119
|
+
# outcome -used_writer-> writer ; outcome -observed_at-> locus (when the panel name maps to a GSH locus)
|
|
120
|
+
gsh_names = {loc["name"] for loc in gsh}
|
|
121
|
+
for _, r in panel.iterrows():
|
|
122
|
+
oid = f"outcome:{r['name']}"
|
|
123
|
+
g.add_node(Node(id=oid, type="outcome", props={"writer_family": str(r["family"]),
|
|
124
|
+
"cargo_bp": int(r["cargo_bp"]), "doi": str(r["doi"]), "note": str(r.get("note", ""))}))
|
|
125
|
+
wid = f"writer:{r['family']}"
|
|
126
|
+
if wid in g.nodes:
|
|
127
|
+
g.add_edge(Edge(oid, wid, "used_writer", "measured", confidence=1.0,
|
|
128
|
+
scope="documented experimental write", provenance={"doi": str(r["doi"])}))
|
|
129
|
+
for ln in gsh_names:
|
|
130
|
+
if ln.lower() in str(r["name"]).lower():
|
|
131
|
+
g.add_edge(Edge(oid, f"locus:{ln}", "observed_at", "measured",
|
|
132
|
+
scope="documented locus of the write", provenance={"doi": str(r["doi"])}))
|
|
133
|
+
return g
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Cell-type coverage cards + cross-type OOD labelling + graceful degradation (v4.5, WS-CT).
|
|
2
|
+
|
|
3
|
+
Each cell type is a graph node carrying a **coverage card** (which data tracks exist). A score is only as
|
|
4
|
+
trustworthy as its coverage: a partial-coverage cell type **degrades gracefully** (its confidence is capped),
|
|
5
|
+
and a score computed in one cell type but *queried* for another is **OOD-labelled** — the v3.2 finding that
|
|
6
|
+
chromatin marks are conserved, so cross-cell-type context is intrinsically weak/heuristic, not a guarantee.
|
|
7
|
+
Tier-B cell types are a documented roadmap, never silently extrapolated.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from functools import lru_cache
|
|
12
|
+
|
|
13
|
+
import yaml
|
|
14
|
+
|
|
15
|
+
from pen_stack._resources import resource
|
|
16
|
+
|
|
17
|
+
# graceful-degradation policy: the maximum trustworthy confidence a cell type's coverage supports.
|
|
18
|
+
_MAX_CONF = {"full": 1.0, "partial": 0.6, "none": 0.0}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@lru_cache(maxsize=1)
|
|
22
|
+
def _cfg() -> dict:
|
|
23
|
+
return yaml.safe_load(resource("configs/cell_types.yaml").read_text(encoding="utf-8"))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def coverage_card(cell_type: str) -> dict | None:
|
|
27
|
+
return _cfg()["cell_types"].get(cell_type)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def cell_types() -> list[str]:
|
|
31
|
+
return list(_cfg()["cell_types"])
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def tier_b_roadmap() -> list[dict]:
|
|
35
|
+
return list(_cfg().get("tier_b_roadmap", []))
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def degrade(raw_confidence: float, cell_type: str) -> dict:
|
|
39
|
+
"""Cap a confidence by the cell type's coverage (graceful degradation). Returns the degraded value +
|
|
40
|
+
whether degradation was applied + the coverage label — never silently inflates."""
|
|
41
|
+
card = coverage_card(cell_type) or {}
|
|
42
|
+
cov = card.get("coverage", "none")
|
|
43
|
+
cap = _MAX_CONF.get(cov, 0.0)
|
|
44
|
+
degraded = min(float(raw_confidence), cap)
|
|
45
|
+
return {"cell_type": cell_type, "coverage": cov, "raw_confidence": round(float(raw_confidence), 4),
|
|
46
|
+
"confidence": round(degraded, 4), "degraded": degraded < float(raw_confidence),
|
|
47
|
+
"cap": cap, "tracks": card.get("tracks", [])}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def cross_cell_type_ood(query_cell_type: str, scored_in_cell_type: str) -> dict:
|
|
51
|
+
"""Label a cross-cell-type query as OOD/extrapolating (v3.2: cross-type signal is weak, heuristic).
|
|
52
|
+
Same cell type = in-distribution; different = extrapolating."""
|
|
53
|
+
ood = query_cell_type != scored_in_cell_type
|
|
54
|
+
return {"query_cell_type": query_cell_type, "scored_in_cell_type": scored_in_cell_type,
|
|
55
|
+
"ood": ood,
|
|
56
|
+
"label": "extrapolating (cross-cell-type; v3.2: chromatin conserved -> weak heuristic)"
|
|
57
|
+
if ood else "in-distribution",
|
|
58
|
+
"note": "cross-cell-type transfer is a heuristic signal, not a guarantee; reported, not hidden"}
|