pen-stack 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pen_stack/__init__.py +2 -0
- pen_stack/_resources.py +34 -0
- pen_stack/adapt/__init__.py +14 -0
- pen_stack/adapt/finetune.py +33 -0
- pen_stack/adapt/ingest.py +86 -0
- pen_stack/adapt/pipeline.py +101 -0
- pen_stack/adapt/recalibrate.py +58 -0
- pen_stack/adapt/report.py +130 -0
- pen_stack/agent/__init__.py +1 -0
- pen_stack/agent/guardrails.py +49 -0
- pen_stack/agent/mcp_server.py +42 -0
- pen_stack/agent/orchestrator.py +106 -0
- pen_stack/agent/pen_agent.py +169 -0
- pen_stack/agent/tools.py +130 -0
- pen_stack/atlas/__init__.py +1 -0
- pen_stack/atlas/build_wtkb.py +80 -0
- pen_stack/atlas/crosslink.py +144 -0
- pen_stack/atlas/expand.py +190 -0
- pen_stack/atlas/schema.py +59 -0
- pen_stack/atlas/scorecard.py +134 -0
- pen_stack/atlas/universe.py +75 -0
- pen_stack/atlas/variant_propose.py +155 -0
- pen_stack/bridge/__init__.py +1 -0
- pen_stack/bridge/activity.py +52 -0
- pen_stack/bridge/cli.py +65 -0
- pen_stack/bridge/fold_qc.py +53 -0
- pen_stack/bridge/guide_qc.py +84 -0
- pen_stack/bridge/ingest.py +139 -0
- pen_stack/bridge/offtarget.py +133 -0
- pen_stack/bridge/ortholog_screen.py +73 -0
- pen_stack/bridge/pipeline.py +83 -0
- pen_stack/cli.py +126 -0
- pen_stack/data/__init__.py +1 -0
- pen_stack/data/encode.py +84 -0
- pen_stack/data/genome.py +71 -0
- pen_stack/data/ingest_chromatin.py +119 -0
- pen_stack/data/ingest_integration.py +112 -0
- pen_stack/data/ingest_safety_annot.py +164 -0
- pen_stack/data/ingest_trip.py +76 -0
- pen_stack/mech/__init__.py +1 -0
- pen_stack/mech/classify_atlas.py +71 -0
- pen_stack/mech/whitelist.py +66 -0
- pen_stack/monitor/__init__.py +1 -0
- pen_stack/monitor/europepmc.py +32 -0
- pen_stack/monitor/run.py +57 -0
- pen_stack/monitor/triage.py +63 -0
- pen_stack/planner/__init__.py +1 -0
- pen_stack/planner/cargo.py +56 -0
- pen_stack/planner/cargo_polish.py +146 -0
- pen_stack/planner/delivery.py +32 -0
- pen_stack/planner/multiplex.py +110 -0
- pen_stack/planner/optimize.py +156 -0
- pen_stack/planner/pipeline.py +86 -0
- pen_stack/planner/report.py +26 -0
- pen_stack/rag/__init__.py +1 -0
- pen_stack/rag/index.py +53 -0
- pen_stack/rag/llm.py +178 -0
- pen_stack/rag/qa.py +105 -0
- pen_stack/score/__init__.py +1 -0
- pen_stack/score/recalibrate.py +77 -0
- pen_stack/score/therapeutic.py +85 -0
- pen_stack/server/__init__.py +1 -0
- pen_stack/server/api.py +142 -0
- pen_stack/ui/__init__.py +1 -0
- pen_stack/ui/app.py +518 -0
- pen_stack/validate/__init__.py +1 -0
- pen_stack/validate/adapt_demo.py +69 -0
- pen_stack/validate/agent_eval.py +117 -0
- pen_stack/validate/blind_gsh_discovery.py +165 -0
- pen_stack/validate/cargo_directionality.py +57 -0
- pen_stack/validate/durability_baselines.py +150 -0
- pen_stack/validate/forward_hypotheses.py +104 -0
- pen_stack/validate/guide_qc_demo.py +58 -0
- pen_stack/validate/intent_specification.py +82 -0
- pen_stack/validate/paper3_benchmark.py +165 -0
- pen_stack/validate/paper4_real_validation.py +144 -0
- pen_stack/validate/paper4_validation.py +82 -0
- pen_stack/validate/seq_vs_measured.py +134 -0
- pen_stack/validate/within_locus_ranking.py +74 -0
- pen_stack/validate/writer_recovery.py +86 -0
- pen_stack/wgenome/__init__.py +1 -0
- pen_stack/wgenome/chromatin_seq.py +83 -0
- pen_stack/wgenome/durability.py +108 -0
- pen_stack/wgenome/export_tracks.py +52 -0
- pen_stack/wgenome/features.py +82 -0
- pen_stack/wgenome/gsh_baseline.py +117 -0
- pen_stack/wgenome/providers.py +245 -0
- pen_stack/wgenome/safety.py +69 -0
- pen_stack/wgenome/structure3d.py +168 -0
- pen_stack/wgenome/writability.py +72 -0
- pen_stack-3.1.0.dist-info/METADATA +451 -0
- pen_stack-3.1.0.dist-info/RECORD +96 -0
- pen_stack-3.1.0.dist-info/WHEEL +5 -0
- pen_stack-3.1.0.dist-info/entry_points.txt +3 -0
- pen_stack-3.1.0.dist-info/licenses/LICENSE +21 -0
- pen_stack-3.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,451 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pen-stack
|
|
3
|
+
Version: 3.1.0
|
|
4
|
+
Summary: Open infrastructure for genome writing: the Writable Genome atlas, the Writer Atlas, and the Write Planner.
|
|
5
|
+
Author-email: Anees Ahmed Mahaboob Ali <ahmedaneesm@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ahmedanees-m/pen-stack
|
|
8
|
+
Project-URL: Repository, https://github.com/ahmedanees-m/pen-stack
|
|
9
|
+
Project-URL: Documentation, https://github.com/ahmedanees-m/pen-stack/tree/main/docs
|
|
10
|
+
Project-URL: Changelog, https://github.com/ahmedanees-m/pen-stack/blob/main/CHANGELOG.md
|
|
11
|
+
Project-URL: Issues, https://github.com/ahmedanees-m/pen-stack/issues
|
|
12
|
+
Project-URL: Benchmark, https://github.com/ahmedanees-m/pen-stack/tree/main/benchmarks/genome_writing_bench
|
|
13
|
+
Keywords: genome-writing,genome-editing,bridge-recombinase,safe-harbor,writable-genome,writer-atlas,write-planner,bioinformatics
|
|
14
|
+
Classifier: Development Status :: 4 - Beta
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
19
|
+
Classifier: Intended Audience :: Science/Research
|
|
20
|
+
Classifier: Operating System :: OS Independent
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: numpy>=1.26
|
|
27
|
+
Requires-Dist: pandas>=2.2
|
|
28
|
+
Requires-Dist: pyarrow>=16
|
|
29
|
+
Requires-Dist: scipy>=1.13
|
|
30
|
+
Requires-Dist: scikit-learn>=1.5
|
|
31
|
+
Requires-Dist: lightgbm>=4.5
|
|
32
|
+
Requires-Dist: pydantic>=2.8
|
|
33
|
+
Requires-Dist: pyyaml>=6.0
|
|
34
|
+
Requires-Dist: click>=8.1
|
|
35
|
+
Requires-Dist: requests>=2.32
|
|
36
|
+
Requires-Dist: tqdm>=4.66
|
|
37
|
+
Provides-Extra: models
|
|
38
|
+
Requires-Dist: xgboost>=2.1; extra == "models"
|
|
39
|
+
Requires-Dist: torch>=2.3; extra == "models"
|
|
40
|
+
Requires-Dist: statsmodels>=0.14; extra == "models"
|
|
41
|
+
Provides-Extra: bio
|
|
42
|
+
Requires-Dist: biopython>=1.84; extra == "bio"
|
|
43
|
+
Requires-Dist: pysam>=0.22; extra == "bio"
|
|
44
|
+
Requires-Dist: pyBigWig>=0.3; extra == "bio"
|
|
45
|
+
Requires-Dist: pybedtools>=0.10; extra == "bio"
|
|
46
|
+
Requires-Dist: ViennaRNA>=2.6; extra == "bio"
|
|
47
|
+
Provides-Extra: bridge
|
|
48
|
+
Requires-Dist: bridgernadesigner>=0.1.1; extra == "bridge"
|
|
49
|
+
Requires-Dist: ViennaRNA>=2.6; extra == "bridge"
|
|
50
|
+
Requires-Dist: pysam>=0.22; extra == "bridge"
|
|
51
|
+
Provides-Extra: server
|
|
52
|
+
Requires-Dist: fastapi>=0.111; extra == "server"
|
|
53
|
+
Requires-Dist: uvicorn>=0.30; extra == "server"
|
|
54
|
+
Provides-Extra: services
|
|
55
|
+
Requires-Dist: litellm>=1.44; extra == "services"
|
|
56
|
+
Requires-Dist: paper-qa>=5.10; extra == "services"
|
|
57
|
+
Requires-Dist: streamlit>=1.37; extra == "services"
|
|
58
|
+
Requires-Dist: fastmcp>=2.3; extra == "services"
|
|
59
|
+
Requires-Dist: beautifulsoup4>=4.12; extra == "services"
|
|
60
|
+
Provides-Extra: orchestrate
|
|
61
|
+
Requires-Dist: paramiko>=3.4; extra == "orchestrate"
|
|
62
|
+
Requires-Dist: scp>=0.15; extra == "orchestrate"
|
|
63
|
+
Requires-Dist: rich>=13.7; extra == "orchestrate"
|
|
64
|
+
Requires-Dist: python-dotenv>=1.0; extra == "orchestrate"
|
|
65
|
+
Provides-Extra: docs
|
|
66
|
+
Requires-Dist: mkdocs>=1.6; extra == "docs"
|
|
67
|
+
Requires-Dist: mkdocs-material>=9.5; extra == "docs"
|
|
68
|
+
Provides-Extra: dev
|
|
69
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
70
|
+
Requires-Dist: ruff>=0.5; extra == "dev"
|
|
71
|
+
Requires-Dist: pytest-cov>=5; extra == "dev"
|
|
72
|
+
Dynamic: license-file
|
|
73
|
+
|
|
74
|
+
<div align="center">
|
|
75
|
+
|
|
76
|
+
# PEN-STACK
|
|
77
|
+
|
|
78
|
+
### The Writable Genome - open infrastructure for genome *writing*
|
|
79
|
+
|
|
80
|
+
*Editing tools tell you **how** to change a base. PEN-STACK tells you **where** in the genome you can safely
|
|
81
|
+
and durably write new DNA, **which enzyme** can write it there, and **how** to design the write end-to-end.*
|
|
82
|
+
|
|
83
|
+
[](https://pypi.org/project/pen-stack/)
|
|
84
|
+
[](https://github.com/ahmedanees-m/pen-stack/actions/workflows/ci.yml)
|
|
85
|
+
[](https://github.com/ahmedanees-m/pen-stack/actions/workflows/publish.yml)
|
|
86
|
+
[](https://github.com/ahmedanees-m/pen-stack/actions/workflows/ci.yml)
|
|
87
|
+
[](https://codecov.io/gh/ahmedanees-m/pen-stack)
|
|
88
|
+
[](LICENSE)
|
|
89
|
+
[](https://www.python.org/)
|
|
90
|
+
[](CHANGELOG.md)
|
|
91
|
+
[](tests/)
|
|
92
|
+
[](https://github.com/astral-sh/ruff)
|
|
93
|
+
[](docker/)
|
|
94
|
+
[](prereg/)
|
|
95
|
+
[](benchmarks/genome_writing_bench/)
|
|
96
|
+
|
|
97
|
+
**Built on five prior, separately published repositories:**
|
|
98
|
+
|
|
99
|
+
[genome-atlas](https://github.com/ahmedanees-m/genome-atlas)
|
|
100
|
+
[mech-class](https://github.com/ahmedanees-m/mech-class)
|
|
101
|
+
[pen-score](https://github.com/ahmedanees-m/pen-score)
|
|
102
|
+
[pen-assemble](https://github.com/ahmedanees-m/pen-assemble)
|
|
103
|
+
[pen-compare](https://github.com/ahmedanees-m/pen-compare)
|
|
104
|
+
|
|
105
|
+
</div>
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## What is PEN-STACK?
|
|
110
|
+
|
|
111
|
+
PEN-STACK is a single, installable, pre-registered computational stack that builds the reference and design
|
|
112
|
+
layer the genome-**writing** era lacks. It consolidates five earlier research projects into one citable
|
|
113
|
+
package, then adds the two reference maps and the design engine the field was missing.
|
|
114
|
+
|
|
115
|
+
Genome **editing** changes a base or short stretch in place. Genome **writing** installs *new* information -
|
|
116
|
+
inserting genes, flipping or excising kilobases, placing programmable landing pads. Writing is the harder,
|
|
117
|
+
less-tooled, and more clinically transformative modality, and it is gated by questions that today have no
|
|
118
|
+
canonical answer.
|
|
119
|
+
|
|
120
|
+
## The problem, and the gaps PEN-STACK closes
|
|
121
|
+
|
|
122
|
+
Two questions gate every genome-writing project, and before PEN-STACK no resource answered them together:
|
|
123
|
+
|
|
124
|
+
| Gap | The problem today | What PEN-STACK provides |
|
|
125
|
+
|---|---|---|
|
|
126
|
+
| **Where can you write?** | Each lab re-derives an ad-hoc "safe harbour" shortlist from inconsistent criteria. Published lists range from ~2,000 sites to 25; none predict expression durability from a learned model, none are writer-aware, and most cover one cell type. | **The Writable Genome** - a learned, cell-type-aware, writer-aware atlas scoring every locus for *safety* (genotoxicity risk) x *durability* (will the cassette stay expressed) x *reachability* (which enzyme can engage it). |
|
|
127
|
+
| **What can write there, and how well?** | Enzyme capabilities are scattered across papers; no catalogue places all genome-writing families on common, measured axes with their targeting requirements. | **The Writer Atlas** - 33,370 enzyme systems across 8 families on common measured axes, joined to the Writable Genome by a bidirectional cross-link. |
|
|
128
|
+
| **How do I design the actual write?** | Destination, enzyme, cargo and delivery are interdependent and goal-dependent; no tool optimises them jointly. | **The Write Planner** - inverse design that, given a goal and an `edit_intent`, returns ranked, traceable site x writer x cargo x delivery plans. |
|
|
129
|
+
| **Where might my bridge-recombinase design go off-target?** | Bridge recombinases are the most programmable writers, but had no genome-wide off-target screening tool (a "CRISPOR" equivalent); their developers list this as future work. | **The bridge off-target engine** (`pen-bridge`) - measured-data-validated screening that *nominates and ranks candidate off-target locations* (a screen, not a per-site risk calculator). |
|
|
130
|
+
|
|
131
|
+
Everything is built on bulk-downloadable public data, runs on a single GPU, and is validated **blind** against
|
|
132
|
+
a pre-registered, honest baseline before release.
|
|
133
|
+
|
|
134
|
+
## What is new in v3.1
|
|
135
|
+
|
|
136
|
+
v3.1 hardens the honesty of the planning benchmark, surrounds the models with strong baselines, adds a
|
|
137
|
+
predicted-structure safety axis, and ships the first benchmark and grounded agent for the genome-*writing*
|
|
138
|
+
side of the field. Every workstream is pre-registered (`prereg/ws_*.yaml`, SHA-locked) and reports its
|
|
139
|
+
honest negatives, not just its wins.
|
|
140
|
+
|
|
141
|
+
| Workstream | What it adds | Honest headline result |
|
|
142
|
+
|---|---|---|
|
|
143
|
+
| **A - De-circularized benchmark** (gate) | retires the circular targeted-intent recovery@k; the headline is now blind safe-harbour discovery | blind GSH discovery **AUROC 0.92** vs safety-only 0.50 |
|
|
144
|
+
| **B - Strong baselines + safety metric switch** | endogenous-expression baseline, multi-mark ablation, published GSH rule-set; safe-harbour discrimination is now the primary safety metric | learned writability **0.92 (95% CI 0.82-0.98)** vs GSH distance-rule 0.38 (delta CI excludes zero); the circular `genotoxic_cis` AUROC demoted to a labeled diagnostic |
|
|
145
|
+
| **C - AlphaGenome integration** | predicted sequence tracks + a predicted **3D structural-risk** axis (Hi-C contact-map deltas) via the hosted AlphaGenome API | per-track transfers well (HepG2 ATAC 0.91), but the *composite* score degrades from predicted tracks, so the measured atlas stays the backbone (flagged) |
|
|
146
|
+
| **D - Cargo Polish** | scores the *insert* for silencing/instability triggers (CpG islands, GC, cryptic splice, MFE, silencers) | directional: high-CpG bacterial cassette 0.75 vs CpG-depleted 0.0, every flag carries a fix |
|
|
147
|
+
| **E - Genome-Writing Bench v0.1 + PEN-Agent** | the first benchmark for the writing side, plus a grounded agent that cannot fabricate | planner beats the naive baseline 3/3; a real LLM agent reaches the planner's numbers only by grounding (0 fabricated) |
|
|
148
|
+
| **F - Local recalibration / private-data adaptation** | recalibrate or fine-tune the released models on your own assays, in-container, behind a validation gate | the adapted model activates only if it beats the released model AND a no-skill baseline; the released model is provably unchanged |
|
|
149
|
+
| **G - Multiplex + guide QC** | a pairwise translocation-risk screen for multi-edit plans, and a bridge-RNA guide ranker | DSB-free recombinase plans carry ~zero translocation risk by construction; known-bad guides are retrospectively down-ranked |
|
|
150
|
+
|
|
151
|
+
The **Genome-Writing Bench** (workstream E) is v3.1's adoption vehicle: a one-command, SHA-locked, leaderboard
|
|
152
|
+
benchmark with deterministic scorers and no circular labels. See
|
|
153
|
+
[`benchmarks/genome_writing_bench/`](benchmarks/genome_writing_bench/) and `docs/positioning.md`.
|
|
154
|
+
|
|
155
|
+
## Architecture
|
|
156
|
+
|
|
157
|
+
```
|
|
158
|
+
+-------------------------------------------+
|
|
159
|
+
| WRITE PLANNER (engine) |
|
|
160
|
+
| inverse design: destination x writer |
|
|
161
|
+
| x cargo/guide x delivery -> ranked plan |
|
|
162
|
+
+----------------^-------------^-------------+
|
|
163
|
+
| |
|
|
164
|
+
+-------------------------+--+ +---+------------------------+
|
|
165
|
+
| WRITABLE GENOME (B) | | WRITER ATLAS (A) |
|
|
166
|
+
| flagship reference |<---->| companion reference |
|
|
167
|
+
| | reach| |
|
|
168
|
+
| - Safety layer (learned) | ability - Family targeting KB |
|
|
169
|
+
| - Durability layer (learned)| | - Measured scoring axes |
|
|
170
|
+
| - Reachability layer -----+------+ - Mechanism classifier |
|
|
171
|
+
| -> writability profile | | - DMS variant model |
|
|
172
|
+
+-------------------------^--+ +---+------------------------+
|
|
173
|
+
| |
|
|
174
|
+
+----------------------------------+-------------+-------------------------+
|
|
175
|
+
| DATA FOUNDATION (bulk-downloadable) |
|
|
176
|
+
| hg38 . ENCODE/Roadmap chromatin . Hi-C/LADs . TRIP position effects . |
|
|
177
|
+
| RID/VISDB/MLV integration sites . clinical genotoxic CIS . COSMIC . |
|
|
178
|
+
| DepMap . gnomAD . GTEx . UniProt . Pfam/InterPro . bridge-recombinase |
|
|
179
|
+
| off-target + DMS (Perry 2025) |
|
|
180
|
+
+-------------------------------------------------------------------------+
|
|
181
|
+
|
|
182
|
+
Platform services (on top of the validated core): PEN-MONITOR (Europe PMC living database),
|
|
183
|
+
grounded RAG/Q&A, a tool-using agent + MCP server, and a Streamlit web app.
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## How it works
|
|
187
|
+
|
|
188
|
+
PEN-STACK is organised as **two reference layers + one engine + a services layer**.
|
|
189
|
+
|
|
190
|
+
| Component | Module | Role | Status |
|
|
191
|
+
|---|---|---|---|
|
|
192
|
+
| **Writable Genome** (flagship) | `pen_stack.wgenome` | learned per-locus safety x durability x reachability | Paper 1 |
|
|
193
|
+
| **Writer Atlas** (companion) | `pen_stack.atlas`, `.mech`, `.score` | cross-family enzyme catalogue + Writer-Targeting KB | Paper 2 |
|
|
194
|
+
| **Cross-link** | `pen_stack.atlas.crosslink` | bidirectional writer to locus queries | Paper 2 |
|
|
195
|
+
| **Write Planner** (engine) | `pen_stack.planner` | inverse design, `edit_intent`-conditioned | Paper 3 |
|
|
196
|
+
| **Agentic platform** | `pen_stack.agent` | goal to cited, auditable plan; MCP server; one-command deploy | Paper 3 |
|
|
197
|
+
| **Bridge off-target engine** | `pen_stack.bridge` | "CRISPOR for bridge recombinases" + guide QC (v3.1) | Paper 4 |
|
|
198
|
+
| **Genome-Writing Bench** (v3.1) | `benchmarks/`, `bench/run.py` | first writing-side benchmark; deterministic scorers, leaderboard | M2 |
|
|
199
|
+
| **PEN-Agent** (v3.1) | `pen_stack.agent.pen_agent` | grounded write-planning state machine; zero fabrication | M2 |
|
|
200
|
+
| **3D structural risk** (v3.1) | `pen_stack.wgenome.structure3d` | AlphaGenome contact-map deltas as a safety axis | M1 |
|
|
201
|
+
| **Cargo Polish** (v3.1) | `pen_stack.planner.cargo_polish` | cargo-sequence silencing-risk scan | M1 |
|
|
202
|
+
| **Local adaptation** (v3.1) | `pen_stack.adapt` | gated recalibration / fine-tuning on private data | M1 |
|
|
203
|
+
| **Multiplex risk** (v3.1) | `pen_stack.planner.multiplex` | pairwise translocation-risk screen for multi-edit plans | M3 |
|
|
204
|
+
| **Platform services** | `monitor`, `rag`, `ui`, `server` | living database, grounded RAG, web app, REST API | - |
|
|
205
|
+
|
|
206
|
+
### Headline results (all blind / pre-registered)
|
|
207
|
+
|
|
208
|
+
- **Paper 1 (Writable Genome):** a genome-wide atlas of 3,031,030 loci x 3 cell types (K562, HepG2, CD34+
|
|
209
|
+
HSPC) recovers validated safe harbours as highly writable and clinical genotoxic loci as non-writable,
|
|
210
|
+
blind. Durability transfers mouse to human (Spearman rho = 0.42).
|
|
211
|
+
- **Paper 2 (Writer Atlas):** 33,370 enzyme systems across 8 families on common measured axes; mechanism
|
|
212
|
+
classifier agrees with the audited labels on the curated core (1.00); cross-link validated on AAVS1.
|
|
213
|
+
- **Paper 3 / v3.1 (Write Planner + de-circularized benchmark):** the honest headline is **blind
|
|
214
|
+
safe-harbour site discovery** - run genome-wide (so no on-target identity term fires), the planner's
|
|
215
|
+
writability separates held-out, DOI-validated safe harbours from matched-context controls at **AUROC
|
|
216
|
+
0.92** (safety-only baseline 0.50). Writer-family recovery@1 = 1.0 vs prevalence 0.25 across 4 families.
|
|
217
|
+
The earlier "recovery@10 = 1.00, McNemar p" result for *targeted* intents was definitional, not
|
|
218
|
+
predictive (an on-target identity term dominates the score), so it is now reported only as a
|
|
219
|
+
specification-compliance correctness table - see `docs/benchmark_circularity.md`. A tool-using agent
|
|
220
|
+
never fabricates a number (every value traces to a validated tool call).
|
|
221
|
+
- **Paper 4 (Bridge off-target engine):** to our knowledge the first measured-data-validated tool that
|
|
222
|
+
**nominates and ranks candidate off-target *locations*** for bridge recombinases. On the measured Perry
|
|
223
|
+
2025 data (6,856 real off-targets) the per-position profile confirms the central core (positions 7-9) is
|
|
224
|
+
the specificity determinant, and the model ranks real off-targets above core-disrupted decoys at AUROC
|
|
225
|
+
0.77 vs 0.62 for Hamming. Stated plainly: it is a **screening tool, not a quantitative safety
|
|
226
|
+
calculator**; it does not quantify how much recombination occurs at each site (sequence-risk vs measured
|
|
227
|
+
magnitude, rho approximately 0.30). A first-of-its-kind beachhead for a genuinely unoccupied gap, not a
|
|
228
|
+
Nature-tier breakthrough; the Writable Genome (Paper 1) remains the flagship novelty.
|
|
229
|
+
|
|
230
|
+
## The Genome-Writing Bench (v3.1, M2)
|
|
231
|
+
|
|
232
|
+
The first benchmark for the **writing** side of genome engineering - *where* to write, *what* writer to use,
|
|
233
|
+
*how* to design the cargo, and *what off-target / structural risk* a write carries - complementing the many
|
|
234
|
+
editing-side (Cas9 / base / prime) benchmarks. Six tasks, each with a deterministic scorer and a documented
|
|
235
|
+
ground-truth source; **no task is scored against a circular label** (it inherits the de-circularization gate).
|
|
236
|
+
|
|
237
|
+
```bash
|
|
238
|
+
python bench/run.py --agent # one command -> out/bench_results.json + a leaderboard
|
|
239
|
+
docker compose run --rm bench python bench/run.py --agent # same, on the clean image
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
| Solver | Beats naive on | No-fabrication | Note |
|
|
243
|
+
|---|---|---|---|
|
|
244
|
+
| deterministic planner | 3/3 grounded tasks | n/a | the validated planning tools (reference) |
|
|
245
|
+
| naive baseline | - | n/a | safety-only / prevalence / Hamming |
|
|
246
|
+
| **LLM agent** (PEN-Agent) | = planner (grounded) | **PASS** | a real LLM drives the tools; reaches the planner only by grounding every value, 0 fabricated |
|
|
247
|
+
|
|
248
|
+
Per-task (planner vs naive): site selection **0.92** vs 0.50, writer recovery **1.0** vs 0.25, off-target
|
|
249
|
+
**0.77** vs 0.62, intent 7/7, no-fabrication **PASS** (a hard gate). **PEN-Agent** (`pen_stack.agent`) is a
|
|
250
|
+
grounded write-planning state machine - goal to site to writer to cargo (with Cargo Polish) to off-target
|
|
251
|
+
to 3D structural risk to report - that copies every number from a validated tool with provenance and refuses
|
|
252
|
+
or degrades rather than invent. See [`benchmarks/genome_writing_bench/`](benchmarks/genome_writing_bench/),
|
|
253
|
+
`docs/agent.md`, and the leaderboard submission guide.
|
|
254
|
+
|
|
255
|
+
## How PEN-STACK connects to the prior repositories
|
|
256
|
+
|
|
257
|
+
PEN-STACK v3.0 consolidates and re-grounds five earlier projects. Their genuinely reusable assets are
|
|
258
|
+
imported here; the originals are archived read-only for provenance and DOI stability. This is what makes
|
|
259
|
+
PEN-STACK "the thing you cite instead of rebuilding the pipeline."
|
|
260
|
+
|
|
261
|
+
```
|
|
262
|
+
genome-atlas --+ 18-family InterPro-audited Pfam whitelist (v1.2.1) --> WT-KB + mechanism classifier
|
|
263
|
+
mech-class ----+ multi-source mechanism classifier --> family / mechanism calls
|
|
264
|
+
pen-score -----+- 9 scoring axes (dsb/cargo/deliv/immuno/prog/...) --> re-grounded therapeutic axes
|
|
265
|
+
pen-assemble --+ IS110 ortholog / design set --> part of the 1,058-entity universe
|
|
266
|
+
pen-compare ---+ unified_editor_universe.parquet (1,058) + scorecard --> canonical universe + scorecard
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
| Prior repo | Pinned version | What v3.0 reuses | What changed |
|
|
270
|
+
|---|---|---|---|
|
|
271
|
+
| [genome-atlas](https://github.com/ahmedanees-m/genome-atlas) | v0.7.2 | the audited 18-family Pfam backbone - spine of the WT-KB and the at-scale mechanism classifier | GraphSAGE link-prediction framing retired |
|
|
272
|
+
| [mech-class](https://github.com/ahmedanees-m/mech-class) | v0.5.4 | the mechanism classifier (Pfam + RHEA + CRISPRcasdb + UniProt) | reused as the family/mechanism caller |
|
|
273
|
+
| [pen-score](https://github.com/ahmedanees-m/pen-score) | v0.1.3 | the scoring axes (deliv / immuno / cargo, ...) | prog/cargo re-grounded; hand-set overrides removed |
|
|
274
|
+
| [pen-assemble](https://github.com/ahmedanees-m/pen-assemble) | v0.5.2 | the ortholog sequence set | de-novo chimera generation retired -> DMS-grounded point-variant proposal |
|
|
275
|
+
| [pen-compare](https://github.com/ahmedanees-m/pen-compare) | v0.1.0 | the 1,058-entity universe + scorecard scaffold + tests | circular 5-gate "certification" -> descriptive scorecard with blind concordance |
|
|
276
|
+
|
|
277
|
+
**One canonical assembly path** (`pen_stack/atlas/universe.py::assemble`) feeds the classifier, the scorer,
|
|
278
|
+
and the scorecard identical metadata, so the cross-module inconsistency in the prior pipelines cannot recur.
|
|
279
|
+
|
|
280
|
+
## Repository structure
|
|
281
|
+
|
|
282
|
+
```
|
|
283
|
+
pen-stack/
|
|
284
|
+
├── pen_stack/ the installable package
|
|
285
|
+
│ ├── wgenome/ Writable Genome (Paper 1)
|
|
286
|
+
│ │ ├── features.py unified feature matrix (accessibility + histones + safety + integration)
|
|
287
|
+
│ │ ├── safety.py calibrated genotoxicity-risk model (chrom-block CV + baseline)
|
|
288
|
+
│ │ ├── durability.py conditional chromatin->expression model (TRIP-trained, transferable)
|
|
289
|
+
│ │ ├── writability.py decomposable safety x durability x reachability integration
|
|
290
|
+
│ │ └── export_tracks.py BigWig / BED atlas export
|
|
291
|
+
│ ├── atlas/ Writer Atlas + WT-KB + cross-link (Papers 1-2)
|
|
292
|
+
│ │ ├── schema.py pydantic WriterEntry (enforces >=1 DOI per row)
|
|
293
|
+
│ │ ├── build_wtkb.py Writer-Targeting Knowledge Base builder (8 families, tiered)
|
|
294
|
+
│ │ ├── expand.py ortholog ingestion -> atlas.parquet (33,370 systems)
|
|
295
|
+
│ │ ├── crosslink.py writers_for_locus / loci_for_writer / loci_for_gene
|
|
296
|
+
│ │ ├── variant_propose.py DMS-grounded point-mutation proposal (no chimeras)
|
|
297
|
+
│ │ ├── universe.py THE canonical universe assembly (1,058 entities)
|
|
298
|
+
│ │ └── scorecard.py descriptive scorecard + blind concordance
|
|
299
|
+
│ ├── mech/ mechanism classification at scale (audited 18-family whitelist v1.2.1)
|
|
300
|
+
│ ├── score/ re-grounded axes + therapeutic-readiness scoring
|
|
301
|
+
│ ├── planner/ Write Planner (Paper 3): optimize / cargo / cargo_polish / multiplex / pipeline
|
|
302
|
+
│ ├── bridge/ bridge off-target engine (Paper 4): offtarget / fold_qc / guide_qc / pipeline / cli
|
|
303
|
+
│ ├── agent/ agentic platform: tools / orchestrator / pen_agent / mcp_server / guardrails
|
|
304
|
+
│ ├── adapt/ local recalibration / private-data adaptation behind a gate (v3.1, WS-F)
|
|
305
|
+
│ ├── monitor/ PEN-MONITOR living database (Europe PMC)
|
|
306
|
+
│ ├── rag/ grounded, cited Q&A (hybrid LLM: Ollama primary, Nemotron fallback)
|
|
307
|
+
│ ├── validate/ benchmarks: blind_gsh_discovery / durability_baselines / seq_vs_measured / agent_eval / adapt_demo
|
|
308
|
+
│ ├── data/ ingestion (genome, chromatin, integration, TRIP, safety annotations)
|
|
309
|
+
│ ├── server/api.py FastAPI REST (atlas, crosslink, writable, plan, bridge, ask)
|
|
310
|
+
│ ├── ui/app.py Streamlit web app (11 pages)
|
|
311
|
+
│ └── cli.py unified CLI
|
|
312
|
+
├── benchmarks/genome_writing_bench/ Genome-Writing Bench v0.1 (tasks / harness / solvers / LEADERBOARD / SHAs)
|
|
313
|
+
├── bench/run.py one-command bench entrypoint (--agent, --verify)
|
|
314
|
+
├── scripts/ reproducible pipeline drivers (p1_*, p2_*, p4_*, ws_*_report)
|
|
315
|
+
├── configs/ pinned datasets + thresholds + curation (YAML)
|
|
316
|
+
├── prereg/ SHA-locked success criteria (paper1..4 + ws_a..ws_g + locks)
|
|
317
|
+
├── data/curated/ small committed tables (universe, gene coords, measured bridge profile)
|
|
318
|
+
├── tests/unit/ unit + regression + blind-validation suite
|
|
319
|
+
├── docs/ mkdocs site (cards, tutorials, INFRA, DEPLOY, MCP)
|
|
320
|
+
├── docker/ CUDA image + UI image + pinned requirements
|
|
321
|
+
├── tools/penctl.py laptop<->VM orchestrator (paramiko SSH/SFTP, Docker-only)
|
|
322
|
+
├── docker-compose.yml one-command self-hostable platform
|
|
323
|
+
└── pyproject.toml CITATION.cff CHANGELOG.md LICENSE
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
> **Data policy.** Large artifacts (3 M-row atlases, BigWig tracks, models) and any third-party copyrighted
|
|
327
|
+
> data are *not* committed - they are released via Zenodo (DOI) or fetched from the original source, and are
|
|
328
|
+
> reproducible by re-running the scripts. Only small curated tables and derived products live in git.
|
|
329
|
+
|
|
330
|
+
## Installation and quick start
|
|
331
|
+
|
|
332
|
+
**From PyPI** (the library, CLI, agent, and pure-logic tools):
|
|
333
|
+
|
|
334
|
+
```bash
|
|
335
|
+
pip install pen-stack # core
|
|
336
|
+
pip install "pen-stack[models,bio,bridge,server,services]" # full stack
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
The wheel ships the importable package and the command-line tools. The **full data pipeline** (the 3 M-row
|
|
340
|
+
atlases, BigWig tracks, and curated configs) is distributed via the cloned repo + Zenodo, per the data
|
|
341
|
+
policy above; point an installed copy at a checkout with `export PEN_STACK_HOME=/path/to/pen-stack` to use
|
|
342
|
+
the config-driven features. Most users who want the whole pipeline clone the repo:
|
|
343
|
+
|
|
344
|
+
```bash
|
|
345
|
+
git clone https://github.com/ahmedanees-m/pen-stack.git && cd pen-stack
|
|
346
|
+
pip install -e ".[dev]" # core + tests
|
|
347
|
+
pip install -e ".[models,bio,bridge,server,services]" # full stack
|
|
348
|
+
pytest -q # 115 tests
|
|
349
|
+
pen-stack info # stack status
|
|
350
|
+
python bench/run.py --agent # run the Genome-Writing Bench (under 5 min)
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
A five-minute quickstart that runs a bench task end-to-end is in [`docs/quickstart.md`](docs/quickstart.md).
|
|
354
|
+
|
|
355
|
+
Query the stack:
|
|
356
|
+
|
|
357
|
+
```bash
|
|
358
|
+
pen-stack atlas --coverage # Writer Atlas coverage (33,370 systems x 8 families)
|
|
359
|
+
pen-stack writable --gene CCR5 --ct k562 # rank writable loci near a gene
|
|
360
|
+
pen-stack crosslink --chrom chr19 --bin 55090 # which writers reach AAVS1
|
|
361
|
+
pen-stack plan --gene TRAC --intent knock_in_with_disruption --cargo-bp 2000 # inverse-design plans
|
|
362
|
+
pen-bridge design --target ACGTGTCTACGTGA --donor TTGCATCTAGGCAC # bridge design + off-target + QC
|
|
363
|
+
pen-stack monitor --back-test # PEN-MONITOR living-database scan
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
Self-host the whole platform (API + web app + agent + MCP + LLM), one command:
|
|
367
|
+
|
|
368
|
+
```bash
|
|
369
|
+
docker compose up -d
|
|
370
|
+
docker compose exec ollama ollama pull qwen2.5:7b-instruct # first run only (local primary model)
|
|
371
|
+
# Web app :8501 . API :8000 (/plan, /bridge/design, /ask) . MCP :8765 (see docs/DEPLOY.md)
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
**LLM backend (hybrid, non-load-bearing).** Services (agent, RAG, PEN-MONITOR) use one switch in
|
|
375
|
+
`configs/llm.yaml`. On the compute tier (the GPU VM) the default is the **local Ollama model**
|
|
376
|
+
(`qwen2.5:7b-instruct`, free, private, tool-calling verified) with **automatic fallback** to the hosted
|
|
377
|
+
**NVIDIA Nemotron** (free, no local resources), then to a deterministic no-LLM path. A cooldown cache and
|
|
378
|
+
bounded timeouts mean an absent or slow provider degrades in seconds rather than stalling. The LLM is
|
|
379
|
+
non-load-bearing - every number and citation comes from a validated tool - so the choice never affects
|
|
380
|
+
scientific reproducibility, only orchestration quality. Set `NVIDIA_API_KEY` (or a gitignored
|
|
381
|
+
`configs/nvidia_api_key.txt`) for the hosted fallback; a low-RAM laptop with no GPU uses it automatically.
|
|
382
|
+
The core scientific compute uses no LLM at all.
|
|
383
|
+
|
|
384
|
+
## The web platform
|
|
385
|
+
|
|
386
|
+
`pen_stack/ui/app.py` is a single Streamlit app over the whole stack (11 pages):
|
|
387
|
+
|
|
388
|
+
- **Writable Genome** - Overview, Forward query (gene to writability/safety/durability), Site finder
|
|
389
|
+
(inverse), Atlas browser, Validation dashboard, Cross-cell-type transfer.
|
|
390
|
+
- **Writer Atlas** - family coverage and measured-axis comparison.
|
|
391
|
+
- **Write Planner** - goal + `edit_intent` to ranked, traceable plans.
|
|
392
|
+
- **Bridge design** - design a bridge RNA, fold/cross-loop QC, genome-wide off-target scan.
|
|
393
|
+
- **Ask** - grounded, cited Q&A (numbers from validated tools).
|
|
394
|
+
- **Agent** - a goal to a cited, auditable end-to-end plan.
|
|
395
|
+
|
|
396
|
+
## Data sources (all public)
|
|
397
|
+
|
|
398
|
+
hg38 (UCSC); ENCODE / Roadmap chromatin (ATAC/DNase + histone marks; K562, HepG2, CD34+ progenitor, mouse
|
|
399
|
+
ES-Bruce4); GENCODE v46; COSMIC Cancer Gene Census v104; DepMap Public 26Q1; LaFave 2014 (NHGRI GeIST) MLV
|
|
400
|
+
integrations; VISDB; TRIP / Akhtar 2013 (GEO GSE49806/49807); UniProt orthologs; Pfam/InterPro; Europe PMC;
|
|
401
|
+
Addgene; Perry 2025 bridge-recombinase off-target + DMS data (Science adz0276; copyrighted - kept local,
|
|
402
|
+
only derived products released). Every accession and DOI is pinned in `configs/datasets.yaml` and
|
|
403
|
+
independently verified.
|
|
404
|
+
|
|
405
|
+
## Validation philosophy
|
|
406
|
+
|
|
407
|
+
- **Pre-register before training.** Success criteria, baselines and held-out sets are SHA-locked in
|
|
408
|
+
`prereg/` (paper1..4) before any model sees test data.
|
|
409
|
+
- **Always report an honest baseline** (oncogene-distance for safety; H3K9me3/LAD for durability;
|
|
410
|
+
intent-blind ranking for the Planner; Hamming for the bridge engine).
|
|
411
|
+
- **Blind external concordance** - recover validated safe harbours, clinical genotoxic loci, documented
|
|
412
|
+
writes, and measured off-targets the model never trained on.
|
|
413
|
+
- **Report failure honestly** - cross-cell-type degradation, small benchmark N, and the limits of
|
|
414
|
+
sequence-only off-target magnitude prediction are quantified results, not footnotes.
|
|
415
|
+
- **Grounded services** - every quantitative answer comes from a validated tool call (never a language
|
|
416
|
+
model); the living database never auto-edits the atlas; clinical directives are refused.
|
|
417
|
+
|
|
418
|
+
## Papers and phases
|
|
419
|
+
|
|
420
|
+
| # | Title | Phase | Status |
|
|
421
|
+
|---|---|---|---|
|
|
422
|
+
| 1 (flagship) | The Writable Genome: a predictive, writer-aware atlas of safe & durable insertion sites | 1 | complete |
|
|
423
|
+
| 2 (platform) | PEN-STACK: unified open infrastructure for non-destructive genome writing | 2 | complete |
|
|
424
|
+
| 3 (capstone) | The Write Planner: end-to-end inverse design of genomic writes | 3 | complete |
|
|
425
|
+
| 4 (beachhead) | Genome-wide off-target prediction for RNA-guided bridge recombinases | 1.5 | complete |
|
|
426
|
+
| M1 (v3.1) | Writable Genome hardened: strong baselines, AlphaGenome sequence + 3D structural-risk axis | v3.1 B,C,D,F | complete |
|
|
427
|
+
| M2 (v3.1) | The Genome-Writing Bench + PEN-Agent: the writing-side benchmark and a grounded agent | v3.1 E | complete |
|
|
428
|
+
| M3 (v3.1) | Multiplex translocation-risk + bridge-RNA guide QC | v3.1 G | complete |
|
|
429
|
+
|
|
430
|
+
The v3.1 cycle (workstreams A-H) is recorded in `CHANGELOG.md`, `docs/positioning.md`, and the SHA-locked
|
|
431
|
+
`prereg/ws_*.yaml`; preprint drafts are in `manuscripts/`.
|
|
432
|
+
|
|
433
|
+
Per-phase build records, execution summaries, and Zenodo deposit packages are kept alongside the program
|
|
434
|
+
plan. Data releases are deposited on Zenodo (one per paper).
|
|
435
|
+
|
|
436
|
+
## Citation
|
|
437
|
+
|
|
438
|
+
```bibtex
|
|
439
|
+
@software{penstack2026,
|
|
440
|
+
author = {Mahaboob Ali, Anees Ahmed},
|
|
441
|
+
title = {PEN-STACK: open infrastructure for genome writing (The Writable Genome)},
|
|
442
|
+
year = {2026},
|
|
443
|
+
version = {3.1.0},
|
|
444
|
+
url = {https://github.com/ahmedanees-m/pen-stack}
|
|
445
|
+
}
|
|
446
|
+
```
|
|
447
|
+
|
|
448
|
+
**Author:** Anees Ahmed Mahaboob Ali, VIT University, Vellore. Released under the MIT License.
|
|
449
|
+
|
|
450
|
+
*Decision-support, not a clinical directive - every score is traceable to public data and a pre-registered
|
|
451
|
+
model.*
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
pen_stack/__init__.py,sha256=KCoEBgVyUpSQazTpaMXKHTTn_j70m6ki2xxRK4WQEC8,87
|
|
2
|
+
pen_stack/_resources.py,sha256=8A5SDvkx64AbSRnlyancO1SpZp5NNHd2hbrO0LrGteI,1578
|
|
3
|
+
pen_stack/cli.py,sha256=FLy8pwSQBnAqRJciS3_U_T31b91dKLjo3wEm0k4S6Ug,5745
|
|
4
|
+
pen_stack/adapt/__init__.py,sha256=zRwPjyDVQcK0hXoH4_qkotQfrOlJna9GnBd_3SUsIY0,791
|
|
5
|
+
pen_stack/adapt/finetune.py,sha256=EtjZFtBPYvBbH9g0oKWPAS2xCRVdHhqVtk7c5havQWM,1469
|
|
6
|
+
pen_stack/adapt/ingest.py,sha256=JjQx8zNe0H9hmeOAGrpq-JFCuVjoULFzXWLgy01cXyU,4447
|
|
7
|
+
pen_stack/adapt/pipeline.py,sha256=zs_fDAiv0LtuTTxwoIeCR9GjhvodviSpp1bwYQlNHb0,5389
|
|
8
|
+
pen_stack/adapt/recalibrate.py,sha256=MGdDWeROqUmcpz6mT-Pmlp3NPmUMs52zRNFm55-twtI,2403
|
|
9
|
+
pen_stack/adapt/report.py,sha256=ebYbtlMQ5wuJvNYwdeIHvgGZdWsVG3SbcI-mfIWN9QU,6343
|
|
10
|
+
pen_stack/agent/__init__.py,sha256=J05GUK7ymg7jY1fTvJ9lUgTGW-y4_7hBXuLIHDpt7bk,57
|
|
11
|
+
pen_stack/agent/guardrails.py,sha256=PF8hxBLbAEtU6vk0p3CiBARHw4cjEHYRF8kPWDuLTeQ,2644
|
|
12
|
+
pen_stack/agent/mcp_server.py,sha256=HYh-H3a29SuW7yqGEh3tDoPeMpLFSYIKOT2yM1dMj4U,1980
|
|
13
|
+
pen_stack/agent/orchestrator.py,sha256=BgCHijM1DjhtfwrM7F1VeS_akoU1QVAlJ_f6hd4RFmE,5585
|
|
14
|
+
pen_stack/agent/pen_agent.py,sha256=Q_hChhFgsVjk4MrP0UiVAIm6bX2WstCuX15No01PIxg,9097
|
|
15
|
+
pen_stack/agent/tools.py,sha256=CSvGexFtM6imFXjTjaMknt_MMwn3D6XaV7kxi7g0qmQ,6617
|
|
16
|
+
pen_stack/atlas/__init__.py,sha256=-XGrYvi4u2qIM5FTgukHweLjCDe3SQipKs7_HvxTKx4,57
|
|
17
|
+
pen_stack/atlas/build_wtkb.py,sha256=mBvm7swjqhFBkhrTKigT0ffLnzk-_d3h6qk9K0eoFHc,3177
|
|
18
|
+
pen_stack/atlas/crosslink.py,sha256=nJWvpk_0AtrUIZbBZ1C25Az1EgTQQuPwaYHpDwDYT3c,6432
|
|
19
|
+
pen_stack/atlas/expand.py,sha256=5SZt6tS3SLt9uZwEQ3YlYdoIzSIQ5MRX9QASttODRbw,8898
|
|
20
|
+
pen_stack/atlas/schema.py,sha256=VLAItD-uGjrsySscBNnjr_lvbf06lByK7gribD15ogs,2410
|
|
21
|
+
pen_stack/atlas/scorecard.py,sha256=cMkuUH3Ty_n8cSWL5EUfrMRouKskpkdx6Se3xyrGTPA,6136
|
|
22
|
+
pen_stack/atlas/universe.py,sha256=BgwMjFAPC5W5rCprPcte84qXIn3phlBCVBNltL2tRkY,3218
|
|
23
|
+
pen_stack/atlas/variant_propose.py,sha256=q5EJtKzrg2c7OQVTppdKn9Q0UwfJ_VTsJa3-tJBiXH0,8073
|
|
24
|
+
pen_stack/bridge/__init__.py,sha256=QOESbsA8NIBPC6dSFS5qBtVnLJ1Ipu3_dPJWQ_5CYcQ,58
|
|
25
|
+
pen_stack/bridge/activity.py,sha256=x4H2MoHw4sGp0o3Fe2xiDIjve7aGHNjVqbnJkgouI6s,2682
|
|
26
|
+
pen_stack/bridge/cli.py,sha256=qGTjYoW2KBvs6B2T5gx92ofLlONzOo_2xZR-uHyJoak,3072
|
|
27
|
+
pen_stack/bridge/fold_qc.py,sha256=CyX-zeoZPd6MRNfarQM4hbP8zw_TnHmMwenkFZHL5vE,2507
|
|
28
|
+
pen_stack/bridge/guide_qc.py,sha256=Qdc2rrz2DK8ODZKl9iii1LiSk6ETAk7cJwfLhqUrGnk,4491
|
|
29
|
+
pen_stack/bridge/ingest.py,sha256=43lk_6KrabZVMxpXN1EzB5fcUDtGfOHdPn8FSpq98Wc,6095
|
|
30
|
+
pen_stack/bridge/offtarget.py,sha256=jKUHog0J9Pwf9rtEKgVau_H6wyU1Gq7Ns32ae5uqb_I,6425
|
|
31
|
+
pen_stack/bridge/ortholog_screen.py,sha256=HHXq3WONaZgOmZicYzP56epy6L0vhBUhGBFcf98KIYI,3380
|
|
32
|
+
pen_stack/bridge/pipeline.py,sha256=-ebgqppDGSYLJ2t91IKifj96BGB4lfxf7BsUlsON2GU,3830
|
|
33
|
+
pen_stack/data/__init__.py,sha256=jpgYY9g7VwyFZXIifhlssZRt6b-ZUdxKR0dsmL_muBo,56
|
|
34
|
+
pen_stack/data/encode.py,sha256=n9sW_lJOJq5-niflG_Owd10PdZFePphcGrcno1s5T6c,3146
|
|
35
|
+
pen_stack/data/genome.py,sha256=X5KM1I6cfNM3wMBJVaesXMBgNEPeeSXQFIVpyMkqQhY,2419
|
|
36
|
+
pen_stack/data/ingest_chromatin.py,sha256=J4U1K_Y_U5dQRv6t5YKGw2mpW8JoHEMvFK5cew4j37w,4425
|
|
37
|
+
pen_stack/data/ingest_integration.py,sha256=QOVMbcrmcvELywA0vr_lSY13zNx7PdgpR3lmelm-WAk,5014
|
|
38
|
+
pen_stack/data/ingest_safety_annot.py,sha256=XMqXKImy9RjhwBq_N-V5S7b-e6MhNN9N3Xq5yBZDju0,6818
|
|
39
|
+
pen_stack/data/ingest_trip.py,sha256=IeH0TXEWHL3z1ahID235HZO_KHdUF8g7fpVtFpGJhV4,3733
|
|
40
|
+
pen_stack/mech/__init__.py,sha256=GAILAvLwDv8InNGMIWNRiFIQaXUOXrv5m0ssDgSguWw,56
|
|
41
|
+
pen_stack/mech/classify_atlas.py,sha256=OOxPCUj2qT9WMbsXjFx7CXNExFSSYvMlRBlruSWFHU0,3271
|
|
42
|
+
pen_stack/mech/whitelist.py,sha256=OPKvqXy5_D5iHqCL84dlxfREqHlQOTPjNzNYZb-cBHs,3329
|
|
43
|
+
pen_stack/monitor/__init__.py,sha256=6tXqDuWVhNxOulYgpoDs03Y8dUZshstJ774kcbzJp_M,59
|
|
44
|
+
pen_stack/monitor/europepmc.py,sha256=igxEZycVFEJVyXk9DNJKHYXaOHrWOKuwRn4zjVh99-Q,1374
|
|
45
|
+
pen_stack/monitor/run.py,sha256=WlQsy7HGu41HOy-O3EXe8DswcDTT2hElRgHzarg1x1k,2420
|
|
46
|
+
pen_stack/monitor/triage.py,sha256=clt0hTN-ZHRu2s_sQqwwifMBblzamGW9Abec_4wdoZc,2630
|
|
47
|
+
pen_stack/planner/__init__.py,sha256=rqXwN3F-726t6f1lFVTfozKlwo1A3SOOiI8TDyI-1QM,59
|
|
48
|
+
pen_stack/planner/cargo.py,sha256=sWPXpWKaz47OX7ylqhwjt78GtExBVYn7Owit-4i_SO4,2744
|
|
49
|
+
pen_stack/planner/cargo_polish.py,sha256=P_o4FFT_E3NEFf7I2JBG3KNEuwIA_SXb477IgAhtk94,6209
|
|
50
|
+
pen_stack/planner/delivery.py,sha256=yVk79c05qM359LQPV_vE1UU_NpPXX2AlSkq6Mm9ARKg,1293
|
|
51
|
+
pen_stack/planner/multiplex.py,sha256=zNWeGbJSrwEnFrGIl0SBMk0OfyfPt09TWZCfx1pdNOg,5960
|
|
52
|
+
pen_stack/planner/optimize.py,sha256=m8KtxFc7b1EuoVlhl7Sz20LSb0Gc9R_0rpZ-1UY1Xes,7330
|
|
53
|
+
pen_stack/planner/pipeline.py,sha256=PCxpxaKVhUwrAVmA1NFC6NWOIwtBssIKMLm8YRO4xyI,4162
|
|
54
|
+
pen_stack/planner/report.py,sha256=HsQ6EsyYwEd_406y5WTr_MIxmclz-KAQPYYk-Uj_37M,1335
|
|
55
|
+
pen_stack/rag/__init__.py,sha256=FR6Du4rVBnTVtrFXKCB-jhS-A_Z5L8nDTbbHyc4BSRo,55
|
|
56
|
+
pen_stack/rag/index.py,sha256=gUdOystdW9QYhnpdccq4CblUOXRCW9DeZ1s9c7otBAU,2400
|
|
57
|
+
pen_stack/rag/llm.py,sha256=-yfILBK78P43Gt5-GX9583G95N4gS0sMNfR5InePcwI,8072
|
|
58
|
+
pen_stack/rag/qa.py,sha256=P9cau4k55QTy4pExbVxulDHaihe8wFgcu4THpq5ap5o,5348
|
|
59
|
+
pen_stack/score/__init__.py,sha256=xxvDK6A2MiR3f9V8a7-LX0yxyq_s7yIgiKbF5iuQqok,57
|
|
60
|
+
pen_stack/score/recalibrate.py,sha256=G-YpuE-AOZuXTmQwop3ZhqWFgsVztC9a3SUXtlQxZZE,3100
|
|
61
|
+
pen_stack/score/therapeutic.py,sha256=iDsbLmoTLQ2MLmCwoGowbtNRkWJsSn6YSQNvcUrNwws,3620
|
|
62
|
+
pen_stack/server/__init__.py,sha256=zg0WieD0JB731SfZeMR73PgJOXTPmwMax1_x_JSx3CE,58
|
|
63
|
+
pen_stack/server/api.py,sha256=NdPX34VMoqKyJxs3U79-WhZSiFE988sLOZ_UGAkO_2Q,6323
|
|
64
|
+
pen_stack/ui/__init__.py,sha256=rsfdyJ87h36XaCqoUvhBm1x5IigUNEuGacxkZTsjkTE,54
|
|
65
|
+
pen_stack/ui/app.py,sha256=S1ZzQ0losBkRUxYmI3GJlFA1TpUDYndKP99TMPTNKKw,30815
|
|
66
|
+
pen_stack/validate/__init__.py,sha256=BEu8bTzGd42CFvtpaOag9_SU8VLQzN_pd0mkxZzu_xo,60
|
|
67
|
+
pen_stack/validate/adapt_demo.py,sha256=DMFVU2OD5TE91evE0H2r4MH4HrrbXyI20hACaM5VU7I,3925
|
|
68
|
+
pen_stack/validate/agent_eval.py,sha256=RwBW2kdNP4pSeDWOjQ_Cw9FVsgUdzaukkTm7REgA0eI,5994
|
|
69
|
+
pen_stack/validate/blind_gsh_discovery.py,sha256=NV1QVLMUsuNdIjM4TVOWdcXfReaGuacFpSI_fZuLxVs,8154
|
|
70
|
+
pen_stack/validate/cargo_directionality.py,sha256=Tlo673ySL3vGENryyv7JLq--UWLP_ioy_iP08BL8iWk,2851
|
|
71
|
+
pen_stack/validate/durability_baselines.py,sha256=LRKJLsDByLnFkwopXC84HBuibrOElKBncZbhqCeLGOs,8422
|
|
72
|
+
pen_stack/validate/forward_hypotheses.py,sha256=0uRhd7JcDgNitqzzo5SGSCiqhpA6OrvdN4xqT_ykpJg,5007
|
|
73
|
+
pen_stack/validate/guide_qc_demo.py,sha256=mRNr9JhGGp73js3Wj8gL-f6Hfi3rgiZZ1x1Zv3eM9AQ,2781
|
|
74
|
+
pen_stack/validate/intent_specification.py,sha256=OuoMYVR9Si1i0IOAm51Qx3NFbqurJlFFJcPi0qIGRRg,3626
|
|
75
|
+
pen_stack/validate/paper3_benchmark.py,sha256=xV9jAJpUBpVFozjX4OVHntaNAzpT1vgyqRgTtCKSu7s,8364
|
|
76
|
+
pen_stack/validate/paper4_real_validation.py,sha256=FoVtYFgsgtYEkoed-3c2mIp9wuKGiehJOGBubc1EFB4,6970
|
|
77
|
+
pen_stack/validate/paper4_validation.py,sha256=MmiDLu4GV-ouoNAj1QWgGCu5vcRDPpPogwWbREJYLJc,3904
|
|
78
|
+
pen_stack/validate/seq_vs_measured.py,sha256=LF342iF4aS48FK6dZW72iw2IAMqma-63UbD74Oudq2A,7288
|
|
79
|
+
pen_stack/validate/within_locus_ranking.py,sha256=ft2cAjjBR0Z92r7xEL1hoV5hBeazK9UeWSwwYxP89XM,3350
|
|
80
|
+
pen_stack/validate/writer_recovery.py,sha256=Lv5OaN-BW6vlnhxDp5zizW_9n0xB7XvIhjvN2EgKspA,4208
|
|
81
|
+
pen_stack/wgenome/__init__.py,sha256=enDHFWKOmEOoyO7BHh0MeM7WrZqyow3Y_EWQtoS6ONE,59
|
|
82
|
+
pen_stack/wgenome/chromatin_seq.py,sha256=BTlZ79DhM95My9WEO03Bfx__20W84VHaOxpibpb3Hg4,4095
|
|
83
|
+
pen_stack/wgenome/durability.py,sha256=zxfRNCm0Ba-o7uZRfS5IvHMWLVA4Wj-2LsY4Hgva8Pw,5273
|
|
84
|
+
pen_stack/wgenome/export_tracks.py,sha256=TbFIsieSXmC7oTkwD0LdsGQPEKlCu0KJETDavh_GTks,2037
|
|
85
|
+
pen_stack/wgenome/features.py,sha256=2IuAPB6PQbrOIwAKMIC4DTiv6op8r20sJYLs9WUuksw,3850
|
|
86
|
+
pen_stack/wgenome/gsh_baseline.py,sha256=24RWbtEs-X59iN5a91xfKJ-m5zkQQ3u7qRlV9qgq7gE,5939
|
|
87
|
+
pen_stack/wgenome/providers.py,sha256=0Vc7D2duge86YFK4Ub-WLwQess8LtgTMzGGcBmslylI,11703
|
|
88
|
+
pen_stack/wgenome/safety.py,sha256=j4cOxWEDgk0J7aXQgSRsFGq2kYGgXpiiqG-YVwODcI0,3204
|
|
89
|
+
pen_stack/wgenome/structure3d.py,sha256=jt0Cofbkjv_Rm4GP7nVX4BDQ1nGM-c8txl0Op-krEfE,9250
|
|
90
|
+
pen_stack/wgenome/writability.py,sha256=m6RlG01r0w_UmbV08puBqNwmbwbm4OvzHenFLR0GGtc,3424
|
|
91
|
+
pen_stack-3.1.0.dist-info/licenses/LICENSE,sha256=vxjNCQMsjGGvVRpJfJOg6RWHgXzyTwmxYUu0WF5jUbI,1081
|
|
92
|
+
pen_stack-3.1.0.dist-info/METADATA,sha256=C5ubcAyyQQnJRLfj05zkUWmOZK_-IuVZxk_r56FSdxE,32315
|
|
93
|
+
pen_stack-3.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
94
|
+
pen_stack-3.1.0.dist-info/entry_points.txt,sha256=5FKPTemiVvUgmeecTWOrvwO-fiuHDpJ9y0ITK5BIO4E,88
|
|
95
|
+
pen_stack-3.1.0.dist-info/top_level.txt,sha256=0jV2NCFBddUK9spN6ubdoD8w27XBdWg3sMZb-HM648Y,10
|
|
96
|
+
pen_stack-3.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Anees Ahmed Mahaboob Ali
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pen_stack
|