bbbnuke 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bbbnuke-0.9.0/.gitignore +13 -0
- bbbnuke-0.9.0/Dockerfile.api +23 -0
- bbbnuke-0.9.0/Dockerfile.worker-cpu +19 -0
- bbbnuke-0.9.0/PKG-INFO +333 -0
- bbbnuke-0.9.0/README.md +283 -0
- bbbnuke-0.9.0/docker-compose.yml +86 -0
- bbbnuke-0.9.0/docs/INTEGRATION_GUIDE.md +360 -0
- bbbnuke-0.9.0/pyproject.toml +79 -0
- bbbnuke-0.9.0/src/bbnuke/__init__.py +3 -0
- bbbnuke-0.9.0/src/bbnuke/agent/__init__.py +31 -0
- bbbnuke-0.9.0/src/bbnuke/agent/orchestrator.py +234 -0
- bbbnuke-0.9.0/src/bbnuke/agent/permissions.py +75 -0
- bbbnuke-0.9.0/src/bbnuke/agent/state.py +196 -0
- bbbnuke-0.9.0/src/bbnuke/agent/tool_logger.py +92 -0
- bbbnuke-0.9.0/src/bbnuke/agent/tools/__init__.py +14 -0
- bbbnuke-0.9.0/src/bbnuke/agent/tools/compute_tools.py +165 -0
- bbbnuke-0.9.0/src/bbnuke/agent/tools/library_tools.py +143 -0
- bbbnuke-0.9.0/src/bbnuke/agent/tools/pipeline_tools.py +312 -0
- bbbnuke-0.9.0/src/bbnuke/agent/tools/registry.py +241 -0
- bbbnuke-0.9.0/src/bbnuke/agent/tools/workspace_tools.py +219 -0
- bbbnuke-0.9.0/src/bbnuke/agent/workspace.py +148 -0
- bbbnuke-0.9.0/src/bbnuke/api/__init__.py +1 -0
- bbbnuke-0.9.0/src/bbnuke/api/affinity_cache.py +164 -0
- bbbnuke-0.9.0/src/bbnuke/api/app.py +69 -0
- bbbnuke-0.9.0/src/bbnuke/api/audit.py +41 -0
- bbbnuke-0.9.0/src/bbnuke/api/auth.py +90 -0
- bbbnuke-0.9.0/src/bbnuke/api/deps.py +56 -0
- bbbnuke-0.9.0/src/bbnuke/api/middleware.py +209 -0
- bbbnuke-0.9.0/src/bbnuke/api/routes/__init__.py +1 -0
- bbbnuke-0.9.0/src/bbnuke/api/routes/batch.py +205 -0
- bbbnuke-0.9.0/src/bbnuke/api/routes/health.py +34 -0
- bbbnuke-0.9.0/src/bbnuke/api/routes/keys.py +97 -0
- bbbnuke-0.9.0/src/bbnuke/api/routes/projects.py +229 -0
- bbbnuke-0.9.0/src/bbnuke/api/routes/proteins.py +48 -0
- bbbnuke-0.9.0/src/bbnuke/api/routes/score.py +57 -0
- bbbnuke-0.9.0/src/bbnuke/api/routes/screen.py +15 -0
- bbbnuke-0.9.0/src/bbnuke/api/schemas.py +136 -0
- bbbnuke-0.9.0/src/bbnuke/api/usage.py +47 -0
- bbbnuke-0.9.0/src/bbnuke/base.py +41 -0
- bbbnuke-0.9.0/src/bbnuke/bb_profiler.py +59 -0
- bbbnuke-0.9.0/src/bbnuke/cli.py +216 -0
- bbbnuke-0.9.0/src/bbnuke/core/__init__.py +45 -0
- bbbnuke-0.9.0/src/bbnuke/core/config.py +73 -0
- bbbnuke-0.9.0/src/bbnuke/core/constants.py +51 -0
- bbbnuke-0.9.0/src/bbnuke/core/schemas.py +367 -0
- bbbnuke-0.9.0/src/bbnuke/data/BBB_protein_core_table.xlsx +0 -0
- bbbnuke-0.9.0/src/bbnuke/data/BBB_proteins_merged.csv +66 -0
- bbbnuke-0.9.0/src/bbnuke/data/__init__.py +0 -0
- bbbnuke-0.9.0/src/bbnuke/data/bbb_classifier.joblib +0 -0
- bbbnuke-0.9.0/src/bbnuke/data/bbb_classifier_meta.json +25 -0
- bbbnuke-0.9.0/src/bbnuke/data/bbb_scaler.joblib +0 -0
- bbbnuke-0.9.0/src/bbnuke/data/efflux_proteins.csv +10 -0
- bbbnuke-0.9.0/src/bbnuke/data/target.csv +3 -0
- bbbnuke-0.9.0/src/bbnuke/db/__init__.py +1 -0
- bbbnuke-0.9.0/src/bbnuke/db/models.py +312 -0
- bbbnuke-0.9.0/src/bbnuke/db/session.py +39 -0
- bbbnuke-0.9.0/src/bbnuke/dockerfiles/Dockerfile +20 -0
- bbbnuke-0.9.0/src/bbnuke/mcp_server.py +333 -0
- bbbnuke-0.9.0/src/bbnuke/modules/__init__.py +0 -0
- bbbnuke-0.9.0/src/bbnuke/modules/affinity.py +255 -0
- bbbnuke-0.9.0/src/bbnuke/modules/cns_mpo.py +179 -0
- bbbnuke-0.9.0/src/bbnuke/modules/efflux.py +119 -0
- bbbnuke-0.9.0/src/bbnuke/modules/heuristic.py +385 -0
- bbbnuke-0.9.0/src/bbnuke/modules/pka.py +312 -0
- bbbnuke-0.9.0/src/bbnuke/modules/properties.py +31 -0
- bbbnuke-0.9.0/src/bbnuke/modules/psichic/__init__.py +287 -0
- bbbnuke-0.9.0/src/bbnuke/modules/psichic/dataset.py +164 -0
- bbbnuke-0.9.0/src/bbnuke/modules/psichic/drug_pool.py +53 -0
- bbbnuke-0.9.0/src/bbnuke/modules/psichic/inference.py +138 -0
- bbbnuke-0.9.0/src/bbnuke/modules/psichic/layers.py +459 -0
- bbbnuke-0.9.0/src/bbnuke/modules/psichic/ligand_init.py +314 -0
- bbbnuke-0.9.0/src/bbnuke/modules/psichic/net.py +290 -0
- bbbnuke-0.9.0/src/bbnuke/modules/psichic/pna.py +99 -0
- bbbnuke-0.9.0/src/bbnuke/modules/psichic/protein_pool.py +65 -0
- bbbnuke-0.9.0/src/bbnuke/modules/psichic/scaler.py +74 -0
- bbbnuke-0.9.0/src/bbnuke/modules/standardize.py +31 -0
- bbbnuke-0.9.0/src/bbnuke/openai_functions/__init__.py +918 -0
- bbbnuke-0.9.0/src/bbnuke/openai_functions/__main__.py +5 -0
- bbbnuke-0.9.0/src/bbnuke/optimizer.py +16 -0
- bbbnuke-0.9.0/src/bbnuke/pipeline/__init__.py +0 -0
- bbbnuke-0.9.0/src/bbnuke/pipeline/runner.py +255 -0
- bbbnuke-0.9.0/src/bbnuke/pipeline.py +45 -0
- bbbnuke-0.9.0/src/bbnuke/prop_predictor.py +21 -0
- bbbnuke-0.9.0/src/bbnuke/standardizer.py +22 -0
- bbbnuke-0.9.0/src/bbnuke/workers/__init__.py +1 -0
- bbbnuke-0.9.0/src/bbnuke/workers/cpu_worker.py +100 -0
- bbbnuke-0.9.0/src/bbnuke/workers/gpu_worker.py +76 -0
- bbbnuke-0.9.0/src/bbnuke/workers/orchestrator.py +166 -0
- bbbnuke-0.9.0/src/bbnuke/workflows/__init__.py +1 -0
- bbbnuke-0.9.0/src/bbnuke/workflows/engine.py +98 -0
- bbbnuke-0.9.0/src/bbnuke/workflows/models.py +75 -0
- bbbnuke-0.9.0/src/bbnuke/workflows/steps/__init__.py +1 -0
- bbbnuke-0.9.0/src/bbnuke/workflows/steps/batch_score.py +56 -0
- bbbnuke-0.9.0/src/bbnuke/workflows/steps/filter_step.py +82 -0
- bbbnuke-0.9.0/src/bbnuke/workflows/steps/parameter_sweep.py +87 -0
- bbbnuke-0.9.0/tests/__init__.py +0 -0
- bbbnuke-0.9.0/tests/test_affinity.py +81 -0
- bbbnuke-0.9.0/tests/test_agent.py +569 -0
- bbbnuke-0.9.0/tests/test_api.py +161 -0
- bbbnuke-0.9.0/tests/test_auth.py +215 -0
- bbbnuke-0.9.0/tests/test_batch.py +201 -0
- bbbnuke-0.9.0/tests/test_end_to_end.py +97 -0
- bbbnuke-0.9.0/tests/test_pipeline.py +169 -0
- bbbnuke-0.9.0/tests/test_pka.py +84 -0
- bbbnuke-0.9.0/tests/test_sprint4.py +437 -0
bbbnuke-0.9.0/.gitignore
ADDED
(13 added lines not shown)
bbbnuke-0.9.0/Dockerfile.api
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
FROM continuumio/miniconda3:latest
|
|
2
|
+
|
|
3
|
+
WORKDIR /app
|
|
4
|
+
|
|
5
|
+
# System deps
|
|
6
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
7
|
+
build-essential \
|
|
8
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
9
|
+
|
|
10
|
+
# Install RDKit via conda (fastest path)
|
|
11
|
+
RUN conda install -y -c conda-forge rdkit numpy"<2" && conda clean -afy
|
|
12
|
+
|
|
13
|
+
# Copy and install package
|
|
14
|
+
COPY pyproject.toml .
|
|
15
|
+
COPY src/ src/
|
|
16
|
+
|
|
17
|
+
RUN pip install --no-cache-dir ".[api]"
|
|
18
|
+
|
|
19
|
+
# Expose port
|
|
20
|
+
EXPOSE 8000
|
|
21
|
+
|
|
22
|
+
# Run API server
|
|
23
|
+
CMD ["uvicorn", "bbnuke.api.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
|
bbbnuke-0.9.0/Dockerfile.worker-cpu
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
FROM continuumio/miniconda3:latest
|
|
2
|
+
|
|
3
|
+
WORKDIR /app
|
|
4
|
+
|
|
5
|
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
6
|
+
build-essential \
|
|
7
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
8
|
+
|
|
9
|
+
# Install RDKit via conda
|
|
10
|
+
RUN conda install -y -c conda-forge rdkit numpy"<2" && conda clean -afy
|
|
11
|
+
|
|
12
|
+
# Copy and install package
|
|
13
|
+
COPY pyproject.toml .
|
|
14
|
+
COPY src/ src/
|
|
15
|
+
|
|
16
|
+
RUN pip install --no-cache-dir ".[api]"
|
|
17
|
+
|
|
18
|
+
# Run CPU worker
|
|
19
|
+
CMD ["python", "-m", "arq", "bbnuke.workers.cpu_worker.CpuWorkerSettings"]
|
bbbnuke-0.9.0/PKG-INFO
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bbbnuke
|
|
3
|
+
Version: 0.9.0
|
|
4
|
+
Summary: BBB-Nuke: Blood-brain barrier penetration screening pipeline
|
|
5
|
+
Author-email: Temi Sobodu <temisobodu@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Development Status :: 4 - Beta
|
|
8
|
+
Classifier: Intended Audience :: Science/Research
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
13
|
+
Requires-Python: >=3.9
|
|
14
|
+
Requires-Dist: huggingface-hub>=0.20
|
|
15
|
+
Requires-Dist: joblib>=1.2
|
|
16
|
+
Requires-Dist: numpy>=1.24
|
|
17
|
+
Requires-Dist: openpyxl>=3.0
|
|
18
|
+
Requires-Dist: pandas>=2.0
|
|
19
|
+
Requires-Dist: pydantic>=2.0
|
|
20
|
+
Requires-Dist: rdkit
|
|
21
|
+
Requires-Dist: scikit-learn>=1.0
|
|
22
|
+
Requires-Dist: torch-geometric>=2.0
|
|
23
|
+
Requires-Dist: torch-scatter
|
|
24
|
+
Requires-Dist: torch-sparse
|
|
25
|
+
Requires-Dist: torch>=2.0
|
|
26
|
+
Provides-Extra: api
|
|
27
|
+
Requires-Dist: aiosqlite>=0.20; extra == 'api'
|
|
28
|
+
Requires-Dist: alembic>=1.13; extra == 'api'
|
|
29
|
+
Requires-Dist: arq>=0.26; extra == 'api'
|
|
30
|
+
Requires-Dist: asyncpg>=0.29; extra == 'api'
|
|
31
|
+
Requires-Dist: fastapi>=0.110; extra == 'api'
|
|
32
|
+
Requires-Dist: httpx>=0.25; extra == 'api'
|
|
33
|
+
Requires-Dist: redis>=5.0; extra == 'api'
|
|
34
|
+
Requires-Dist: sqlalchemy[asyncio]>=2.0; extra == 'api'
|
|
35
|
+
Requires-Dist: uvicorn[standard]>=0.25; extra == 'api'
|
|
36
|
+
Provides-Extra: api-test
|
|
37
|
+
Requires-Dist: aiosqlite>=0.20; extra == 'api-test'
|
|
38
|
+
Requires-Dist: arq>=0.26; extra == 'api-test'
|
|
39
|
+
Requires-Dist: fastapi>=0.110; extra == 'api-test'
|
|
40
|
+
Requires-Dist: httpx>=0.25; extra == 'api-test'
|
|
41
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'api-test'
|
|
42
|
+
Requires-Dist: pytest>=7.0; extra == 'api-test'
|
|
43
|
+
Requires-Dist: redis>=5.0; extra == 'api-test'
|
|
44
|
+
Requires-Dist: sqlalchemy[asyncio]>=2.0; extra == 'api-test'
|
|
45
|
+
Requires-Dist: uvicorn[standard]>=0.25; extra == 'api-test'
|
|
46
|
+
Provides-Extra: dev
|
|
47
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
48
|
+
Requires-Dist: ruff>=0.4; extra == 'dev'
|
|
49
|
+
Description-Content-Type: text/markdown
|
|
50
|
+
|
|
51
|
+
# BBB-Nuke
|
|
52
|
+
|
|
53
|
+
<img width="960" height="540" alt="image" src="https://github.com/user-attachments/assets/42bfaee2-aa67-4675-9423-803386c1dc6c" />
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
Blood-brain barrier penetration screening pipeline. Scores small molecules for BBB permeability using physicochemical properties, CNS-MPO desirability scoring, protein-ligand affinity, and a heuristic confidence layer.
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
### Install
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install -e ".[api]"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Run the API
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
uvicorn bbnuke.api.app:app --host 127.0.0.1 --port 8000 --reload
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Open **http://127.0.0.1:8000/docs** for the interactive Swagger UI.
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## API Tutorial
|
|
77
|
+
|
|
78
|
+
### Step 1: Check the server is running
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
curl http://127.0.0.1:8000/v1/health
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
**Expected response:**
|
|
85
|
+
|
|
86
|
+
```json
|
|
87
|
+
{"status": "ok"}
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Step 2: Check the pipeline version
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
curl http://127.0.0.1:8000/v1/version
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
**Expected response:**
|
|
97
|
+
|
|
98
|
+
```json
|
|
99
|
+
{
|
|
100
|
+
"pipeline_version": "0.7.0",
|
|
101
|
+
"api_version": "1.0.0",
|
|
102
|
+
"hyperparameters": {
|
|
103
|
+
"logistic_k": 0.1352,
|
|
104
|
+
"efflux_veto_threshold": 0.7,
|
|
105
|
+
"mpo_gain_coeff": 2.0,
|
|
106
|
+
"mpo_filter_cutoff": 3.0
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Step 3: Score a compound (CPU-only)
|
|
112
|
+
|
|
113
|
+
Score caffeine without affinity data:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
curl -X POST http://127.0.0.1:8000/v1/score \
|
|
117
|
+
-H "Content-Type: application/json" \
|
|
118
|
+
-d '{"smiles": "Cn1c(=O)c2c(ncn2C)n(C)c1=O"}'
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
**Expected response (key fields):**
|
|
122
|
+
|
|
123
|
+
```json
|
|
124
|
+
{
|
|
125
|
+
"result": {
|
|
126
|
+
"compound_id": "query",
|
|
127
|
+
"smiles_standardized": "Cn1c(=O)c2c(ncn2C)n(C)c1=O",
|
|
128
|
+
"properties": {
|
|
129
|
+
"mw": 194.194,
|
|
130
|
+
"logp": -1.0293,
|
|
131
|
+
"tpsa": 61.82,
|
|
132
|
+
"hbd": 0,
|
|
133
|
+
"hba": 6
|
|
134
|
+
},
|
|
135
|
+
"cns_mpo": {
|
|
136
|
+
"score": 6.0,
|
|
137
|
+
"passed_filter": true
|
|
138
|
+
},
|
|
139
|
+
"affinity": null,
|
|
140
|
+
"heuristic": {
|
|
141
|
+
"p_bbb": 0.692365,
|
|
142
|
+
"diagnostics": {
|
|
143
|
+
"reason": "MPO-only (no affinity scores)",
|
|
144
|
+
"veto": false,
|
|
145
|
+
"score_bind": 0.0,
|
|
146
|
+
"score_mpo": 6.0,
|
|
147
|
+
"score_total": 6.0
|
|
148
|
+
}
|
|
149
|
+
},
|
|
150
|
+
"passed_mpo_filter": true
|
|
151
|
+
},
|
|
152
|
+
"pipeline_version": "0.7.0"
|
|
153
|
+
}
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
**What this means:**
|
|
157
|
+
- Caffeine scores a perfect 6.0 on CNS-MPO (excellent drug-likeness for CNS)
|
|
158
|
+
- Without affinity data, P_BBB is derived from the MPO term alone: `S_mpo = mpo_gain_coeff * (score - mpo_filter_cutoff) = 2.0 * (6.0 - 3.0) = 6.0`, so `P_BBB = 1 / (1 + exp(-0.1352 * 6.0))` ≈ 0.69
|
|
159
|
+
- `affinity: null` because no pre-computed PSICHIC data is loaded
|
|
160
|
+
|
|
161
|
+
### Step 4: Score with affinity data
|
|
162
|
+
|
|
163
|
+
To include PSICHIC affinity scoring, set `include_affinity: true`. This requires pre-computed data files (see [Data Setup](#data-setup) below).
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
curl -X POST http://127.0.0.1:8000/v1/score \
|
|
167
|
+
-H "Content-Type: application/json" \
|
|
168
|
+
-d '{
|
|
169
|
+
"smiles": "CCC1(c2ccccc2)C(=O)NC(=O)N(C(=O)c2ccccc2)C1=O",
|
|
170
|
+
"include_affinity": true
|
|
171
|
+
}'
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
**Expected response with affinity (key fields):**
|
|
175
|
+
|
|
176
|
+
```json
|
|
177
|
+
{
|
|
178
|
+
"result": {
|
|
179
|
+
"compound_id": "query",
|
|
180
|
+
"cns_mpo": {
|
|
181
|
+
"score": 5.6234,
|
|
182
|
+
"passed_filter": true
|
|
183
|
+
},
|
|
184
|
+
"affinity": {
|
|
185
|
+
"model": "psichic",
|
|
186
|
+
"scores": [
|
|
187
|
+
{"protein_id": "4F2_HUMAN", "score_raw": 6.42, "score_norm": 0.642},
|
|
188
|
+
{"protein_id": "ACHE_HUMAN", "score_raw": 7.90, "score_norm": 0.790},
|
|
189
|
+
"... 65 proteins total ..."
|
|
190
|
+
]
|
|
191
|
+
},
|
|
192
|
+
"heuristic": {
|
|
193
|
+
"p_bbb": 0.670261,
|
|
194
|
+
"diagnostics": {
|
|
195
|
+
"reason": "OK",
|
|
196
|
+
"veto": false,
|
|
197
|
+
"score_bind": 0.0,
|
|
198
|
+
"score_mpo": 5.2468,
|
|
199
|
+
"score_total": 5.2468,
|
|
200
|
+
"contributions": ["... per-protein breakdown ..."]
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
**What this means:**
|
|
208
|
+
- Each of the 65 BBB proteins gets a binding probability (0-1) from PSICHIC
|
|
209
|
+
- The heuristic weighs carrier proteins positively, enzyme proteins negatively
|
|
210
|
+
- Efflux proteins (P-gp, BCRP, MRPs) can **veto** the compound if binding exceeds their threshold
|
|
211
|
+
- `S_bind` sums weighted protein contributions; `S_mpo` is the MPO gain above baseline
|
|
212
|
+
- `P_BBB = 1 / (1 + exp(-k * (S_bind + S_mpo)))`
|
|
213
|
+
|
|
214
|
+
### Step 5: Score a compound that fails MPO filter
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
curl -X POST http://127.0.0.1:8000/v1/score \
|
|
218
|
+
-H "Content-Type: application/json" \
|
|
219
|
+
-d '{"smiles": "O=C(O)CCCCCCCCCCCCCCCCC"}'
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
**Expected response:**
|
|
223
|
+
|
|
224
|
+
```json
|
|
225
|
+
{
|
|
226
|
+
"result": {
|
|
227
|
+
"cns_mpo": {
|
|
228
|
+
"score": 1.873,
|
|
229
|
+
"passed_filter": false
|
|
230
|
+
},
|
|
231
|
+
"heuristic": null,
|
|
232
|
+
"passed_mpo_filter": false
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
**What this means:**
|
|
238
|
+
- Stearic acid has poor CNS drug-likeness (very high LogP from its long alkyl chain; its MW is moderate and the carboxylic acid contributes one HBD)
|
|
239
|
+
- CNS-MPO < 3.0, so it's filtered out before affinity/heuristic scoring
|
|
240
|
+
- `heuristic: null` — no P_BBB is computed for compounds that fail the MPO gate
|
|
241
|
+
|
|
242
|
+
### Step 6: Override pipeline configuration
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
curl -X POST http://127.0.0.1:8000/v1/score \
|
|
246
|
+
-H "Content-Type: application/json" \
|
|
247
|
+
-d '{
|
|
248
|
+
"smiles": "c1ccc2c(c1)c1ccccc1[nH]2",
|
|
249
|
+
"config": {
|
|
250
|
+
"mpo_cutoff": 5.0,
|
|
251
|
+
"logistic_k": 0.2
|
|
252
|
+
}
|
|
253
|
+
}'
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
You can override any pipeline parameter: `mpo_cutoff`, `logistic_k`, `efflux_veto_threshold`, `mpo_gain_coeff`.
|
|
257
|
+
|
|
258
|
+
### Step 7: List all BBB target proteins
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
curl http://127.0.0.1:8000/v1/proteins
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
Returns all 65 proteins with their categories (carrier/efflux/enzyme), weights, and binding thresholds.
|
|
265
|
+
|
|
266
|
+
---
|
|
267
|
+
|
|
268
|
+
## Data Setup
|
|
269
|
+
|
|
270
|
+
### Pre-computed affinity data
|
|
271
|
+
|
|
272
|
+
The API supports pre-computed PSICHIC affinity lookup when `include_affinity: true`. To enable this, place these files in the repo root:
|
|
273
|
+
|
|
274
|
+
| File | Description |
|
|
275
|
+
|------|-------------|
|
|
276
|
+
| `psichic_affinity_matrix.csv` | 599 compounds x 65 proteins, normalized scores (0-1) |
|
|
277
|
+
| `bbb_plus_pka.csv` | Compound names, SMILES, and pKa predictions (acid/base lists); also used for SMILES-to-compound mapping |
|
|
278
|
+
|
|
279
|
+
**Format of `psichic_affinity_matrix.csv`:**
|
|
280
|
+
```
|
|
281
|
+
Compound_ID,4F2_HUMAN,5NTD_HUMAN,ABCG2_HUMAN,...
|
|
282
|
+
caffeine,0.431,0.489,0.497,...
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
**Format of `bbb_plus_pka.csv`:**
|
|
286
|
+
```
|
|
287
|
+
compound_name,SMILES,acid_pkas,base_pkas
|
|
288
|
+
caffeine,Cn1c(=O)c2c(ncn2C)n(C)c1=O,,
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
The API matches incoming SMILES by canonicalizing via RDKit and looking up against the pre-computed set. If a compound is not found, scoring proceeds without affinity data (MPO-only).
|
|
292
|
+
|
|
293
|
+
### Running your own PSICHIC screening
|
|
294
|
+
|
|
295
|
+
To generate affinity data for new compounds, see the `modules/affinity.py` module, which wraps the PSICHIC subprocess.
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
299
|
+
## Pipeline
|
|
300
|
+
|
|
301
|
+
```
|
|
302
|
+
Input SMILES
|
|
303
|
+
-> Standardize (RDKit: canonicalize, neutralize, strip salts)
|
|
304
|
+
-> Properties (MW, LogP, TPSA, HBD, HBA)
|
|
305
|
+
-> pKa (MolGpKa predictions or placeholder)
|
|
306
|
+
-> CNS-MPO (6 desirability subscores, 0-6 total)
|
|
307
|
+
-> MPO Gate (score >= 3.0 to proceed)
|
|
308
|
+
-> Affinity (PSICHIC: 65 BBB proteins, 0-1 binding probability)
|
|
309
|
+
-> Heuristic (weighted binding + efflux veto + logistic)
|
|
310
|
+
-> P_BBB output (0-1)
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
## Endpoints
|
|
314
|
+
|
|
315
|
+
| Method | Path | Description |
|
|
316
|
+
|--------|------|-------------|
|
|
317
|
+
| GET | `/v1/health` | Liveness probe |
|
|
318
|
+
| GET | `/v1/version` | Pipeline version + hyperparameters |
|
|
319
|
+
| POST | `/v1/score` | Score a single compound |
|
|
320
|
+
| GET | `/v1/proteins` | List all 65 BBB target proteins |
|
|
321
|
+
| POST | `/v1/batch` | Submit batch job (requires Redis + ARQ) |
|
|
322
|
+
| GET | `/v1/batch/{job_id}` | Poll batch progress |
|
|
323
|
+
|
|
324
|
+
## Tests
|
|
325
|
+
|
|
326
|
+
```bash
|
|
327
|
+
pip install -e ".[api,api-test]"
|
|
328
|
+
pytest tests/ -v
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
## License
|
|
332
|
+
|
|
333
|
+
MIT (matches the `License: MIT` metadata field above) — ATTN Lab.
|
bbbnuke-0.9.0/README.md
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
# BBB-Nuke
|
|
2
|
+
|
|
3
|
+
<img width="960" height="540" alt="image" src="https://github.com/user-attachments/assets/42bfaee2-aa67-4675-9423-803386c1dc6c" />
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
Blood-brain barrier penetration screening pipeline. Scores small molecules for BBB permeability using physicochemical properties, CNS-MPO desirability scoring, protein-ligand affinity, and a heuristic confidence layer.
|
|
7
|
+
|
|
8
|
+
## Quick Start
|
|
9
|
+
|
|
10
|
+
### Install
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
pip install -e ".[api]"
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
### Run the API
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
uvicorn bbnuke.api.app:app --host 127.0.0.1 --port 8000 --reload
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Open **http://127.0.0.1:8000/docs** for the interactive Swagger UI.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## API Tutorial
|
|
27
|
+
|
|
28
|
+
### Step 1: Check the server is running
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
curl http://127.0.0.1:8000/v1/health
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
**Expected response:**
|
|
35
|
+
|
|
36
|
+
```json
|
|
37
|
+
{"status": "ok"}
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### Step 2: Check the pipeline version
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
curl http://127.0.0.1:8000/v1/version
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**Expected response:**
|
|
47
|
+
|
|
48
|
+
```json
|
|
49
|
+
{
|
|
50
|
+
"pipeline_version": "0.7.0",
|
|
51
|
+
"api_version": "1.0.0",
|
|
52
|
+
"hyperparameters": {
|
|
53
|
+
"logistic_k": 0.1352,
|
|
54
|
+
"efflux_veto_threshold": 0.7,
|
|
55
|
+
"mpo_gain_coeff": 2.0,
|
|
56
|
+
"mpo_filter_cutoff": 3.0
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Step 3: Score a compound (CPU-only)
|
|
62
|
+
|
|
63
|
+
Score caffeine without affinity data:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
curl -X POST http://127.0.0.1:8000/v1/score \
|
|
67
|
+
-H "Content-Type: application/json" \
|
|
68
|
+
-d '{"smiles": "Cn1c(=O)c2c(ncn2C)n(C)c1=O"}'
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
**Expected response (key fields):**
|
|
72
|
+
|
|
73
|
+
```json
|
|
74
|
+
{
|
|
75
|
+
"result": {
|
|
76
|
+
"compound_id": "query",
|
|
77
|
+
"smiles_standardized": "Cn1c(=O)c2c(ncn2C)n(C)c1=O",
|
|
78
|
+
"properties": {
|
|
79
|
+
"mw": 194.194,
|
|
80
|
+
"logp": -1.0293,
|
|
81
|
+
"tpsa": 61.82,
|
|
82
|
+
"hbd": 0,
|
|
83
|
+
"hba": 6
|
|
84
|
+
},
|
|
85
|
+
"cns_mpo": {
|
|
86
|
+
"score": 6.0,
|
|
87
|
+
"passed_filter": true
|
|
88
|
+
},
|
|
89
|
+
"affinity": null,
|
|
90
|
+
"heuristic": {
|
|
91
|
+
"p_bbb": 0.692365,
|
|
92
|
+
"diagnostics": {
|
|
93
|
+
"reason": "MPO-only (no affinity scores)",
|
|
94
|
+
"veto": false,
|
|
95
|
+
"score_bind": 0.0,
|
|
96
|
+
"score_mpo": 6.0,
|
|
97
|
+
"score_total": 6.0
|
|
98
|
+
}
|
|
99
|
+
},
|
|
100
|
+
"passed_mpo_filter": true
|
|
101
|
+
},
|
|
102
|
+
"pipeline_version": "0.7.0"
|
|
103
|
+
}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
**What this means:**
|
|
107
|
+
- Caffeine scores a perfect 6.0 on CNS-MPO (excellent drug-likeness for CNS)
|
|
108
|
+
- Without affinity data, P_BBB is derived from the MPO term alone: `S_mpo = mpo_gain_coeff * (score - mpo_filter_cutoff) = 2.0 * (6.0 - 3.0) = 6.0`, so `P_BBB = 1 / (1 + exp(-0.1352 * 6.0))` ≈ 0.69
|
|
109
|
+
- `affinity: null` because no pre-computed PSICHIC data is loaded
|
|
110
|
+
|
|
111
|
+
### Step 4: Score with affinity data
|
|
112
|
+
|
|
113
|
+
To include PSICHIC affinity scoring, set `include_affinity: true`. This requires pre-computed data files (see [Data Setup](#data-setup) below).
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
curl -X POST http://127.0.0.1:8000/v1/score \
|
|
117
|
+
-H "Content-Type: application/json" \
|
|
118
|
+
-d '{
|
|
119
|
+
"smiles": "CCC1(c2ccccc2)C(=O)NC(=O)N(C(=O)c2ccccc2)C1=O",
|
|
120
|
+
"include_affinity": true
|
|
121
|
+
}'
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**Expected response with affinity (key fields):**
|
|
125
|
+
|
|
126
|
+
```json
|
|
127
|
+
{
|
|
128
|
+
"result": {
|
|
129
|
+
"compound_id": "query",
|
|
130
|
+
"cns_mpo": {
|
|
131
|
+
"score": 5.6234,
|
|
132
|
+
"passed_filter": true
|
|
133
|
+
},
|
|
134
|
+
"affinity": {
|
|
135
|
+
"model": "psichic",
|
|
136
|
+
"scores": [
|
|
137
|
+
{"protein_id": "4F2_HUMAN", "score_raw": 6.42, "score_norm": 0.642},
|
|
138
|
+
{"protein_id": "ACHE_HUMAN", "score_raw": 7.90, "score_norm": 0.790},
|
|
139
|
+
"... 65 proteins total ..."
|
|
140
|
+
]
|
|
141
|
+
},
|
|
142
|
+
"heuristic": {
|
|
143
|
+
"p_bbb": 0.670261,
|
|
144
|
+
"diagnostics": {
|
|
145
|
+
"reason": "OK",
|
|
146
|
+
"veto": false,
|
|
147
|
+
"score_bind": 0.0,
|
|
148
|
+
"score_mpo": 5.2468,
|
|
149
|
+
"score_total": 5.2468,
|
|
150
|
+
"contributions": ["... per-protein breakdown ..."]
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
**What this means:**
|
|
158
|
+
- Each of the 65 BBB proteins gets a binding probability (0-1) from PSICHIC
|
|
159
|
+
- The heuristic weighs carrier proteins positively, enzyme proteins negatively
|
|
160
|
+
- Efflux proteins (P-gp, BCRP, MRPs) can **veto** the compound if binding exceeds their threshold
|
|
161
|
+
- `S_bind` sums weighted protein contributions; `S_mpo` is the MPO gain above baseline
|
|
162
|
+
- `P_BBB = 1 / (1 + exp(-k * (S_bind + S_mpo)))`
|
|
163
|
+
|
|
164
|
+
### Step 5: Score a compound that fails MPO filter
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
curl -X POST http://127.0.0.1:8000/v1/score \
|
|
168
|
+
-H "Content-Type: application/json" \
|
|
169
|
+
-d '{"smiles": "O=C(O)CCCCCCCCCCCCCCCCC"}'
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
**Expected response:**
|
|
173
|
+
|
|
174
|
+
```json
|
|
175
|
+
{
|
|
176
|
+
"result": {
|
|
177
|
+
"cns_mpo": {
|
|
178
|
+
"score": 1.873,
|
|
179
|
+
"passed_filter": false
|
|
180
|
+
},
|
|
181
|
+
"heuristic": null,
|
|
182
|
+
"passed_mpo_filter": false
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
**What this means:**
|
|
188
|
+
- Stearic acid has poor CNS drug-likeness (very high LogP from its long alkyl chain; its MW is moderate and the carboxylic acid contributes one HBD)
|
|
189
|
+
- CNS-MPO < 3.0, so it's filtered out before affinity/heuristic scoring
|
|
190
|
+
- `heuristic: null` — no P_BBB is computed for compounds that fail the MPO gate
|
|
191
|
+
|
|
192
|
+
### Step 6: Override pipeline configuration
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
curl -X POST http://127.0.0.1:8000/v1/score \
|
|
196
|
+
-H "Content-Type: application/json" \
|
|
197
|
+
-d '{
|
|
198
|
+
"smiles": "c1ccc2c(c1)c1ccccc1[nH]2",
|
|
199
|
+
"config": {
|
|
200
|
+
"mpo_cutoff": 5.0,
|
|
201
|
+
"logistic_k": 0.2
|
|
202
|
+
}
|
|
203
|
+
}'
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
You can override any pipeline parameter: `mpo_cutoff`, `logistic_k`, `efflux_veto_threshold`, `mpo_gain_coeff`.
|
|
207
|
+
|
|
208
|
+
### Step 7: List all BBB target proteins
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
curl http://127.0.0.1:8000/v1/proteins
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
Returns all 65 proteins with their categories (carrier/efflux/enzyme), weights, and binding thresholds.
|
|
215
|
+
|
|
216
|
+
---
|
|
217
|
+
|
|
218
|
+
## Data Setup
|
|
219
|
+
|
|
220
|
+
### Pre-computed affinity data
|
|
221
|
+
|
|
222
|
+
The API supports pre-computed PSICHIC affinity lookup when `include_affinity: true`. To enable this, place these files in the repo root:
|
|
223
|
+
|
|
224
|
+
| File | Description |
|
|
225
|
+
|------|-------------|
|
|
226
|
+
| `psichic_affinity_matrix.csv` | 599 compounds x 65 proteins, normalized scores (0-1) |
|
|
227
|
+
| `bbb_plus_pka.csv` | Compound names, SMILES, and pKa predictions (acid/base lists); also used for SMILES-to-compound mapping |
|
|
228
|
+
|
|
229
|
+
**Format of `psichic_affinity_matrix.csv`:**
|
|
230
|
+
```
|
|
231
|
+
Compound_ID,4F2_HUMAN,5NTD_HUMAN,ABCG2_HUMAN,...
|
|
232
|
+
caffeine,0.431,0.489,0.497,...
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
**Format of `bbb_plus_pka.csv`:**
|
|
236
|
+
```
|
|
237
|
+
compound_name,SMILES,acid_pkas,base_pkas
|
|
238
|
+
caffeine,Cn1c(=O)c2c(ncn2C)n(C)c1=O,,
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
The API matches incoming SMILES by canonicalizing via RDKit and looking up against the pre-computed set. If a compound is not found, scoring proceeds without affinity data (MPO-only).
|
|
242
|
+
|
|
243
|
+
### Running your own PSICHIC screening
|
|
244
|
+
|
|
245
|
+
To generate affinity data for new compounds, see the `modules/affinity.py` module, which wraps the PSICHIC subprocess.
|
|
246
|
+
|
|
247
|
+
---
|
|
248
|
+
|
|
249
|
+
## Pipeline
|
|
250
|
+
|
|
251
|
+
```
|
|
252
|
+
Input SMILES
|
|
253
|
+
-> Standardize (RDKit: canonicalize, neutralize, strip salts)
|
|
254
|
+
-> Properties (MW, LogP, TPSA, HBD, HBA)
|
|
255
|
+
-> pKa (MolGpKa predictions or placeholder)
|
|
256
|
+
-> CNS-MPO (6 desirability subscores, 0-6 total)
|
|
257
|
+
-> MPO Gate (score >= 3.0 to proceed)
|
|
258
|
+
-> Affinity (PSICHIC: 65 BBB proteins, 0-1 binding probability)
|
|
259
|
+
-> Heuristic (weighted binding + efflux veto + logistic)
|
|
260
|
+
-> P_BBB output (0-1)
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## Endpoints
|
|
264
|
+
|
|
265
|
+
| Method | Path | Description |
|
|
266
|
+
|--------|------|-------------|
|
|
267
|
+
| GET | `/v1/health` | Liveness probe |
|
|
268
|
+
| GET | `/v1/version` | Pipeline version + hyperparameters |
|
|
269
|
+
| POST | `/v1/score` | Score a single compound |
|
|
270
|
+
| GET | `/v1/proteins` | List all 65 BBB target proteins |
|
|
271
|
+
| POST | `/v1/batch` | Submit batch job (requires Redis + ARQ) |
|
|
272
|
+
| GET | `/v1/batch/{job_id}` | Poll batch progress |
|
|
273
|
+
|
|
274
|
+
## Tests
|
|
275
|
+
|
|
276
|
+
```bash
|
|
277
|
+
pip install -e ".[api,api-test]"
|
|
278
|
+
pytest tests/ -v
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## License
|
|
282
|
+
|
|
283
|
+
MIT (as declared in the package metadata) — ATTN Lab.
|