bbbnuke 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. bbbnuke-0.9.0/.gitignore +13 -0
  2. bbbnuke-0.9.0/Dockerfile.api +23 -0
  3. bbbnuke-0.9.0/Dockerfile.worker-cpu +19 -0
  4. bbbnuke-0.9.0/PKG-INFO +333 -0
  5. bbbnuke-0.9.0/README.md +283 -0
  6. bbbnuke-0.9.0/docker-compose.yml +86 -0
  7. bbbnuke-0.9.0/docs/INTEGRATION_GUIDE.md +360 -0
  8. bbbnuke-0.9.0/pyproject.toml +79 -0
  9. bbbnuke-0.9.0/src/bbnuke/__init__.py +3 -0
  10. bbbnuke-0.9.0/src/bbnuke/agent/__init__.py +31 -0
  11. bbbnuke-0.9.0/src/bbnuke/agent/orchestrator.py +234 -0
  12. bbbnuke-0.9.0/src/bbnuke/agent/permissions.py +75 -0
  13. bbbnuke-0.9.0/src/bbnuke/agent/state.py +196 -0
  14. bbbnuke-0.9.0/src/bbnuke/agent/tool_logger.py +92 -0
  15. bbbnuke-0.9.0/src/bbnuke/agent/tools/__init__.py +14 -0
  16. bbbnuke-0.9.0/src/bbnuke/agent/tools/compute_tools.py +165 -0
  17. bbbnuke-0.9.0/src/bbnuke/agent/tools/library_tools.py +143 -0
  18. bbbnuke-0.9.0/src/bbnuke/agent/tools/pipeline_tools.py +312 -0
  19. bbbnuke-0.9.0/src/bbnuke/agent/tools/registry.py +241 -0
  20. bbbnuke-0.9.0/src/bbnuke/agent/tools/workspace_tools.py +219 -0
  21. bbbnuke-0.9.0/src/bbnuke/agent/workspace.py +148 -0
  22. bbbnuke-0.9.0/src/bbnuke/api/__init__.py +1 -0
  23. bbbnuke-0.9.0/src/bbnuke/api/affinity_cache.py +164 -0
  24. bbbnuke-0.9.0/src/bbnuke/api/app.py +69 -0
  25. bbbnuke-0.9.0/src/bbnuke/api/audit.py +41 -0
  26. bbbnuke-0.9.0/src/bbnuke/api/auth.py +90 -0
  27. bbbnuke-0.9.0/src/bbnuke/api/deps.py +56 -0
  28. bbbnuke-0.9.0/src/bbnuke/api/middleware.py +209 -0
  29. bbbnuke-0.9.0/src/bbnuke/api/routes/__init__.py +1 -0
  30. bbbnuke-0.9.0/src/bbnuke/api/routes/batch.py +205 -0
  31. bbbnuke-0.9.0/src/bbnuke/api/routes/health.py +34 -0
  32. bbbnuke-0.9.0/src/bbnuke/api/routes/keys.py +97 -0
  33. bbbnuke-0.9.0/src/bbnuke/api/routes/projects.py +229 -0
  34. bbbnuke-0.9.0/src/bbnuke/api/routes/proteins.py +48 -0
  35. bbbnuke-0.9.0/src/bbnuke/api/routes/score.py +57 -0
  36. bbbnuke-0.9.0/src/bbnuke/api/routes/screen.py +15 -0
  37. bbbnuke-0.9.0/src/bbnuke/api/schemas.py +136 -0
  38. bbbnuke-0.9.0/src/bbnuke/api/usage.py +47 -0
  39. bbbnuke-0.9.0/src/bbnuke/base.py +41 -0
  40. bbbnuke-0.9.0/src/bbnuke/bb_profiler.py +59 -0
  41. bbbnuke-0.9.0/src/bbnuke/cli.py +216 -0
  42. bbbnuke-0.9.0/src/bbnuke/core/__init__.py +45 -0
  43. bbbnuke-0.9.0/src/bbnuke/core/config.py +73 -0
  44. bbbnuke-0.9.0/src/bbnuke/core/constants.py +51 -0
  45. bbbnuke-0.9.0/src/bbnuke/core/schemas.py +367 -0
  46. bbbnuke-0.9.0/src/bbnuke/data/BBB_protein_core_table.xlsx +0 -0
  47. bbbnuke-0.9.0/src/bbnuke/data/BBB_proteins_merged.csv +66 -0
  48. bbbnuke-0.9.0/src/bbnuke/data/__init__.py +0 -0
  49. bbbnuke-0.9.0/src/bbnuke/data/bbb_classifier.joblib +0 -0
  50. bbbnuke-0.9.0/src/bbnuke/data/bbb_classifier_meta.json +25 -0
  51. bbbnuke-0.9.0/src/bbnuke/data/bbb_scaler.joblib +0 -0
  52. bbbnuke-0.9.0/src/bbnuke/data/efflux_proteins.csv +10 -0
  53. bbbnuke-0.9.0/src/bbnuke/data/target.csv +3 -0
  54. bbbnuke-0.9.0/src/bbnuke/db/__init__.py +1 -0
  55. bbbnuke-0.9.0/src/bbnuke/db/models.py +312 -0
  56. bbbnuke-0.9.0/src/bbnuke/db/session.py +39 -0
  57. bbbnuke-0.9.0/src/bbnuke/dockerfiles/Dockerfile +20 -0
  58. bbbnuke-0.9.0/src/bbnuke/mcp_server.py +333 -0
  59. bbbnuke-0.9.0/src/bbnuke/modules/__init__.py +0 -0
  60. bbbnuke-0.9.0/src/bbnuke/modules/affinity.py +255 -0
  61. bbbnuke-0.9.0/src/bbnuke/modules/cns_mpo.py +179 -0
  62. bbbnuke-0.9.0/src/bbnuke/modules/efflux.py +119 -0
  63. bbbnuke-0.9.0/src/bbnuke/modules/heuristic.py +385 -0
  64. bbbnuke-0.9.0/src/bbnuke/modules/pka.py +312 -0
  65. bbbnuke-0.9.0/src/bbnuke/modules/properties.py +31 -0
  66. bbbnuke-0.9.0/src/bbnuke/modules/psichic/__init__.py +287 -0
  67. bbbnuke-0.9.0/src/bbnuke/modules/psichic/dataset.py +164 -0
  68. bbbnuke-0.9.0/src/bbnuke/modules/psichic/drug_pool.py +53 -0
  69. bbbnuke-0.9.0/src/bbnuke/modules/psichic/inference.py +138 -0
  70. bbbnuke-0.9.0/src/bbnuke/modules/psichic/layers.py +459 -0
  71. bbbnuke-0.9.0/src/bbnuke/modules/psichic/ligand_init.py +314 -0
  72. bbbnuke-0.9.0/src/bbnuke/modules/psichic/net.py +290 -0
  73. bbbnuke-0.9.0/src/bbnuke/modules/psichic/pna.py +99 -0
  74. bbbnuke-0.9.0/src/bbnuke/modules/psichic/protein_pool.py +65 -0
  75. bbbnuke-0.9.0/src/bbnuke/modules/psichic/scaler.py +74 -0
  76. bbbnuke-0.9.0/src/bbnuke/modules/standardize.py +31 -0
  77. bbbnuke-0.9.0/src/bbnuke/openai_functions/__init__.py +918 -0
  78. bbbnuke-0.9.0/src/bbnuke/openai_functions/__main__.py +5 -0
  79. bbbnuke-0.9.0/src/bbnuke/optimizer.py +16 -0
  80. bbbnuke-0.9.0/src/bbnuke/pipeline/__init__.py +0 -0
  81. bbbnuke-0.9.0/src/bbnuke/pipeline/runner.py +255 -0
  82. bbbnuke-0.9.0/src/bbnuke/pipeline.py +45 -0
  83. bbbnuke-0.9.0/src/bbnuke/prop_predictor.py +21 -0
  84. bbbnuke-0.9.0/src/bbnuke/standardizer.py +22 -0
  85. bbbnuke-0.9.0/src/bbnuke/workers/__init__.py +1 -0
  86. bbbnuke-0.9.0/src/bbnuke/workers/cpu_worker.py +100 -0
  87. bbbnuke-0.9.0/src/bbnuke/workers/gpu_worker.py +76 -0
  88. bbbnuke-0.9.0/src/bbnuke/workers/orchestrator.py +166 -0
  89. bbbnuke-0.9.0/src/bbnuke/workflows/__init__.py +1 -0
  90. bbbnuke-0.9.0/src/bbnuke/workflows/engine.py +98 -0
  91. bbbnuke-0.9.0/src/bbnuke/workflows/models.py +75 -0
  92. bbbnuke-0.9.0/src/bbnuke/workflows/steps/__init__.py +1 -0
  93. bbbnuke-0.9.0/src/bbnuke/workflows/steps/batch_score.py +56 -0
  94. bbbnuke-0.9.0/src/bbnuke/workflows/steps/filter_step.py +82 -0
  95. bbbnuke-0.9.0/src/bbnuke/workflows/steps/parameter_sweep.py +87 -0
  96. bbbnuke-0.9.0/tests/__init__.py +0 -0
  97. bbbnuke-0.9.0/tests/test_affinity.py +81 -0
  98. bbbnuke-0.9.0/tests/test_agent.py +569 -0
  99. bbbnuke-0.9.0/tests/test_api.py +161 -0
  100. bbbnuke-0.9.0/tests/test_auth.py +215 -0
  101. bbbnuke-0.9.0/tests/test_batch.py +201 -0
  102. bbbnuke-0.9.0/tests/test_end_to_end.py +97 -0
  103. bbbnuke-0.9.0/tests/test_pipeline.py +169 -0
  104. bbbnuke-0.9.0/tests/test_pka.py +84 -0
  105. bbbnuke-0.9.0/tests/test_sprint4.py +437 -0
@@ -0,0 +1,13 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .pytest_cache/
7
+ .ruff_cache/
8
+ .DS_Store
9
+ *.db
10
+ runs/
11
+ bbb_plus_pka.csv
12
+ bbb_plus_full_results.csv
13
+ psichic_affinity_matrix.csv
@@ -0,0 +1,23 @@
1
+ FROM continuumio/miniconda3:latest
2
+
3
+ WORKDIR /app
4
+
5
+ # System deps
6
+ RUN apt-get update && apt-get install -y --no-install-recommends \
7
+ build-essential \
8
+ && rm -rf /var/lib/apt/lists/*
9
+
10
+ # Install RDKit via conda (fastest path)
11
+ RUN conda install -y -c conda-forge rdkit numpy"<2" && conda clean -afy
12
+
13
+ # Copy and install package
14
+ COPY pyproject.toml .
15
+ COPY src/ src/
16
+
17
+ RUN pip install --no-cache-dir ".[api]"
18
+
19
+ # Expose port
20
+ EXPOSE 8000
21
+
22
+ # Run API server
23
+ CMD ["uvicorn", "bbnuke.api.app:app", "--host", "0.0.0.0", "--port", "8000"]
@@ -0,0 +1,19 @@
1
+ FROM continuumio/miniconda3:latest
2
+
3
+ WORKDIR /app
4
+
5
+ RUN apt-get update && apt-get install -y --no-install-recommends \
6
+ build-essential \
7
+ && rm -rf /var/lib/apt/lists/*
8
+
9
+ # Install RDKit via conda
10
+ RUN conda install -y -c conda-forge rdkit numpy"<2" && conda clean -afy
11
+
12
+ # Copy and install package
13
+ COPY pyproject.toml .
14
+ COPY src/ src/
15
+
16
+ RUN pip install --no-cache-dir ".[api]"
17
+
18
+ # Run CPU worker
19
+ CMD ["python", "-m", "arq", "bbnuke.workers.cpu_worker.CpuWorkerSettings"]
bbbnuke-0.9.0/PKG-INFO ADDED
@@ -0,0 +1,333 @@
1
+ Metadata-Version: 2.4
2
+ Name: bbbnuke
3
+ Version: 0.9.0
4
+ Summary: BBB-Nuke: Blood-brain barrier penetration screening pipeline
5
+ Author-email: Temi Sobodu <temisobodu@gmail.com>
6
+ License: MIT
7
+ Classifier: Development Status :: 4 - Beta
8
+ Classifier: Intended Audience :: Science/Research
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
12
+ Classifier: Topic :: Scientific/Engineering :: Chemistry
13
+ Requires-Python: >=3.9
14
+ Requires-Dist: huggingface-hub>=0.20
15
+ Requires-Dist: joblib>=1.2
16
+ Requires-Dist: numpy>=1.24
17
+ Requires-Dist: openpyxl>=3.0
18
+ Requires-Dist: pandas>=2.0
19
+ Requires-Dist: pydantic>=2.0
20
+ Requires-Dist: rdkit
21
+ Requires-Dist: scikit-learn>=1.0
22
+ Requires-Dist: torch-geometric>=2.0
23
+ Requires-Dist: torch-scatter
24
+ Requires-Dist: torch-sparse
25
+ Requires-Dist: torch>=2.0
26
+ Provides-Extra: api
27
+ Requires-Dist: aiosqlite>=0.20; extra == 'api'
28
+ Requires-Dist: alembic>=1.13; extra == 'api'
29
+ Requires-Dist: arq>=0.26; extra == 'api'
30
+ Requires-Dist: asyncpg>=0.29; extra == 'api'
31
+ Requires-Dist: fastapi>=0.110; extra == 'api'
32
+ Requires-Dist: httpx>=0.25; extra == 'api'
33
+ Requires-Dist: redis>=5.0; extra == 'api'
34
+ Requires-Dist: sqlalchemy[asyncio]>=2.0; extra == 'api'
35
+ Requires-Dist: uvicorn[standard]>=0.25; extra == 'api'
36
+ Provides-Extra: api-test
37
+ Requires-Dist: aiosqlite>=0.20; extra == 'api-test'
38
+ Requires-Dist: arq>=0.26; extra == 'api-test'
39
+ Requires-Dist: fastapi>=0.110; extra == 'api-test'
40
+ Requires-Dist: httpx>=0.25; extra == 'api-test'
41
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'api-test'
42
+ Requires-Dist: pytest>=7.0; extra == 'api-test'
43
+ Requires-Dist: redis>=5.0; extra == 'api-test'
44
+ Requires-Dist: sqlalchemy[asyncio]>=2.0; extra == 'api-test'
45
+ Requires-Dist: uvicorn[standard]>=0.25; extra == 'api-test'
46
+ Provides-Extra: dev
47
+ Requires-Dist: pytest>=7.0; extra == 'dev'
48
+ Requires-Dist: ruff>=0.4; extra == 'dev'
49
+ Description-Content-Type: text/markdown
50
+
51
+ # BBB-Nuke
52
+
53
+ <img width="960" height="540" alt="image" src="https://github.com/user-attachments/assets/42bfaee2-aa67-4675-9423-803386c1dc6c" />
54
+
55
+
56
+ Blood-brain barrier penetration screening pipeline. Scores small molecules for BBB permeability using physicochemical properties, CNS-MPO desirability scoring, protein-ligand affinity, and a heuristic confidence layer.
57
+
58
+ ## Quick Start
59
+
60
+ ### Install
61
+
62
+ ```bash
63
+ pip install -e ".[api]"
64
+ ```
65
+
66
+ ### Run the API
67
+
68
+ ```bash
69
+ uvicorn bbnuke.api.app:app --host 127.0.0.1 --port 8000 --reload
70
+ ```
71
+
72
+ Open **http://127.0.0.1:8000/docs** for the interactive Swagger UI.
73
+
74
+ ---
75
+
76
+ ## API Tutorial
77
+
78
+ ### Step 1: Check the server is running
79
+
80
+ ```bash
81
+ curl http://127.0.0.1:8000/v1/health
82
+ ```
83
+
84
+ **Expected response:**
85
+
86
+ ```json
87
+ {"status": "ok"}
88
+ ```
89
+
90
+ ### Step 2: Check the pipeline version
91
+
92
+ ```bash
93
+ curl http://127.0.0.1:8000/v1/version
94
+ ```
95
+
96
+ **Expected response:**
97
+
98
+ ```json
99
+ {
100
+ "pipeline_version": "0.7.0",
101
+ "api_version": "1.0.0",
102
+ "hyperparameters": {
103
+ "logistic_k": 0.1352,
104
+ "efflux_veto_threshold": 0.7,
105
+ "mpo_gain_coeff": 2.0,
106
+ "mpo_filter_cutoff": 3.0
107
+ }
108
+ }
109
+ ```
110
+
111
+ ### Step 3: Score a compound (CPU-only)
112
+
113
+ Score caffeine without affinity data:
114
+
115
+ ```bash
116
+ curl -X POST http://127.0.0.1:8000/v1/score \
117
+ -H "Content-Type: application/json" \
118
+ -d '{"smiles": "Cn1c(=O)c2c(ncn2C)n(C)c1=O"}'
119
+ ```
120
+
121
+ **Expected response (key fields):**
122
+
123
+ ```json
124
+ {
125
+ "result": {
126
+ "compound_id": "query",
127
+ "smiles_standardized": "Cn1c(=O)c2c(ncn2C)n(C)c1=O",
128
+ "properties": {
129
+ "mw": 194.194,
130
+ "logp": -1.0293,
131
+ "tpsa": 61.82,
132
+ "hbd": 0,
133
+ "hba": 6
134
+ },
135
+ "cns_mpo": {
136
+ "score": 6.0,
137
+ "passed_filter": true
138
+ },
139
+ "affinity": null,
140
+ "heuristic": {
141
+ "p_bbb": 0.692365,
142
+ "diagnostics": {
143
+ "reason": "MPO-only (no affinity scores)",
144
+ "veto": false,
145
+ "score_bind": 0.0,
146
+ "score_mpo": 6.0,
147
+ "score_total": 6.0
148
+ }
149
+ },
150
+ "passed_mpo_filter": true
151
+ },
152
+ "pipeline_version": "0.7.0"
153
+ }
154
+ ```
155
+
156
+ **What this means:**
157
+ - Caffeine scores a perfect 6.0 on CNS-MPO (excellent drug-likeness for CNS)
158
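+ - Without affinity data, P_BBB is derived from MPO alone: the MPO gain above the 3.0 baseline is `S_mpo = mpo_gain_coeff * (MPO - 3.0) = 2.0 * (6.0 - 3.0) = 6.0`, so `P_BBB = 1 / (1 + exp(-0.1352 * 6.0))` = 0.69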
159
+ - `affinity: null` because no pre-computed PSICHIC data is loaded
160
+
161
+ ### Step 4: Score with affinity data
162
+
163
+ To include PSICHIC affinity scoring, set `include_affinity: true`. This requires pre-computed data files (see [Data Setup](#data-setup) below).
164
+
165
+ ```bash
166
+ curl -X POST http://127.0.0.1:8000/v1/score \
167
+ -H "Content-Type: application/json" \
168
+ -d '{
169
+ "smiles": "CCC1(c2ccccc2)C(=O)NC(=O)N(C(=O)c2ccccc2)C1=O",
170
+ "include_affinity": true
171
+ }'
172
+ ```
173
+
174
+ **Expected response with affinity (key fields):**
175
+
176
+ ```json
177
+ {
178
+ "result": {
179
+ "compound_id": "query",
180
+ "cns_mpo": {
181
+ "score": 5.6234,
182
+ "passed_filter": true
183
+ },
184
+ "affinity": {
185
+ "model": "psichic",
186
+ "scores": [
187
+ {"protein_id": "4F2_HUMAN", "score_raw": 6.42, "score_norm": 0.642},
188
+ {"protein_id": "ACHE_HUMAN", "score_raw": 7.90, "score_norm": 0.790},
189
+ "... 65 proteins total ..."
190
+ ]
191
+ },
192
+ "heuristic": {
193
+ "p_bbb": 0.670261,
194
+ "diagnostics": {
195
+ "reason": "OK",
196
+ "veto": false,
197
+ "score_bind": 0.0,
198
+ "score_mpo": 5.2468,
199
+ "score_total": 5.2468,
200
+ "contributions": ["... per-protein breakdown ..."]
201
+ }
202
+ }
203
+ }
204
+ }
205
+ ```
206
+
207
+ **What this means:**
208
+ - Each of the 65 BBB proteins gets a binding probability (0-1) from PSICHIC
209
+ - The heuristic weighs carrier proteins positively, enzyme proteins negatively
210
+ - Efflux proteins (P-gp, BCRP, MRPs) can **veto** the compound if binding exceeds their threshold
211
+ - `S_bind` sums weighted protein contributions; `S_mpo` is the MPO gain above baseline
212
+ - `P_BBB = 1 / (1 + exp(-k * (S_bind + S_mpo)))`
213
+
214
+ ### Step 5: Score a compound that fails MPO filter
215
+
216
+ ```bash
217
+ curl -X POST http://127.0.0.1:8000/v1/score \
218
+ -H "Content-Type: application/json" \
219
+ -d '{"smiles": "O=C(O)CCCCCCCCCCCCCCCCC"}'
220
+ ```
221
+
222
+ **Expected response:**
223
+
224
+ ```json
225
+ {
226
+ "result": {
227
+ "cns_mpo": {
228
+ "score": 1.873,
229
+ "passed_filter": false
230
+ },
231
+ "heuristic": null,
232
+ "passed_mpo_filter": false
233
+ }
234
+ }
235
+ ```
236
+
237
+ **What this means:**
238
+ - Stearic acid has poor CNS drug-likeness (very high LogP from its long alkyl chain)
239
+ - CNS-MPO < 3.0, so it's filtered out before affinity/heuristic scoring
240
+ - `heuristic: null` — no P_BBB is computed for compounds that fail the MPO gate
241
+
242
+ ### Step 6: Override pipeline configuration
243
+
244
+ ```bash
245
+ curl -X POST http://127.0.0.1:8000/v1/score \
246
+ -H "Content-Type: application/json" \
247
+ -d '{
248
+ "smiles": "c1ccc2c(c1)c1ccccc1[nH]2",
249
+ "config": {
250
+ "mpo_cutoff": 5.0,
251
+ "logistic_k": 0.2
252
+ }
253
+ }'
254
+ ```
255
+
256
+ You can override any pipeline parameter: `mpo_cutoff`, `logistic_k`, `efflux_veto_threshold`, `mpo_gain_coeff`.
257
+
258
+ ### Step 7: List all BBB target proteins
259
+
260
+ ```bash
261
+ curl http://127.0.0.1:8000/v1/proteins
262
+ ```
263
+
264
+ Returns all 65 proteins with their categories (carrier/efflux/enzyme), weights, and binding thresholds.
265
+
266
+ ---
267
+
268
+ ## Data Setup
269
+
270
+ ### Pre-computed affinity data
271
+
272
+ The API supports pre-computed PSICHIC affinity lookup when `include_affinity: true`. To enable this, place these files in the repo root:
273
+
274
+ | File | Description |
275
+ |------|-------------|
276
+ | `psichic_affinity_matrix.csv` | 599 compounds x 65 proteins, normalized scores (0-1) |
277
+ | `bbb_plus_pka.csv` | pKa predictions (acid/base lists) for SMILES-to-compound mapping |
278
+
279
+ **Format of `psichic_affinity_matrix.csv`:**
280
+ ```
281
+ Compound_ID,4F2_HUMAN,5NTD_HUMAN,ABCG2_HUMAN,...
282
+ caffeine,0.431,0.489,0.497,...
283
+ ```
284
+
285
+ **Format of `bbb_plus_pka.csv`:**
286
+ ```
287
+ compound_name,SMILES,acid_pkas,base_pkas
288
+ caffeine,Cn1c(=O)c2c(ncn2C)n(C)c1=O,,
289
+ ```
290
+
291
+ The API matches incoming SMILES by canonicalizing via RDKit and looking up against the pre-computed set. If a compound is not found, scoring proceeds without affinity data (MPO-only).
292
+
293
+ ### Running your own PSICHIC screening
294
+
295
+ To generate affinity data for new compounds, see the `modules/affinity.py` module which wraps the PSICHIC subprocess.
296
+
297
+ ---
298
+
299
+ ## Pipeline
300
+
301
+ ```
302
+ Input SMILES
303
+ -> Standardize (RDKit: canonicalize, neutralize, strip salts)
304
+ -> Properties (MW, LogP, TPSA, HBD, HBA)
305
+ -> pKa (MolGpKa predictions or placeholder)
306
+ -> CNS-MPO (6 desirability subscores, 0-6 total)
307
+ -> MPO Gate (score >= 3.0 to proceed)
308
+ -> Affinity (PSICHIC: 65 BBB proteins, 0-1 binding probability)
309
+ -> Heuristic (weighted binding + efflux veto + logistic)
310
+ -> P_BBB output (0-1)
311
+ ```
312
+
313
+ ## Endpoints
314
+
315
+ | Method | Path | Description |
316
+ |--------|------|-------------|
317
+ | GET | `/v1/health` | Liveness probe |
318
+ | GET | `/v1/version` | Pipeline version + hyperparameters |
319
+ | POST | `/v1/score` | Score a single compound |
320
+ | GET | `/v1/proteins` | List all 65 BBB target proteins |
321
+ | POST | `/v1/batch` | Submit batch job (requires Redis + ARQ) |
322
+ | GET | `/v1/batch/{job_id}` | Poll batch progress |
323
+
324
+ ## Tests
325
+
326
+ ```bash
327
+ pip install -e ".[api-test]"
328
+ pytest tests/ -v
329
+ ```
330
+
331
+ ## License
332
+
333
+ MIT License.
@@ -0,0 +1,283 @@
1
+ # BBB-Nuke
2
+
3
+ <img width="960" height="540" alt="image" src="https://github.com/user-attachments/assets/42bfaee2-aa67-4675-9423-803386c1dc6c" />
4
+
5
+
6
+ Blood-brain barrier penetration screening pipeline. Scores small molecules for BBB permeability using physicochemical properties, CNS-MPO desirability scoring, protein-ligand affinity, and a heuristic confidence layer.
7
+
8
+ ## Quick Start
9
+
10
+ ### Install
11
+
12
+ ```bash
13
+ pip install -e ".[api]"
14
+ ```
15
+
16
+ ### Run the API
17
+
18
+ ```bash
19
+ uvicorn bbnuke.api.app:app --host 127.0.0.1 --port 8000 --reload
20
+ ```
21
+
22
+ Open **http://127.0.0.1:8000/docs** for the interactive Swagger UI.
23
+
24
+ ---
25
+
26
+ ## API Tutorial
27
+
28
+ ### Step 1: Check the server is running
29
+
30
+ ```bash
31
+ curl http://127.0.0.1:8000/v1/health
32
+ ```
33
+
34
+ **Expected response:**
35
+
36
+ ```json
37
+ {"status": "ok"}
38
+ ```
39
+
40
+ ### Step 2: Check the pipeline version
41
+
42
+ ```bash
43
+ curl http://127.0.0.1:8000/v1/version
44
+ ```
45
+
46
+ **Expected response:**
47
+
48
+ ```json
49
+ {
50
+ "pipeline_version": "0.7.0",
51
+ "api_version": "1.0.0",
52
+ "hyperparameters": {
53
+ "logistic_k": 0.1352,
54
+ "efflux_veto_threshold": 0.7,
55
+ "mpo_gain_coeff": 2.0,
56
+ "mpo_filter_cutoff": 3.0
57
+ }
58
+ }
59
+ ```
60
+
61
+ ### Step 3: Score a compound (CPU-only)
62
+
63
+ Score caffeine without affinity data:
64
+
65
+ ```bash
66
+ curl -X POST http://127.0.0.1:8000/v1/score \
67
+ -H "Content-Type: application/json" \
68
+ -d '{"smiles": "Cn1c(=O)c2c(ncn2C)n(C)c1=O"}'
69
+ ```
70
+
71
+ **Expected response (key fields):**
72
+
73
+ ```json
74
+ {
75
+ "result": {
76
+ "compound_id": "query",
77
+ "smiles_standardized": "Cn1c(=O)c2c(ncn2C)n(C)c1=O",
78
+ "properties": {
79
+ "mw": 194.194,
80
+ "logp": -1.0293,
81
+ "tpsa": 61.82,
82
+ "hbd": 0,
83
+ "hba": 6
84
+ },
85
+ "cns_mpo": {
86
+ "score": 6.0,
87
+ "passed_filter": true
88
+ },
89
+ "affinity": null,
90
+ "heuristic": {
91
+ "p_bbb": 0.692365,
92
+ "diagnostics": {
93
+ "reason": "MPO-only (no affinity scores)",
94
+ "veto": false,
95
+ "score_bind": 0.0,
96
+ "score_mpo": 6.0,
97
+ "score_total": 6.0
98
+ }
99
+ },
100
+ "passed_mpo_filter": true
101
+ },
102
+ "pipeline_version": "0.7.0"
103
+ }
104
+ ```
105
+
106
+ **What this means:**
107
+ - Caffeine scores a perfect 6.0 on CNS-MPO (excellent drug-likeness for CNS)
108
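+ - Without affinity data, P_BBB is derived from MPO alone: the MPO gain above the 3.0 baseline is `S_mpo = mpo_gain_coeff * (MPO - 3.0) = 2.0 * (6.0 - 3.0) = 6.0`, so `P_BBB = 1 / (1 + exp(-0.1352 * 6.0))` = 0.69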
109
+ - `affinity: null` because no pre-computed PSICHIC data is loaded
110
+
111
+ ### Step 4: Score with affinity data
112
+
113
+ To include PSICHIC affinity scoring, set `include_affinity: true`. This requires pre-computed data files (see [Data Setup](#data-setup) below).
114
+
115
+ ```bash
116
+ curl -X POST http://127.0.0.1:8000/v1/score \
117
+ -H "Content-Type: application/json" \
118
+ -d '{
119
+ "smiles": "CCC1(c2ccccc2)C(=O)NC(=O)N(C(=O)c2ccccc2)C1=O",
120
+ "include_affinity": true
121
+ }'
122
+ ```
123
+
124
+ **Expected response with affinity (key fields):**
125
+
126
+ ```json
127
+ {
128
+ "result": {
129
+ "compound_id": "query",
130
+ "cns_mpo": {
131
+ "score": 5.6234,
132
+ "passed_filter": true
133
+ },
134
+ "affinity": {
135
+ "model": "psichic",
136
+ "scores": [
137
+ {"protein_id": "4F2_HUMAN", "score_raw": 6.42, "score_norm": 0.642},
138
+ {"protein_id": "ACHE_HUMAN", "score_raw": 7.90, "score_norm": 0.790},
139
+ "... 65 proteins total ..."
140
+ ]
141
+ },
142
+ "heuristic": {
143
+ "p_bbb": 0.670261,
144
+ "diagnostics": {
145
+ "reason": "OK",
146
+ "veto": false,
147
+ "score_bind": 0.0,
148
+ "score_mpo": 5.2468,
149
+ "score_total": 5.2468,
150
+ "contributions": ["... per-protein breakdown ..."]
151
+ }
152
+ }
153
+ }
154
+ }
155
+ ```
156
+
157
+ **What this means:**
158
+ - Each of the 65 BBB proteins gets a binding probability (0-1) from PSICHIC
159
+ - The heuristic weighs carrier proteins positively, enzyme proteins negatively
160
+ - Efflux proteins (P-gp, BCRP, MRPs) can **veto** the compound if binding exceeds their threshold
161
+ - `S_bind` sums weighted protein contributions; `S_mpo` is the MPO gain above baseline
162
+ - `P_BBB = 1 / (1 + exp(-k * (S_bind + S_mpo)))`
163
+
164
+ ### Step 5: Score a compound that fails MPO filter
165
+
166
+ ```bash
167
+ curl -X POST http://127.0.0.1:8000/v1/score \
168
+ -H "Content-Type: application/json" \
169
+ -d '{"smiles": "O=C(O)CCCCCCCCCCCCCCCCC"}'
170
+ ```
171
+
172
+ **Expected response:**
173
+
174
+ ```json
175
+ {
176
+ "result": {
177
+ "cns_mpo": {
178
+ "score": 1.873,
179
+ "passed_filter": false
180
+ },
181
+ "heuristic": null,
182
+ "passed_mpo_filter": false
183
+ }
184
+ }
185
+ ```
186
+
187
+ **What this means:**
188
+ - Stearic acid has poor CNS drug-likeness (very high LogP from its long alkyl chain)
189
+ - CNS-MPO < 3.0, so it's filtered out before affinity/heuristic scoring
190
+ - `heuristic: null` — no P_BBB is computed for compounds that fail the MPO gate
191
+
192
+ ### Step 6: Override pipeline configuration
193
+
194
+ ```bash
195
+ curl -X POST http://127.0.0.1:8000/v1/score \
196
+ -H "Content-Type: application/json" \
197
+ -d '{
198
+ "smiles": "c1ccc2c(c1)c1ccccc1[nH]2",
199
+ "config": {
200
+ "mpo_cutoff": 5.0,
201
+ "logistic_k": 0.2
202
+ }
203
+ }'
204
+ ```
205
+
206
+ You can override any pipeline parameter: `mpo_cutoff`, `logistic_k`, `efflux_veto_threshold`, `mpo_gain_coeff`.
207
+
208
+ ### Step 7: List all BBB target proteins
209
+
210
+ ```bash
211
+ curl http://127.0.0.1:8000/v1/proteins
212
+ ```
213
+
214
+ Returns all 65 proteins with their categories (carrier/efflux/enzyme), weights, and binding thresholds.
215
+
216
+ ---
217
+
218
+ ## Data Setup
219
+
220
+ ### Pre-computed affinity data
221
+
222
+ The API supports pre-computed PSICHIC affinity lookup when `include_affinity: true`. To enable this, place these files in the repo root:
223
+
224
+ | File | Description |
225
+ |------|-------------|
226
+ | `psichic_affinity_matrix.csv` | 599 compounds x 65 proteins, normalized scores (0-1) |
227
+ | `bbb_plus_pka.csv` | pKa predictions (acid/base lists) for SMILES-to-compound mapping |
228
+
229
+ **Format of `psichic_affinity_matrix.csv`:**
230
+ ```
231
+ Compound_ID,4F2_HUMAN,5NTD_HUMAN,ABCG2_HUMAN,...
232
+ caffeine,0.431,0.489,0.497,...
233
+ ```
234
+
235
+ **Format of `bbb_plus_pka.csv`:**
236
+ ```
237
+ compound_name,SMILES,acid_pkas,base_pkas
238
+ caffeine,Cn1c(=O)c2c(ncn2C)n(C)c1=O,,
239
+ ```
240
+
241
+ The API matches incoming SMILES by canonicalizing via RDKit and looking up against the pre-computed set. If a compound is not found, scoring proceeds without affinity data (MPO-only).
242
+
243
+ ### Running your own PSICHIC screening
244
+
245
+ To generate affinity data for new compounds, see the `modules/affinity.py` module which wraps the PSICHIC subprocess.
246
+
247
+ ---
248
+
249
+ ## Pipeline
250
+
251
+ ```
252
+ Input SMILES
253
+ -> Standardize (RDKit: canonicalize, neutralize, strip salts)
254
+ -> Properties (MW, LogP, TPSA, HBD, HBA)
255
+ -> pKa (MolGpKa predictions or placeholder)
256
+ -> CNS-MPO (6 desirability subscores, 0-6 total)
257
+ -> MPO Gate (score >= 3.0 to proceed)
258
+ -> Affinity (PSICHIC: 65 BBB proteins, 0-1 binding probability)
259
+ -> Heuristic (weighted binding + efflux veto + logistic)
260
+ -> P_BBB output (0-1)
261
+ ```
262
+
263
+ ## Endpoints
264
+
265
+ | Method | Path | Description |
266
+ |--------|------|-------------|
267
+ | GET | `/v1/health` | Liveness probe |
268
+ | GET | `/v1/version` | Pipeline version + hyperparameters |
269
+ | POST | `/v1/score` | Score a single compound |
270
+ | GET | `/v1/proteins` | List all 65 BBB target proteins |
271
+ | POST | `/v1/batch` | Submit batch job (requires Redis + ARQ) |
272
+ | GET | `/v1/batch/{job_id}` | Poll batch progress |
273
+
274
+ ## Tests
275
+
276
+ ```bash
277
+ pip install -e ".[api-test]"
278
+ pytest tests/ -v
279
+ ```
280
+
281
+ ## License
282
+
283
+ MIT License.