protein-quest 0.3.2__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of protein-quest might be problematic. Click here for more details.
- {protein_quest-0.3.2 → protein_quest-0.4.0}/PKG-INFO +33 -3
- {protein_quest-0.3.2 → protein_quest-0.4.0}/README.md +32 -2
- {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/notebooks/uniprot.ipynb +95 -2
- {protein_quest-0.3.2 → protein_quest-0.4.0}/pyproject.toml +1 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/__version__.py +1 -1
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/cli.py +139 -1
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/converter.py +1 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/mcp_server.py +10 -1
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/ss.py +20 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/uniprot.py +157 -4
- protein_quest-0.4.0/tests/cassettes/test_uniprot/test_search4interaction_partners.yaml +384 -0
- protein_quest-0.4.0/tests/cassettes/test_uniprot/test_search4macromolecular_complexes.yaml +382 -0
- protein_quest-0.4.0/tests/test_converter.py +23 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_ss.py +6 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_uniprot.py +65 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/uv.lock +36 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/.github/workflows/ci.yml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/.github/workflows/pages.yml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/.github/workflows/pypi-publish.yml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/.gitignore +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/.vscode/extensions.json +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/CITATION.cff +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/CODE_OF_CONDUCT.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/CONTRIBUTING.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/LICENSE +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/CONTRIBUTING.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/cli_doc_hook.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/index.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/notebooks/.gitignore +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/notebooks/alphafold.ipynb +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/notebooks/index.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/notebooks/pdbe.ipynb +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/protein-quest-mcp.png +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/mkdocs.yml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/__init__.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/alphafold/__init__.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/alphafold/confidence.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/alphafold/entry_summary.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/alphafold/fetch.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/emdb.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/filters.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/go.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/parallel.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/pdbe/__init__.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/pdbe/fetch.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/pdbe/io.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/py.typed +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/taxonomy.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/utils.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/alphafold/test_confidence.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/alphafold/test_entry_summary.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/alphafold/test_fetch.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/fixtures/3JRS_B2A.cif.gz +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/pdbe/fixtures/2y29.cif +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/pdbe/test_fetch.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/pdbe/test_io.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_cli.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_emdb.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_go.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_mcp.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_taxonomy.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: protein_quest
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Search/retrieve/filter proteins and protein structures
|
|
5
5
|
Project-URL: Homepage, https://github.com/haddocking/protein-quest
|
|
6
6
|
Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
|
|
@@ -56,12 +56,14 @@ graph TB;
|
|
|
56
56
|
searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
|
|
57
57
|
searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
|
|
58
58
|
searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
|
|
59
|
+
searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
|
|
60
|
+
searchcomplexes[/Search complexes/]
|
|
59
61
|
searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
|
|
60
62
|
searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
|
|
61
63
|
searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
|
|
62
|
-
fetchpdbe -->|
|
|
64
|
+
fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
|
|
63
65
|
chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
|
|
64
|
-
fetchad -->|
|
|
66
|
+
fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
|
|
65
67
|
confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
|
|
66
68
|
residuefilter --> |mmcif_files| ssfilter
|
|
67
69
|
classDef dashedBorder stroke-dasharray: 5 5;
|
|
@@ -69,6 +71,8 @@ graph TB;
|
|
|
69
71
|
taxonomy:::dashedBorder
|
|
70
72
|
searchemdb:::dashedBorder
|
|
71
73
|
fetchemdb:::dashedBorder
|
|
74
|
+
searchintactionpartners:::dashedBorder
|
|
75
|
+
searchcomplexes:::dashedBorder
|
|
72
76
|
```
|
|
73
77
|
|
|
74
78
|
(Dotted nodes and edges are side-quests.)
|
|
@@ -204,6 +208,32 @@ You can use following command to search for a Gene Ontology (GO) term.
|
|
|
204
208
|
protein-quest search go --limit 5 --aspect cellular_component apoptosome -
|
|
205
209
|
```
|
|
206
210
|
|
|
211
|
+
### Search for interaction partners
|
|
212
|
+
|
|
213
|
+
Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
|
|
214
|
+
|
|
215
|
+
```shell
|
|
216
|
+
protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
|
|
220
|
+
|
|
221
|
+
### Search for complexes
|
|
222
|
+
|
|
223
|
+
Given UniProt accessions, search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
|
|
224
|
+
and return the complex entries and their members.
|
|
225
|
+
|
|
226
|
+
```shell
|
|
227
|
+
echo Q05471 | protein-quest search complexes - complexes.csv
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
The `complexes.csv` file looks like:
|
|
231
|
+
|
|
232
|
+
```csv
|
|
233
|
+
query_protein,complex_id,complex_url,complex_title,members
|
|
234
|
+
Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
|
|
235
|
+
```
|
|
236
|
+
|
|
207
237
|
## Model Context Protocol (MCP) server
|
|
208
238
|
|
|
209
239
|
Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
|
|
@@ -26,12 +26,14 @@ graph TB;
|
|
|
26
26
|
searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
|
|
27
27
|
searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
|
|
28
28
|
searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
|
|
29
|
+
searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
|
|
30
|
+
searchcomplexes[/Search complexes/]
|
|
29
31
|
searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
|
|
30
32
|
searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
|
|
31
33
|
searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
|
|
32
|
-
fetchpdbe -->|
|
|
34
|
+
fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
|
|
33
35
|
chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
|
|
34
|
-
fetchad -->|
|
|
36
|
+
fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
|
|
35
37
|
confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
|
|
36
38
|
residuefilter --> |mmcif_files| ssfilter
|
|
37
39
|
classDef dashedBorder stroke-dasharray: 5 5;
|
|
@@ -39,6 +41,8 @@ graph TB;
|
|
|
39
41
|
taxonomy:::dashedBorder
|
|
40
42
|
searchemdb:::dashedBorder
|
|
41
43
|
fetchemdb:::dashedBorder
|
|
44
|
+
searchintactionpartners:::dashedBorder
|
|
45
|
+
searchcomplexes:::dashedBorder
|
|
42
46
|
```
|
|
43
47
|
|
|
44
48
|
(Dotted nodes and edges are side-quests.)
|
|
@@ -174,6 +178,32 @@ You can use following command to search for a Gene Ontology (GO) term.
|
|
|
174
178
|
protein-quest search go --limit 5 --aspect cellular_component apoptosome -
|
|
175
179
|
```
|
|
176
180
|
|
|
181
|
+
### Search for interaction partners
|
|
182
|
+
|
|
183
|
+
Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
|
|
184
|
+
|
|
185
|
+
```shell
|
|
186
|
+
protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
|
|
190
|
+
|
|
191
|
+
### Search for complexes
|
|
192
|
+
|
|
193
|
+
Given UniProt accessions, search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
|
|
194
|
+
and return the complex entries and their members.
|
|
195
|
+
|
|
196
|
+
```shell
|
|
197
|
+
echo Q05471 | protein-quest search complexes - complexes.csv
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
The `complexes.csv` file looks like:
|
|
201
|
+
|
|
202
|
+
```csv
|
|
203
|
+
query_protein,complex_id,complex_url,complex_title,members
|
|
204
|
+
Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
|
|
205
|
+
```
|
|
206
|
+
|
|
177
207
|
## Model Context Protocol (MCP) server
|
|
178
208
|
|
|
179
209
|
Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
},
|
|
13
13
|
{
|
|
14
14
|
"cell_type": "code",
|
|
15
|
-
"execution_count":
|
|
15
|
+
"execution_count": 1,
|
|
16
16
|
"id": "85674583",
|
|
17
17
|
"metadata": {},
|
|
18
18
|
"outputs": [],
|
|
@@ -282,6 +282,99 @@
|
|
|
282
282
|
"first_uniprot = next(iter(uniprot_accessions.items()))\n",
|
|
283
283
|
"pprint(first_uniprot)"
|
|
284
284
|
]
|
|
285
|
+
},
|
|
286
|
+
{
|
|
287
|
+
"cell_type": "markdown",
|
|
288
|
+
"id": "e32a95f8",
|
|
289
|
+
"metadata": {},
|
|
290
|
+
"source": [
|
|
291
|
+
"## Find interaction partners for uniprot entries"
|
|
292
|
+
]
|
|
293
|
+
},
|
|
294
|
+
{
|
|
295
|
+
"cell_type": "code",
|
|
296
|
+
"execution_count": 1,
|
|
297
|
+
"id": "d035c702",
|
|
298
|
+
"metadata": {},
|
|
299
|
+
"outputs": [],
|
|
300
|
+
"source": [
|
|
301
|
+
"from protein_quest.uniprot import search4interaction_partners, search4macromolecular_complexes"
|
|
302
|
+
]
|
|
303
|
+
},
|
|
304
|
+
{
|
|
305
|
+
"cell_type": "code",
|
|
306
|
+
"execution_count": 2,
|
|
307
|
+
"id": "601c690a",
|
|
308
|
+
"metadata": {},
|
|
309
|
+
"outputs": [],
|
|
310
|
+
"source": [
|
|
311
|
+
"# Helicase SWR1 in yeast\n",
|
|
312
|
+
"uniprot_accession = \"Q05471\""
|
|
313
|
+
]
|
|
314
|
+
},
|
|
315
|
+
{
|
|
316
|
+
"cell_type": "code",
|
|
317
|
+
"execution_count": 3,
|
|
318
|
+
"id": "173c764d",
|
|
319
|
+
"metadata": {},
|
|
320
|
+
"outputs": [
|
|
321
|
+
{
|
|
322
|
+
"data": {
|
|
323
|
+
"text/plain": [
|
|
324
|
+
"{'Q12464': {'CPX-2122'},\n",
|
|
325
|
+
" 'P35817': {'CPX-2122'},\n",
|
|
326
|
+
" 'P80428': {'CPX-2122'},\n",
|
|
327
|
+
" 'Q12509': {'CPX-2122'},\n",
|
|
328
|
+
" 'Q03388': {'CPX-2122'},\n",
|
|
329
|
+
" 'P53201': {'CPX-2122'},\n",
|
|
330
|
+
" 'P53930': {'CPX-2122'},\n",
|
|
331
|
+
" 'P60010': {'CPX-2122'},\n",
|
|
332
|
+
" 'Q03433': {'CPX-2122'},\n",
|
|
333
|
+
" 'Q06707': {'CPX-2122'},\n",
|
|
334
|
+
" 'P38326': {'CPX-2122'},\n",
|
|
335
|
+
" 'P31376': {'CPX-2122'},\n",
|
|
336
|
+
" 'Q03940': {'CPX-2122'}}"
|
|
337
|
+
]
|
|
338
|
+
},
|
|
339
|
+
"execution_count": 3,
|
|
340
|
+
"metadata": {},
|
|
341
|
+
"output_type": "execute_result"
|
|
342
|
+
}
|
|
343
|
+
],
|
|
344
|
+
"source": [
|
|
345
|
+
"partners = search4interaction_partners(uniprot_accession, limit=100)\n",
|
|
346
|
+
"partners"
|
|
347
|
+
]
|
|
348
|
+
},
|
|
349
|
+
{
|
|
350
|
+
"cell_type": "markdown",
|
|
351
|
+
"id": "a763b6f8",
|
|
352
|
+
"metadata": {},
|
|
353
|
+
"source": [
|
|
354
|
+
"To get more information about the complex you can search for the complexes themselves with:"
|
|
355
|
+
]
|
|
356
|
+
},
|
|
357
|
+
{
|
|
358
|
+
"cell_type": "code",
|
|
359
|
+
"execution_count": 4,
|
|
360
|
+
"id": "236050ea",
|
|
361
|
+
"metadata": {},
|
|
362
|
+
"outputs": [
|
|
363
|
+
{
|
|
364
|
+
"data": {
|
|
365
|
+
"text/plain": [
|
|
366
|
+
"[ComplexPortalEntry(query_protein='Q05471', complex_id='CPX-2122', complex_url='https://www.ebi.ac.uk/complexportal/complex/CPX-2122', complex_title='Swr1 chromatin remodelling complex', members={'P35817', 'Q05471', 'Q12464', 'Q12509', 'Q06707', 'Q03433', 'P38326', 'P53201', 'Q03388', 'P53930', 'P80428', 'Q03940', 'P60010', 'P31376'})]"
|
|
367
|
+
]
|
|
368
|
+
},
|
|
369
|
+
"execution_count": 4,
|
|
370
|
+
"metadata": {},
|
|
371
|
+
"output_type": "execute_result"
|
|
372
|
+
}
|
|
373
|
+
],
|
|
374
|
+
"source": [
|
|
375
|
+
"complexes = search4macromolecular_complexes([uniprot_accession])\n",
|
|
376
|
+
"complexes"
|
|
377
|
+
]
|
|
285
378
|
}
|
|
286
379
|
],
|
|
287
380
|
"metadata": {
|
|
@@ -300,7 +393,7 @@
|
|
|
300
393
|
"name": "python",
|
|
301
394
|
"nbconvert_exporter": "python",
|
|
302
395
|
"pygments_lexer": "ipython3",
|
|
303
|
-
"version": "3.13.
|
|
396
|
+
"version": "3.13.5"
|
|
304
397
|
}
|
|
305
398
|
},
|
|
306
399
|
"nbformat": 4,
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = "0.3.2"
|
|
1
|
+
__version__ = "0.4.0"
|
|
2
2
|
"""The version of the package."""
|
|
@@ -15,6 +15,7 @@ from textwrap import dedent
|
|
|
15
15
|
from cattrs import structure
|
|
16
16
|
from rich import print as rprint
|
|
17
17
|
from rich.logging import RichHandler
|
|
18
|
+
from rich.markdown import Markdown
|
|
18
19
|
from rich.panel import Panel
|
|
19
20
|
from rich_argparse import ArgumentDefaultsRichHelpFormatter
|
|
20
21
|
from tqdm.rich import tqdm
|
|
@@ -31,7 +32,17 @@ from protein_quest.pdbe import fetch as pdbe_fetch
|
|
|
31
32
|
from protein_quest.pdbe.io import glob_structure_files, locate_structure_file
|
|
32
33
|
from protein_quest.ss import SecondaryStructureFilterQuery, filter_files_on_secondary_structure
|
|
33
34
|
from protein_quest.taxonomy import SearchField, _write_taxonomy_csv, search_fields, search_taxon
|
|
34
|
-
from protein_quest.uniprot import PdbResult, Query, search4af, search4emdb, search4pdb, search4uniprot
|
|
35
|
+
from protein_quest.uniprot import (
|
|
36
|
+
ComplexPortalEntry,
|
|
37
|
+
PdbResult,
|
|
38
|
+
Query,
|
|
39
|
+
search4af,
|
|
40
|
+
search4emdb,
|
|
41
|
+
search4interaction_partners,
|
|
42
|
+
search4macromolecular_complexes,
|
|
43
|
+
search4pdb,
|
|
44
|
+
search4uniprot,
|
|
45
|
+
)
|
|
35
46
|
from protein_quest.utils import CopyMethod, copy_methods, copyfile
|
|
36
47
|
|
|
37
48
|
logger = logging.getLogger(__name__)
|
|
@@ -211,6 +222,73 @@ def _add_search_taxonomy_parser(subparser: argparse._SubParsersAction):
|
|
|
211
222
|
parser.add_argument("--limit", type=int, default=100, help="Maximum number of results to return")
|
|
212
223
|
|
|
213
224
|
|
|
225
|
+
def _add_search_interaction_partners_parser(subparsers: argparse._SubParsersAction):
|
|
226
|
+
"""Add search interaction partners subcommand parser."""
|
|
227
|
+
parser = subparsers.add_parser(
|
|
228
|
+
"interaction-partners",
|
|
229
|
+
help="Search for interaction partners of given UniProt accession",
|
|
230
|
+
description=dedent("""\
|
|
231
|
+
Search for interaction partners of given UniProt accession
|
|
232
|
+
in the Uniprot SPARQL endpoint and Complex Portal.
|
|
233
|
+
"""),
|
|
234
|
+
formatter_class=ArgumentDefaultsRichHelpFormatter,
|
|
235
|
+
)
|
|
236
|
+
parser.add_argument(
|
|
237
|
+
"uniprot_acc",
|
|
238
|
+
type=str,
|
|
239
|
+
help="UniProt accession (for example P12345).",
|
|
240
|
+
)
|
|
241
|
+
parser.add_argument(
|
|
242
|
+
"--exclude",
|
|
243
|
+
type=str,
|
|
244
|
+
action="append",
|
|
245
|
+
help="UniProt accessions to exclude from the results. For example already known interaction partners.",
|
|
246
|
+
)
|
|
247
|
+
parser.add_argument(
|
|
248
|
+
"output_csv",
|
|
249
|
+
type=argparse.FileType("w", encoding="UTF-8"),
|
|
250
|
+
help="Output CSV with interaction partners per UniProt accession. Use `-` for stdout.",
|
|
251
|
+
)
|
|
252
|
+
parser.add_argument(
|
|
253
|
+
"--limit", type=int, default=10_000, help="Maximum number of interaction partner uniprot accessions to return"
|
|
254
|
+
)
|
|
255
|
+
parser.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _add_search_complexes_parser(subparsers: argparse._SubParsersAction):
|
|
259
|
+
"""Add search complexes subcommand parser."""
|
|
260
|
+
description = dedent("""\
|
|
261
|
+
Search for complexes in the Complex Portal.
|
|
262
|
+
https://www.ebi.ac.uk/complexportal/
|
|
263
|
+
|
|
264
|
+
The output CSV file has the following columns:
|
|
265
|
+
|
|
266
|
+
- query_protein: UniProt accession used as query
|
|
267
|
+
- complex_id: Complex Portal identifier
|
|
268
|
+
- complex_url: URL to the Complex Portal entry
|
|
269
|
+
- complex_title: Title of the complex
|
|
270
|
+
- members: Semicolon-separated list of UniProt accessions of complex members
|
|
271
|
+
""")
|
|
272
|
+
parser = subparsers.add_parser(
|
|
273
|
+
"complexes",
|
|
274
|
+
help="Search for complexes in the Complex Portal",
|
|
275
|
+
description=Markdown(description, style="argparse.text"), # type: ignore using rich formatter makes this OK
|
|
276
|
+
formatter_class=ArgumentDefaultsRichHelpFormatter,
|
|
277
|
+
)
|
|
278
|
+
parser.add_argument(
|
|
279
|
+
"uniprot_accs",
|
|
280
|
+
type=argparse.FileType("r", encoding="UTF-8"),
|
|
281
|
+
help="Text file with UniProt accessions (one per line) as query for searching complexes. Use `-` for stdin.",
|
|
282
|
+
)
|
|
283
|
+
parser.add_argument(
|
|
284
|
+
"output_csv",
|
|
285
|
+
type=argparse.FileType("w", encoding="UTF-8"),
|
|
286
|
+
help="Output CSV file with complex results. Use `-` for stdout.",
|
|
287
|
+
)
|
|
288
|
+
parser.add_argument("--limit", type=int, default=100, help="Maximum number of complex results to return")
|
|
289
|
+
parser.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
|
|
290
|
+
|
|
291
|
+
|
|
214
292
|
def _add_retrieve_pdbe_parser(subparsers: argparse._SubParsersAction):
|
|
215
293
|
"""Add retrieve pdbe subcommand parser."""
|
|
216
294
|
parser = subparsers.add_parser(
|
|
@@ -458,6 +536,8 @@ def _add_search_subcommands(subparsers: argparse._SubParsersAction):
|
|
|
458
536
|
_add_search_emdb_parser(subsubparsers)
|
|
459
537
|
_add_search_go_parser(subsubparsers)
|
|
460
538
|
_add_search_taxonomy_parser(subsubparsers)
|
|
539
|
+
_add_search_interaction_partners_parser(subsubparsers)
|
|
540
|
+
_add_search_complexes_parser(subsubparsers)
|
|
461
541
|
|
|
462
542
|
|
|
463
543
|
def _add_retrieve_subcommands(subparsers: argparse._SubParsersAction):
|
|
@@ -636,6 +716,32 @@ def _handle_search_taxonomy(args):
|
|
|
636
716
|
_write_taxonomy_csv(results, output_csv)
|
|
637
717
|
|
|
638
718
|
|
|
719
|
+
def _handle_search_interaction_partners(args: argparse.Namespace):
|
|
720
|
+
uniprot_acc: str = args.uniprot_acc
|
|
721
|
+
excludes: set[str] = set(args.exclude) if args.exclude else set()
|
|
722
|
+
limit: int = args.limit
|
|
723
|
+
timeout: int = args.timeout
|
|
724
|
+
output_csv: TextIOWrapper = args.output_csv
|
|
725
|
+
|
|
726
|
+
rprint(f"Searching for interaction partners of '{uniprot_acc}'")
|
|
727
|
+
results = search4interaction_partners(uniprot_acc, excludes=excludes, limit=limit, timeout=timeout)
|
|
728
|
+
rprint(f"Found {len(results)} interaction partners, written to {output_csv.name}")
|
|
729
|
+
_write_lines(output_csv, results.keys())
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
def _handle_search_complexes(args: argparse.Namespace):
|
|
733
|
+
uniprot_accs = args.uniprot_accs
|
|
734
|
+
limit = args.limit
|
|
735
|
+
timeout = args.timeout
|
|
736
|
+
output_csv = args.output_csv
|
|
737
|
+
|
|
738
|
+
accs = _read_lines(uniprot_accs)
|
|
739
|
+
rprint(f"Finding complexes for {len(accs)} uniprot accessions")
|
|
740
|
+
results = search4macromolecular_complexes(accs, limit=limit, timeout=timeout)
|
|
741
|
+
rprint(f"Found {len(results)} complexes, written to {output_csv.name}")
|
|
742
|
+
_write_complexes_csv(results, output_csv)
|
|
743
|
+
|
|
744
|
+
|
|
639
745
|
def _handle_retrieve_pdbe(args):
|
|
640
746
|
pdbe_csv = args.pdbe_csv
|
|
641
747
|
output_dir = args.output_dir
|
|
@@ -875,6 +981,8 @@ HANDLERS: dict[tuple[str, str | None], Callable] = {
|
|
|
875
981
|
("search", "emdb"): _handle_search_emdb,
|
|
876
982
|
("search", "go"): _handle_search_go,
|
|
877
983
|
("search", "taxonomy"): _handle_search_taxonomy,
|
|
984
|
+
("search", "interaction-partners"): _handle_search_interaction_partners,
|
|
985
|
+
("search", "complexes"): _handle_search_complexes,
|
|
878
986
|
("retrieve", "pdbe"): _handle_retrieve_pdbe,
|
|
879
987
|
("retrieve", "alphafold"): _handle_retrieve_alphafold,
|
|
880
988
|
("retrieve", "emdb"): _handle_retrieve_emdb,
|
|
@@ -937,3 +1045,33 @@ def _iter_csv_rows(file: TextIOWrapper) -> Generator[dict[str, str]]:
|
|
|
937
1045
|
|
|
938
1046
|
def _read_column_from_csv(file: TextIOWrapper, column: str) -> set[str]:
|
|
939
1047
|
return {row[column] for row in _iter_csv_rows(file)}
|
|
1048
|
+
|
|
1049
|
+
|
|
1050
|
+
def _write_complexes_csv(complexes: list[ComplexPortalEntry], output_csv: TextIOWrapper) -> None:
|
|
1051
|
+
"""Write ComplexPortal information to a CSV file.
|
|
1052
|
+
|
|
1053
|
+
Args:
|
|
1054
|
+
complexes: List of ComplexPortalEntry objects.
|
|
1055
|
+
output_csv: TextIOWrapper to write the CSV data to.
|
|
1056
|
+
"""
|
|
1057
|
+
writer = csv.writer(output_csv)
|
|
1058
|
+
writer.writerow(
|
|
1059
|
+
[
|
|
1060
|
+
"query_protein",
|
|
1061
|
+
"complex_id",
|
|
1062
|
+
"complex_url",
|
|
1063
|
+
"complex_title",
|
|
1064
|
+
"members",
|
|
1065
|
+
]
|
|
1066
|
+
)
|
|
1067
|
+
for entry in complexes:
|
|
1068
|
+
members_str = ";".join(sorted(entry.members))
|
|
1069
|
+
writer.writerow(
|
|
1070
|
+
[
|
|
1071
|
+
entry.query_protein,
|
|
1072
|
+
entry.complex_id,
|
|
1073
|
+
entry.complex_url,
|
|
1074
|
+
entry.complex_title,
|
|
1075
|
+
members_str,
|
|
1076
|
+
]
|
|
1077
|
+
)
|
|
@@ -13,6 +13,7 @@ type PositiveInt = int
|
|
|
13
13
|
converter = make_converter()
|
|
14
14
|
"""cattrs converter to read JSON document or dict to Python objects."""
|
|
15
15
|
converter.register_structure_hook(URL, lambda v, _: URL(v))
|
|
16
|
+
converter.register_unstructure_hook(URL, lambda u: str(u))
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
@converter.register_structure_hook
|
|
@@ -48,7 +48,15 @@ from protein_quest.pdbe.fetch import fetch as pdbe_fetch
|
|
|
48
48
|
from protein_quest.pdbe.io import glob_structure_files, nr_residues_in_chain, write_single_chain_pdb_file
|
|
49
49
|
from protein_quest.ss import filter_file_on_secondary_structure
|
|
50
50
|
from protein_quest.taxonomy import search_taxon
|
|
51
|
-
from protein_quest.uniprot import PdbResult, Query, search4af, search4emdb, search4pdb, search4uniprot
|
|
51
|
+
from protein_quest.uniprot import (
|
|
52
|
+
PdbResult,
|
|
53
|
+
Query,
|
|
54
|
+
search4af,
|
|
55
|
+
search4emdb,
|
|
56
|
+
search4macromolecular_complexes,
|
|
57
|
+
search4pdb,
|
|
58
|
+
search4uniprot,
|
|
59
|
+
)
|
|
52
60
|
|
|
53
61
|
mcp = FastMCP("protein-quest")
|
|
54
62
|
|
|
@@ -137,6 +145,7 @@ def search_alphafolds(
|
|
|
137
145
|
|
|
138
146
|
|
|
139
147
|
mcp.tool(search4emdb, name="search_emdb")
|
|
148
|
+
mcp.tool(search4macromolecular_complexes, name="search_macromolecular_complexes")
|
|
140
149
|
|
|
141
150
|
|
|
142
151
|
@mcp.tool
|
|
@@ -111,6 +111,26 @@ class SecondaryStructureFilterQuery:
|
|
|
111
111
|
ratio_min_sheet_residues: Ratio | None = None
|
|
112
112
|
ratio_max_sheet_residues: Ratio | None = None
|
|
113
113
|
|
|
114
|
+
def is_actionable(self) -> bool:
|
|
115
|
+
"""Check if the secondary structure query has any actionable filters.
|
|
116
|
+
|
|
117
|
+
Returns:
|
|
118
|
+
True if any of the filters are set, False otherwise.
|
|
119
|
+
"""
|
|
120
|
+
return any(
|
|
121
|
+
field is not None
|
|
122
|
+
for field in [
|
|
123
|
+
self.abs_min_helix_residues,
|
|
124
|
+
self.abs_max_helix_residues,
|
|
125
|
+
self.abs_min_sheet_residues,
|
|
126
|
+
self.abs_max_sheet_residues,
|
|
127
|
+
self.ratio_min_helix_residues,
|
|
128
|
+
self.ratio_max_helix_residues,
|
|
129
|
+
self.ratio_min_sheet_residues,
|
|
130
|
+
self.ratio_max_sheet_residues,
|
|
131
|
+
]
|
|
132
|
+
)
|
|
133
|
+
|
|
114
134
|
|
|
115
135
|
def _check_range(min_val, max_val, label):
|
|
116
136
|
if min_val is not None and max_val is not None and min_val >= max_val:
|