protein-quest 0.3.2__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of protein-quest might be problematic. Click here for more details.

Files changed (73) hide show
  1. {protein_quest-0.3.2 → protein_quest-0.4.0}/PKG-INFO +33 -3
  2. {protein_quest-0.3.2 → protein_quest-0.4.0}/README.md +32 -2
  3. {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/notebooks/uniprot.ipynb +95 -2
  4. {protein_quest-0.3.2 → protein_quest-0.4.0}/pyproject.toml +1 -0
  5. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/__version__.py +1 -1
  6. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/cli.py +139 -1
  7. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/converter.py +1 -0
  8. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/mcp_server.py +10 -1
  9. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/ss.py +20 -0
  10. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/uniprot.py +157 -4
  11. protein_quest-0.4.0/tests/cassettes/test_uniprot/test_search4interaction_partners.yaml +384 -0
  12. protein_quest-0.4.0/tests/cassettes/test_uniprot/test_search4macromolecular_complexes.yaml +382 -0
  13. protein_quest-0.4.0/tests/test_converter.py +23 -0
  14. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_ss.py +6 -0
  15. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_uniprot.py +65 -0
  16. {protein_quest-0.3.2 → protein_quest-0.4.0}/uv.lock +36 -0
  17. {protein_quest-0.3.2 → protein_quest-0.4.0}/.github/workflows/ci.yml +0 -0
  18. {protein_quest-0.3.2 → protein_quest-0.4.0}/.github/workflows/pages.yml +0 -0
  19. {protein_quest-0.3.2 → protein_quest-0.4.0}/.github/workflows/pypi-publish.yml +0 -0
  20. {protein_quest-0.3.2 → protein_quest-0.4.0}/.gitignore +0 -0
  21. {protein_quest-0.3.2 → protein_quest-0.4.0}/.vscode/extensions.json +0 -0
  22. {protein_quest-0.3.2 → protein_quest-0.4.0}/CITATION.cff +0 -0
  23. {protein_quest-0.3.2 → protein_quest-0.4.0}/CODE_OF_CONDUCT.md +0 -0
  24. {protein_quest-0.3.2 → protein_quest-0.4.0}/CONTRIBUTING.md +0 -0
  25. {protein_quest-0.3.2 → protein_quest-0.4.0}/LICENSE +0 -0
  26. {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/CONTRIBUTING.md +0 -0
  27. {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/cli_doc_hook.py +0 -0
  28. {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/index.md +0 -0
  29. {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/notebooks/.gitignore +0 -0
  30. {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/notebooks/alphafold.ipynb +0 -0
  31. {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/notebooks/index.md +0 -0
  32. {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/notebooks/pdbe.ipynb +0 -0
  33. {protein_quest-0.3.2 → protein_quest-0.4.0}/docs/protein-quest-mcp.png +0 -0
  34. {protein_quest-0.3.2 → protein_quest-0.4.0}/mkdocs.yml +0 -0
  35. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/__init__.py +0 -0
  36. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/alphafold/__init__.py +0 -0
  37. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/alphafold/confidence.py +0 -0
  38. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/alphafold/entry_summary.py +0 -0
  39. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/alphafold/fetch.py +0 -0
  40. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/emdb.py +0 -0
  41. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/filters.py +0 -0
  42. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/go.py +0 -0
  43. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/parallel.py +0 -0
  44. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/pdbe/__init__.py +0 -0
  45. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/pdbe/fetch.py +0 -0
  46. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/pdbe/io.py +0 -0
  47. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/py.typed +0 -0
  48. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/taxonomy.py +0 -0
  49. {protein_quest-0.3.2 → protein_quest-0.4.0}/src/protein_quest/utils.py +0 -0
  50. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
  51. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -0
  52. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/alphafold/test_confidence.py +0 -0
  53. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/alphafold/test_entry_summary.py +0 -0
  54. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/alphafold/test_fetch.py +0 -0
  55. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
  56. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
  57. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
  58. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
  59. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
  60. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
  61. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
  62. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
  63. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/fixtures/3JRS_B2A.cif.gz +0 -0
  64. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
  65. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/pdbe/fixtures/2y29.cif +0 -0
  66. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/pdbe/test_fetch.py +0 -0
  67. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/pdbe/test_io.py +0 -0
  68. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_cli.py +0 -0
  69. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_emdb.py +0 -0
  70. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_go.py +0 -0
  71. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_mcp.py +0 -0
  72. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_taxonomy.py +0 -0
  73. {protein_quest-0.3.2 → protein_quest-0.4.0}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: protein_quest
3
- Version: 0.3.2
3
+ Version: 0.4.0
4
4
  Summary: Search/retrieve/filter proteins and protein structures
5
5
  Project-URL: Homepage, https://github.com/haddocking/protein-quest
6
6
  Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
@@ -56,12 +56,14 @@ graph TB;
56
56
  searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
57
57
  searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
58
58
  searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
59
+ searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
60
+ searchcomplexes[/Search complexes/]
59
61
  searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
60
62
  searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
61
63
  searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
62
- fetchpdbe -->|mmcif_files_with_uniprot_acc| chainfilter{{Filter on chain of uniprot}}
64
+ fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
63
65
  chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
64
- fetchad -->|pdb_files| confidencefilter{{Filter out low confidence}}
66
+ fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
65
67
  confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
66
68
  residuefilter --> |mmcif_files| ssfilter
67
69
  classDef dashedBorder stroke-dasharray: 5 5;
@@ -69,6 +71,8 @@ graph TB;
69
71
  taxonomy:::dashedBorder
70
72
  searchemdb:::dashedBorder
71
73
  fetchemdb:::dashedBorder
74
+ searchintactionpartners:::dashedBorder
75
+ searchcomplexes:::dashedBorder
72
76
  ```
73
77
 
74
78
  (Dotted nodes and edges are side-quests.)
@@ -204,6 +208,32 @@ You can use following command to search for a Gene Ontology (GO) term.
204
208
  protein-quest search go --limit 5 --aspect cellular_component apoptosome -
205
209
  ```
206
210
 
211
+ ### Search for interaction partners
212
+
213
+ Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
214
+
215
+ ```shell
216
+ protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
217
+ ```
218
+
219
+ The `interaction-partners-of-Q05471.txt` file contains UniProt accessions (one per line).
220
+
221
+ ### Search for complexes
222
+
223
+ Given UniProt accessions, search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
224
+ and return the complex entries and their members.
225
+
226
+ ```shell
227
+ echo Q05471 | protein-quest search complexes - complexes.csv
228
+ ```
229
+
230
+ The `complexes.csv` file looks like:
231
+
232
+ ```csv
233
+ query_protein,complex_id,complex_url,complex_title,members
234
+ Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
235
+ ```
236
+
207
237
  ## Model Context Protocol (MCP) server
208
238
 
209
239
  Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
@@ -26,12 +26,14 @@ graph TB;
26
26
  searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
27
27
  searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
28
28
  searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
29
+ searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
30
+ searchcomplexes[/Search complexes/]
29
31
  searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
30
32
  searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
31
33
  searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
32
- fetchpdbe -->|mmcif_files_with_uniprot_acc| chainfilter{{Filter on chain of uniprot}}
34
+ fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
33
35
  chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
34
- fetchad -->|pdb_files| confidencefilter{{Filter out low confidence}}
36
+ fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
35
37
  confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
36
38
  residuefilter --> |mmcif_files| ssfilter
37
39
  classDef dashedBorder stroke-dasharray: 5 5;
@@ -39,6 +41,8 @@ graph TB;
39
41
  taxonomy:::dashedBorder
40
42
  searchemdb:::dashedBorder
41
43
  fetchemdb:::dashedBorder
44
+ searchintactionpartners:::dashedBorder
45
+ searchcomplexes:::dashedBorder
42
46
  ```
43
47
 
44
48
  (Dotted nodes and edges are side-quests.)
@@ -174,6 +178,32 @@ You can use following command to search for a Gene Ontology (GO) term.
174
178
  protein-quest search go --limit 5 --aspect cellular_component apoptosome -
175
179
  ```
176
180
 
181
+ ### Search for interaction partners
182
+
183
+ Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
184
+
185
+ ```shell
186
+ protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
187
+ ```
188
+
189
+ The `interaction-partners-of-Q05471.txt` file contains UniProt accessions (one per line).
190
+
191
+ ### Search for complexes
192
+
193
+ Given UniProt accessions, search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
194
+ and return the complex entries and their members.
195
+
196
+ ```shell
197
+ echo Q05471 | protein-quest search complexes - complexes.csv
198
+ ```
199
+
200
+ The `complexes.csv` file looks like:
201
+
202
+ ```csv
203
+ query_protein,complex_id,complex_url,complex_title,members
204
+ Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
205
+ ```
206
+
177
207
  ## Model Context Protocol (MCP) server
178
208
 
179
209
  Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
@@ -12,7 +12,7 @@
12
12
  },
13
13
  {
14
14
  "cell_type": "code",
15
- "execution_count": 23,
15
+ "execution_count": 1,
16
16
  "id": "85674583",
17
17
  "metadata": {},
18
18
  "outputs": [],
@@ -282,6 +282,99 @@
282
282
  "first_uniprot = next(iter(uniprot_accessions.items()))\n",
283
283
  "pprint(first_uniprot)"
284
284
  ]
285
+ },
286
+ {
287
+ "cell_type": "markdown",
288
+ "id": "e32a95f8",
289
+ "metadata": {},
290
+ "source": [
291
+ "## Find interaction partners for uniprot entries"
292
+ ]
293
+ },
294
+ {
295
+ "cell_type": "code",
296
+ "execution_count": 1,
297
+ "id": "d035c702",
298
+ "metadata": {},
299
+ "outputs": [],
300
+ "source": [
301
+ "from protein_quest.uniprot import search4interaction_partners, search4macromolecular_complexes"
302
+ ]
303
+ },
304
+ {
305
+ "cell_type": "code",
306
+ "execution_count": 2,
307
+ "id": "601c690a",
308
+ "metadata": {},
309
+ "outputs": [],
310
+ "source": [
311
+ "# Helicase SWR1 in yeast\n",
312
+ "uniprot_accession = \"Q05471\""
313
+ ]
314
+ },
315
+ {
316
+ "cell_type": "code",
317
+ "execution_count": 3,
318
+ "id": "173c764d",
319
+ "metadata": {},
320
+ "outputs": [
321
+ {
322
+ "data": {
323
+ "text/plain": [
324
+ "{'Q12464': {'CPX-2122'},\n",
325
+ " 'P35817': {'CPX-2122'},\n",
326
+ " 'P80428': {'CPX-2122'},\n",
327
+ " 'Q12509': {'CPX-2122'},\n",
328
+ " 'Q03388': {'CPX-2122'},\n",
329
+ " 'P53201': {'CPX-2122'},\n",
330
+ " 'P53930': {'CPX-2122'},\n",
331
+ " 'P60010': {'CPX-2122'},\n",
332
+ " 'Q03433': {'CPX-2122'},\n",
333
+ " 'Q06707': {'CPX-2122'},\n",
334
+ " 'P38326': {'CPX-2122'},\n",
335
+ " 'P31376': {'CPX-2122'},\n",
336
+ " 'Q03940': {'CPX-2122'}}"
337
+ ]
338
+ },
339
+ "execution_count": 3,
340
+ "metadata": {},
341
+ "output_type": "execute_result"
342
+ }
343
+ ],
344
+ "source": [
345
+ "partners = search4interaction_partners(uniprot_accession, limit=100)\n",
346
+ "partners"
347
+ ]
348
+ },
349
+ {
350
+ "cell_type": "markdown",
351
+ "id": "a763b6f8",
352
+ "metadata": {},
353
+ "source": [
354
+ "To get more information about the complex you can search for the complexes themselves with:"
355
+ ]
356
+ },
357
+ {
358
+ "cell_type": "code",
359
+ "execution_count": 4,
360
+ "id": "236050ea",
361
+ "metadata": {},
362
+ "outputs": [
363
+ {
364
+ "data": {
365
+ "text/plain": [
366
+ "[ComplexPortalEntry(query_protein='Q05471', complex_id='CPX-2122', complex_url='https://www.ebi.ac.uk/complexportal/complex/CPX-2122', complex_title='Swr1 chromatin remodelling complex', members={'P35817', 'Q05471', 'Q12464', 'Q12509', 'Q06707', 'Q03433', 'P38326', 'P53201', 'Q03388', 'P53930', 'P80428', 'Q03940', 'P60010', 'P31376'})]"
367
+ ]
368
+ },
369
+ "execution_count": 4,
370
+ "metadata": {},
371
+ "output_type": "execute_result"
372
+ }
373
+ ],
374
+ "source": [
375
+ "complexes = search4macromolecular_complexes([uniprot_accession])\n",
376
+ "complexes"
377
+ ]
285
378
  }
286
379
  ],
287
380
  "metadata": {
@@ -300,7 +393,7 @@
300
393
  "name": "python",
301
394
  "nbconvert_exporter": "python",
302
395
  "pygments_lexer": "ipython3",
303
- "version": "3.13.2"
396
+ "version": "3.13.5"
304
397
  }
305
398
  },
306
399
  "nbformat": 4,
@@ -52,6 +52,7 @@ dev = [
52
52
  ]
53
53
  docs = [
54
54
  "ipykernel>=6.29.5", # For notebook support in VS Code
55
+ "ipywidgets", # For tqdm support in notebooks
55
56
  "mkdocs>=1.6.1",
56
57
  "mkdocs-autoapi>=0.4.1",
57
58
  "mkdocs-jupyter>=0.25.1",
@@ -1,2 +1,2 @@
1
- __version__ = "0.3.2"
1
+ __version__ = "0.4.0"
2
2
  """The version of the package."""
@@ -15,6 +15,7 @@ from textwrap import dedent
15
15
  from cattrs import structure
16
16
  from rich import print as rprint
17
17
  from rich.logging import RichHandler
18
+ from rich.markdown import Markdown
18
19
  from rich.panel import Panel
19
20
  from rich_argparse import ArgumentDefaultsRichHelpFormatter
20
21
  from tqdm.rich import tqdm
@@ -31,7 +32,17 @@ from protein_quest.pdbe import fetch as pdbe_fetch
31
32
  from protein_quest.pdbe.io import glob_structure_files, locate_structure_file
32
33
  from protein_quest.ss import SecondaryStructureFilterQuery, filter_files_on_secondary_structure
33
34
  from protein_quest.taxonomy import SearchField, _write_taxonomy_csv, search_fields, search_taxon
34
- from protein_quest.uniprot import PdbResult, Query, search4af, search4emdb, search4pdb, search4uniprot
35
+ from protein_quest.uniprot import (
36
+ ComplexPortalEntry,
37
+ PdbResult,
38
+ Query,
39
+ search4af,
40
+ search4emdb,
41
+ search4interaction_partners,
42
+ search4macromolecular_complexes,
43
+ search4pdb,
44
+ search4uniprot,
45
+ )
35
46
  from protein_quest.utils import CopyMethod, copy_methods, copyfile
36
47
 
37
48
  logger = logging.getLogger(__name__)
@@ -211,6 +222,73 @@ def _add_search_taxonomy_parser(subparser: argparse._SubParsersAction):
211
222
  parser.add_argument("--limit", type=int, default=100, help="Maximum number of results to return")
212
223
 
213
224
 
225
+ def _add_search_interaction_partners_parser(subparsers: argparse._SubParsersAction):
226
+ """Add search interaction partners subcommand parser."""
227
+ parser = subparsers.add_parser(
228
+ "interaction-partners",
229
+ help="Search for interaction partners of given UniProt accession",
230
+ description=dedent("""\
231
+ Search for interaction partners of given UniProt accession
232
+ in the Uniprot SPARQL endpoint and Complex Portal.
233
+ """),
234
+ formatter_class=ArgumentDefaultsRichHelpFormatter,
235
+ )
236
+ parser.add_argument(
237
+ "uniprot_acc",
238
+ type=str,
239
+ help="UniProt accession (for example P12345).",
240
+ )
241
+ parser.add_argument(
242
+ "--exclude",
243
+ type=str,
244
+ action="append",
245
+ help="UniProt accessions to exclude from the results. For example already known interaction partners.",
246
+ )
247
+ parser.add_argument(
248
+ "output_csv",
249
+ type=argparse.FileType("w", encoding="UTF-8"),
250
+ help="Output CSV with interaction partners per UniProt accession. Use `-` for stdout.",
251
+ )
252
+ parser.add_argument(
253
+ "--limit", type=int, default=10_000, help="Maximum number of interaction partner uniprot accessions to return"
254
+ )
255
+ parser.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
256
+
257
+
258
+ def _add_search_complexes_parser(subparsers: argparse._SubParsersAction):
259
+ """Add search complexes subcommand parser."""
260
+ description = dedent("""\
261
+ Search for complexes in the Complex Portal.
262
+ https://www.ebi.ac.uk/complexportal/
263
+
264
+ The output CSV file has the following columns:
265
+
266
+ - query_protein: UniProt accession used as query
267
+ - complex_id: Complex Portal identifier
268
+ - complex_url: URL to the Complex Portal entry
269
+ - complex_title: Title of the complex
270
+ - members: Semicolon-separated list of UniProt accessions of complex members
271
+ """)
272
+ parser = subparsers.add_parser(
273
+ "complexes",
274
+ help="Search for complexes in the Complex Portal",
275
+ description=Markdown(description, style="argparse.text"), # type: ignore using rich formatter makes this OK
276
+ formatter_class=ArgumentDefaultsRichHelpFormatter,
277
+ )
278
+ parser.add_argument(
279
+ "uniprot_accs",
280
+ type=argparse.FileType("r", encoding="UTF-8"),
281
+ help="Text file with UniProt accessions (one per line) as query for searching complexes. Use `-` for stdin.",
282
+ )
283
+ parser.add_argument(
284
+ "output_csv",
285
+ type=argparse.FileType("w", encoding="UTF-8"),
286
+ help="Output CSV file with complex results. Use `-` for stdout.",
287
+ )
288
+ parser.add_argument("--limit", type=int, default=100, help="Maximum number of complex results to return")
289
+ parser.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
290
+
291
+
214
292
  def _add_retrieve_pdbe_parser(subparsers: argparse._SubParsersAction):
215
293
  """Add retrieve pdbe subcommand parser."""
216
294
  parser = subparsers.add_parser(
@@ -458,6 +536,8 @@ def _add_search_subcommands(subparsers: argparse._SubParsersAction):
458
536
  _add_search_emdb_parser(subsubparsers)
459
537
  _add_search_go_parser(subsubparsers)
460
538
  _add_search_taxonomy_parser(subsubparsers)
539
+ _add_search_interaction_partners_parser(subsubparsers)
540
+ _add_search_complexes_parser(subsubparsers)
461
541
 
462
542
 
463
543
  def _add_retrieve_subcommands(subparsers: argparse._SubParsersAction):
@@ -636,6 +716,32 @@ def _handle_search_taxonomy(args):
636
716
  _write_taxonomy_csv(results, output_csv)
637
717
 
638
718
 
719
+ def _handle_search_interaction_partners(args: argparse.Namespace):
720
+ uniprot_acc: str = args.uniprot_acc
721
+ excludes: set[str] = set(args.exclude) if args.exclude else set()
722
+ limit: int = args.limit
723
+ timeout: int = args.timeout
724
+ output_csv: TextIOWrapper = args.output_csv
725
+
726
+ rprint(f"Searching for interaction partners of '{uniprot_acc}'")
727
+ results = search4interaction_partners(uniprot_acc, excludes=excludes, limit=limit, timeout=timeout)
728
+ rprint(f"Found {len(results)} interaction partners, written to {output_csv.name}")
729
+ _write_lines(output_csv, results.keys())
730
+
731
+
732
+ def _handle_search_complexes(args: argparse.Namespace):
733
+ uniprot_accs = args.uniprot_accs
734
+ limit = args.limit
735
+ timeout = args.timeout
736
+ output_csv = args.output_csv
737
+
738
+ accs = _read_lines(uniprot_accs)
739
+ rprint(f"Finding complexes for {len(accs)} uniprot accessions")
740
+ results = search4macromolecular_complexes(accs, limit=limit, timeout=timeout)
741
+ rprint(f"Found {len(results)} complexes, written to {output_csv.name}")
742
+ _write_complexes_csv(results, output_csv)
743
+
744
+
639
745
  def _handle_retrieve_pdbe(args):
640
746
  pdbe_csv = args.pdbe_csv
641
747
  output_dir = args.output_dir
@@ -875,6 +981,8 @@ HANDLERS: dict[tuple[str, str | None], Callable] = {
875
981
  ("search", "emdb"): _handle_search_emdb,
876
982
  ("search", "go"): _handle_search_go,
877
983
  ("search", "taxonomy"): _handle_search_taxonomy,
984
+ ("search", "interaction-partners"): _handle_search_interaction_partners,
985
+ ("search", "complexes"): _handle_search_complexes,
878
986
  ("retrieve", "pdbe"): _handle_retrieve_pdbe,
879
987
  ("retrieve", "alphafold"): _handle_retrieve_alphafold,
880
988
  ("retrieve", "emdb"): _handle_retrieve_emdb,
@@ -937,3 +1045,33 @@ def _iter_csv_rows(file: TextIOWrapper) -> Generator[dict[str, str]]:
937
1045
 
938
1046
  def _read_column_from_csv(file: TextIOWrapper, column: str) -> set[str]:
939
1047
  return {row[column] for row in _iter_csv_rows(file)}
1048
+
1049
+
1050
+ def _write_complexes_csv(complexes: list[ComplexPortalEntry], output_csv: TextIOWrapper) -> None:
1051
+ """Write ComplexPortal information to a CSV file.
1052
+
1053
+ Args:
1054
+ complexes: List of ComplexPortalEntry objects.
1055
+ output_csv: TextIOWrapper to write the CSV data to.
1056
+ """
1057
+ writer = csv.writer(output_csv)
1058
+ writer.writerow(
1059
+ [
1060
+ "query_protein",
1061
+ "complex_id",
1062
+ "complex_url",
1063
+ "complex_title",
1064
+ "members",
1065
+ ]
1066
+ )
1067
+ for entry in complexes:
1068
+ members_str = ";".join(sorted(entry.members))
1069
+ writer.writerow(
1070
+ [
1071
+ entry.query_protein,
1072
+ entry.complex_id,
1073
+ entry.complex_url,
1074
+ entry.complex_title,
1075
+ members_str,
1076
+ ]
1077
+ )
@@ -13,6 +13,7 @@ type PositiveInt = int
13
13
  converter = make_converter()
14
14
  """cattrs converter to read JSON document or dict to Python objects."""
15
15
  converter.register_structure_hook(URL, lambda v, _: URL(v))
16
+ converter.register_unstructure_hook(URL, lambda u: str(u))
16
17
 
17
18
 
18
19
  @converter.register_structure_hook
@@ -48,7 +48,15 @@ from protein_quest.pdbe.fetch import fetch as pdbe_fetch
48
48
  from protein_quest.pdbe.io import glob_structure_files, nr_residues_in_chain, write_single_chain_pdb_file
49
49
  from protein_quest.ss import filter_file_on_secondary_structure
50
50
  from protein_quest.taxonomy import search_taxon
51
- from protein_quest.uniprot import PdbResult, Query, search4af, search4emdb, search4pdb, search4uniprot
51
+ from protein_quest.uniprot import (
52
+ PdbResult,
53
+ Query,
54
+ search4af,
55
+ search4emdb,
56
+ search4macromolecular_complexes,
57
+ search4pdb,
58
+ search4uniprot,
59
+ )
52
60
 
53
61
  mcp = FastMCP("protein-quest")
54
62
 
@@ -137,6 +145,7 @@ def search_alphafolds(
137
145
 
138
146
 
139
147
  mcp.tool(search4emdb, name="search_emdb")
148
+ mcp.tool(search4macromolecular_complexes, name="search_macromolecular_complexes")
140
149
 
141
150
 
142
151
  @mcp.tool
@@ -111,6 +111,26 @@ class SecondaryStructureFilterQuery:
111
111
  ratio_min_sheet_residues: Ratio | None = None
112
112
  ratio_max_sheet_residues: Ratio | None = None
113
113
 
114
+ def is_actionable(self) -> bool:
115
+ """Check if the secondary structure query has any actionable filters.
116
+
117
+ Returns:
118
+ True if any of the filters are set, False otherwise.
119
+ """
120
+ return any(
121
+ field is not None
122
+ for field in [
123
+ self.abs_min_helix_residues,
124
+ self.abs_max_helix_residues,
125
+ self.abs_min_sheet_residues,
126
+ self.abs_max_sheet_residues,
127
+ self.ratio_min_helix_residues,
128
+ self.ratio_max_helix_residues,
129
+ self.ratio_min_sheet_residues,
130
+ self.ratio_max_sheet_residues,
131
+ ]
132
+ )
133
+
114
134
 
115
135
  def _check_range(min_val, max_val, label):
116
136
  if min_val is not None and max_val is not None and min_val >= max_val: