protein-quest 0.3.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of protein-quest might be problematic; see the registry page for details.
Files changed (75)
  1. {protein_quest-0.3.2 → protein_quest-0.5.0}/.github/workflows/ci.yml +0 -9
  2. {protein_quest-0.3.2 → protein_quest-0.5.0}/.github/workflows/pages.yml +9 -1
  3. {protein_quest-0.3.2 → protein_quest-0.5.0}/.gitignore +14 -1
  4. {protein_quest-0.3.2 → protein_quest-0.5.0}/PKG-INFO +41 -3
  5. {protein_quest-0.3.2 → protein_quest-0.5.0}/README.md +39 -2
  6. {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/notebooks/uniprot.ipynb +95 -2
  7. {protein_quest-0.3.2 → protein_quest-0.5.0}/mkdocs.yml +3 -4
  8. {protein_quest-0.3.2 → protein_quest-0.5.0}/pyproject.toml +3 -0
  9. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/__version__.py +1 -1
  10. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/alphafold/fetch.py +34 -9
  11. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/cli.py +207 -26
  12. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/converter.py +1 -0
  13. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/emdb.py +6 -3
  14. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/mcp_server.py +34 -3
  15. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/pdbe/fetch.py +6 -3
  16. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/ss.py +20 -0
  17. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/uniprot.py +157 -4
  18. protein_quest-0.5.0/src/protein_quest/utils.py +511 -0
  19. protein_quest-0.5.0/tests/cassettes/test_uniprot/test_search4interaction_partners.yaml +384 -0
  20. protein_quest-0.5.0/tests/cassettes/test_uniprot/test_search4macromolecular_complexes.yaml +382 -0
  21. protein_quest-0.5.0/tests/test_converter.py +23 -0
  22. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_ss.py +6 -0
  23. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_uniprot.py +65 -0
  24. protein_quest-0.5.0/tests/test_utils.py +326 -0
  25. {protein_quest-0.3.2 → protein_quest-0.5.0}/uv.lock +55 -1
  26. protein_quest-0.3.2/docs/cli_doc_hook.py +0 -113
  27. protein_quest-0.3.2/src/protein_quest/utils.py +0 -167
  28. protein_quest-0.3.2/tests/test_utils.py +0 -31
  29. {protein_quest-0.3.2 → protein_quest-0.5.0}/.github/workflows/pypi-publish.yml +0 -0
  30. {protein_quest-0.3.2 → protein_quest-0.5.0}/.vscode/extensions.json +0 -0
  31. {protein_quest-0.3.2 → protein_quest-0.5.0}/CITATION.cff +0 -0
  32. {protein_quest-0.3.2 → protein_quest-0.5.0}/CODE_OF_CONDUCT.md +0 -0
  33. {protein_quest-0.3.2 → protein_quest-0.5.0}/CONTRIBUTING.md +0 -0
  34. {protein_quest-0.3.2 → protein_quest-0.5.0}/LICENSE +0 -0
  35. {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/CONTRIBUTING.md +0 -0
  36. {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/index.md +0 -0
  37. {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/notebooks/.gitignore +0 -0
  38. {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/notebooks/alphafold.ipynb +0 -0
  39. {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/notebooks/index.md +0 -0
  40. {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/notebooks/pdbe.ipynb +0 -0
  41. {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/protein-quest-mcp.png +0 -0
  42. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/__init__.py +0 -0
  43. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/alphafold/__init__.py +0 -0
  44. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/alphafold/confidence.py +0 -0
  45. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/alphafold/entry_summary.py +0 -0
  46. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/filters.py +0 -0
  47. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/go.py +0 -0
  48. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/parallel.py +0 -0
  49. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/pdbe/__init__.py +0 -0
  50. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/pdbe/io.py +0 -0
  51. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/py.typed +0 -0
  52. {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/taxonomy.py +0 -0
  53. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
  54. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -0
  55. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/alphafold/test_confidence.py +0 -0
  56. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/alphafold/test_entry_summary.py +0 -0
  57. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/alphafold/test_fetch.py +0 -0
  58. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
  59. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
  60. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
  61. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
  62. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
  63. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
  64. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
  65. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
  66. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/fixtures/3JRS_B2A.cif.gz +0 -0
  67. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
  68. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/pdbe/fixtures/2y29.cif +0 -0
  69. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/pdbe/test_fetch.py +0 -0
  70. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/pdbe/test_io.py +0 -0
  71. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_cli.py +0 -0
  72. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_emdb.py +0 -0
  73. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_go.py +0 -0
  74. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_mcp.py +0 -0
  75. {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_taxonomy.py +0 -0

--- protein_quest-0.3.2/.github/workflows/ci.yml
+++ protein_quest-0.5.0/.github/workflows/ci.yml
@@ -27,20 +27,11 @@ jobs:
       - name: Run tests
         run: |
           uv run pytest --cov --cov-report=xml
-          echo $? > pytest-exitcode
-        continue-on-error: true
-      # Always upload coverage, even if tests fail
       - name: Run codacy-coverage-reporter
         uses: codacy/codacy-coverage-reporter-action@v1.3.0
         with:
           project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
           coverage-reports: coverage.xml
-      - name: Fail job if pytest failed
-        run: |
-          if [ -f pytest-exitcode ] && [ "$(cat pytest-exitcode)" -ne 0 ]; then
-            echo "Pytest failed, failing job."
-            exit 1
-          fi
   build:
     name: build
     runs-on: ubuntu-latest

--- protein_quest-0.3.2/.github/workflows/pages.yml
+++ protein_quest-0.5.0/.github/workflows/pages.yml
@@ -5,6 +5,7 @@ on:
     branches:
       - main
   workflow_dispatch:
+  pull_request:
 
 permissions:
   contents: read
@@ -13,7 +14,7 @@ permissions:
 
 # Only have one deployment in progress at a time
 concurrency:
-  group: "pages"
+  group: pages
   cancel-in-progress: true
 
 jobs:
@@ -32,6 +33,10 @@ jobs:
       - name: Build MkDocs site
         run: |
           uv run mkdocs build
+        env:
+          # Force colored output from rich library
+          TTY_COMPATIBLE: '1'
+          TTY_INTERACTIVE: '0'
 
       - name: Upload artifact
         uses: actions/upload-pages-artifact@v3
@@ -42,6 +47,9 @@ jobs:
     # Add a dependency to the build job
     needs: build
 
+    # Only deploy on pushes to main or manual trigger of main branch
+    if: github.ref == 'refs/heads/main'
+
     # Grant GITHUB_TOKEN the permissions required to make a Pages deployment
     permissions:
       pages: write # to deploy to Pages

--- protein_quest-0.3.2/.gitignore
+++ protein_quest-0.5.0/.gitignore
@@ -73,4 +73,17 @@ venv.bak/
 /docs/pdb_files/
 /docs/density_filtered/
 /site
-/mysession/
+/mysession/
+# Paths generated in README.md examples
+uniprot_accs.txt
+pdbe.csv
+alphafold.csv
+emdbs.csv
+interaction-partners-of-Q05471.txt
+complexes.csv
+downloads-af/
+downloads-emdb/
+downloads-pdbe/
+filtered/
+filtered-chains/
+filtered-ss/

--- protein_quest-0.3.2/PKG-INFO
+++ protein_quest-0.5.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: protein_quest
-Version: 0.3.2
+Version: 0.5.0
 Summary: Search/retrieve/filter proteins and protein structures
 Project-URL: Homepage, https://github.com/haddocking/protein-quest
 Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
@@ -17,6 +17,7 @@ Requires-Dist: cattrs[orjson]>=24.1.3
 Requires-Dist: dask>=2025.5.1
 Requires-Dist: distributed>=2025.5.1
 Requires-Dist: gemmi>=0.7.3
+Requires-Dist: platformdirs>=4.3.8
 Requires-Dist: psutil>=7.0.0
 Requires-Dist: rich-argparse>=1.7.1
 Requires-Dist: rich>=14.0.0
@@ -47,6 +48,10 @@ It uses
 - [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
 - [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.
 
+The package is used by
+
+- [protein-detective](https://github.com/haddocking/protein-detective)
+
 An example workflow:
 
 ```mermaid
@@ -56,12 +61,14 @@ graph TB;
     searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
     searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
     searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
+    searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
+    searchcomplexes[/Search complexes/]
     searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
     searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
     searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
-    fetchpdbe -->|mmcif_files_with_uniprot_acc| chainfilter{{Filter on chain of uniprot}}
+    fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
     chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
-    fetchad -->|pdb_files| confidencefilter{{Filter out low confidence}}
+    fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
     confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
     residuefilter --> |mmcif_files| ssfilter
     classDef dashedBorder stroke-dasharray: 5 5;
@@ -69,6 +76,8 @@ graph TB;
     taxonomy:::dashedBorder
     searchemdb:::dashedBorder
     fetchemdb:::dashedBorder
+    searchintactionpartners:::dashedBorder
+    searchcomplexes:::dashedBorder
 ```
 
 (Dotted nodes and edges are side-quests.)
@@ -90,6 +99,9 @@ The main entry point is the `protein-quest` command line tool which has multiple
 
 To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
 
+While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
+This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
+
 ### Search Uniprot accessions
 
 ```shell
@@ -204,6 +216,32 @@ You can use following command to search for a Gene Ontology (GO) term.
 protein-quest search go --limit 5 --aspect cellular_component apoptosome -
 ```
 
+### Search for interaction partners
+
+Use https://www.ebi.ac.uk/complexportal to find interaction partners of given UniProt accession.
+
+```shell
+protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
+```
+
+The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
+
+### Search for complexes
+
+Given Uniprot accessions search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
+and return the complex entries and their members.
+
+```shell
+echo Q05471 | protein-quest search complexes - complexes.csv
+```
+
+The `complexes.csv` looks like
+
+```csv
+query_protein,complex_id,complex_url,complex_title,members
+Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
+```
+
 ## Model Context Protocol (MCP) server
 
 Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.

--- protein_quest-0.3.2/README.md
+++ protein_quest-0.5.0/README.md
@@ -17,6 +17,10 @@ It uses
 - [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
 - [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.
 
+The package is used by
+
+- [protein-detective](https://github.com/haddocking/protein-detective)
+
 An example workflow:
 
 ```mermaid
@@ -26,12 +30,14 @@ graph TB;
     searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
     searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
     searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
+    searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
+    searchcomplexes[/Search complexes/]
     searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
     searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
     searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
-    fetchpdbe -->|mmcif_files_with_uniprot_acc| chainfilter{{Filter on chain of uniprot}}
+    fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
     chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
-    fetchad -->|pdb_files| confidencefilter{{Filter out low confidence}}
+    fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
     confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
     residuefilter --> |mmcif_files| ssfilter
     classDef dashedBorder stroke-dasharray: 5 5;
@@ -39,6 +45,8 @@ graph TB;
     taxonomy:::dashedBorder
     searchemdb:::dashedBorder
    fetchemdb:::dashedBorder
+    searchintactionpartners:::dashedBorder
+    searchcomplexes:::dashedBorder
 ```
 
 (Dotted nodes and edges are side-quests.)
@@ -60,6 +68,9 @@ The main entry point is the `protein-quest` command line tool which has multiple
 
 To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
 
+While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
+This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
+
 ### Search Uniprot accessions
 
 ```shell
@@ -174,6 +185,32 @@ You can use following command to search for a Gene Ontology (GO) term.
 protein-quest search go --limit 5 --aspect cellular_component apoptosome -
 ```
 
+### Search for interaction partners
+
+Use https://www.ebi.ac.uk/complexportal to find interaction partners of given UniProt accession.
+
+```shell
+protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
+```
+
+The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
+
+### Search for complexes
+
+Given Uniprot accessions search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
+and return the complex entries and their members.
+
+```shell
+echo Q05471 | protein-quest search complexes - complexes.csv
+```
+
+The `complexes.csv` looks like
+
+```csv
+query_protein,complex_id,complex_url,complex_title,members
+Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
+```
+
 ## Model Context Protocol (MCP) server
 
 Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
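
Below is a minimal sketch of how the cache-related flags described in the README hunk above might be used. The `--cache-dir`, `--copy-method`, and `--no-cache` flag names come from the added README text; the `retrieve alphafold` subcommand, its positional arguments, and the `hardlink` value are assumptions for illustration and may not match the actual CLI.

```shell
# Hypothetical invocation: flag names are documented above, everything else is assumed.
protein-quest retrieve alphafold alphafold.csv downloads-af/ \
    --cache-dir ~/.cache/protein-quest \
    --copy-method hardlink

# Skip the global cache for a single run (flag name from the README; placement assumed).
protein-quest retrieve alphafold alphafold.csv downloads-af/ --no-cache
```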

--- protein_quest-0.3.2/docs/notebooks/uniprot.ipynb
+++ protein_quest-0.5.0/docs/notebooks/uniprot.ipynb
@@ -12,7 +12,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 1,
    "id": "85674583",
    "metadata": {},
    "outputs": [],
@@ -282,6 +282,99 @@
     "first_uniprot = next(iter(uniprot_accessions.items()))\n",
     "pprint(first_uniprot)"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e32a95f8",
+   "metadata": {},
+   "source": [
+    "## Find interaction partners for uniprot entries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "d035c702",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from protein_quest.uniprot import search4interaction_partners, search4macromolecular_complexes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "601c690a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Helicase SWR1 in yeast\n",
+    "uniprot_accession = \"Q05471\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "173c764d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'Q12464': {'CPX-2122'},\n",
+       " 'P35817': {'CPX-2122'},\n",
+       " 'P80428': {'CPX-2122'},\n",
+       " 'Q12509': {'CPX-2122'},\n",
+       " 'Q03388': {'CPX-2122'},\n",
+       " 'P53201': {'CPX-2122'},\n",
+       " 'P53930': {'CPX-2122'},\n",
+       " 'P60010': {'CPX-2122'},\n",
+       " 'Q03433': {'CPX-2122'},\n",
+       " 'Q06707': {'CPX-2122'},\n",
+       " 'P38326': {'CPX-2122'},\n",
+       " 'P31376': {'CPX-2122'},\n",
+       " 'Q03940': {'CPX-2122'}}"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "partners = search4interaction_partners(uniprot_accession, limit=100)\n",
+    "partners"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a763b6f8",
+   "metadata": {},
+   "source": [
+    "To get more information about the complex you can search for the complexes themselves with:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "236050ea",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[ComplexPortalEntry(query_protein='Q05471', complex_id='CPX-2122', complex_url='https://www.ebi.ac.uk/complexportal/complex/CPX-2122', complex_title='Swr1 chromatin remodelling complex', members={'P35817', 'Q05471', 'Q12464', 'Q12509', 'Q06707', 'Q03433', 'P38326', 'P53201', 'Q03388', 'P53930', 'P80428', 'Q03940', 'P60010', 'P31376'})]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "complexes = search4macromolecular_complexes([uniprot_accession])\n",
+    "complexes"
+   ]
   }
  ],
  "metadata": {
@@ -300,7 +393,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.13.2"
+   "version": "3.13.5"
   }
  },
 "nbformat": 4,

--- protein_quest-0.3.2/mkdocs.yml
+++ protein_quest-0.5.0/mkdocs.yml
@@ -3,10 +3,6 @@ site_url: https://bonvinlab.org/protein_quest
 repo_name: haddocking/protein-quest
 repo_url: https://github.com/haddocking/protein-quest
 watch: [mkdocs.yml, README.md, src/protein_quest]
-exclude_docs: |
-  cli_doc_hook.py
-hooks:
-  - docs/cli_doc_hook.py
 use_directory_urls: false
 theme:
   name: material
@@ -61,6 +57,9 @@ plugins:
       remove_tag_config:
         remove_input_tags:
           - hide_code
+  - mkdocs-rich-argparse:
+      module: protein_quest.cli
+      factory: make_parser
 
 markdown_extensions:
   # Use to render part of README as home

--- protein_quest-0.3.2/pyproject.toml
+++ protein_quest-0.5.0/pyproject.toml
@@ -20,6 +20,7 @@ dependencies = [
     "sparqlwrapper>=2.0.0",
     "tqdm>=4.67.1",
     "yarl>=1.20.1",
+    "platformdirs>=4.3.8",
 ]
 
 [project.urls]
@@ -52,10 +53,12 @@ dev = [
 ]
 docs = [
     "ipykernel>=6.29.5", # For notebook support in VS Code
+    "ipywidgets", # For tqdm support in notebooks
     "mkdocs>=1.6.1",
     "mkdocs-autoapi>=0.4.1",
     "mkdocs-jupyter>=0.25.1",
     "mkdocs-material>=9.6.14",
+    "mkdocs-rich-argparse>=0.1.2",
     "mkdocstrings[python]>=0.29.1",
 ]
 docs-type = [

--- protein_quest-0.3.2/src/protein_quest/__version__.py
+++ protein_quest-0.5.0/src/protein_quest/__version__.py
@@ -1,2 +1,2 @@
-__version__ = "0.3.2"
+__version__ = "0.5.0"
 """The version of the package."""

--- protein_quest-0.3.2/src/protein_quest/alphafold/fetch.py
+++ protein_quest-0.5.0/src/protein_quest/alphafold/fetch.py
@@ -14,7 +14,7 @@ from yarl import URL
 
 from protein_quest.alphafold.entry_summary import EntrySummary
 from protein_quest.converter import converter
-from protein_quest.utils import friendly_session, retrieve_files, run_async
+from protein_quest.utils import Cacher, PassthroughCacher, friendly_session, retrieve_files, run_async
 
 logger = logging.getLogger(__name__)
 
@@ -104,7 +104,7 @@ class AlphaFoldEntry:
 
 
 async def fetch_summary(
-    qualifier: str, session: RetryClient, semaphore: Semaphore, save_dir: Path | None
+    qualifier: str, session: RetryClient, semaphore: Semaphore, save_dir: Path | None, cacher: Cacher
 ) -> list[EntrySummary]:
     """Fetches a summary from the AlphaFold database for a given qualifier.
 
@@ -116,6 +116,7 @@ async def fetch_summary(
         save_dir: An optional directory to save the fetched summary as a JSON file.
             If set and summary exists then summary will be loaded from disk instead of being fetched from the API.
             If not set then the summary will not be saved to disk and will always be fetched from the API.
+        cacher: A cacher to use for caching the fetched summary. Only used if save_dir is not None.
 
     Returns:
         A list of EntrySummary objects representing the fetched summary.
@@ -124,6 +125,11 @@ async def fetch_summary(
     fn: AsyncPath | None = None
     if save_dir is not None:
         fn = AsyncPath(save_dir / f"{qualifier}.json")
+        cached_file = await cacher.copy_from_cache(Path(fn))
+        if cached_file is not None:
+            logger.debug(f"Using cached file {cached_file} for summary of {qualifier}.")
+            raw_data = await AsyncPath(cached_file).read_bytes()
+            return converter.loads(raw_data, list[EntrySummary])
         if await fn.exists():
             logger.debug(f"File {fn} already exists. Skipping download from {url}.")
             raw_data = await fn.read_bytes()
@@ -133,18 +139,23 @@ async def fetch_summary(
         raw_data = await response.content.read()
         if fn is not None:
             # TODO return fn and make it part of AlphaFoldEntry as summary_file prop
-            await fn.write_bytes(raw_data)
+            await cacher.write_bytes(Path(fn), raw_data)
         return converter.loads(raw_data, list[EntrySummary])
 
 
 async def fetch_summaries(
-    qualifiers: Iterable[str], save_dir: Path | None = None, max_parallel_downloads: int = 5
+    qualifiers: Iterable[str],
+    save_dir: Path | None = None,
+    max_parallel_downloads: int = 5,
+    cacher: Cacher | None = None,
 ) -> AsyncGenerator[EntrySummary]:
     semaphore = Semaphore(max_parallel_downloads)
     if save_dir is not None:
         save_dir.mkdir(parents=True, exist_ok=True)
+    if cacher is None:
+        cacher = PassthroughCacher()
     async with friendly_session() as session:
-        tasks = [fetch_summary(qualifier, session, semaphore, save_dir) for qualifier in qualifiers]
+        tasks = [fetch_summary(qualifier, session, semaphore, save_dir, cacher) for qualifier in qualifiers]
         summaries_per_qualifier: list[list[EntrySummary]] = await tqdm.gather(
             *tasks, desc="Fetching Alphafold summaries"
         )
@@ -154,7 +165,11 @@ async def fetch_summaries(
 
 
 async def fetch_many_async(
-    uniprot_accessions: Iterable[str], save_dir: Path, what: set[DownloadableFormat], max_parallel_downloads: int = 5
+    uniprot_accessions: Iterable[str],
+    save_dir: Path,
+    what: set[DownloadableFormat],
+    max_parallel_downloads: int = 5,
+    cacher: Cacher | None = None,
 ) -> AsyncGenerator[AlphaFoldEntry]:
     """Asynchronously fetches summaries and files from
     [AlphaFold Protein Structure Database](https://alphafold.ebi.ac.uk/).
@@ -164,15 +179,17 @@ async def fetch_many_async(
         save_dir: The directory to save the fetched files to.
         what: A set of formats to download.
         max_parallel_downloads: The maximum number of parallel downloads.
+        cacher: A cacher to use for caching the fetched files. Only used if summary is in what set.
 
     Yields:
         A dataclass containing the summary, pdb file, and pae file.
     """
     save_dir_for_summaries = save_dir if "summary" in what and save_dir is not None else None
+
     summaries = [
         s
         async for s in fetch_summaries(
-            uniprot_accessions, save_dir_for_summaries, max_parallel_downloads=max_parallel_downloads
+            uniprot_accessions, save_dir_for_summaries, max_parallel_downloads=max_parallel_downloads, cacher=cacher
         )
     ]
 
@@ -183,6 +200,7 @@ async def fetch_many_async(
         save_dir,
         desc="Downloading AlphaFold files",
         max_parallel_downloads=max_parallel_downloads,
+        cacher=cacher,
     )
     for summary in summaries:
         yield AlphaFoldEntry(
@@ -236,7 +254,11 @@ def files_to_download(what: set[DownloadableFormat], summaries: Iterable[EntrySu
 
 
 def fetch_many(
-    ids: Iterable[str], save_dir: Path, what: set[DownloadableFormat], max_parallel_downloads: int = 5
+    ids: Iterable[str],
+    save_dir: Path,
+    what: set[DownloadableFormat],
+    max_parallel_downloads: int = 5,
+    cacher: Cacher | None = None,
 ) -> list[AlphaFoldEntry]:
     """Synchronously fetches summaries and pdb and pae files from AlphaFold Protein Structure Database.
 
@@ -245,6 +267,7 @@ def fetch_many(
         save_dir: The directory to save the fetched files to.
         what: A set of formats to download.
        max_parallel_downloads: The maximum number of parallel downloads.
+        cacher: A cacher to use for caching the fetched files. Only used if summary is in what set.
 
     Returns:
         A list of AlphaFoldEntry dataclasses containing the summary, pdb file, and pae file.
@@ -253,7 +276,9 @@ def fetch_many(
     async def gather_entries():
         return [
             entry
-            async for entry in fetch_many_async(ids, save_dir, what, max_parallel_downloads=max_parallel_downloads)
+            async for entry in fetch_many_async(
+                ids, save_dir, what, max_parallel_downloads=max_parallel_downloads, cacher=cacher
+            )
         ]
 
     return run_async(gather_entries())
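
The hunks above thread a new optional `cacher` argument through the AlphaFold fetch functions, falling back to `PassthroughCacher()` when none is given. A minimal sketch of calling the synchronous entry point with the new parameter, using only the signatures visible in this diff (the choice of `what={"summary"}` and printing whole entries are illustrative assumptions):

```python
from pathlib import Path

from protein_quest.alphafold.fetch import fetch_many
from protein_quest.utils import PassthroughCacher

# Fetch AlphaFold summaries for one accession.
# Per the diff, passing cacher=None (the default) makes fetch_summaries fall back to
# PassthroughCacher(), so this explicit value matches the default behavior.
entries = fetch_many(
    ["Q05471"],
    save_dir=Path("downloads-af"),
    what={"summary"},
    max_parallel_downloads=5,
    cacher=PassthroughCacher(),
)
for entry in entries:
    print(entry)
```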