protein-quest 0.3.2__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of protein-quest might be problematic. Click here for more details.
- {protein_quest-0.3.2 → protein_quest-0.5.0}/.github/workflows/ci.yml +0 -9
- {protein_quest-0.3.2 → protein_quest-0.5.0}/.github/workflows/pages.yml +9 -1
- {protein_quest-0.3.2 → protein_quest-0.5.0}/.gitignore +14 -1
- {protein_quest-0.3.2 → protein_quest-0.5.0}/PKG-INFO +41 -3
- {protein_quest-0.3.2 → protein_quest-0.5.0}/README.md +39 -2
- {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/notebooks/uniprot.ipynb +95 -2
- {protein_quest-0.3.2 → protein_quest-0.5.0}/mkdocs.yml +3 -4
- {protein_quest-0.3.2 → protein_quest-0.5.0}/pyproject.toml +3 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/__version__.py +1 -1
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/alphafold/fetch.py +34 -9
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/cli.py +207 -26
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/converter.py +1 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/emdb.py +6 -3
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/mcp_server.py +34 -3
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/pdbe/fetch.py +6 -3
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/ss.py +20 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/uniprot.py +157 -4
- protein_quest-0.5.0/src/protein_quest/utils.py +511 -0
- protein_quest-0.5.0/tests/cassettes/test_uniprot/test_search4interaction_partners.yaml +384 -0
- protein_quest-0.5.0/tests/cassettes/test_uniprot/test_search4macromolecular_complexes.yaml +382 -0
- protein_quest-0.5.0/tests/test_converter.py +23 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_ss.py +6 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_uniprot.py +65 -0
- protein_quest-0.5.0/tests/test_utils.py +326 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/uv.lock +55 -1
- protein_quest-0.3.2/docs/cli_doc_hook.py +0 -113
- protein_quest-0.3.2/src/protein_quest/utils.py +0 -167
- protein_quest-0.3.2/tests/test_utils.py +0 -31
- {protein_quest-0.3.2 → protein_quest-0.5.0}/.github/workflows/pypi-publish.yml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/.vscode/extensions.json +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/CITATION.cff +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/CODE_OF_CONDUCT.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/CONTRIBUTING.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/LICENSE +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/CONTRIBUTING.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/index.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/notebooks/.gitignore +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/notebooks/alphafold.ipynb +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/notebooks/index.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/notebooks/pdbe.ipynb +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/docs/protein-quest-mcp.png +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/__init__.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/alphafold/__init__.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/alphafold/confidence.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/alphafold/entry_summary.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/filters.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/go.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/parallel.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/pdbe/__init__.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/pdbe/io.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/py.typed +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/src/protein_quest/taxonomy.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/alphafold/test_confidence.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/alphafold/test_entry_summary.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/alphafold/test_fetch.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/fixtures/3JRS_B2A.cif.gz +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/pdbe/fixtures/2y29.cif +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/pdbe/test_fetch.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/pdbe/test_io.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_cli.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_emdb.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_go.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_mcp.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.5.0}/tests/test_taxonomy.py +0 -0
|
@@ -27,20 +27,11 @@ jobs:
|
|
|
27
27
|
- name: Run tests
|
|
28
28
|
run: |
|
|
29
29
|
uv run pytest --cov --cov-report=xml
|
|
30
|
-
echo $? > pytest-exitcode
|
|
31
|
-
continue-on-error: true
|
|
32
|
-
# Always upload coverage, even if tests fail
|
|
33
30
|
- name: Run codacy-coverage-reporter
|
|
34
31
|
uses: codacy/codacy-coverage-reporter-action@v1.3.0
|
|
35
32
|
with:
|
|
36
33
|
project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
|
|
37
34
|
coverage-reports: coverage.xml
|
|
38
|
-
- name: Fail job if pytest failed
|
|
39
|
-
run: |
|
|
40
|
-
if [ -f pytest-exitcode ] && [ "$(cat pytest-exitcode)" -ne 0 ]; then
|
|
41
|
-
echo "Pytest failed, failing job."
|
|
42
|
-
exit 1
|
|
43
|
-
fi
|
|
44
35
|
build:
|
|
45
36
|
name: build
|
|
46
37
|
runs-on: ubuntu-latest
|
|
@@ -5,6 +5,7 @@ on:
|
|
|
5
5
|
branches:
|
|
6
6
|
- main
|
|
7
7
|
workflow_dispatch:
|
|
8
|
+
pull_request:
|
|
8
9
|
|
|
9
10
|
permissions:
|
|
10
11
|
contents: read
|
|
@@ -13,7 +14,7 @@ permissions:
|
|
|
13
14
|
|
|
14
15
|
# Only have one deployment in progress at a time
|
|
15
16
|
concurrency:
|
|
16
|
-
group:
|
|
17
|
+
group: pages
|
|
17
18
|
cancel-in-progress: true
|
|
18
19
|
|
|
19
20
|
jobs:
|
|
@@ -32,6 +33,10 @@ jobs:
|
|
|
32
33
|
- name: Build MkDocs site
|
|
33
34
|
run: |
|
|
34
35
|
uv run mkdocs build
|
|
36
|
+
env:
|
|
37
|
+
# Force colored output from rich library
|
|
38
|
+
TTY_COMPATIBLE: '1'
|
|
39
|
+
TTY_INTERACTIVE: '0'
|
|
35
40
|
|
|
36
41
|
- name: Upload artifact
|
|
37
42
|
uses: actions/upload-pages-artifact@v3
|
|
@@ -42,6 +47,9 @@ jobs:
|
|
|
42
47
|
# Add a dependency to the build job
|
|
43
48
|
needs: build
|
|
44
49
|
|
|
50
|
+
# Only deploy on pushes to main or manual trigger of main branch
|
|
51
|
+
if: github.ref == 'refs/heads/main'
|
|
52
|
+
|
|
45
53
|
# Grant GITHUB_TOKEN the permissions required to make a Pages deployment
|
|
46
54
|
permissions:
|
|
47
55
|
pages: write # to deploy to Pages
|
|
@@ -73,4 +73,17 @@ venv.bak/
|
|
|
73
73
|
/docs/pdb_files/
|
|
74
74
|
/docs/density_filtered/
|
|
75
75
|
/site
|
|
76
|
-
/mysession/
|
|
76
|
+
/mysession/
|
|
77
|
+
# Paths generated in README.md examples
|
|
78
|
+
uniprot_accs.txt
|
|
79
|
+
pdbe.csv
|
|
80
|
+
alphafold.csv
|
|
81
|
+
emdbs.csv
|
|
82
|
+
interaction-partners-of-Q05471.txt
|
|
83
|
+
complexes.csv
|
|
84
|
+
downloads-af/
|
|
85
|
+
downloads-emdb/
|
|
86
|
+
downloads-pdbe/
|
|
87
|
+
filtered/
|
|
88
|
+
filtered-chains/
|
|
89
|
+
filtered-ss/
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: protein_quest
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Search/retrieve/filter proteins and protein structures
|
|
5
5
|
Project-URL: Homepage, https://github.com/haddocking/protein-quest
|
|
6
6
|
Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
|
|
@@ -17,6 +17,7 @@ Requires-Dist: cattrs[orjson]>=24.1.3
|
|
|
17
17
|
Requires-Dist: dask>=2025.5.1
|
|
18
18
|
Requires-Dist: distributed>=2025.5.1
|
|
19
19
|
Requires-Dist: gemmi>=0.7.3
|
|
20
|
+
Requires-Dist: platformdirs>=4.3.8
|
|
20
21
|
Requires-Dist: psutil>=7.0.0
|
|
21
22
|
Requires-Dist: rich-argparse>=1.7.1
|
|
22
23
|
Requires-Dist: rich>=14.0.0
|
|
@@ -47,6 +48,10 @@ It uses
|
|
|
47
48
|
- [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
|
|
48
49
|
- [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.
|
|
49
50
|
|
|
51
|
+
The package is used by
|
|
52
|
+
|
|
53
|
+
- [protein-detective](https://github.com/haddocking/protein-detective)
|
|
54
|
+
|
|
50
55
|
An example workflow:
|
|
51
56
|
|
|
52
57
|
```mermaid
|
|
@@ -56,12 +61,14 @@ graph TB;
|
|
|
56
61
|
searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
|
|
57
62
|
searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
|
|
58
63
|
searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
|
|
64
|
+
searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
|
|
65
|
+
searchcomplexes[/Search complexes/]
|
|
59
66
|
searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
|
|
60
67
|
searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
|
|
61
68
|
searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
|
|
62
|
-
fetchpdbe -->|
|
|
69
|
+
fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
|
|
63
70
|
chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
|
|
64
|
-
fetchad -->|
|
|
71
|
+
fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
|
|
65
72
|
confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
|
|
66
73
|
residuefilter --> |mmcif_files| ssfilter
|
|
67
74
|
classDef dashedBorder stroke-dasharray: 5 5;
|
|
@@ -69,6 +76,8 @@ graph TB;
|
|
|
69
76
|
taxonomy:::dashedBorder
|
|
70
77
|
searchemdb:::dashedBorder
|
|
71
78
|
fetchemdb:::dashedBorder
|
|
79
|
+
searchintactionpartners:::dashedBorder
|
|
80
|
+
searchcomplexes:::dashedBorder
|
|
72
81
|
```
|
|
73
82
|
|
|
74
83
|
(Dotted nodes and edges are side-quests.)
|
|
@@ -90,6 +99,9 @@ The main entry point is the `protein-quest` command line tool which has multiple
|
|
|
90
99
|
|
|
91
100
|
To use it programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
|
|
92
101
|
|
|
102
|
+
While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
|
|
103
|
+
This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
|
|
104
|
+
|
|
93
105
|
### Search Uniprot accessions
|
|
94
106
|
|
|
95
107
|
```shell
|
|
@@ -204,6 +216,32 @@ You can use following command to search for a Gene Ontology (GO) term.
|
|
|
204
216
|
protein-quest search go --limit 5 --aspect cellular_component apoptosome -
|
|
205
217
|
```
|
|
206
218
|
|
|
219
|
+
### Search for interaction partners
|
|
220
|
+
|
|
221
|
+
Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
|
|
222
|
+
|
|
223
|
+
```shell
|
|
224
|
+
protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
|
|
228
|
+
|
|
229
|
+
### Search for complexes
|
|
230
|
+
|
|
231
|
+
Given UniProt accessions, search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
|
|
232
|
+
and return the complex entries and their members.
|
|
233
|
+
|
|
234
|
+
```shell
|
|
235
|
+
echo Q05471 | protein-quest search complexes - complexes.csv
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
The `complexes.csv` file looks like:
|
|
239
|
+
|
|
240
|
+
```csv
|
|
241
|
+
query_protein,complex_id,complex_url,complex_title,members
|
|
242
|
+
Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
|
|
243
|
+
```
|
|
244
|
+
|
|
207
245
|
## Model Context Protocol (MCP) server
|
|
208
246
|
|
|
209
247
|
Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
|
|
@@ -17,6 +17,10 @@ It uses
|
|
|
17
17
|
- [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
|
|
18
18
|
- [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.
|
|
19
19
|
|
|
20
|
+
The package is used by
|
|
21
|
+
|
|
22
|
+
- [protein-detective](https://github.com/haddocking/protein-detective)
|
|
23
|
+
|
|
20
24
|
An example workflow:
|
|
21
25
|
|
|
22
26
|
```mermaid
|
|
@@ -26,12 +30,14 @@ graph TB;
|
|
|
26
30
|
searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
|
|
27
31
|
searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
|
|
28
32
|
searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
|
|
33
|
+
searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
|
|
34
|
+
searchcomplexes[/Search complexes/]
|
|
29
35
|
searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
|
|
30
36
|
searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
|
|
31
37
|
searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
|
|
32
|
-
fetchpdbe -->|
|
|
38
|
+
fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
|
|
33
39
|
chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
|
|
34
|
-
fetchad -->|
|
|
40
|
+
fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
|
|
35
41
|
confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
|
|
36
42
|
residuefilter --> |mmcif_files| ssfilter
|
|
37
43
|
classDef dashedBorder stroke-dasharray: 5 5;
|
|
@@ -39,6 +45,8 @@ graph TB;
|
|
|
39
45
|
taxonomy:::dashedBorder
|
|
40
46
|
searchemdb:::dashedBorder
|
|
41
47
|
fetchemdb:::dashedBorder
|
|
48
|
+
searchintactionpartners:::dashedBorder
|
|
49
|
+
searchcomplexes:::dashedBorder
|
|
42
50
|
```
|
|
43
51
|
|
|
44
52
|
(Dotted nodes and edges are side-quests.)
|
|
@@ -60,6 +68,9 @@ The main entry point is the `protein-quest` command line tool which has multiple
|
|
|
60
68
|
|
|
61
69
|
To use it programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
|
|
62
70
|
|
|
71
|
+
While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
|
|
72
|
+
This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
|
|
73
|
+
|
|
63
74
|
### Search Uniprot accessions
|
|
64
75
|
|
|
65
76
|
```shell
|
|
@@ -174,6 +185,32 @@ You can use following command to search for a Gene Ontology (GO) term.
|
|
|
174
185
|
protein-quest search go --limit 5 --aspect cellular_component apoptosome -
|
|
175
186
|
```
|
|
176
187
|
|
|
188
|
+
### Search for interaction partners
|
|
189
|
+
|
|
190
|
+
Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
|
|
191
|
+
|
|
192
|
+
```shell
|
|
193
|
+
protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
|
|
197
|
+
|
|
198
|
+
### Search for complexes
|
|
199
|
+
|
|
200
|
+
Given UniProt accessions, search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
|
|
201
|
+
and return the complex entries and their members.
|
|
202
|
+
|
|
203
|
+
```shell
|
|
204
|
+
echo Q05471 | protein-quest search complexes - complexes.csv
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
The `complexes.csv` file looks like:
|
|
208
|
+
|
|
209
|
+
```csv
|
|
210
|
+
query_protein,complex_id,complex_url,complex_title,members
|
|
211
|
+
Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
|
|
212
|
+
```
|
|
213
|
+
|
|
177
214
|
## Model Context Protocol (MCP) server
|
|
178
215
|
|
|
179
216
|
Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
},
|
|
13
13
|
{
|
|
14
14
|
"cell_type": "code",
|
|
15
|
-
"execution_count":
|
|
15
|
+
"execution_count": 1,
|
|
16
16
|
"id": "85674583",
|
|
17
17
|
"metadata": {},
|
|
18
18
|
"outputs": [],
|
|
@@ -282,6 +282,99 @@
|
|
|
282
282
|
"first_uniprot = next(iter(uniprot_accessions.items()))\n",
|
|
283
283
|
"pprint(first_uniprot)"
|
|
284
284
|
]
|
|
285
|
+
},
|
|
286
|
+
{
|
|
287
|
+
"cell_type": "markdown",
|
|
288
|
+
"id": "e32a95f8",
|
|
289
|
+
"metadata": {},
|
|
290
|
+
"source": [
|
|
291
|
+
"## Find interaction partners for uniprot entries"
|
|
292
|
+
]
|
|
293
|
+
},
|
|
294
|
+
{
|
|
295
|
+
"cell_type": "code",
|
|
296
|
+
"execution_count": 1,
|
|
297
|
+
"id": "d035c702",
|
|
298
|
+
"metadata": {},
|
|
299
|
+
"outputs": [],
|
|
300
|
+
"source": [
|
|
301
|
+
"from protein_quest.uniprot import search4interaction_partners, search4macromolecular_complexes"
|
|
302
|
+
]
|
|
303
|
+
},
|
|
304
|
+
{
|
|
305
|
+
"cell_type": "code",
|
|
306
|
+
"execution_count": 2,
|
|
307
|
+
"id": "601c690a",
|
|
308
|
+
"metadata": {},
|
|
309
|
+
"outputs": [],
|
|
310
|
+
"source": [
|
|
311
|
+
"# Helicase SWR1 in yeast\n",
|
|
312
|
+
"uniprot_accession = \"Q05471\""
|
|
313
|
+
]
|
|
314
|
+
},
|
|
315
|
+
{
|
|
316
|
+
"cell_type": "code",
|
|
317
|
+
"execution_count": 3,
|
|
318
|
+
"id": "173c764d",
|
|
319
|
+
"metadata": {},
|
|
320
|
+
"outputs": [
|
|
321
|
+
{
|
|
322
|
+
"data": {
|
|
323
|
+
"text/plain": [
|
|
324
|
+
"{'Q12464': {'CPX-2122'},\n",
|
|
325
|
+
" 'P35817': {'CPX-2122'},\n",
|
|
326
|
+
" 'P80428': {'CPX-2122'},\n",
|
|
327
|
+
" 'Q12509': {'CPX-2122'},\n",
|
|
328
|
+
" 'Q03388': {'CPX-2122'},\n",
|
|
329
|
+
" 'P53201': {'CPX-2122'},\n",
|
|
330
|
+
" 'P53930': {'CPX-2122'},\n",
|
|
331
|
+
" 'P60010': {'CPX-2122'},\n",
|
|
332
|
+
" 'Q03433': {'CPX-2122'},\n",
|
|
333
|
+
" 'Q06707': {'CPX-2122'},\n",
|
|
334
|
+
" 'P38326': {'CPX-2122'},\n",
|
|
335
|
+
" 'P31376': {'CPX-2122'},\n",
|
|
336
|
+
" 'Q03940': {'CPX-2122'}}"
|
|
337
|
+
]
|
|
338
|
+
},
|
|
339
|
+
"execution_count": 3,
|
|
340
|
+
"metadata": {},
|
|
341
|
+
"output_type": "execute_result"
|
|
342
|
+
}
|
|
343
|
+
],
|
|
344
|
+
"source": [
|
|
345
|
+
"partners = search4interaction_partners(uniprot_accession, limit=100)\n",
|
|
346
|
+
"partners"
|
|
347
|
+
]
|
|
348
|
+
},
|
|
349
|
+
{
|
|
350
|
+
"cell_type": "markdown",
|
|
351
|
+
"id": "a763b6f8",
|
|
352
|
+
"metadata": {},
|
|
353
|
+
"source": [
|
|
354
|
+
"To get more information about the complex you can search for the complexes themselves with:"
|
|
355
|
+
]
|
|
356
|
+
},
|
|
357
|
+
{
|
|
358
|
+
"cell_type": "code",
|
|
359
|
+
"execution_count": 4,
|
|
360
|
+
"id": "236050ea",
|
|
361
|
+
"metadata": {},
|
|
362
|
+
"outputs": [
|
|
363
|
+
{
|
|
364
|
+
"data": {
|
|
365
|
+
"text/plain": [
|
|
366
|
+
"[ComplexPortalEntry(query_protein='Q05471', complex_id='CPX-2122', complex_url='https://www.ebi.ac.uk/complexportal/complex/CPX-2122', complex_title='Swr1 chromatin remodelling complex', members={'P35817', 'Q05471', 'Q12464', 'Q12509', 'Q06707', 'Q03433', 'P38326', 'P53201', 'Q03388', 'P53930', 'P80428', 'Q03940', 'P60010', 'P31376'})]"
|
|
367
|
+
]
|
|
368
|
+
},
|
|
369
|
+
"execution_count": 4,
|
|
370
|
+
"metadata": {},
|
|
371
|
+
"output_type": "execute_result"
|
|
372
|
+
}
|
|
373
|
+
],
|
|
374
|
+
"source": [
|
|
375
|
+
"complexes = search4macromolecular_complexes([uniprot_accession])\n",
|
|
376
|
+
"complexes"
|
|
377
|
+
]
|
|
285
378
|
}
|
|
286
379
|
],
|
|
287
380
|
"metadata": {
|
|
@@ -300,7 +393,7 @@
|
|
|
300
393
|
"name": "python",
|
|
301
394
|
"nbconvert_exporter": "python",
|
|
302
395
|
"pygments_lexer": "ipython3",
|
|
303
|
-
"version": "3.13.
|
|
396
|
+
"version": "3.13.5"
|
|
304
397
|
}
|
|
305
398
|
},
|
|
306
399
|
"nbformat": 4,
|
|
@@ -3,10 +3,6 @@ site_url: https://bonvinlab.org/protein_quest
|
|
|
3
3
|
repo_name: haddocking/protein-quest
|
|
4
4
|
repo_url: https://github.com/haddocking/protein-quest
|
|
5
5
|
watch: [mkdocs.yml, README.md, src/protein_quest]
|
|
6
|
-
exclude_docs: |
|
|
7
|
-
cli_doc_hook.py
|
|
8
|
-
hooks:
|
|
9
|
-
- docs/cli_doc_hook.py
|
|
10
6
|
use_directory_urls: false
|
|
11
7
|
theme:
|
|
12
8
|
name: material
|
|
@@ -61,6 +57,9 @@ plugins:
|
|
|
61
57
|
remove_tag_config:
|
|
62
58
|
remove_input_tags:
|
|
63
59
|
- hide_code
|
|
60
|
+
- mkdocs-rich-argparse:
|
|
61
|
+
module: protein_quest.cli
|
|
62
|
+
factory: make_parser
|
|
64
63
|
|
|
65
64
|
markdown_extensions:
|
|
66
65
|
# Use to render part of README as home
|
|
@@ -20,6 +20,7 @@ dependencies = [
|
|
|
20
20
|
"sparqlwrapper>=2.0.0",
|
|
21
21
|
"tqdm>=4.67.1",
|
|
22
22
|
"yarl>=1.20.1",
|
|
23
|
+
"platformdirs>=4.3.8",
|
|
23
24
|
]
|
|
24
25
|
|
|
25
26
|
[project.urls]
|
|
@@ -52,10 +53,12 @@ dev = [
|
|
|
52
53
|
]
|
|
53
54
|
docs = [
|
|
54
55
|
"ipykernel>=6.29.5", # For notebook support in VS Code
|
|
56
|
+
"ipywidgets", # For tqdm support in notebooks
|
|
55
57
|
"mkdocs>=1.6.1",
|
|
56
58
|
"mkdocs-autoapi>=0.4.1",
|
|
57
59
|
"mkdocs-jupyter>=0.25.1",
|
|
58
60
|
"mkdocs-material>=9.6.14",
|
|
61
|
+
"mkdocs-rich-argparse>=0.1.2",
|
|
59
62
|
"mkdocstrings[python]>=0.29.1",
|
|
60
63
|
]
|
|
61
64
|
docs-type = [
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "0.5.0"
|
|
2
2
|
"""The version of the package."""
|
|
@@ -14,7 +14,7 @@ from yarl import URL
|
|
|
14
14
|
|
|
15
15
|
from protein_quest.alphafold.entry_summary import EntrySummary
|
|
16
16
|
from protein_quest.converter import converter
|
|
17
|
-
from protein_quest.utils import friendly_session, retrieve_files, run_async
|
|
17
|
+
from protein_quest.utils import Cacher, PassthroughCacher, friendly_session, retrieve_files, run_async
|
|
18
18
|
|
|
19
19
|
logger = logging.getLogger(__name__)
|
|
20
20
|
|
|
@@ -104,7 +104,7 @@ class AlphaFoldEntry:
|
|
|
104
104
|
|
|
105
105
|
|
|
106
106
|
async def fetch_summary(
|
|
107
|
-
qualifier: str, session: RetryClient, semaphore: Semaphore, save_dir: Path | None
|
|
107
|
+
qualifier: str, session: RetryClient, semaphore: Semaphore, save_dir: Path | None, cacher: Cacher
|
|
108
108
|
) -> list[EntrySummary]:
|
|
109
109
|
"""Fetches a summary from the AlphaFold database for a given qualifier.
|
|
110
110
|
|
|
@@ -116,6 +116,7 @@ async def fetch_summary(
|
|
|
116
116
|
save_dir: An optional directory to save the fetched summary as a JSON file.
|
|
117
117
|
If set and summary exists then summary will be loaded from disk instead of being fetched from the API.
|
|
118
118
|
If not set then the summary will not be saved to disk and will always be fetched from the API.
|
|
119
|
+
cacher: A cacher to use for caching the fetched summary. Only used if save_dir is not None.
|
|
119
120
|
|
|
120
121
|
Returns:
|
|
121
122
|
A list of EntrySummary objects representing the fetched summary.
|
|
@@ -124,6 +125,11 @@ async def fetch_summary(
|
|
|
124
125
|
fn: AsyncPath | None = None
|
|
125
126
|
if save_dir is not None:
|
|
126
127
|
fn = AsyncPath(save_dir / f"{qualifier}.json")
|
|
128
|
+
cached_file = await cacher.copy_from_cache(Path(fn))
|
|
129
|
+
if cached_file is not None:
|
|
130
|
+
logger.debug(f"Using cached file {cached_file} for summary of {qualifier}.")
|
|
131
|
+
raw_data = await AsyncPath(cached_file).read_bytes()
|
|
132
|
+
return converter.loads(raw_data, list[EntrySummary])
|
|
127
133
|
if await fn.exists():
|
|
128
134
|
logger.debug(f"File {fn} already exists. Skipping download from {url}.")
|
|
129
135
|
raw_data = await fn.read_bytes()
|
|
@@ -133,18 +139,23 @@ async def fetch_summary(
|
|
|
133
139
|
raw_data = await response.content.read()
|
|
134
140
|
if fn is not None:
|
|
135
141
|
# TODO return fn and make it part of AlphaFoldEntry as summary_file prop
|
|
136
|
-
await
|
|
142
|
+
await cacher.write_bytes(Path(fn), raw_data)
|
|
137
143
|
return converter.loads(raw_data, list[EntrySummary])
|
|
138
144
|
|
|
139
145
|
|
|
140
146
|
async def fetch_summaries(
|
|
141
|
-
qualifiers: Iterable[str],
|
|
147
|
+
qualifiers: Iterable[str],
|
|
148
|
+
save_dir: Path | None = None,
|
|
149
|
+
max_parallel_downloads: int = 5,
|
|
150
|
+
cacher: Cacher | None = None,
|
|
142
151
|
) -> AsyncGenerator[EntrySummary]:
|
|
143
152
|
semaphore = Semaphore(max_parallel_downloads)
|
|
144
153
|
if save_dir is not None:
|
|
145
154
|
save_dir.mkdir(parents=True, exist_ok=True)
|
|
155
|
+
if cacher is None:
|
|
156
|
+
cacher = PassthroughCacher()
|
|
146
157
|
async with friendly_session() as session:
|
|
147
|
-
tasks = [fetch_summary(qualifier, session, semaphore, save_dir) for qualifier in qualifiers]
|
|
158
|
+
tasks = [fetch_summary(qualifier, session, semaphore, save_dir, cacher) for qualifier in qualifiers]
|
|
148
159
|
summaries_per_qualifier: list[list[EntrySummary]] = await tqdm.gather(
|
|
149
160
|
*tasks, desc="Fetching Alphafold summaries"
|
|
150
161
|
)
|
|
@@ -154,7 +165,11 @@ async def fetch_summaries(
|
|
|
154
165
|
|
|
155
166
|
|
|
156
167
|
async def fetch_many_async(
|
|
157
|
-
uniprot_accessions: Iterable[str],
|
|
168
|
+
uniprot_accessions: Iterable[str],
|
|
169
|
+
save_dir: Path,
|
|
170
|
+
what: set[DownloadableFormat],
|
|
171
|
+
max_parallel_downloads: int = 5,
|
|
172
|
+
cacher: Cacher | None = None,
|
|
158
173
|
) -> AsyncGenerator[AlphaFoldEntry]:
|
|
159
174
|
"""Asynchronously fetches summaries and files from
|
|
160
175
|
[AlphaFold Protein Structure Database](https://alphafold.ebi.ac.uk/).
|
|
@@ -164,15 +179,17 @@ async def fetch_many_async(
|
|
|
164
179
|
save_dir: The directory to save the fetched files to.
|
|
165
180
|
what: A set of formats to download.
|
|
166
181
|
max_parallel_downloads: The maximum number of parallel downloads.
|
|
182
|
+
cacher: A cacher to use for caching the fetched files. Only used if summary is in what set.
|
|
167
183
|
|
|
168
184
|
Yields:
|
|
169
185
|
A dataclass containing the summary, pdb file, and pae file.
|
|
170
186
|
"""
|
|
171
187
|
save_dir_for_summaries = save_dir if "summary" in what and save_dir is not None else None
|
|
188
|
+
|
|
172
189
|
summaries = [
|
|
173
190
|
s
|
|
174
191
|
async for s in fetch_summaries(
|
|
175
|
-
uniprot_accessions, save_dir_for_summaries, max_parallel_downloads=max_parallel_downloads
|
|
192
|
+
uniprot_accessions, save_dir_for_summaries, max_parallel_downloads=max_parallel_downloads, cacher=cacher
|
|
176
193
|
)
|
|
177
194
|
]
|
|
178
195
|
|
|
@@ -183,6 +200,7 @@ async def fetch_many_async(
|
|
|
183
200
|
save_dir,
|
|
184
201
|
desc="Downloading AlphaFold files",
|
|
185
202
|
max_parallel_downloads=max_parallel_downloads,
|
|
203
|
+
cacher=cacher,
|
|
186
204
|
)
|
|
187
205
|
for summary in summaries:
|
|
188
206
|
yield AlphaFoldEntry(
|
|
@@ -236,7 +254,11 @@ def files_to_download(what: set[DownloadableFormat], summaries: Iterable[EntrySu
|
|
|
236
254
|
|
|
237
255
|
|
|
238
256
|
def fetch_many(
|
|
239
|
-
ids: Iterable[str],
|
|
257
|
+
ids: Iterable[str],
|
|
258
|
+
save_dir: Path,
|
|
259
|
+
what: set[DownloadableFormat],
|
|
260
|
+
max_parallel_downloads: int = 5,
|
|
261
|
+
cacher: Cacher | None = None,
|
|
240
262
|
) -> list[AlphaFoldEntry]:
|
|
241
263
|
"""Synchronously fetches summaries and pdb and pae files from AlphaFold Protein Structure Database.
|
|
242
264
|
|
|
@@ -245,6 +267,7 @@ def fetch_many(
|
|
|
245
267
|
save_dir: The directory to save the fetched files to.
|
|
246
268
|
what: A set of formats to download.
|
|
247
269
|
max_parallel_downloads: The maximum number of parallel downloads.
|
|
270
|
+
cacher: A cacher to use for caching the fetched files. Only used if summary is in what set.
|
|
248
271
|
|
|
249
272
|
Returns:
|
|
250
273
|
A list of AlphaFoldEntry dataclasses containing the summary, pdb file, and pae file.
|
|
@@ -253,7 +276,9 @@ def fetch_many(
|
|
|
253
276
|
async def gather_entries():
|
|
254
277
|
return [
|
|
255
278
|
entry
|
|
256
|
-
async for entry in fetch_many_async(
|
|
279
|
+
async for entry in fetch_many_async(
|
|
280
|
+
ids, save_dir, what, max_parallel_downloads=max_parallel_downloads, cacher=cacher
|
|
281
|
+
)
|
|
257
282
|
]
|
|
258
283
|
|
|
259
284
|
return run_async(gather_entries())
|