protein-quest 0.3.2__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {protein_quest-0.3.2 → protein_quest-0.8.0}/.github/workflows/ci.yml +0 -9
- {protein_quest-0.3.2 → protein_quest-0.8.0}/.github/workflows/pages.yml +9 -1
- {protein_quest-0.3.2 → protein_quest-0.8.0}/.gitignore +14 -1
- protein_quest-0.8.0/.python-version +1 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/PKG-INFO +104 -8
- {protein_quest-0.3.2 → protein_quest-0.8.0}/README.md +99 -5
- protein_quest-0.8.0/docs/notebooks/alphafold.ipynb +463 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/notebooks/pdbe.ipynb +12 -8
- {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/notebooks/uniprot.ipynb +96 -3
- {protein_quest-0.3.2 → protein_quest-0.8.0}/mkdocs.yml +3 -4
- {protein_quest-0.3.2 → protein_quest-0.8.0}/pyproject.toml +8 -2
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/__version__.py +1 -1
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/alphafold/confidence.py +2 -2
- protein_quest-0.8.0/src/protein_quest/alphafold/entry_summary.py +64 -0
- protein_quest-0.8.0/src/protein_quest/alphafold/fetch.py +534 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/cli.py +615 -130
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/converter.py +1 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/emdb.py +6 -3
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/filters.py +2 -5
- protein_quest-0.8.0/src/protein_quest/io.py +350 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/mcp_server.py +58 -13
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/pdbe/fetch.py +6 -3
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/ss.py +23 -7
- protein_quest-0.3.2/src/protein_quest/pdbe/io.py → protein_quest-0.8.0/src/protein_quest/structure.py +77 -126
- protein_quest-0.8.0/src/protein_quest/uniprot.py +975 -0
- protein_quest-0.8.0/src/protein_quest/utils.py +547 -0
- protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_alphafold_db_version.yaml +48 -0
- protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +55567 -0
- protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_all_isoforms.yaml +51 -0
- protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_gzipped.yaml +42326 -0
- protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary.yaml +9431 -0
- protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary_with_version.yaml +9385 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/alphafold/test_confidence.py +3 -2
- protein_quest-0.8.0/tests/alphafold/test_entry_summary.py +16 -0
- protein_quest-0.8.0/tests/alphafold/test_fetch.py +301 -0
- protein_quest-0.8.0/tests/cassettes/test_cli/test_search_pdbe.yaml +1023 -0
- protein_quest-0.8.0/tests/cassettes/test_cli/test_search_uniprot.yaml +64 -0
- protein_quest-0.8.0/tests/cassettes/test_cli/test_search_uniprot_details.yaml +87 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_do_not_match_external_isoform.yaml +62 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_match_canonical_isoform.yaml +66 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/test_map_uniprot_accessions2uniprot_details.yaml +145 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4af_ok_sequence_length.yaml +66 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4af_too_big_sequence_length.yaml +62 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4af_too_small_sequence_length.yaml +62 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4interaction_partners.yaml +382 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4macromolecular_complexes.yaml +382 -0
- protein_quest-0.8.0/tests/conftest.py +18 -0
- protein_quest-0.8.0/tests/fixtures/2Y29.cif.gz +0 -0
- protein_quest-0.8.0/tests/test_cli.py +101 -0
- protein_quest-0.8.0/tests/test_converter.py +23 -0
- protein_quest-0.8.0/tests/test_io.py +230 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/test_mcp.py +3 -8
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/test_ss.py +8 -10
- protein_quest-0.8.0/tests/test_structure.py +116 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/test_uniprot.py +267 -3
- protein_quest-0.8.0/tests/test_utils.py +518 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/uv.lock +166 -63
- protein_quest-0.3.2/docs/cli_doc_hook.py +0 -113
- protein_quest-0.3.2/docs/notebooks/alphafold.ipynb +0 -384
- protein_quest-0.3.2/src/protein_quest/alphafold/entry_summary.py +0 -40
- protein_quest-0.3.2/src/protein_quest/alphafold/fetch.py +0 -288
- protein_quest-0.3.2/src/protein_quest/uniprot.py +0 -511
- protein_quest-0.3.2/src/protein_quest/utils.py +0 -167
- protein_quest-0.3.2/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -6289
- protein_quest-0.3.2/tests/alphafold/test_entry_summary.py +0 -12
- protein_quest-0.3.2/tests/alphafold/test_fetch.py +0 -20
- protein_quest-0.3.2/tests/pdbe/fixtures/2y29.cif +0 -940
- protein_quest-0.3.2/tests/pdbe/test_io.py +0 -142
- protein_quest-0.3.2/tests/test_cli.py +0 -14
- protein_quest-0.3.2/tests/test_utils.py +0 -31
- {protein_quest-0.3.2 → protein_quest-0.8.0}/.github/workflows/pypi-publish.yml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/.vscode/extensions.json +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/CITATION.cff +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/CODE_OF_CONDUCT.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/CONTRIBUTING.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/LICENSE +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/CONTRIBUTING.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/index.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/notebooks/.gitignore +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/notebooks/index.md +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/protein-quest-mcp.png +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/__init__.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/alphafold/__init__.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/go.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/parallel.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/pdbe/__init__.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/py.typed +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/taxonomy.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/fixtures/3JRS_B2A.cif.gz +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/pdbe/test_fetch.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/test_emdb.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/test_go.py +0 -0
- {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/test_taxonomy.py +0 -0
|
@@ -27,20 +27,11 @@ jobs:
|
|
|
27
27
|
- name: Run tests
|
|
28
28
|
run: |
|
|
29
29
|
uv run pytest --cov --cov-report=xml
|
|
30
|
-
echo $? > pytest-exitcode
|
|
31
|
-
continue-on-error: true
|
|
32
|
-
# Always upload coverage, even if tests fail
|
|
33
30
|
- name: Run codacy-coverage-reporter
|
|
34
31
|
uses: codacy/codacy-coverage-reporter-action@v1.3.0
|
|
35
32
|
with:
|
|
36
33
|
project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
|
|
37
34
|
coverage-reports: coverage.xml
|
|
38
|
-
- name: Fail job if pytest failed
|
|
39
|
-
run: |
|
|
40
|
-
if [ -f pytest-exitcode ] && [ "$(cat pytest-exitcode)" -ne 0 ]; then
|
|
41
|
-
echo "Pytest failed, failing job."
|
|
42
|
-
exit 1
|
|
43
|
-
fi
|
|
44
35
|
build:
|
|
45
36
|
name: build
|
|
46
37
|
runs-on: ubuntu-latest
|
|
@@ -5,6 +5,7 @@ on:
|
|
|
5
5
|
branches:
|
|
6
6
|
- main
|
|
7
7
|
workflow_dispatch:
|
|
8
|
+
pull_request:
|
|
8
9
|
|
|
9
10
|
permissions:
|
|
10
11
|
contents: read
|
|
@@ -13,7 +14,7 @@ permissions:
|
|
|
13
14
|
|
|
14
15
|
# Only have one deployment in progress at a time
|
|
15
16
|
concurrency:
|
|
16
|
-
group:
|
|
17
|
+
group: pages
|
|
17
18
|
cancel-in-progress: true
|
|
18
19
|
|
|
19
20
|
jobs:
|
|
@@ -32,6 +33,10 @@ jobs:
|
|
|
32
33
|
- name: Build MkDocs site
|
|
33
34
|
run: |
|
|
34
35
|
uv run mkdocs build
|
|
36
|
+
env:
|
|
37
|
+
# Force colored output from rich library
|
|
38
|
+
TTY_COMPATIBLE: '1'
|
|
39
|
+
TTY_INTERACTIVE: '0'
|
|
35
40
|
|
|
36
41
|
- name: Upload artifact
|
|
37
42
|
uses: actions/upload-pages-artifact@v3
|
|
@@ -42,6 +47,9 @@ jobs:
|
|
|
42
47
|
# Add a dependency to the build job
|
|
43
48
|
needs: build
|
|
44
49
|
|
|
50
|
+
# Only deploy on pushes to main or manual trigger of main branch
|
|
51
|
+
if: github.ref == 'refs/heads/main'
|
|
52
|
+
|
|
45
53
|
# Grant GITHUB_TOKEN the permissions required to make a Pages deployment
|
|
46
54
|
permissions:
|
|
47
55
|
pages: write # to deploy to Pages
|
|
@@ -73,4 +73,17 @@ venv.bak/
|
|
|
73
73
|
/docs/pdb_files/
|
|
74
74
|
/docs/density_filtered/
|
|
75
75
|
/site
|
|
76
|
-
/mysession/
|
|
76
|
+
/mysession/
|
|
77
|
+
# Paths generated in README.md examples
|
|
78
|
+
uniprot_accs.txt
|
|
79
|
+
pdbe.csv
|
|
80
|
+
alphafold.csv
|
|
81
|
+
emdbs.csv
|
|
82
|
+
interaction-partners-of-Q05471.txt
|
|
83
|
+
complexes.csv
|
|
84
|
+
downloads-af/
|
|
85
|
+
downloads-emdb/
|
|
86
|
+
downloads-pdbe/
|
|
87
|
+
filtered/
|
|
88
|
+
filtered-chains/
|
|
89
|
+
filtered-ss/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.13
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: protein_quest
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: Search/retrieve/filter proteins and protein structures
|
|
5
5
|
Project-URL: Homepage, https://github.com/haddocking/protein-quest
|
|
6
6
|
Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
|
|
@@ -11,21 +11,23 @@ Requires-Python: >=3.13
|
|
|
11
11
|
Requires-Dist: aiofiles>=24.1.0
|
|
12
12
|
Requires-Dist: aiohttp-retry>=2.9.1
|
|
13
13
|
Requires-Dist: aiohttp[speedups]>=3.11.18
|
|
14
|
-
Requires-Dist: aiopath>=0.7.7
|
|
15
14
|
Requires-Dist: attrs>=25.3.0
|
|
16
15
|
Requires-Dist: cattrs[orjson]>=24.1.3
|
|
17
16
|
Requires-Dist: dask>=2025.5.1
|
|
18
17
|
Requires-Dist: distributed>=2025.5.1
|
|
19
18
|
Requires-Dist: gemmi>=0.7.3
|
|
19
|
+
Requires-Dist: mmcif>=0.92.0
|
|
20
|
+
Requires-Dist: platformdirs>=4.3.8
|
|
20
21
|
Requires-Dist: psutil>=7.0.0
|
|
21
22
|
Requires-Dist: rich-argparse>=1.7.1
|
|
22
23
|
Requires-Dist: rich>=14.0.0
|
|
24
|
+
Requires-Dist: shtab>=1.7.2
|
|
23
25
|
Requires-Dist: sparqlwrapper>=2.0.0
|
|
24
26
|
Requires-Dist: tqdm>=4.67.1
|
|
25
27
|
Requires-Dist: yarl>=1.20.1
|
|
26
28
|
Provides-Extra: mcp
|
|
27
29
|
Requires-Dist: fastmcp>=2.11.3; extra == 'mcp'
|
|
28
|
-
Requires-Dist: pydantic>=2.
|
|
30
|
+
Requires-Dist: pydantic>=2.12.0; extra == 'mcp'
|
|
29
31
|
Description-Content-Type: text/markdown
|
|
30
32
|
|
|
31
33
|
# protein-quest
|
|
@@ -47,6 +49,10 @@ It uses
|
|
|
47
49
|
- [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
|
|
48
50
|
- [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.
|
|
49
51
|
|
|
52
|
+
The package is used by
|
|
53
|
+
|
|
54
|
+
- [protein-detective](https://github.com/haddocking/protein-detective)
|
|
55
|
+
|
|
50
56
|
An example workflow:
|
|
51
57
|
|
|
52
58
|
```mermaid
|
|
@@ -56,19 +62,29 @@ graph TB;
|
|
|
56
62
|
searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
|
|
57
63
|
searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
|
|
58
64
|
searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
|
|
65
|
+
searchuniprot -. uniprot_accessions .-> searchuniprotdetails[/Search UniProt details/]
|
|
66
|
+
searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
|
|
67
|
+
searchcomplexes[/Search complexes/]
|
|
59
68
|
searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
|
|
60
69
|
searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
|
|
61
70
|
searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
|
|
62
|
-
fetchpdbe -->|
|
|
71
|
+
fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
|
|
63
72
|
chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
|
|
64
|
-
fetchad -->|
|
|
73
|
+
fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
|
|
65
74
|
confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
|
|
66
75
|
residuefilter --> |mmcif_files| ssfilter
|
|
76
|
+
ssfilter -. mmcif_files .-> convert2cif([Convert to cif])
|
|
77
|
+
ssfilter -. mmcif_files .-> convert2uniprot_accessions([Convert to UniProt accessions])
|
|
67
78
|
classDef dashedBorder stroke-dasharray: 5 5;
|
|
68
79
|
goterm:::dashedBorder
|
|
69
80
|
taxonomy:::dashedBorder
|
|
70
81
|
searchemdb:::dashedBorder
|
|
71
82
|
fetchemdb:::dashedBorder
|
|
83
|
+
searchintactionpartners:::dashedBorder
|
|
84
|
+
searchcomplexes:::dashedBorder
|
|
85
|
+
searchuniprotdetails:::dashedBorder
|
|
86
|
+
convert2cif:::dashedBorder
|
|
87
|
+
convert2uniprot_accessions:::dashedBorder
|
|
72
88
|
```
|
|
73
89
|
|
|
74
90
|
(Dotted nodes and edges are side-quests.)
|
|
@@ -90,13 +106,16 @@ The main entry point is the `protein-quest` command line tool which has multiple
|
|
|
90
106
|
|
|
91
107
|
To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
|
|
92
108
|
|
|
109
|
+
While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
|
|
110
|
+
This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
|
|
111
|
+
|
|
93
112
|
### Search Uniprot accessions
|
|
94
113
|
|
|
95
114
|
```shell
|
|
96
115
|
protein-quest search uniprot \
|
|
97
116
|
--taxon-id 9606 \
|
|
98
117
|
--reviewed \
|
|
99
|
-
--subcellular-location-uniprot nucleus \
|
|
118
|
+
--subcellular-location-uniprot "nucleus" \
|
|
100
119
|
--subcellular-location-go GO:0005634 \
|
|
101
120
|
--molecular-function-go GO:0003677 \
|
|
102
121
|
--limit 100 \
|
|
@@ -136,7 +155,7 @@ protein-quest retrieve pdbe pdbe.csv downloads-pdbe/
|
|
|
136
155
|
protein-quest retrieve alphafold alphafold.csv downloads-af/
|
|
137
156
|
```
|
|
138
157
|
|
|
139
|
-
For each entry downloads the
|
|
158
|
+
For each entry downloads the cif file.
|
|
140
159
|
|
|
141
160
|
### To retrieve EMDB volume files
|
|
142
161
|
|
|
@@ -179,7 +198,7 @@ protein-quest filter residue \
|
|
|
179
198
|
|
|
180
199
|
### To filter on secondary structure
|
|
181
200
|
|
|
182
|
-
To filter on structure being mostly alpha helices and have no beta sheets.
|
|
201
|
+
To filter on structures that are mostly alpha helices and have no beta sheets. See the following [notebook](https://www.bonvinlab.org/protein-detective/SSE_elements.html) to determine the ratio of secondary structure elements.
|
|
183
202
|
|
|
184
203
|
```shell
|
|
185
204
|
protein-quest filter secondary-structure \
|
|
@@ -204,6 +223,63 @@ You can use following command to search for a Gene Ontology (GO) term.
|
|
|
204
223
|
protein-quest search go --limit 5 --aspect cellular_component apoptosome -
|
|
205
224
|
```
|
|
206
225
|
|
|
226
|
+
### Search for interaction partners
|
|
227
|
+
|
|
228
|
+
Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
|
|
229
|
+
|
|
230
|
+
```shell
|
|
231
|
+
protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
|
|
235
|
+
|
|
236
|
+
### Search for complexes
|
|
237
|
+
|
|
238
|
+
Given Uniprot accessions search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
|
|
239
|
+
and return the complex entries and their members.
|
|
240
|
+
|
|
241
|
+
```shell
|
|
242
|
+
echo Q05471 | protein-quest search complexes - complexes.csv
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
The `complexes.csv` looks like
|
|
246
|
+
|
|
247
|
+
```csv
|
|
248
|
+
query_protein,complex_id,complex_url,complex_title,members
|
|
249
|
+
Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
### Search for UniProt details
|
|
253
|
+
|
|
254
|
+
To get details (like protein name, sequence length, organism) for a list of UniProt accessions.
|
|
255
|
+
|
|
256
|
+
```shell
|
|
257
|
+
protein-quest search uniprot-details uniprot_accs.txt uniprot_details.csv
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
The `uniprot_details.csv` looks like:
|
|
261
|
+
|
|
262
|
+
```csv
|
|
263
|
+
uniprot_accession,uniprot_id,sequence_length,reviewed,protein_name,taxon_id,taxon_name
|
|
264
|
+
A0A087WUV0,ZN892_HUMAN,522,True,Zinc finger protein 892,9606,Homo sapiens
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
### Convert structure files to .cif format
|
|
268
|
+
|
|
269
|
+
Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only work with `.cif` files and not `*.cif.gz` or `*.bcif` files.
|
|
270
|
+
|
|
271
|
+
```shell
|
|
272
|
+
protein-quest convert structures --format cif --output-dir ./filtered-cif ./filtered-ss
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
### Convert structure files to UniProt accessions
|
|
276
|
+
|
|
277
|
+
After running some filters you might want to know which UniProt accessions are still present in the filtered structures.
|
|
278
|
+
|
|
279
|
+
```shell
|
|
280
|
+
protein-quest convert uniprot ./filtered-ss uniprot_accs.filtered.txt
|
|
281
|
+
```
|
|
282
|
+
|
|
207
283
|
## Model Context Protocol (MCP) server
|
|
208
284
|
|
|
209
285
|
Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
|
|
@@ -224,6 +300,26 @@ protein-quest mcp
|
|
|
224
300
|
|
|
225
301
|
The MCP server contains a prompt template to search/retrieve/filter candidate structures.
|
|
226
302
|
|
|
303
|
+
## Shell autocompletion
|
|
304
|
+
|
|
305
|
+
The `protein-quest` command line tool supports shell autocompletion using [shtab](https://shtab.readthedocs.io/).
|
|
306
|
+
|
|
307
|
+
Initialize for bash shell with:
|
|
308
|
+
|
|
309
|
+
```shell
|
|
310
|
+
mkdir -p ~/.local/share/bash-completion/completions
|
|
311
|
+
protein-quest --print-completion bash > ~/.local/share/bash-completion/completions/protein-quest
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
Initialize for zsh shell with:
|
|
315
|
+
|
|
316
|
+
```shell
|
|
317
|
+
mkdir -p ~/.local/share/zsh/site-functions
|
|
318
|
+
protein-quest --print-completion zsh > ~/.local/share/zsh/site-functions/_protein-quest
|
|
319
|
+
fpath=("$HOME/.local/share/zsh/site-functions" $fpath)
|
|
320
|
+
autoload -Uz compinit && compinit
|
|
321
|
+
```
|
|
322
|
+
|
|
227
323
|
## Contributing
|
|
228
324
|
|
|
229
325
|
For development information and contribution guidelines, please see [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
@@ -17,6 +17,10 @@ It uses
|
|
|
17
17
|
- [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
|
|
18
18
|
- [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.
|
|
19
19
|
|
|
20
|
+
The package is used by
|
|
21
|
+
|
|
22
|
+
- [protein-detective](https://github.com/haddocking/protein-detective)
|
|
23
|
+
|
|
20
24
|
An example workflow:
|
|
21
25
|
|
|
22
26
|
```mermaid
|
|
@@ -26,19 +30,29 @@ graph TB;
|
|
|
26
30
|
searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
|
|
27
31
|
searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
|
|
28
32
|
searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
|
|
33
|
+
searchuniprot -. uniprot_accessions .-> searchuniprotdetails[/Search UniProt details/]
|
|
34
|
+
searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
|
|
35
|
+
searchcomplexes[/Search complexes/]
|
|
29
36
|
searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
|
|
30
37
|
searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
|
|
31
38
|
searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
|
|
32
|
-
fetchpdbe -->|
|
|
39
|
+
fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
|
|
33
40
|
chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
|
|
34
|
-
fetchad -->|
|
|
41
|
+
fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
|
|
35
42
|
confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
|
|
36
43
|
residuefilter --> |mmcif_files| ssfilter
|
|
44
|
+
ssfilter -. mmcif_files .-> convert2cif([Convert to cif])
|
|
45
|
+
ssfilter -. mmcif_files .-> convert2uniprot_accessions([Convert to UniProt accessions])
|
|
37
46
|
classDef dashedBorder stroke-dasharray: 5 5;
|
|
38
47
|
goterm:::dashedBorder
|
|
39
48
|
taxonomy:::dashedBorder
|
|
40
49
|
searchemdb:::dashedBorder
|
|
41
50
|
fetchemdb:::dashedBorder
|
|
51
|
+
searchintactionpartners:::dashedBorder
|
|
52
|
+
searchcomplexes:::dashedBorder
|
|
53
|
+
searchuniprotdetails:::dashedBorder
|
|
54
|
+
convert2cif:::dashedBorder
|
|
55
|
+
convert2uniprot_accessions:::dashedBorder
|
|
42
56
|
```
|
|
43
57
|
|
|
44
58
|
(Dotted nodes and edges are side-quests.)
|
|
@@ -60,13 +74,16 @@ The main entry point is the `protein-quest` command line tool which has multiple
|
|
|
60
74
|
|
|
61
75
|
To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
|
|
62
76
|
|
|
77
|
+
While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
|
|
78
|
+
This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
|
|
79
|
+
|
|
63
80
|
### Search Uniprot accessions
|
|
64
81
|
|
|
65
82
|
```shell
|
|
66
83
|
protein-quest search uniprot \
|
|
67
84
|
--taxon-id 9606 \
|
|
68
85
|
--reviewed \
|
|
69
|
-
--subcellular-location-uniprot nucleus \
|
|
86
|
+
--subcellular-location-uniprot "nucleus" \
|
|
70
87
|
--subcellular-location-go GO:0005634 \
|
|
71
88
|
--molecular-function-go GO:0003677 \
|
|
72
89
|
--limit 100 \
|
|
@@ -106,7 +123,7 @@ protein-quest retrieve pdbe pdbe.csv downloads-pdbe/
|
|
|
106
123
|
protein-quest retrieve alphafold alphafold.csv downloads-af/
|
|
107
124
|
```
|
|
108
125
|
|
|
109
|
-
For each entry downloads the
|
|
126
|
+
For each entry downloads the cif file.
|
|
110
127
|
|
|
111
128
|
### To retrieve EMDB volume files
|
|
112
129
|
|
|
@@ -149,7 +166,7 @@ protein-quest filter residue \
|
|
|
149
166
|
|
|
150
167
|
### To filter on secondary structure
|
|
151
168
|
|
|
152
|
-
To filter on structure being mostly alpha helices and have no beta sheets.
|
|
169
|
+
To filter on structures that are mostly alpha helices and have no beta sheets. See the following [notebook](https://www.bonvinlab.org/protein-detective/SSE_elements.html) to determine the ratio of secondary structure elements.
|
|
153
170
|
|
|
154
171
|
```shell
|
|
155
172
|
protein-quest filter secondary-structure \
|
|
@@ -174,6 +191,63 @@ You can use following command to search for a Gene Ontology (GO) term.
|
|
|
174
191
|
protein-quest search go --limit 5 --aspect cellular_component apoptosome -
|
|
175
192
|
```
|
|
176
193
|
|
|
194
|
+
### Search for interaction partners
|
|
195
|
+
|
|
196
|
+
Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
|
|
197
|
+
|
|
198
|
+
```shell
|
|
199
|
+
protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
|
|
203
|
+
|
|
204
|
+
### Search for complexes
|
|
205
|
+
|
|
206
|
+
Given Uniprot accessions search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
|
|
207
|
+
and return the complex entries and their members.
|
|
208
|
+
|
|
209
|
+
```shell
|
|
210
|
+
echo Q05471 | protein-quest search complexes - complexes.csv
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
The `complexes.csv` looks like
|
|
214
|
+
|
|
215
|
+
```csv
|
|
216
|
+
query_protein,complex_id,complex_url,complex_title,members
|
|
217
|
+
Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### Search for UniProt details
|
|
221
|
+
|
|
222
|
+
To get details (like protein name, sequence length, organism) for a list of UniProt accessions.
|
|
223
|
+
|
|
224
|
+
```shell
|
|
225
|
+
protein-quest search uniprot-details uniprot_accs.txt uniprot_details.csv
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
The `uniprot_details.csv` looks like:
|
|
229
|
+
|
|
230
|
+
```csv
|
|
231
|
+
uniprot_accession,uniprot_id,sequence_length,reviewed,protein_name,taxon_id,taxon_name
|
|
232
|
+
A0A087WUV0,ZN892_HUMAN,522,True,Zinc finger protein 892,9606,Homo sapiens
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
### Convert structure files to .cif format
|
|
236
|
+
|
|
237
|
+
Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only work with `.cif` files and not `*.cif.gz` or `*.bcif` files.
|
|
238
|
+
|
|
239
|
+
```shell
|
|
240
|
+
protein-quest convert structures --format cif --output-dir ./filtered-cif ./filtered-ss
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
### Convert structure files to UniProt accessions
|
|
244
|
+
|
|
245
|
+
After running some filters you might want to know which UniProt accessions are still present in the filtered structures.
|
|
246
|
+
|
|
247
|
+
```shell
|
|
248
|
+
protein-quest convert uniprot ./filtered-ss uniprot_accs.filtered.txt
|
|
249
|
+
```
|
|
250
|
+
|
|
177
251
|
## Model Context Protocol (MCP) server
|
|
178
252
|
|
|
179
253
|
Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
|
|
@@ -194,6 +268,26 @@ protein-quest mcp
|
|
|
194
268
|
|
|
195
269
|
The MCP server contains a prompt template to search/retrieve/filter candidate structures.
|
|
196
270
|
|
|
271
|
+
## Shell autocompletion
|
|
272
|
+
|
|
273
|
+
The `protein-quest` command line tool supports shell autocompletion using [shtab](https://shtab.readthedocs.io/).
|
|
274
|
+
|
|
275
|
+
Initialize for bash shell with:
|
|
276
|
+
|
|
277
|
+
```shell
|
|
278
|
+
mkdir -p ~/.local/share/bash-completion/completions
|
|
279
|
+
protein-quest --print-completion bash > ~/.local/share/bash-completion/completions/protein-quest
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
Initialize for zsh shell with:
|
|
283
|
+
|
|
284
|
+
```shell
|
|
285
|
+
mkdir -p ~/.local/share/zsh/site-functions
|
|
286
|
+
protein-quest --print-completion zsh > ~/.local/share/zsh/site-functions/_protein-quest
|
|
287
|
+
fpath=("$HOME/.local/share/zsh/site-functions" $fpath)
|
|
288
|
+
autoload -Uz compinit && compinit
|
|
289
|
+
```
|
|
290
|
+
|
|
197
291
|
## Contributing
|
|
198
292
|
|
|
199
293
|
For development information and contribution guidelines, please see [CONTRIBUTING.md](CONTRIBUTING.md).
|