protein-quest 0.3.0__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {protein_quest-0.3.0 → protein_quest-0.8.0}/.github/workflows/ci.yml +16 -10
- {protein_quest-0.3.0 → protein_quest-0.8.0}/.github/workflows/pages.yml +9 -1
- {protein_quest-0.3.0 → protein_quest-0.8.0}/.gitignore +14 -1
- protein_quest-0.8.0/.python-version +1 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/CITATION.cff +1 -2
- {protein_quest-0.3.0 → protein_quest-0.8.0}/CONTRIBUTING.md +15 -1
- {protein_quest-0.3.0 → protein_quest-0.8.0}/PKG-INFO +120 -14
- {protein_quest-0.3.0 → protein_quest-0.8.0}/README.md +115 -8
- protein_quest-0.8.0/docs/notebooks/.gitignore +4 -0
- protein_quest-0.8.0/docs/notebooks/alphafold.ipynb +463 -0
- protein_quest-0.8.0/docs/notebooks/index.md +3 -0
- protein_quest-0.8.0/docs/notebooks/pdbe.ipynb +282 -0
- protein_quest-0.8.0/docs/notebooks/uniprot.ipynb +401 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/mkdocs.yml +16 -8
- {protein_quest-0.3.0 → protein_quest-0.8.0}/pyproject.toml +20 -18
- protein_quest-0.8.0/src/protein_quest/__version__.py +2 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/alphafold/confidence.py +46 -19
- protein_quest-0.8.0/src/protein_quest/alphafold/entry_summary.py +64 -0
- protein_quest-0.8.0/src/protein_quest/alphafold/fetch.py +534 -0
- protein_quest-0.8.0/src/protein_quest/cli.py +1424 -0
- protein_quest-0.8.0/src/protein_quest/converter.py +46 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/emdb.py +6 -3
- protein_quest-0.8.0/src/protein_quest/filters.py +147 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/go.py +1 -4
- protein_quest-0.8.0/src/protein_quest/io.py +350 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/mcp_server.py +64 -16
- {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/parallel.py +37 -1
- {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/pdbe/fetch.py +20 -3
- protein_quest-0.8.0/src/protein_quest/ss.py +280 -0
- protein_quest-0.8.0/src/protein_quest/structure.py +232 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/taxonomy.py +13 -3
- protein_quest-0.8.0/src/protein_quest/uniprot.py +975 -0
- protein_quest-0.8.0/src/protein_quest/utils.py +547 -0
- protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_alphafold_db_version.yaml +48 -0
- protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +55567 -0
- protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_all_isoforms.yaml +51 -0
- protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_gzipped.yaml +42326 -0
- protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary.yaml +9431 -0
- protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary_with_version.yaml +9385 -0
- protein_quest-0.8.0/tests/alphafold/test_confidence.py +156 -0
- protein_quest-0.8.0/tests/alphafold/test_entry_summary.py +16 -0
- protein_quest-0.8.0/tests/alphafold/test_fetch.py +301 -0
- protein_quest-0.8.0/tests/cassettes/test_cli/test_search_pdbe.yaml +1023 -0
- protein_quest-0.8.0/tests/cassettes/test_cli/test_search_uniprot.yaml +64 -0
- protein_quest-0.8.0/tests/cassettes/test_cli/test_search_uniprot_details.yaml +87 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_do_not_match_external_isoform.yaml +62 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_match_canonical_isoform.yaml +66 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/test_map_uniprot_accessions2uniprot_details.yaml +145 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4af_ok_sequence_length.yaml +66 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4af_too_big_sequence_length.yaml +62 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4af_too_small_sequence_length.yaml +62 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4interaction_partners.yaml +382 -0
- protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4macromolecular_complexes.yaml +382 -0
- protein_quest-0.8.0/tests/conftest.py +18 -0
- protein_quest-0.8.0/tests/fixtures/2Y29.cif.gz +0 -0
- protein_quest-0.8.0/tests/fixtures/3JRS_B2A.cif.gz +0 -0
- protein_quest-0.8.0/tests/pdbe/test_fetch.py +29 -0
- protein_quest-0.8.0/tests/test_cli.py +101 -0
- protein_quest-0.8.0/tests/test_converter.py +23 -0
- protein_quest-0.8.0/tests/test_io.py +230 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/test_mcp.py +3 -8
- protein_quest-0.8.0/tests/test_ss.py +225 -0
- protein_quest-0.8.0/tests/test_structure.py +116 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/test_uniprot.py +267 -3
- protein_quest-0.8.0/tests/test_utils.py +518 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/uv.lock +198 -714
- protein_quest-0.3.0/docs/cli_doc_hook.py +0 -113
- protein_quest-0.3.0/src/protein_quest/__version__.py +0 -1
- protein_quest-0.3.0/src/protein_quest/alphafold/entry_summary.py +0 -38
- protein_quest-0.3.0/src/protein_quest/alphafold/fetch.py +0 -314
- protein_quest-0.3.0/src/protein_quest/cli.py +0 -782
- protein_quest-0.3.0/src/protein_quest/filters.py +0 -107
- protein_quest-0.3.0/src/protein_quest/pdbe/io.py +0 -185
- protein_quest-0.3.0/src/protein_quest/uniprot.py +0 -511
- protein_quest-0.3.0/src/protein_quest/utils.py +0 -105
- protein_quest-0.3.0/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -6289
- protein_quest-0.3.0/tests/alphafold/test_confidence.py +0 -63
- protein_quest-0.3.0/tests/alphafold/test_entry_summary.py +0 -15
- protein_quest-0.3.0/tests/alphafold/test_fetch.py +0 -20
- protein_quest-0.3.0/tests/pdbe/fixtures/2y29.cif +0 -940
- protein_quest-0.3.0/tests/pdbe/test_fetch.py +0 -17
- protein_quest-0.3.0/tests/pdbe/test_io.py +0 -81
- protein_quest-0.3.0/tests/test_cli.py +0 -14
- {protein_quest-0.3.0 → protein_quest-0.8.0}/.github/workflows/pypi-publish.yml +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/.vscode/extensions.json +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/CODE_OF_CONDUCT.md +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/LICENSE +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/docs/CONTRIBUTING.md +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/docs/index.md +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/docs/protein-quest-mcp.png +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/__init__.py +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/alphafold/__init__.py +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/pdbe/__init__.py +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/py.typed +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/test_emdb.py +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/test_go.py +0 -0
- {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/test_taxonomy.py +0 -0
|
@@ -3,7 +3,7 @@ name: CI
|
|
|
3
3
|
on:
|
|
4
4
|
push:
|
|
5
5
|
branches:
|
|
6
|
-
|
|
6
|
+
- main
|
|
7
7
|
pull_request:
|
|
8
8
|
|
|
9
9
|
concurrency:
|
|
@@ -27,20 +27,11 @@ jobs:
|
|
|
27
27
|
- name: Run tests
|
|
28
28
|
run: |
|
|
29
29
|
uv run pytest --cov --cov-report=xml
|
|
30
|
-
echo $? > pytest-exitcode
|
|
31
|
-
continue-on-error: true
|
|
32
|
-
# Always upload coverage, even if tests fail
|
|
33
30
|
- name: Run codacy-coverage-reporter
|
|
34
31
|
uses: codacy/codacy-coverage-reporter-action@v1.3.0
|
|
35
32
|
with:
|
|
36
33
|
project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
|
|
37
34
|
coverage-reports: coverage.xml
|
|
38
|
-
- name: Fail job if pytest failed
|
|
39
|
-
run: |
|
|
40
|
-
if [ -f pytest-exitcode ] && [ "$(cat pytest-exitcode)" -ne 0 ]; then
|
|
41
|
-
echo "Pytest failed, failing job."
|
|
42
|
-
exit 1
|
|
43
|
-
fi
|
|
44
35
|
build:
|
|
45
36
|
name: build
|
|
46
37
|
runs-on: ubuntu-latest
|
|
@@ -70,3 +61,18 @@ jobs:
|
|
|
70
61
|
run: uv sync --locked --dev --extra mcp
|
|
71
62
|
- name: Run type checkers
|
|
72
63
|
run: uv run pyrefly check src tests
|
|
64
|
+
typing-docs:
|
|
65
|
+
name: typing-docs
|
|
66
|
+
runs-on: ubuntu-latest
|
|
67
|
+
steps:
|
|
68
|
+
- uses: actions/checkout@v4
|
|
69
|
+
- name: Install uv
|
|
70
|
+
uses: astral-sh/setup-uv@v6
|
|
71
|
+
- name: Install the project
|
|
72
|
+
run: uv sync --group docs-type
|
|
73
|
+
- name: Convert notebooks to Python scripts
|
|
74
|
+
run: |
|
|
75
|
+
find docs/ -name "*.ipynb" -exec uv run --group docs-type marimo convert {} -o {}.py \;
|
|
76
|
+
- name: Run type checkers on docs
|
|
77
|
+
run: uv run --group docs-type pyrefly check docs/notebooks/*.ipynb.py
|
|
78
|
+
|
|
@@ -5,6 +5,7 @@ on:
|
|
|
5
5
|
branches:
|
|
6
6
|
- main
|
|
7
7
|
workflow_dispatch:
|
|
8
|
+
pull_request:
|
|
8
9
|
|
|
9
10
|
permissions:
|
|
10
11
|
contents: read
|
|
@@ -13,7 +14,7 @@ permissions:
|
|
|
13
14
|
|
|
14
15
|
# Only have one deployment in progress at a time
|
|
15
16
|
concurrency:
|
|
16
|
-
group:
|
|
17
|
+
group: pages
|
|
17
18
|
cancel-in-progress: true
|
|
18
19
|
|
|
19
20
|
jobs:
|
|
@@ -32,6 +33,10 @@ jobs:
|
|
|
32
33
|
- name: Build MkDocs site
|
|
33
34
|
run: |
|
|
34
35
|
uv run mkdocs build
|
|
36
|
+
env:
|
|
37
|
+
# Force colored output from rich library
|
|
38
|
+
TTY_COMPATIBLE: '1'
|
|
39
|
+
TTY_INTERACTIVE: '0'
|
|
35
40
|
|
|
36
41
|
- name: Upload artifact
|
|
37
42
|
uses: actions/upload-pages-artifact@v3
|
|
@@ -42,6 +47,9 @@ jobs:
|
|
|
42
47
|
# Add a dependency to the build job
|
|
43
48
|
needs: build
|
|
44
49
|
|
|
50
|
+
# Only deploy on pushes to main or manual trigger of main branch
|
|
51
|
+
if: github.ref == 'refs/heads/main'
|
|
52
|
+
|
|
45
53
|
# Grant GITHUB_TOKEN the permissions required to make a Pages deployment
|
|
46
54
|
permissions:
|
|
47
55
|
pages: write # to deploy to Pages
|
|
@@ -73,4 +73,17 @@ venv.bak/
|
|
|
73
73
|
/docs/pdb_files/
|
|
74
74
|
/docs/density_filtered/
|
|
75
75
|
/site
|
|
76
|
-
/mysession/
|
|
76
|
+
/mysession/
|
|
77
|
+
# Paths generated in README.md examples
|
|
78
|
+
uniprot_accs.txt
|
|
79
|
+
pdbe.csv
|
|
80
|
+
alphafold.csv
|
|
81
|
+
emdbs.csv
|
|
82
|
+
interaction-partners-of-Q05471.txt
|
|
83
|
+
complexes.csv
|
|
84
|
+
downloads-af/
|
|
85
|
+
downloads-emdb/
|
|
86
|
+
downloads-pdbe/
|
|
87
|
+
filtered/
|
|
88
|
+
filtered-chains/
|
|
89
|
+
filtered-ss/
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.13
|
|
@@ -82,9 +82,23 @@ uv run mkdocs build
|
|
|
82
82
|
python3 -m http.server -d site
|
|
83
83
|
```
|
|
84
84
|
|
|
85
|
+
<details>
|
|
86
|
+
<summary>Type checking notebooks</summary>
|
|
87
|
+
|
|
88
|
+
[Pyrefly](https://pyrefly.org/) does not support notebooks yet, so we need to convert them to python scripts and then run pyrefly on them.
|
|
89
|
+
|
|
90
|
+
```shell
|
|
91
|
+
find docs/ -name "*.ipynb" -exec uv run --group docs-type marimo convert {} -o {}.py \;
|
|
92
|
+
uv run --group docs-type pyrefly check docs/notebooks/*.ipynb.py
|
|
93
|
+
rm docs/notebooks/*.ipynb.py
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
</details>
|
|
97
|
+
|
|
98
|
+
|
|
85
99
|
## Contributing to tests
|
|
86
100
|
|
|
87
|
-
The code coverage
|
|
101
|
+
The code coverage is stored at [https://app.codacy.com/gh/haddocking/protein-quest/coverage](https://app.codacy.com/gh/haddocking/protein-quest/coverage).
|
|
88
102
|
|
|
89
103
|
The search functions of the protein-quest package talk to web services on the Internet.
|
|
90
104
|
To have fast tests we use [pytest-recording](https://github.com/kiwicom/pytest-recording) to record and replay HTTP interactions.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: protein_quest
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: Search/retrieve/filter proteins and protein structures
|
|
5
5
|
Project-URL: Homepage, https://github.com/haddocking/protein-quest
|
|
6
6
|
Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
|
|
@@ -11,24 +11,23 @@ Requires-Python: >=3.13
|
|
|
11
11
|
Requires-Dist: aiofiles>=24.1.0
|
|
12
12
|
Requires-Dist: aiohttp-retry>=2.9.1
|
|
13
13
|
Requires-Dist: aiohttp[speedups]>=3.11.18
|
|
14
|
-
Requires-Dist: aiopath>=0.7.7
|
|
15
14
|
Requires-Dist: attrs>=25.3.0
|
|
16
|
-
Requires-Dist: bokeh>=3.7.3
|
|
17
15
|
Requires-Dist: cattrs[orjson]>=24.1.3
|
|
18
16
|
Requires-Dist: dask>=2025.5.1
|
|
19
17
|
Requires-Dist: distributed>=2025.5.1
|
|
20
18
|
Requires-Dist: gemmi>=0.7.3
|
|
21
|
-
Requires-Dist:
|
|
22
|
-
Requires-Dist: pandas>=2.3.0
|
|
19
|
+
Requires-Dist: mmcif>=0.92.0
|
|
23
20
|
Requires-Dist: platformdirs>=4.3.8
|
|
24
21
|
Requires-Dist: psutil>=7.0.0
|
|
25
22
|
Requires-Dist: rich-argparse>=1.7.1
|
|
26
23
|
Requires-Dist: rich>=14.0.0
|
|
24
|
+
Requires-Dist: shtab>=1.7.2
|
|
27
25
|
Requires-Dist: sparqlwrapper>=2.0.0
|
|
28
26
|
Requires-Dist: tqdm>=4.67.1
|
|
27
|
+
Requires-Dist: yarl>=1.20.1
|
|
29
28
|
Provides-Extra: mcp
|
|
30
29
|
Requires-Dist: fastmcp>=2.11.3; extra == 'mcp'
|
|
31
|
-
Requires-Dist: pydantic>=2.
|
|
30
|
+
Requires-Dist: pydantic>=2.12.0; extra == 'mcp'
|
|
32
31
|
Description-Content-Type: text/markdown
|
|
33
32
|
|
|
34
33
|
# protein-quest
|
|
@@ -37,8 +36,7 @@ Description-Content-Type: text/markdown
|
|
|
37
36
|
[](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
|
|
38
37
|
[](https://www.research-software.nl/software/protein-quest)
|
|
39
38
|
[](https://pypi.org/project/protein-quest/)
|
|
40
|
-
|
|
41
|
-
[](https://doi.org/10.5281/zenodo.15632658)
|
|
39
|
+
[](https://doi.org/10.5281/zenodo.16941288)
|
|
42
40
|
[](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
|
|
43
41
|
|
|
44
42
|
Python package to search/retrieve/filter proteins and protein structures.
|
|
@@ -51,6 +49,10 @@ It uses
|
|
|
51
49
|
- [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
|
|
52
50
|
- [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.
|
|
53
51
|
|
|
52
|
+
The package is used by
|
|
53
|
+
|
|
54
|
+
- [protein-detective](https://github.com/haddocking/protein-detective)
|
|
55
|
+
|
|
54
56
|
An example workflow:
|
|
55
57
|
|
|
56
58
|
```mermaid
|
|
@@ -60,17 +62,29 @@ graph TB;
|
|
|
60
62
|
searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
|
|
61
63
|
searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
|
|
62
64
|
searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
|
|
65
|
+
searchuniprot -. uniprot_accessions .-> searchuniprotdetails[/Search UniProt details/]
|
|
66
|
+
searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
|
|
67
|
+
searchcomplexes[/Search complexes/]
|
|
63
68
|
searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
|
|
64
69
|
searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
|
|
65
70
|
searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
|
|
66
|
-
fetchpdbe -->|
|
|
67
|
-
chainfilter --> |mmcif_files| residuefilter{Filter on chain length}
|
|
68
|
-
fetchad -->|
|
|
71
|
+
fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
|
|
72
|
+
chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
|
|
73
|
+
fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
|
|
74
|
+
confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
|
|
75
|
+
residuefilter --> |mmcif_files| ssfilter
|
|
76
|
+
ssfilter -. mmcif_files .-> convert2cif([Convert to cif])
|
|
77
|
+
ssfilter -. mmcif_files .-> convert2uniprot_accessions([Convert to UniProt accessions])
|
|
69
78
|
classDef dashedBorder stroke-dasharray: 5 5;
|
|
70
79
|
goterm:::dashedBorder
|
|
71
80
|
taxonomy:::dashedBorder
|
|
72
81
|
searchemdb:::dashedBorder
|
|
73
82
|
fetchemdb:::dashedBorder
|
|
83
|
+
searchintactionpartners:::dashedBorder
|
|
84
|
+
searchcomplexes:::dashedBorder
|
|
85
|
+
searchuniprotdetails:::dashedBorder
|
|
86
|
+
convert2cif:::dashedBorder
|
|
87
|
+
convert2uniprot_accessions:::dashedBorder
|
|
74
88
|
```
|
|
75
89
|
|
|
76
90
|
(Dotted nodes and edges are side-quests.)
|
|
@@ -90,7 +104,10 @@ pip install git+https://github.com/haddocking/protein-quest.git
|
|
|
90
104
|
|
|
91
105
|
The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
|
|
92
106
|
|
|
93
|
-
To use programmaticly, see [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
|
|
107
|
+
To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
|
|
108
|
+
|
|
109
|
+
While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
|
|
110
|
+
This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
|
|
94
111
|
|
|
95
112
|
### Search Uniprot accessions
|
|
96
113
|
|
|
@@ -98,7 +115,7 @@ To use programmaticly, see [API documentation](https://www.bonvinlab.org/protein
|
|
|
98
115
|
protein-quest search uniprot \
|
|
99
116
|
--taxon-id 9606 \
|
|
100
117
|
--reviewed \
|
|
101
|
-
--subcellular-location-uniprot nucleus \
|
|
118
|
+
--subcellular-location-uniprot "nucleus" \
|
|
102
119
|
--subcellular-location-go GO:0005634 \
|
|
103
120
|
--molecular-function-go GO:0003677 \
|
|
104
121
|
--limit 100 \
|
|
@@ -138,7 +155,7 @@ protein-quest retrieve pdbe pdbe.csv downloads-pdbe/
|
|
|
138
155
|
protein-quest retrieve alphafold alphafold.csv downloads-af/
|
|
139
156
|
```
|
|
140
157
|
|
|
141
|
-
For each entry downloads the
|
|
158
|
+
For each entry downloads the cif file.
|
|
142
159
|
|
|
143
160
|
### To retrieve EMDB volume files
|
|
144
161
|
|
|
@@ -179,6 +196,18 @@ protein-quest filter residue \
|
|
|
179
196
|
./filtered-chains ./filtered
|
|
180
197
|
```
|
|
181
198
|
|
|
199
|
+
### To filter on secondary structure
|
|
200
|
+
|
|
201
|
+
To keep only structures that are mostly alpha helices and have no beta sheets, use the command below. See the following [notebook](https://www.bonvinlab.org/protein-detective/SSE_elements.html) to determine the ratio of secondary structure elements.
|
|
202
|
+
|
|
203
|
+
```shell
|
|
204
|
+
protein-quest filter secondary-structure \
|
|
205
|
+
--ratio-min-helix-residues 0.5 \
|
|
206
|
+
--ratio-max-sheet-residues 0.0 \
|
|
207
|
+
--write-stats filtered-ss/stats.csv \
|
|
208
|
+
./filtered-chains ./filtered-ss
|
|
209
|
+
```
|
|
210
|
+
|
|
182
211
|
### Search Taxonomy
|
|
183
212
|
|
|
184
213
|
```shell
|
|
@@ -194,6 +223,63 @@ You can use following command to search for a Gene Ontology (GO) term.
|
|
|
194
223
|
protein-quest search go --limit 5 --aspect cellular_component apoptosome -
|
|
195
224
|
```
|
|
196
225
|
|
|
226
|
+
### Search for interaction partners
|
|
227
|
+
|
|
228
|
+
Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
|
|
229
|
+
|
|
230
|
+
```shell
|
|
231
|
+
protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
|
|
235
|
+
|
|
236
|
+
### Search for complexes
|
|
237
|
+
|
|
238
|
+
Given Uniprot accessions search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
|
|
239
|
+
and return the complex entries and their members.
|
|
240
|
+
|
|
241
|
+
```shell
|
|
242
|
+
echo Q05471 | protein-quest search complexes - complexes.csv
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
The `complexes.csv` looks like
|
|
246
|
+
|
|
247
|
+
```csv
|
|
248
|
+
query_protein,complex_id,complex_url,complex_title,members
|
|
249
|
+
Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
### Search for UniProt details
|
|
253
|
+
|
|
254
|
+
To get details (like protein name, sequence length, organism) for a list of UniProt accessions.
|
|
255
|
+
|
|
256
|
+
```shell
|
|
257
|
+
protein-quest search uniprot-details uniprot_accs.txt uniprot_details.csv
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
The `uniprot_details.csv` looks like:
|
|
261
|
+
|
|
262
|
+
```csv
|
|
263
|
+
uniprot_accession,uniprot_id,sequence_length,reviewed,protein_name,taxon_id,taxon_name
|
|
264
|
+
A0A087WUV0,ZN892_HUMAN,522,True,Zinc finger protein 892,9606,Homo sapiens
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
### Convert structure files to .cif format
|
|
268
|
+
|
|
269
|
+
Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only work with `.cif` files and not `*.cif.gz` or `*.bcif` files.
|
|
270
|
+
|
|
271
|
+
```shell
|
|
272
|
+
protein-quest convert structures --format cif --output-dir ./filtered-cif ./filtered-ss
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
### Convert structure files to UniProt accessions
|
|
276
|
+
|
|
277
|
+
After running some filters you might want to know which UniProt accessions are still present in the filtered structures.
|
|
278
|
+
|
|
279
|
+
```shell
|
|
280
|
+
protein-quest convert uniprot ./filtered-ss uniprot_accs.filtered.txt
|
|
281
|
+
```
|
|
282
|
+
|
|
197
283
|
## Model Context Protocol (MCP) server
|
|
198
284
|
|
|
199
285
|
Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
|
|
@@ -214,6 +300,26 @@ protein-quest mcp
|
|
|
214
300
|
|
|
215
301
|
The mcp server contains a prompt template to search/retrieve/filter candidate structures.
|
|
216
302
|
|
|
303
|
+
## Shell autocompletion
|
|
304
|
+
|
|
305
|
+
The `protein-quest` command line tool supports shell autocompletion using [shtab](https://shtab.readthedocs.io/).
|
|
306
|
+
|
|
307
|
+
Initialize for bash shell with:
|
|
308
|
+
|
|
309
|
+
```shell
|
|
310
|
+
mkdir -p ~/.local/share/bash-completion/completions
|
|
311
|
+
protein-quest --print-completion bash > ~/.local/share/bash-completion/completions/protein-quest
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
Initialize for zsh shell with:
|
|
315
|
+
|
|
316
|
+
```shell
|
|
317
|
+
mkdir -p ~/.local/share/zsh/site-functions
|
|
318
|
+
protein-quest --print-completion zsh > ~/.local/share/zsh/site-functions/_protein-quest
|
|
319
|
+
fpath=("$HOME/.local/share/zsh/site-functions" $fpath)
|
|
320
|
+
autoload -Uz compinit && compinit
|
|
321
|
+
```
|
|
322
|
+
|
|
217
323
|
## Contributing
|
|
218
324
|
|
|
219
325
|
For development information and contribution guidelines, please see [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
@@ -4,8 +4,7 @@
|
|
|
4
4
|
[](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
|
|
5
5
|
[](https://www.research-software.nl/software/protein-quest)
|
|
6
6
|
[](https://pypi.org/project/protein-quest/)
|
|
7
|
-
|
|
8
|
-
[](https://doi.org/10.5281/zenodo.15632658)
|
|
7
|
+
[](https://doi.org/10.5281/zenodo.16941288)
|
|
9
8
|
[](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
|
|
10
9
|
|
|
11
10
|
Python package to search/retrieve/filter proteins and protein structures.
|
|
@@ -18,6 +17,10 @@ It uses
|
|
|
18
17
|
- [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
|
|
19
18
|
- [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.
|
|
20
19
|
|
|
20
|
+
The package is used by
|
|
21
|
+
|
|
22
|
+
- [protein-detective](https://github.com/haddocking/protein-detective)
|
|
23
|
+
|
|
21
24
|
An example workflow:
|
|
22
25
|
|
|
23
26
|
```mermaid
|
|
@@ -27,17 +30,29 @@ graph TB;
|
|
|
27
30
|
searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
|
|
28
31
|
searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
|
|
29
32
|
searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
|
|
33
|
+
searchuniprot -. uniprot_accessions .-> searchuniprotdetails[/Search UniProt details/]
|
|
34
|
+
searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
|
|
35
|
+
searchcomplexes[/Search complexes/]
|
|
30
36
|
searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
|
|
31
37
|
searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
|
|
32
38
|
searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
|
|
33
|
-
fetchpdbe -->|
|
|
34
|
-
chainfilter --> |mmcif_files| residuefilter{Filter on chain length}
|
|
35
|
-
fetchad -->|
|
|
39
|
+
fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
|
|
40
|
+
chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
|
|
41
|
+
fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
|
|
42
|
+
confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
|
|
43
|
+
residuefilter --> |mmcif_files| ssfilter
|
|
44
|
+
ssfilter -. mmcif_files .-> convert2cif([Convert to cif])
|
|
45
|
+
ssfilter -. mmcif_files .-> convert2uniprot_accessions([Convert to UniProt accessions])
|
|
36
46
|
classDef dashedBorder stroke-dasharray: 5 5;
|
|
37
47
|
goterm:::dashedBorder
|
|
38
48
|
taxonomy:::dashedBorder
|
|
39
49
|
searchemdb:::dashedBorder
|
|
40
50
|
fetchemdb:::dashedBorder
|
|
51
|
+
searchintactionpartners:::dashedBorder
|
|
52
|
+
searchcomplexes:::dashedBorder
|
|
53
|
+
searchuniprotdetails:::dashedBorder
|
|
54
|
+
convert2cif:::dashedBorder
|
|
55
|
+
convert2uniprot_accessions:::dashedBorder
|
|
41
56
|
```
|
|
42
57
|
|
|
43
58
|
(Dotted nodes and edges are side-quests.)
|
|
@@ -57,7 +72,10 @@ pip install git+https://github.com/haddocking/protein-quest.git
|
|
|
57
72
|
|
|
58
73
|
The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
|
|
59
74
|
|
|
60
|
-
To use programmaticly, see [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
|
|
75
|
+
To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
|
|
76
|
+
|
|
77
|
+
While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
|
|
78
|
+
This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
|
|
61
79
|
|
|
62
80
|
### Search Uniprot accessions
|
|
63
81
|
|
|
@@ -65,7 +83,7 @@ To use programmaticly, see [API documentation](https://www.bonvinlab.org/protein
|
|
|
65
83
|
protein-quest search uniprot \
|
|
66
84
|
--taxon-id 9606 \
|
|
67
85
|
--reviewed \
|
|
68
|
-
--subcellular-location-uniprot nucleus \
|
|
86
|
+
--subcellular-location-uniprot "nucleus" \
|
|
69
87
|
--subcellular-location-go GO:0005634 \
|
|
70
88
|
--molecular-function-go GO:0003677 \
|
|
71
89
|
--limit 100 \
|
|
@@ -105,7 +123,7 @@ protein-quest retrieve pdbe pdbe.csv downloads-pdbe/
|
|
|
105
123
|
protein-quest retrieve alphafold alphafold.csv downloads-af/
|
|
106
124
|
```
|
|
107
125
|
|
|
108
|
-
For each entry downloads the
|
|
126
|
+
For each entry downloads the cif file.
|
|
109
127
|
|
|
110
128
|
### To retrieve EMDB volume files
|
|
111
129
|
|
|
@@ -146,6 +164,18 @@ protein-quest filter residue \
|
|
|
146
164
|
./filtered-chains ./filtered
|
|
147
165
|
```
|
|
148
166
|
|
|
167
|
+
### To filter on secondary structure
|
|
168
|
+
|
|
169
|
+
To keep only structures that are mostly alpha helices and have no beta sheets, use the command below. See the following [notebook](https://www.bonvinlab.org/protein-detective/SSE_elements.html) to determine the ratio of secondary structure elements.
|
|
170
|
+
|
|
171
|
+
```shell
|
|
172
|
+
protein-quest filter secondary-structure \
|
|
173
|
+
--ratio-min-helix-residues 0.5 \
|
|
174
|
+
--ratio-max-sheet-residues 0.0 \
|
|
175
|
+
--write-stats filtered-ss/stats.csv \
|
|
176
|
+
./filtered-chains ./filtered-ss
|
|
177
|
+
```
|
|
178
|
+
|
|
149
179
|
### Search Taxonomy
|
|
150
180
|
|
|
151
181
|
```shell
|
|
@@ -161,6 +191,63 @@ You can use following command to search for a Gene Ontology (GO) term.
|
|
|
161
191
|
protein-quest search go --limit 5 --aspect cellular_component apoptosome -
|
|
162
192
|
```
|
|
163
193
|
|
|
194
|
+
### Search for interaction partners
|
|
195
|
+
|
|
196
|
+
Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
|
|
197
|
+
|
|
198
|
+
```shell
|
|
199
|
+
protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
|
|
203
|
+
|
|
204
|
+
### Search for complexes
|
|
205
|
+
|
|
206
|
+
Given Uniprot accessions search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
|
|
207
|
+
and return the complex entries and their members.
|
|
208
|
+
|
|
209
|
+
```shell
|
|
210
|
+
echo Q05471 | protein-quest search complexes - complexes.csv
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
The `complexes.csv` looks like
|
|
214
|
+
|
|
215
|
+
```csv
|
|
216
|
+
query_protein,complex_id,complex_url,complex_title,members
|
|
217
|
+
Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### Search for UniProt details
|
|
221
|
+
|
|
222
|
+
To get details (like protein name, sequence length, organism) for a list of UniProt accessions.
|
|
223
|
+
|
|
224
|
+
```shell
|
|
225
|
+
protein-quest search uniprot-details uniprot_accs.txt uniprot_details.csv
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
The `uniprot_details.csv` looks like:
|
|
229
|
+
|
|
230
|
+
```csv
|
|
231
|
+
uniprot_accession,uniprot_id,sequence_length,reviewed,protein_name,taxon_id,taxon_name
|
|
232
|
+
A0A087WUV0,ZN892_HUMAN,522,True,Zinc finger protein 892,9606,Homo sapiens
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
### Convert structure files to .cif format
|
|
236
|
+
|
|
237
|
+
Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only work with `.cif` files and not `*.cif.gz` or `*.bcif` files.
|
|
238
|
+
|
|
239
|
+
```shell
|
|
240
|
+
protein-quest convert structures --format cif --output-dir ./filtered-cif ./filtered-ss
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
### Convert structure files to UniProt accessions
|
|
244
|
+
|
|
245
|
+
After running some filters you might want to know which UniProt accessions are still present in the filtered structures.
|
|
246
|
+
|
|
247
|
+
```shell
|
|
248
|
+
protein-quest convert uniprot ./filtered-ss uniprot_accs.filtered.txt
|
|
249
|
+
```
|
|
250
|
+
|
|
164
251
|
## Model Context Protocol (MCP) server
|
|
165
252
|
|
|
166
253
|
Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
|
|
@@ -181,6 +268,26 @@ protein-quest mcp
|
|
|
181
268
|
|
|
182
269
|
The mcp server contains a prompt template to search/retrieve/filter candidate structures.
|
|
183
270
|
|
|
271
|
+
## Shell autocompletion
|
|
272
|
+
|
|
273
|
+
The `protein-quest` command line tool supports shell autocompletion using [shtab](https://shtab.readthedocs.io/).
|
|
274
|
+
|
|
275
|
+
Initialize for bash shell with:
|
|
276
|
+
|
|
277
|
+
```shell
|
|
278
|
+
mkdir -p ~/.local/share/bash-completion/completions
|
|
279
|
+
protein-quest --print-completion bash > ~/.local/share/bash-completion/completions/protein-quest
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
Initialize for zsh shell with:
|
|
283
|
+
|
|
284
|
+
```shell
|
|
285
|
+
mkdir -p ~/.local/share/zsh/site-functions
|
|
286
|
+
protein-quest --print-completion zsh > ~/.local/share/zsh/site-functions/_protein-quest
|
|
287
|
+
fpath=("$HOME/.local/share/zsh/site-functions" $fpath)
|
|
288
|
+
autoload -Uz compinit && compinit
|
|
289
|
+
```
|
|
290
|
+
|
|
184
291
|
## Contributing
|
|
185
292
|
|
|
186
293
|
For development information and contribution guidelines, please see [CONTRIBUTING.md](CONTRIBUTING.md).
|