protein-quest 0.3.0__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. {protein_quest-0.3.0 → protein_quest-0.8.0}/.github/workflows/ci.yml +16 -10
  2. {protein_quest-0.3.0 → protein_quest-0.8.0}/.github/workflows/pages.yml +9 -1
  3. {protein_quest-0.3.0 → protein_quest-0.8.0}/.gitignore +14 -1
  4. protein_quest-0.8.0/.python-version +1 -0
  5. {protein_quest-0.3.0 → protein_quest-0.8.0}/CITATION.cff +1 -2
  6. {protein_quest-0.3.0 → protein_quest-0.8.0}/CONTRIBUTING.md +15 -1
  7. {protein_quest-0.3.0 → protein_quest-0.8.0}/PKG-INFO +120 -14
  8. {protein_quest-0.3.0 → protein_quest-0.8.0}/README.md +115 -8
  9. protein_quest-0.8.0/docs/notebooks/.gitignore +4 -0
  10. protein_quest-0.8.0/docs/notebooks/alphafold.ipynb +463 -0
  11. protein_quest-0.8.0/docs/notebooks/index.md +3 -0
  12. protein_quest-0.8.0/docs/notebooks/pdbe.ipynb +282 -0
  13. protein_quest-0.8.0/docs/notebooks/uniprot.ipynb +401 -0
  14. {protein_quest-0.3.0 → protein_quest-0.8.0}/mkdocs.yml +16 -8
  15. {protein_quest-0.3.0 → protein_quest-0.8.0}/pyproject.toml +20 -18
  16. protein_quest-0.8.0/src/protein_quest/__version__.py +2 -0
  17. {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/alphafold/confidence.py +46 -19
  18. protein_quest-0.8.0/src/protein_quest/alphafold/entry_summary.py +64 -0
  19. protein_quest-0.8.0/src/protein_quest/alphafold/fetch.py +534 -0
  20. protein_quest-0.8.0/src/protein_quest/cli.py +1424 -0
  21. protein_quest-0.8.0/src/protein_quest/converter.py +46 -0
  22. {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/emdb.py +6 -3
  23. protein_quest-0.8.0/src/protein_quest/filters.py +147 -0
  24. {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/go.py +1 -4
  25. protein_quest-0.8.0/src/protein_quest/io.py +350 -0
  26. {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/mcp_server.py +64 -16
  27. {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/parallel.py +37 -1
  28. {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/pdbe/fetch.py +20 -3
  29. protein_quest-0.8.0/src/protein_quest/ss.py +280 -0
  30. protein_quest-0.8.0/src/protein_quest/structure.py +232 -0
  31. {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/taxonomy.py +13 -3
  32. protein_quest-0.8.0/src/protein_quest/uniprot.py +975 -0
  33. protein_quest-0.8.0/src/protein_quest/utils.py +547 -0
  34. protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_alphafold_db_version.yaml +48 -0
  35. protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +55567 -0
  36. protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_all_isoforms.yaml +51 -0
  37. protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_gzipped.yaml +42326 -0
  38. protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary.yaml +9431 -0
  39. protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary_with_version.yaml +9385 -0
  40. protein_quest-0.8.0/tests/alphafold/test_confidence.py +156 -0
  41. protein_quest-0.8.0/tests/alphafold/test_entry_summary.py +16 -0
  42. protein_quest-0.8.0/tests/alphafold/test_fetch.py +301 -0
  43. protein_quest-0.8.0/tests/cassettes/test_cli/test_search_pdbe.yaml +1023 -0
  44. protein_quest-0.8.0/tests/cassettes/test_cli/test_search_uniprot.yaml +64 -0
  45. protein_quest-0.8.0/tests/cassettes/test_cli/test_search_uniprot_details.yaml +87 -0
  46. protein_quest-0.8.0/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_do_not_match_external_isoform.yaml +62 -0
  47. protein_quest-0.8.0/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_match_canonical_isoform.yaml +66 -0
  48. protein_quest-0.8.0/tests/cassettes/test_uniprot/test_map_uniprot_accessions2uniprot_details.yaml +145 -0
  49. protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4af_ok_sequence_length.yaml +66 -0
  50. protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4af_too_big_sequence_length.yaml +62 -0
  51. protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4af_too_small_sequence_length.yaml +62 -0
  52. protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4interaction_partners.yaml +382 -0
  53. protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4macromolecular_complexes.yaml +382 -0
  54. protein_quest-0.8.0/tests/conftest.py +18 -0
  55. protein_quest-0.8.0/tests/fixtures/2Y29.cif.gz +0 -0
  56. protein_quest-0.8.0/tests/fixtures/3JRS_B2A.cif.gz +0 -0
  57. protein_quest-0.8.0/tests/pdbe/test_fetch.py +29 -0
  58. protein_quest-0.8.0/tests/test_cli.py +101 -0
  59. protein_quest-0.8.0/tests/test_converter.py +23 -0
  60. protein_quest-0.8.0/tests/test_io.py +230 -0
  61. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/test_mcp.py +3 -8
  62. protein_quest-0.8.0/tests/test_ss.py +225 -0
  63. protein_quest-0.8.0/tests/test_structure.py +116 -0
  64. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/test_uniprot.py +267 -3
  65. protein_quest-0.8.0/tests/test_utils.py +518 -0
  66. {protein_quest-0.3.0 → protein_quest-0.8.0}/uv.lock +198 -714
  67. protein_quest-0.3.0/docs/cli_doc_hook.py +0 -113
  68. protein_quest-0.3.0/src/protein_quest/__version__.py +0 -1
  69. protein_quest-0.3.0/src/protein_quest/alphafold/entry_summary.py +0 -38
  70. protein_quest-0.3.0/src/protein_quest/alphafold/fetch.py +0 -314
  71. protein_quest-0.3.0/src/protein_quest/cli.py +0 -782
  72. protein_quest-0.3.0/src/protein_quest/filters.py +0 -107
  73. protein_quest-0.3.0/src/protein_quest/pdbe/io.py +0 -185
  74. protein_quest-0.3.0/src/protein_quest/uniprot.py +0 -511
  75. protein_quest-0.3.0/src/protein_quest/utils.py +0 -105
  76. protein_quest-0.3.0/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -6289
  77. protein_quest-0.3.0/tests/alphafold/test_confidence.py +0 -63
  78. protein_quest-0.3.0/tests/alphafold/test_entry_summary.py +0 -15
  79. protein_quest-0.3.0/tests/alphafold/test_fetch.py +0 -20
  80. protein_quest-0.3.0/tests/pdbe/fixtures/2y29.cif +0 -940
  81. protein_quest-0.3.0/tests/pdbe/test_fetch.py +0 -17
  82. protein_quest-0.3.0/tests/pdbe/test_io.py +0 -81
  83. protein_quest-0.3.0/tests/test_cli.py +0 -14
  84. {protein_quest-0.3.0 → protein_quest-0.8.0}/.github/workflows/pypi-publish.yml +0 -0
  85. {protein_quest-0.3.0 → protein_quest-0.8.0}/.vscode/extensions.json +0 -0
  86. {protein_quest-0.3.0 → protein_quest-0.8.0}/CODE_OF_CONDUCT.md +0 -0
  87. {protein_quest-0.3.0 → protein_quest-0.8.0}/LICENSE +0 -0
  88. {protein_quest-0.3.0 → protein_quest-0.8.0}/docs/CONTRIBUTING.md +0 -0
  89. {protein_quest-0.3.0 → protein_quest-0.8.0}/docs/index.md +0 -0
  90. {protein_quest-0.3.0 → protein_quest-0.8.0}/docs/protein-quest-mcp.png +0 -0
  91. {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/__init__.py +0 -0
  92. {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/alphafold/__init__.py +0 -0
  93. {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/pdbe/__init__.py +0 -0
  94. {protein_quest-0.3.0 → protein_quest-0.8.0}/src/protein_quest/py.typed +0 -0
  95. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
  96. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
  97. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
  98. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
  99. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
  100. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
  101. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
  102. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
  103. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
  104. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
  105. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/test_emdb.py +0 -0
  106. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/test_go.py +0 -0
  107. {protein_quest-0.3.0 → protein_quest-0.8.0}/tests/test_taxonomy.py +0 -0
@@ -3,7 +3,7 @@ name: CI
3
3
  on:
4
4
  push:
5
5
  branches:
6
- - main
6
+ - main
7
7
  pull_request:
8
8
 
9
9
  concurrency:
@@ -27,20 +27,11 @@ jobs:
27
27
  - name: Run tests
28
28
  run: |
29
29
  uv run pytest --cov --cov-report=xml
30
- echo $? > pytest-exitcode
31
- continue-on-error: true
32
- # Always upload coverage, even if tests fail
33
30
  - name: Run codacy-coverage-reporter
34
31
  uses: codacy/codacy-coverage-reporter-action@v1.3.0
35
32
  with:
36
33
  project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
37
34
  coverage-reports: coverage.xml
38
- - name: Fail job if pytest failed
39
- run: |
40
- if [ -f pytest-exitcode ] && [ "$(cat pytest-exitcode)" -ne 0 ]; then
41
- echo "Pytest failed, failing job."
42
- exit 1
43
- fi
44
35
  build:
45
36
  name: build
46
37
  runs-on: ubuntu-latest
@@ -70,3 +61,18 @@ jobs:
70
61
  run: uv sync --locked --dev --extra mcp
71
62
  - name: Run type checkers
72
63
  run: uv run pyrefly check src tests
64
+ typing-docs:
65
+ name: typing-docs
66
+ runs-on: ubuntu-latest
67
+ steps:
68
+ - uses: actions/checkout@v4
69
+ - name: Install uv
70
+ uses: astral-sh/setup-uv@v6
71
+ - name: Install the project
72
+ run: uv sync --group docs-type
73
+ - name: Convert notebooks to Python scripts
74
+ run: |
75
+ find docs/ -name "*.ipynb" -exec uv run --group docs-type marimo convert {} -o {}.py \;
76
+ - name: Run type checkers on docs
77
+ run: uv run --group docs-type pyrefly check docs/notebooks/*.ipynb.py
78
+
@@ -5,6 +5,7 @@ on:
5
5
  branches:
6
6
  - main
7
7
  workflow_dispatch:
8
+ pull_request:
8
9
 
9
10
  permissions:
10
11
  contents: read
@@ -13,7 +14,7 @@ permissions:
13
14
 
14
15
  # Only have one deployment in progress at a time
15
16
  concurrency:
16
- group: "pages"
17
+ group: pages
17
18
  cancel-in-progress: true
18
19
 
19
20
  jobs:
@@ -32,6 +33,10 @@ jobs:
32
33
  - name: Build MkDocs site
33
34
  run: |
34
35
  uv run mkdocs build
36
+ env:
37
+ # Force colored output from rich library
38
+ TTY_COMPATIBLE: '1'
39
+ TTY_INTERACTIVE: '0'
35
40
 
36
41
  - name: Upload artifact
37
42
  uses: actions/upload-pages-artifact@v3
@@ -42,6 +47,9 @@ jobs:
42
47
  # Add a dependency to the build job
43
48
  needs: build
44
49
 
50
+ # Only deploy on pushes to main or manual trigger of main branch
51
+ if: github.ref == 'refs/heads/main'
52
+
45
53
  # Grant GITHUB_TOKEN the permissions required to make a Pages deployment
46
54
  permissions:
47
55
  pages: write # to deploy to Pages
@@ -73,4 +73,17 @@ venv.bak/
73
73
  /docs/pdb_files/
74
74
  /docs/density_filtered/
75
75
  /site
76
- /mysession/
76
+ /mysession/
77
+ # Paths generated in README.md examples
78
+ uniprot_accs.txt
79
+ pdbe.csv
80
+ alphafold.csv
81
+ emdbs.csv
82
+ interaction-partners-of-Q05471.txt
83
+ complexes.csv
84
+ downloads-af/
85
+ downloads-emdb/
86
+ downloads-pdbe/
87
+ filtered/
88
+ filtered-chains/
89
+ filtered-ss/
@@ -0,0 +1 @@
1
+ 3.13
@@ -23,5 +23,4 @@ repository-code: https://github.com/haddocking/protein-quest
23
23
  identifiers:
24
24
  - description: Latest version of software
25
25
  type: doi
26
- # TODO update once release has been made
27
- value: 10.5281/zenodo.15632658
26
+ value: 10.5281/zenodo.16941288
@@ -82,9 +82,23 @@ uv run mkdocs build
82
82
  python3 -m http.server -d site
83
83
  ```
84
84
 
85
+ <details>
86
+ <summary>Type checking notebooks</summary>
87
+
88
+ [Pyrefly](https://pyrefly.org/) does not support notebooks yet, so we need to convert them to python scripts and then run pyrefly on them.
89
+
90
+ ```shell
91
+ find docs/ -name "*.ipynb" -exec uv run --group docs-type marimo convert {} -o {}.py \;
92
+ uv run --group docs-type pyrefly check docs/notebooks/*.ipynb.py
93
+ rm docs/notebooks/*.ipynb.py
94
+ ```
95
+
96
+ </details>
97
+
98
+
85
99
  ## Contributing to tests
86
100
 
87
- The code coverage are stored at https://app.codacy.com/gh/haddocking/protein-quest/coverage .
101
+ The code coverage is stored at [https://app.codacy.com/gh/haddocking/protein-quest/coverage](https://app.codacy.com/gh/haddocking/protein-quest/coverage).
88
102
 
89
103
  The search functions of the protein-quest package talk to web services on the Internet.
90
104
  To have fast tests we use [pytest-recording](https://github.com/kiwicom/pytest-recording) to record and replay HTTP interactions.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: protein_quest
3
- Version: 0.3.0
3
+ Version: 0.8.0
4
4
  Summary: Search/retrieve/filter proteins and protein structures
5
5
  Project-URL: Homepage, https://github.com/haddocking/protein-quest
6
6
  Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
@@ -11,24 +11,23 @@ Requires-Python: >=3.13
11
11
  Requires-Dist: aiofiles>=24.1.0
12
12
  Requires-Dist: aiohttp-retry>=2.9.1
13
13
  Requires-Dist: aiohttp[speedups]>=3.11.18
14
- Requires-Dist: aiopath>=0.7.7
15
14
  Requires-Dist: attrs>=25.3.0
16
- Requires-Dist: bokeh>=3.7.3
17
15
  Requires-Dist: cattrs[orjson]>=24.1.3
18
16
  Requires-Dist: dask>=2025.5.1
19
17
  Requires-Dist: distributed>=2025.5.1
20
18
  Requires-Dist: gemmi>=0.7.3
21
- Requires-Dist: molviewspec>=1.6.0
22
- Requires-Dist: pandas>=2.3.0
19
+ Requires-Dist: mmcif>=0.92.0
23
20
  Requires-Dist: platformdirs>=4.3.8
24
21
  Requires-Dist: psutil>=7.0.0
25
22
  Requires-Dist: rich-argparse>=1.7.1
26
23
  Requires-Dist: rich>=14.0.0
24
+ Requires-Dist: shtab>=1.7.2
27
25
  Requires-Dist: sparqlwrapper>=2.0.0
28
26
  Requires-Dist: tqdm>=4.67.1
27
+ Requires-Dist: yarl>=1.20.1
29
28
  Provides-Extra: mcp
30
29
  Requires-Dist: fastmcp>=2.11.3; extra == 'mcp'
31
- Requires-Dist: pydantic>=2.11.7; extra == 'mcp'
30
+ Requires-Dist: pydantic>=2.12.0; extra == 'mcp'
32
31
  Description-Content-Type: text/markdown
33
32
 
34
33
  # protein-quest
@@ -37,8 +36,7 @@ Description-Content-Type: text/markdown
37
36
  [![CI](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml/badge.svg)](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
38
37
  [![Research Software Directory Badge](https://img.shields.io/badge/rsd-00a3e3.svg)](https://www.research-software.nl/software/protein-quest)
39
38
  [![PyPI](https://img.shields.io/pypi/v/protein-quest)](https://pypi.org/project/protein-quest/)
40
- <!-- TODO replace with correct zenodo id -->
41
- [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15632658.svg)](https://doi.org/10.5281/zenodo.15632658)
39
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.16941288.svg)](https://doi.org/10.5281/zenodo.16941288)
42
40
  [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/7a3f3f1fe64640d583a5e50fe7ba828e)](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
43
41
 
44
42
  Python package to search/retrieve/filter proteins and protein structures.
@@ -51,6 +49,10 @@ It uses
51
49
  - [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
52
50
  - [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.
53
51
 
52
+ The package is used by
53
+
54
+ - [protein-detective](https://github.com/haddocking/protein-detective)
55
+
54
56
  An example workflow:
55
57
 
56
58
  ```mermaid
@@ -60,17 +62,29 @@ graph TB;
60
62
  searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
61
63
  searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
62
64
  searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
65
+ searchuniprot -. uniprot_accessions .-> searchuniprotdetails[/Search UniProt details/]
66
+ searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
67
+ searchcomplexes[/Search complexes/]
63
68
  searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
64
69
  searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
65
70
  searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
66
- fetchpdbe -->|mmcif_files_with_uniprot_acc| chainfilter{Filter on chain of uniprot}
67
- chainfilter --> |mmcif_files| residuefilter{Filter on chain length}
68
- fetchad -->|pdb_files| confidencefilter{Filter out low confidence}
71
+ fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
72
+ chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
73
+ fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
74
+ confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
75
+ residuefilter --> |mmcif_files| ssfilter
76
+ ssfilter -. mmcif_files .-> convert2cif([Convert to cif])
77
+ ssfilter -. mmcif_files .-> convert2uniprot_accessions([Convert to UniProt accessions])
69
78
  classDef dashedBorder stroke-dasharray: 5 5;
70
79
  goterm:::dashedBorder
71
80
  taxonomy:::dashedBorder
72
81
  searchemdb:::dashedBorder
73
82
  fetchemdb:::dashedBorder
83
+ searchintactionpartners:::dashedBorder
84
+ searchcomplexes:::dashedBorder
85
+ searchuniprotdetails:::dashedBorder
86
+ convert2cif:::dashedBorder
87
+ convert2uniprot_accessions:::dashedBorder
74
88
  ```
75
89
 
76
90
  (Dotted nodes and edges are side-quests.)
@@ -90,7 +104,10 @@ pip install git+https://github.com/haddocking/protein-quest.git
90
104
 
91
105
  The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
92
106
 
93
- To use programmaticly, see [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
107
+ To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
108
+
109
+ While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
110
+ This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
94
111
 
95
112
  ### Search Uniprot accessions
96
113
 
@@ -98,7 +115,7 @@ To use programmaticly, see [API documentation](https://www.bonvinlab.org/protein
98
115
  protein-quest search uniprot \
99
116
  --taxon-id 9606 \
100
117
  --reviewed \
101
- --subcellular-location-uniprot nucleus \
118
+ --subcellular-location-uniprot "nucleus" \
102
119
  --subcellular-location-go GO:0005634 \
103
120
  --molecular-function-go GO:0003677 \
104
121
  --limit 100 \
@@ -138,7 +155,7 @@ protein-quest retrieve pdbe pdbe.csv downloads-pdbe/
138
155
  protein-quest retrieve alphafold alphafold.csv downloads-af/
139
156
  ```
140
157
 
141
- For each entry downloads the summary.json and cif file.
158
+ For each entry downloads the cif file.
142
159
 
143
160
  ### To retrieve EMDB volume files
144
161
 
@@ -179,6 +196,18 @@ protein-quest filter residue \
179
196
  ./filtered-chains ./filtered
180
197
  ```
181
198
 
199
+ ### To filter on secondary structure
200
+
201
+ To keep only structures that consist mostly of alpha helices and have no beta sheets, use the command below. See the following [notebook](https://www.bonvinlab.org/protein-detective/SSE_elements.html) to determine the ratio of secondary structure elements.
202
+
203
+ ```shell
204
+ protein-quest filter secondary-structure \
205
+ --ratio-min-helix-residues 0.5 \
206
+ --ratio-max-sheet-residues 0.0 \
207
+ --write-stats filtered-ss/stats.csv \
208
+ ./filtered-chains ./filtered-ss
209
+ ```
210
+
182
211
  ### Search Taxonomy
183
212
 
184
213
  ```shell
@@ -194,6 +223,63 @@ You can use following command to search for a Gene Ontology (GO) term.
194
223
  protein-quest search go --limit 5 --aspect cellular_component apoptosome -
195
224
  ```
196
225
 
226
+ ### Search for interaction partners
227
+
228
+ Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
229
+
230
+ ```shell
231
+ protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
232
+ ```
233
+
234
+ The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
235
+
236
+ ### Search for complexes
237
+
238
+ Given Uniprot accessions search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
239
+ and return the complex entries and their members.
240
+
241
+ ```shell
242
+ echo Q05471 | protein-quest search complexes - complexes.csv
243
+ ```
244
+
245
+ The `complexes.csv` looks like
246
+
247
+ ```csv
248
+ query_protein,complex_id,complex_url,complex_title,members
249
+ Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
250
+ ```
251
+
252
+ ### Search for UniProt details
253
+
254
+ To get details (like protein name, sequence length, organism) for a list of UniProt accessions.
255
+
256
+ ```shell
257
+ protein-quest search uniprot-details uniprot_accs.txt uniprot_details.csv
258
+ ```
259
+
260
+ The `uniprot_details.csv` looks like:
261
+
262
+ ```csv
263
+ uniprot_accession,uniprot_id,sequence_length,reviewed,protein_name,taxon_id,taxon_name
264
+ A0A087WUV0,ZN892_HUMAN,522,True,Zinc finger protein 892,9606,Homo sapiens
265
+ ```
266
+
267
+ ### Convert structure files to .cif format
268
+
269
+ Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only work with `.cif` files and not `*.cif.gz` or `*.bcif` files.
270
+
271
+ ```shell
272
+ protein-quest convert structures --format cif --output-dir ./filtered-cif ./filtered-ss
273
+ ```
274
+
275
+ ### Convert structure files to UniProt accessions
276
+
277
+ After running some filters you might want to know which UniProt accessions are still present in the filtered structures.
278
+
279
+ ```shell
280
+ protein-quest convert uniprot ./filtered-ss uniprot_accs.filtered.txt
281
+ ```
282
+
197
283
  ## Model Context Protocol (MCP) server
198
284
 
199
285
  Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
@@ -214,6 +300,26 @@ protein-quest mcp
214
300
 
215
301
  The mcp server contains a prompt template to search/retrieve/filter candidate structures.
216
302
 
303
+ ## Shell autocompletion
304
+
305
+ The `protein-quest` command line tool supports shell autocompletion using [shtab](https://shtab.readthedocs.io/).
306
+
307
+ Initialize for bash shell with:
308
+
309
+ ```shell
310
+ mkdir -p ~/.local/share/bash-completion/completions
311
+ protein-quest --print-completion bash > ~/.local/share/bash-completion/completions/protein-quest
312
+ ```
313
+
314
+ Initialize for zsh shell with:
315
+
316
+ ```shell
317
+ mkdir -p ~/.local/share/zsh/site-functions
318
+ protein-quest --print-completion zsh > ~/.local/share/zsh/site-functions/_protein-quest
319
+ fpath=("$HOME/.local/share/zsh/site-functions" $fpath)
320
+ autoload -Uz compinit && compinit
321
+ ```
322
+
217
323
  ## Contributing
218
324
 
219
325
  For development information and contribution guidelines, please see [CONTRIBUTING.md](CONTRIBUTING.md).
@@ -4,8 +4,7 @@
4
4
  [![CI](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml/badge.svg)](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
5
5
  [![Research Software Directory Badge](https://img.shields.io/badge/rsd-00a3e3.svg)](https://www.research-software.nl/software/protein-quest)
6
6
  [![PyPI](https://img.shields.io/pypi/v/protein-quest)](https://pypi.org/project/protein-quest/)
7
- <!-- TODO replace with correct zenodo id -->
8
- [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15632658.svg)](https://doi.org/10.5281/zenodo.15632658)
7
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.16941288.svg)](https://doi.org/10.5281/zenodo.16941288)
9
8
  [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/7a3f3f1fe64640d583a5e50fe7ba828e)](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
10
9
 
11
10
  Python package to search/retrieve/filter proteins and protein structures.
@@ -18,6 +17,10 @@ It uses
18
17
  - [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
19
18
  - [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.
20
19
 
20
+ The package is used by
21
+
22
+ - [protein-detective](https://github.com/haddocking/protein-detective)
23
+
21
24
  An example workflow:
22
25
 
23
26
  ```mermaid
@@ -27,17 +30,29 @@ graph TB;
27
30
  searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
28
31
  searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
29
32
  searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
33
+ searchuniprot -. uniprot_accessions .-> searchuniprotdetails[/Search UniProt details/]
34
+ searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
35
+ searchcomplexes[/Search complexes/]
30
36
  searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
31
37
  searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
32
38
  searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
33
- fetchpdbe -->|mmcif_files_with_uniprot_acc| chainfilter{Filter on chain of uniprot}
34
- chainfilter --> |mmcif_files| residuefilter{Filter on chain length}
35
- fetchad -->|pdb_files| confidencefilter{Filter out low confidence}
39
+ fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
40
+ chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
41
+ fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
42
+ confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
43
+ residuefilter --> |mmcif_files| ssfilter
44
+ ssfilter -. mmcif_files .-> convert2cif([Convert to cif])
45
+ ssfilter -. mmcif_files .-> convert2uniprot_accessions([Convert to UniProt accessions])
36
46
  classDef dashedBorder stroke-dasharray: 5 5;
37
47
  goterm:::dashedBorder
38
48
  taxonomy:::dashedBorder
39
49
  searchemdb:::dashedBorder
40
50
  fetchemdb:::dashedBorder
51
+ searchintactionpartners:::dashedBorder
52
+ searchcomplexes:::dashedBorder
53
+ searchuniprotdetails:::dashedBorder
54
+ convert2cif:::dashedBorder
55
+ convert2uniprot_accessions:::dashedBorder
41
56
  ```
42
57
 
43
58
  (Dotted nodes and edges are side-quests.)
@@ -57,7 +72,10 @@ pip install git+https://github.com/haddocking/protein-quest.git
57
72
 
58
73
  The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
59
74
 
60
- To use programmaticly, see [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
75
+ To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
76
+
77
+ While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
78
+ This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
61
79
 
62
80
  ### Search Uniprot accessions
63
81
 
@@ -65,7 +83,7 @@ To use programmaticly, see [API documentation](https://www.bonvinlab.org/protein
65
83
  protein-quest search uniprot \
66
84
  --taxon-id 9606 \
67
85
  --reviewed \
68
- --subcellular-location-uniprot nucleus \
86
+ --subcellular-location-uniprot "nucleus" \
69
87
  --subcellular-location-go GO:0005634 \
70
88
  --molecular-function-go GO:0003677 \
71
89
  --limit 100 \
@@ -105,7 +123,7 @@ protein-quest retrieve pdbe pdbe.csv downloads-pdbe/
105
123
  protein-quest retrieve alphafold alphafold.csv downloads-af/
106
124
  ```
107
125
 
108
- For each entry downloads the summary.json and cif file.
126
+ For each entry downloads the cif file.
109
127
 
110
128
  ### To retrieve EMDB volume files
111
129
 
@@ -146,6 +164,18 @@ protein-quest filter residue \
146
164
  ./filtered-chains ./filtered
147
165
  ```
148
166
 
167
+ ### To filter on secondary structure
168
+
169
+ To keep only structures that consist mostly of alpha helices and have no beta sheets, use the command below. See the following [notebook](https://www.bonvinlab.org/protein-detective/SSE_elements.html) to determine the ratio of secondary structure elements.
170
+
171
+ ```shell
172
+ protein-quest filter secondary-structure \
173
+ --ratio-min-helix-residues 0.5 \
174
+ --ratio-max-sheet-residues 0.0 \
175
+ --write-stats filtered-ss/stats.csv \
176
+ ./filtered-chains ./filtered-ss
177
+ ```
178
+
149
179
  ### Search Taxonomy
150
180
 
151
181
  ```shell
@@ -161,6 +191,63 @@ You can use following command to search for a Gene Ontology (GO) term.
161
191
  protein-quest search go --limit 5 --aspect cellular_component apoptosome -
162
192
  ```
163
193
 
194
+ ### Search for interaction partners
195
+
196
+ Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
197
+
198
+ ```shell
199
+ protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
200
+ ```
201
+
202
+ The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
203
+
204
+ ### Search for complexes
205
+
206
+ Given Uniprot accessions search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
207
+ and return the complex entries and their members.
208
+
209
+ ```shell
210
+ echo Q05471 | protein-quest search complexes - complexes.csv
211
+ ```
212
+
213
+ The `complexes.csv` looks like
214
+
215
+ ```csv
216
+ query_protein,complex_id,complex_url,complex_title,members
217
+ Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
218
+ ```
219
+
220
+ ### Search for UniProt details
221
+
222
+ To get details (like protein name, sequence length, organism) for a list of UniProt accessions.
223
+
224
+ ```shell
225
+ protein-quest search uniprot-details uniprot_accs.txt uniprot_details.csv
226
+ ```
227
+
228
+ The `uniprot_details.csv` looks like:
229
+
230
+ ```csv
231
+ uniprot_accession,uniprot_id,sequence_length,reviewed,protein_name,taxon_id,taxon_name
232
+ A0A087WUV0,ZN892_HUMAN,522,True,Zinc finger protein 892,9606,Homo sapiens
233
+ ```
234
+
235
+ ### Convert structure files to .cif format
236
+
237
+ Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only work with `.cif` files and not `*.cif.gz` or `*.bcif` files.
238
+
239
+ ```shell
240
+ protein-quest convert structures --format cif --output-dir ./filtered-cif ./filtered-ss
241
+ ```
242
+
243
+ ### Convert structure files to UniProt accessions
244
+
245
+ After running some filters you might want to know which UniProt accessions are still present in the filtered structures.
246
+
247
+ ```shell
248
+ protein-quest convert uniprot ./filtered-ss uniprot_accs.filtered.txt
249
+ ```
250
+
164
251
  ## Model Context Protocol (MCP) server
165
252
 
166
253
  Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
@@ -181,6 +268,26 @@ protein-quest mcp
181
268
 
182
269
  The mcp server contains a prompt template to search/retrieve/filter candidate structures.
183
270
 
271
+ ## Shell autocompletion
272
+
273
+ The `protein-quest` command line tool supports shell autocompletion using [shtab](https://shtab.readthedocs.io/).
274
+
275
+ Initialize for bash shell with:
276
+
277
+ ```shell
278
+ mkdir -p ~/.local/share/bash-completion/completions
279
+ protein-quest --print-completion bash > ~/.local/share/bash-completion/completions/protein-quest
280
+ ```
281
+
282
+ Initialize for zsh shell with:
283
+
284
+ ```shell
285
+ mkdir -p ~/.local/share/zsh/site-functions
286
+ protein-quest --print-completion zsh > ~/.local/share/zsh/site-functions/_protein-quest
287
+ fpath=("$HOME/.local/share/zsh/site-functions" $fpath)
288
+ autoload -Uz compinit && compinit
289
+ ```
290
+
184
291
  ## Contributing
185
292
 
186
293
  For development information and contribution guidelines, please see [CONTRIBUTING.md](CONTRIBUTING.md).
@@ -0,0 +1,4 @@
1
+ pdb_files/
2
+ alphafold_files/
3
+ filtered/
4
+ session/