protein-quest 0.3.2__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. {protein_quest-0.3.2 → protein_quest-0.8.0}/.github/workflows/ci.yml +0 -9
  2. {protein_quest-0.3.2 → protein_quest-0.8.0}/.github/workflows/pages.yml +9 -1
  3. {protein_quest-0.3.2 → protein_quest-0.8.0}/.gitignore +14 -1
  4. protein_quest-0.8.0/.python-version +1 -0
  5. {protein_quest-0.3.2 → protein_quest-0.8.0}/PKG-INFO +104 -8
  6. {protein_quest-0.3.2 → protein_quest-0.8.0}/README.md +99 -5
  7. protein_quest-0.8.0/docs/notebooks/alphafold.ipynb +463 -0
  8. {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/notebooks/pdbe.ipynb +12 -8
  9. {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/notebooks/uniprot.ipynb +96 -3
  10. {protein_quest-0.3.2 → protein_quest-0.8.0}/mkdocs.yml +3 -4
  11. {protein_quest-0.3.2 → protein_quest-0.8.0}/pyproject.toml +8 -2
  12. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/__version__.py +1 -1
  13. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/alphafold/confidence.py +2 -2
  14. protein_quest-0.8.0/src/protein_quest/alphafold/entry_summary.py +64 -0
  15. protein_quest-0.8.0/src/protein_quest/alphafold/fetch.py +534 -0
  16. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/cli.py +615 -130
  17. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/converter.py +1 -0
  18. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/emdb.py +6 -3
  19. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/filters.py +2 -5
  20. protein_quest-0.8.0/src/protein_quest/io.py +350 -0
  21. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/mcp_server.py +58 -13
  22. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/pdbe/fetch.py +6 -3
  23. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/ss.py +23 -7
  24. protein_quest-0.3.2/src/protein_quest/pdbe/io.py → protein_quest-0.8.0/src/protein_quest/structure.py +77 -126
  25. protein_quest-0.8.0/src/protein_quest/uniprot.py +975 -0
  26. protein_quest-0.8.0/src/protein_quest/utils.py +547 -0
  27. protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_alphafold_db_version.yaml +48 -0
  28. protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +55567 -0
  29. protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_all_isoforms.yaml +51 -0
  30. protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_gzipped.yaml +42326 -0
  31. protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary.yaml +9431 -0
  32. protein_quest-0.8.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary_with_version.yaml +9385 -0
  33. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/alphafold/test_confidence.py +3 -2
  34. protein_quest-0.8.0/tests/alphafold/test_entry_summary.py +16 -0
  35. protein_quest-0.8.0/tests/alphafold/test_fetch.py +301 -0
  36. protein_quest-0.8.0/tests/cassettes/test_cli/test_search_pdbe.yaml +1023 -0
  37. protein_quest-0.8.0/tests/cassettes/test_cli/test_search_uniprot.yaml +64 -0
  38. protein_quest-0.8.0/tests/cassettes/test_cli/test_search_uniprot_details.yaml +87 -0
  39. protein_quest-0.8.0/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_do_not_match_external_isoform.yaml +62 -0
  40. protein_quest-0.8.0/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_match_canonical_isoform.yaml +66 -0
  41. protein_quest-0.8.0/tests/cassettes/test_uniprot/test_map_uniprot_accessions2uniprot_details.yaml +145 -0
  42. protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4af_ok_sequence_length.yaml +66 -0
  43. protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4af_too_big_sequence_length.yaml +62 -0
  44. protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4af_too_small_sequence_length.yaml +62 -0
  45. protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4interaction_partners.yaml +382 -0
  46. protein_quest-0.8.0/tests/cassettes/test_uniprot/test_search4macromolecular_complexes.yaml +382 -0
  47. protein_quest-0.8.0/tests/conftest.py +18 -0
  48. protein_quest-0.8.0/tests/fixtures/2Y29.cif.gz +0 -0
  49. protein_quest-0.8.0/tests/test_cli.py +101 -0
  50. protein_quest-0.8.0/tests/test_converter.py +23 -0
  51. protein_quest-0.8.0/tests/test_io.py +230 -0
  52. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/test_mcp.py +3 -8
  53. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/test_ss.py +8 -10
  54. protein_quest-0.8.0/tests/test_structure.py +116 -0
  55. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/test_uniprot.py +267 -3
  56. protein_quest-0.8.0/tests/test_utils.py +518 -0
  57. {protein_quest-0.3.2 → protein_quest-0.8.0}/uv.lock +166 -63
  58. protein_quest-0.3.2/docs/cli_doc_hook.py +0 -113
  59. protein_quest-0.3.2/docs/notebooks/alphafold.ipynb +0 -384
  60. protein_quest-0.3.2/src/protein_quest/alphafold/entry_summary.py +0 -40
  61. protein_quest-0.3.2/src/protein_quest/alphafold/fetch.py +0 -288
  62. protein_quest-0.3.2/src/protein_quest/uniprot.py +0 -511
  63. protein_quest-0.3.2/src/protein_quest/utils.py +0 -167
  64. protein_quest-0.3.2/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -6289
  65. protein_quest-0.3.2/tests/alphafold/test_entry_summary.py +0 -12
  66. protein_quest-0.3.2/tests/alphafold/test_fetch.py +0 -20
  67. protein_quest-0.3.2/tests/pdbe/fixtures/2y29.cif +0 -940
  68. protein_quest-0.3.2/tests/pdbe/test_io.py +0 -142
  69. protein_quest-0.3.2/tests/test_cli.py +0 -14
  70. protein_quest-0.3.2/tests/test_utils.py +0 -31
  71. {protein_quest-0.3.2 → protein_quest-0.8.0}/.github/workflows/pypi-publish.yml +0 -0
  72. {protein_quest-0.3.2 → protein_quest-0.8.0}/.vscode/extensions.json +0 -0
  73. {protein_quest-0.3.2 → protein_quest-0.8.0}/CITATION.cff +0 -0
  74. {protein_quest-0.3.2 → protein_quest-0.8.0}/CODE_OF_CONDUCT.md +0 -0
  75. {protein_quest-0.3.2 → protein_quest-0.8.0}/CONTRIBUTING.md +0 -0
  76. {protein_quest-0.3.2 → protein_quest-0.8.0}/LICENSE +0 -0
  77. {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/CONTRIBUTING.md +0 -0
  78. {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/index.md +0 -0
  79. {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/notebooks/.gitignore +0 -0
  80. {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/notebooks/index.md +0 -0
  81. {protein_quest-0.3.2 → protein_quest-0.8.0}/docs/protein-quest-mcp.png +0 -0
  82. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/__init__.py +0 -0
  83. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/alphafold/__init__.py +0 -0
  84. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/go.py +0 -0
  85. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/parallel.py +0 -0
  86. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/pdbe/__init__.py +0 -0
  87. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/py.typed +0 -0
  88. {protein_quest-0.3.2 → protein_quest-0.8.0}/src/protein_quest/taxonomy.py +0 -0
  89. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
  90. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
  91. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
  92. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
  93. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
  94. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
  95. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
  96. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
  97. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
  98. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/fixtures/3JRS_B2A.cif.gz +0 -0
  99. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
  100. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/pdbe/test_fetch.py +0 -0
  101. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/test_emdb.py +0 -0
  102. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/test_go.py +0 -0
  103. {protein_quest-0.3.2 → protein_quest-0.8.0}/tests/test_taxonomy.py +0 -0
@@ -27,20 +27,11 @@ jobs:
27
27
  - name: Run tests
28
28
  run: |
29
29
  uv run pytest --cov --cov-report=xml
30
- echo $? > pytest-exitcode
31
- continue-on-error: true
32
- # Always upload coverage, even if tests fail
33
30
  - name: Run codacy-coverage-reporter
34
31
  uses: codacy/codacy-coverage-reporter-action@v1.3.0
35
32
  with:
36
33
  project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
37
34
  coverage-reports: coverage.xml
38
- - name: Fail job if pytest failed
39
- run: |
40
- if [ -f pytest-exitcode ] && [ "$(cat pytest-exitcode)" -ne 0 ]; then
41
- echo "Pytest failed, failing job."
42
- exit 1
43
- fi
44
35
  build:
45
36
  name: build
46
37
  runs-on: ubuntu-latest
@@ -5,6 +5,7 @@ on:
5
5
  branches:
6
6
  - main
7
7
  workflow_dispatch:
8
+ pull_request:
8
9
 
9
10
  permissions:
10
11
  contents: read
@@ -13,7 +14,7 @@ permissions:
13
14
 
14
15
  # Only have one deployment in progress at a time
15
16
  concurrency:
16
- group: "pages"
17
+ group: pages
17
18
  cancel-in-progress: true
18
19
 
19
20
  jobs:
@@ -32,6 +33,10 @@ jobs:
32
33
  - name: Build MkDocs site
33
34
  run: |
34
35
  uv run mkdocs build
36
+ env:
37
+ # Force colored output from rich library
38
+ TTY_COMPATIBLE: '1'
39
+ TTY_INTERACTIVE: '0'
35
40
 
36
41
  - name: Upload artifact
37
42
  uses: actions/upload-pages-artifact@v3
@@ -42,6 +47,9 @@ jobs:
42
47
  # Add a dependency to the build job
43
48
  needs: build
44
49
 
50
+ # Only deploy on pushes to main or manual trigger of main branch
51
+ if: github.ref == 'refs/heads/main'
52
+
45
53
  # Grant GITHUB_TOKEN the permissions required to make a Pages deployment
46
54
  permissions:
47
55
  pages: write # to deploy to Pages
@@ -73,4 +73,17 @@ venv.bak/
73
73
  /docs/pdb_files/
74
74
  /docs/density_filtered/
75
75
  /site
76
- /mysession/
76
+ /mysession/
77
+ # Paths generated in README.md examples
78
+ uniprot_accs.txt
79
+ pdbe.csv
80
+ alphafold.csv
81
+ emdbs.csv
82
+ interaction-partners-of-Q05471.txt
83
+ complexes.csv
84
+ downloads-af/
85
+ downloads-emdb/
86
+ downloads-pdbe/
87
+ filtered/
88
+ filtered-chains/
89
+ filtered-ss/
@@ -0,0 +1 @@
1
+ 3.13
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: protein_quest
3
- Version: 0.3.2
3
+ Version: 0.8.0
4
4
  Summary: Search/retrieve/filter proteins and protein structures
5
5
  Project-URL: Homepage, https://github.com/haddocking/protein-quest
6
6
  Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
@@ -11,21 +11,23 @@ Requires-Python: >=3.13
11
11
  Requires-Dist: aiofiles>=24.1.0
12
12
  Requires-Dist: aiohttp-retry>=2.9.1
13
13
  Requires-Dist: aiohttp[speedups]>=3.11.18
14
- Requires-Dist: aiopath>=0.7.7
15
14
  Requires-Dist: attrs>=25.3.0
16
15
  Requires-Dist: cattrs[orjson]>=24.1.3
17
16
  Requires-Dist: dask>=2025.5.1
18
17
  Requires-Dist: distributed>=2025.5.1
19
18
  Requires-Dist: gemmi>=0.7.3
19
+ Requires-Dist: mmcif>=0.92.0
20
+ Requires-Dist: platformdirs>=4.3.8
20
21
  Requires-Dist: psutil>=7.0.0
21
22
  Requires-Dist: rich-argparse>=1.7.1
22
23
  Requires-Dist: rich>=14.0.0
24
+ Requires-Dist: shtab>=1.7.2
23
25
  Requires-Dist: sparqlwrapper>=2.0.0
24
26
  Requires-Dist: tqdm>=4.67.1
25
27
  Requires-Dist: yarl>=1.20.1
26
28
  Provides-Extra: mcp
27
29
  Requires-Dist: fastmcp>=2.11.3; extra == 'mcp'
28
- Requires-Dist: pydantic>=2.11.7; extra == 'mcp'
30
+ Requires-Dist: pydantic>=2.12.0; extra == 'mcp'
29
31
  Description-Content-Type: text/markdown
30
32
 
31
33
  # protein-quest
@@ -47,6 +49,10 @@ It uses
47
49
  - [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
48
50
  - [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.
49
51
 
52
+ The package is used by
53
+
54
+ - [protein-detective](https://github.com/haddocking/protein-detective)
55
+
50
56
  An example workflow:
51
57
 
52
58
  ```mermaid
@@ -56,19 +62,29 @@ graph TB;
56
62
  searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
57
63
  searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
58
64
  searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
65
+ searchuniprot -. uniprot_accessions .-> searchuniprotdetails[/Search UniProt details/]
66
+ searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
67
+ searchcomplexes[/Search complexes/]
59
68
  searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
60
69
  searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
61
70
  searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
62
- fetchpdbe -->|mmcif_files_with_uniprot_acc| chainfilter{{Filter on chain of uniprot}}
71
+ fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
63
72
  chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
64
- fetchad -->|pdb_files| confidencefilter{{Filter out low confidence}}
73
+ fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
65
74
  confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
66
75
  residuefilter --> |mmcif_files| ssfilter
76
+ ssfilter -. mmcif_files .-> convert2cif([Convert to cif])
77
+ ssfilter -. mmcif_files .-> convert2uniprot_accessions([Convert to UniProt accessions])
67
78
  classDef dashedBorder stroke-dasharray: 5 5;
68
79
  goterm:::dashedBorder
69
80
  taxonomy:::dashedBorder
70
81
  searchemdb:::dashedBorder
71
82
  fetchemdb:::dashedBorder
83
+ searchintactionpartners:::dashedBorder
84
+ searchcomplexes:::dashedBorder
85
+ searchuniprotdetails:::dashedBorder
86
+ convert2cif:::dashedBorder
87
+ convert2uniprot_accessions:::dashedBorder
72
88
  ```
73
89
 
74
90
  (Dotted nodes and edges are side-quests.)
@@ -90,13 +106,16 @@ The main entry point is the `protein-quest` command line tool which has multiple
90
106
 
91
107
  To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
92
108
 
109
+ While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
110
+ This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
111
+
93
112
  ### Search Uniprot accessions
94
113
 
95
114
  ```shell
96
115
  protein-quest search uniprot \
97
116
  --taxon-id 9606 \
98
117
  --reviewed \
99
- --subcellular-location-uniprot nucleus \
118
+ --subcellular-location-uniprot "nucleus" \
100
119
  --subcellular-location-go GO:0005634 \
101
120
  --molecular-function-go GO:0003677 \
102
121
  --limit 100 \
@@ -136,7 +155,7 @@ protein-quest retrieve pdbe pdbe.csv downloads-pdbe/
136
155
  protein-quest retrieve alphafold alphafold.csv downloads-af/
137
156
  ```
138
157
 
139
- For each entry downloads the summary.json and cif file.
158
+ For each entry downloads the cif file.
140
159
 
141
160
  ### To retrieve EMDB volume files
142
161
 
@@ -179,7 +198,7 @@ protein-quest filter residue \
179
198
 
180
199
  ### To filter on secondary structure
181
200
 
182
- To filter on structure being mostly alpha helices and have no beta sheets.
201
+ To filter on structures that are mostly alpha helices and have no beta sheets. See the following [notebook](https://www.bonvinlab.org/protein-detective/SSE_elements.html) to determine the ratio of secondary structure elements.
183
202
 
184
203
  ```shell
185
204
  protein-quest filter secondary-structure \
@@ -204,6 +223,63 @@ You can use following command to search for a Gene Ontology (GO) term.
204
223
  protein-quest search go --limit 5 --aspect cellular_component apoptosome -
205
224
  ```
206
225
 
226
+ ### Search for interaction partners
227
+
228
+ Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
229
+
230
+ ```shell
231
+ protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
232
+ ```
233
+
234
+ The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
235
+
236
+ ### Search for complexes
237
+
238
+ Given Uniprot accessions search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
239
+ and return the complex entries and their members.
240
+
241
+ ```shell
242
+ echo Q05471 | protein-quest search complexes - complexes.csv
243
+ ```
244
+
245
+ The `complexes.csv` looks like
246
+
247
+ ```csv
248
+ query_protein,complex_id,complex_url,complex_title,members
249
+ Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
250
+ ```
251
+
252
+ ### Search for UniProt details
253
+
254
+ To get details (like protein name, sequence length, organism) for a list of UniProt accessions.
255
+
256
+ ```shell
257
+ protein-quest search uniprot-details uniprot_accs.txt uniprot_details.csv
258
+ ```
259
+
260
+ The `uniprot_details.csv` looks like:
261
+
262
+ ```csv
263
+ uniprot_accession,uniprot_id,sequence_length,reviewed,protein_name,taxon_id,taxon_name
264
+ A0A087WUV0,ZN892_HUMAN,522,True,Zinc finger protein 892,9606,Homo sapiens
265
+ ```
266
+
267
+ ### Convert structure files to .cif format
268
+
269
+ Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only work with `.cif` files and not `*.cif.gz` or `*.bcif` files.
270
+
271
+ ```shell
272
+ protein-quest convert structures --format cif --output-dir ./filtered-cif ./filtered-ss
273
+ ```
274
+
275
+ ### Convert structure files to UniProt accessions
276
+
277
+ After running some filters you might want to know which UniProt accessions are still present in the filtered structures.
278
+
279
+ ```shell
280
+ protein-quest convert uniprot ./filtered-ss uniprot_accs.filtered.txt
281
+ ```
282
+
207
283
  ## Model Context Protocol (MCP) server
208
284
 
209
285
  Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
@@ -224,6 +300,26 @@ protein-quest mcp
224
300
 
225
301
  The mcp server contains a prompt template to search/retrieve/filter candidate structures.
226
302
 
303
+ ## Shell autocompletion
304
+
305
+ The `protein-quest` command line tool supports shell autocompletion using [shtab](https://shtab.readthedocs.io/).
306
+
307
+ Initialize for bash shell with:
308
+
309
+ ```shell
310
+ mkdir -p ~/.local/share/bash-completion/completions
311
+ protein-quest --print-completion bash > ~/.local/share/bash-completion/completions/protein-quest
312
+ ```
313
+
314
+ Initialize for zsh shell with:
315
+
316
+ ```shell
317
+ mkdir -p ~/.local/share/zsh/site-functions
318
+ protein-quest --print-completion zsh > ~/.local/share/zsh/site-functions/_protein-quest
319
+ fpath=("$HOME/.local/share/zsh/site-functions" $fpath)
320
+ autoload -Uz compinit && compinit
321
+ ```
322
+
227
323
  ## Contributing
228
324
 
229
325
  For development information and contribution guidelines, please see [CONTRIBUTING.md](CONTRIBUTING.md).
@@ -17,6 +17,10 @@ It uses
17
17
  - [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
18
18
  - [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.
19
19
 
20
+ The package is used by
21
+
22
+ - [protein-detective](https://github.com/haddocking/protein-detective)
23
+
20
24
  An example workflow:
21
25
 
22
26
  ```mermaid
@@ -26,19 +30,29 @@ graph TB;
26
30
  searchuniprot --> |uniprot_accessions|searchpdbe[/Search PDBe/]
27
31
  searchuniprot --> |uniprot_accessions|searchaf[/Search Alphafold/]
28
32
  searchuniprot -. uniprot_accessions .-> searchemdb[/Search EMDB/]
33
+ searchuniprot -. uniprot_accessions .-> searchuniprotdetails[/Search UniProt details/]
34
+ searchintactionpartners[/Search interaction partners/] -.-x |uniprot_accessions|searchuniprot
35
+ searchcomplexes[/Search complexes/]
29
36
  searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
30
37
  searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
31
38
  searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
32
- fetchpdbe -->|mmcif_files_with_uniprot_acc| chainfilter{{Filter on chain of uniprot}}
39
+ fetchpdbe -->|mmcif_files| chainfilter{{Filter on chain of uniprot}}
33
40
  chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
34
- fetchad -->|pdb_files| confidencefilter{{Filter out low confidence}}
41
+ fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
35
42
  confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
36
43
  residuefilter --> |mmcif_files| ssfilter
44
+ ssfilter -. mmcif_files .-> convert2cif([Convert to cif])
45
+ ssfilter -. mmcif_files .-> convert2uniprot_accessions([Convert to UniProt accessions])
37
46
  classDef dashedBorder stroke-dasharray: 5 5;
38
47
  goterm:::dashedBorder
39
48
  taxonomy:::dashedBorder
40
49
  searchemdb:::dashedBorder
41
50
  fetchemdb:::dashedBorder
51
+ searchintactionpartners:::dashedBorder
52
+ searchcomplexes:::dashedBorder
53
+ searchuniprotdetails:::dashedBorder
54
+ convert2cif:::dashedBorder
55
+ convert2uniprot_accessions:::dashedBorder
42
56
  ```
43
57
 
44
58
  (Dotted nodes and edges are side-quests.)
@@ -60,13 +74,16 @@ The main entry point is the `protein-quest` command line tool which has multiple
60
74
 
61
75
  To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
62
76
 
77
+ While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
78
+ This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
79
+
63
80
  ### Search Uniprot accessions
64
81
 
65
82
  ```shell
66
83
  protein-quest search uniprot \
67
84
  --taxon-id 9606 \
68
85
  --reviewed \
69
- --subcellular-location-uniprot nucleus \
86
+ --subcellular-location-uniprot "nucleus" \
70
87
  --subcellular-location-go GO:0005634 \
71
88
  --molecular-function-go GO:0003677 \
72
89
  --limit 100 \
@@ -106,7 +123,7 @@ protein-quest retrieve pdbe pdbe.csv downloads-pdbe/
106
123
  protein-quest retrieve alphafold alphafold.csv downloads-af/
107
124
  ```
108
125
 
109
- For each entry downloads the summary.json and cif file.
126
+ For each entry downloads the cif file.
110
127
 
111
128
  ### To retrieve EMDB volume files
112
129
 
@@ -149,7 +166,7 @@ protein-quest filter residue \
149
166
 
150
167
  ### To filter on secondary structure
151
168
 
152
- To filter on structure being mostly alpha helices and have no beta sheets.
169
+ To filter on structures that are mostly alpha helices and have no beta sheets. See the following [notebook](https://www.bonvinlab.org/protein-detective/SSE_elements.html) to determine the ratio of secondary structure elements.
153
170
 
154
171
  ```shell
155
172
  protein-quest filter secondary-structure \
@@ -174,6 +191,63 @@ You can use following command to search for a Gene Ontology (GO) term.
174
191
  protein-quest search go --limit 5 --aspect cellular_component apoptosome -
175
192
  ```
176
193
 
194
+ ### Search for interaction partners
195
+
196
+ Use https://www.ebi.ac.uk/complexportal to find interaction partners of a given UniProt accession.
197
+
198
+ ```shell
199
+ protein-quest search interaction-partners Q05471 interaction-partners-of-Q05471.txt
200
+ ```
201
+
202
+ The `interaction-partners-of-Q05471.txt` file contains uniprot accessions (one per line).
203
+
204
+ ### Search for complexes
205
+
206
+ Given Uniprot accessions search for macromolecular complexes at https://www.ebi.ac.uk/complexportal
207
+ and return the complex entries and their members.
208
+
209
+ ```shell
210
+ echo Q05471 | protein-quest search complexes - complexes.csv
211
+ ```
212
+
213
+ The `complexes.csv` looks like
214
+
215
+ ```csv
216
+ query_protein,complex_id,complex_url,complex_title,members
217
+ Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
218
+ ```
219
+
220
+ ### Search for UniProt details
221
+
222
+ To get details (like protein name, sequence length, organism) for a list of UniProt accessions.
223
+
224
+ ```shell
225
+ protein-quest search uniprot-details uniprot_accs.txt uniprot_details.csv
226
+ ```
227
+
228
+ The `uniprot_details.csv` looks like:
229
+
230
+ ```csv
231
+ uniprot_accession,uniprot_id,sequence_length,reviewed,protein_name,taxon_id,taxon_name
232
+ A0A087WUV0,ZN892_HUMAN,522,True,Zinc finger protein 892,9606,Homo sapiens
233
+ ```
234
+
235
+ ### Convert structure files to .cif format
236
+
237
+ Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only work with `.cif` files and not `*.cif.gz` or `*.bcif` files.
238
+
239
+ ```shell
240
+ protein-quest convert structures --format cif --output-dir ./filtered-cif ./filtered-ss
241
+ ```
242
+
243
+ ### Convert structure files to UniProt accessions
244
+
245
+ After running some filters you might want to know which UniProt accessions are still present in the filtered structures.
246
+
247
+ ```shell
248
+ protein-quest convert uniprot ./filtered-ss uniprot_accs.filtered.txt
249
+ ```
250
+
177
251
  ## Model Context Protocol (MCP) server
178
252
 
179
253
  Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
@@ -194,6 +268,26 @@ protein-quest mcp
194
268
 
195
269
  The mcp server contains a prompt template to search/retrieve/filter candidate structures.
196
270
 
271
+ ## Shell autocompletion
272
+
273
+ The `protein-quest` command line tool supports shell autocompletion using [shtab](https://shtab.readthedocs.io/).
274
+
275
+ Initialize for bash shell with:
276
+
277
+ ```shell
278
+ mkdir -p ~/.local/share/bash-completion/completions
279
+ protein-quest --print-completion bash > ~/.local/share/bash-completion/completions/protein-quest
280
+ ```
281
+
282
+ Initialize for zsh shell with:
283
+
284
+ ```shell
285
+ mkdir -p ~/.local/share/zsh/site-functions
286
+ protein-quest --print-completion zsh > ~/.local/share/zsh/site-functions/_protein-quest
287
+ fpath=("$HOME/.local/share/zsh/site-functions" $fpath)
288
+ autoload -Uz compinit && compinit
289
+ ```
290
+
197
291
  ## Contributing
198
292
 
199
293
  For development information and contribution guidelines, please see [CONTRIBUTING.md](CONTRIBUTING.md).