protein-quest 0.3.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of protein-quest might be problematic. Click here for more details.

Files changed (67) hide show
  1. {protein_quest-0.3.0 → protein_quest-0.3.1}/.github/workflows/ci.yml +16 -1
  2. {protein_quest-0.3.0 → protein_quest-0.3.1}/CITATION.cff +1 -2
  3. {protein_quest-0.3.0 → protein_quest-0.3.1}/CONTRIBUTING.md +15 -1
  4. {protein_quest-0.3.0 → protein_quest-0.3.1}/PKG-INFO +4 -8
  5. {protein_quest-0.3.0 → protein_quest-0.3.1}/README.md +2 -3
  6. protein_quest-0.3.1/docs/notebooks/.gitignore +4 -0
  7. protein_quest-0.3.1/docs/notebooks/alphafold.ipynb +384 -0
  8. protein_quest-0.3.1/docs/notebooks/index.md +3 -0
  9. protein_quest-0.3.1/docs/notebooks/pdbe.ipynb +278 -0
  10. protein_quest-0.3.1/docs/notebooks/uniprot.ipynb +308 -0
  11. {protein_quest-0.3.0 → protein_quest-0.3.1}/mkdocs.yml +13 -4
  12. {protein_quest-0.3.0 → protein_quest-0.3.1}/pyproject.toml +12 -16
  13. protein_quest-0.3.1/src/protein_quest/__version__.py +2 -0
  14. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/alphafold/confidence.py +2 -2
  15. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/alphafold/entry_summary.py +11 -9
  16. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/alphafold/fetch.py +37 -61
  17. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/cli.py +35 -18
  18. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/filters.py +43 -32
  19. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/mcp_server.py +4 -5
  20. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/parallel.py +37 -1
  21. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/pdbe/fetch.py +15 -1
  22. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/pdbe/io.py +25 -10
  23. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/taxonomy.py +12 -0
  24. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/utils.py +38 -3
  25. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/alphafold/test_entry_summary.py +1 -4
  26. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/alphafold/test_fetch.py +1 -1
  27. protein_quest-0.3.1/tests/pdbe/test_fetch.py +29 -0
  28. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/pdbe/test_io.py +31 -5
  29. {protein_quest-0.3.0 → protein_quest-0.3.1}/uv.lock +76 -695
  30. protein_quest-0.3.0/src/protein_quest/__version__.py +0 -1
  31. protein_quest-0.3.0/tests/pdbe/test_fetch.py +0 -17
  32. {protein_quest-0.3.0 → protein_quest-0.3.1}/.github/workflows/pages.yml +0 -0
  33. {protein_quest-0.3.0 → protein_quest-0.3.1}/.github/workflows/pypi-publish.yml +0 -0
  34. {protein_quest-0.3.0 → protein_quest-0.3.1}/.gitignore +0 -0
  35. {protein_quest-0.3.0 → protein_quest-0.3.1}/.vscode/extensions.json +0 -0
  36. {protein_quest-0.3.0 → protein_quest-0.3.1}/CODE_OF_CONDUCT.md +0 -0
  37. {protein_quest-0.3.0 → protein_quest-0.3.1}/LICENSE +0 -0
  38. {protein_quest-0.3.0 → protein_quest-0.3.1}/docs/CONTRIBUTING.md +0 -0
  39. {protein_quest-0.3.0 → protein_quest-0.3.1}/docs/cli_doc_hook.py +0 -0
  40. {protein_quest-0.3.0 → protein_quest-0.3.1}/docs/index.md +0 -0
  41. {protein_quest-0.3.0 → protein_quest-0.3.1}/docs/protein-quest-mcp.png +0 -0
  42. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/__init__.py +0 -0
  43. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/alphafold/__init__.py +0 -0
  44. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/emdb.py +0 -0
  45. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/go.py +0 -0
  46. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/pdbe/__init__.py +0 -0
  47. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/py.typed +0 -0
  48. {protein_quest-0.3.0 → protein_quest-0.3.1}/src/protein_quest/uniprot.py +0 -0
  49. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
  50. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -0
  51. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/alphafold/test_confidence.py +0 -0
  52. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
  53. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
  54. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
  55. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
  56. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
  57. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
  58. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
  59. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
  60. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
  61. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/pdbe/fixtures/2y29.cif +0 -0
  62. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/test_cli.py +0 -0
  63. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/test_emdb.py +0 -0
  64. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/test_go.py +0 -0
  65. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/test_mcp.py +0 -0
  66. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/test_taxonomy.py +0 -0
  67. {protein_quest-0.3.0 → protein_quest-0.3.1}/tests/test_uniprot.py +0 -0
@@ -3,7 +3,7 @@ name: CI
3
3
  on:
4
4
  push:
5
5
  branches:
6
- - main
6
+ - main
7
7
  pull_request:
8
8
 
9
9
  concurrency:
@@ -70,3 +70,18 @@ jobs:
70
70
  run: uv sync --locked --dev --extra mcp
71
71
  - name: Run type checkers
72
72
  run: uv run pyrefly check src tests
73
+ typing-docs:
74
+ name: typing-docs
75
+ runs-on: ubuntu-latest
76
+ steps:
77
+ - uses: actions/checkout@v4
78
+ - name: Install uv
79
+ uses: astral-sh/setup-uv@v6
80
+ - name: Install the project
81
+ run: uv sync --group docs-type
82
+ - name: Convert notebooks to Python scripts
83
+ run: |
84
+ find docs/ -name "*.ipynb" -exec uv run --group docs-type marimo convert {} -o {}.py \;
85
+ - name: Run type checkers on docs
86
+ run: uv run --group docs-type pyrefly check docs/notebooks/*.ipynb.py
87
+
@@ -23,5 +23,4 @@ repository-code: https://github.com/haddocking/protein-quest
23
23
  identifiers:
24
24
  - description: Latest version of software
25
25
  type: doi
26
- # TODO update once release has been made
27
- value: 10.5281/zenodo.15632658
26
+ value: 10.5281/zenodo.16941288
@@ -82,9 +82,23 @@ uv run mkdocs build
82
82
  python3 -m http.server -d site
83
83
  ```
84
84
 
85
+ <details>
86
+ <summary>Type checking notebooks</summary>
87
+
88
+ [Pyrefly](https://pyrefly.org/) does not support notebooks yet, so we need to convert them to python scripts and then run pyrefly on them.
89
+
90
+ ```shell
91
+ find docs/ -name "*.ipynb" -exec uv run --group docs-type marimo convert {} -o {}.py \;
92
+ uv run --group docs-type pyrefly check docs/notebooks/*.ipynb.py
93
+ rm docs/notebooks/*.ipynb.py
94
+ ```
95
+
96
+ </details>
97
+
98
+
85
99
  ## Contributing to tests
86
100
 
87
- The code coverage are stored at https://app.codacy.com/gh/haddocking/protein-quest/coverage .
101
+ The code coverage is stored at [https://app.codacy.com/gh/haddocking/protein-quest/coverage](https://app.codacy.com/gh/haddocking/protein-quest/coverage).
88
102
 
89
103
  The search functions of the protein-quest package talk to web services on the Internet.
90
104
  To have fast tests we use [pytest-recording](https://github.com/kiwicom/pytest-recording) to record and replay HTTP interactions.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: protein_quest
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: Search/retrieve/filter proteins and protein structures
5
5
  Project-URL: Homepage, https://github.com/haddocking/protein-quest
6
6
  Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
@@ -13,19 +13,16 @@ Requires-Dist: aiohttp-retry>=2.9.1
13
13
  Requires-Dist: aiohttp[speedups]>=3.11.18
14
14
  Requires-Dist: aiopath>=0.7.7
15
15
  Requires-Dist: attrs>=25.3.0
16
- Requires-Dist: bokeh>=3.7.3
17
16
  Requires-Dist: cattrs[orjson]>=24.1.3
18
17
  Requires-Dist: dask>=2025.5.1
19
18
  Requires-Dist: distributed>=2025.5.1
20
19
  Requires-Dist: gemmi>=0.7.3
21
- Requires-Dist: molviewspec>=1.6.0
22
- Requires-Dist: pandas>=2.3.0
23
- Requires-Dist: platformdirs>=4.3.8
24
20
  Requires-Dist: psutil>=7.0.0
25
21
  Requires-Dist: rich-argparse>=1.7.1
26
22
  Requires-Dist: rich>=14.0.0
27
23
  Requires-Dist: sparqlwrapper>=2.0.0
28
24
  Requires-Dist: tqdm>=4.67.1
25
+ Requires-Dist: yarl>=1.20.1
29
26
  Provides-Extra: mcp
30
27
  Requires-Dist: fastmcp>=2.11.3; extra == 'mcp'
31
28
  Requires-Dist: pydantic>=2.11.7; extra == 'mcp'
@@ -37,8 +34,7 @@ Description-Content-Type: text/markdown
37
34
  [![CI](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml/badge.svg)](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
38
35
  [![Research Software Directory Badge](https://img.shields.io/badge/rsd-00a3e3.svg)](https://www.research-software.nl/software/protein-quest)
39
36
  [![PyPI](https://img.shields.io/pypi/v/protein-quest)](https://pypi.org/project/protein-quest/)
40
- <!-- TODO replace with correct zenodo id -->
41
- [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15632658.svg)](https://doi.org/10.5281/zenodo.15632658)
37
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.16941288.svg)](https://doi.org/10.5281/zenodo.16941288)
42
38
  [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/7a3f3f1fe64640d583a5e50fe7ba828e)](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
43
39
 
44
40
  Python package to search/retrieve/filter proteins and protein structures.
@@ -90,7 +86,7 @@ pip install git+https://github.com/haddocking/protein-quest.git
90
86
 
91
87
  The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
92
88
 
93
- To use programmaticly, see [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
89
+ To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
94
90
 
95
91
  ### Search Uniprot accessions
96
92
 
@@ -4,8 +4,7 @@
4
4
  [![CI](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml/badge.svg)](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
5
5
  [![Research Software Directory Badge](https://img.shields.io/badge/rsd-00a3e3.svg)](https://www.research-software.nl/software/protein-quest)
6
6
  [![PyPI](https://img.shields.io/pypi/v/protein-quest)](https://pypi.org/project/protein-quest/)
7
- <!-- TODO replace with correct zenodo id -->
8
- [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15632658.svg)](https://doi.org/10.5281/zenodo.15632658)
7
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.16941288.svg)](https://doi.org/10.5281/zenodo.16941288)
9
8
  [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/7a3f3f1fe64640d583a5e50fe7ba828e)](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
10
9
 
11
10
  Python package to search/retrieve/filter proteins and protein structures.
@@ -57,7 +56,7 @@ pip install git+https://github.com/haddocking/protein-quest.git
57
56
 
58
57
  The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
59
58
 
60
- To use programmaticly, see [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
59
+ To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
61
60
 
62
61
  ### Search Uniprot accessions
63
62
 
@@ -0,0 +1,4 @@
1
+ pdb_files/
2
+ alphafold_files/
3
+ filtered/
4
+ session/
@@ -0,0 +1,384 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "24b1926c",
6
+ "metadata": {},
7
+ "source": [
8
+ "# AlphaFold\n",
9
+ "\n",
10
+ "You can download and filter AlphaFold files on confidence."
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 1,
16
+ "id": "681ba946",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "# Generic imports\n",
21
+ "import logging\n",
22
+ "from pathlib import Path\n",
23
+ "from pprint import pprint\n",
24
+ "\n",
25
+ "logging.basicConfig(level=logging.WARNING)\n",
26
+ "# Set to WARNING to see only warnings\n",
27
+ "# Set to INFO to see sparql queries\n",
28
+ "# Set to DEBUG to see raw results"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "markdown",
33
+ "id": "4959258c",
34
+ "metadata": {},
35
+ "source": [
36
+ "\n",
37
+ "## Download AlphaFold files"
38
+ ]
39
+ },
40
+ {
41
+ "cell_type": "code",
42
+ "execution_count": 2,
43
+ "id": "81e449db",
44
+ "metadata": {},
45
+ "outputs": [],
46
+ "source": [
47
+ "from protein_quest.alphafold.fetch import fetch_many_async"
48
+ ]
49
+ },
50
+ {
51
+ "cell_type": "code",
52
+ "execution_count": 3,
53
+ "id": "5c2e6ee3",
54
+ "metadata": {},
55
+ "outputs": [],
56
+ "source": [
57
+ "save_dir = Path(\"alphafold_files\")"
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "markdown",
62
+ "id": "f38991cf",
63
+ "metadata": {},
64
+ "source": [
65
+ "Download the summary, the cif, the predicted aligned error document (paeDoc) and the pdb file for 3 AlphaFold entries given their UniProt accessions.\n"
66
+ ]
67
+ },
68
+ {
69
+ "cell_type": "code",
70
+ "execution_count": 8,
71
+ "id": "e32b474a",
72
+ "metadata": {},
73
+ "outputs": [
74
+ {
75
+ "name": "stderr",
76
+ "output_type": "stream",
77
+ "text": [
78
+ "Fetching Alphafold summaries: 100%|██████████| 3/3 [00:00<00:00, 8.07it/s]\n",
79
+ "Downloading AlphaFold files: 100%|██████████| 9/9 [00:00<00:00, 55.82it/s]"
80
+ ]
81
+ },
82
+ {
83
+ "name": "stdout",
84
+ "output_type": "stream",
85
+ "text": [
86
+ "[AlphaFoldEntry(uniprot_acc='A1YPR0',\n",
87
+ " summary=EntrySummary(entryId='AF-A1YPR0-F1',\n",
88
+ " uniprotAccession='A1YPR0',\n",
89
+ " uniprotId='ZBT7C_HUMAN',\n",
90
+ " uniprotDescription='Zinc finger and BTB '\n",
91
+ " 'domain-containing '\n",
92
+ " 'protein 7C',\n",
93
+ " taxId=9606,\n",
94
+ " organismScientificName='Homo sapiens',\n",
95
+ " uniprotStart=1,\n",
96
+ " uniprotEnd=619,\n",
97
+ " uniprotSequence='MANDIDELIGIPFPNHSSEVLCSLNEQRHDGLLCDVLLVVQEQEYRTHRSVLAACSKYFKKLFTAGTLASQPYVYEIDFVQPEALAAILEFAYTSTLTITAGNVKHILNAARMLEIQCIVNVCLEIMEPGGDGGEEDDKEDDDDDEDDDDEEDEEEEEEEEEDDDDDTEDFADQENLPDPQDISCHQSPSKTDHLTEKAYSDTPRDFPDSFQAGSPGHLGVIRDFSIESLLRENLYPKANIPDRRPSLSPFAPDFFPHLWPGDFGAFAQLPEQPMDSGPLDLVIKNRKIKEEEKEELPPPPPPPFPNDFFKDMFPDLPGGPLGPIKAENDYGAYLNFLSATHLGGLFPPWPLVEERKLKPKASQQCPICHKVIMGAGKLPRHMRTHTGEKPYMCTICEVRFTRQDKLKIHMRKHTGERPYLCIHCNAKFVHNYDLKNHMRIHTGVRPYQCEFCYKSFTRSDHLHRHIKRQSCRMARPRRGRKPAAWRAASLLFGPGGPAPDKAAFVMPPALGEVGGHLGGAAVCLPGPSPAKHFLAAPKGALSLQELERQFEETQMKLFGRAQLEAERNAGGLLAFALAENVAAARPYFPLPDPWAAGLAGLPGLAGLNHVASMSEANN',\n",
98
+ " modelCreatedDate='2022-06-01T00:00:00Z',\n",
99
+ " latestVersion=4,\n",
100
+ " allVersions=[1, 2, 3, 4],\n",
101
+ " bcifUrl='https://alphafold.ebi.ac.uk/files/AF-A1YPR0-F1-model_v4.bcif',\n",
102
+ " cifUrl='https://alphafold.ebi.ac.uk/files/AF-A1YPR0-F1-model_v4.cif',\n",
103
+ " pdbUrl='https://alphafold.ebi.ac.uk/files/AF-A1YPR0-F1-model_v4.pdb',\n",
104
+ " paeImageUrl='https://alphafold.ebi.ac.uk/files/AF-A1YPR0-F1-predicted_aligned_error_v4.png',\n",
105
+ " paeDocUrl='https://alphafold.ebi.ac.uk/files/AF-A1YPR0-F1-predicted_aligned_error_v4.json',\n",
106
+ " gene='ZBTB7C',\n",
107
+ " sequenceChecksum='73D82A34502B55BF',\n",
108
+ " sequenceVersionDate='2007-02-06T00:00:00Z',\n",
109
+ " amAnnotationsUrl='https://alphafold.ebi.ac.uk/files/AF-A1YPR0-F1-aa-substitutions.csv',\n",
110
+ " amAnnotationsHg19Url='https://alphafold.ebi.ac.uk/files/AF-A1YPR0-F1-hg19.csv',\n",
111
+ " amAnnotationsHg38Url='https://alphafold.ebi.ac.uk/files/AF-A1YPR0-F1-hg38.csv',\n",
112
+ " isReviewed=True,\n",
113
+ " isReferenceProteome=True),\n",
114
+ " bcif_file=None,\n",
115
+ " cif_file=PosixPath('alphafold_files/AF-A1YPR0-F1-model_v4.cif'),\n",
116
+ " pdb_file=PosixPath('alphafold_files/AF-A1YPR0-F1-model_v4.pdb'),\n",
117
+ " pae_image_file=None,\n",
118
+ " pae_doc_file=PosixPath('alphafold_files/AF-A1YPR0-F1-predicted_aligned_error_v4.json'),\n",
119
+ " am_annotations_file=None,\n",
120
+ " am_annotations_hg19_file=None,\n",
121
+ " am_annotations_hg38_file=None),\n",
122
+ " AlphaFoldEntry(uniprot_acc='O60481',\n",
123
+ " summary=EntrySummary(entryId='AF-O60481-F1',\n",
124
+ " uniprotAccession='O60481',\n",
125
+ " uniprotId='ZIC3_HUMAN',\n",
126
+ " uniprotDescription='Zinc finger protein '\n",
127
+ " 'ZIC 3',\n",
128
+ " taxId=9606,\n",
129
+ " organismScientificName='Homo sapiens',\n",
130
+ " uniprotStart=1,\n",
131
+ " uniprotEnd=467,\n",
132
+ " uniprotSequence='MTMLLDGGPQFPGLGVGSFGAPRHHEMPNREPAGMGLNPFGDSTHAAAAAAAAAAFKLSPAAAHDLSSGQSSAFTPQGSGYANALGHHHHHHHHHHHTSQVPSYGGAASAAFNSTREFLFRQRSSGLSEAASGGGQHGLFAGSASSLHAPAGIPEPPSYLLFPGLHEQGAGHPSPTGHVDNNQVHLGLRGELFGRADPYRPVASPRTDPYAAGAQFPNYSPMNMNMGVNVAAHHGPGAFFRYMRQPIKQELSCKWIDEAQLSRPKKSCDRTFSTMHELVTHVTMEHVGGPEQNNHVCYWEECPREGKSFKAKYKLVNHIRVHTGEKPFPCPFPGCGKIFARSENLKIHKRTHTGEKPFKCEFEGCDRRFANSSDRKKHMHVHTSDKPYICKVCDKSYTHPSSLRKHMKVHESQGSDSSPAASSGYESSTPPAIASANSKDTTKTPSAVQTSTSHNPGLPPNFNEWYV',\n",
133
+ " modelCreatedDate='2022-06-01T00:00:00Z',\n",
134
+ " latestVersion=4,\n",
135
+ " allVersions=[1, 2, 3, 4],\n",
136
+ " bcifUrl='https://alphafold.ebi.ac.uk/files/AF-O60481-F1-model_v4.bcif',\n",
137
+ " cifUrl='https://alphafold.ebi.ac.uk/files/AF-O60481-F1-model_v4.cif',\n",
138
+ " pdbUrl='https://alphafold.ebi.ac.uk/files/AF-O60481-F1-model_v4.pdb',\n",
139
+ " paeImageUrl='https://alphafold.ebi.ac.uk/files/AF-O60481-F1-predicted_aligned_error_v4.png',\n",
140
+ " paeDocUrl='https://alphafold.ebi.ac.uk/files/AF-O60481-F1-predicted_aligned_error_v4.json',\n",
141
+ " gene='ZIC3',\n",
142
+ " sequenceChecksum='3150CF13C0679568',\n",
143
+ " sequenceVersionDate='1998-08-01T00:00:00Z',\n",
144
+ " amAnnotationsUrl='https://alphafold.ebi.ac.uk/files/AF-O60481-F1-aa-substitutions.csv',\n",
145
+ " amAnnotationsHg19Url='https://alphafold.ebi.ac.uk/files/AF-O60481-F1-hg19.csv',\n",
146
+ " amAnnotationsHg38Url='https://alphafold.ebi.ac.uk/files/AF-O60481-F1-hg38.csv',\n",
147
+ " isReviewed=True,\n",
148
+ " isReferenceProteome=True),\n",
149
+ " bcif_file=None,\n",
150
+ " cif_file=PosixPath('alphafold_files/AF-O60481-F1-model_v4.cif'),\n",
151
+ " pdb_file=PosixPath('alphafold_files/AF-O60481-F1-model_v4.pdb'),\n",
152
+ " pae_image_file=None,\n",
153
+ " pae_doc_file=PosixPath('alphafold_files/AF-O60481-F1-predicted_aligned_error_v4.json'),\n",
154
+ " am_annotations_file=None,\n",
155
+ " am_annotations_hg19_file=None,\n",
156
+ " am_annotations_hg38_file=None),\n",
157
+ " AlphaFoldEntry(uniprot_acc='P50613',\n",
158
+ " summary=EntrySummary(entryId='AF-P50613-F1',\n",
159
+ " uniprotAccession='P50613',\n",
160
+ " uniprotId='CDK7_HUMAN',\n",
161
+ " uniprotDescription='Cyclin-dependent '\n",
162
+ " 'kinase 7',\n",
163
+ " taxId=9606,\n",
164
+ " organismScientificName='Homo sapiens',\n",
165
+ " uniprotStart=1,\n",
166
+ " uniprotEnd=346,\n",
167
+ " uniprotSequence='MALDVKSRAKRYEKLDFLGEGQFATVYKARDKNTNQIVAIKKIKLGHRSEAKDGINRTALREIKLLQELSHPNIIGLLDAFGHKSNISLVFDFMETDLEVIIKDNSLVLTPSHIKAYMLMTLQGLEYLHQHWILHRDLKPNNLLLDENGVLKLADFGLAKSFGSPNRAYTHQVVTRWYRAPELLFGARMYGVGVDMWAVGCILAELLLRVPFLPGDSDLDQLTRIFETLGTPTEEQWPDMCSLPDYVTFKSFPGIPLHHIFSAAGDDLLDLIQGLFLFNPCARITATQALKMKYFSNRPGPTPGCQLPRPNCPVETLKEQSNPALAIKRKRTEALEQGGLPKKLIF',\n",
168
+ " modelCreatedDate='2022-06-01T00:00:00Z',\n",
169
+ " latestVersion=4,\n",
170
+ " allVersions=[1, 2, 3, 4],\n",
171
+ " bcifUrl='https://alphafold.ebi.ac.uk/files/AF-P50613-F1-model_v4.bcif',\n",
172
+ " cifUrl='https://alphafold.ebi.ac.uk/files/AF-P50613-F1-model_v4.cif',\n",
173
+ " pdbUrl='https://alphafold.ebi.ac.uk/files/AF-P50613-F1-model_v4.pdb',\n",
174
+ " paeImageUrl='https://alphafold.ebi.ac.uk/files/AF-P50613-F1-predicted_aligned_error_v4.png',\n",
175
+ " paeDocUrl='https://alphafold.ebi.ac.uk/files/AF-P50613-F1-predicted_aligned_error_v4.json',\n",
176
+ " gene='CDK7',\n",
177
+ " sequenceChecksum='0A94BFA7DD416CEB',\n",
178
+ " sequenceVersionDate='1996-10-01T00:00:00Z',\n",
179
+ " amAnnotationsUrl='https://alphafold.ebi.ac.uk/files/AF-P50613-F1-aa-substitutions.csv',\n",
180
+ " amAnnotationsHg19Url='https://alphafold.ebi.ac.uk/files/AF-P50613-F1-hg19.csv',\n",
181
+ " amAnnotationsHg38Url='https://alphafold.ebi.ac.uk/files/AF-P50613-F1-hg38.csv',\n",
182
+ " isReviewed=True,\n",
183
+ " isReferenceProteome=True),\n",
184
+ " bcif_file=None,\n",
185
+ " cif_file=PosixPath('alphafold_files/AF-P50613-F1-model_v4.cif'),\n",
186
+ " pdb_file=PosixPath('alphafold_files/AF-P50613-F1-model_v4.pdb'),\n",
187
+ " pae_image_file=None,\n",
188
+ " pae_doc_file=PosixPath('alphafold_files/AF-P50613-F1-predicted_aligned_error_v4.json'),\n",
189
+ " am_annotations_file=None,\n",
190
+ " am_annotations_hg19_file=None,\n",
191
+ " am_annotations_hg38_file=None)]\n"
192
+ ]
193
+ },
194
+ {
195
+ "name": "stderr",
196
+ "output_type": "stream",
197
+ "text": [
198
+ "\n"
199
+ ]
200
+ }
201
+ ],
202
+ "source": [
203
+ "summaries = [s async for s in fetch_many_async([\"A1YPR0\", \"O60481\", \"P50613\"], save_dir, what={\"pdb\", \"cif\", \"paeDoc\"})]\n",
204
+ "pprint(summaries)"
205
+ ]
206
+ },
207
+ {
208
+ "cell_type": "code",
209
+ "execution_count": 9,
210
+ "id": "2d3595e6",
211
+ "metadata": {},
212
+ "outputs": [
213
+ {
214
+ "name": "stdout",
215
+ "output_type": "stream",
216
+ "text": [
217
+ "total 4.2M\n",
218
+ "4.0K A1YPR0.json\n",
219
+ "548K AF-A1YPR0-F1-model_v4.cif\n",
220
+ "392K AF-A1YPR0-F1-model_v4.pdb\n",
221
+ "1.1M AF-A1YPR0-F1-predicted_aligned_error_v4.json\n",
222
+ "408K AF-O60481-F1-model_v4.cif\n",
223
+ "292K AF-O60481-F1-model_v4.pdb\n",
224
+ "632K AF-O60481-F1-predicted_aligned_error_v4.json\n",
225
+ "320K AF-P50613-F1-model_v4.cif\n",
226
+ "224K AF-P50613-F1-model_v4.pdb\n",
227
+ "280K AF-P50613-F1-predicted_aligned_error_v4.json\n",
228
+ "4.0K O60481.json\n",
229
+ "4.0K P50613.json\n"
230
+ ]
231
+ }
232
+ ],
233
+ "source": [
234
+ "!ls -sh {save_dir}"
235
+ ]
236
+ },
237
+ {
238
+ "cell_type": "markdown",
239
+ "id": "a43edd87",
240
+ "metadata": {},
241
+ "source": [
242
+ "## Filter AlphaFold structure files on confidence\n",
243
+ "\n",
244
+ "Filter AlphaFold mmCIF/PDB files by confidence (pLDDT). Passed files are written with residues below threshold removed."
245
+ ]
246
+ },
247
+ {
248
+ "cell_type": "code",
249
+ "execution_count": 10,
250
+ "id": "cc96c63a",
251
+ "metadata": {},
252
+ "outputs": [],
253
+ "source": [
254
+ "from protein_quest.alphafold.confidence import ConfidenceFilterQuery, filter_files_on_confidence"
255
+ ]
256
+ },
257
+ {
258
+ "cell_type": "markdown",
259
+ "id": "724141d4",
260
+ "metadata": {},
261
+ "source": [
262
+ "Take one of the downloaded files"
263
+ ]
264
+ },
265
+ {
266
+ "cell_type": "code",
267
+ "execution_count": 12,
268
+ "id": "73a61cf6",
269
+ "metadata": {},
270
+ "outputs": [
271
+ {
272
+ "data": {
273
+ "text/plain": [
274
+ "[PosixPath('alphafold_files/AF-A1YPR0-F1-model_v4.cif'),\n",
275
+ " PosixPath('alphafold_files/AF-O60481-F1-model_v4.cif'),\n",
276
+ " PosixPath('alphafold_files/AF-P50613-F1-model_v4.cif')]"
277
+ ]
278
+ },
279
+ "execution_count": 12,
280
+ "metadata": {},
281
+ "output_type": "execute_result"
282
+ }
283
+ ],
284
+ "source": [
285
+ "input_files = [entry.cif_file for entry in summaries if entry.cif_file is not None]\n",
286
+ "input_files"
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "markdown",
291
+ "id": "da8f2f67",
292
+ "metadata": {},
293
+ "source": [
294
+ "We only write a filtered cif file when the input file has between 100 and 1000 residues with a pLDDT score above 80."
295
+ ]
296
+ },
297
+ {
298
+ "cell_type": "code",
299
+ "execution_count": null,
300
+ "id": "fbfdf472",
301
+ "metadata": {},
302
+ "outputs": [],
303
+ "source": [
304
+ "query = ConfidenceFilterQuery(confidence=80, min_threshold=100, max_threshold=1000)"
305
+ ]
306
+ },
307
+ {
308
+ "cell_type": "code",
309
+ "execution_count": 14,
310
+ "id": "152aec9a",
311
+ "metadata": {},
312
+ "outputs": [],
313
+ "source": [
314
+ "output_dir = Path(\"./filtered\")\n",
315
+ "output_dir.mkdir(exist_ok=True)\n",
316
+ "result = filter_files_on_confidence(input_files, query, output_dir)"
317
+ ]
318
+ },
319
+ {
320
+ "cell_type": "code",
321
+ "execution_count": 17,
322
+ "id": "6a6f8e3f",
323
+ "metadata": {},
324
+ "outputs": [
325
+ {
326
+ "data": {
327
+ "text/plain": [
328
+ "[ConfidenceFilterResult(input_file='AF-A1YPR0-F1-model_v4.cif', count=175, filtered_file=PosixPath('filtered/AF-A1YPR0-F1-model_v4.cif')),\n",
329
+ " ConfidenceFilterResult(input_file='AF-O60481-F1-model_v4.cif', count=76, filtered_file=None),\n",
330
+ " ConfidenceFilterResult(input_file='AF-P50613-F1-model_v4.cif', count=244, filtered_file=PosixPath('filtered/AF-P50613-F1-model_v4.cif'))]"
331
+ ]
332
+ },
333
+ "execution_count": 17,
334
+ "metadata": {},
335
+ "output_type": "execute_result"
336
+ }
337
+ ],
338
+ "source": [
339
+ "list(\n",
340
+ " filter_files_on_confidence(\n",
341
+ " input_files, ConfidenceFilterQuery(confidence=80, min_threshold=100, max_threshold=1000), output_dir\n",
342
+ " )\n",
343
+ ")"
344
+ ]
345
+ },
346
+ {
347
+ "cell_type": "markdown",
348
+ "id": "0fe1e388",
349
+ "metadata": {},
350
+ "source": [
351
+ "2 files have passed, but 1 file only has 76 high confidence residues, which is below the minimum of 100, so it is discarded."
352
+ ]
353
+ },
354
+ {
355
+ "cell_type": "code",
356
+ "execution_count": null,
357
+ "id": "83ffc09b",
358
+ "metadata": {},
359
+ "outputs": [],
360
+ "source": []
361
+ }
362
+ ],
363
+ "metadata": {
364
+ "kernelspec": {
365
+ "display_name": "protein-quest",
366
+ "language": "python",
367
+ "name": "python3"
368
+ },
369
+ "language_info": {
370
+ "codemirror_mode": {
371
+ "name": "ipython",
372
+ "version": 3
373
+ },
374
+ "file_extension": ".py",
375
+ "mimetype": "text/x-python",
376
+ "name": "python",
377
+ "nbconvert_exporter": "python",
378
+ "pygments_lexer": "ipython3",
379
+ "version": "3.13.2"
380
+ }
381
+ },
382
+ "nbformat": 4,
383
+ "nbformat_minor": 5
384
+ }
@@ -0,0 +1,3 @@
1
+ # Example notebooks
2
+
3
+ The Jupyter notebooks show how to use the protein-quest package via its API.