protein-quest 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of protein-quest might be problematic. Click here for more details.
- protein_quest/__version__.py +2 -1
- protein_quest/alphafold/confidence.py +44 -17
- protein_quest/alphafold/entry_summary.py +11 -9
- protein_quest/alphafold/fetch.py +37 -63
- protein_quest/cli.py +187 -30
- protein_quest/converter.py +45 -0
- protein_quest/filters.py +78 -35
- protein_quest/go.py +1 -4
- protein_quest/mcp_server.py +8 -5
- protein_quest/parallel.py +37 -1
- protein_quest/pdbe/fetch.py +15 -1
- protein_quest/pdbe/io.py +142 -46
- protein_quest/ss.py +264 -0
- protein_quest/taxonomy.py +13 -3
- protein_quest/utils.py +65 -3
- {protein_quest-0.3.0.dist-info → protein_quest-0.3.2.dist-info}/METADATA +21 -11
- protein_quest-0.3.2.dist-info/RECORD +26 -0
- protein_quest-0.3.0.dist-info/RECORD +0 -24
- {protein_quest-0.3.0.dist-info → protein_quest-0.3.2.dist-info}/WHEEL +0 -0
- {protein_quest-0.3.0.dist-info → protein_quest-0.3.2.dist-info}/entry_points.txt +0 -0
- {protein_quest-0.3.0.dist-info → protein_quest-0.3.2.dist-info}/licenses/LICENSE +0 -0
protein_quest/utils.py
CHANGED
|
@@ -2,20 +2,24 @@
|
|
|
2
2
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import logging
|
|
5
|
-
|
|
5
|
+
import shutil
|
|
6
|
+
from collections.abc import Coroutine, Iterable
|
|
6
7
|
from contextlib import asynccontextmanager
|
|
7
8
|
from pathlib import Path
|
|
9
|
+
from textwrap import dedent
|
|
10
|
+
from typing import Any, Literal, get_args
|
|
8
11
|
|
|
9
12
|
import aiofiles
|
|
10
13
|
import aiohttp
|
|
11
14
|
from aiohttp_retry import ExponentialRetry, RetryClient
|
|
12
15
|
from tqdm.asyncio import tqdm
|
|
16
|
+
from yarl import URL
|
|
13
17
|
|
|
14
18
|
logger = logging.getLogger(__name__)
|
|
15
19
|
|
|
16
20
|
|
|
17
21
|
async def retrieve_files(
|
|
18
|
-
urls: Iterable[tuple[str, str]],
|
|
22
|
+
urls: Iterable[tuple[URL | str, str]],
|
|
19
23
|
save_dir: Path,
|
|
20
24
|
max_parallel_downloads: int = 5,
|
|
21
25
|
retries: int = 3,
|
|
@@ -45,7 +49,7 @@ async def retrieve_files(
|
|
|
45
49
|
|
|
46
50
|
async def _retrieve_file(
|
|
47
51
|
session: RetryClient,
|
|
48
|
-
url: str,
|
|
52
|
+
url: URL | str,
|
|
49
53
|
save_path: Path,
|
|
50
54
|
semaphore: asyncio.Semaphore,
|
|
51
55
|
ovewrite: bool = False,
|
|
@@ -103,3 +107,61 @@ async def friendly_session(retries: int = 3, total_timeout: int = 300):
|
|
|
103
107
|
async with aiohttp.ClientSession(timeout=timeout) as session:
|
|
104
108
|
client = RetryClient(client_session=session, retry_options=retry_options)
|
|
105
109
|
yield client
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class NestedAsyncIOLoopError(RuntimeError):
    """Error signalling that async code was started while an asyncio event loop is already running.

    The constructor takes no arguments; every instance carries the same
    explanatory message with the suggested workarounds.
    """

    def __init__(self) -> None:
        # dedent strips the common indentation so the message starts at column 0.
        super().__init__(
            dedent(
                """\
                Can not run async method from an environment where the asyncio event loop is already running.
                Like a Jupyter notebook.

                Please use the async function directly or
                call `import nest_asyncio; nest_asyncio.apply()` and try again.
                """
            )
        )
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def run_async[R](coroutine: Coroutine[Any, Any, R]) -> R:
|
|
127
|
+
"""Run an async coroutine with nicer error.
|
|
128
|
+
|
|
129
|
+
Args:
|
|
130
|
+
coroutine: The async coroutine to run.
|
|
131
|
+
|
|
132
|
+
Returns:
|
|
133
|
+
The result of the coroutine.
|
|
134
|
+
|
|
135
|
+
Raises:
|
|
136
|
+
NestedAsyncIOLoopError: If called from a nested async I/O loop like in a Jupyter notebook.
|
|
137
|
+
"""
|
|
138
|
+
try:
|
|
139
|
+
return asyncio.run(coroutine)
|
|
140
|
+
except RuntimeError as e:
|
|
141
|
+
raise NestedAsyncIOLoopError from e
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# Allowed values for the `copy_method` argument of `copyfile`.
CopyMethod = Literal["copy", "symlink"]
|
|
145
|
+
# Runtime set holding the literal values declared by `CopyMethod`.
copy_methods = set(get_args(CopyMethod))
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def copyfile(source: Path, target: Path, copy_method: CopyMethod = "copy"):
    """Make target path be same file as source by either copying or symlinking.

    With "symlink" the link created at target points to source through a path
    that is relative to the directory of target.

    Args:
        source: The source file to copy or symlink.
        target: The target file to create.
        copy_method: The method to use for copying.

    Raises:
        FileNotFoundError: If the source file or parent of target does not exist.
        ValueError: If the method is not "copy" or "symlink".
    """
    if copy_method == "copy":
        shutil.copyfile(source, target)
    elif copy_method == "symlink":
        # Creating a symlink never inspects what it points to, so a missing
        # source must be rejected here to honour the documented
        # FileNotFoundError instead of leaving a dangling link behind.
        if not source.exists():
            msg = f"Source file does not exist: {source}"
            raise FileNotFoundError(msg)
        # walk_up=True allows ".." segments, so source may live outside the
        # directory of target.
        rel_source = source.relative_to(target.parent, walk_up=True)
        target.symlink_to(rel_source)
    else:
        msg = f"Unknown method: {copy_method}"
        raise ValueError(msg)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: protein_quest
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: Search/retrieve/filter proteins and protein structures
|
|
5
5
|
Project-URL: Homepage, https://github.com/haddocking/protein-quest
|
|
6
6
|
Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
|
|
@@ -13,19 +13,16 @@ Requires-Dist: aiohttp-retry>=2.9.1
|
|
|
13
13
|
Requires-Dist: aiohttp[speedups]>=3.11.18
|
|
14
14
|
Requires-Dist: aiopath>=0.7.7
|
|
15
15
|
Requires-Dist: attrs>=25.3.0
|
|
16
|
-
Requires-Dist: bokeh>=3.7.3
|
|
17
16
|
Requires-Dist: cattrs[orjson]>=24.1.3
|
|
18
17
|
Requires-Dist: dask>=2025.5.1
|
|
19
18
|
Requires-Dist: distributed>=2025.5.1
|
|
20
19
|
Requires-Dist: gemmi>=0.7.3
|
|
21
|
-
Requires-Dist: molviewspec>=1.6.0
|
|
22
|
-
Requires-Dist: pandas>=2.3.0
|
|
23
|
-
Requires-Dist: platformdirs>=4.3.8
|
|
24
20
|
Requires-Dist: psutil>=7.0.0
|
|
25
21
|
Requires-Dist: rich-argparse>=1.7.1
|
|
26
22
|
Requires-Dist: rich>=14.0.0
|
|
27
23
|
Requires-Dist: sparqlwrapper>=2.0.0
|
|
28
24
|
Requires-Dist: tqdm>=4.67.1
|
|
25
|
+
Requires-Dist: yarl>=1.20.1
|
|
29
26
|
Provides-Extra: mcp
|
|
30
27
|
Requires-Dist: fastmcp>=2.11.3; extra == 'mcp'
|
|
31
28
|
Requires-Dist: pydantic>=2.11.7; extra == 'mcp'
|
|
@@ -37,8 +34,7 @@ Description-Content-Type: text/markdown
|
|
|
37
34
|
[CI](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
|
|
38
35
|
[Research Software Directory](https://www.research-software.nl/software/protein-quest)
|
|
39
36
|
[PyPI](https://pypi.org/project/protein-quest/)
|
|
40
|
-
|
|
41
|
-
[DOI](https://doi.org/10.5281/zenodo.15632658)
|
|
37
|
+
[DOI](https://doi.org/10.5281/zenodo.16941288)
|
|
42
38
|
[Codacy coverage](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
|
|
43
39
|
|
|
44
40
|
Python package to search/retrieve/filter proteins and protein structures.
|
|
@@ -63,9 +59,11 @@ graph TB;
|
|
|
63
59
|
searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
|
|
64
60
|
searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
|
|
65
61
|
searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
|
|
66
|
-
fetchpdbe -->|mmcif_files_with_uniprot_acc| chainfilter{Filter on chain of uniprot}
|
|
67
|
-
chainfilter --> |mmcif_files| residuefilter{Filter on chain length}
|
|
68
|
-
fetchad -->|pdb_files| confidencefilter{Filter out low confidence}
|
|
62
|
+
fetchpdbe -->|mmcif_files_with_uniprot_acc| chainfilter{{Filter on chain of uniprot}}
|
|
63
|
+
chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
|
|
64
|
+
fetchad -->|pdb_files| confidencefilter{{Filter out low confidence}}
|
|
65
|
+
confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
|
|
66
|
+
residuefilter --> |mmcif_files| ssfilter
|
|
69
67
|
classDef dashedBorder stroke-dasharray: 5 5;
|
|
70
68
|
goterm:::dashedBorder
|
|
71
69
|
taxonomy:::dashedBorder
|
|
@@ -90,7 +88,7 @@ pip install git+https://github.com/haddocking/protein-quest.git
|
|
|
90
88
|
|
|
91
89
|
The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
|
|
92
90
|
|
|
93
|
-
To use programmatically, see [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
|
|
91
|
+
To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
|
|
94
92
|
|
|
95
93
|
### Search Uniprot accessions
|
|
96
94
|
|
|
@@ -179,6 +177,18 @@ protein-quest filter residue \
|
|
|
179
177
|
./filtered-chains ./filtered
|
|
180
178
|
```
|
|
181
179
|
|
|
180
|
+
### To filter on secondary structure
|
|
181
|
+
|
|
182
|
+
To keep only structures that are mostly alpha helices and have no beta sheets.
|
|
183
|
+
|
|
184
|
+
```shell
|
|
185
|
+
protein-quest filter secondary-structure \
|
|
186
|
+
--ratio-min-helix-residues 0.5 \
|
|
187
|
+
--ratio-max-sheet-residues 0.0 \
|
|
188
|
+
--write-stats filtered-ss/stats.csv \
|
|
189
|
+
./filtered-chains ./filtered-ss
|
|
190
|
+
```
|
|
191
|
+
|
|
182
192
|
### Search Taxonomy
|
|
183
193
|
|
|
184
194
|
```shell
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
protein_quest/__version__.py,sha256=tDIN8WjNdFKRoXsf6tArV0_n6nbcPEBWNv1zuhaRbKo,56
|
|
3
|
+
protein_quest/cli.py,sha256=k4HC282QkbAAIk614vIJgaKfkS3XD9hYj7E5hEuiDxA,37893
|
|
4
|
+
protein_quest/converter.py,sha256=tSDw7HOlC7UoWryr_G-sHGzGq8nwflzSq8o7Gv1hWuQ,1382
|
|
5
|
+
protein_quest/emdb.py,sha256=QEeU0VJQ4lLM-o5yAU3QZlrtzDZNgnC5fCjlqPtTyAY,1370
|
|
6
|
+
protein_quest/filters.py,sha256=-gasSXR4g5SzYSYbkfcDwR-tm2KCAhCMdpIVJrUPR1w,5224
|
|
7
|
+
protein_quest/go.py,sha256=lZNEcw8nTc9wpV3cl4y2FG9Lsj8wsXQ6zemmAQs_DWE,5650
|
|
8
|
+
protein_quest/mcp_server.py,sha256=auftrx4aBZp1P-pBcunkPiSmXLtOIZ6MTuhUuW7yrGY,7241
|
|
9
|
+
protein_quest/parallel.py,sha256=ZJrLO1t2HXs4EeNctytvBTyROPBq-4-gLf35PiolHf0,3468
|
|
10
|
+
protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
+
protein_quest/ss.py,sha256=MMHgqKPxjYpjyExiqslWjmyG7aeForeAeJorCYdh75g,9663
|
|
12
|
+
protein_quest/taxonomy.py,sha256=4mKv8zll4mX02Ow8CTvyqMJE2KJZvcq3QlTjjjLOJJk,5072
|
|
13
|
+
protein_quest/uniprot.py,sha256=8qWV4GWqHTRfed0bE_TdgsLYcnDT_vzKu-6JxIgapJQ,18680
|
|
14
|
+
protein_quest/utils.py,sha256=z4PPPcog6nvPhA93DWVf7stv5uJ4h_2BP5owdhoO5mo,5626
|
|
15
|
+
protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
|
|
16
|
+
protein_quest/alphafold/confidence.py,sha256=pYIuwYdkuPuHLagcX1dSvSyZ_84xboRLfHUxkEoc4MY,6766
|
|
17
|
+
protein_quest/alphafold/entry_summary.py,sha256=GtE3rT7wH3vIOOeiXY2s80Fo6EzdoqlcvakW8K591Yk,1257
|
|
18
|
+
protein_quest/alphafold/fetch.py,sha256=iFHORaO-2NvPwmpm33tfOFUcSJx8mBGwMXxwc4bRuk8,11336
|
|
19
|
+
protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
|
|
20
|
+
protein_quest/pdbe/fetch.py,sha256=tlCrWoaOrwxnQFrf-PnimUUa6lmtHwwysS51efYsBcA,2379
|
|
21
|
+
protein_quest/pdbe/io.py,sha256=iGLvmsD-eEYnrgZDYfkGWIDCzwDRRD5dwqB480talCs,10037
|
|
22
|
+
protein_quest-0.3.2.dist-info/METADATA,sha256=wcURSjBlmkCt-ddhZX7xRYrL-7tT1VuBpJ36_mP0Iuk,7760
|
|
23
|
+
protein_quest-0.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
24
|
+
protein_quest-0.3.2.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
|
|
25
|
+
protein_quest-0.3.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
26
|
+
protein_quest-0.3.2.dist-info/RECORD,,
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
protein_quest/__version__.py,sha256=VrXpHDu3erkzwl_WXrqINBm9xWkcyUy53IQOj042dOs,22
|
|
3
|
-
protein_quest/cli.py,sha256=oyDin6Z92Q17mUmTCasKgju3YUJbPu298gniNakQUwY,31121
|
|
4
|
-
protein_quest/emdb.py,sha256=QEeU0VJQ4lLM-o5yAU3QZlrtzDZNgnC5fCjlqPtTyAY,1370
|
|
5
|
-
protein_quest/filters.py,sha256=GNtM1N1S1mNUqAvX7OvyhOvnUWo4qx2hMneORbc-Qz8,3797
|
|
6
|
-
protein_quest/go.py,sha256=ycV3-grxuIKFt28bFgH6iRKmt5AEGi7txoTbaAnBxQE,5684
|
|
7
|
-
protein_quest/mcp_server.py,sha256=xIaOy6sY_gW5R_oMImI2yBmbBGtZZICOxXLzOkFmm-w,7197
|
|
8
|
-
protein_quest/parallel.py,sha256=kCH6KCJYJZVoq0_Qz8ZLbHnf2OJG-h4uxd9oH2rLNKc,2201
|
|
9
|
-
protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
-
protein_quest/taxonomy.py,sha256=kAKKZT_mOtmX8ZWNIE9i7emE23VEewkj12X7d_t3p2Y,4659
|
|
11
|
-
protein_quest/uniprot.py,sha256=8qWV4GWqHTRfed0bE_TdgsLYcnDT_vzKu-6JxIgapJQ,18680
|
|
12
|
-
protein_quest/utils.py,sha256=HUvqfsuMBIFOVFlb_QC2to_UQkiZ0_fwHLlckifuXss,3700
|
|
13
|
-
protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
|
|
14
|
-
protein_quest/alphafold/confidence.py,sha256=-lbwijzVMhRd98bxwFDbSi7idiUKJ5BpOsGFrvuTEnQ,5596
|
|
15
|
-
protein_quest/alphafold/entry_summary.py,sha256=P-S8qrXkU-wwIccA1nGol1lfDkUW0Sg0th_3EU-WjN8,1187
|
|
16
|
-
protein_quest/alphafold/fetch.py,sha256=eq__PfqisuUIQBUM8KVghpiEOBGF-zXWNC6Ll_Hlz2E,11828
|
|
17
|
-
protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
|
|
18
|
-
protein_quest/pdbe/fetch.py,sha256=iTyS4ucV2KZl4jTgrUFOZhsXs3cRUIuvmTbXNm_pY8U,1850
|
|
19
|
-
protein_quest/pdbe/io.py,sha256=0ldsrIHKaaurrM2FfWXbqm1iRj3q6xw8-lptfYU1yEw,6231
|
|
20
|
-
protein_quest-0.3.0.dist-info/METADATA,sha256=yiHZn4gDdwilbCoxrF0pCjVk04v_O5pwpwrtr6oPLrE,7369
|
|
21
|
-
protein_quest-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
22
|
-
protein_quest-0.3.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
|
|
23
|
-
protein_quest-0.3.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
24
|
-
protein_quest-0.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|