protein-quest 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of protein-quest might be problematic. Click here for more details.

protein_quest/utils.py CHANGED
@@ -2,20 +2,24 @@
2
2
 
3
3
  import asyncio
4
4
  import logging
5
- from collections.abc import Iterable
5
+ import shutil
6
+ from collections.abc import Coroutine, Iterable
6
7
  from contextlib import asynccontextmanager
7
8
  from pathlib import Path
9
+ from textwrap import dedent
10
+ from typing import Any, Literal, get_args
8
11
 
9
12
  import aiofiles
10
13
  import aiohttp
11
14
  from aiohttp_retry import ExponentialRetry, RetryClient
12
15
  from tqdm.asyncio import tqdm
16
+ from yarl import URL
13
17
 
14
18
  logger = logging.getLogger(__name__)
15
19
 
16
20
 
17
21
  async def retrieve_files(
18
- urls: Iterable[tuple[str, str]],
22
+ urls: Iterable[tuple[URL | str, str]],
19
23
  save_dir: Path,
20
24
  max_parallel_downloads: int = 5,
21
25
  retries: int = 3,
@@ -45,7 +49,7 @@ async def retrieve_files(
45
49
 
46
50
  async def _retrieve_file(
47
51
  session: RetryClient,
48
- url: str,
52
+ url: URL | str,
49
53
  save_path: Path,
50
54
  semaphore: asyncio.Semaphore,
51
55
  ovewrite: bool = False,
@@ -103,3 +107,61 @@ async def friendly_session(retries: int = 3, total_timeout: int = 300):
103
107
  async with aiohttp.ClientSession(timeout=timeout) as session:
104
108
  client = RetryClient(client_session=session, retry_options=retry_options)
105
109
  yield client
110
+
111
+
112
class NestedAsyncIOLoopError(RuntimeError):
    """Error raised when an asyncio event loop is already running.

    The exception takes no arguments; its message is fixed and tells the user
    how to work around the nested event loop (for example in a Jupyter notebook).
    """

    def __init__(self) -> None:
        msg = dedent("""\
            Can not run async method from an environment where the asyncio event loop is already running.
            Like a Jupyter notebook.

            Please use the async function directly or
            call `import nest_asyncio; nest_asyncio.apply()` and try again.
            """)
        super().__init__(msg)

    def __reduce__(self) -> tuple[type, tuple]:
        """Rebuild the exception without arguments when copying or pickling.

        The default ``BaseException.__reduce__`` re-creates the instance as
        ``cls(*self.args)``, which would pass the message to the zero-argument
        ``__init__`` and fail with ``TypeError``.
        """
        return (type(self), ())
124
+
125
+
126
+ def run_async[R](coroutine: Coroutine[Any, Any, R]) -> R:
127
+ """Run an async coroutine with nicer error.
128
+
129
+ Args:
130
+ coroutine: The async coroutine to run.
131
+
132
+ Returns:
133
+ The result of the coroutine.
134
+
135
+ Raises:
136
+ NestedAsyncIOLoopError: If called from a nested async I/O loop like in a Jupyter notebook.
137
+ """
138
+ try:
139
+ return asyncio.run(coroutine)
140
+ except RuntimeError as e:
141
+ raise NestedAsyncIOLoopError from e
142
+
143
+
144
+ CopyMethod = Literal["copy", "symlink"]
145
+ copy_methods = set(get_args(CopyMethod))
146
+
147
+
148
+ def copyfile(source: Path, target: Path, copy_method: CopyMethod = "copy"):
149
+ """Make target path be same file as source by either copying or symlinking.
150
+
151
+ Args:
152
+ source: The source file to copy or symlink.
153
+ target: The target file to create.
154
+ copy_method: The method to use for copying.
155
+
156
+ Raises:
157
+ FileNotFoundError: If the source file or parent of target does not exist.
158
+ ValueError: If the method is not "copy" or "symlink".
159
+ """
160
+ if copy_method == "copy":
161
+ shutil.copyfile(source, target)
162
+ elif copy_method == "symlink":
163
+ rel_source = source.relative_to(target.parent, walk_up=True)
164
+ target.symlink_to(rel_source)
165
+ else:
166
+ msg = f"Unknown method: {copy_method}"
167
+ raise ValueError(msg)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: protein_quest
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Search/retrieve/filter proteins and protein structures
5
5
  Project-URL: Homepage, https://github.com/haddocking/protein-quest
6
6
  Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
@@ -13,19 +13,16 @@ Requires-Dist: aiohttp-retry>=2.9.1
13
13
  Requires-Dist: aiohttp[speedups]>=3.11.18
14
14
  Requires-Dist: aiopath>=0.7.7
15
15
  Requires-Dist: attrs>=25.3.0
16
- Requires-Dist: bokeh>=3.7.3
17
16
  Requires-Dist: cattrs[orjson]>=24.1.3
18
17
  Requires-Dist: dask>=2025.5.1
19
18
  Requires-Dist: distributed>=2025.5.1
20
19
  Requires-Dist: gemmi>=0.7.3
21
- Requires-Dist: molviewspec>=1.6.0
22
- Requires-Dist: pandas>=2.3.0
23
- Requires-Dist: platformdirs>=4.3.8
24
20
  Requires-Dist: psutil>=7.0.0
25
21
  Requires-Dist: rich-argparse>=1.7.1
26
22
  Requires-Dist: rich>=14.0.0
27
23
  Requires-Dist: sparqlwrapper>=2.0.0
28
24
  Requires-Dist: tqdm>=4.67.1
25
+ Requires-Dist: yarl>=1.20.1
29
26
  Provides-Extra: mcp
30
27
  Requires-Dist: fastmcp>=2.11.3; extra == 'mcp'
31
28
  Requires-Dist: pydantic>=2.11.7; extra == 'mcp'
@@ -37,8 +34,7 @@ Description-Content-Type: text/markdown
37
34
  [![CI](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml/badge.svg)](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
38
35
  [![Research Software Directory Badge](https://img.shields.io/badge/rsd-00a3e3.svg)](https://www.research-software.nl/software/protein-quest)
39
36
  [![PyPI](https://img.shields.io/pypi/v/protein-quest)](https://pypi.org/project/protein-quest/)
40
- <!-- TODO replace with correct zenodo id -->
41
- [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15632658.svg)](https://doi.org/10.5281/zenodo.15632658)
37
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.16941288.svg)](https://doi.org/10.5281/zenodo.16941288)
42
38
  [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/7a3f3f1fe64640d583a5e50fe7ba828e)](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
43
39
 
44
40
  Python package to search/retrieve/filter proteins and protein structures.
@@ -63,9 +59,11 @@ graph TB;
63
59
  searchpdbe -->|pdb_ids|fetchpdbe[Retrieve PDBe]
64
60
  searchaf --> |uniprot_accessions|fetchad(Retrieve AlphaFold)
65
61
  searchemdb -. emdb_ids .->fetchemdb[Retrieve EMDB]
66
- fetchpdbe -->|mmcif_files_with_uniprot_acc| chainfilter{Filter on chain of uniprot}
67
- chainfilter --> |mmcif_files| residuefilter{Filter on chain length}
68
- fetchad -->|pdb_files| confidencefilter{Filter out low confidence}
62
+ fetchpdbe -->|mmcif_files_with_uniprot_acc| chainfilter{{Filter on chain of uniprot}}
63
+ chainfilter --> |mmcif_files| residuefilter{{Filter on chain length}}
64
+ fetchad -->|pdb_files| confidencefilter{{Filter out low confidence}}
65
+ confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
66
+ residuefilter --> |mmcif_files| ssfilter
69
67
  classDef dashedBorder stroke-dasharray: 5 5;
70
68
  goterm:::dashedBorder
71
69
  taxonomy:::dashedBorder
@@ -90,7 +88,7 @@ pip install git+https://github.com/haddocking/protein-quest.git
90
88
 
91
89
  The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
92
90
 
93
- To use programmatically, see [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
91
+ To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
94
92
 
95
93
  ### Search Uniprot accessions
96
94
 
@@ -179,6 +177,18 @@ protein-quest filter residue \
179
177
  ./filtered-chains ./filtered
180
178
  ```
181
179
 
180
+ ### To filter on secondary structure
181
+
182
+ To keep only structures that are mostly alpha helices and have no beta sheets:
183
+
184
+ ```shell
185
+ protein-quest filter secondary-structure \
186
+ --ratio-min-helix-residues 0.5 \
187
+ --ratio-max-sheet-residues 0.0 \
188
+ --write-stats filtered-ss/stats.csv \
189
+ ./filtered-chains ./filtered-ss
190
+ ```
191
+
182
192
  ### Search Taxonomy
183
193
 
184
194
  ```shell
@@ -0,0 +1,26 @@
1
+ protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ protein_quest/__version__.py,sha256=tDIN8WjNdFKRoXsf6tArV0_n6nbcPEBWNv1zuhaRbKo,56
3
+ protein_quest/cli.py,sha256=k4HC282QkbAAIk614vIJgaKfkS3XD9hYj7E5hEuiDxA,37893
4
+ protein_quest/converter.py,sha256=tSDw7HOlC7UoWryr_G-sHGzGq8nwflzSq8o7Gv1hWuQ,1382
5
+ protein_quest/emdb.py,sha256=QEeU0VJQ4lLM-o5yAU3QZlrtzDZNgnC5fCjlqPtTyAY,1370
6
+ protein_quest/filters.py,sha256=-gasSXR4g5SzYSYbkfcDwR-tm2KCAhCMdpIVJrUPR1w,5224
7
+ protein_quest/go.py,sha256=lZNEcw8nTc9wpV3cl4y2FG9Lsj8wsXQ6zemmAQs_DWE,5650
8
+ protein_quest/mcp_server.py,sha256=auftrx4aBZp1P-pBcunkPiSmXLtOIZ6MTuhUuW7yrGY,7241
9
+ protein_quest/parallel.py,sha256=ZJrLO1t2HXs4EeNctytvBTyROPBq-4-gLf35PiolHf0,3468
10
+ protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ protein_quest/ss.py,sha256=MMHgqKPxjYpjyExiqslWjmyG7aeForeAeJorCYdh75g,9663
12
+ protein_quest/taxonomy.py,sha256=4mKv8zll4mX02Ow8CTvyqMJE2KJZvcq3QlTjjjLOJJk,5072
13
+ protein_quest/uniprot.py,sha256=8qWV4GWqHTRfed0bE_TdgsLYcnDT_vzKu-6JxIgapJQ,18680
14
+ protein_quest/utils.py,sha256=z4PPPcog6nvPhA93DWVf7stv5uJ4h_2BP5owdhoO5mo,5626
15
+ protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
16
+ protein_quest/alphafold/confidence.py,sha256=pYIuwYdkuPuHLagcX1dSvSyZ_84xboRLfHUxkEoc4MY,6766
17
+ protein_quest/alphafold/entry_summary.py,sha256=GtE3rT7wH3vIOOeiXY2s80Fo6EzdoqlcvakW8K591Yk,1257
18
+ protein_quest/alphafold/fetch.py,sha256=iFHORaO-2NvPwmpm33tfOFUcSJx8mBGwMXxwc4bRuk8,11336
19
+ protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
20
+ protein_quest/pdbe/fetch.py,sha256=tlCrWoaOrwxnQFrf-PnimUUa6lmtHwwysS51efYsBcA,2379
21
+ protein_quest/pdbe/io.py,sha256=iGLvmsD-eEYnrgZDYfkGWIDCzwDRRD5dwqB480talCs,10037
22
+ protein_quest-0.3.2.dist-info/METADATA,sha256=wcURSjBlmkCt-ddhZX7xRYrL-7tT1VuBpJ36_mP0Iuk,7760
23
+ protein_quest-0.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
24
+ protein_quest-0.3.2.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
25
+ protein_quest-0.3.2.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
26
+ protein_quest-0.3.2.dist-info/RECORD,,
@@ -1,24 +0,0 @@
1
- protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- protein_quest/__version__.py,sha256=VrXpHDu3erkzwl_WXrqINBm9xWkcyUy53IQOj042dOs,22
3
- protein_quest/cli.py,sha256=oyDin6Z92Q17mUmTCasKgju3YUJbPu298gniNakQUwY,31121
4
- protein_quest/emdb.py,sha256=QEeU0VJQ4lLM-o5yAU3QZlrtzDZNgnC5fCjlqPtTyAY,1370
5
- protein_quest/filters.py,sha256=GNtM1N1S1mNUqAvX7OvyhOvnUWo4qx2hMneORbc-Qz8,3797
6
- protein_quest/go.py,sha256=ycV3-grxuIKFt28bFgH6iRKmt5AEGi7txoTbaAnBxQE,5684
7
- protein_quest/mcp_server.py,sha256=xIaOy6sY_gW5R_oMImI2yBmbBGtZZICOxXLzOkFmm-w,7197
8
- protein_quest/parallel.py,sha256=kCH6KCJYJZVoq0_Qz8ZLbHnf2OJG-h4uxd9oH2rLNKc,2201
9
- protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- protein_quest/taxonomy.py,sha256=kAKKZT_mOtmX8ZWNIE9i7emE23VEewkj12X7d_t3p2Y,4659
11
- protein_quest/uniprot.py,sha256=8qWV4GWqHTRfed0bE_TdgsLYcnDT_vzKu-6JxIgapJQ,18680
12
- protein_quest/utils.py,sha256=HUvqfsuMBIFOVFlb_QC2to_UQkiZ0_fwHLlckifuXss,3700
13
- protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
14
- protein_quest/alphafold/confidence.py,sha256=-lbwijzVMhRd98bxwFDbSi7idiUKJ5BpOsGFrvuTEnQ,5596
15
- protein_quest/alphafold/entry_summary.py,sha256=P-S8qrXkU-wwIccA1nGol1lfDkUW0Sg0th_3EU-WjN8,1187
16
- protein_quest/alphafold/fetch.py,sha256=eq__PfqisuUIQBUM8KVghpiEOBGF-zXWNC6Ll_Hlz2E,11828
17
- protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
18
- protein_quest/pdbe/fetch.py,sha256=iTyS4ucV2KZl4jTgrUFOZhsXs3cRUIuvmTbXNm_pY8U,1850
19
- protein_quest/pdbe/io.py,sha256=0ldsrIHKaaurrM2FfWXbqm1iRj3q6xw8-lptfYU1yEw,6231
20
- protein_quest-0.3.0.dist-info/METADATA,sha256=yiHZn4gDdwilbCoxrF0pCjVk04v_O5pwpwrtr6oPLrE,7369
21
- protein_quest-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
22
- protein_quest-0.3.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
23
- protein_quest-0.3.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
24
- protein_quest-0.3.0.dist-info/RECORD,,