protein-quest 0.10.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- protein_quest/__version__.py +1 -1
- protein_quest/alphafold/fetch.py +2 -1
- protein_quest/parallel.py +80 -3
- protein_quest/structure.py +12 -0
- {protein_quest-0.10.0.dist-info → protein_quest-1.0.0.dist-info}/METADATA +23 -3
- {protein_quest-0.10.0.dist-info → protein_quest-1.0.0.dist-info}/RECORD +9 -9
- {protein_quest-0.10.0.dist-info → protein_quest-1.0.0.dist-info}/WHEEL +0 -0
- {protein_quest-0.10.0.dist-info → protein_quest-1.0.0.dist-info}/entry_points.txt +0 -0
- {protein_quest-0.10.0.dist-info → protein_quest-1.0.0.dist-info}/licenses/LICENSE +0 -0
protein_quest/__version__.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "1.0.0"
|
|
2
2
|
"""The version of the package."""
|
protein_quest/alphafold/fetch.py
CHANGED
|
@@ -114,7 +114,6 @@ class AlphaFoldEntry:
|
|
|
114
114
|
"""Convert paths in an AlphaFoldEntry to be relative to the session directory.
|
|
115
115
|
|
|
116
116
|
Args:
|
|
117
|
-
entry: An AlphaFoldEntry instance with absolute paths.
|
|
118
117
|
session_dir: The session directory to which the paths should be made relative.
|
|
119
118
|
|
|
120
119
|
Returns:
|
|
@@ -483,6 +482,7 @@ def fetch_many_async(
|
|
|
483
482
|
)
|
|
484
483
|
|
|
485
484
|
|
|
485
|
+
# jscpd:ignore-start # noqa: ERA001
|
|
486
486
|
def fetch_many(
|
|
487
487
|
uniprot_accessions: Iterable[str],
|
|
488
488
|
save_dir: Path,
|
|
@@ -492,6 +492,7 @@ def fetch_many(
|
|
|
492
492
|
cacher: Cacher | None = None,
|
|
493
493
|
gzip_files: bool = False,
|
|
494
494
|
all_isoforms: bool = False,
|
|
495
|
+
# jscpd:ignore-end # noqa: ERA001
|
|
495
496
|
) -> list[AlphaFoldEntry]:
|
|
496
497
|
"""Synchronously fetches summaries and/or files like cif from AlphaFold Protein Structure Database.
|
|
497
498
|
|
protein_quest/parallel.py
CHANGED
|
@@ -2,13 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
+
import sys
|
|
6
|
+
import warnings
|
|
5
7
|
from collections.abc import Callable, Collection, Iterator
|
|
6
|
-
from contextlib import contextmanager
|
|
8
|
+
from contextlib import contextmanager, suppress
|
|
7
9
|
from typing import Concatenate, ParamSpec, cast
|
|
8
10
|
|
|
9
|
-
from dask.distributed import Client, LocalCluster
|
|
11
|
+
from dask.distributed import Client, LocalCluster
|
|
10
12
|
from distributed.deploy.cluster import Cluster
|
|
13
|
+
from distributed.diagnostics.progress import format_time
|
|
14
|
+
from distributed.diagnostics.progressbar import ProgressBar
|
|
15
|
+
from distributed.utils import LoopRunner
|
|
11
16
|
from psutil import cpu_count
|
|
17
|
+
from tornado.ioloop import IOLoop
|
|
12
18
|
|
|
13
19
|
logger = logging.getLogger(__name__)
|
|
14
20
|
|
|
@@ -80,6 +86,72 @@ def _configure_cpu_dask_scheduler(nproc: int, name: str) -> LocalCluster:
|
|
|
80
86
|
return LocalCluster(name=name, threads_per_worker=1, n_workers=n_workers)
|
|
81
87
|
|
|
82
88
|
|
|
89
|
+
class MyProgressBar(ProgressBar):
|
|
90
|
+
"""Show progress of Dask computations.
|
|
91
|
+
|
|
92
|
+
Copy of distributed.diagnostics.progressbar.TextProgressBar that:
|
|
93
|
+
|
|
94
|
+
- prints to stderr instead of stdout
|
|
95
|
+
- can have its interval (in seconds) set with the `TQDM_MININTERVAL` environment variable
|
|
96
|
+
|
|
97
|
+
"""
|
|
98
|
+
|
|
99
|
+
__loop: IOLoop | None = None
|
|
100
|
+
|
|
101
|
+
def __init__(
|
|
102
|
+
self,
|
|
103
|
+
keys,
|
|
104
|
+
scheduler=None,
|
|
105
|
+
interval="100ms",
|
|
106
|
+
width=40,
|
|
107
|
+
loop=None,
|
|
108
|
+
complete=True,
|
|
109
|
+
start=True,
|
|
110
|
+
**kwargs, # noqa: ARG002
|
|
111
|
+
):
|
|
112
|
+
self._loop_runner = loop_runner = LoopRunner(loop=loop)
|
|
113
|
+
if interval == "100ms":
|
|
114
|
+
interval_env = os.getenv("TQDM_MININTERVAL")
|
|
115
|
+
if interval_env is not None:
|
|
116
|
+
interval = interval_env + "s"
|
|
117
|
+
|
|
118
|
+
super().__init__(keys, scheduler, interval, complete)
|
|
119
|
+
self.width = width
|
|
120
|
+
|
|
121
|
+
if start:
|
|
122
|
+
loop_runner.run_sync(self.listen)
|
|
123
|
+
|
|
124
|
+
@property
|
|
125
|
+
def loop(self) -> IOLoop | None:
|
|
126
|
+
loop = self.__loop
|
|
127
|
+
if loop is None:
|
|
128
|
+
# If the loop is not running when this is called, the LoopRunner.loop
|
|
129
|
+
# property will raise a DeprecationWarning
|
|
130
|
+
# However subsequent calls might occur - eg atexit, where a stopped
|
|
131
|
+
# loop is still acceptable - so we cache access to the loop.
|
|
132
|
+
self.__loop = loop = self._loop_runner.loop
|
|
133
|
+
return loop
|
|
134
|
+
|
|
135
|
+
@loop.setter
|
|
136
|
+
def loop(self, value: IOLoop) -> None:
|
|
137
|
+
warnings.warn("setting the loop property is deprecated", DeprecationWarning, stacklevel=2)
|
|
138
|
+
self.__loop = value
|
|
139
|
+
|
|
140
|
+
def _draw_bar(self, remaining, all, **kwargs): # noqa: A002, ARG002
|
|
141
|
+
frac = (1 - remaining / all) if all else 1.0
|
|
142
|
+
bar = "#" * int(self.width * frac)
|
|
143
|
+
percent = int(100 * frac)
|
|
144
|
+
elapsed = format_time(self.elapsed)
|
|
145
|
+
msg = "\r[{0:<{1}}] | {2}% Completed | {3}".format(bar, self.width, percent, elapsed)
|
|
146
|
+
with suppress(ValueError):
|
|
147
|
+
sys.stderr.write(msg)
|
|
148
|
+
sys.stderr.flush()
|
|
149
|
+
|
|
150
|
+
def _draw_stop(self, **kwargs): # noqa: ARG002
|
|
151
|
+
sys.stderr.write("\33[2K\r")
|
|
152
|
+
sys.stderr.flush()
|
|
153
|
+
|
|
154
|
+
|
|
83
155
|
# Generic type parameters used across helpers
|
|
84
156
|
P = ParamSpec("P")
|
|
85
157
|
|
|
@@ -94,6 +166,10 @@ def dask_map_with_progress[T, R, **P](
|
|
|
94
166
|
"""
|
|
95
167
|
Wrapper for map, progress, and gather of Dask that returns a correctly typed list.
|
|
96
168
|
|
|
169
|
+
Environment variables:
|
|
170
|
+
- Set interval (in seconds) of progress updates with `TQDM_MININTERVAL`
|
|
171
|
+
- Disabled by setting `TQDM_DISABLE` to any non-empty value
|
|
172
|
+
|
|
97
173
|
Args:
|
|
98
174
|
client: Dask client.
|
|
99
175
|
func: Function to map; first parameter comes from ``iterable`` and any
|
|
@@ -109,6 +185,7 @@ def dask_map_with_progress[T, R, **P](
|
|
|
109
185
|
if client.dashboard_link:
|
|
110
186
|
logger.info(f"Follow progress on dask dashboard at: {client.dashboard_link}")
|
|
111
187
|
futures = client.map(func, iterable, *args, **kwargs)
|
|
112
|
-
|
|
188
|
+
if not os.getenv("TQDM_DISABLE"):
|
|
189
|
+
MyProgressBar(futures)
|
|
113
190
|
results = client.gather(futures)
|
|
114
191
|
return cast("list[R]", results)
|
protein_quest/structure.py
CHANGED
|
@@ -132,6 +132,18 @@ class ChainNotFoundError(IndexError):
|
|
|
132
132
|
"""Helper for pickling the exception."""
|
|
133
133
|
return (self.__class__, (self.chain_id, self.file, self.available_chains))
|
|
134
134
|
|
|
135
|
+
def __eq__(self, other):
|
|
136
|
+
if not isinstance(other, ChainNotFoundError):
|
|
137
|
+
return NotImplemented
|
|
138
|
+
return (
|
|
139
|
+
self.chain_id == other.chain_id
|
|
140
|
+
and self.file == other.file
|
|
141
|
+
and self.available_chains == other.available_chains
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
def __hash__(self):
|
|
145
|
+
return hash((self.chain_id, str(self.file), frozenset(self.available_chains)))
|
|
146
|
+
|
|
135
147
|
|
|
136
148
|
def write_single_chain_structure_file(
|
|
137
149
|
input_file: Path,
|
|
@@ -1,12 +1,27 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: protein_quest
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: Search/retrieve/filter proteins and protein structures
|
|
5
5
|
Project-URL: Homepage, https://github.com/haddocking/protein-quest
|
|
6
6
|
Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
|
|
7
7
|
Project-URL: Documentation, https://www.bonvinlab.org/protein-quest/
|
|
8
8
|
Project-URL: Source, https://github.com/haddocking/protein-quest
|
|
9
9
|
License-File: LICENSE
|
|
10
|
+
Keywords: alphafold,mmcif,pdb,protein,protein structure,uniprot
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Framework :: AsyncIO
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Classifier: Natural Language :: English
|
|
17
|
+
Classifier: Operating System :: MacOS
|
|
18
|
+
Classifier: Operating System :: POSIX
|
|
19
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
24
|
+
Classifier: Typing :: Typed
|
|
10
25
|
Requires-Python: >=3.13
|
|
11
26
|
Requires-Dist: aiofiles>=24.1.0
|
|
12
27
|
Requires-Dist: aiohttp-retry>=2.9.1
|
|
@@ -35,9 +50,14 @@ Description-Content-Type: text/markdown
|
|
|
35
50
|
[](https://www.bonvinlab.org/protein-quest/)
|
|
36
51
|
[](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
|
|
37
52
|
[](https://www.research-software.nl/software/protein-quest)
|
|
53
|
+
[](https://bio.tools/protein-quest)
|
|
38
54
|
[](https://pypi.org/project/protein-quest/)
|
|
39
55
|
[](https://doi.org/10.5281/zenodo.16941288)
|
|
40
56
|
[](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
|
|
57
|
+
[](https://fairsoftwarechecklist.net/v0.2?f=31&a=32113&i=32121&r=133)
|
|
58
|
+
[](https://fair-software.eu)
|
|
59
|
+
[](https://github.com/kucherenko/jscpd/)
|
|
60
|
+
|
|
41
61
|
|
|
42
62
|
Python package to search/retrieve/filter proteins and protein structures.
|
|
43
63
|
|
|
@@ -104,7 +124,7 @@ pip install git+https://github.com/haddocking/protein-quest.git
|
|
|
104
124
|
|
|
105
125
|
The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
|
|
106
126
|
|
|
107
|
-
To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/
|
|
127
|
+
To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/protein_quest/).
|
|
108
128
|
|
|
109
129
|
While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
|
|
110
130
|
This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
|
|
@@ -302,7 +322,7 @@ The mcp server contains an prompt template to search/retrieve/filter candidate s
|
|
|
302
322
|
|
|
303
323
|
## Shell autocompletion
|
|
304
324
|
|
|
305
|
-
The `protein-quest` command line tool supports shell autocompletion using [shtab](https://
|
|
325
|
+
The `protein-quest` command line tool supports shell autocompletion using [shtab](https://docs.iterative.ai/shtab).
|
|
306
326
|
|
|
307
327
|
Initialize for bash shell with:
|
|
308
328
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
protein_quest/__version__.py,sha256=
|
|
2
|
+
protein_quest/__version__.py,sha256=fb4tmcrpQ8DpX5nyj0S9YU6dOsGM0L_N3rGiboRYQ-8,56
|
|
3
3
|
protein_quest/cli.py,sha256=aWqdAeseUm7s8UGmrPFNfJPW6W83RmpJAsEy4sZscQY,57506
|
|
4
4
|
protein_quest/converter.py,sha256=Y-Oxf7lDNbEicL6GS-IpNWDwaAiHgIgs5bFAcEHCKdQ,1441
|
|
5
5
|
protein_quest/emdb.py,sha256=641c6RwNYnu-0GBFyCFBiI58fNc0jMkd0ZZ9MW9-Jmc,1501
|
|
@@ -7,21 +7,21 @@ protein_quest/filters.py,sha256=em1FYD7Y9z98ZSaJGYCv1VCGRADLbat8FfSOlNJNAJM,5663
|
|
|
7
7
|
protein_quest/go.py,sha256=lZNEcw8nTc9wpV3cl4y2FG9Lsj8wsXQ6zemmAQs_DWE,5650
|
|
8
8
|
protein_quest/io.py,sha256=ngV_HU2HIQFO-bP2xQj_fhgv0MYjW4puqz_9CxGpBv8,13017
|
|
9
9
|
protein_quest/mcp_server.py,sha256=ZmEs18crS_Ce1-b_PM4m5kmS5C8lLlcrgpocTt7GVrg,8551
|
|
10
|
-
protein_quest/parallel.py,sha256=
|
|
10
|
+
protein_quest/parallel.py,sha256=hmwjv-KeiC7qSs5xApAvh3ZKkJ9HDW5zmr1zuwOzFpg,6367
|
|
11
11
|
protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
protein_quest/ss.py,sha256=4ZGIHfjTlodYTXqGUKhMnGbgaStYOGaWg2oYrWIjdgo,10118
|
|
13
|
-
protein_quest/structure.py,sha256=
|
|
13
|
+
protein_quest/structure.py,sha256=3TdzrXbGpmnskp3gjwVevwD1tfhKfAUPOHWi9ViaheM,9101
|
|
14
14
|
protein_quest/taxonomy.py,sha256=4mKv8zll4mX02Ow8CTvyqMJE2KJZvcq3QlTjjjLOJJk,5072
|
|
15
15
|
protein_quest/uniprot.py,sha256=kV1lOZ_ugcF-LUff9hvmJPaGwA_uaHPJCL_3DLBIvSE,36798
|
|
16
16
|
protein_quest/utils.py,sha256=5Ncdid-dslggy-Ti1yhOHwdAM7Bxpyia7Re-xDkc2P0,19909
|
|
17
17
|
protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
|
|
18
18
|
protein_quest/alphafold/confidence.py,sha256=UtS2MJEReaZ1kTXbQf8Vrc9gzGjAOiGLYs4glqN-1do,8098
|
|
19
19
|
protein_quest/alphafold/entry_summary.py,sha256=Qhnw75RXFaoOU332g7axg_jYbbdZbUpsGPUOwPNDSeU,2114
|
|
20
|
-
protein_quest/alphafold/fetch.py,sha256=
|
|
20
|
+
protein_quest/alphafold/fetch.py,sha256=D-RWKWo5kWpCko_LNT_sslzrpeR3HX9nu5F4MUOFRtI,21979
|
|
21
21
|
protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
|
|
22
22
|
protein_quest/pdbe/fetch.py,sha256=e8CHWDX2QzWnVLmYXCfNrscw1UcN1lI9Uz6Z5HmEOEQ,2510
|
|
23
|
-
protein_quest-0.
|
|
24
|
-
protein_quest-0.
|
|
25
|
-
protein_quest-0.
|
|
26
|
-
protein_quest-0.
|
|
27
|
-
protein_quest-0.
|
|
23
|
+
protein_quest-1.0.0.dist-info/METADATA,sha256=6BeMJwGMFaHE03fo_Eqc-fGAz3NeX8SiLQb6tsJwz5I,12652
|
|
24
|
+
protein_quest-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
25
|
+
protein_quest-1.0.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
|
|
26
|
+
protein_quest-1.0.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
27
|
+
protein_quest-1.0.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|