protein-quest 0.9.0__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- protein_quest/__version__.py +1 -1
- protein_quest/alphafold/confidence.py +1 -6
- protein_quest/alphafold/fetch.py +2 -1
- protein_quest/filters.py +7 -6
- protein_quest/parallel.py +82 -12
- protein_quest/structure.py +29 -5
- {protein_quest-0.9.0.dist-info → protein_quest-0.10.1.dist-info}/METADATA +8 -3
- {protein_quest-0.9.0.dist-info → protein_quest-0.10.1.dist-info}/RECORD +11 -11
- {protein_quest-0.9.0.dist-info → protein_quest-0.10.1.dist-info}/WHEEL +0 -0
- {protein_quest-0.9.0.dist-info → protein_quest-0.10.1.dist-info}/entry_points.txt +0 -0
- {protein_quest-0.9.0.dist-info → protein_quest-0.10.1.dist-info}/licenses/LICENSE +0 -0
protein_quest/__version__.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = "0.9.0"
|
|
1
|
+
__version__ = "0.10.1"
|
|
2
2
|
"""The version of the package."""
|
|
@@ -209,12 +209,7 @@ def filter_files_on_confidence(
|
|
|
209
209
|
copy_method=copy_method,
|
|
210
210
|
)
|
|
211
211
|
|
|
212
|
-
scheduler_address = configure_dask_scheduler(
|
|
213
|
-
scheduler_address,
|
|
214
|
-
name="filter-confidence",
|
|
215
|
-
)
|
|
216
|
-
|
|
217
|
-
with Client(scheduler_address) as client:
|
|
212
|
+
with configure_dask_scheduler(scheduler_address, name="filter-confidence") as cluster, Client(cluster) as client:
|
|
218
213
|
client.forward_logging()
|
|
219
214
|
return dask_map_with_progress(
|
|
220
215
|
client,
|
protein_quest/alphafold/fetch.py
CHANGED
|
@@ -114,7 +114,6 @@ class AlphaFoldEntry:
|
|
|
114
114
|
"""Convert paths in an AlphaFoldEntry to be relative to the session directory.
|
|
115
115
|
|
|
116
116
|
Args:
|
|
117
|
-
entry: An AlphaFoldEntry instance with absolute paths.
|
|
118
117
|
session_dir: The session directory to which the paths should be made relative.
|
|
119
118
|
|
|
120
119
|
Returns:
|
|
@@ -483,6 +482,7 @@ def fetch_many_async(
|
|
|
483
482
|
)
|
|
484
483
|
|
|
485
484
|
|
|
485
|
+
# jscpd:ignore-start # noqa: ERA001
|
|
486
486
|
def fetch_many(
|
|
487
487
|
uniprot_accessions: Iterable[str],
|
|
488
488
|
save_dir: Path,
|
|
@@ -492,6 +492,7 @@ def fetch_many(
|
|
|
492
492
|
cacher: Cacher | None = None,
|
|
493
493
|
gzip_files: bool = False,
|
|
494
494
|
all_isoforms: bool = False,
|
|
495
|
+
# jscpd:ignore-end # noqa: ERA001
|
|
495
496
|
) -> list[AlphaFoldEntry]:
|
|
496
497
|
"""Synchronously fetches summaries and/or files like cif from AlphaFold Protein Structure Database.
|
|
497
498
|
|
protein_quest/filters.py
CHANGED
|
@@ -96,12 +96,13 @@ def filter_files_on_chain(
|
|
|
96
96
|
|
|
97
97
|
# TODO make logger.debug in filter_file_on_chain show to user when --log
|
|
98
98
|
# GPT-5 generated a fairly difficult setup with a WorkerPlugin, need to find a simpler approach
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
99
|
+
with (
|
|
100
|
+
configure_dask_scheduler(
|
|
101
|
+
scheduler_address,
|
|
102
|
+
name="filter-chain",
|
|
103
|
+
) as cluster,
|
|
104
|
+
Client(cluster) as client,
|
|
105
|
+
):
|
|
105
106
|
client.forward_logging()
|
|
106
107
|
return dask_map_with_progress(
|
|
107
108
|
client,
|
protein_quest/parallel.py
CHANGED
|
@@ -2,36 +2,52 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
-
|
|
5
|
+
import sys
|
|
6
|
+
import warnings
|
|
7
|
+
from collections.abc import Callable, Collection, Iterator
|
|
8
|
+
from contextlib import contextmanager, suppress
|
|
6
9
|
from typing import Concatenate, ParamSpec, cast
|
|
7
10
|
|
|
8
|
-
from dask.distributed import Client, LocalCluster
|
|
11
|
+
from dask.distributed import Client, LocalCluster
|
|
9
12
|
from distributed.deploy.cluster import Cluster
|
|
13
|
+
from distributed.diagnostics.progress import format_time
|
|
14
|
+
from distributed.diagnostics.progressbar import ProgressBar
|
|
15
|
+
from distributed.utils import LoopRunner
|
|
10
16
|
from psutil import cpu_count
|
|
17
|
+
from tornado.ioloop import IOLoop
|
|
11
18
|
|
|
12
19
|
logger = logging.getLogger(__name__)
|
|
13
20
|
|
|
14
21
|
|
|
22
|
+
@contextmanager
|
|
15
23
|
def configure_dask_scheduler(
|
|
16
24
|
scheduler_address: str | Cluster | None,
|
|
17
25
|
name: str,
|
|
18
26
|
nproc: int = 1,
|
|
19
|
-
) -> str | Cluster:
|
|
20
|
-
"""
|
|
27
|
+
) -> Iterator[str | Cluster]:
|
|
28
|
+
"""Context manager that offers a Dask cluster.
|
|
29
|
+
|
|
30
|
+
If scheduler_address is None then creates a local Dask cluster
|
|
31
|
+
else returns scheduler_address unchanged and the callee is responsible for cluster cleanup.
|
|
21
32
|
|
|
22
33
|
Args:
|
|
23
34
|
scheduler_address: Address of the Dask scheduler to connect to, or None for local cluster.
|
|
24
35
|
name: Name for the Dask cluster.
|
|
25
36
|
nproc: Number of processes to use per worker for CPU support.
|
|
26
37
|
|
|
27
|
-
|
|
28
|
-
|
|
38
|
+
Yields:
|
|
39
|
+
The scheduler address as a string or a cluster.
|
|
29
40
|
"""
|
|
30
|
-
if scheduler_address is None:
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
41
|
+
if scheduler_address is not None:
|
|
42
|
+
# Pass through existing scheduler address or cluster
|
|
43
|
+
yield scheduler_address
|
|
44
|
+
return
|
|
45
|
+
cluster = _configure_cpu_dask_scheduler(nproc, name)
|
|
46
|
+
logger.info(f"Using local Dask cluster: {cluster}")
|
|
47
|
+
try:
|
|
48
|
+
yield cluster
|
|
49
|
+
finally:
|
|
50
|
+
cluster.close()
|
|
35
51
|
|
|
36
52
|
|
|
37
53
|
def nr_cpus() -> int:
|
|
@@ -74,6 +90,60 @@ def _configure_cpu_dask_scheduler(nproc: int, name: str) -> LocalCluster:
|
|
|
74
90
|
P = ParamSpec("P")
|
|
75
91
|
|
|
76
92
|
|
|
93
|
+
class _StderrTextProgressBar(ProgressBar):
|
|
94
|
+
"""Copy of distributed.diagnostics.progressbar.TextProgressBar that prints to stderr instead of stdout."""
|
|
95
|
+
|
|
96
|
+
__loop: IOLoop | None = None
|
|
97
|
+
|
|
98
|
+
def __init__(
|
|
99
|
+
self,
|
|
100
|
+
keys,
|
|
101
|
+
scheduler=None,
|
|
102
|
+
interval="100ms",
|
|
103
|
+
width=40,
|
|
104
|
+
loop=None,
|
|
105
|
+
complete=True,
|
|
106
|
+
start=True,
|
|
107
|
+
**kwargs, # noqa: ARG002
|
|
108
|
+
):
|
|
109
|
+
self._loop_runner = loop_runner = LoopRunner(loop=loop)
|
|
110
|
+
super().__init__(keys, scheduler, interval, complete)
|
|
111
|
+
self.width = width
|
|
112
|
+
|
|
113
|
+
if start:
|
|
114
|
+
loop_runner.run_sync(self.listen)
|
|
115
|
+
|
|
116
|
+
@property
|
|
117
|
+
def loop(self) -> IOLoop | None:
|
|
118
|
+
loop = self.__loop
|
|
119
|
+
if loop is None:
|
|
120
|
+
# If the loop is not running when this is called, the LoopRunner.loop
|
|
121
|
+
# property will raise a DeprecationWarning
|
|
122
|
+
# However subsequent calls might occur - eg atexit, where a stopped
|
|
123
|
+
# loop is still acceptable - so we cache access to the loop.
|
|
124
|
+
self.__loop = loop = self._loop_runner.loop
|
|
125
|
+
return loop
|
|
126
|
+
|
|
127
|
+
@loop.setter
|
|
128
|
+
def loop(self, value: IOLoop) -> None:
|
|
129
|
+
warnings.warn("setting the loop property is deprecated", DeprecationWarning, stacklevel=2)
|
|
130
|
+
self.__loop = value
|
|
131
|
+
|
|
132
|
+
def _draw_bar(self, remaining, all, **kwargs): # noqa: A002, ARG002
|
|
133
|
+
frac = (1 - remaining / all) if all else 1.0
|
|
134
|
+
bar = "#" * int(self.width * frac)
|
|
135
|
+
percent = int(100 * frac)
|
|
136
|
+
elapsed = format_time(self.elapsed)
|
|
137
|
+
msg = "\r[{0:<{1}}] | {2}% Completed | {3}".format(bar, self.width, percent, elapsed)
|
|
138
|
+
with suppress(ValueError):
|
|
139
|
+
sys.stderr.write(msg)
|
|
140
|
+
sys.stderr.flush()
|
|
141
|
+
|
|
142
|
+
def _draw_stop(self, **kwargs): # noqa: ARG002
|
|
143
|
+
sys.stderr.write("\33[2K\r")
|
|
144
|
+
sys.stderr.flush()
|
|
145
|
+
|
|
146
|
+
|
|
77
147
|
def dask_map_with_progress[T, R, **P](
|
|
78
148
|
client: Client,
|
|
79
149
|
func: Callable[Concatenate[T, P], R],
|
|
@@ -99,6 +169,6 @@ def dask_map_with_progress[T, R, **P](
|
|
|
99
169
|
if client.dashboard_link:
|
|
100
170
|
logger.info(f"Follow progress on dask dashboard at: {client.dashboard_link}")
|
|
101
171
|
futures = client.map(func, iterable, *args, **kwargs)
|
|
102
|
-
|
|
172
|
+
_StderrTextProgressBar(futures)
|
|
103
173
|
results = client.gather(futures)
|
|
104
174
|
return cast("list[R]", results)
|
protein_quest/structure.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
"""Module for querying and modifying [gemmi structures][gemmi.Structure]."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from collections.abc import Iterable
|
|
5
4
|
from datetime import UTC, datetime
|
|
6
5
|
from pathlib import Path
|
|
7
6
|
|
|
@@ -123,11 +122,28 @@ def chains_in_structure(structure: gemmi.Structure) -> set[gemmi.Chain]:
|
|
|
123
122
|
class ChainNotFoundError(IndexError):
|
|
124
123
|
"""Exception raised when a chain is not found in a structure."""
|
|
125
124
|
|
|
126
|
-
def __init__(self,
|
|
127
|
-
super().__init__(f"Chain {
|
|
128
|
-
self.
|
|
125
|
+
def __init__(self, chain_id: str, file: Path | str, available_chains: set[str]):
|
|
126
|
+
super().__init__(f"Chain {chain_id} not found in {file}. Available chains are: {available_chains}")
|
|
127
|
+
self.available_chains = available_chains
|
|
128
|
+
self.chain_id = chain_id
|
|
129
129
|
self.file = file
|
|
130
130
|
|
|
131
|
+
def __reduce__(self):
|
|
132
|
+
"""Helper for pickling the exception."""
|
|
133
|
+
return (self.__class__, (self.chain_id, self.file, self.available_chains))
|
|
134
|
+
|
|
135
|
+
def __eq__(self, other):
|
|
136
|
+
if not isinstance(other, ChainNotFoundError):
|
|
137
|
+
return NotImplemented
|
|
138
|
+
return (
|
|
139
|
+
self.chain_id == other.chain_id
|
|
140
|
+
and self.file == other.file
|
|
141
|
+
and self.available_chains == other.available_chains
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
def __hash__(self):
|
|
145
|
+
return hash((self.chain_id, str(self.file), frozenset(self.available_chains)))
|
|
146
|
+
|
|
131
147
|
|
|
132
148
|
def write_single_chain_structure_file(
|
|
133
149
|
input_file: Path,
|
|
@@ -194,7 +210,7 @@ def write_single_chain_structure_file(
|
|
|
194
210
|
copyfile(input_file, output_file, copy_method)
|
|
195
211
|
return output_file
|
|
196
212
|
|
|
197
|
-
gemmi.Selection(chain_name).remove_not_selected(structure)
|
|
213
|
+
gemmi.Selection(f"/1/{chain_name}").remove_not_selected(structure)
|
|
198
214
|
for m in structure:
|
|
199
215
|
m.remove_ligands_and_waters()
|
|
200
216
|
structure.setup_entities()
|
|
@@ -203,6 +219,14 @@ def write_single_chain_structure_file(
|
|
|
203
219
|
_dedup_sheets(structure, out_chain)
|
|
204
220
|
_add_provenance_info(structure, chain_name, out_chain)
|
|
205
221
|
|
|
222
|
+
if not (len(structure) == 1 and len(structure[0]) == 1 and len(structure[0][out_chain]) > 0):
|
|
223
|
+
msg = (
|
|
224
|
+
f"After processing, structure does not have exactly one model ({len(structure)}) "
|
|
225
|
+
f"with one chain (found {len(structure[0])}) called {out_chain} "
|
|
226
|
+
f"with some residues ({len(structure[0][out_chain])})."
|
|
227
|
+
)
|
|
228
|
+
raise ValueError(msg)
|
|
229
|
+
|
|
206
230
|
write_structure(structure, output_file)
|
|
207
231
|
|
|
208
232
|
return output_file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: protein_quest
|
|
3
|
-
Version: 0.9.0
|
|
3
|
+
Version: 0.10.1
|
|
4
4
|
Summary: Search/retrieve/filter proteins and protein structures
|
|
5
5
|
Project-URL: Homepage, https://github.com/haddocking/protein-quest
|
|
6
6
|
Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
|
|
@@ -35,9 +35,14 @@ Description-Content-Type: text/markdown
|
|
|
35
35
|
[](https://www.bonvinlab.org/protein-quest/)
|
|
36
36
|
[](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
|
|
37
37
|
[](https://www.research-software.nl/software/protein-quest)
|
|
38
|
+
[](https://bio.tools/protein-quest)
|
|
38
39
|
[](https://pypi.org/project/protein-quest/)
|
|
39
40
|
[](https://doi.org/10.5281/zenodo.16941288)
|
|
40
41
|
[](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
|
|
42
|
+
[](https://fairsoftwarechecklist.net/v0.2?f=31&a=32113&i=32121&r=133)
|
|
43
|
+
[](https://fair-software.eu)
|
|
44
|
+
[](https://github.com/kucherenko/jscpd/)
|
|
45
|
+
|
|
41
46
|
|
|
42
47
|
Python package to search/retrieve/filter proteins and protein structures.
|
|
43
48
|
|
|
@@ -104,7 +109,7 @@ pip install git+https://github.com/haddocking/protein-quest.git
|
|
|
104
109
|
|
|
105
110
|
The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
|
|
106
111
|
|
|
107
|
-
To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/
|
|
112
|
+
To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/protein_quest/).
|
|
108
113
|
|
|
109
114
|
While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
|
|
110
115
|
This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
|
|
@@ -302,7 +307,7 @@ The mcp server contains an prompt template to search/retrieve/filter candidate s
|
|
|
302
307
|
|
|
303
308
|
## Shell autocompletion
|
|
304
309
|
|
|
305
|
-
The `protein-quest` command line tool supports shell autocompletion using [shtab](https://
|
|
310
|
+
The `protein-quest` command line tool supports shell autocompletion using [shtab](https://docs.iterative.ai/shtab).
|
|
306
311
|
|
|
307
312
|
Initialize for bash shell with:
|
|
308
313
|
|
|
@@ -1,27 +1,27 @@
|
|
|
1
1
|
protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
protein_quest/__version__.py,sha256=
|
|
2
|
+
protein_quest/__version__.py,sha256=qCmLtq4uktEgR1D3vZFBaO_0UsYFvPWt7gmxqgWwku0,57
|
|
3
3
|
protein_quest/cli.py,sha256=aWqdAeseUm7s8UGmrPFNfJPW6W83RmpJAsEy4sZscQY,57506
|
|
4
4
|
protein_quest/converter.py,sha256=Y-Oxf7lDNbEicL6GS-IpNWDwaAiHgIgs5bFAcEHCKdQ,1441
|
|
5
5
|
protein_quest/emdb.py,sha256=641c6RwNYnu-0GBFyCFBiI58fNc0jMkd0ZZ9MW9-Jmc,1501
|
|
6
|
-
protein_quest/filters.py,sha256=
|
|
6
|
+
protein_quest/filters.py,sha256=em1FYD7Y9z98ZSaJGYCv1VCGRADLbat8FfSOlNJNAJM,5663
|
|
7
7
|
protein_quest/go.py,sha256=lZNEcw8nTc9wpV3cl4y2FG9Lsj8wsXQ6zemmAQs_DWE,5650
|
|
8
8
|
protein_quest/io.py,sha256=ngV_HU2HIQFO-bP2xQj_fhgv0MYjW4puqz_9CxGpBv8,13017
|
|
9
9
|
protein_quest/mcp_server.py,sha256=ZmEs18crS_Ce1-b_PM4m5kmS5C8lLlcrgpocTt7GVrg,8551
|
|
10
|
-
protein_quest/parallel.py,sha256=
|
|
10
|
+
protein_quest/parallel.py,sha256=uf26nD5l1Gp4Z5AFgb0K3vNBUlzvfFh8NSDbGzePSr0,5856
|
|
11
11
|
protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
protein_quest/ss.py,sha256=4ZGIHfjTlodYTXqGUKhMnGbgaStYOGaWg2oYrWIjdgo,10118
|
|
13
|
-
protein_quest/structure.py,sha256=
|
|
13
|
+
protein_quest/structure.py,sha256=3TdzrXbGpmnskp3gjwVevwD1tfhKfAUPOHWi9ViaheM,9101
|
|
14
14
|
protein_quest/taxonomy.py,sha256=4mKv8zll4mX02Ow8CTvyqMJE2KJZvcq3QlTjjjLOJJk,5072
|
|
15
15
|
protein_quest/uniprot.py,sha256=kV1lOZ_ugcF-LUff9hvmJPaGwA_uaHPJCL_3DLBIvSE,36798
|
|
16
16
|
protein_quest/utils.py,sha256=5Ncdid-dslggy-Ti1yhOHwdAM7Bxpyia7Re-xDkc2P0,19909
|
|
17
17
|
protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
|
|
18
|
-
protein_quest/alphafold/confidence.py,sha256=
|
|
18
|
+
protein_quest/alphafold/confidence.py,sha256=UtS2MJEReaZ1kTXbQf8Vrc9gzGjAOiGLYs4glqN-1do,8098
|
|
19
19
|
protein_quest/alphafold/entry_summary.py,sha256=Qhnw75RXFaoOU332g7axg_jYbbdZbUpsGPUOwPNDSeU,2114
|
|
20
|
-
protein_quest/alphafold/fetch.py,sha256=
|
|
20
|
+
protein_quest/alphafold/fetch.py,sha256=D-RWKWo5kWpCko_LNT_sslzrpeR3HX9nu5F4MUOFRtI,21979
|
|
21
21
|
protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
|
|
22
22
|
protein_quest/pdbe/fetch.py,sha256=e8CHWDX2QzWnVLmYXCfNrscw1UcN1lI9Uz6Z5HmEOEQ,2510
|
|
23
|
-
protein_quest-0.
|
|
24
|
-
protein_quest-0.
|
|
25
|
-
protein_quest-0.
|
|
26
|
-
protein_quest-0.
|
|
27
|
-
protein_quest-0.
|
|
23
|
+
protein_quest-0.10.1.dist-info/METADATA,sha256=Mz2JLKRAqBvcbMsr1I1rdeYlJK8lTUXCX3AwBpSywxI,11939
|
|
24
|
+
protein_quest-0.10.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
25
|
+
protein_quest-0.10.1.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
|
|
26
|
+
protein_quest-0.10.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
27
|
+
protein_quest-0.10.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|