protein-quest 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of protein-quest might be problematic. Click here for more details.
- protein_quest/__version__.py +1 -1
- protein_quest/alphafold/confidence.py +2 -2
- protein_quest/alphafold/fetch.py +28 -19
- protein_quest/cli.py +133 -68
- protein_quest/filters.py +2 -5
- protein_quest/io.py +350 -0
- protein_quest/mcp_server.py +8 -5
- protein_quest/ss.py +3 -7
- protein_quest/{pdbe/io.py → structure.py} +53 -126
- protein_quest/uniprot.py +7 -3
- protein_quest/utils.py +26 -2
- {protein_quest-0.5.0.dist-info → protein_quest-0.6.0.dist-info}/METADATA +12 -1
- protein_quest-0.6.0.dist-info/RECORD +27 -0
- protein_quest-0.5.0.dist-info/RECORD +0 -26
- {protein_quest-0.5.0.dist-info → protein_quest-0.6.0.dist-info}/WHEEL +0 -0
- {protein_quest-0.5.0.dist-info → protein_quest-0.6.0.dist-info}/entry_points.txt +0 -0
- {protein_quest-0.5.0.dist-info → protein_quest-0.6.0.dist-info}/licenses/LICENSE +0 -0
protein_quest/uniprot.py
CHANGED
|
@@ -525,7 +525,9 @@ def _build_complex_sparql_query(uniprot_accs: Iterable[str], limit: int) -> str:
|
|
|
525
525
|
?protein
|
|
526
526
|
?cp_db
|
|
527
527
|
?cp_comment
|
|
528
|
-
(GROUP_CONCAT(
|
|
528
|
+
(GROUP_CONCAT(
|
|
529
|
+
DISTINCT STRAFTER(STR(?member), "http://purl.uniprot.org/uniprot/"); separator=","
|
|
530
|
+
) AS ?complex_members)
|
|
529
531
|
(COUNT(DISTINCT ?member) AS ?member_count)
|
|
530
532
|
WHERE {
|
|
531
533
|
# Input UniProt accessions
|
|
@@ -550,7 +552,9 @@ def _build_complex_sparql_query(uniprot_accs: Iterable[str], limit: int) -> str:
|
|
|
550
552
|
"""
|
|
551
553
|
select_clause = dedent("""\
|
|
552
554
|
?protein ?cp_db ?cp_comment
|
|
553
|
-
(GROUP_CONCAT(
|
|
555
|
+
(GROUP_CONCAT(
|
|
556
|
+
DISTINCT STRAFTER(STR(?member), "http://purl.uniprot.org/uniprot/"); separator=","
|
|
557
|
+
) AS ?complex_members)
|
|
554
558
|
""")
|
|
555
559
|
where_clause = dedent("""
|
|
556
560
|
# --- Complex Info ---
|
|
@@ -596,7 +600,7 @@ def _flatten_results_complex(raw_results) -> list[ComplexPortalEntry]:
|
|
|
596
600
|
complex_id = raw_result["cp_db"]["value"].split("/")[-1]
|
|
597
601
|
complex_url = f"https://www.ebi.ac.uk/complexportal/complex/{complex_id}"
|
|
598
602
|
complex_title = raw_result.get("cp_comment", {}).get("value", "")
|
|
599
|
-
members =
|
|
603
|
+
members = set(raw_result["complex_members"]["value"].split(","))
|
|
600
604
|
results.append(
|
|
601
605
|
ComplexPortalEntry(
|
|
602
606
|
query_protein=query_protein,
|
protein_quest/utils.py
CHANGED
|
@@ -265,6 +265,7 @@ async def retrieve_files(
|
|
|
265
265
|
desc: str = "Downloading files",
|
|
266
266
|
cacher: Cacher | None = None,
|
|
267
267
|
chunk_size: int = 524288, # 512 KiB
|
|
268
|
+
gzip_files: bool = False,
|
|
268
269
|
) -> list[Path]:
|
|
269
270
|
"""Retrieve files from a list of URLs and save them to a directory.
|
|
270
271
|
|
|
@@ -277,6 +278,7 @@ async def retrieve_files(
|
|
|
277
278
|
desc: Description for the progress bar.
|
|
278
279
|
cacher: An optional cacher to use for caching files.
|
|
279
280
|
chunk_size: The size of each chunk to read from the response.
|
|
281
|
+
gzip_files: Whether to gzip the downloaded files.
|
|
280
282
|
|
|
281
283
|
Returns:
|
|
282
284
|
A list of paths to the downloaded files.
|
|
@@ -292,6 +294,7 @@ async def retrieve_files(
|
|
|
292
294
|
semaphore=semaphore,
|
|
293
295
|
cacher=cacher,
|
|
294
296
|
chunk_size=chunk_size,
|
|
297
|
+
gzip_files=gzip_files,
|
|
295
298
|
)
|
|
296
299
|
for url, filename in urls
|
|
297
300
|
]
|
|
@@ -299,6 +302,10 @@ async def retrieve_files(
|
|
|
299
302
|
return files
|
|
300
303
|
|
|
301
304
|
|
|
305
|
+
class InvalidContentEncodingError(aiohttp.ClientResponseError):
|
|
306
|
+
"""Content encoding is invalid."""
|
|
307
|
+
|
|
308
|
+
|
|
302
309
|
async def _retrieve_file(
|
|
303
310
|
session: RetryClient,
|
|
304
311
|
url: URL | str,
|
|
@@ -306,6 +313,7 @@ async def _retrieve_file(
|
|
|
306
313
|
semaphore: asyncio.Semaphore,
|
|
307
314
|
cacher: Cacher | None = None,
|
|
308
315
|
chunk_size: int = 524288, # 512 KiB
|
|
316
|
+
gzip_files: bool = False,
|
|
309
317
|
) -> Path:
|
|
310
318
|
"""Retrieve a single file from a URL and save it to a specified path.
|
|
311
319
|
|
|
@@ -316,6 +324,7 @@ async def _retrieve_file(
|
|
|
316
324
|
semaphore: A semaphore to limit the number of concurrent downloads.
|
|
317
325
|
cacher: An optional cacher to use for caching files.
|
|
318
326
|
chunk_size: The size of each chunk to read from the response.
|
|
327
|
+
gzip_files: Whether to gzip the downloaded file.
|
|
319
328
|
|
|
320
329
|
Returns:
|
|
321
330
|
The path to the saved file.
|
|
@@ -330,12 +339,27 @@ async def _retrieve_file(
|
|
|
330
339
|
logger.debug(f"File {save_path} was copied from cache {cached_file}. Skipping download from {url}.")
|
|
331
340
|
return save_path
|
|
332
341
|
|
|
342
|
+
# Alphafold server and many other web servers can return gzipped responses,
|
|
343
|
+
# when we want to save as *.gz, we use raw stream
|
|
344
|
+
# otherwise aiohttp will decompress it automatically for us.
|
|
345
|
+
auto_decompress = not gzip_files
|
|
346
|
+
headers = {"Accept-Encoding": "gzip"}
|
|
333
347
|
async with (
|
|
334
348
|
semaphore,
|
|
335
|
-
session.get(url) as resp,
|
|
349
|
+
session.get(url, headers=headers, auto_decompress=auto_decompress) as resp,
|
|
336
350
|
):
|
|
337
351
|
resp.raise_for_status()
|
|
338
|
-
|
|
352
|
+
if gzip_files and resp.headers.get("Content-Encoding") != "gzip":
|
|
353
|
+
msg = f"Server did not send gzip encoded content for {url}, can not save as gzipped file."
|
|
354
|
+
raise InvalidContentEncodingError(
|
|
355
|
+
request_info=resp.request_info,
|
|
356
|
+
history=resp.history,
|
|
357
|
+
status=415,
|
|
358
|
+
message=msg,
|
|
359
|
+
headers=resp.headers,
|
|
360
|
+
)
|
|
361
|
+
iterator = resp.content.iter_chunked(chunk_size)
|
|
362
|
+
await cacher.write_iter(save_path, iterator)
|
|
339
363
|
return save_path
|
|
340
364
|
|
|
341
365
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: protein_quest
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Search/retrieve/filter proteins and protein structures
|
|
5
5
|
Project-URL: Homepage, https://github.com/haddocking/protein-quest
|
|
6
6
|
Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
|
|
@@ -17,6 +17,7 @@ Requires-Dist: cattrs[orjson]>=24.1.3
|
|
|
17
17
|
Requires-Dist: dask>=2025.5.1
|
|
18
18
|
Requires-Dist: distributed>=2025.5.1
|
|
19
19
|
Requires-Dist: gemmi>=0.7.3
|
|
20
|
+
Requires-Dist: mmcif>=0.92.0
|
|
20
21
|
Requires-Dist: platformdirs>=4.3.8
|
|
21
22
|
Requires-Dist: psutil>=7.0.0
|
|
22
23
|
Requires-Dist: rich-argparse>=1.7.1
|
|
@@ -71,6 +72,7 @@ graph TB;
|
|
|
71
72
|
fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
|
|
72
73
|
confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
|
|
73
74
|
residuefilter --> |mmcif_files| ssfilter
|
|
75
|
+
ssfilter -. mmcif_files .-> convert2cif([Convert to cif])
|
|
74
76
|
classDef dashedBorder stroke-dasharray: 5 5;
|
|
75
77
|
goterm:::dashedBorder
|
|
76
78
|
taxonomy:::dashedBorder
|
|
@@ -78,6 +80,7 @@ graph TB;
|
|
|
78
80
|
fetchemdb:::dashedBorder
|
|
79
81
|
searchintactionpartners:::dashedBorder
|
|
80
82
|
searchcomplexes:::dashedBorder
|
|
83
|
+
convert2cif:::dashedBorder
|
|
81
84
|
```
|
|
82
85
|
|
|
83
86
|
(Dotted nodes and edges are side-quests.)
|
|
@@ -242,6 +245,14 @@ query_protein,complex_id,complex_url,complex_title,members
|
|
|
242
245
|
Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
|
|
243
246
|
```
|
|
244
247
|
|
|
248
|
+
### Convert structure files to .cif format
|
|
249
|
+
|
|
250
|
+
Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only work with `.cif` files and not `*.cif.gz` or `*.bcif` files.
|
|
251
|
+
|
|
252
|
+
```shell
|
|
253
|
+
protein-quest convert --output-dir ./filtered-cif ./filtered-ss
|
|
254
|
+
```
|
|
255
|
+
|
|
245
256
|
## Model Context Protocol (MCP) server
|
|
246
257
|
|
|
247
258
|
Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
protein_quest/__version__.py,sha256=z_nR_Ti0YfIwFSKDD18DIrz_r3zxWQ8EGCNr2XUWkY0,56
|
|
3
|
+
protein_quest/cli.py,sha256=pWwMIzWBrtqhZbvTIkvd1XhA5u9J-WAAg7A3hJZGtlk,46201
|
|
4
|
+
protein_quest/converter.py,sha256=Y-Oxf7lDNbEicL6GS-IpNWDwaAiHgIgs5bFAcEHCKdQ,1441
|
|
5
|
+
protein_quest/emdb.py,sha256=641c6RwNYnu-0GBFyCFBiI58fNc0jMkd0ZZ9MW9-Jmc,1501
|
|
6
|
+
protein_quest/filters.py,sha256=Xr-cJTtbNjHKuzmXLBf7yZfqKf_U3RTivcVbr620LVU,5225
|
|
7
|
+
protein_quest/go.py,sha256=lZNEcw8nTc9wpV3cl4y2FG9Lsj8wsXQ6zemmAQs_DWE,5650
|
|
8
|
+
protein_quest/io.py,sha256=ngV_HU2HIQFO-bP2xQj_fhgv0MYjW4puqz_9CxGpBv8,13017
|
|
9
|
+
protein_quest/mcp_server.py,sha256=rQv2srhF3_SYYK1TD3htIyxNiunU7a8FDC7CYT_oJFE,8269
|
|
10
|
+
protein_quest/parallel.py,sha256=ZJrLO1t2HXs4EeNctytvBTyROPBq-4-gLf35PiolHf0,3468
|
|
11
|
+
protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
protein_quest/ss.py,sha256=4ZGIHfjTlodYTXqGUKhMnGbgaStYOGaWg2oYrWIjdgo,10118
|
|
13
|
+
protein_quest/structure.py,sha256=1FTKN0mYKTwZHlyIB4ORSAgSHFKK-UAK7T-qoFo1vyI,7162
|
|
14
|
+
protein_quest/taxonomy.py,sha256=4mKv8zll4mX02Ow8CTvyqMJE2KJZvcq3QlTjjjLOJJk,5072
|
|
15
|
+
protein_quest/uniprot.py,sha256=92G5YiJAJwUBKJQHPrM6DZlaLe-XG4qBg0zy0BDGFYY,24354
|
|
16
|
+
protein_quest/utils.py,sha256=6OF8X4ia_z1HOYiXy6e-zEWlp_bF1DoZCVrCSg1qivY,19076
|
|
17
|
+
protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
|
|
18
|
+
protein_quest/alphafold/confidence.py,sha256=mVAYTIzdbR8xBjRiUzA0at8wJq9vpfEQWPz5cJefLKs,6766
|
|
19
|
+
protein_quest/alphafold/entry_summary.py,sha256=GtE3rT7wH3vIOOeiXY2s80Fo6EzdoqlcvakW8K591Yk,1257
|
|
20
|
+
protein_quest/alphafold/fetch.py,sha256=n5SlqbQfU1PE4X8saV4O1nCrKRn3Q2UcMlrNw5-163w,12801
|
|
21
|
+
protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
|
|
22
|
+
protein_quest/pdbe/fetch.py,sha256=e8CHWDX2QzWnVLmYXCfNrscw1UcN1lI9Uz6Z5HmEOEQ,2510
|
|
23
|
+
protein_quest-0.6.0.dist-info/METADATA,sha256=8rX0ixi4Xl516LkxOlOKKRe364nKIjP7mKn67xuOcDA,9623
|
|
24
|
+
protein_quest-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
25
|
+
protein_quest-0.6.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
|
|
26
|
+
protein_quest-0.6.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
27
|
+
protein_quest-0.6.0.dist-info/RECORD,,
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
protein_quest/__version__.py,sha256=AyGZhrskazcQPC8spzJ45d4XNxgla5DnO1bmKuzRj_Q,56
|
|
3
|
-
protein_quest/cli.py,sha256=xiXt_2l3MxbTbmxm2sz0w8_OdJr8gz_B68GBVv5wHjE,44182
|
|
4
|
-
protein_quest/converter.py,sha256=Y-Oxf7lDNbEicL6GS-IpNWDwaAiHgIgs5bFAcEHCKdQ,1441
|
|
5
|
-
protein_quest/emdb.py,sha256=641c6RwNYnu-0GBFyCFBiI58fNc0jMkd0ZZ9MW9-Jmc,1501
|
|
6
|
-
protein_quest/filters.py,sha256=-gasSXR4g5SzYSYbkfcDwR-tm2KCAhCMdpIVJrUPR1w,5224
|
|
7
|
-
protein_quest/go.py,sha256=lZNEcw8nTc9wpV3cl4y2FG9Lsj8wsXQ6zemmAQs_DWE,5650
|
|
8
|
-
protein_quest/mcp_server.py,sha256=PCXxcU3GElKg2sjMlxbsM63OiFxg9AtmfKwBJ1_0AQE,8130
|
|
9
|
-
protein_quest/parallel.py,sha256=ZJrLO1t2HXs4EeNctytvBTyROPBq-4-gLf35PiolHf0,3468
|
|
10
|
-
protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
-
protein_quest/ss.py,sha256=qOr0aMycNAtZmXXvhCN-KZH3Qp4EejnBcE6fsFgCrmY,10343
|
|
12
|
-
protein_quest/taxonomy.py,sha256=4mKv8zll4mX02Ow8CTvyqMJE2KJZvcq3QlTjjjLOJJk,5072
|
|
13
|
-
protein_quest/uniprot.py,sha256=DIwQYzWZREZ7SGhkJT4Ozgl36pdz47FNfZ1QoEgEaXE,24239
|
|
14
|
-
protein_quest/utils.py,sha256=2lQ7jPHWtDySBTYnoL9VTKl5XUgQVYgp9Prb7qEnjtQ,17982
|
|
15
|
-
protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
|
|
16
|
-
protein_quest/alphafold/confidence.py,sha256=pYIuwYdkuPuHLagcX1dSvSyZ_84xboRLfHUxkEoc4MY,6766
|
|
17
|
-
protein_quest/alphafold/entry_summary.py,sha256=GtE3rT7wH3vIOOeiXY2s80Fo6EzdoqlcvakW8K591Yk,1257
|
|
18
|
-
protein_quest/alphafold/fetch.py,sha256=wIsgPZmtnE5EoAL9G22Y6Ehx9d0md53Mw88-6LLGp0Q,12298
|
|
19
|
-
protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
|
|
20
|
-
protein_quest/pdbe/fetch.py,sha256=e8CHWDX2QzWnVLmYXCfNrscw1UcN1lI9Uz6Z5HmEOEQ,2510
|
|
21
|
-
protein_quest/pdbe/io.py,sha256=iGLvmsD-eEYnrgZDYfkGWIDCzwDRRD5dwqB480talCs,10037
|
|
22
|
-
protein_quest-0.5.0.dist-info/METADATA,sha256=atoElM2xwPd9ubxXSQsFQYz2hjALJi-AegCRkrynEYc,9236
|
|
23
|
-
protein_quest-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
24
|
-
protein_quest-0.5.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
|
|
25
|
-
protein_quest-0.5.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
26
|
-
protein_quest-0.5.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|