protein-quest 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of protein-quest might be problematic. Click here for more details.

protein_quest/uniprot.py CHANGED
@@ -525,7 +525,9 @@ def _build_complex_sparql_query(uniprot_accs: Iterable[str], limit: int) -> str:
525
525
  ?protein
526
526
  ?cp_db
527
527
  ?cp_comment
528
- (GROUP_CONCAT(DISTINCT ?member; separator=",") AS ?complex_members)
528
+ (GROUP_CONCAT(
529
+ DISTINCT STRAFTER(STR(?member), "http://purl.uniprot.org/uniprot/"); separator=","
530
+ ) AS ?complex_members)
529
531
  (COUNT(DISTINCT ?member) AS ?member_count)
530
532
  WHERE {
531
533
  # Input UniProt accessions
@@ -550,7 +552,9 @@ def _build_complex_sparql_query(uniprot_accs: Iterable[str], limit: int) -> str:
550
552
  """
551
553
  select_clause = dedent("""\
552
554
  ?protein ?cp_db ?cp_comment
553
- (GROUP_CONCAT(DISTINCT ?member; separator=",") AS ?complex_members)
555
+ (GROUP_CONCAT(
556
+ DISTINCT STRAFTER(STR(?member), "http://purl.uniprot.org/uniprot/"); separator=","
557
+ ) AS ?complex_members)
554
558
  """)
555
559
  where_clause = dedent("""
556
560
  # --- Complex Info ---
@@ -596,7 +600,7 @@ def _flatten_results_complex(raw_results) -> list[ComplexPortalEntry]:
596
600
  complex_id = raw_result["cp_db"]["value"].split("/")[-1]
597
601
  complex_url = f"https://www.ebi.ac.uk/complexportal/complex/{complex_id}"
598
602
  complex_title = raw_result.get("cp_comment", {}).get("value", "")
599
- members = {m.split("/")[-1] for m in raw_result["complex_members"]["value"].split(",")}
603
+ members = set(raw_result["complex_members"]["value"].split(","))
600
604
  results.append(
601
605
  ComplexPortalEntry(
602
606
  query_protein=query_protein,
protein_quest/utils.py CHANGED
@@ -265,6 +265,7 @@ async def retrieve_files(
265
265
  desc: str = "Downloading files",
266
266
  cacher: Cacher | None = None,
267
267
  chunk_size: int = 524288, # 512 KiB
268
+ gzip_files: bool = False,
268
269
  ) -> list[Path]:
269
270
  """Retrieve files from a list of URLs and save them to a directory.
270
271
 
@@ -277,6 +278,7 @@ async def retrieve_files(
277
278
  desc: Description for the progress bar.
278
279
  cacher: An optional cacher to use for caching files.
279
280
  chunk_size: The size of each chunk to read from the response.
281
+ gzip_files: Whether to save the files gzip-compressed, as sent by the server (requires gzip-encoded responses).
280
282
 
281
283
  Returns:
282
284
  A list of paths to the downloaded files.
@@ -292,6 +294,7 @@ async def retrieve_files(
292
294
  semaphore=semaphore,
293
295
  cacher=cacher,
294
296
  chunk_size=chunk_size,
297
+ gzip_files=gzip_files,
295
298
  )
296
299
  for url, filename in urls
297
300
  ]
@@ -299,6 +302,10 @@ async def retrieve_files(
299
302
  return files
300
303
 
301
304
 
305
+ class InvalidContentEncodingError(aiohttp.ClientResponseError):
306
+ """Content encoding is invalid."""
307
+
308
+
302
309
  async def _retrieve_file(
303
310
  session: RetryClient,
304
311
  url: URL | str,
@@ -306,6 +313,7 @@ async def _retrieve_file(
306
313
  semaphore: asyncio.Semaphore,
307
314
  cacher: Cacher | None = None,
308
315
  chunk_size: int = 524288, # 512 KiB
316
+ gzip_files: bool = False,
309
317
  ) -> Path:
310
318
  """Retrieve a single file from a URL and save it to a specified path.
311
319
 
@@ -316,6 +324,7 @@ async def _retrieve_file(
316
324
  semaphore: A semaphore to limit the number of concurrent downloads.
317
325
  cacher: An optional cacher to use for caching files.
318
326
  chunk_size: The size of each chunk to read from the response.
327
+ gzip_files: Whether to save the file gzip-compressed, as sent by the server (requires a gzip-encoded response).
319
328
 
320
329
  Returns:
321
330
  The path to the saved file.
@@ -330,12 +339,27 @@ async def _retrieve_file(
330
339
  logger.debug(f"File {save_path} was copied from cache {cached_file}. Skipping download from {url}.")
331
340
  return save_path
332
341
 
342
+ # AlphaFold server and many other web servers can return gzipped responses.
343
+ # When we want to save as *.gz, we keep the raw (still compressed) stream;
344
+ # otherwise aiohttp decompresses it automatically for us.
345
+ auto_decompress = not gzip_files
346
+ headers = {"Accept-Encoding": "gzip"}
333
347
  async with (
334
348
  semaphore,
335
- session.get(url) as resp,
349
+ session.get(url, headers=headers, auto_decompress=auto_decompress) as resp,
336
350
  ):
337
351
  resp.raise_for_status()
338
- await cacher.write_iter(save_path, resp.content.iter_chunked(chunk_size))
352
+ if gzip_files and resp.headers.get("Content-Encoding") != "gzip":
353
+ msg = f"Server did not send gzip encoded content for {url}, can not save as gzipped file."
354
+ raise InvalidContentEncodingError(
355
+ request_info=resp.request_info,
356
+ history=resp.history,
357
+ status=415,
358
+ message=msg,
359
+ headers=resp.headers,
360
+ )
361
+ iterator = resp.content.iter_chunked(chunk_size)
362
+ await cacher.write_iter(save_path, iterator)
339
363
  return save_path
340
364
 
341
365
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: protein_quest
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: Search/retrieve/filter proteins and protein structures
5
5
  Project-URL: Homepage, https://github.com/haddocking/protein-quest
6
6
  Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
@@ -17,6 +17,7 @@ Requires-Dist: cattrs[orjson]>=24.1.3
17
17
  Requires-Dist: dask>=2025.5.1
18
18
  Requires-Dist: distributed>=2025.5.1
19
19
  Requires-Dist: gemmi>=0.7.3
20
+ Requires-Dist: mmcif>=0.92.0
20
21
  Requires-Dist: platformdirs>=4.3.8
21
22
  Requires-Dist: psutil>=7.0.0
22
23
  Requires-Dist: rich-argparse>=1.7.1
@@ -71,6 +72,7 @@ graph TB;
71
72
  fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
72
73
  confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
73
74
  residuefilter --> |mmcif_files| ssfilter
75
+ ssfilter -. mmcif_files .-> convert2cif([Convert to cif])
74
76
  classDef dashedBorder stroke-dasharray: 5 5;
75
77
  goterm:::dashedBorder
76
78
  taxonomy:::dashedBorder
@@ -78,6 +80,7 @@ graph TB;
78
80
  fetchemdb:::dashedBorder
79
81
  searchintactionpartners:::dashedBorder
80
82
  searchcomplexes:::dashedBorder
83
+ convert2cif:::dashedBorder
81
84
  ```
82
85
 
83
86
  (Dotted nodes and edges are side-quests.)
@@ -242,6 +245,14 @@ query_protein,complex_id,complex_url,complex_title,members
242
245
  Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
243
246
  ```
244
247
 
248
+ ### Convert structure files to .cif format
249
+
250
+ Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only work with uncompressed `*.cif` files, not with `*.cif.gz` or `*.bcif` files.
251
+
252
+ ```shell
253
+ protein-quest convert --output-dir ./filtered-cif ./filtered-ss
254
+ ```
255
+
245
256
  ## Model Context Protocol (MCP) server
246
257
 
247
258
  Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
@@ -0,0 +1,27 @@
1
+ protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ protein_quest/__version__.py,sha256=z_nR_Ti0YfIwFSKDD18DIrz_r3zxWQ8EGCNr2XUWkY0,56
3
+ protein_quest/cli.py,sha256=pWwMIzWBrtqhZbvTIkvd1XhA5u9J-WAAg7A3hJZGtlk,46201
4
+ protein_quest/converter.py,sha256=Y-Oxf7lDNbEicL6GS-IpNWDwaAiHgIgs5bFAcEHCKdQ,1441
5
+ protein_quest/emdb.py,sha256=641c6RwNYnu-0GBFyCFBiI58fNc0jMkd0ZZ9MW9-Jmc,1501
6
+ protein_quest/filters.py,sha256=Xr-cJTtbNjHKuzmXLBf7yZfqKf_U3RTivcVbr620LVU,5225
7
+ protein_quest/go.py,sha256=lZNEcw8nTc9wpV3cl4y2FG9Lsj8wsXQ6zemmAQs_DWE,5650
8
+ protein_quest/io.py,sha256=ngV_HU2HIQFO-bP2xQj_fhgv0MYjW4puqz_9CxGpBv8,13017
9
+ protein_quest/mcp_server.py,sha256=rQv2srhF3_SYYK1TD3htIyxNiunU7a8FDC7CYT_oJFE,8269
10
+ protein_quest/parallel.py,sha256=ZJrLO1t2HXs4EeNctytvBTyROPBq-4-gLf35PiolHf0,3468
11
+ protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ protein_quest/ss.py,sha256=4ZGIHfjTlodYTXqGUKhMnGbgaStYOGaWg2oYrWIjdgo,10118
13
+ protein_quest/structure.py,sha256=1FTKN0mYKTwZHlyIB4ORSAgSHFKK-UAK7T-qoFo1vyI,7162
14
+ protein_quest/taxonomy.py,sha256=4mKv8zll4mX02Ow8CTvyqMJE2KJZvcq3QlTjjjLOJJk,5072
15
+ protein_quest/uniprot.py,sha256=92G5YiJAJwUBKJQHPrM6DZlaLe-XG4qBg0zy0BDGFYY,24354
16
+ protein_quest/utils.py,sha256=6OF8X4ia_z1HOYiXy6e-zEWlp_bF1DoZCVrCSg1qivY,19076
17
+ protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
18
+ protein_quest/alphafold/confidence.py,sha256=mVAYTIzdbR8xBjRiUzA0at8wJq9vpfEQWPz5cJefLKs,6766
19
+ protein_quest/alphafold/entry_summary.py,sha256=GtE3rT7wH3vIOOeiXY2s80Fo6EzdoqlcvakW8K591Yk,1257
20
+ protein_quest/alphafold/fetch.py,sha256=n5SlqbQfU1PE4X8saV4O1nCrKRn3Q2UcMlrNw5-163w,12801
21
+ protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
22
+ protein_quest/pdbe/fetch.py,sha256=e8CHWDX2QzWnVLmYXCfNrscw1UcN1lI9Uz6Z5HmEOEQ,2510
23
+ protein_quest-0.6.0.dist-info/METADATA,sha256=8rX0ixi4Xl516LkxOlOKKRe364nKIjP7mKn67xuOcDA,9623
24
+ protein_quest-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
25
+ protein_quest-0.6.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
26
+ protein_quest-0.6.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
+ protein_quest-0.6.0.dist-info/RECORD,,
@@ -1,26 +0,0 @@
1
- protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- protein_quest/__version__.py,sha256=AyGZhrskazcQPC8spzJ45d4XNxgla5DnO1bmKuzRj_Q,56
3
- protein_quest/cli.py,sha256=xiXt_2l3MxbTbmxm2sz0w8_OdJr8gz_B68GBVv5wHjE,44182
4
- protein_quest/converter.py,sha256=Y-Oxf7lDNbEicL6GS-IpNWDwaAiHgIgs5bFAcEHCKdQ,1441
5
- protein_quest/emdb.py,sha256=641c6RwNYnu-0GBFyCFBiI58fNc0jMkd0ZZ9MW9-Jmc,1501
6
- protein_quest/filters.py,sha256=-gasSXR4g5SzYSYbkfcDwR-tm2KCAhCMdpIVJrUPR1w,5224
7
- protein_quest/go.py,sha256=lZNEcw8nTc9wpV3cl4y2FG9Lsj8wsXQ6zemmAQs_DWE,5650
8
- protein_quest/mcp_server.py,sha256=PCXxcU3GElKg2sjMlxbsM63OiFxg9AtmfKwBJ1_0AQE,8130
9
- protein_quest/parallel.py,sha256=ZJrLO1t2HXs4EeNctytvBTyROPBq-4-gLf35PiolHf0,3468
10
- protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
- protein_quest/ss.py,sha256=qOr0aMycNAtZmXXvhCN-KZH3Qp4EejnBcE6fsFgCrmY,10343
12
- protein_quest/taxonomy.py,sha256=4mKv8zll4mX02Ow8CTvyqMJE2KJZvcq3QlTjjjLOJJk,5072
13
- protein_quest/uniprot.py,sha256=DIwQYzWZREZ7SGhkJT4Ozgl36pdz47FNfZ1QoEgEaXE,24239
14
- protein_quest/utils.py,sha256=2lQ7jPHWtDySBTYnoL9VTKl5XUgQVYgp9Prb7qEnjtQ,17982
15
- protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
16
- protein_quest/alphafold/confidence.py,sha256=pYIuwYdkuPuHLagcX1dSvSyZ_84xboRLfHUxkEoc4MY,6766
17
- protein_quest/alphafold/entry_summary.py,sha256=GtE3rT7wH3vIOOeiXY2s80Fo6EzdoqlcvakW8K591Yk,1257
18
- protein_quest/alphafold/fetch.py,sha256=wIsgPZmtnE5EoAL9G22Y6Ehx9d0md53Mw88-6LLGp0Q,12298
19
- protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
20
- protein_quest/pdbe/fetch.py,sha256=e8CHWDX2QzWnVLmYXCfNrscw1UcN1lI9Uz6Z5HmEOEQ,2510
21
- protein_quest/pdbe/io.py,sha256=iGLvmsD-eEYnrgZDYfkGWIDCzwDRRD5dwqB480talCs,10037
22
- protein_quest-0.5.0.dist-info/METADATA,sha256=atoElM2xwPd9ubxXSQsFQYz2hjALJi-AegCRkrynEYc,9236
23
- protein_quest-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
24
- protein_quest-0.5.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
25
- protein_quest-0.5.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
26
- protein_quest-0.5.0.dist-info/RECORD,,