protein-quest 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

protein_quest/__version__.py CHANGED
@@ -1 +1,2 @@
- __version__ = "0.3.0"
+ __version__ = "0.3.1"
+ """The version of the package."""
protein_quest/alphafold/confidence.py CHANGED
@@ -98,7 +98,7 @@ class ConfidenceFilterResult:
 
 
  def filter_file_on_residues(file: Path, query: ConfidenceFilterQuery, filtered_dir: Path) -> ConfidenceFilterResult:
- """Filter a single AlphaFoldDB structure file based on confidence.
+ """Filter a single AlphaFoldDB structure file (*.pdb[.gz], *.cif[.gz]) based on confidence.
 
  Args:
  file: The path to the PDB file to filter.
@@ -107,7 +107,7 @@ def filter_file_on_residues(file: Path, query: ConfidenceFilterQuery, filtered_d
 
  Returns:
  result with filtered_file property set to Path where filtered PDB file is saved.
- or None if structure was filtered out.
+ or None if structure was filtered out.
  """
  structure = gemmi.read_structure(str(file))
  residues = set(find_high_confidence_residues(structure, query.confidence))
protein_quest/alphafold/entry_summary.py CHANGED
@@ -1,12 +1,14 @@
  # ruff: noqa: N815 allow camelCase follow what api returns
  from dataclasses import dataclass
 
+ from yarl import URL
+
 
  @dataclass
  class EntrySummary:
  """Dataclass representing a summary of an AlphaFold entry.
 
- Modelled after EntrySummary in https://alphafold.ebi.ac.uk/api/openapi.json
+ Modelled after EntrySummary in [https://alphafold.ebi.ac.uk/api/openapi.json](https://alphafold.ebi.ac.uk/api/openapi.json)
  """
 
  entryId: str
@@ -21,17 +23,17 @@ class EntrySummary:
  modelCreatedDate: str
  latestVersion: int
  allVersions: list[int]
- bcifUrl: str
- cifUrl: str
- pdbUrl: str
- paeImageUrl: str
- paeDocUrl: str
+ bcifUrl: URL
+ cifUrl: URL
+ pdbUrl: URL
+ paeImageUrl: URL
+ paeDocUrl: URL
  gene: str | None = None
  sequenceChecksum: str | None = None
  sequenceVersionDate: str | None = None
- amAnnotationsUrl: str | None = None
- amAnnotationsHg19Url: str | None = None
- amAnnotationsHg38Url: str | None = None
+ amAnnotationsUrl: URL | None = None
+ amAnnotationsHg19Url: URL | None = None
+ amAnnotationsHg38Url: URL | None = None
  isReviewed: bool | None = None
  isReferenceProteome: bool | None = None
  # TODO add new fields from https://alphafold.ebi.ac.uk/#/public-api/get_uniprot_summary_api_uniprot_summary__qualifier__json_get
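The URL-typed fields above pair with a cattrs structure hook registered in `protein_quest/alphafold/fetch.py` (next diff), so the summary JSON still loads transparently. As a small illustration of why the change helps, `yarl.URL` exposes the final path component directly; the URL below is made up for the example:

```python
from yarl import URL

# .name returns the last path segment, which protein-quest now uses as the
# local file name instead of the removed url2name() helper.
url = URL("https://alphafold.ebi.ac.uk/files/AF-A8MT69-F1-model_v4.cif")
print(url.name)  # -> AF-A8MT69-F1-model_v4.cif
```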
protein_quest/alphafold/fetch.py CHANGED
@@ -1,26 +1,28 @@
  """Module for fetch Alphafold data."""
 
- import asyncio
  import logging
  from asyncio import Semaphore
  from collections.abc import AsyncGenerator, Iterable
  from dataclasses import dataclass
  from pathlib import Path
- from textwrap import dedent
- from typing import Literal
+ from typing import Literal, cast, get_args
 
  from aiohttp_retry import RetryClient
  from aiopath import AsyncPath
  from cattrs.preconf.orjson import make_converter
  from tqdm.asyncio import tqdm
+ from yarl import URL
 
  from protein_quest.alphafold.entry_summary import EntrySummary
- from protein_quest.utils import friendly_session, retrieve_files
+ from protein_quest.utils import friendly_session, retrieve_files, run_async
 
  logger = logging.getLogger(__name__)
  converter = make_converter()
+ """cattrs converter to read AlphaFold summary JSON document."""
+ converter.register_structure_hook(URL, lambda v, _: URL(v))
 
  DownloadableFormat = Literal[
+ "summary",
  "bcif",
  "cif",
  "pdb",
@@ -32,16 +34,7 @@ DownloadableFormat = Literal[
  ]
  """Types of formats that can be downloaded from the AlphaFold web service."""
 
- downloadable_formats: set[DownloadableFormat] = {
- "bcif",
- "cif",
- "pdb",
- "paeImage",
- "paeDoc",
- "amAnnotations",
- "amAnnotationsHg19",
- "amAnnotationsHg38",
- }
+ downloadable_formats: set[DownloadableFormat] = set(get_args(DownloadableFormat))
  """Set of formats that can be downloaded from the AlphaFold web service."""
 
 
@@ -59,6 +52,7 @@ class AlphaFoldEntry:
 
  uniprot_acc: str
  summary: EntrySummary | None
+ summary_file: Path | None = None
  bcif_file: Path | None = None
  cif_file: Path | None = None
  pdb_file: Path | None = None
@@ -127,10 +121,6 @@ async def fetch_summary(
 
  Returns:
  A list of EntrySummary objects representing the fetched summary.
-
- Raises:
- HTTPError: If the HTTP request returns an error status code.
- Exception: If there is an error during file reading/writing or data conversion.
  """
  url = f"https://alphafold.ebi.ac.uk/api/prediction/{qualifier}"
  fn: AsyncPath | None = None
@@ -144,6 +134,7 @@
  response.raise_for_status()
  raw_data = await response.content.read()
  if fn is not None:
+ # TODO return fn and make it part of AlphaFoldEntry as summary_file prop
  await fn.write_bytes(raw_data)
  return converter.loads(raw_data, list[EntrySummary])
 
@@ -164,19 +155,14 @@ async def fetch_summaries(
  yield summary
 
 
- def url2name(url: str) -> str:
- """Given a URL, return the final path component as the name of the file."""
- return url.split("/")[-1]
-
-
  async def fetch_many_async(
- ids: Iterable[str], save_dir: Path, what: set[DownloadableFormat], max_parallel_downloads: int = 5
+ uniprot_accessions: Iterable[str], save_dir: Path, what: set[DownloadableFormat], max_parallel_downloads: int = 5
  ) -> AsyncGenerator[AlphaFoldEntry]:
- """Asynchronously fetches summaries and pdb and pae (predicted alignment error) files from
+ """Asynchronously fetches summaries and files from
  [AlphaFold Protein Structure Database](https://alphafold.ebi.ac.uk/).
 
  Args:
- ids: A set of Uniprot IDs to fetch.
+ uniprot_accessions: A set of Uniprot acessions to fetch.
  save_dir: The directory to save the fetched files to.
  what: A set of formats to download.
  max_parallel_downloads: The maximum number of parallel downloads.
@@ -184,7 +170,13 @@ async def fetch_many_async(
  Yields:
  A dataclass containing the summary, pdb file, and pae file.
  """
- summaries = [s async for s in fetch_summaries(ids, save_dir, max_parallel_downloads=max_parallel_downloads)]
+ save_dir_for_summaries = save_dir if "summary" in what and save_dir is not None else None
+ summaries = [
+ s
+ async for s in fetch_summaries(
+ uniprot_accessions, save_dir_for_summaries, max_parallel_downloads=max_parallel_downloads
+ )
+ ]
 
  files = files_to_download(what, summaries)
 
@@ -198,30 +190,31 @@ async def fetch_many_async(
  yield AlphaFoldEntry(
  uniprot_acc=summary.uniprotAccession,
  summary=summary,
- bcif_file=save_dir / url2name(summary.bcifUrl) if "bcif" in what else None,
- cif_file=save_dir / url2name(summary.cifUrl) if "cif" in what else None,
- pdb_file=save_dir / url2name(summary.pdbUrl) if "pdb" in what else None,
- pae_image_file=save_dir / url2name(summary.paeImageUrl) if "paeImage" in what else None,
- pae_doc_file=save_dir / url2name(summary.paeDocUrl) if "paeDoc" in what else None,
+ summary_file=save_dir / f"{summary.uniprotAccession}.json" if save_dir_for_summaries is not None else None,
+ bcif_file=save_dir / summary.bcifUrl.name if "bcif" in what else None,
+ cif_file=save_dir / summary.cifUrl.name if "cif" in what else None,
+ pdb_file=save_dir / summary.pdbUrl.name if "pdb" in what else None,
+ pae_image_file=save_dir / summary.paeImageUrl.name if "paeImage" in what else None,
+ pae_doc_file=save_dir / summary.paeDocUrl.name if "paeDoc" in what else None,
  am_annotations_file=(
- save_dir / url2name(summary.amAnnotationsUrl)
+ save_dir / summary.amAnnotationsUrl.name
  if "amAnnotations" in what and summary.amAnnotationsUrl
  else None
  ),
  am_annotations_hg19_file=(
- save_dir / url2name(summary.amAnnotationsHg19Url)
+ save_dir / summary.amAnnotationsHg19Url.name
  if "amAnnotationsHg19" in what and summary.amAnnotationsHg19Url
  else None
  ),
  am_annotations_hg38_file=(
- save_dir / url2name(summary.amAnnotationsHg38Url)
+ save_dir / summary.amAnnotationsHg38Url.name
  if "amAnnotationsHg38" in what and summary.amAnnotationsHg38Url
  else None
  ),
  )
 
 
- def files_to_download(what: set[DownloadableFormat], summaries: Iterable[EntrySummary]) -> set[tuple[str, str]]:
+ def files_to_download(what: set[DownloadableFormat], summaries: Iterable[EntrySummary]) -> set[tuple[URL, str]]:
  if not (set(what) <= downloadable_formats):
  msg = (
  f"Invalid format(s) specified: {set(what) - downloadable_formats}. "
@@ -229,24 +222,21 @@ def files_to_download(what: set[DownloadableFormat], summaries: Iterable[EntrySu
  )
  raise ValueError(msg)
 
- files: set[tuple[str, str]] = set()
+ files: set[tuple[URL, str]] = set()
  for summary in summaries:
  for fmt in what:
- url = getattr(summary, f"{fmt}Url", None)
+ if fmt == "summary":
+ # summary is handled already in fetch_summary
+ continue
+ url = cast("URL | None", getattr(summary, f"{fmt}Url", None))
  if url is None:
  logger.warning(f"Summary {summary.uniprotAccession} does not have a URL for format '{fmt}'. Skipping.")
  continue
- file = (url, url2name(url))
+ file = (url, url.name)
  files.add(file)
  return files
 
 
- class NestedAsyncIOLoopError(RuntimeError):
- """Custom error for nested async I/O loops."""
-
- pass
-
-
  def fetch_many(
  ids: Iterable[str], save_dir: Path, what: set[DownloadableFormat], max_parallel_downloads: int = 5
  ) -> list[AlphaFoldEntry]:
@@ -260,9 +250,6 @@ def fetch_many(
 
  Returns:
  A list of AlphaFoldEntry dataclasses containing the summary, pdb file, and pae file.
-
- Raises:
- NestedAsyncIOLoopError: If called from a nested async I/O loop like in a Jupyter notebook.
  """
 
  async def gather_entries():
@@ -271,19 +258,7 @@
  async for entry in fetch_many_async(ids, save_dir, what, max_parallel_downloads=max_parallel_downloads)
  ]
 
- try:
- return asyncio.run(gather_entries())
- except RuntimeError as e:
- msg = dedent("""\
- Can not run async method from an environment where the asyncio event loop is already running.
- Like a Jupyter notebook.
-
- Please use the `fetch_many_async` function directly or before call
-
- import nest_asyncio
- nest_asyncio.apply()
- """)
- raise NestedAsyncIOLoopError(msg) from e
+ return run_async(gather_entries())
 
 
  def relative_to(entry: AlphaFoldEntry, session_dir: Path) -> AlphaFoldEntry:
@@ -299,6 +274,7 @@ def relative_to(entry: AlphaFoldEntry, session_dir: Path) -> AlphaFoldEntry:
  return AlphaFoldEntry(
  uniprot_acc=entry.uniprot_acc,
  summary=entry.summary,
+ summary_file=entry.summary_file.relative_to(session_dir) if entry.summary_file else None,
  bcif_file=entry.bcif_file.relative_to(session_dir) if entry.bcif_file else None,
  cif_file=entry.cif_file.relative_to(session_dir) if entry.cif_file else None,
  pdb_file=entry.pdb_file.relative_to(session_dir) if entry.pdb_file else None,
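A hedged sketch of calling the updated fetch API from a plain Python script; the accession is the one used elsewhere in this package's examples, the output directory is arbitrary, and the `"summary"` format is the new 0.3.1 addition that keeps the summary JSON as `<accession>.json` next to the structure files:

```python
from pathlib import Path

from protein_quest.alphafold.fetch import fetch_many

# Synchronous wrapper around fetch_many_async; uses run_async() internally.
entries = fetch_many(["A8MT69"], Path("af_downloads"), what={"summary", "cif"})
for entry in entries:
    print(entry.uniprot_acc, entry.summary_file, entry.cif_file)
```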
protein_quest/cli.py CHANGED
@@ -5,7 +5,8 @@ import asyncio
  import csv
  import logging
  import os
- from collections.abc import Callable, Iterable
+ import sys
+ from collections.abc import Callable, Generator, Iterable
  from importlib.util import find_spec
  from io import TextIOWrapper
  from pathlib import Path
@@ -14,6 +15,7 @@ from textwrap import dedent
  from cattrs import structure
  from rich import print as rprint
  from rich.logging import RichHandler
+ from rich.panel import Panel
  from rich_argparse import ArgumentDefaultsRichHelpFormatter
  from tqdm.rich import tqdm
 
@@ -25,7 +27,7 @@ from protein_quest.emdb import fetch as emdb_fetch
  from protein_quest.filters import filter_files_on_chain, filter_files_on_residues
  from protein_quest.go import Aspect, allowed_aspects, search_gene_ontology_term, write_go_terms_to_csv
  from protein_quest.pdbe import fetch as pdbe_fetch
- from protein_quest.pdbe.io import glob_structure_files
+ from protein_quest.pdbe.io import glob_structure_files, locate_structure_file
  from protein_quest.taxonomy import SearchField, _write_taxonomy_csv, search_fields, search_taxon
  from protein_quest.uniprot import PdbResult, Query, search4af, search4emdb, search4pdb, search4uniprot
 
@@ -246,12 +248,12 @@ def _add_retrieve_alphafold_parser(subparsers: argparse._SubParsersAction):
  )
  parser.add_argument("output_dir", type=Path, help="Directory to store downloaded AlphaFold files")
  parser.add_argument(
- "--what-af-formats",
+ "--what-formats",
  type=str,
  action="append",
  choices=sorted(downloadable_formats),
  help=dedent("""AlphaFold formats to retrieve. Can be specified multiple times.
- Default is 'pdb'. Summary is always downloaded as `<entryId>.json`."""),
+ Default is 'summary' and 'cif'."""),
  )
  parser.add_argument(
  "--max-parallel-downloads",
@@ -585,17 +587,17 @@ def _handle_retrieve_pdbe(args):
 
  def _handle_retrieve_alphafold(args):
  download_dir = args.output_dir
- what_af_formats = args.what_af_formats
+ what_formats = args.what_formats
  alphafold_csv = args.alphafold_csv
  max_parallel_downloads = args.max_parallel_downloads
 
- if what_af_formats is None:
- what_af_formats = {"pdb"}
+ if what_formats is None:
+ what_formats = {"summary", "cif"}
 
  # TODO besides `uniprot_acc,af_id\n` csv also allow headless single column format
  #
- af_ids = [r["af_id"] for r in _read_alphafold_csv(alphafold_csv)]
- validated_what: set[DownloadableFormat] = structure(what_af_formats, set[DownloadableFormat])
+ af_ids = _read_column_from_csv(alphafold_csv, "af_id")
+ validated_what: set[DownloadableFormat] = structure(what_formats, set[DownloadableFormat])
  rprint(f"Retrieving {len(af_ids)} AlphaFold entries with formats {validated_what}")
  afs = af_fetch(af_ids, download_dir, what=validated_what, max_parallel_downloads=max_parallel_downloads)
  total_nr_files = sum(af.nr_of_files() for af in afs)
@@ -658,12 +660,32 @@ def _handle_filter_chain(args):
  pdb_id2chain_mapping_file = args.chains
  scheduler_address = args.scheduler_address
 
+ # make sure files in input dir with entries in mapping file are the same
+ # complain when files from mapping file are missing on disk
  rows = list(_iter_csv_rows(pdb_id2chain_mapping_file))
- id2chains: dict[str, str] = {row["pdb_id"]: row["chain"] for row in rows}
+ file2chain: set[tuple[Path, str]] = set()
+ errors: list[FileNotFoundError] = []
 
- new_files = filter_files_on_chain(input_dir, id2chains, output_dir, scheduler_address)
+ for row in rows:
+ pdb_id = row["pdb_id"]
+ chain = row["chain"]
+ try:
+ f = locate_structure_file(input_dir, pdb_id)
+ file2chain.add((f, chain))
+ except FileNotFoundError as e:
+ errors.append(e)
 
- nr_written = len([r for r in new_files if r[2] is not None])
+ if errors:
+ msg = f"Some structure files could not be found ({len(errors)} missing), skipping them"
+ rprint(Panel(os.linesep.join(map(str, errors)), title=msg, style="red"))
+
+ if not file2chain:
+ rprint("[red]No valid structure files found. Exiting.")
+ sys.exit(1)
+
+ results = filter_files_on_chain(file2chain, output_dir, scheduler_address=scheduler_address)
+
+ nr_written = len([r for r in results if r.passed])
 
  rprint(f"Wrote {nr_written} single-chain PDB/mmCIF files to {output_dir}.")
 
@@ -768,12 +790,7 @@ def _write_dict_of_sets2csv(file: TextIOWrapper, data: dict[str, set[str]], ref_
  writer.writerow({"uniprot_acc": uniprot_acc, ref_id_field: ref_id})
 
 
- def _read_alphafold_csv(file: TextIOWrapper):
- reader = csv.DictReader(file)
- yield from reader
-
-
- def _iter_csv_rows(file: TextIOWrapper):
+ def _iter_csv_rows(file: TextIOWrapper) -> Generator[dict[str, str]]:
  reader = csv.DictReader(file)
  yield from reader
 
protein_quest/filters.py CHANGED
@@ -1,19 +1,17 @@
  """Module for filtering structure files and their contents."""
 
  import logging
- from collections.abc import Generator
+ from collections.abc import Collection, Generator
  from dataclasses import dataclass
  from pathlib import Path
  from shutil import copyfile
- from typing import cast
 
- from dask.distributed import Client, progress
+ from dask.distributed import Client
  from distributed.deploy.cluster import Cluster
  from tqdm.auto import tqdm
 
- from protein_quest.parallel import configure_dask_scheduler
+ from protein_quest.parallel import configure_dask_scheduler, dask_map_with_progress
  from protein_quest.pdbe.io import (
- locate_structure_file,
  nr_residues_in_chain,
  write_single_chain_pdb_file,
  )
@@ -21,25 +19,48 @@ from protein_quest.pdbe.io import (
  logger = logging.getLogger(__name__)
 
 
+ @dataclass
+ class ChainFilterStatistics:
+ input_file: Path
+ chain_id: str
+ passed: bool = False
+ output_file: Path | None = None
+ discard_reason: Exception | None = None
+
+
+ def filter_file_on_chain(
+ file_and_chain: tuple[Path, str], output_dir: Path, out_chain: str = "A"
+ ) -> ChainFilterStatistics:
+ input_file, chain_id = file_and_chain
+ try:
+ output_file = write_single_chain_pdb_file(input_file, chain_id, output_dir, out_chain=out_chain)
+ return ChainFilterStatistics(
+ input_file=input_file,
+ chain_id=chain_id,
+ output_file=output_file,
+ passed=True,
+ )
+ except Exception as e: # noqa: BLE001 - error is handled downstream
+ return ChainFilterStatistics(input_file=input_file, chain_id=chain_id, discard_reason=e)
+
+
  def filter_files_on_chain(
- input_dir: Path,
- id2chains: dict[str, str],
+ file2chains: Collection[tuple[Path, str]],
  output_dir: Path,
- scheduler_address: str | Cluster | None = None,
  out_chain: str = "A",
- ) -> list[tuple[str, str, Path | None]]:
+ scheduler_address: str | Cluster | None = None,
+ ) -> list[ChainFilterStatistics]:
  """Filter mmcif/PDB files by chain.
 
  Args:
- input_dir: The directory containing the input mmcif/PDB files.
- id2chains: Which chain to keep for each PDB ID. Key is the PDB ID, value is the chain ID.
+ file2chains: Which chain to keep for each PDB file.
+ First item is the PDB file path, second item is the chain ID.
  output_dir: The directory where the filtered files will be written.
- scheduler_address: The address of the Dask scheduler.
  out_chain: Under what name to write the kept chain.
+ scheduler_address: The address of the Dask scheduler.
 
  Returns:
- A list of tuples containing the PDB ID, chain ID, and path to the filtered file.
- Last tuple item is None if something went wrong like chain not present.
+ Result of the filtering process.
  """
  output_dir.mkdir(parents=True, exist_ok=True)
  scheduler_address = configure_dask_scheduler(
@@ -47,24 +68,14 @@ def filter_files_on_chain(
  name="filter-chain",
  )
 
- def task(id2chain: tuple[str, str]) -> tuple[str, str, Path | None]:
- pdb_id, chain = id2chain
- input_file = locate_structure_file(input_dir, pdb_id)
- return pdb_id, chain, write_single_chain_pdb_file(input_file, chain, output_dir, out_chain=out_chain)
-
  with Client(scheduler_address) as client:
- logger.info(f"Follow progress on dask dashboard at: {client.dashboard_link}")
-
- futures = client.map(task, id2chains.items())
-
- progress(futures)
-
- results = client.gather(futures)
- return cast("list[tuple[str,str, Path | None]]", results)
+ return dask_map_with_progress(
+ client, filter_file_on_chain, file2chains, output_dir=output_dir, out_chain=out_chain
+ )
 
 
  @dataclass
- class FilterStat:
+ class ResidueFilterStatistics:
  """Statistics for filtering files based on residue count in a specific chain.
 
  Parameters:
@@ -82,7 +93,7 @@ class FilterStat:
 
 
  def filter_files_on_residues(
  input_files: list[Path], output_dir: Path, min_residues: int, max_residues: int, chain: str = "A"
- ) -> Generator[FilterStat]:
+ ) -> Generator[ResidueFilterStatistics]:
  """Filter PDB/mmCIF files by number of residues in given chain.
 
  Args:
@@ -93,7 +104,7 @@ def filter_files_on_residues(
  chain: The chain to count residues of.
 
  Yields:
- FilterStat objects containing information about the filtering process for each input file.
+ Objects containing information about the filtering process for each input file.
  """
  output_dir.mkdir(parents=True, exist_ok=True)
  for input_file in tqdm(input_files, unit="file"):
@@ -102,6 +113,6 @@
  if passed:
  output_file = output_dir / input_file.name
  copyfile(input_file, output_file)
- yield FilterStat(input_file, residue_count, True, output_file)
+ yield ResidueFilterStatistics(input_file, residue_count, True, output_file)
  else:
- yield FilterStat(input_file, residue_count, False, None)
+ yield ResidueFilterStatistics(input_file, residue_count, False, None)
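A hedged sketch of the new `filter_files_on_chain` call shape: it now takes (file, chain) pairs instead of an input directory plus a pdb_id-to-chain mapping and returns `ChainFilterStatistics` objects. File paths and chain IDs below are illustrative; the function still starts a local Dask cluster under the hood, hence the main guard:

```python
from pathlib import Path

from protein_quest.filters import filter_files_on_chain

if __name__ == "__main__":
    # Illustrative (structure file, chain to keep) pairs.
    file2chain = {(Path("pdbe/1abc.cif"), "B"), (Path("pdbe/2xyz.cif"), "C")}
    results = filter_files_on_chain(file2chain, Path("filtered"))
    for r in results:
        if r.passed:
            print(f"kept chain {r.chain_id} of {r.input_file} -> {r.output_file}")
        else:
            print(f"discarded {r.input_file}: {r.discard_reason}")
```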
protein_quest/mcp_server.py CHANGED
@@ -24,12 +24,11 @@ npx @modelcontextprotocol/inspector
  # Choose STDIO
  # command: uv run protein-quest mcp
  # id: protein-quest
- # Prompt: What are the PDBe structures for `A8MT69` uniprot accession?
  ```
 
  Examples:
 
- For search pdb use `A8MT69` as input.
+ - What are the PDBe structures for `A8MT69` uniprot accession?
 
  """
 
@@ -90,7 +89,7 @@ def extract_single_chain_from_structure(
  chain2keep: str,
  output_dir: Path,
  out_chain: str = "A",
- ) -> Path | None:
+ ) -> Path:
  """
  Extract a single chain from a mmCIF/pdb file and write to a new file.
 
@@ -101,7 +100,7 @@
  out_chain: The chain identifier for the output file.
 
  Returns:
- Path to the output mmCIF/pdb file or None if not created.
+ Path to the output mmCIF/pdb file
  """
  return write_single_chain_pdb_file(input_file, chain2keep, output_dir, out_chain)
 
@@ -150,7 +149,7 @@ def fetch_alphafold_structures(uniprot_accs: set[str], save_dir: Path) -> list[A
  Returns:
  A list of AlphaFold entries.
  """
- what: set[DownloadableFormat] = {"cif"}
+ what: set[DownloadableFormat] = {"summary", "cif"}
  return alphafold_fetch(uniprot_accs, save_dir, what)
 
 
protein_quest/parallel.py CHANGED
@@ -2,8 +2,10 @@
 
  import logging
  import os
+ from collections.abc import Callable, Collection
+ from typing import Concatenate, ParamSpec, cast
 
- from dask.distributed import LocalCluster
+ from dask.distributed import Client, LocalCluster, progress
  from distributed.deploy.cluster import Cluster
  from psutil import cpu_count
 
@@ -66,3 +68,37 @@ def _configure_cpu_dask_scheduler(nproc: int, name: str) -> LocalCluster:
  n_workers = total_cpus // nproc
  # Use single thread per worker to prevent GIL slowing down the computations
  return LocalCluster(name=name, threads_per_worker=1, n_workers=n_workers)
+
+
+ # Generic type parameters used across helpers
+ P = ParamSpec("P")
+
+
+ def dask_map_with_progress[T, R, **P](
+ client: Client,
+ func: Callable[Concatenate[T, P], R],
+ iterable: Collection[T],
+ *args: P.args,
+ **kwargs: P.kwargs,
+ ) -> list[R]:
+ """
+ Wrapper for map, progress, and gather of Dask that returns a correctly typed list.
+
+ Args:
+ client: Dask client.
+ func: Function to map; first parameter comes from ``iterable`` and any
+ additional parameters can be provided positionally via ``*args`` or
+ as keyword arguments via ``**kwargs``.
+ iterable: Collection of arguments to map over.
+ *args: Additional positional arguments to pass to client.map().
+ **kwargs: Additional keyword arguments to pass to client.map().
+
+ Returns:
+ List of results of type returned by `func` function.
+ """
+ if client.dashboard_link:
+ logger.info(f"Follow progress on dask dashboard at: {client.dashboard_link}")
+ futures = client.map(func, iterable, *args, **kwargs)
+ progress(futures)
+ results = client.gather(futures)
+ return cast("list[R]", results)
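A minimal usage sketch for the new helper, assuming a local Dask cluster; `double` and its `offset` keyword are hypothetical and only illustrate that extra keyword arguments are forwarded through `client.map()` to the mapped function:

```python
from dask.distributed import Client

from protein_quest.parallel import dask_map_with_progress


def double(x: int, offset: int = 0) -> int:
    return 2 * x + offset


if __name__ == "__main__":
    with Client() as client:  # local cluster, just for the example
        results = dask_map_with_progress(client, double, [1, 2, 3], offset=1)
    print(results)  # [3, 5, 7]
```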
protein_quest/pdbe/fetch.py CHANGED
@@ -3,7 +3,7 @@
  from collections.abc import Iterable, Mapping
  from pathlib import Path
 
- from protein_quest.utils import retrieve_files
+ from protein_quest.utils import retrieve_files, run_async
 
 
  def _map_id_mmcif(pdb_id: str) -> tuple[str, str]:
@@ -49,3 +49,17 @@ async def fetch(ids: Iterable[str], save_dir: Path, max_parallel_downloads: int
 
  await retrieve_files(urls, save_dir, max_parallel_downloads, desc="Downloading PDBe mmCIF files")
  return id2paths
+
+
+ def sync_fetch(ids: Iterable[str], save_dir: Path, max_parallel_downloads: int = 5) -> Mapping[str, Path]:
+ """Synchronously fetches mmCIF files from the PDBe database.
+
+ Args:
+ ids: A set of PDB IDs to fetch.
+ save_dir: The directory to save the fetched mmCIF files to.
+ max_parallel_downloads: The maximum number of parallel downloads.
+
+ Returns:
+ A dict of id and paths to the downloaded mmCIF files.
+ """
+ return run_async(fetch(ids, save_dir, max_parallel_downloads))
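A hedged sketch of the new synchronous wrapper; the PDB IDs below are placeholders:

```python
from pathlib import Path

from protein_quest.pdbe.fetch import sync_fetch

# Downloads mmCIF files without having to manage an asyncio event loop yourself.
id2path = sync_fetch(["1abc", "2xyz"], Path("pdbe_downloads"), max_parallel_downloads=2)
for pdb_id, path in id2path.items():
    print(pdb_id, path)
```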
protein_quest/pdbe/io.py CHANGED
@@ -11,6 +11,11 @@ from protein_quest import __version__
 
  logger = logging.getLogger(__name__)
 
+ # TODO remove once v0.7.4 of gemmi is released,
+ # as uv pip install git+https://github.com/project-gemmi/gemmi.git installs 0.7.4.dev0 which does not print leaks
+ # Swallow gemmi leaked function warnings
+ gemmi.set_leak_warnings(False)
+
 
  def nr_residues_in_chain(file: Path | str, chain: str = "A") -> int:
  """Returns the number of residues in a specific chain from a mmCIF/pdb file.
@@ -131,9 +136,16 @@ def glob_structure_files(input_dir: Path) -> Generator[Path]:
  yield from input_dir.glob(f"*{ext}")
 
 
- def write_single_chain_pdb_file(
- input_file: Path, chain2keep: str, output_dir: Path, out_chain: str = "A"
- ) -> Path | None:
+ class ChainNotFoundError(IndexError):
+ """Exception raised when a chain is not found in a structure."""
+
+ def __init__(self, chain: str, file: Path | str):
+ super().__init__(f"Chain {chain} not found in {file}")
+ self.chain_id = chain
+ self.file = file
+
+
+ def write_single_chain_pdb_file(input_file: Path, chain2keep: str, output_dir: Path, out_chain: str = "A") -> Path:
  """Write a single chain from a mmCIF/pdb file to a new mmCIF/pdb file.
 
  Args:
@@ -143,7 +155,11 @@ def write_single_chain_pdb_file(
  out_chain: The chain identifier for the output file.
 
  Returns:
- Path to the output mmCIF/pdb file or None if not created.
+ Path to the output mmCIF/pdb file
+
+ Raises:
+ FileNotFoundError: If the input file does not exist.
+ ChainNotFoundError: If the specified chain is not found in the input file.
  """
 
  structure = gemmi.read_structure(str(input_file))
@@ -154,15 +170,14 @@
 
  chain = find_chain_in_model(model, chain2keep)
  if chain is None:
- logger.warning(
- "Chain %s not found in %s. Skipping.",
- chain2keep,
- input_file,
- )
- return None
+ raise ChainNotFoundError(chain2keep, input_file)
  name, extension = _split_name_and_extension(input_file.name)
  output_file = output_dir / f"{name}_{chain.name}2{out_chain}{extension}"
 
+ if output_file.exists():
+ logger.info("Output file %s already exists for input file %s. Skipping.", output_file, input_file)
+ return output_file
+
  new_structure = gemmi.Structure()
  new_structure.resolution = structure.resolution
  new_id = structure.name + f"{chain2keep}2{out_chain}"
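Since `write_single_chain_pdb_file` now raises instead of returning `None`, callers that previously checked for `None` need a try/except. A hedged sketch with illustrative paths:

```python
from pathlib import Path

from protein_quest.pdbe.io import ChainNotFoundError, write_single_chain_pdb_file

out_dir = Path("single_chain")
out_dir.mkdir(parents=True, exist_ok=True)
try:
    out = write_single_chain_pdb_file(Path("pdbe/1abc.cif"), "B", out_dir)
    print(f"wrote {out}")
except ChainNotFoundError as e:
    print(f"chain {e.chain_id} missing from {e.file}")
except FileNotFoundError as e:
    print(f"input file missing: {e}")
```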
protein_quest/taxonomy.py CHANGED
@@ -20,6 +20,16 @@ logger = logging.getLogger(__name__)
 
  @dataclass(frozen=True, slots=True)
  class Taxon:
+ """Dataclass representing a taxon.
+
+ Arguments:
+ taxon_id: The unique identifier for the taxon.
+ scientific_name: The scientific name of the taxon.
+ rank: The taxonomic rank of the taxon (e.g., species, genus).
+ common_name: The common name of the taxon (if available).
+ other_names: A set of other names for the taxon (if available).
+ """
+
  taxon_id: str
  scientific_name: str
  rank: str
@@ -47,7 +57,9 @@ converter.register_structure_hook(
  )
 
  SearchField = Literal["tax_id", "scientific", "common", "parent"]
+ """Type of search field"""
  search_fields: set[SearchField | None] = set(get_args(SearchField)) | {None}
+ """Set of valid search fields"""
 
 
  def _get_next_page(response: ClientResponse) -> URL | str | None:
protein_quest/utils.py CHANGED
@@ -2,20 +2,23 @@
 
  import asyncio
  import logging
- from collections.abc import Iterable
+ from collections.abc import Coroutine, Iterable
  from contextlib import asynccontextmanager
  from pathlib import Path
+ from textwrap import dedent
+ from typing import Any
 
  import aiofiles
  import aiohttp
  from aiohttp_retry import ExponentialRetry, RetryClient
  from tqdm.asyncio import tqdm
+ from yarl import URL
 
  logger = logging.getLogger(__name__)
 
 
  async def retrieve_files(
- urls: Iterable[tuple[str, str]],
+ urls: Iterable[tuple[URL | str, str]],
  save_dir: Path,
  max_parallel_downloads: int = 5,
  retries: int = 3,
@@ -45,7 +48,7 @@ async def retrieve_files(
 
  async def _retrieve_file(
  session: RetryClient,
- url: str,
+ url: URL | str,
  save_path: Path,
  semaphore: asyncio.Semaphore,
  ovewrite: bool = False,
@@ -103,3 +106,35 @@ async def friendly_session(retries: int = 3, total_timeout: int = 300):
  async with aiohttp.ClientSession(timeout=timeout) as session:
  client = RetryClient(client_session=session, retry_options=retry_options)
  yield client
+
+
+ class NestedAsyncIOLoopError(RuntimeError):
+ """Custom error for nested async I/O loops."""
+
+ def __init__(self) -> None:
+ msg = dedent("""\
+ Can not run async method from an environment where the asyncio event loop is already running.
+ Like a Jupyter notebook.
+
+ Please use the async function directly or
+ call `import nest_asyncio; nest_asyncio.apply()` and try again.
+ """)
+ super().__init__(msg)
+
+
+ def run_async[R](coroutine: Coroutine[Any, Any, R]) -> R:
+ """Run an async coroutine with nicer error.
+
+ Args:
+ coroutine: The async coroutine to run.
+
+ Returns:
+ The result of the coroutine.
+
+ Raises:
+ NestedAsyncIOLoopError: If called from a nested async I/O loop like in a Jupyter notebook.
+ """
+ try:
+ return asyncio.run(coroutine)
+ except RuntimeError as e:
+ raise NestedAsyncIOLoopError from e
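A hedged sketch of how `run_async` can wrap any coroutine from this package: it delegates to `asyncio.run()` and converts the "event loop already running" failure into the friendlier `NestedAsyncIOLoopError`. The PDB ID below is a placeholder:

```python
from pathlib import Path

from protein_quest.pdbe.fetch import fetch
from protein_quest.utils import NestedAsyncIOLoopError, run_async

try:
    # fetch() is a coroutine; run_async drives it to completion in a plain script.
    id2path = run_async(fetch(["1abc"], Path("pdbe_downloads")))
except NestedAsyncIOLoopError:
    # Inside Jupyter, await the coroutine directly or apply nest_asyncio first.
    raise
```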
protein_quest-0.3.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: protein_quest
- Version: 0.3.0
+ Version: 0.3.1
  Summary: Search/retrieve/filter proteins and protein structures
  Project-URL: Homepage, https://github.com/haddocking/protein-quest
  Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
@@ -13,19 +13,16 @@ Requires-Dist: aiohttp-retry>=2.9.1
  Requires-Dist: aiohttp[speedups]>=3.11.18
  Requires-Dist: aiopath>=0.7.7
  Requires-Dist: attrs>=25.3.0
- Requires-Dist: bokeh>=3.7.3
  Requires-Dist: cattrs[orjson]>=24.1.3
  Requires-Dist: dask>=2025.5.1
  Requires-Dist: distributed>=2025.5.1
  Requires-Dist: gemmi>=0.7.3
- Requires-Dist: molviewspec>=1.6.0
- Requires-Dist: pandas>=2.3.0
- Requires-Dist: platformdirs>=4.3.8
  Requires-Dist: psutil>=7.0.0
  Requires-Dist: rich-argparse>=1.7.1
  Requires-Dist: rich>=14.0.0
  Requires-Dist: sparqlwrapper>=2.0.0
  Requires-Dist: tqdm>=4.67.1
+ Requires-Dist: yarl>=1.20.1
  Provides-Extra: mcp
  Requires-Dist: fastmcp>=2.11.3; extra == 'mcp'
  Requires-Dist: pydantic>=2.11.7; extra == 'mcp'
@@ -37,8 +34,7 @@ Description-Content-Type: text/markdown
  [![CI](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml/badge.svg)](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
  [![Research Software Directory Badge](https://img.shields.io/badge/rsd-00a3e3.svg)](https://www.research-software.nl/software/protein-quest)
  [![PyPI](https://img.shields.io/pypi/v/protein-quest)](https://pypi.org/project/protein-quest/)
- <!-- TODO replace with correct zenodo id -->
- [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.15632658.svg)](https://doi.org/10.5281/zenodo.15632658)
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.16941288.svg)](https://doi.org/10.5281/zenodo.16941288)
  [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/7a3f3f1fe64640d583a5e50fe7ba828e)](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
 
  Python package to search/retrieve/filter proteins and protein structures.
@@ -90,7 +86,7 @@ pip install git+https://github.com/haddocking/protein-quest.git
 
  The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
 
- To use programmaticly, see [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
+ To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
 
  ### Search Uniprot accessions
 
@@ -0,0 +1,24 @@
+ protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ protein_quest/__version__.py,sha256=Bu2gp24I4eIxc1qgY2e0PnF8N-szjUpFQwVAe10IRAo,56
+ protein_quest/cli.py,sha256=xjiWtRDqv-Ruv1fpvXq4dmDSuuyewxw81akDs1ktVbI,31772
+ protein_quest/emdb.py,sha256=QEeU0VJQ4lLM-o5yAU3QZlrtzDZNgnC5fCjlqPtTyAY,1370
+ protein_quest/filters.py,sha256=3vqfFH87Lz7r9uYiSvwMxzShMfRNv1Zv_freJtDljrU,4051
+ protein_quest/go.py,sha256=ycV3-grxuIKFt28bFgH6iRKmt5AEGi7txoTbaAnBxQE,5684
+ protein_quest/mcp_server.py,sha256=1_CGC0peqoNUFBvgFWupKwIWjmHsKxN5Vxy1K7dt5Dw,7130
+ protein_quest/parallel.py,sha256=ZJrLO1t2HXs4EeNctytvBTyROPBq-4-gLf35PiolHf0,3468
+ protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ protein_quest/taxonomy.py,sha256=wPzLjum5n_SEkL2rHUKvyRnjL1pG7bhEnE2vMmXixEc,5105
+ protein_quest/uniprot.py,sha256=8qWV4GWqHTRfed0bE_TdgsLYcnDT_vzKu-6JxIgapJQ,18680
+ protein_quest/utils.py,sha256=YhlTJreIr1bExbh1M514l6sz4GmLVa3RN57mI1kjjuw,4730
+ protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
+ protein_quest/alphafold/confidence.py,sha256=GGd_vYsqVvs9InvFKtqHdGKB_61GHllPmDyIztvzG7E,5625
+ protein_quest/alphafold/entry_summary.py,sha256=GtE3rT7wH3vIOOeiXY2s80Fo6EzdoqlcvakW8K591Yk,1257
+ protein_quest/alphafold/fetch.py,sha256=1mDbQNm01cxlwFNDsKHBWD7MEwzB3PaheskdaLN7XJs,11491
+ protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
+ protein_quest/pdbe/fetch.py,sha256=tlCrWoaOrwxnQFrf-PnimUUa6lmtHwwysS51efYsBcA,2379
+ protein_quest/pdbe/io.py,sha256=J6fHlRLHLALnpxDgSUUnFCNFV9Hr3u6eJDO6j81ftT4,6936
+ protein_quest-0.3.1.dist-info/METADATA,sha256=fWvmMbm5aEMb3WbWgPAqwEOWeYJSY47iuZLaRIgBuuk,7305
+ protein_quest-0.3.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ protein_quest-0.3.1.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
+ protein_quest-0.3.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ protein_quest-0.3.1.dist-info/RECORD,,
@@ -1,24 +0,0 @@
- protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- protein_quest/__version__.py,sha256=VrXpHDu3erkzwl_WXrqINBm9xWkcyUy53IQOj042dOs,22
- protein_quest/cli.py,sha256=oyDin6Z92Q17mUmTCasKgju3YUJbPu298gniNakQUwY,31121
- protein_quest/emdb.py,sha256=QEeU0VJQ4lLM-o5yAU3QZlrtzDZNgnC5fCjlqPtTyAY,1370
- protein_quest/filters.py,sha256=GNtM1N1S1mNUqAvX7OvyhOvnUWo4qx2hMneORbc-Qz8,3797
- protein_quest/go.py,sha256=ycV3-grxuIKFt28bFgH6iRKmt5AEGi7txoTbaAnBxQE,5684
- protein_quest/mcp_server.py,sha256=xIaOy6sY_gW5R_oMImI2yBmbBGtZZICOxXLzOkFmm-w,7197
- protein_quest/parallel.py,sha256=kCH6KCJYJZVoq0_Qz8ZLbHnf2OJG-h4uxd9oH2rLNKc,2201
- protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- protein_quest/taxonomy.py,sha256=kAKKZT_mOtmX8ZWNIE9i7emE23VEewkj12X7d_t3p2Y,4659
- protein_quest/uniprot.py,sha256=8qWV4GWqHTRfed0bE_TdgsLYcnDT_vzKu-6JxIgapJQ,18680
- protein_quest/utils.py,sha256=HUvqfsuMBIFOVFlb_QC2to_UQkiZ0_fwHLlckifuXss,3700
- protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
- protein_quest/alphafold/confidence.py,sha256=-lbwijzVMhRd98bxwFDbSi7idiUKJ5BpOsGFrvuTEnQ,5596
- protein_quest/alphafold/entry_summary.py,sha256=P-S8qrXkU-wwIccA1nGol1lfDkUW0Sg0th_3EU-WjN8,1187
- protein_quest/alphafold/fetch.py,sha256=eq__PfqisuUIQBUM8KVghpiEOBGF-zXWNC6Ll_Hlz2E,11828
- protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
- protein_quest/pdbe/fetch.py,sha256=iTyS4ucV2KZl4jTgrUFOZhsXs3cRUIuvmTbXNm_pY8U,1850
- protein_quest/pdbe/io.py,sha256=0ldsrIHKaaurrM2FfWXbqm1iRj3q6xw8-lptfYU1yEw,6231
- protein_quest-0.3.0.dist-info/METADATA,sha256=yiHZn4gDdwilbCoxrF0pCjVk04v_O5pwpwrtr6oPLrE,7369
- protein_quest-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- protein_quest-0.3.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
- protein_quest-0.3.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- protein_quest-0.3.0.dist-info/RECORD,,