protein-quest 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


protein_quest/__version__.py CHANGED
@@ -1,2 +1,2 @@
- __version__ = "0.4.0"
+ __version__ = "0.5.0"
  """The version of the package."""
protein_quest/alphafold/fetch.py CHANGED
@@ -14,7 +14,7 @@ from yarl import URL

  from protein_quest.alphafold.entry_summary import EntrySummary
  from protein_quest.converter import converter
- from protein_quest.utils import friendly_session, retrieve_files, run_async
+ from protein_quest.utils import Cacher, PassthroughCacher, friendly_session, retrieve_files, run_async

  logger = logging.getLogger(__name__)

@@ -104,7 +104,7 @@ class AlphaFoldEntry:


  async def fetch_summary(
-     qualifier: str, session: RetryClient, semaphore: Semaphore, save_dir: Path | None
+     qualifier: str, session: RetryClient, semaphore: Semaphore, save_dir: Path | None, cacher: Cacher
  ) -> list[EntrySummary]:
      """Fetches a summary from the AlphaFold database for a given qualifier.

@@ -116,6 +116,7 @@ async def fetch_summary(
          save_dir: An optional directory to save the fetched summary as a JSON file.
              If set and summary exists then summary will be loaded from disk instead of being fetched from the API.
              If not set then the summary will not be saved to disk and will always be fetched from the API.
+         cacher: A cacher to use for caching the fetched summary. Only used if save_dir is not None.

      Returns:
          A list of EntrySummary objects representing the fetched summary.
@@ -124,6 +125,11 @@ async def fetch_summary(
      fn: AsyncPath | None = None
      if save_dir is not None:
          fn = AsyncPath(save_dir / f"{qualifier}.json")
+         cached_file = await cacher.copy_from_cache(Path(fn))
+         if cached_file is not None:
+             logger.debug(f"Using cached file {cached_file} for summary of {qualifier}.")
+             raw_data = await AsyncPath(cached_file).read_bytes()
+             return converter.loads(raw_data, list[EntrySummary])
          if await fn.exists():
              logger.debug(f"File {fn} already exists. Skipping download from {url}.")
              raw_data = await fn.read_bytes()
@@ -133,18 +139,23 @@ async def fetch_summary(
          raw_data = await response.content.read()
      if fn is not None:
          # TODO return fn and make it part of AlphaFoldEntry as summary_file prop
-         await fn.write_bytes(raw_data)
+         await cacher.write_bytes(Path(fn), raw_data)
      return converter.loads(raw_data, list[EntrySummary])


  async def fetch_summaries(
-     qualifiers: Iterable[str], save_dir: Path | None = None, max_parallel_downloads: int = 5
+     qualifiers: Iterable[str],
+     save_dir: Path | None = None,
+     max_parallel_downloads: int = 5,
+     cacher: Cacher | None = None,
  ) -> AsyncGenerator[EntrySummary]:
      semaphore = Semaphore(max_parallel_downloads)
      if save_dir is not None:
          save_dir.mkdir(parents=True, exist_ok=True)
+     if cacher is None:
+         cacher = PassthroughCacher()
      async with friendly_session() as session:
-         tasks = [fetch_summary(qualifier, session, semaphore, save_dir) for qualifier in qualifiers]
+         tasks = [fetch_summary(qualifier, session, semaphore, save_dir, cacher) for qualifier in qualifiers]
          summaries_per_qualifier: list[list[EntrySummary]] = await tqdm.gather(
              *tasks, desc="Fetching Alphafold summaries"
          )
@@ -154,7 +165,11 @@ async def fetch_summaries(


  async def fetch_many_async(
-     uniprot_accessions: Iterable[str], save_dir: Path, what: set[DownloadableFormat], max_parallel_downloads: int = 5
+     uniprot_accessions: Iterable[str],
+     save_dir: Path,
+     what: set[DownloadableFormat],
+     max_parallel_downloads: int = 5,
+     cacher: Cacher | None = None,
  ) -> AsyncGenerator[AlphaFoldEntry]:
      """Asynchronously fetches summaries and files from
      [AlphaFold Protein Structure Database](https://alphafold.ebi.ac.uk/).
@@ -164,15 +179,17 @@ async def fetch_many_async(
          save_dir: The directory to save the fetched files to.
          what: A set of formats to download.
          max_parallel_downloads: The maximum number of parallel downloads.
+         cacher: A cacher to use for caching the fetched files. Only used if summary is in what set.

      Yields:
          A dataclass containing the summary, pdb file, and pae file.
      """
      save_dir_for_summaries = save_dir if "summary" in what and save_dir is not None else None
+
      summaries = [
          s
          async for s in fetch_summaries(
-             uniprot_accessions, save_dir_for_summaries, max_parallel_downloads=max_parallel_downloads
+             uniprot_accessions, save_dir_for_summaries, max_parallel_downloads=max_parallel_downloads, cacher=cacher
          )
      ]

@@ -183,6 +200,7 @@ async def fetch_many_async(
          save_dir,
          desc="Downloading AlphaFold files",
          max_parallel_downloads=max_parallel_downloads,
+         cacher=cacher,
      )
      for summary in summaries:
          yield AlphaFoldEntry(
@@ -236,7 +254,11 @@ def files_to_download(what: set[DownloadableFormat], summaries: Iterable[EntrySu


  def fetch_many(
-     ids: Iterable[str], save_dir: Path, what: set[DownloadableFormat], max_parallel_downloads: int = 5
+     ids: Iterable[str],
+     save_dir: Path,
+     what: set[DownloadableFormat],
+     max_parallel_downloads: int = 5,
+     cacher: Cacher | None = None,
  ) -> list[AlphaFoldEntry]:
      """Synchronously fetches summaries and pdb and pae files from AlphaFold Protein Structure Database.

@@ -245,6 +267,7 @@ def fetch_many(
          save_dir: The directory to save the fetched files to.
          what: A set of formats to download.
          max_parallel_downloads: The maximum number of parallel downloads.
+         cacher: A cacher to use for caching the fetched files. Only used if summary is in what set.

      Returns:
          A list of AlphaFoldEntry dataclasses containing the summary, pdb file, and pae file.
@@ -253,7 +276,9 @@ def fetch_many(
      async def gather_entries():
          return [
              entry
-             async for entry in fetch_many_async(ids, save_dir, what, max_parallel_downloads=max_parallel_downloads)
+             async for entry in fetch_many_async(
+                 ids, save_dir, what, max_parallel_downloads=max_parallel_downloads, cacher=cacher
+             )
          ]

      return run_async(gather_entries())
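
The hunks above thread an optional `Cacher` through the AlphaFold fetch API. A minimal usage sketch, assuming only the signatures shown in this diff; the accession and directory names below are placeholders:

```python
from pathlib import Path

from protein_quest.alphafold.fetch import fetch_many
from protein_quest.utils import DirectoryCacher

# Placeholder accession and output directory, for illustration only.
accessions = ["P05067"]
save_dir = Path("downloads/alphafold")

# Reuse files already present in the shared cache; omitting cacher keeps the
# previous behaviour (a PassthroughCacher is substituted internally).
cacher = DirectoryCacher(copy_method="symlink")

entries = fetch_many(accessions, save_dir, what={"summary", "cif"}, cacher=cacher)
print(f"Fetched {len(entries)} AlphaFold entries")
```
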
protein_quest/cli.py CHANGED
@@ -43,7 +43,15 @@ from protein_quest.uniprot import (
      search4pdb,
      search4uniprot,
  )
- from protein_quest.utils import CopyMethod, copy_methods, copyfile
+ from protein_quest.utils import (
+     Cacher,
+     CopyMethod,
+     DirectoryCacher,
+     PassthroughCacher,
+     copy_methods,
+     copyfile,
+     user_cache_root_dir,
+ )

  logger = logging.getLogger(__name__)

@@ -312,6 +320,7 @@ def _add_retrieve_pdbe_parser(subparsers: argparse._SubParsersAction):
          default=5,
          help="Maximum number of parallel downloads",
      )
+     _add_cacher_arguments(parser)


  def _add_retrieve_alphafold_parser(subparsers: argparse._SubParsersAction):
@@ -342,6 +351,7 @@ def _add_retrieve_alphafold_parser(subparsers: argparse._SubParsersAction):
          default=5,
          help="Maximum number of parallel downloads",
      )
+     _add_cacher_arguments(parser)


  def _add_retrieve_emdb_parser(subparsers: argparse._SubParsersAction):
@@ -361,22 +371,7 @@ def _add_retrieve_emdb_parser(subparsers: argparse._SubParsersAction):
          help="CSV file with `emdb_id` column. Other columns are ignored. Use `-` for stdin.",
      )
      parser.add_argument("output_dir", type=Path, help="Directory to store downloaded EMDB volume files")
-
-
- def _add_copy_method_argument(parser: argparse.ArgumentParser):
-     """Add copy method argument to parser."""
-     default_copy_method = "symlink"
-     if os.name == "nt":
-         # On Windows you need developer mode or admin privileges to create symlinks
-         # so we default to copying files instead of symlinking
-         default_copy_method = "copy"
-     parser.add_argument(
-         "--copy-method",
-         type=str,
-         choices=copy_methods,
-         default=default_copy_method,
-         help="How to copy files when no changes are needed to output file.",
-     )
+     _add_cacher_arguments(parser)


  def _add_filter_confidence_parser(subparsers: argparse._SubParsersAction):
@@ -409,7 +404,7 @@ def _add_filter_confidence_parser(subparsers: argparse._SubParsersAction):
              In CSV format with `<input_file>,<residue_count>,<passed>,<output_file>` columns.
              Use `-` for stdout."""),
      )
-     _add_copy_method_argument(parser)
+     _add_copy_method_arguments(parser)


  def _add_filter_chain_parser(subparsers: argparse._SubParsersAction):
@@ -449,7 +444,7 @@ def _add_filter_chain_parser(subparsers: argparse._SubParsersAction):
              If not provided, will create a local cluster.
              If set to `sequential` will run tasks sequentially."""),
      )
-     _add_copy_method_argument(parser)
+     _add_copy_method_arguments(parser)


  def _add_filter_residue_parser(subparsers: argparse._SubParsersAction):
@@ -472,7 +467,6 @@ def _add_filter_residue_parser(subparsers: argparse._SubParsersAction):
      )
      parser.add_argument("--min-residues", type=int, default=0, help="Min residues in chain A")
      parser.add_argument("--max-residues", type=int, default=10_000_000, help="Max residues in chain A")
-     _add_copy_method_argument(parser)
      parser.add_argument(
          "--write-stats",
          type=argparse.FileType("w", encoding="UTF-8"),
@@ -481,6 +475,7 @@ def _add_filter_residue_parser(subparsers: argparse._SubParsersAction):
              In CSV format with `<input_file>,<residue_count>,<passed>,<output_file>` columns.
              Use `-` for stdout."""),
      )
+     _add_copy_method_arguments(parser)


  def _add_filter_ss_parser(subparsers: argparse._SubParsersAction):
@@ -507,7 +502,6 @@ def _add_filter_ss_parser(subparsers: argparse._SubParsersAction):
      parser.add_argument("--ratio-max-helix-residues", type=float, help="Max residues in helices (relative)")
      parser.add_argument("--ratio-min-sheet-residues", type=float, help="Min residues in sheets (relative)")
      parser.add_argument("--ratio-max-sheet-residues", type=float, help="Max residues in sheets (relative)")
-     _add_copy_method_argument(parser)
      parser.add_argument(
          "--write-stats",
          type=argparse.FileType("w", encoding="UTF-8"),
@@ -518,6 +512,7 @@ def _add_filter_ss_parser(subparsers: argparse._SubParsersAction):
              Use `-` for stdout.
              """),
      )
+     _add_copy_method_arguments(parser)


  def _add_search_subcommands(subparsers: argparse._SubParsersAction):
@@ -585,6 +580,38 @@ def _add_mcp_command(subparsers: argparse._SubParsersAction):
      parser.add_argument("--port", default=8000, type=int, help="Port to bind the server to")


+ def _add_copy_method_arguments(parser):
+     parser.add_argument(
+         "--copy-method",
+         type=str,
+         choices=copy_methods,
+         default="hardlink",
+         help=dedent("""\
+             How to make target file be same file as source file.
+             By default uses hardlinks to save disk space.
+             Note that hardlinks only work within the same filesystem and are harder to track.
+             If you want to track cached files easily then use 'symlink'.
+             On Windows you need developer mode or admin privileges to create symlinks.
+             """),
+     )
+
+
+ def _add_cacher_arguments(parser: argparse.ArgumentParser):
+     """Add cacher arguments to parser."""
+     parser.add_argument(
+         "--no-cache",
+         action="store_true",
+         help="Disable caching of files to central location.",
+     )
+     parser.add_argument(
+         "--cache-dir",
+         type=Path,
+         default=user_cache_root_dir(),
+         help="Directory to use as cache for files.",
+     )
+     _add_copy_method_arguments(parser)
+
+
  def make_parser() -> argparse.ArgumentParser:
      parser = argparse.ArgumentParser(
          description="Protein Quest CLI", prog="protein-quest", formatter_class=ArgumentDefaultsRichHelpFormatter
@@ -742,14 +769,26 @@ def _handle_search_complexes(args: argparse.Namespace):
      _write_complexes_csv(results, output_csv)


- def _handle_retrieve_pdbe(args):
+ def _initialize_cacher(args: argparse.Namespace) -> Cacher:
+     if args.no_cache:
+         return PassthroughCacher()
+     return DirectoryCacher(
+         cache_dir=args.cache_dir,
+         copy_method=args.copy_method,
+     )
+
+
+ def _handle_retrieve_pdbe(args: argparse.Namespace):
      pdbe_csv = args.pdbe_csv
      output_dir = args.output_dir
      max_parallel_downloads = args.max_parallel_downloads
+     cacher = _initialize_cacher(args)

      pdb_ids = _read_column_from_csv(pdbe_csv, "pdb_id")
      rprint(f"Retrieving {len(pdb_ids)} PDBe entries")
-     result = asyncio.run(pdbe_fetch.fetch(pdb_ids, output_dir, max_parallel_downloads=max_parallel_downloads))
+     result = asyncio.run(
+         pdbe_fetch.fetch(pdb_ids, output_dir, max_parallel_downloads=max_parallel_downloads, cacher=cacher)
+     )
      rprint(f"Retrieved {len(result)} PDBe entries")


@@ -758,6 +797,7 @@ def _handle_retrieve_alphafold(args):
      what_formats = args.what_formats
      alphafold_csv = args.alphafold_csv
      max_parallel_downloads = args.max_parallel_downloads
+     cacher = _initialize_cacher(args)

      if what_formats is None:
          what_formats = {"summary", "cif"}
@@ -767,7 +807,9 @@ def _handle_retrieve_alphafold(args):
      af_ids = _read_column_from_csv(alphafold_csv, "af_id")
      validated_what: set[DownloadableFormat] = structure(what_formats, set[DownloadableFormat])
      rprint(f"Retrieving {len(af_ids)} AlphaFold entries with formats {validated_what}")
-     afs = af_fetch(af_ids, download_dir, what=validated_what, max_parallel_downloads=max_parallel_downloads)
+     afs = af_fetch(
+         af_ids, download_dir, what=validated_what, max_parallel_downloads=max_parallel_downloads, cacher=cacher
+     )
      total_nr_files = sum(af.nr_of_files() for af in afs)
      rprint(f"Retrieved {total_nr_files} AlphaFold files and {len(afs)} summaries, written to {download_dir}")

@@ -775,10 +817,11 @@ def _handle_retrieve_emdb(args):
  def _handle_retrieve_emdb(args):
      emdb_csv = args.emdb_csv
      output_dir = args.output_dir
+     cacher = _initialize_cacher(args)

      emdb_ids = _read_column_from_csv(emdb_csv, "emdb_id")
      rprint(f"Retrieving {len(emdb_ids)} EMDB entries")
-     result = asyncio.run(emdb_fetch(emdb_ids, output_dir))
+     result = asyncio.run(emdb_fetch(emdb_ids, output_dir, cacher=cacher))
      rprint(f"Retrieved {len(result)} EMDB entries")

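
The new `--no-cache`, `--cache-dir`, and `--copy-method` flags are turned into a `Cacher` by `_initialize_cacher`. A rough equivalent for use outside the CLI, assuming only the public helpers shown in this diff (the `build_cacher` helper below is hypothetical):

```python
from pathlib import Path

from protein_quest.utils import Cacher, CopyMethod, DirectoryCacher, PassthroughCacher, user_cache_root_dir


def build_cacher(no_cache: bool = False, cache_dir: Path | None = None, copy_method: CopyMethod = "hardlink") -> Cacher:
    """Hypothetical helper mirroring the CLI's _initialize_cacher selection logic."""
    if no_cache:
        # Equivalent of --no-cache: skip the shared cache entirely.
        return PassthroughCacher()
    # Defaults match _add_cacher_arguments: user cache dir and hardlinks.
    return DirectoryCacher(cache_dir=cache_dir or user_cache_root_dir(), copy_method=copy_method)


cacher = build_cacher(copy_method="symlink")
```
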
protein_quest/emdb.py CHANGED
@@ -3,7 +3,7 @@
  from collections.abc import Iterable, Mapping
  from pathlib import Path

- from protein_quest.utils import retrieve_files
+ from protein_quest.utils import Cacher, retrieve_files


  def _map_id2volume_url(emdb_id: str) -> tuple[str, str]:
@@ -13,13 +13,16 @@ def _map_id2volume_url(emdb_id: str) -> tuple[str, str]:
      return url, fn


- async def fetch(emdb_ids: Iterable[str], save_dir: Path, max_parallel_downloads: int = 1) -> Mapping[str, Path]:
+ async def fetch(
+     emdb_ids: Iterable[str], save_dir: Path, max_parallel_downloads: int = 1, cacher: Cacher | None = None
+ ) -> Mapping[str, Path]:
      """Fetches volume files from the EMDB database.

      Args:
          emdb_ids: A list of EMDB IDs to fetch.
          save_dir: The directory to save the downloaded files.
          max_parallel_downloads: The maximum number of parallel downloads.
+         cacher: An optional cacher to use for caching downloaded files.

      Returns:
          A mapping of EMDB IDs to their downloaded files.
@@ -30,5 +33,5 @@ async def fetch(emdb_ids: Iterable[str], save_dir: Path, max_parallel_downloads:

      # TODO show progress of each item
      # TODO handle failed downloads, by skipping them instead of raising an error
-     await retrieve_files(urls, save_dir, max_parallel_downloads, desc="Downloading EMDB volume files")
+     await retrieve_files(urls, save_dir, max_parallel_downloads, desc="Downloading EMDB volume files", cacher=cacher)
      return id2paths
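
As a usage sketch for the updated signature, the optional `cacher` is passed straight through to `retrieve_files`; the EMDB ID and output directory below are placeholders:

```python
import asyncio
from pathlib import Path

from protein_quest.emdb import fetch as emdb_fetch
from protein_quest.utils import DirectoryCacher


async def main() -> None:
    # Placeholder EMDB ID and output directory, for illustration only.
    id2path = await emdb_fetch(["EMD-19583"], Path("downloads/emdb"), cacher=DirectoryCacher())
    for emdb_id, path in id2path.items():
        print(emdb_id, "->", path)


asyncio.run(main())
```
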
protein_quest/mcp_server.py CHANGED
@@ -32,6 +32,7 @@ Examples:

  """

+ from collections.abc import Mapping
  from pathlib import Path
  from textwrap import dedent
  from typing import Annotated
@@ -89,7 +90,18 @@ def search_pdb(
      return search4pdb(uniprot_accs, limit=limit)


- mcp.tool(pdbe_fetch, name="fetch_pdbe_structures")
+ @mcp.tool
+ async def fetch_pdbe_structures(pdb_ids: set[str], save_dir: Path) -> Mapping[str, Path]:
+     """Fetch the PDBe structures for given PDB IDs.
+
+     Args:
+         pdb_ids: A set of PDB IDs.
+         save_dir: The directory to save the fetched files.
+
+     Returns:
+         A mapping of PDB ID to the path of the fetched structure file.
+     """
+     return await pdbe_fetch(pdb_ids, save_dir)


  @mcp.tool
@@ -163,7 +175,17 @@ def fetch_alphafold_structures(uniprot_accs: set[str], save_dir: Path) -> list[A
      return alphafold_fetch(uniprot_accs, save_dir, what)


- mcp.tool(emdb_fetch, name="fetch_emdb_volumes")
+ @mcp.tool
+ async def fetch_emdb_volumes(emdb_ids: set[str], save_dir: Path) -> Mapping[str, Path]:
+     """Fetch EMDB volumes for given EMDB IDs.
+
+     Args:
+         emdb_ids: A set of EMDB IDs.
+         save_dir: The directory to save the fetched files.
+     Returns:
+         A mapping of EMDB ID to the path of the fetched volume file.
+     """
+     return await emdb_fetch(emdb_ids=emdb_ids, save_dir=save_dir)


  @mcp.tool
protein_quest/pdbe/fetch.py CHANGED
@@ -3,7 +3,7 @@
  from collections.abc import Iterable, Mapping
  from pathlib import Path

- from protein_quest.utils import retrieve_files, run_async
+ from protein_quest.utils import Cacher, retrieve_files, run_async


  def _map_id_mmcif(pdb_id: str) -> tuple[str, str]:
@@ -28,13 +28,16 @@ def _map_id_mmcif(pdb_id: str) -> tuple[str, str]:
      return url, fn


- async def fetch(ids: Iterable[str], save_dir: Path, max_parallel_downloads: int = 5) -> Mapping[str, Path]:
+ async def fetch(
+     ids: Iterable[str], save_dir: Path, max_parallel_downloads: int = 5, cacher: Cacher | None = None
+ ) -> Mapping[str, Path]:
      """Fetches mmCIF files from the PDBe database.

      Args:
          ids: A set of PDB IDs to fetch.
          save_dir: The directory to save the fetched mmCIF files to.
          max_parallel_downloads: The maximum number of parallel downloads.
+         cacher: An optional cacher to use for caching downloaded files.

      Returns:
          A dict of id and paths to the downloaded mmCIF files.
@@ -47,7 +50,7 @@ async def fetch(ids: Iterable[str], save_dir: Path, max_parallel_downloads: int
      urls = list(id2urls.values())
      id2paths = {pdb_id: save_dir / fn for pdb_id, (_, fn) in id2urls.items()}

-     await retrieve_files(urls, save_dir, max_parallel_downloads, desc="Downloading PDBe mmCIF files")
+     await retrieve_files(urls, save_dir, max_parallel_downloads, desc="Downloading PDBe mmCIF files", cacher=cacher)
      return id2paths

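
Callers that do not want the shared cache can keep the old behaviour by omitting `cacher` or passing a `PassthroughCacher` explicitly, mirroring the CLI's `--no-cache` flag. A small sketch, assuming the signature above; the PDB ID and directory are placeholders:

```python
from pathlib import Path

from protein_quest.pdbe.fetch import fetch as pdbe_fetch
from protein_quest.utils import PassthroughCacher, run_async

# Placeholder PDB ID and output directory, for illustration only.
id2path = run_async(pdbe_fetch(["1ubq"], Path("downloads/pdbe"), cacher=PassthroughCacher()))
print(id2path)
```
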
protein_quest/utils.py CHANGED
@@ -1,22 +1,260 @@
  """Module for functions that are used in multiple places."""

+ import argparse
  import asyncio
+ import hashlib
  import logging
  import shutil
- from collections.abc import Coroutine, Iterable
+ from collections.abc import Coroutine, Iterable, Sequence
  from contextlib import asynccontextmanager
+ from functools import lru_cache
  from pathlib import Path
  from textwrap import dedent
- from typing import Any, Literal, get_args
+ from typing import Any, Literal, Protocol, get_args, runtime_checkable

  import aiofiles
+ import aiofiles.os
  import aiohttp
+ import rich
+ from aiohttp.streams import AsyncStreamIterator
  from aiohttp_retry import ExponentialRetry, RetryClient
+ from platformdirs import user_cache_dir
+ from rich_argparse import ArgumentDefaultsRichHelpFormatter
  from tqdm.asyncio import tqdm
  from yarl import URL

  logger = logging.getLogger(__name__)

+ CopyMethod = Literal["copy", "symlink", "hardlink"]
+ """Methods for copying files."""
+ copy_methods = set(get_args(CopyMethod))
+ """Set of valid copy methods."""
+
+
+ @lru_cache
+ def _cache_sub_dir(root_cache_dir: Path, filename: str, hash_length: int = 4) -> Path:
+     """Get the cache sub-directory for a given path.
+
+     To not have too many files in a single directory,
+     we create sub-directories based on the hash of the filename.
+
+     Args:
+         root_cache_dir: The root directory for the cache.
+         filename: The filename to be cached.
+         hash_length: The length of the hash to use for the sub-directory.
+
+     Returns:
+         The parent path to the cached file.
+     """
+     full_hash = hashlib.blake2b(filename.encode("utf-8")).hexdigest()
+     cache_sub_dir = full_hash[:hash_length]
+     cache_sub_dir_path = root_cache_dir / cache_sub_dir
+     cache_sub_dir_path.mkdir(parents=True, exist_ok=True)
+     return cache_sub_dir_path
+
+
+ @runtime_checkable
+ class Cacher(Protocol):
+     """Protocol for a cacher."""
+
+     def __contains__(self, item: str | Path) -> bool:
+         """Check if a file is in the cache.
+
+         Args:
+             item: The filename or Path to check.
+
+         Returns:
+             True if the file is in the cache, False otherwise.
+         """
+         ...
+
+     async def copy_from_cache(self, target: Path) -> Path | None:
+         """Copy a file from the cache to a target location if it exists in the cache.
+
+         Assumes:
+
+         - target does not exist.
+         - the parent directory of target exists.
+
+         Args:
+             target: The path to copy the file to.
+
+         Returns:
+             The path to the cached file if it was copied, None otherwise.
+         """
+         ...
+
+     async def write_iter(self, target: Path, content: AsyncStreamIterator[bytes]) -> Path:
+         """Write content to a file and cache it.
+
+         Args:
+             target: The path to write the content to.
+             content: An async iterator that yields bytes to write to the file.
+
+         Returns:
+             The path to the cached file.
+
+         Raises:
+             FileExistsError: If the target file already exists.
+         """
+         ...
+
+     async def write_bytes(self, target: Path, content: bytes) -> Path:
+         """Write bytes to a file and cache it.
+
+         Args:
+             target: The path to write the content to.
+             content: The bytes to write to the file.
+
+         Returns:
+             The path to the cached file.
+
+         Raises:
+             FileExistsError: If the target file already exists.
+         """
+         ...
+
+
+ class PassthroughCacher(Cacher):
+     """A cacher that caches nothing.
+
+     On writes it just writes to the target path.
+     """
+
+     def __contains__(self, item: str | Path) -> bool:
+         # We don't have anything cached ever
+         return False
+
+     async def copy_from_cache(self, target: Path) -> Path | None:  # noqa: ARG002
+         # We don't have anything cached ever
+         return None
+
+     async def write_iter(self, target: Path, content: AsyncStreamIterator[bytes]) -> Path:
+         if target.exists():
+             raise FileExistsError(target)
+         target.write_bytes(b"".join([chunk async for chunk in content]))
+         return target
+
+     async def write_bytes(self, target: Path, content: bytes) -> Path:
+         if target.exists():
+             raise FileExistsError(target)
+         target.write_bytes(content)
+         return target
+
+
+ def user_cache_root_dir() -> Path:
+     """Get the users root directory for caching files.
+
+     Returns:
+         The path to the user's cache directory for protein-quest.
+     """
+     return Path(user_cache_dir("protein-quest"))
+
+
+ class DirectoryCacher(Cacher):
+     """Class to cache files in a directory.
+
+     Caching logic is based on the file name only.
+     If file name of paths are the same then the files are considered the same.
+
+     Attributes:
+         cache_dir: The directory to use for caching.
+         copy_method: The method to use for copying files.
+     """
+
+     def __init__(
+         self,
+         cache_dir: Path | None = None,
+         copy_method: CopyMethod = "hardlink",
+     ) -> None:
+         """Initialize the cacher.
+
+         If file name of paths are the same then the files are considered the same.
+
+         Args:
+             cache_dir: The directory to use for caching.
+                 If None, a default cache directory (~/.cache/protein-quest) is used.
+             copy_method: The method to use for copying.
+         """
+         if cache_dir is None:
+             cache_dir = user_cache_root_dir()
+         self.cache_dir: Path = cache_dir
+         self.cache_dir.mkdir(parents=True, exist_ok=True)
+         if copy_method == "copy":
+             logger.warning(
+                 "Using copy as copy_method to cache files is not recommended. "
+                 "This will use more disk space and be slower than symlink or hardlink."
+             )
+         if copy_method not in copy_methods:
+             msg = f"Unknown copy method: {copy_method}. Must be one of {copy_methods}."
+             raise ValueError(msg)
+         self.copy_method: CopyMethod = copy_method
+
+     def __contains__(self, item: str | Path) -> bool:
+         cached_file = self._as_cached_path(item)
+         return cached_file.exists()
+
+     def _as_cached_path(self, item: str | Path) -> Path:
+         file_name = item.name if isinstance(item, Path) else item
+         cache_sub_dir = _cache_sub_dir(self.cache_dir, file_name)
+         return cache_sub_dir / file_name
+
+     async def copy_from_cache(self, target: Path) -> Path | None:
+         cached_file = self._as_cached_path(target.name)
+         exists = await aiofiles.os.path.exists(str(cached_file))
+         if exists:
+             await async_copyfile(cached_file, target, copy_method=self.copy_method)
+             return cached_file
+         return None
+
+     async def write_iter(self, target: Path, content: AsyncStreamIterator[bytes]) -> Path:
+         cached_file = self._as_cached_path(target.name)
+         # Write file to cache dir
+         async with aiofiles.open(cached_file, "xb") as f:
+             async for chunk in content:
+                 await f.write(chunk)
+         # Copy to target location
+         await async_copyfile(cached_file, target, copy_method=self.copy_method)
+         return cached_file
+
+     async def write_bytes(self, target: Path, content: bytes) -> Path:
+         cached_file = self._as_cached_path(target.name)
+         # Write file to cache dir
+         async with aiofiles.open(cached_file, "xb") as f:
+             await f.write(content)
+         # Copy to target location
+         await async_copyfile(cached_file, target, copy_method=self.copy_method)
+         return cached_file
+
+     def populate_cache(self, source_dir: Path) -> dict[Path, Path]:
+         """Populate the cache from an existing directory.
+
+         This will copy all files from the source directory to the cache directory.
+         If a file with the same name already exists in the cache, it will be skipped.
+
+         Args:
+             source_dir: The directory to populate the cache from.
+
+         Returns:
+             A dictionary mapping source file paths to their cached paths.
+
+         Raises:
+             NotADirectoryError: If the source_dir is not a directory.
+         """
+         if not source_dir.is_dir():
+             raise NotADirectoryError(source_dir)
+         cached = {}
+         for file_path in source_dir.iterdir():
+             if not file_path.is_file():
+                 continue
+             cached_path = self._as_cached_path(file_path.name)
+             if cached_path.exists():
+                 logger.debug(f"File {file_path.name} already in cache. Skipping.")
+                 continue
+             copyfile(file_path, cached_path, copy_method=self.copy_method)
+             cached[file_path] = cached_path
+         return cached
+

  async def retrieve_files(
      urls: Iterable[tuple[URL | str, str]],
@@ -25,6 +263,8 @@ async def retrieve_files(
      retries: int = 3,
      total_timeout: int = 300,
      desc: str = "Downloading files",
+     cacher: Cacher | None = None,
+     chunk_size: int = 524288,  # 512 KiB
  ) -> list[Path]:
      """Retrieve files from a list of URLs and save them to a directory.

@@ -35,6 +275,8 @@ async def retrieve_files(
          retries: The number of times to retry a failed download.
          total_timeout: The total timeout for a download in seconds.
          desc: Description for the progress bar.
+         cacher: An optional cacher to use for caching files.
+         chunk_size: The size of each chunk to read from the response.

      Returns:
          A list of paths to the downloaded files.
@@ -42,7 +284,17 @@ async def retrieve_files(
      save_dir.mkdir(parents=True, exist_ok=True)
      semaphore = asyncio.Semaphore(max_parallel_downloads)
      async with friendly_session(retries, total_timeout) as session:
-         tasks = [_retrieve_file(session, url, save_dir / filename, semaphore) for url, filename in urls]
+         tasks = [
+             _retrieve_file(
+                 session=session,
+                 url=url,
+                 save_path=save_dir / filename,
+                 semaphore=semaphore,
+                 cacher=cacher,
+                 chunk_size=chunk_size,
+             )
+             for url, filename in urls
+         ]
          files: list[Path] = await tqdm.gather(*tasks, desc=desc)
      return files

@@ -52,8 +304,8 @@ async def _retrieve_file(
      url: URL | str,
      save_path: Path,
      semaphore: asyncio.Semaphore,
-     ovewrite: bool = False,
-     chunk_size: int = 131072,  # 128 KiB
+     cacher: Cacher | None = None,
+     chunk_size: int = 524288,  # 512 KiB
  ) -> Path:
      """Retrieve a single file from a URL and save it to a specified path.

@@ -62,26 +314,28 @@ async def _retrieve_file(
          url: The URL to download the file from.
          save_path: The path where the file should be saved.
          semaphore: A semaphore to limit the number of concurrent downloads.
-         ovewrite: Whether to overwrite the file if it already exists.
+         cacher: An optional cacher to use for caching files.
          chunk_size: The size of each chunk to read from the response.

      Returns:
          The path to the saved file.
      """
      if save_path.exists():
-         if ovewrite:
-             save_path.unlink()
-         else:
-             logger.debug(f"File {save_path} already exists. Skipping download from {url}.")
-             return save_path
+         logger.debug(f"File {save_path} already exists. Skipping download from {url}.")
+         return save_path
+
+     if cacher is None:
+         cacher = PassthroughCacher()
+     if cached_file := await cacher.copy_from_cache(save_path):
+         logger.debug(f"File {save_path} was copied from cache {cached_file}. Skipping download from {url}.")
+         return save_path
+
      async with (
          semaphore,
-         aiofiles.open(save_path, "xb") as f,
          session.get(url) as resp,
      ):
          resp.raise_for_status()
-         async for chunk in resp.content.iter_chunked(chunk_size):
-             await f.write(chunk)
+         await cacher.write_iter(save_path, resp.content.iter_chunked(chunk_size))
      return save_path


@@ -141,27 +395,117 @@ def run_async[R](coroutine: Coroutine[Any, Any, R]) -> R:
      raise NestedAsyncIOLoopError from e


- CopyMethod = Literal["copy", "symlink"]
- copy_methods = set(get_args(CopyMethod))
-
-
  def copyfile(source: Path, target: Path, copy_method: CopyMethod = "copy"):
-     """Make target path be same file as source by either copying or symlinking.
+     """Make target path be same file as source by either copying or symlinking or hardlinking.
+
+     Note that the hardlink copy method only works within the same filesystem and is harder to track.
+     If you want to track cached files easily then use 'symlink'.
+     On Windows you need developer mode or admin privileges to create symlinks.

      Args:
-         source: The source file to copy or symlink.
+         source: The source file to copy or link.
          target: The target file to create.
          copy_method: The method to use for copying.

      Raises:
          FileNotFoundError: If the source file or parent of target does not exist.
-         ValueError: If the method is not "copy" or "symlink".
+         FileExistsError: If the target file already exists.
+         ValueError: If an unknown copy method is provided.
      """
      if copy_method == "copy":
          shutil.copyfile(source, target)
      elif copy_method == "symlink":
-         rel_source = source.relative_to(target.parent, walk_up=True)
+         rel_source = source.absolute().relative_to(target.parent.absolute(), walk_up=True)
          target.symlink_to(rel_source)
+     elif copy_method == "hardlink":
+         target.hardlink_to(source)
      else:
-         msg = f"Unknown method: {copy_method}"
+         msg = f"Unknown method: {copy_method}. Valid methods are: {copy_methods}"
          raise ValueError(msg)
+
+
+ async def async_copyfile(
+     source: Path,
+     target: Path,
+     copy_method: CopyMethod = "copy",
+ ):
+     """Asynchronously make target path be same file as source by either copying or symlinking or hardlinking.
+
+     Note that the hardlink copy method only works within the same filesystem and is harder to track.
+     If you want to track cached files easily then use 'symlink'.
+     On Windows you need developer mode or admin privileges to create symlinks.
+
+     Args:
+         source: The source file to copy.
+         target: The target file to create.
+         copy_method: The method to use for copying.
+
+     Raises:
+         FileNotFoundError: If the source file or parent of target does not exist.
+         FileExistsError: If the target file already exists.
+         ValueError: If an unknown copy method is provided.
+     """
+     if copy_method == "copy":
+         # Could use loop of chunks with aiofiles,
+         # but shutil is ~1.9x faster on my machine
+         # due to fastcopy and sendfile optimizations in shutil.
+         await asyncio.to_thread(shutil.copyfile, source, target)
+     elif copy_method == "symlink":
+         rel_source = source.relative_to(target.parent, walk_up=True)
+         await aiofiles.os.symlink(str(rel_source), str(target))
+     elif copy_method == "hardlink":
+         await aiofiles.os.link(str(source), str(target))
+     else:
+         msg = f"Unknown method: {copy_method}. Valid methods are: {copy_methods}"
+         raise ValueError(msg)
+
+
+ def populate_cache_command(raw_args: Sequence[str] | None = None):
+     """Command line interface to populate the cache from an existing directory.
+
+     Can be called from the command line as:
+
+     ```bash
+     python3 -m protein_quest.utils populate-cache /path/to/source/dir
+     ```
+
+     Args:
+         raw_args: The raw command line arguments to parse. If None, uses sys.argv.
+     """
+     root_parser = argparse.ArgumentParser(formatter_class=ArgumentDefaultsRichHelpFormatter)
+     subparsers = root_parser.add_subparsers(dest="command")
+
+     desc = "Populate the cache directory with files from the source directory."
+     populate_cache_parser = subparsers.add_parser(
+         "populate-cache",
+         help=desc,
+         description=desc,
+         formatter_class=ArgumentDefaultsRichHelpFormatter,
+     )
+     populate_cache_parser.add_argument("source_dir", type=Path)
+     populate_cache_parser.add_argument(
+         "--cache-dir",
+         type=Path,
+         default=user_cache_root_dir(),
+         help="Directory to use for caching. If not provided, a default cache directory is used.",
+     )
+     populate_cache_parser.add_argument(
+         "--copy-method",
+         type=str,
+         default="hardlink",
+         choices=copy_methods,
+         help="Method to use for copying files to cache.",
+     )
+
+     args = root_parser.parse_args(raw_args)
+     if args.command == "populate-cache":
+         source_dir = args.source_dir
+         cacher = DirectoryCacher(cache_dir=args.cache_dir, copy_method=args.copy_method)
+         cached_files = cacher.populate_cache(source_dir)
+         rich.print(f"Cached {len(cached_files)} files from {source_dir} to {cacher.cache_dir}")
+         for src, cached in cached_files.items():
+             rich.print(f"- {src.relative_to(source_dir)} -> {cached.relative_to(cacher.cache_dir)}")
+
+
+ if __name__ == "__main__":
+     populate_cache_command()
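
To make the caching flow concrete, here is a small sketch exercising `DirectoryCacher` directly, based on the methods defined above; the directories and file name are placeholders and are assumed to sit on the same filesystem as the cache so that hardlinking works:

```python
import asyncio
from pathlib import Path

from protein_quest.utils import DirectoryCacher


async def demo() -> None:
    # Placeholder directories and file name, for illustration only.
    cacher = DirectoryCacher(cache_dir=Path("/tmp/pq-cache"), copy_method="hardlink")

    run1 = Path("/tmp/pq-run1")
    run1.mkdir(parents=True, exist_ok=True)
    # write_bytes stores the payload under the cache dir and links it to the target path.
    await cacher.write_bytes(run1 / "entry.json", b"{}")

    run2 = Path("/tmp/pq-run2")
    run2.mkdir(parents=True, exist_ok=True)
    # A later run asking for the same file name is served from the cache instead of the network.
    cached = await cacher.copy_from_cache(run2 / "entry.json")
    print("entry.json" in cacher, cached)


asyncio.run(demo())
```
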
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: protein_quest
- Version: 0.4.0
+ Version: 0.5.0
  Summary: Search/retrieve/filter proteins and protein structures
  Project-URL: Homepage, https://github.com/haddocking/protein-quest
  Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
@@ -17,6 +17,7 @@ Requires-Dist: cattrs[orjson]>=24.1.3
  Requires-Dist: dask>=2025.5.1
  Requires-Dist: distributed>=2025.5.1
  Requires-Dist: gemmi>=0.7.3
+ Requires-Dist: platformdirs>=4.3.8
  Requires-Dist: psutil>=7.0.0
  Requires-Dist: rich-argparse>=1.7.1
  Requires-Dist: rich>=14.0.0
@@ -47,6 +48,10 @@ It uses
  - [gemmi](https://project-gemmi.github.io/) to work with macromolecular models.
  - [dask-distributed](https://docs.dask.org/en/latest/) to compute in parallel.

+ The package is used by
+
+ - [protein-detective](https://github.com/haddocking/protein-detective)
+
  An example workflow:

  ```mermaid
@@ -94,6 +99,9 @@ The main entry point is the `protein-quest` command line tool which has multiple

  To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).

+ While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
+ This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
+
  ### Search Uniprot accessions

  ```shell
@@ -1,26 +1,26 @@
  protein_quest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- protein_quest/__version__.py,sha256=je7v2gXyxr6yRVCFAS0wS-iABSLJOuCb-IPR-x90UAU,56
- protein_quest/cli.py,sha256=9Cgvn5BXbrAloIU2KCiFxLxJSyAoa2RLdmuB0HGsUJM,43078
+ protein_quest/__version__.py,sha256=AyGZhrskazcQPC8spzJ45d4XNxgla5DnO1bmKuzRj_Q,56
+ protein_quest/cli.py,sha256=xiXt_2l3MxbTbmxm2sz0w8_OdJr8gz_B68GBVv5wHjE,44182
  protein_quest/converter.py,sha256=Y-Oxf7lDNbEicL6GS-IpNWDwaAiHgIgs5bFAcEHCKdQ,1441
- protein_quest/emdb.py,sha256=QEeU0VJQ4lLM-o5yAU3QZlrtzDZNgnC5fCjlqPtTyAY,1370
+ protein_quest/emdb.py,sha256=641c6RwNYnu-0GBFyCFBiI58fNc0jMkd0ZZ9MW9-Jmc,1501
  protein_quest/filters.py,sha256=-gasSXR4g5SzYSYbkfcDwR-tm2KCAhCMdpIVJrUPR1w,5224
  protein_quest/go.py,sha256=lZNEcw8nTc9wpV3cl4y2FG9Lsj8wsXQ6zemmAQs_DWE,5650
- protein_quest/mcp_server.py,sha256=CXw5rTStunXdAVQ3BWPXy19zmgQGwV5uPcWlN1HF9do,7389
+ protein_quest/mcp_server.py,sha256=PCXxcU3GElKg2sjMlxbsM63OiFxg9AtmfKwBJ1_0AQE,8130
  protein_quest/parallel.py,sha256=ZJrLO1t2HXs4EeNctytvBTyROPBq-4-gLf35PiolHf0,3468
  protein_quest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  protein_quest/ss.py,sha256=qOr0aMycNAtZmXXvhCN-KZH3Qp4EejnBcE6fsFgCrmY,10343
  protein_quest/taxonomy.py,sha256=4mKv8zll4mX02Ow8CTvyqMJE2KJZvcq3QlTjjjLOJJk,5072
  protein_quest/uniprot.py,sha256=DIwQYzWZREZ7SGhkJT4Ozgl36pdz47FNfZ1QoEgEaXE,24239
- protein_quest/utils.py,sha256=z4PPPcog6nvPhA93DWVf7stv5uJ4h_2BP5owdhoO5mo,5626
+ protein_quest/utils.py,sha256=2lQ7jPHWtDySBTYnoL9VTKl5XUgQVYgp9Prb7qEnjtQ,17982
  protein_quest/alphafold/__init__.py,sha256=Ktasi5BRp71wO7-PpOGDpIRRtBEefs8knIdlKQeLQpk,51
  protein_quest/alphafold/confidence.py,sha256=pYIuwYdkuPuHLagcX1dSvSyZ_84xboRLfHUxkEoc4MY,6766
  protein_quest/alphafold/entry_summary.py,sha256=GtE3rT7wH3vIOOeiXY2s80Fo6EzdoqlcvakW8K591Yk,1257
- protein_quest/alphafold/fetch.py,sha256=iFHORaO-2NvPwmpm33tfOFUcSJx8mBGwMXxwc4bRuk8,11336
+ protein_quest/alphafold/fetch.py,sha256=wIsgPZmtnE5EoAL9G22Y6Ehx9d0md53Mw88-6LLGp0Q,12298
  protein_quest/pdbe/__init__.py,sha256=eNNHtN60NAGea7gvRkIzkoTXsYPK99s-ldIcKWYO6So,61
- protein_quest/pdbe/fetch.py,sha256=tlCrWoaOrwxnQFrf-PnimUUa6lmtHwwysS51efYsBcA,2379
+ protein_quest/pdbe/fetch.py,sha256=e8CHWDX2QzWnVLmYXCfNrscw1UcN1lI9Uz6Z5HmEOEQ,2510
  protein_quest/pdbe/io.py,sha256=iGLvmsD-eEYnrgZDYfkGWIDCzwDRRD5dwqB480talCs,10037
- protein_quest-0.4.0.dist-info/METADATA,sha256=y5DAnM4mhSincjslsvQZ4zk1QcMysGmnsBltK_Vz4MQ,8842
- protein_quest-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- protein_quest-0.4.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
- protein_quest-0.4.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- protein_quest-0.4.0.dist-info/RECORD,,
+ protein_quest-0.5.0.dist-info/METADATA,sha256=atoElM2xwPd9ubxXSQsFQYz2hjALJi-AegCRkrynEYc,9236
+ protein_quest-0.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ protein_quest-0.5.0.dist-info/entry_points.txt,sha256=f1RtOxv9TFBO3w01EMEuFXBTMsqKsQcKlkxmj9zE-0g,57
+ protein_quest-0.5.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ protein_quest-0.5.0.dist-info/RECORD,,