protein-quest 0.8.0__tar.gz → 0.9.0__tar.gz

This diff shows the changes between two publicly available versions of this package, as released to one of the supported public registries. It is provided for informational purposes only.
Files changed (90)
  1. {protein_quest-0.8.0 → protein_quest-0.9.0}/PKG-INFO +1 -1
  2. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/__version__.py +1 -1
  3. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/alphafold/confidence.py +55 -9
  4. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/cli.py +16 -12
  5. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/filters.py +21 -5
  6. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/mcp_server.py +2 -2
  7. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/alphafold/test_confidence.py +1 -1
  8. {protein_quest-0.8.0 → protein_quest-0.9.0}/.github/workflows/ci.yml +0 -0
  9. {protein_quest-0.8.0 → protein_quest-0.9.0}/.github/workflows/pages.yml +0 -0
  10. {protein_quest-0.8.0 → protein_quest-0.9.0}/.github/workflows/pypi-publish.yml +0 -0
  11. {protein_quest-0.8.0 → protein_quest-0.9.0}/.gitignore +0 -0
  12. {protein_quest-0.8.0 → protein_quest-0.9.0}/.python-version +0 -0
  13. {protein_quest-0.8.0 → protein_quest-0.9.0}/.vscode/extensions.json +0 -0
  14. {protein_quest-0.8.0 → protein_quest-0.9.0}/CITATION.cff +0 -0
  15. {protein_quest-0.8.0 → protein_quest-0.9.0}/CODE_OF_CONDUCT.md +0 -0
  16. {protein_quest-0.8.0 → protein_quest-0.9.0}/CONTRIBUTING.md +0 -0
  17. {protein_quest-0.8.0 → protein_quest-0.9.0}/LICENSE +0 -0
  18. {protein_quest-0.8.0 → protein_quest-0.9.0}/README.md +0 -0
  19. {protein_quest-0.8.0 → protein_quest-0.9.0}/docs/CONTRIBUTING.md +0 -0
  20. {protein_quest-0.8.0 → protein_quest-0.9.0}/docs/index.md +0 -0
  21. {protein_quest-0.8.0 → protein_quest-0.9.0}/docs/notebooks/.gitignore +0 -0
  22. {protein_quest-0.8.0 → protein_quest-0.9.0}/docs/notebooks/alphafold.ipynb +0 -0
  23. {protein_quest-0.8.0 → protein_quest-0.9.0}/docs/notebooks/index.md +0 -0
  24. {protein_quest-0.8.0 → protein_quest-0.9.0}/docs/notebooks/pdbe.ipynb +0 -0
  25. {protein_quest-0.8.0 → protein_quest-0.9.0}/docs/notebooks/uniprot.ipynb +0 -0
  26. {protein_quest-0.8.0 → protein_quest-0.9.0}/docs/protein-quest-mcp.png +0 -0
  27. {protein_quest-0.8.0 → protein_quest-0.9.0}/mkdocs.yml +0 -0
  28. {protein_quest-0.8.0 → protein_quest-0.9.0}/pyproject.toml +0 -0
  29. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/__init__.py +0 -0
  30. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/alphafold/__init__.py +0 -0
  31. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/alphafold/entry_summary.py +0 -0
  32. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/alphafold/fetch.py +0 -0
  33. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/converter.py +0 -0
  34. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/emdb.py +0 -0
  35. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/go.py +0 -0
  36. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/io.py +0 -0
  37. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/parallel.py +0 -0
  38. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/pdbe/__init__.py +0 -0
  39. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/pdbe/fetch.py +0 -0
  40. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/py.typed +0 -0
  41. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/ss.py +0 -0
  42. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/structure.py +0 -0
  43. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/taxonomy.py +0 -0
  44. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/uniprot.py +0 -0
  45. {protein_quest-0.8.0 → protein_quest-0.9.0}/src/protein_quest/utils.py +0 -0
  46. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
  47. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/alphafold/cassettes/test_fetch/test_fetch_alphafold_db_version.yaml +0 -0
  48. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -0
  49. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many_all_isoforms.yaml +0 -0
  50. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many_gzipped.yaml +0 -0
  51. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary.yaml +0 -0
  52. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary_with_version.yaml +0 -0
  53. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/alphafold/test_entry_summary.py +0 -0
  54. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/alphafold/test_fetch.py +0 -0
  55. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_cli/test_search_pdbe.yaml +0 -0
  56. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_cli/test_search_uniprot.yaml +0 -0
  57. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_cli/test_search_uniprot_details.yaml +0 -0
  58. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
  59. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
  60. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
  61. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
  62. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_do_not_match_external_isoform.yaml +0 -0
  63. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_match_canonical_isoform.yaml +0 -0
  64. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_uniprot/test_map_uniprot_accessions2uniprot_details.yaml +0 -0
  65. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
  66. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_uniprot/test_search4af_ok_sequence_length.yaml +0 -0
  67. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_uniprot/test_search4af_too_big_sequence_length.yaml +0 -0
  68. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_uniprot/test_search4af_too_small_sequence_length.yaml +0 -0
  69. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
  70. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_uniprot/test_search4interaction_partners.yaml +0 -0
  71. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_uniprot/test_search4macromolecular_complexes.yaml +0 -0
  72. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
  73. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
  74. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/conftest.py +0 -0
  75. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/fixtures/2Y29.cif.gz +0 -0
  76. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/fixtures/3JRS_B2A.cif.gz +0 -0
  77. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
  78. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/pdbe/test_fetch.py +0 -0
  79. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/test_cli.py +0 -0
  80. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/test_converter.py +0 -0
  81. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/test_emdb.py +0 -0
  82. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/test_go.py +0 -0
  83. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/test_io.py +0 -0
  84. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/test_mcp.py +0 -0
  85. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/test_ss.py +0 -0
  86. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/test_structure.py +0 -0
  87. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/test_taxonomy.py +0 -0
  88. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/test_uniprot.py +0 -0
  89. {protein_quest-0.8.0 → protein_quest-0.9.0}/tests/test_utils.py +0 -0
  90. {protein_quest-0.8.0 → protein_quest-0.9.0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: protein_quest
3
- Version: 0.8.0
3
+ Version: 0.9.0
4
4
  Summary: Search/retrieve/filter proteins and protein structures
5
5
  Project-URL: Homepage, https://github.com/haddocking/protein-quest
6
6
  Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
@@ -1,2 +1,2 @@
1
- __version__ = "0.8.0"
1
+ __version__ = "0.9.0"
2
2
  """The version of the package."""
@@ -4,11 +4,16 @@ import logging
4
4
  from collections.abc import Generator
5
5
  from dataclasses import dataclass
6
6
  from pathlib import Path
7
+ from typing import Literal
7
8
 
8
9
  import gemmi
10
+ from dask.distributed import Client
11
+ from distributed.deploy.cluster import Cluster
12
+ from tqdm.auto import tqdm
9
13
 
10
14
  from protein_quest.converter import Percentage, PositiveInt, converter
11
15
  from protein_quest.io import read_structure, write_structure
16
+ from protein_quest.parallel import configure_dask_scheduler, dask_map_with_progress
12
17
  from protein_quest.ss import nr_of_residues_in_total
13
18
  from protein_quest.utils import CopyMethod, copyfile
14
19
 
@@ -112,7 +117,7 @@ class ConfidenceFilterResult:
112
117
  filtered_file: Path | None = None
113
118
 
114
119
 
115
- def filter_file_on_residues(
120
+ def filter_file_on_confidence(
116
121
  file: Path, query: ConfidenceFilterQuery, filtered_dir: Path, copy_method: CopyMethod = "copy"
117
122
  ) -> ConfidenceFilterResult:
118
123
  """Filter a single AlphaFoldDB structure file (*.pdb[.gz], *.cif[.gz]) based on confidence.
@@ -155,12 +160,31 @@ def filter_file_on_residues(
155
160
  )
156
161
 
157
162
 
163
+ def _filter_files_on_confidence_sequentially(
164
+ alphafold_pdb_files: list[Path],
165
+ query: ConfidenceFilterQuery,
166
+ filtered_dir: Path,
167
+ copy_method: CopyMethod = "copy",
168
+ ) -> list[ConfidenceFilterResult]:
169
+ results = []
170
+ for file in tqdm(
171
+ alphafold_pdb_files,
172
+ total=len(alphafold_pdb_files),
173
+ desc="Filtering on confidence",
174
+ unit="file",
175
+ ):
176
+ result = filter_file_on_confidence(file, query, filtered_dir, copy_method)
177
+ results.append(result)
178
+ return results
179
+
180
+
158
181
  def filter_files_on_confidence(
159
182
  alphafold_pdb_files: list[Path],
160
183
  query: ConfidenceFilterQuery,
161
184
  filtered_dir: Path,
162
185
  copy_method: CopyMethod = "copy",
163
- ) -> Generator[ConfidenceFilterResult]:
186
+ scheduler_address: str | Cluster | Literal["sequential"] | None = None,
187
+ ) -> list[ConfidenceFilterResult]:
164
188
  """Filter AlphaFoldDB structures based on confidence.
165
189
 
166
190
  Args:
@@ -168,13 +192,35 @@ def filter_files_on_confidence(
168
192
  query: The confidence filter query containing the confidence thresholds.
169
193
  filtered_dir: Directory where the filtered mmcif/PDB files will be saved.
170
194
  copy_method: How to copy when a direct copy is possible.
195
+ scheduler_address: The address of the Dask scheduler.
196
+ If not provided, will create a local cluster.
197
+ If set to `sequential` will run tasks sequentially.
171
198
 
172
- Yields:
173
- For each mmcif/PDB files yields whether it was filtered or not,
199
+ Returns:
200
+ For each mmcif/PDB files returns whether it was filtered or not,
174
201
  and number of residues with pLDDT above the confidence threshold.
175
202
  """
176
- # Note on why code looks duplicated:
177
- # In ../filter.py:filter_files_on_residues() we filter on number of residues on a file level
178
- # here we filter on file level and inside file remove low confidence residues
179
- for pdb_file in alphafold_pdb_files:
180
- yield filter_file_on_residues(pdb_file, query, filtered_dir, copy_method)
203
+ filtered_dir.mkdir(parents=True, exist_ok=True)
204
+ if scheduler_address == "sequential":
205
+ return _filter_files_on_confidence_sequentially(
206
+ alphafold_pdb_files,
207
+ query,
208
+ filtered_dir,
209
+ copy_method=copy_method,
210
+ )
211
+
212
+ scheduler_address = configure_dask_scheduler(
213
+ scheduler_address,
214
+ name="filter-confidence",
215
+ )
216
+
217
+ with Client(scheduler_address) as client:
218
+ client.forward_logging()
219
+ return dask_map_with_progress(
220
+ client,
221
+ filter_file_on_confidence,
222
+ alphafold_pdb_files,
223
+ query=query,
224
+ filtered_dir=filtered_dir,
225
+ copy_method=copy_method,
226
+ )
@@ -508,6 +508,15 @@ def _add_retrieve_emdb_parser(subparsers: argparse._SubParsersAction):
508
508
  _add_cacher_arguments(parser)
509
509
 
510
510
 
511
+ def _add_scheduler_address_argument(parser):
512
+ parser.add_argument(
513
+ "--scheduler-address",
514
+ help=dedent("""Address of the Dask scheduler to connect to.
515
+ If not provided, will create a local cluster.
516
+ If set to `sequential` will run tasks sequentially."""),
517
+ )
518
+
519
+
511
520
  def _add_filter_confidence_parser(subparsers: argparse._SubParsersAction):
512
521
  """Add filter confidence subcommand parser."""
513
522
  parser = subparsers.add_parser(
@@ -542,6 +551,7 @@ def _add_filter_confidence_parser(subparsers: argparse._SubParsersAction):
542
551
  In CSV format with `<input_file>,<residue_count>,<passed>,<output_file>` columns.
543
552
  Use `-` for stdout."""),
544
553
  ).complete = shtab.FILE
554
+ _add_scheduler_address_argument(parser)
545
555
  _add_copy_method_arguments(parser)
546
556
 
547
557
 
@@ -576,12 +586,7 @@ def _add_filter_chain_parser(subparsers: argparse._SubParsersAction):
576
586
  help=dedent("""\
577
587
  Directory to write the single-chain PDB/mmCIF files. Output files are in same format as input files."""),
578
588
  ).complete = shtab.DIRECTORY
579
- parser.add_argument(
580
- "--scheduler-address",
581
- help=dedent("""Address of the Dask scheduler to connect to.
582
- If not provided, will create a local cluster.
583
- If set to `sequential` will run tasks sequentially."""),
584
- )
589
+ _add_scheduler_address_argument(parser)
585
590
  _add_copy_method_arguments(parser)
586
591
 
587
592
 
@@ -1060,6 +1065,7 @@ def _handle_filter_confidence(args: argparse.Namespace):
1060
1065
  max_residues = args.max_residues
1061
1066
  stats_file: TextIOWrapper | None = args.write_stats
1062
1067
  copy_method: CopyMethod = structure(args.copy_method, CopyMethod) # pyright: ignore[reportArgumentType]
1068
+ scheduler_address = structure(args.scheduler_address, str | None) # pyright: ignore[reportArgumentType]
1063
1069
 
1064
1070
  output_dir.mkdir(parents=True, exist_ok=True)
1065
1071
  input_files = sorted(glob_structure_files(input_dir))
@@ -1078,16 +1084,14 @@ def _handle_filter_confidence(args: argparse.Namespace):
1078
1084
  writer.writerow(["input_file", "residue_count", "passed", "output_file"])
1079
1085
 
1080
1086
  passed_count = 0
1081
- for r in tqdm(
1082
- filter_files_on_confidence(input_files, query, output_dir, copy_method=copy_method),
1083
- total=len(input_files),
1084
- unit="file",
1085
- ):
1087
+ results = filter_files_on_confidence(
1088
+ input_files, query, output_dir, copy_method=copy_method, scheduler_address=scheduler_address
1089
+ )
1090
+ for r in results:
1086
1091
  if r.filtered_file:
1087
1092
  passed_count += 1
1088
1093
  if stats_file:
1089
1094
  writer.writerow([r.input_file, r.count, r.filtered_file is not None, r.filtered_file]) # pyright: ignore[reportPossiblyUnboundVariable]
1090
-
1091
1095
  rprint(f"Filtered {passed_count} mmcif/PDB files by confidence, written to {output_dir} directory")
1092
1096
  if stats_file:
1093
1097
  rprint(f"Statistics written to {_name_of(stats_file)}")
@@ -48,6 +48,24 @@ def filter_file_on_chain(
48
48
  return ChainFilterStatistics(input_file=input_file, chain_id=chain_id, discard_reason=e)
49
49
 
50
50
 
51
+ def _filter_files_on_chain_sequentially(
52
+ file2chains: Collection[tuple[Path, str]],
53
+ output_dir: Path,
54
+ out_chain: str = "A",
55
+ copy_method: CopyMethod = "copy",
56
+ ) -> list[ChainFilterStatistics]:
57
+ results = []
58
+ for file_and_chain in tqdm(file2chains, unit="file"):
59
+ result = filter_file_on_chain(
60
+ file_and_chain,
61
+ output_dir=output_dir,
62
+ out_chain=out_chain,
63
+ copy_method=copy_method,
64
+ )
65
+ results.append(result)
66
+ return results
67
+
68
+
51
69
  def filter_files_on_chain(
52
70
  file2chains: Collection[tuple[Path, str]],
53
71
  output_dir: Path,
@@ -72,11 +90,9 @@ def filter_files_on_chain(
72
90
  """
73
91
  output_dir.mkdir(parents=True, exist_ok=True)
74
92
  if scheduler_address == "sequential":
75
-
76
- def task(file_and_chain: tuple[Path, str]) -> ChainFilterStatistics:
77
- return filter_file_on_chain(file_and_chain, output_dir, out_chain=out_chain, copy_method=copy_method)
78
-
79
- return list(map(task, file2chains))
93
+ return _filter_files_on_chain_sequentially(
94
+ file2chains, output_dir, out_chain=out_chain, copy_method=copy_method
95
+ )
80
96
 
81
97
  # TODO make logger.debug in filter_file_on_chain show to user when --log
82
98
  # GPT-5 generated a fairly difficult setup with a WorkerPlugin, need to find a simpler approach
@@ -40,7 +40,7 @@ from typing import Annotated
40
40
  from fastmcp import FastMCP
41
41
  from pydantic import Field
42
42
 
43
- from protein_quest.alphafold.confidence import ConfidenceFilterQuery, ConfidenceFilterResult, filter_file_on_residues
43
+ from protein_quest.alphafold.confidence import ConfidenceFilterQuery, ConfidenceFilterResult, filter_file_on_confidence
44
44
  from protein_quest.alphafold.fetch import AlphaFoldEntry, DownloadableFormat
45
45
  from protein_quest.alphafold.fetch import fetch_many as alphafold_fetch
46
46
  from protein_quest.emdb import fetch as emdb_fetch
@@ -199,7 +199,7 @@ def alphafold_confidence_filter(file: Path, query: ConfidenceFilterQuery, filter
199
199
 
200
200
  If passes filter writes file to filtered_dir with residues above confidence threshold.
201
201
  """
202
- return filter_file_on_residues(file, query, filtered_dir)
202
+ return filter_file_on_confidence(file, query, filtered_dir)
203
203
 
204
204
 
205
205
  mcp.tool(filter_file_on_secondary_structure)
@@ -49,7 +49,7 @@ def test_filter_files_on_confidence(sample_pdb_file: Path, tmp_path: Path):
49
49
  min_residues=10,
50
50
  )
51
51
 
52
- results = list(filter_files_on_confidence(input_files, query, tmp_path))
52
+ results = filter_files_on_confidence(input_files, query, tmp_path)
53
53
 
54
54
  expected = [
55
55
  ConfidenceFilterResult(
File without changes
File without changes
File without changes
File without changes
File without changes