protein-quest 0.9.0__tar.gz → 0.10.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. {protein_quest-0.9.0 → protein_quest-0.10.1}/.github/workflows/ci.yml +16 -1
  2. protein_quest-0.10.1/.howfairis.yml +1 -0
  3. {protein_quest-0.9.0 → protein_quest-0.10.1}/CONTRIBUTING.md +4 -1
  4. {protein_quest-0.9.0 → protein_quest-0.10.1}/PKG-INFO +8 -3
  5. {protein_quest-0.9.0 → protein_quest-0.10.1}/README.md +7 -2
  6. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/__version__.py +1 -1
  7. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/alphafold/confidence.py +1 -6
  8. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/alphafold/fetch.py +2 -1
  9. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/filters.py +7 -6
  10. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/parallel.py +82 -12
  11. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/structure.py +29 -5
  12. protein_quest-0.10.1/tests/cassettes/test_cli/test_search_alphafold.yaml +66 -0
  13. protein_quest-0.10.1/tests/test_cli.py +305 -0
  14. protein_quest-0.10.1/tests/test_filters.py +78 -0
  15. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/test_structure.py +72 -0
  16. protein_quest-0.9.0/tests/test_cli.py +0 -101
  17. {protein_quest-0.9.0 → protein_quest-0.10.1}/.github/workflows/pages.yml +0 -0
  18. {protein_quest-0.9.0 → protein_quest-0.10.1}/.github/workflows/pypi-publish.yml +0 -0
  19. {protein_quest-0.9.0 → protein_quest-0.10.1}/.gitignore +0 -0
  20. {protein_quest-0.9.0 → protein_quest-0.10.1}/.python-version +0 -0
  21. {protein_quest-0.9.0 → protein_quest-0.10.1}/.vscode/extensions.json +0 -0
  22. {protein_quest-0.9.0 → protein_quest-0.10.1}/CITATION.cff +0 -0
  23. {protein_quest-0.9.0 → protein_quest-0.10.1}/CODE_OF_CONDUCT.md +0 -0
  24. {protein_quest-0.9.0 → protein_quest-0.10.1}/LICENSE +0 -0
  25. {protein_quest-0.9.0 → protein_quest-0.10.1}/docs/CONTRIBUTING.md +0 -0
  26. {protein_quest-0.9.0 → protein_quest-0.10.1}/docs/index.md +0 -0
  27. {protein_quest-0.9.0 → protein_quest-0.10.1}/docs/notebooks/.gitignore +0 -0
  28. {protein_quest-0.9.0 → protein_quest-0.10.1}/docs/notebooks/alphafold.ipynb +0 -0
  29. {protein_quest-0.9.0 → protein_quest-0.10.1}/docs/notebooks/index.md +0 -0
  30. {protein_quest-0.9.0 → protein_quest-0.10.1}/docs/notebooks/pdbe.ipynb +0 -0
  31. {protein_quest-0.9.0 → protein_quest-0.10.1}/docs/notebooks/uniprot.ipynb +0 -0
  32. {protein_quest-0.9.0 → protein_quest-0.10.1}/docs/protein-quest-mcp.png +0 -0
  33. {protein_quest-0.9.0 → protein_quest-0.10.1}/mkdocs.yml +0 -0
  34. {protein_quest-0.9.0 → protein_quest-0.10.1}/pyproject.toml +0 -0
  35. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/__init__.py +0 -0
  36. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/alphafold/__init__.py +0 -0
  37. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/alphafold/entry_summary.py +0 -0
  38. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/cli.py +0 -0
  39. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/converter.py +0 -0
  40. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/emdb.py +0 -0
  41. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/go.py +0 -0
  42. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/io.py +0 -0
  43. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/mcp_server.py +0 -0
  44. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/pdbe/__init__.py +0 -0
  45. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/pdbe/fetch.py +0 -0
  46. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/py.typed +0 -0
  47. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/ss.py +0 -0
  48. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/taxonomy.py +0 -0
  49. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/uniprot.py +0 -0
  50. {protein_quest-0.9.0 → protein_quest-0.10.1}/src/protein_quest/utils.py +0 -0
  51. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
  52. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/alphafold/cassettes/test_fetch/test_fetch_alphafold_db_version.yaml +0 -0
  53. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -0
  54. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/alphafold/cassettes/test_fetch/test_fetch_many_all_isoforms.yaml +0 -0
  55. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/alphafold/cassettes/test_fetch/test_fetch_many_gzipped.yaml +0 -0
  56. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary.yaml +0 -0
  57. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary_with_version.yaml +0 -0
  58. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/alphafold/test_confidence.py +0 -0
  59. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/alphafold/test_entry_summary.py +0 -0
  60. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/alphafold/test_fetch.py +0 -0
  61. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_cli/test_search_pdbe.yaml +0 -0
  62. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_cli/test_search_uniprot.yaml +0 -0
  63. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_cli/test_search_uniprot_details.yaml +0 -0
  64. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
  65. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
  66. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
  67. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
  68. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_do_not_match_external_isoform.yaml +0 -0
  69. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_match_canonical_isoform.yaml +0 -0
  70. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_uniprot/test_map_uniprot_accessions2uniprot_details.yaml +0 -0
  71. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
  72. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_uniprot/test_search4af_ok_sequence_length.yaml +0 -0
  73. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_uniprot/test_search4af_too_big_sequence_length.yaml +0 -0
  74. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_uniprot/test_search4af_too_small_sequence_length.yaml +0 -0
  75. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
  76. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_uniprot/test_search4interaction_partners.yaml +0 -0
  77. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_uniprot/test_search4macromolecular_complexes.yaml +0 -0
  78. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
  79. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
  80. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/conftest.py +0 -0
  81. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/fixtures/2Y29.cif.gz +0 -0
  82. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/fixtures/3JRS_B2A.cif.gz +0 -0
  83. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
  84. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/pdbe/test_fetch.py +0 -0
  85. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/test_converter.py +0 -0
  86. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/test_emdb.py +0 -0
  87. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/test_go.py +0 -0
  88. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/test_io.py +0 -0
  89. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/test_mcp.py +0 -0
  90. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/test_ss.py +0 -0
  91. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/test_taxonomy.py +0 -0
  92. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/test_uniprot.py +0 -0
  93. {protein_quest-0.9.0 → protein_quest-0.10.1}/tests/test_utils.py +0 -0
  94. {protein_quest-0.9.0 → protein_quest-0.10.1}/uv.lock +0 -0
@@ -24,6 +24,11 @@ jobs:
24
24
  run: uv sync --locked --dev --extra mcp
25
25
  - name: Install pocl
26
26
  run: uv pip install pocl-binary-distribution==3.0
27
+ - name: Cache downloaded test data
28
+ uses: actions/cache@v4
29
+ with:
30
+ path: ~/.cache/protein-quest-tests
31
+ key: cached-protein-quest-tests
27
32
  - name: Run tests
28
33
  run: |
29
34
  uv run pytest --cov --cov-report=xml
@@ -32,6 +37,7 @@ jobs:
32
37
  with:
33
38
  project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
34
39
  coverage-reports: coverage.xml
40
+
35
41
  build:
36
42
  name: build
37
43
  runs-on: ubuntu-latest
@@ -75,4 +81,13 @@ jobs:
75
81
  find docs/ -name "*.ipynb" -exec uv run --group docs-type marimo convert {} -o {}.py \;
76
82
  - name: Run type checkers on docs
77
83
  run: uv run --group docs-type pyrefly check docs/notebooks/*.ipynb.py
78
-
84
+ duplicated-code:
85
+ runs-on: ubuntu-latest
86
+ steps:
87
+ - uses: actions/checkout@v4
88
+ - name: Install NodeJS
89
+ uses: actions/setup-node@v6
90
+ with:
91
+ node-version: '24'
92
+ - name: Run jscpd to detect duplicated code
93
+ run: npx jscpd src
@@ -0,0 +1 @@
1
+ skip_checklist_checks_reason: "I'm using the fairsoftwarechecklist"
@@ -38,7 +38,8 @@ The sections below outline the steps in each case.
38
38
  1. format your code with `uvx ruff format` and sort imports with `uvx ruff check --select I --fix`;
39
39
  1. lint your code with `uvx ruff check` (use `uvx ruff check --fix` to fix issues automatically);
40
40
  1. type check your code with `uv run pyrefly check src tests`;
41
- 1. update or expand the documentation (see [Contributing with documentation](#contributing-with-documentation) section below);
41
+ 1. prevent code duplication, detect with `npx jscpd src`;
42
+ 1. update or expand the documentation (see [Contributing to documentation](#contributing-to-documentation) section below);
42
43
  1. [push](http://rogerdudler.github.io/git-guide/) your feature branch to (your fork of) the protein-quest repository on GitHub;
43
44
  1. create the pull request, e.g. following the instructions [here](https://help.github.com/articles/creating-a-pull-request/).
44
45
 
@@ -104,3 +105,5 @@ The search functions of the protein-quest package talk to web services on the In
104
105
  To have fast tests we use [pytest-recording](https://github.com/kiwicom/pytest-recording) to record and replay HTTP interactions.
105
106
  See [pytest-recording documentation](https://github.com/kiwicom/pytest-recording) for more details on how to use it.
106
107
  For example, overwrite previous recordings in the tests/cassettes/**.yaml files with `--record-mode=rewrite`.
108
+
109
+ The files downloaded for tests are cached in `~/.cache/protein-quest-tests`.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: protein_quest
3
- Version: 0.9.0
3
+ Version: 0.10.1
4
4
  Summary: Search/retrieve/filter proteins and protein structures
5
5
  Project-URL: Homepage, https://github.com/haddocking/protein-quest
6
6
  Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
@@ -35,9 +35,14 @@ Description-Content-Type: text/markdown
35
35
  [![Documentation](https://img.shields.io/badge/Documentation-bonvinlab.org-blue?style=flat-square&logo=gitbook)](https://www.bonvinlab.org/protein-quest/)
36
36
  [![CI](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml/badge.svg)](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
37
37
  [![Research Software Directory Badge](https://img.shields.io/badge/rsd-00a3e3.svg)](https://www.research-software.nl/software/protein-quest)
38
+ [![bio.tools](https://img.shields.io/badge/bio.tools-protein--quest-009fdf.svg)](https://bio.tools/protein-quest)
38
39
  [![PyPI](https://img.shields.io/pypi/v/protein-quest)](https://pypi.org/project/protein-quest/)
39
40
  [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.16941288.svg)](https://doi.org/10.5281/zenodo.16941288)
40
41
  [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/7a3f3f1fe64640d583a5e50fe7ba828e)](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
42
+ [![FAIR checklist badge](https://fairsoftwarechecklist.net/badge.svg)](https://fairsoftwarechecklist.net/v0.2?f=31&a=32113&i=32121&r=133)
43
+ [![fair-software.eu](https://img.shields.io/badge/fair--software.eu-%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F-green)](https://fair-software.eu)
44
+ [![Copy/paste detector](https://raw.githubusercontent.com/kucherenko/jscpd/refs/tags/v3.5.10/assets/jscpd-badge.svg?sanitize=true)](https://github.com/kucherenko/jscpd/)
45
+
41
46
 
42
47
  Python package to search/retrieve/filter proteins and protein structures.
43
48
 
@@ -104,7 +109,7 @@ pip install git+https://github.com/haddocking/protein-quest.git
104
109
 
105
110
  The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
106
111
 
107
- To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
112
+ To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/protein_quest/).
108
113
 
109
114
  While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
110
115
  This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
@@ -302,7 +307,7 @@ The mcp server contains an prompt template to search/retrieve/filter candidate s
302
307
 
303
308
  ## Shell autocompletion
304
309
 
305
- The `protein-quest` command line tool supports shell autocompletion using [shtab](https://shtab.readthedocs.io/).
310
+ The `protein-quest` command line tool supports shell autocompletion using [shtab](https://docs.iterative.ai/shtab).
306
311
 
307
312
  Initialize for bash shell with:
308
313
 
@@ -3,9 +3,14 @@
3
3
  [![Documentation](https://img.shields.io/badge/Documentation-bonvinlab.org-blue?style=flat-square&logo=gitbook)](https://www.bonvinlab.org/protein-quest/)
4
4
  [![CI](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml/badge.svg)](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
5
5
  [![Research Software Directory Badge](https://img.shields.io/badge/rsd-00a3e3.svg)](https://www.research-software.nl/software/protein-quest)
6
+ [![bio.tools](https://img.shields.io/badge/bio.tools-protein--quest-009fdf.svg)](https://bio.tools/protein-quest)
6
7
  [![PyPI](https://img.shields.io/pypi/v/protein-quest)](https://pypi.org/project/protein-quest/)
7
8
  [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.16941288.svg)](https://doi.org/10.5281/zenodo.16941288)
8
9
  [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/7a3f3f1fe64640d583a5e50fe7ba828e)](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
10
+ [![FAIR checklist badge](https://fairsoftwarechecklist.net/badge.svg)](https://fairsoftwarechecklist.net/v0.2?f=31&a=32113&i=32121&r=133)
11
+ [![fair-software.eu](https://img.shields.io/badge/fair--software.eu-%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F-green)](https://fair-software.eu)
12
+ [![Copy/paste detector](https://raw.githubusercontent.com/kucherenko/jscpd/refs/tags/v3.5.10/assets/jscpd-badge.svg?sanitize=true)](https://github.com/kucherenko/jscpd/)
13
+
9
14
 
10
15
  Python package to search/retrieve/filter proteins and protein structures.
11
16
 
@@ -72,7 +77,7 @@ pip install git+https://github.com/haddocking/protein-quest.git
72
77
 
73
78
  The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
74
79
 
75
- To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
80
+ To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/protein_quest/).
76
81
 
77
82
  While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
78
83
  This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
@@ -270,7 +275,7 @@ The mcp server contains an prompt template to search/retrieve/filter candidate s
270
275
 
271
276
  ## Shell autocompletion
272
277
 
273
- The `protein-quest` command line tool supports shell autocompletion using [shtab](https://shtab.readthedocs.io/).
278
+ The `protein-quest` command line tool supports shell autocompletion using [shtab](https://docs.iterative.ai/shtab).
274
279
 
275
280
  Initialize for bash shell with:
276
281
 
@@ -1,2 +1,2 @@
1
- __version__ = "0.9.0"
1
+ __version__ = "0.10.1"
2
2
  """The version of the package."""
@@ -209,12 +209,7 @@ def filter_files_on_confidence(
209
209
  copy_method=copy_method,
210
210
  )
211
211
 
212
- scheduler_address = configure_dask_scheduler(
213
- scheduler_address,
214
- name="filter-confidence",
215
- )
216
-
217
- with Client(scheduler_address) as client:
212
+ with configure_dask_scheduler(scheduler_address, name="filter-confidence") as cluster, Client(cluster) as client:
218
213
  client.forward_logging()
219
214
  return dask_map_with_progress(
220
215
  client,
@@ -114,7 +114,6 @@ class AlphaFoldEntry:
114
114
  """Convert paths in an AlphaFoldEntry to be relative to the session directory.
115
115
 
116
116
  Args:
117
- entry: An AlphaFoldEntry instance with absolute paths.
118
117
  session_dir: The session directory to which the paths should be made relative.
119
118
 
120
119
  Returns:
@@ -483,6 +482,7 @@ def fetch_many_async(
483
482
  )
484
483
 
485
484
 
485
+ # jscpd:ignore-start # noqa: ERA001
486
486
  def fetch_many(
487
487
  uniprot_accessions: Iterable[str],
488
488
  save_dir: Path,
@@ -492,6 +492,7 @@ def fetch_many(
492
492
  cacher: Cacher | None = None,
493
493
  gzip_files: bool = False,
494
494
  all_isoforms: bool = False,
495
+ # jscpd:ignore-end # noqa: ERA001
495
496
  ) -> list[AlphaFoldEntry]:
496
497
  """Synchronously fetches summaries and/or files like cif from AlphaFold Protein Structure Database.
497
498
 
@@ -96,12 +96,13 @@ def filter_files_on_chain(
96
96
 
97
97
  # TODO make logger.debug in filter_file_on_chain show to user when --log
98
98
  # GPT-5 generated a fairly difficult setup with a WorkerPlugin, need to find a simpler approach
99
- scheduler_address = configure_dask_scheduler(
100
- scheduler_address,
101
- name="filter-chain",
102
- )
103
-
104
- with Client(scheduler_address) as client:
99
+ with (
100
+ configure_dask_scheduler(
101
+ scheduler_address,
102
+ name="filter-chain",
103
+ ) as cluster,
104
+ Client(cluster) as client,
105
+ ):
105
106
  client.forward_logging()
106
107
  return dask_map_with_progress(
107
108
  client,
@@ -2,36 +2,52 @@
2
2
 
3
3
  import logging
4
4
  import os
5
- from collections.abc import Callable, Collection
5
+ import sys
6
+ import warnings
7
+ from collections.abc import Callable, Collection, Iterator
8
+ from contextlib import contextmanager, suppress
6
9
  from typing import Concatenate, ParamSpec, cast
7
10
 
8
- from dask.distributed import Client, LocalCluster, progress
11
+ from dask.distributed import Client, LocalCluster
9
12
  from distributed.deploy.cluster import Cluster
13
+ from distributed.diagnostics.progress import format_time
14
+ from distributed.diagnostics.progressbar import ProgressBar
15
+ from distributed.utils import LoopRunner
10
16
  from psutil import cpu_count
17
+ from tornado.ioloop import IOLoop
11
18
 
12
19
  logger = logging.getLogger(__name__)
13
20
 
14
21
 
22
+ @contextmanager
15
23
  def configure_dask_scheduler(
16
24
  scheduler_address: str | Cluster | None,
17
25
  name: str,
18
26
  nproc: int = 1,
19
- ) -> str | Cluster:
20
- """Configure the Dask scheduler by reusing existing or creating a new cluster.
27
+ ) -> Iterator[str | Cluster]:
28
+ """Context manager that offers a Dask cluster.
29
+
30
+ If scheduler_address is None then creates a local Dask cluster
31
+ else returns scheduler_address unchanged and the caller is responsible for cluster cleanup.
21
32
 
22
33
  Args:
23
34
  scheduler_address: Address of the Dask scheduler to connect to, or None for local cluster.
24
35
  name: Name for the Dask cluster.
25
36
  nproc: Number of processes to use per worker for CPU support.
26
37
 
27
- Returns:
28
- A Dask Cluster instance or a string address for the scheduler.
38
+ Yields:
39
+ The scheduler address as a string or a cluster.
29
40
  """
30
- if scheduler_address is None:
31
- scheduler_address = _configure_cpu_dask_scheduler(nproc, name)
32
- logger.info(f"Using local Dask cluster: {scheduler_address}")
33
-
34
- return scheduler_address
41
+ if scheduler_address is not None:
42
+ # Pass through existing scheduler address or cluster
43
+ yield scheduler_address
44
+ return
45
+ cluster = _configure_cpu_dask_scheduler(nproc, name)
46
+ logger.info(f"Using local Dask cluster: {cluster}")
47
+ try:
48
+ yield cluster
49
+ finally:
50
+ cluster.close()
35
51
 
36
52
 
37
53
  def nr_cpus() -> int:
@@ -74,6 +90,60 @@ def _configure_cpu_dask_scheduler(nproc: int, name: str) -> LocalCluster:
74
90
  P = ParamSpec("P")
75
91
 
76
92
 
93
+ class _StderrTextProgressBar(ProgressBar):
94
+ """Copy of distributed.diagnostics.progressbar.TextProgressBar that prints to stderr instead of stdout."""
95
+
96
+ __loop: IOLoop | None = None
97
+
98
+ def __init__(
99
+ self,
100
+ keys,
101
+ scheduler=None,
102
+ interval="100ms",
103
+ width=40,
104
+ loop=None,
105
+ complete=True,
106
+ start=True,
107
+ **kwargs, # noqa: ARG002
108
+ ):
109
+ self._loop_runner = loop_runner = LoopRunner(loop=loop)
110
+ super().__init__(keys, scheduler, interval, complete)
111
+ self.width = width
112
+
113
+ if start:
114
+ loop_runner.run_sync(self.listen)
115
+
116
+ @property
117
+ def loop(self) -> IOLoop | None:
118
+ loop = self.__loop
119
+ if loop is None:
120
+ # If the loop is not running when this is called, the LoopRunner.loop
121
+ # property will raise a DeprecationWarning
122
+ # However subsequent calls might occur - eg atexit, where a stopped
123
+ # loop is still acceptable - so we cache access to the loop.
124
+ self.__loop = loop = self._loop_runner.loop
125
+ return loop
126
+
127
+ @loop.setter
128
+ def loop(self, value: IOLoop) -> None:
129
+ warnings.warn("setting the loop property is deprecated", DeprecationWarning, stacklevel=2)
130
+ self.__loop = value
131
+
132
+ def _draw_bar(self, remaining, all, **kwargs): # noqa: A002, ARG002
133
+ frac = (1 - remaining / all) if all else 1.0
134
+ bar = "#" * int(self.width * frac)
135
+ percent = int(100 * frac)
136
+ elapsed = format_time(self.elapsed)
137
+ msg = "\r[{0:<{1}}] | {2}% Completed | {3}".format(bar, self.width, percent, elapsed)
138
+ with suppress(ValueError):
139
+ sys.stderr.write(msg)
140
+ sys.stderr.flush()
141
+
142
+ def _draw_stop(self, **kwargs): # noqa: ARG002
143
+ sys.stderr.write("\33[2K\r")
144
+ sys.stderr.flush()
145
+
146
+
77
147
  def dask_map_with_progress[T, R, **P](
78
148
  client: Client,
79
149
  func: Callable[Concatenate[T, P], R],
@@ -99,6 +169,6 @@ def dask_map_with_progress[T, R, **P](
99
169
  if client.dashboard_link:
100
170
  logger.info(f"Follow progress on dask dashboard at: {client.dashboard_link}")
101
171
  futures = client.map(func, iterable, *args, **kwargs)
102
- progress(futures)
172
+ _StderrTextProgressBar(futures)
103
173
  results = client.gather(futures)
104
174
  return cast("list[R]", results)
@@ -1,7 +1,6 @@
1
1
  """Module for querying and modifying [gemmi structures][gemmi.Structure]."""
2
2
 
3
3
  import logging
4
- from collections.abc import Iterable
5
4
  from datetime import UTC, datetime
6
5
  from pathlib import Path
7
6
 
@@ -123,11 +122,28 @@ def chains_in_structure(structure: gemmi.Structure) -> set[gemmi.Chain]:
123
122
  class ChainNotFoundError(IndexError):
124
123
  """Exception raised when a chain is not found in a structure."""
125
124
 
126
- def __init__(self, chain: str, file: Path | str, available_chains: Iterable[str]):
127
- super().__init__(f"Chain {chain} not found in {file}. Available chains are: {available_chains}")
128
- self.chain_id = chain
125
+ def __init__(self, chain_id: str, file: Path | str, available_chains: set[str]):
126
+ super().__init__(f"Chain {chain_id} not found in {file}. Available chains are: {available_chains}")
127
+ self.available_chains = available_chains
128
+ self.chain_id = chain_id
129
129
  self.file = file
130
130
 
131
+ def __reduce__(self):
132
+ """Helper for pickling the exception."""
133
+ return (self.__class__, (self.chain_id, self.file, self.available_chains))
134
+
135
+ def __eq__(self, other):
136
+ if not isinstance(other, ChainNotFoundError):
137
+ return NotImplemented
138
+ return (
139
+ self.chain_id == other.chain_id
140
+ and self.file == other.file
141
+ and self.available_chains == other.available_chains
142
+ )
143
+
144
+ def __hash__(self):
145
+ return hash((self.chain_id, str(self.file), frozenset(self.available_chains)))
146
+
131
147
 
132
148
  def write_single_chain_structure_file(
133
149
  input_file: Path,
@@ -194,7 +210,7 @@ def write_single_chain_structure_file(
194
210
  copyfile(input_file, output_file, copy_method)
195
211
  return output_file
196
212
 
197
- gemmi.Selection(chain_name).remove_not_selected(structure)
213
+ gemmi.Selection(f"/1/{chain_name}").remove_not_selected(structure)
198
214
  for m in structure:
199
215
  m.remove_ligands_and_waters()
200
216
  structure.setup_entities()
@@ -203,6 +219,14 @@ def write_single_chain_structure_file(
203
219
  _dedup_sheets(structure, out_chain)
204
220
  _add_provenance_info(structure, chain_name, out_chain)
205
221
 
222
+ if not (len(structure) == 1 and len(structure[0]) == 1 and len(structure[0][out_chain]) > 0):
223
+ msg = (
224
+ f"After processing, structure does not have exactly one model ({len(structure)}) "
225
+ f"with one chain (found {len(structure[0])}) called {out_chain} "
226
+ f"with some residues ({len(structure[0][out_chain])})."
227
+ )
228
+ raise ValueError(msg)
229
+
206
230
  write_structure(structure, output_file)
207
231
 
208
232
  return output_file
@@ -0,0 +1,66 @@
1
+ interactions:
2
+ - request:
3
+ body: null
4
+ headers:
5
+ Accept:
6
+ - application/sparql-results+json,application/json,text/javascript,application/javascript
7
+ Connection:
8
+ - close
9
+ Host:
10
+ - sparql.uniprot.org
11
+ User-Agent:
12
+ - sparqlwrapper 2.0.0 (rdflib.github.io/sparqlwrapper)
13
+ method: GET
14
+ uri: https://sparql.uniprot.org/sparql?query=%0A++++++++PREFIX+up%3A+%3Chttp%3A//purl.uniprot.org/core/%3E%0A++++++++PREFIX+taxon%3A+%3Chttp%3A//purl.uniprot.org/taxonomy/%3E%0A++++++++PREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0A++++++++PREFIX+rdfs%3A+%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0A++++++++PREFIX+skos%3A+%3Chttp%3A//www.w3.org/2004/02/skos/core%23%3E%0A++++++++PREFIX+GO%3A%3Chttp%3A//purl.obolibrary.org/obo/GO_%3E%0A%0A++++++++SELECT+%3Fprotein+%3Faf_db%0A++++++++WHERE+%7B%0A%0A++++++++%23+---+Protein+Selection+---%0A++++++++VALUES+%28%3Fac%29+%7B+%28%22P00811%22%29%7D%0A++++++++BIND+%28IRI%28CONCAT%28%22http%3A//purl.uniprot.org/uniprot/%22%2C%3Fac%29%29+AS+%3Fprotein%29%0A++++++++%3Fprotein+a+up%3AProtein+.%0A%0A%0A%23+---+Protein+Selection+---%0A%3Fprotein+a+up%3AProtein+.%0A%0A%23+---+AlphaFoldDB+Info+---%0A%3Fprotein+rdfs%3AseeAlso+%3Faf_db+.%0A%3Faf_db+up%3Adatabase+%3Chttp%3A//purl.uniprot.org/database/AlphaFoldDB%3E+.%0A%0A%0A++++++++%7D%0A%0A++++++++LIMIT+10000%0A&format=json&output=json&results=json
15
+ response:
16
+ body:
17
+ string: "{\n \"head\" : {\n \"vars\" : [\n \"protein\",\n \"af_db\"\n
18
+ \ ]\n },\n \"results\" : {\n \"bindings\" : [\n {\n \"protein\"
19
+ : {\n \"type\" : \"uri\",\n \"value\" : \"http://purl.uniprot.org/uniprot/P00811\"\n
20
+ \ },\n \"af_db\" : {\n \"type\" : \"uri\",\n \"value\"
21
+ : \"http://purl.uniprot.org/alphafolddb/P00811\"\n }\n }\n ]\n
22
+ \ }\n}"
23
+ headers:
24
+ Access-Control-Allow-Headers:
25
+ - origin, x-requested-with, content-type, X-Release, queryid
26
+ Access-Control-Allow-Origin:
27
+ - '*'
28
+ Access-Control-Expose-Headers:
29
+ - X-Total-Results, X-Release, queryid, content-type, user-agent, cache-control,
30
+ etag, range
31
+ Cache-Control:
32
+ - public
33
+ Connection:
34
+ - close
35
+ Content-Disposition:
36
+ - attachment; filename="sparql-CA32A0B92DC5589CE5CD9BF33CF492F9.srj"
37
+ Content-Length:
38
+ - '375'
39
+ Content-Type:
40
+ - application/sparql-results+json
41
+ Date:
42
+ - Mon, 17 Nov 2025 11:45:48 GMT
43
+ ETag:
44
+ - W/"2025_04"
45
+ Expires:
46
+ - Tue, 18 Nov 2025 11:45:48 GMT
47
+ Server:
48
+ - Apache
49
+ Strict-Transport-Security:
50
+ - max-age=31536001; includeSubDomains
51
+ Vary:
52
+ - Negotiate,Accept,Accept-Encoding,Content-Type
53
+ X-Content-Type-Options:
54
+ - nosniff
55
+ X-Frame-Options:
56
+ - SAMEORIGIN
57
+ X-Powered-By:
58
+ - sib.swiss
59
+ X-Release:
60
+ - '2025_04'
61
+ queryid:
62
+ - '770887'
63
+ status:
64
+ code: 200
65
+ message: ''
66
+ version: 1