protein-quest 0.10.0__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. {protein_quest-0.10.0 → protein_quest-1.0.0}/.github/workflows/ci.yml +10 -1
  2. protein_quest-1.0.0/.howfairis.yml +1 -0
  3. {protein_quest-0.10.0 → protein_quest-1.0.0}/CONTRIBUTING.md +2 -1
  4. {protein_quest-0.10.0 → protein_quest-1.0.0}/PKG-INFO +23 -3
  5. {protein_quest-0.10.0 → protein_quest-1.0.0}/README.md +7 -2
  6. protein_quest-1.0.0/docs/faq.md +17 -0
  7. {protein_quest-0.10.0 → protein_quest-1.0.0}/mkdocs.yml +1 -0
  8. {protein_quest-0.10.0 → protein_quest-1.0.0}/pyproject.toml +24 -0
  9. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/__version__.py +1 -1
  10. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/alphafold/fetch.py +2 -1
  11. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/parallel.py +80 -3
  12. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/structure.py +12 -0
  13. protein_quest-1.0.0/tests/cassettes/test_cli/test_search_alphafold.yaml +66 -0
  14. protein_quest-1.0.0/tests/test_cli.py +305 -0
  15. protein_quest-1.0.0/tests/test_filters.py +78 -0
  16. protein_quest-1.0.0/tests/test_parallel.py +45 -0
  17. protein_quest-0.10.0/tests/test_cli.py +0 -101
  18. {protein_quest-0.10.0 → protein_quest-1.0.0}/.github/workflows/pages.yml +0 -0
  19. {protein_quest-0.10.0 → protein_quest-1.0.0}/.github/workflows/pypi-publish.yml +0 -0
  20. {protein_quest-0.10.0 → protein_quest-1.0.0}/.gitignore +0 -0
  21. {protein_quest-0.10.0 → protein_quest-1.0.0}/.python-version +0 -0
  22. {protein_quest-0.10.0 → protein_quest-1.0.0}/.vscode/extensions.json +0 -0
  23. {protein_quest-0.10.0 → protein_quest-1.0.0}/CITATION.cff +0 -0
  24. {protein_quest-0.10.0 → protein_quest-1.0.0}/CODE_OF_CONDUCT.md +0 -0
  25. {protein_quest-0.10.0 → protein_quest-1.0.0}/LICENSE +0 -0
  26. {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/CONTRIBUTING.md +0 -0
  27. {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/index.md +0 -0
  28. {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/notebooks/.gitignore +0 -0
  29. {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/notebooks/alphafold.ipynb +0 -0
  30. {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/notebooks/index.md +0 -0
  31. {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/notebooks/pdbe.ipynb +0 -0
  32. {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/notebooks/uniprot.ipynb +0 -0
  33. {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/protein-quest-mcp.png +0 -0
  34. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/__init__.py +0 -0
  35. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/alphafold/__init__.py +0 -0
  36. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/alphafold/confidence.py +0 -0
  37. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/alphafold/entry_summary.py +0 -0
  38. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/cli.py +0 -0
  39. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/converter.py +0 -0
  40. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/emdb.py +0 -0
  41. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/filters.py +0 -0
  42. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/go.py +0 -0
  43. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/io.py +0 -0
  44. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/mcp_server.py +0 -0
  45. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/pdbe/__init__.py +0 -0
  46. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/pdbe/fetch.py +0 -0
  47. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/py.typed +0 -0
  48. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/ss.py +0 -0
  49. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/taxonomy.py +0 -0
  50. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/uniprot.py +0 -0
  51. {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/utils.py +0 -0
  52. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
  53. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/cassettes/test_fetch/test_fetch_alphafold_db_version.yaml +0 -0
  54. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -0
  55. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many_all_isoforms.yaml +0 -0
  56. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many_gzipped.yaml +0 -0
  57. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary.yaml +0 -0
  58. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary_with_version.yaml +0 -0
  59. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/test_confidence.py +0 -0
  60. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/test_entry_summary.py +0 -0
  61. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/test_fetch.py +0 -0
  62. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_cli/test_search_pdbe.yaml +0 -0
  63. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_cli/test_search_uniprot.yaml +0 -0
  64. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_cli/test_search_uniprot_details.yaml +0 -0
  65. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
  66. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
  67. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
  68. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
  69. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_do_not_match_external_isoform.yaml +0 -0
  70. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_match_canonical_isoform.yaml +0 -0
  71. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_map_uniprot_accessions2uniprot_details.yaml +0 -0
  72. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
  73. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4af_ok_sequence_length.yaml +0 -0
  74. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4af_too_big_sequence_length.yaml +0 -0
  75. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4af_too_small_sequence_length.yaml +0 -0
  76. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
  77. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4interaction_partners.yaml +0 -0
  78. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4macromolecular_complexes.yaml +0 -0
  79. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
  80. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
  81. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/conftest.py +0 -0
  82. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/fixtures/2Y29.cif.gz +0 -0
  83. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/fixtures/3JRS_B2A.cif.gz +0 -0
  84. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
  85. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/pdbe/test_fetch.py +0 -0
  86. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_converter.py +0 -0
  87. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_emdb.py +0 -0
  88. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_go.py +0 -0
  89. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_io.py +0 -0
  90. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_mcp.py +0 -0
  91. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_ss.py +0 -0
  92. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_structure.py +0 -0
  93. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_taxonomy.py +0 -0
  94. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_uniprot.py +0 -0
  95. {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_utils.py +0 -0
  96. {protein_quest-0.10.0 → protein_quest-1.0.0}/uv.lock +0 -0
@@ -81,4 +81,13 @@ jobs:
81
81
  find docs/ -name "*.ipynb" -exec uv run --group docs-type marimo convert {} -o {}.py \;
82
82
  - name: Run type checkers on docs
83
83
  run: uv run --group docs-type pyrefly check docs/notebooks/*.ipynb.py
84
-
84
+ duplicated-code:
85
+ runs-on: ubuntu-latest
86
+ steps:
87
+ - uses: actions/checkout@v4
88
+ - name: Install NodeJS
89
+ uses: actions/setup-node@v6
90
+ with:
91
+ node-version: '24'
92
+ - name: Run jscpd to detect duplicated code
93
+ run: npx jscpd src
@@ -0,0 +1 @@
1
+ skip_checklist_checks_reason: "I'm using the fairsoftwarechecklist"
@@ -38,7 +38,8 @@ The sections below outline the steps in each case.
38
38
  1. format your code with `uvx ruff format` and sort imports with `uvx ruff check --select I --fix`;
39
39
  1. lint your code with `uvx ruff check` (use `uvx ruff check --fix` to fix issues automatically);
40
40
  1. type check your code with `uv run pyrefly check src tests`;
41
- 1. update or expand the documentation (see [Contributing with documentation](#contributing-with-documentation) section below);
41
+ 1. prevent code duplication, detect with `npx jscpd src`;
42
+ 1. update or expand the documentation (see [Contributing to documentation](#contributing-to-documentation) section below);
42
43
  1. [push](http://rogerdudler.github.io/git-guide/) your feature branch to (your fork of) the protein-quest repository on GitHub;
43
44
  1. create the pull request, e.g. following the instructions [here](https://help.github.com/articles/creating-a-pull-request/).
44
45
 
@@ -1,12 +1,27 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: protein_quest
3
- Version: 0.10.0
3
+ Version: 1.0.0
4
4
  Summary: Search/retrieve/filter proteins and protein structures
5
5
  Project-URL: Homepage, https://github.com/haddocking/protein-quest
6
6
  Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
7
7
  Project-URL: Documentation, https://www.bonvinlab.org/protein-quest/
8
8
  Project-URL: Source, https://github.com/haddocking/protein-quest
9
9
  License-File: LICENSE
10
+ Keywords: alphafold,mmcif,pdb,protein,protein structure,uniprot
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Environment :: Console
13
+ Classifier: Framework :: AsyncIO
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: Apache Software License
16
+ Classifier: Natural Language :: English
17
+ Classifier: Operating System :: MacOS
18
+ Classifier: Operating System :: POSIX
19
+ Classifier: Operating System :: POSIX :: Linux
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Programming Language :: Python :: 3.14
22
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
23
+ Classifier: Topic :: Scientific/Engineering :: Chemistry
24
+ Classifier: Typing :: Typed
10
25
  Requires-Python: >=3.13
11
26
  Requires-Dist: aiofiles>=24.1.0
12
27
  Requires-Dist: aiohttp-retry>=2.9.1
@@ -35,9 +50,14 @@ Description-Content-Type: text/markdown
35
50
  [![Documentation](https://img.shields.io/badge/Documentation-bonvinlab.org-blue?style=flat-square&logo=gitbook)](https://www.bonvinlab.org/protein-quest/)
36
51
  [![CI](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml/badge.svg)](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
37
52
  [![Research Software Directory Badge](https://img.shields.io/badge/rsd-00a3e3.svg)](https://www.research-software.nl/software/protein-quest)
53
+ [![bio.tools](https://img.shields.io/badge/bio.tools-protein--quest-009fdf.svg)](https://bio.tools/protein-quest)
38
54
  [![PyPI](https://img.shields.io/pypi/v/protein-quest)](https://pypi.org/project/protein-quest/)
39
55
  [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.16941288.svg)](https://doi.org/10.5281/zenodo.16941288)
40
56
  [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/7a3f3f1fe64640d583a5e50fe7ba828e)](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
57
+ [![FAIR checklist badge](https://fairsoftwarechecklist.net/badge.svg)](https://fairsoftwarechecklist.net/v0.2?f=31&a=32113&i=32121&r=133)
58
+ [![fair-software.eu](https://img.shields.io/badge/fair--software.eu-%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F-green)](https://fair-software.eu)
59
+ [![Copy/paste detector](https://raw.githubusercontent.com/kucherenko/jscpd/refs/tags/v3.5.10/assets/jscpd-badge.svg?sanitize=true)](https://github.com/kucherenko/jscpd/)
60
+
41
61
 
42
62
  Python package to search/retrieve/filter proteins and protein structures.
43
63
 
@@ -104,7 +124,7 @@ pip install git+https://github.com/haddocking/protein-quest.git
104
124
 
105
125
  The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
106
126
 
107
- To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
127
+ To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/protein_quest/).
108
128
 
109
129
  While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
110
130
  This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
@@ -302,7 +322,7 @@ The mcp server contains an prompt template to search/retrieve/filter candidate s
302
322
 
303
323
  ## Shell autocompletion
304
324
 
305
- The `protein-quest` command line tool supports shell autocompletion using [shtab](https://shtab.readthedocs.io/).
325
+ The `protein-quest` command line tool supports shell autocompletion using [shtab](https://docs.iterative.ai/shtab).
306
326
 
307
327
  Initialize for bash shell with:
308
328
 
@@ -3,9 +3,14 @@
3
3
  [![Documentation](https://img.shields.io/badge/Documentation-bonvinlab.org-blue?style=flat-square&logo=gitbook)](https://www.bonvinlab.org/protein-quest/)
4
4
  [![CI](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml/badge.svg)](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
5
5
  [![Research Software Directory Badge](https://img.shields.io/badge/rsd-00a3e3.svg)](https://www.research-software.nl/software/protein-quest)
6
+ [![bio.tools](https://img.shields.io/badge/bio.tools-protein--quest-009fdf.svg)](https://bio.tools/protein-quest)
6
7
  [![PyPI](https://img.shields.io/pypi/v/protein-quest)](https://pypi.org/project/protein-quest/)
7
8
  [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.16941288.svg)](https://doi.org/10.5281/zenodo.16941288)
8
9
  [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/7a3f3f1fe64640d583a5e50fe7ba828e)](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
10
+ [![FAIR checklist badge](https://fairsoftwarechecklist.net/badge.svg)](https://fairsoftwarechecklist.net/v0.2?f=31&a=32113&i=32121&r=133)
11
+ [![fair-software.eu](https://img.shields.io/badge/fair--software.eu-%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F%20%20%E2%97%8F-green)](https://fair-software.eu)
12
+ [![Copy/paste detector](https://raw.githubusercontent.com/kucherenko/jscpd/refs/tags/v3.5.10/assets/jscpd-badge.svg?sanitize=true)](https://github.com/kucherenko/jscpd/)
13
+
9
14
 
10
15
  Python package to search/retrieve/filter proteins and protein structures.
11
16
 
@@ -72,7 +77,7 @@ pip install git+https://github.com/haddocking/protein-quest.git
72
77
 
73
78
  The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
74
79
 
75
- To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/summary/).
80
+ To use programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/protein_quest/).
76
81
 
77
82
  While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
78
83
  This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
@@ -270,7 +275,7 @@ The mcp server contains an prompt template to search/retrieve/filter candidate s
270
275
 
271
276
  ## Shell autocompletion
272
277
 
273
- The `protein-quest` command line tool supports shell autocompletion using [shtab](https://shtab.readthedocs.io/).
278
+ The `protein-quest` command line tool supports shell autocompletion using [shtab](https://docs.iterative.ai/shtab).
274
279
 
275
280
  Initialize for bash shell with:
276
281
 
@@ -0,0 +1,17 @@
1
+ # Frequently Asked Questions (FAQ)
2
+
3
+ ## My search is taking a long time. What can I do?
4
+
5
+ Most likely your search is returning a lot of results.
6
+ The search commands have several options to reduce the number of results returned, such as `--limit`.
7
+
8
+ ## My log is polluted with progress bar lines. How can I fix this?
9
+
10
+ To reduce the number of lines printed by the progress bar, you can increase the minimum interval between updates with the `TQDM_MININTERVAL` environment variable.
11
+ For example, setting it to `9` will update the progress bar every 9 seconds instead of every 0.1 seconds.
12
+
13
+ To not have any progress bars at all, you can set the `TQDM_DISABLE` environment variable to any non-empty value.
14
+
15
+ ## My protein-quest question is not answered here. Where can I get help?
16
+
17
+ Please see the [Contributing](CONTRIBUTING.md#you-have-a-question) document for instructions on how to ask questions and report issues.
@@ -76,6 +76,7 @@ markdown_extensions:
76
76
  nav:
77
77
  - Home: index.md
78
78
  - CLI Reference: cli.md
79
+ - FAQ: faq.md
79
80
  - Notebooks:
80
81
  - notebooks/index.md
81
82
  - Search uniprot: notebooks/uniprot.ipynb
@@ -4,6 +4,30 @@ dynamic = ["version"]
4
4
  description = "Search/retrieve/filter proteins and protein structures"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.13"
7
+ keywords = [
8
+ "alphafold",
9
+ "mmcif",
10
+ "pdb",
11
+ "protein structure",
12
+ "protein",
13
+ "uniprot",
14
+ ]
15
+ classifiers = [
16
+ "Development Status :: 5 - Production/Stable",
17
+ "Environment :: Console",
18
+ "Framework :: AsyncIO",
19
+ "Intended Audience :: Science/Research",
20
+ "License :: OSI Approved :: Apache Software License",
21
+ "Natural Language :: English",
22
+ "Operating System :: MacOS",
23
+ "Operating System :: POSIX :: Linux",
24
+ "Operating System :: POSIX",
25
+ "Programming Language :: Python :: 3.13",
26
+ "Programming Language :: Python :: 3.14",
27
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
28
+ "Topic :: Scientific/Engineering :: Chemistry",
29
+ "Typing :: Typed",
30
+ ]
7
31
  dependencies = [
8
32
  "aiofiles>=24.1.0",
9
33
  "aiohttp[speedups]>=3.11.18",
@@ -1,2 +1,2 @@
1
- __version__ = "0.10.0"
1
+ __version__ = "1.0.0"
2
2
  """The version of the package."""
@@ -114,7 +114,6 @@ class AlphaFoldEntry:
114
114
  """Convert paths in an AlphaFoldEntry to be relative to the session directory.
115
115
 
116
116
  Args:
117
- entry: An AlphaFoldEntry instance with absolute paths.
118
117
  session_dir: The session directory to which the paths should be made relative.
119
118
 
120
119
  Returns:
@@ -483,6 +482,7 @@ def fetch_many_async(
483
482
  )
484
483
 
485
484
 
485
+ # jscpd:ignore-start # noqa: ERA001
486
486
  def fetch_many(
487
487
  uniprot_accessions: Iterable[str],
488
488
  save_dir: Path,
@@ -492,6 +492,7 @@ def fetch_many(
492
492
  cacher: Cacher | None = None,
493
493
  gzip_files: bool = False,
494
494
  all_isoforms: bool = False,
495
+ # jscpd:ignore-end # noqa: ERA001
495
496
  ) -> list[AlphaFoldEntry]:
496
497
  """Synchronously fetches summaries and/or files like cif from AlphaFold Protein Structure Database.
497
498
 
@@ -2,13 +2,19 @@
2
2
 
3
3
  import logging
4
4
  import os
5
+ import sys
6
+ import warnings
5
7
  from collections.abc import Callable, Collection, Iterator
6
- from contextlib import contextmanager
8
+ from contextlib import contextmanager, suppress
7
9
  from typing import Concatenate, ParamSpec, cast
8
10
 
9
- from dask.distributed import Client, LocalCluster, progress
11
+ from dask.distributed import Client, LocalCluster
10
12
  from distributed.deploy.cluster import Cluster
13
+ from distributed.diagnostics.progress import format_time
14
+ from distributed.diagnostics.progressbar import ProgressBar
15
+ from distributed.utils import LoopRunner
11
16
  from psutil import cpu_count
17
+ from tornado.ioloop import IOLoop
12
18
 
13
19
  logger = logging.getLogger(__name__)
14
20
 
@@ -80,6 +86,72 @@ def _configure_cpu_dask_scheduler(nproc: int, name: str) -> LocalCluster:
80
86
  return LocalCluster(name=name, threads_per_worker=1, n_workers=n_workers)
81
87
 
82
88
 
89
+ class MyProgressBar(ProgressBar):
90
+ """Show progress of Dask computations.
91
+
92
+ Copy of distributed.diagnostics.progressbar.TextProgressBar that:
93
+
94
+ - prints to stderr instead of stdout
95
+ - Can have its interval (in seconds) set with `TQDM_MININTERVAL` environment variable
96
+
97
+ """
98
+
99
+ __loop: IOLoop | None = None
100
+
101
+ def __init__(
102
+ self,
103
+ keys,
104
+ scheduler=None,
105
+ interval="100ms",
106
+ width=40,
107
+ loop=None,
108
+ complete=True,
109
+ start=True,
110
+ **kwargs, # noqa: ARG002
111
+ ):
112
+ self._loop_runner = loop_runner = LoopRunner(loop=loop)
113
+ if interval == "100ms":
114
+ interval_env = os.getenv("TQDM_MININTERVAL")
115
+ if interval_env is not None:
116
+ interval = interval_env + "s"
117
+
118
+ super().__init__(keys, scheduler, interval, complete)
119
+ self.width = width
120
+
121
+ if start:
122
+ loop_runner.run_sync(self.listen)
123
+
124
+ @property
125
+ def loop(self) -> IOLoop | None:
126
+ loop = self.__loop
127
+ if loop is None:
128
+ # If the loop is not running when this is called, the LoopRunner.loop
129
+ # property will raise a DeprecationWarning
130
+ # However subsequent calls might occur - eg atexit, where a stopped
131
+ # loop is still acceptable - so we cache access to the loop.
132
+ self.__loop = loop = self._loop_runner.loop
133
+ return loop
134
+
135
+ @loop.setter
136
+ def loop(self, value: IOLoop) -> None:
137
+ warnings.warn("setting the loop property is deprecated", DeprecationWarning, stacklevel=2)
138
+ self.__loop = value
139
+
140
+ def _draw_bar(self, remaining, all, **kwargs): # noqa: A002, ARG002
141
+ frac = (1 - remaining / all) if all else 1.0
142
+ bar = "#" * int(self.width * frac)
143
+ percent = int(100 * frac)
144
+ elapsed = format_time(self.elapsed)
145
+ msg = "\r[{0:<{1}}] | {2}% Completed | {3}".format(bar, self.width, percent, elapsed)
146
+ with suppress(ValueError):
147
+ sys.stderr.write(msg)
148
+ sys.stderr.flush()
149
+
150
+ def _draw_stop(self, **kwargs): # noqa: ARG002
151
+ sys.stderr.write("\33[2K\r")
152
+ sys.stderr.flush()
153
+
154
+
83
155
  # Generic type parameters used across helpers
84
156
  P = ParamSpec("P")
85
157
 
@@ -94,6 +166,10 @@ def dask_map_with_progress[T, R, **P](
94
166
  """
95
167
  Wrapper for map, progress, and gather of Dask that returns a correctly typed list.
96
168
 
169
+ Environment variables:
170
+ - Set interval (in seconds) of progress updates with `TQDM_MININTERVAL`
171
+ - Disabled by setting `TQDM_DISABLE` to any value
172
+
97
173
  Args:
98
174
  client: Dask client.
99
175
  func: Function to map; first parameter comes from ``iterable`` and any
@@ -109,6 +185,7 @@ def dask_map_with_progress[T, R, **P](
109
185
  if client.dashboard_link:
110
186
  logger.info(f"Follow progress on dask dashboard at: {client.dashboard_link}")
111
187
  futures = client.map(func, iterable, *args, **kwargs)
112
- progress(futures)
188
+ if not os.getenv("TQDM_DISABLE"):
189
+ MyProgressBar(futures)
113
190
  results = client.gather(futures)
114
191
  return cast("list[R]", results)
@@ -132,6 +132,18 @@ class ChainNotFoundError(IndexError):
132
132
  """Helper for pickling the exception."""
133
133
  return (self.__class__, (self.chain_id, self.file, self.available_chains))
134
134
 
135
+ def __eq__(self, other):
136
+ if not isinstance(other, ChainNotFoundError):
137
+ return NotImplemented
138
+ return (
139
+ self.chain_id == other.chain_id
140
+ and self.file == other.file
141
+ and self.available_chains == other.available_chains
142
+ )
143
+
144
+ def __hash__(self):
145
+ return hash((self.chain_id, str(self.file), frozenset(self.available_chains)))
146
+
135
147
 
136
148
  def write_single_chain_structure_file(
137
149
  input_file: Path,
@@ -0,0 +1,66 @@
1
+ interactions:
2
+ - request:
3
+ body: null
4
+ headers:
5
+ Accept:
6
+ - application/sparql-results+json,application/json,text/javascript,application/javascript
7
+ Connection:
8
+ - close
9
+ Host:
10
+ - sparql.uniprot.org
11
+ User-Agent:
12
+ - sparqlwrapper 2.0.0 (rdflib.github.io/sparqlwrapper)
13
+ method: GET
14
+ uri: https://sparql.uniprot.org/sparql?query=%0A++++++++PREFIX+up%3A+%3Chttp%3A//purl.uniprot.org/core/%3E%0A++++++++PREFIX+taxon%3A+%3Chttp%3A//purl.uniprot.org/taxonomy/%3E%0A++++++++PREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0A++++++++PREFIX+rdfs%3A+%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0A++++++++PREFIX+skos%3A+%3Chttp%3A//www.w3.org/2004/02/skos/core%23%3E%0A++++++++PREFIX+GO%3A%3Chttp%3A//purl.obolibrary.org/obo/GO_%3E%0A%0A++++++++SELECT+%3Fprotein+%3Faf_db%0A++++++++WHERE+%7B%0A%0A++++++++%23+---+Protein+Selection+---%0A++++++++VALUES+%28%3Fac%29+%7B+%28%22P00811%22%29%7D%0A++++++++BIND+%28IRI%28CONCAT%28%22http%3A//purl.uniprot.org/uniprot/%22%2C%3Fac%29%29+AS+%3Fprotein%29%0A++++++++%3Fprotein+a+up%3AProtein+.%0A%0A%0A%23+---+Protein+Selection+---%0A%3Fprotein+a+up%3AProtein+.%0A%0A%23+---+AlphaFoldDB+Info+---%0A%3Fprotein+rdfs%3AseeAlso+%3Faf_db+.%0A%3Faf_db+up%3Adatabase+%3Chttp%3A//purl.uniprot.org/database/AlphaFoldDB%3E+.%0A%0A%0A++++++++%7D%0A%0A++++++++LIMIT+10000%0A&format=json&output=json&results=json
15
+ response:
16
+ body:
17
+ string: "{\n \"head\" : {\n \"vars\" : [\n \"protein\",\n \"af_db\"\n
18
+ \ ]\n },\n \"results\" : {\n \"bindings\" : [\n {\n \"protein\"
19
+ : {\n \"type\" : \"uri\",\n \"value\" : \"http://purl.uniprot.org/uniprot/P00811\"\n
20
+ \ },\n \"af_db\" : {\n \"type\" : \"uri\",\n \"value\"
21
+ : \"http://purl.uniprot.org/alphafolddb/P00811\"\n }\n }\n ]\n
22
+ \ }\n}"
23
+ headers:
24
+ Access-Control-Allow-Headers:
25
+ - origin, x-requested-with, content-type, X-Release, queryid
26
+ Access-Control-Allow-Origin:
27
+ - '*'
28
+ Access-Control-Expose-Headers:
29
+ - X-Total-Results, X-Release, queryid, content-type, user-agent, cache-control,
30
+ etag, range
31
+ Cache-Control:
32
+ - public
33
+ Connection:
34
+ - close
35
+ Content-Disposition:
36
+ - attachment; filename="sparql-CA32A0B92DC5589CE5CD9BF33CF492F9.srj"
37
+ Content-Length:
38
+ - '375'
39
+ Content-Type:
40
+ - application/sparql-results+json
41
+ Date:
42
+ - Mon, 17 Nov 2025 11:45:48 GMT
43
+ ETag:
44
+ - W/"2025_04"
45
+ Expires:
46
+ - Tue, 18 Nov 2025 11:45:48 GMT
47
+ Server:
48
+ - Apache
49
+ Strict-Transport-Security:
50
+ - max-age=31536001; includeSubDomains
51
+ Vary:
52
+ - Negotiate,Accept,Accept-Encoding,Content-Type
53
+ X-Content-Type-Options:
54
+ - nosniff
55
+ X-Frame-Options:
56
+ - SAMEORIGIN
57
+ X-Powered-By:
58
+ - sib.swiss
59
+ X-Release:
60
+ - '2025_04'
61
+ queryid:
62
+ - '770887'
63
+ status:
64
+ code: 200
65
+ message: ''
66
+ version: 1
@@ -0,0 +1,305 @@
1
+ import csv
2
+ from pathlib import Path
3
+ from textwrap import dedent
4
+
5
+ import pytest
6
+
7
+ from protein_quest.cli import main, make_parser
8
+
9
+
10
def test_make_parser_help(capsys: pytest.CaptureFixture[str]):
    """Parsing ``--help`` exits and prints the CLI title on stdout."""
    cli_parser = make_parser()

    # Requesting help is expected to terminate parsing with SystemExit.
    with pytest.raises(SystemExit):
        cli_parser.parse_args(["--help"])

    stdout, _ = capsys.readouterr()
    assert "Protein Quest CLI" in stdout
18
+
19
+
20
@pytest.mark.vcr
def test_search_uniprot(capsys: pytest.CaptureFixture[str], caplog: pytest.LogCaptureFixture):
    """``search uniprot`` limited to one hit writes that accession to stdout.

    The progress messages are expected on stderr and the "more results"
    notice is expected in the captured log records.
    """
    cli_args = ["search", "uniprot", "--taxon-id", "9606", "--reviewed", "--limit", "1", "-"]

    main(cli_args)

    stdout, stderr = capsys.readouterr()
    assert stdout == "A0A024R1R8\n"
    for message in (
        "Searching for UniProt accessions",
        "Found 1 UniProt accessions, written to <stdout>",
    ):
        assert message in stderr
    assert "There may be more results available" in caplog.text
41
+
42
+
43
@pytest.mark.vcr
def test_search_pdbe(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
    """``search pdbe`` writes only the PDB entries that satisfy ``--min-residues``."""
    accessions_file = tmp_path / "uniprot_accessions.txt"
    accessions_file.write_text("P00811\n")
    results_csv = tmp_path / "pdbe_results.csv"
    cli_args = [
        "search",
        "pdbe",
        "--limit",
        "150",
        "--min-residues",
        "360",  # P00811 has 377 residues and 5 full PDB entries
        str(accessions_file),
        str(results_csv),
    ]
    expected = dedent("""\
        uniprot_accession,pdb_id,method,resolution,uniprot_chains,chain,chain_length
        P00811,9C6P,X-Ray_Crystallography,1.66,A/B=1-377,A,377
        P00811,9C81,X-Ray_Crystallography,1.7,A/B=1-377,A,377
        P00811,9C83,X-Ray_Crystallography,2.9,A/B=1-377,A,377
        P00811,9C84,X-Ray_Crystallography,1.7,A/B=1-377,A,377
        P00811,9DHL,X-Ray_Crystallography,1.88,A/B=1-377,A,377
    """)

    main(cli_args)

    assert results_csv.read_text() == expected

    _, stderr = capsys.readouterr()
    for message in (
        "Finding PDB entries for 1 uniprot accessions",
        "Before filtering found 120 PDB entries for 1 uniprot accessions.",
        "After filtering on chain length (360, None) remained 5 PDB entries for 1 uniprot",
        "Written to ",
    ):
        assert message in stderr
77
+
78
+
79
@pytest.mark.vcr
def test_search_uniprot_details(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
    """``search uniprot-details`` writes one CSV row per requested accession."""
    accessions_file = tmp_path / "uniprot_accessions.txt"
    accessions_file.write_text("P05067\nA0A0B5AC95\n")
    details_csv = tmp_path / "uniprot_details.csv"
    expected = dedent("""\
        uniprot_accession,uniprot_id,sequence_length,reviewed,protein_name,taxon_id,taxon_name
        A0A0B5AC95,INS1A_CONGE,115,True,Con-Ins G1a,6491,Conus geographus
        P05067,A4_HUMAN,770,True,Amyloid-beta precursor protein,9606,Homo sapiens
    """)

    main(["search", "uniprot-details", str(accessions_file), str(details_csv)])

    assert details_csv.read_text() == expected

    _, stderr = capsys.readouterr()
    for message in (
        "Retrieving UniProt entry details for 2 uniprot accessions",
        "Retrieved details for 2 UniProt entries, written to ",
    ):
        assert message in stderr
103
+
104
+
105
@pytest.mark.vcr
def test_search_alphafold(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
    """``search alphafold`` writes the AlphaFold identifier found for an accession."""
    accessions_file = tmp_path / "uniprot_accessions.txt"
    accessions_file.write_text("P00811\n")
    results_csv = tmp_path / "af_results.csv"
    expected = dedent("""\
        uniprot_accession,af_id
        P00811,P00811
    """)

    main(["search", "alphafold", str(accessions_file), str(results_csv)])

    assert results_csv.read_text() == expected

    _, stderr = capsys.readouterr()
    for message in (
        "Finding AlphaFold entries for 1 uniprot accessions",
        "Found 1 AlphaFold entries, written to ",
    ):
        assert message in stderr
131
+
132
+
133
def test_filter_chain_happy_path(sample2_cif: Path, tmp_path: Path, capsys: pytest.CaptureFixture[str]):
    """``filter chain`` writes a single-chain file for the chain listed in the CSV."""
    chains_csv = tmp_path / "chains.csv"
    chains_csv.write_text("pdb_id,chain\n2Y29,A\n")

    # Structures are read from the fixture's directory; results go to tmp_path.
    main(["filter", "chain", str(chains_csv), str(sample2_cif.parent), str(tmp_path)])

    assert (tmp_path / "2Y29_A2A.cif.gz").exists()

    _, stderr = capsys.readouterr()
    assert "Wrote 1 single-chain PDB/mmCIF files to" in stderr
152
+
153
+
154
def test_filter_chain_input_file_notfound(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
    """``filter chain`` exits and writes nothing when the input directory has no structure."""
    chains_csv = tmp_path / "chains.csv"
    chains_csv.write_text("pdb_id,chain\n2Y29,A\n")
    empty_input = tmp_path / "input"
    results_dir = tmp_path / "output"
    for directory in (empty_input, results_dir):
        directory.mkdir()

    with pytest.raises(SystemExit):
        main(["filter", "chain", str(chains_csv), str(empty_input), str(results_dir)])

    # The output directory must still be empty after the failed run.
    assert list(results_dir.iterdir()) == []

    _, stderr = capsys.readouterr()
    assert "No structure file found for 2Y29" in stderr
177
+
178
+
179
def test_filter_residue(sample_cif: Path, sample2_cif: Path, tmp_path: Path, capsys: pytest.CaptureFixture[str]):
    """``filter residue`` keeps only files whose residue count is within the given bounds.

    Both fixtures are symlinked into a fresh input directory. With bounds of
    100 to 200 the test expects the 173-residue sample to pass and the
    8-residue sample to be rejected, and checks the statistics CSV as well.
    """
    in_dir = tmp_path / "input"
    out_dir = tmp_path / "output"
    for directory in (in_dir, out_dir):
        directory.mkdir()
    linked_sample = in_dir / sample_cif.name
    linked_sample.symlink_to(sample_cif)
    linked_sample2 = in_dir / sample2_cif.name
    linked_sample2.symlink_to(sample2_cif)
    stats_csv = tmp_path / "stats.csv"

    cli_args = ["filter", "residue", str(in_dir), str(out_dir)]
    cli_args += ["--min-residues", "100"]
    cli_args += ["--max-residues", "200"]
    cli_args += ["--copy-method", "symlink"]
    cli_args += ["--write-stats", str(stats_csv)]

    main(cli_args)

    # Check output files
    produced = list(out_dir.iterdir())
    assert len(produced) == 1
    kept_file = out_dir / sample_cif.name
    assert kept_file in produced

    # Check stats file
    with stats_csv.open() as handle:
        stat_rows = list(csv.DictReader(handle))
    # Input files processed in alphabetical order
    rejected_row = {
        "input_file": str(linked_sample2),
        "residue_count": "8",
        "passed": "False",
        "output_file": "",
    }
    accepted_row = {
        "input_file": str(linked_sample),
        "residue_count": "173",
        "passed": "True",
        "output_file": str(kept_file),
    }
    assert stat_rows == [rejected_row, accepted_row]

    # Check captured output
    _, stderr = capsys.readouterr()
    for message in (
        "by number of residues in chain A",
        "Wrote 1 files to",
        "Statistics written to",
    ):
        assert message in stderr
238
+
239
+
240
def test_filter_secondary_structure(
    sample_cif: Path, sample2_cif: Path, tmp_path: Path, capsys: pytest.CaptureFixture[str]
):
    """``filter secondary-structure`` keeps only files with enough helix residues.

    With ``--abs-min-helix-residues 10`` the test expects the sample with 58
    helix residues to pass and the sample with none to be rejected, and
    checks the statistics CSV as well.
    """
    in_dir = tmp_path / "input"
    out_dir = tmp_path / "output"
    for directory in (in_dir, out_dir):
        directory.mkdir()
    linked_sample = in_dir / sample_cif.name
    linked_sample.symlink_to(sample_cif)
    linked_sample2 = in_dir / sample2_cif.name
    linked_sample2.symlink_to(sample2_cif)
    stats_csv = tmp_path / "ss_stats.csv"

    cli_args = ["filter", "secondary-structure", str(in_dir), str(out_dir)]
    cli_args += ["--abs-min-helix-residues", "10"]
    cli_args += ["--copy-method", "symlink"]
    cli_args += ["--write-stats", str(stats_csv)]

    main(cli_args)

    # Check output files
    produced = list(out_dir.iterdir())
    assert len(produced) == 1
    kept_file = out_dir / sample_cif.name
    assert kept_file in produced

    # Check stats file
    with stats_csv.open() as handle:
        stat_rows = list(csv.DictReader(handle))
    rejected_row = {
        "input_file": str(linked_sample2),
        "nr_residues": "8",
        "nr_helix_residues": "0",
        "nr_sheet_residues": "0",
        "helix_ratio": "0.0",
        "sheet_ratio": "0.0",
        "passed": "False",
        "output_file": "",
    }
    accepted_row = {
        "input_file": str(linked_sample),
        "nr_residues": "173",
        "nr_helix_residues": "58",
        "nr_sheet_residues": "59",
        "helix_ratio": f"{58 / 173:.3f}",
        "sheet_ratio": f"{59 / 173:.3f}",
        "passed": "True",
        "output_file": str(kept_file),
    }
    assert stat_rows == [rejected_row, accepted_row]

    # Check captured output
    _, stderr = capsys.readouterr()
    for message in (
        "by secondary structure",
        "Wrote 1 files to",
        "Statistics written to",
    ):
        assert message in stderr
@@ -0,0 +1,78 @@
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+
5
+ from protein_quest.filters import (
6
+ ChainFilterStatistics,
7
+ ResidueFilterStatistics,
8
+ filter_files_on_chain,
9
+ filter_files_on_residues,
10
+ )
11
+ from protein_quest.structure import ChainNotFoundError
12
+
13
+
14
@pytest.mark.parametrize(
    "scheduler_address,expected_progress_bar",
    [
        (None, "Completed"),  # creates a local cluster
        ("sequential", "file/s"),
    ],
)
def test_filter_files_on_chain_local_cluster(
    sample2_cif: Path,
    tmp_path: Path,
    capsys: pytest.CaptureFixture[str],
    scheduler_address: str | None,
    expected_progress_bar: str,
):
    """``filter_files_on_chain`` reports one passed and one discarded chain.

    The test runs once per scheduler setting and checks that the matching
    progress-bar text appears on stderr.
    """
    requested = [
        (sample2_cif, "A"),  # should pass
        (sample2_cif, "B"),  # should be discarded
    ]

    outcomes = filter_files_on_chain(requested, tmp_path, scheduler_address=scheduler_address)

    kept = ChainFilterStatistics(
        input_file=sample2_cif,
        chain_id="A",
        passed=True,
        output_file=tmp_path / "2Y29_A2A.cif.gz",
    )
    assert kept.output_file and kept.output_file.exists()
    dropped = ChainFilterStatistics(
        input_file=sample2_cif,
        chain_id="B",
        passed=False,
        output_file=None,
        discard_reason=ChainNotFoundError("B", sample2_cif, {"A"}),
    )
    assert outcomes == [kept, dropped]

    captured = capsys.readouterr()
    assert expected_progress_bar in captured.err
53
+
54
+
55
def test_filter_files_on_residues(sample_cif: Path, sample2_cif: Path, tmp_path: Path):
    """``filter_files_on_residues`` passes the 173-residue file and rejects the 8-residue one."""
    outcomes = list(
        filter_files_on_residues(
            input_files=[sample_cif, sample2_cif],
            output_dir=tmp_path,
            min_residues=100,
            max_residues=200,
        )
    )

    kept = ResidueFilterStatistics(
        input_file=sample_cif,
        residue_count=173,
        passed=True,
        output_file=tmp_path / sample_cif.name,
    )
    assert kept.output_file and kept.output_file.exists()
    dropped = ResidueFilterStatistics(
        input_file=sample2_cif,
        residue_count=8,
        passed=False,
        output_file=None,
    )

    assert outcomes == [kept, dropped]
@@ -0,0 +1,45 @@
1
+ import pytest
2
+ from distributed import Client
3
+
4
+ from protein_quest.parallel import MyProgressBar, dask_map_with_progress
5
+
6
+
7
def test_MyProgressBar_interval_env(monkeypatch):
    """``MyProgressBar.interval`` equals the value set in ``TQDM_MININTERVAL``."""
    monkeypatch.setenv("TQDM_MININTERVAL", "1234")

    # The bar is built while a dask client is active.
    with Client():
        bar = MyProgressBar([])
        assert bar.interval == 1234
13
+
14
+
15
def run_dask_map_with_progress():
    """Map a squaring function over ``range(5)`` via ``dask_map_with_progress`` and check the result."""

    def square(x: int) -> int:
        return x**2

    with Client() as dask_client:
        squares = dask_map_with_progress(dask_client, square, range(5))
        assert squares == [0, 1, 4, 9, 16]
26
+
27
+
28
def test_dask_map_with_progress(capsys: pytest.CaptureFixture, caplog: pytest.LogCaptureFixture):
    """The progress bar text reaches stderr and the dashboard hint is logged."""
    # Capture INFO records so the dashboard message shows up in caplog.
    caplog.set_level("INFO")

    run_dask_map_with_progress()

    _, stderr = capsys.readouterr()
    assert "Completed" in stderr

    assert "Follow progress on dask dashboard at" in caplog.text
37
+
38
+
39
def test_dask_map_with_progress_disabled(monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture):
    """With ``TQDM_DISABLE=1`` no progress bar text is written to stderr."""
    monkeypatch.setenv("TQDM_DISABLE", "1")

    run_dask_map_with_progress()

    _, stderr = capsys.readouterr()
    assert "Completed" not in stderr
@@ -1,101 +0,0 @@
1
- from pathlib import Path
2
- from textwrap import dedent
3
-
4
- import pytest
5
-
6
- from protein_quest.cli import main, make_parser
7
-
8
-
9
def test_make_parser_help(capsys: pytest.CaptureFixture[str]):
    """Parsing ``--help`` exits and prints the CLI title on stdout."""
    cli_parser = make_parser()

    # Requesting help is expected to terminate parsing with SystemExit.
    with pytest.raises(SystemExit):
        cli_parser.parse_args(["--help"])

    stdout, _ = capsys.readouterr()
    assert "Protein Quest CLI" in stdout
17
-
18
-
19
@pytest.mark.vcr
def test_search_uniprot(capsys: pytest.CaptureFixture[str], caplog: pytest.LogCaptureFixture):
    """``search uniprot`` limited to one hit writes that accession to stdout.

    The progress messages are expected on stderr and the "more results"
    notice is expected in the captured log records.
    """
    cli_args = ["search", "uniprot", "--taxon-id", "9606", "--reviewed", "--limit", "1", "-"]

    main(cli_args)

    stdout, stderr = capsys.readouterr()
    assert stdout == "A0A024R1R8\n"
    for message in (
        "Searching for UniProt accessions",
        "Found 1 UniProt accessions, written to <stdout>",
    ):
        assert message in stderr
    assert "There may be more results available" in caplog.text
40
-
41
-
42
@pytest.mark.vcr
def test_search_pdbe(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
    """``search pdbe`` writes only the PDB entries that satisfy ``--min-residues``."""
    accessions_file = tmp_path / "uniprot_accessions.txt"
    accessions_file.write_text("P00811\n")
    results_csv = tmp_path / "pdbe_results.csv"
    cli_args = [
        "search",
        "pdbe",
        "--limit",
        "150",
        "--min-residues",
        "360",  # P00811 has 377 residues and 5 full PDB entries
        str(accessions_file),
        str(results_csv),
    ]
    expected = dedent("""\
        uniprot_accession,pdb_id,method,resolution,uniprot_chains,chain,chain_length
        P00811,9C6P,X-Ray_Crystallography,1.66,A/B=1-377,A,377
        P00811,9C81,X-Ray_Crystallography,1.7,A/B=1-377,A,377
        P00811,9C83,X-Ray_Crystallography,2.9,A/B=1-377,A,377
        P00811,9C84,X-Ray_Crystallography,1.7,A/B=1-377,A,377
        P00811,9DHL,X-Ray_Crystallography,1.88,A/B=1-377,A,377
    """)

    main(cli_args)

    assert results_csv.read_text() == expected

    _, stderr = capsys.readouterr()
    for message in (
        "Finding PDB entries for 1 uniprot accessions",
        "Before filtering found 120 PDB entries for 1 uniprot accessions.",
        "After filtering on chain length (360, None) remained 5 PDB entries for 1 uniprot",
        "Written to ",
    ):
        assert message in stderr
76
-
77
-
78
@pytest.mark.vcr
def test_search_uniprot_details(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
    """``search uniprot-details`` writes one CSV row per requested accession."""
    accessions_file = tmp_path / "uniprot_accessions.txt"
    accessions_file.write_text("P05067\nA0A0B5AC95\n")
    details_csv = tmp_path / "uniprot_details.csv"
    expected = dedent("""\
        uniprot_accession,uniprot_id,sequence_length,reviewed,protein_name,taxon_id,taxon_name
        A0A0B5AC95,INS1A_CONGE,115,True,Con-Ins G1a,6491,Conus geographus
        P05067,A4_HUMAN,770,True,Amyloid-beta precursor protein,9606,Homo sapiens
    """)

    main(["search", "uniprot-details", str(accessions_file), str(details_csv)])

    assert details_csv.read_text() == expected

    _, stderr = capsys.readouterr()
    for message in (
        "Retrieving UniProt entry details for 2 uniprot accessions",
        "Retrieved details for 2 UniProt entries, written to ",
    ):
        assert message in stderr
File without changes
File without changes
File without changes