protein-quest 0.10.0__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {protein_quest-0.10.0 → protein_quest-1.0.0}/.github/workflows/ci.yml +10 -1
- protein_quest-1.0.0/.howfairis.yml +1 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/CONTRIBUTING.md +2 -1
- {protein_quest-0.10.0 → protein_quest-1.0.0}/PKG-INFO +23 -3
- {protein_quest-0.10.0 → protein_quest-1.0.0}/README.md +7 -2
- protein_quest-1.0.0/docs/faq.md +17 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/mkdocs.yml +1 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/pyproject.toml +24 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/__version__.py +1 -1
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/alphafold/fetch.py +2 -1
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/parallel.py +80 -3
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/structure.py +12 -0
- protein_quest-1.0.0/tests/cassettes/test_cli/test_search_alphafold.yaml +66 -0
- protein_quest-1.0.0/tests/test_cli.py +305 -0
- protein_quest-1.0.0/tests/test_filters.py +78 -0
- protein_quest-1.0.0/tests/test_parallel.py +45 -0
- protein_quest-0.10.0/tests/test_cli.py +0 -101
- {protein_quest-0.10.0 → protein_quest-1.0.0}/.github/workflows/pages.yml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/.github/workflows/pypi-publish.yml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/.gitignore +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/.python-version +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/.vscode/extensions.json +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/CITATION.cff +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/CODE_OF_CONDUCT.md +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/LICENSE +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/CONTRIBUTING.md +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/index.md +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/notebooks/.gitignore +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/notebooks/alphafold.ipynb +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/notebooks/index.md +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/notebooks/pdbe.ipynb +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/notebooks/uniprot.ipynb +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/docs/protein-quest-mcp.png +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/__init__.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/alphafold/__init__.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/alphafold/confidence.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/alphafold/entry_summary.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/cli.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/converter.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/emdb.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/filters.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/go.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/io.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/mcp_server.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/pdbe/__init__.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/pdbe/fetch.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/py.typed +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/ss.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/taxonomy.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/uniprot.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/src/protein_quest/utils.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/cassettes/test_fetch/test_fetch_alphafold_db_version.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many_all_isoforms.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many_gzipped.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many_no_summary_with_version.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/test_confidence.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/test_entry_summary.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/alphafold/test_fetch.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_cli/test_search_pdbe.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_cli/test_search_uniprot.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_cli/test_search_uniprot_details.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_do_not_match_external_isoform.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/TestSearch4AfExternalIsoforms.test_match_canonical_isoform.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_map_uniprot_accessions2uniprot_details.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4af_ok_sequence_length.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4af_too_big_sequence_length.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4af_too_small_sequence_length.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4interaction_partners.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4macromolecular_complexes.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/conftest.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/fixtures/2Y29.cif.gz +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/fixtures/3JRS_B2A.cif.gz +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/pdbe/test_fetch.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_converter.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_emdb.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_go.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_io.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_mcp.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_ss.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_structure.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_taxonomy.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_uniprot.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/tests/test_utils.py +0 -0
- {protein_quest-0.10.0 → protein_quest-1.0.0}/uv.lock +0 -0
|
@@ -81,4 +81,13 @@ jobs:
|
|
|
81
81
|
find docs/ -name "*.ipynb" -exec uv run --group docs-type marimo convert {} -o {}.py \;
|
|
82
82
|
- name: Run type checkers on docs
|
|
83
83
|
run: uv run --group docs-type pyrefly check docs/notebooks/*.ipynb.py
|
|
84
|
-
|
|
84
|
+
duplicated-code:
|
|
85
|
+
runs-on: ubuntu-latest
|
|
86
|
+
steps:
|
|
87
|
+
- uses: actions/checkout@v4
|
|
88
|
+
- name: Install NodeJS
|
|
89
|
+
uses: actions/setup-node@v6
|
|
90
|
+
with:
|
|
91
|
+
node-version: '24'
|
|
92
|
+
- name: Run jscpd to detect duplicated code
|
|
93
|
+
run: npx jscpd src
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
skip_checklist_checks_reason: "I'm using the fairsoftwarechecklist"
|
|
@@ -38,7 +38,8 @@ The sections below outline the steps in each case.
|
|
|
38
38
|
1. format your code with `uvx ruff format` and sort imports with `uvx ruff check --select I --fix`;
|
|
39
39
|
1. lint your code with `uvx ruff check` (use `uvx ruff check --fix` to fix issues automatically);
|
|
40
40
|
1. type check your code with `uv run pyrefly check src tests`;
|
|
41
|
-
1.
|
|
41
|
+
1. avoid code duplication (detect it with `npx jscpd src`);
|
|
42
|
+
1. update or expand the documentation (see [Contributing to documentation](#contributing-to-documentation) section below);
|
|
42
43
|
1. [push](http://rogerdudler.github.io/git-guide/) your feature branch to (your fork of) the protein-quest repository on GitHub;
|
|
43
44
|
1. create the pull request, e.g. following the instructions [here](https://help.github.com/articles/creating-a-pull-request/).
|
|
44
45
|
|
|
@@ -1,12 +1,27 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: protein_quest
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: Search/retrieve/filter proteins and protein structures
|
|
5
5
|
Project-URL: Homepage, https://github.com/haddocking/protein-quest
|
|
6
6
|
Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
|
|
7
7
|
Project-URL: Documentation, https://www.bonvinlab.org/protein-quest/
|
|
8
8
|
Project-URL: Source, https://github.com/haddocking/protein-quest
|
|
9
9
|
License-File: LICENSE
|
|
10
|
+
Keywords: alphafold,mmcif,pdb,protein,protein structure,uniprot
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Framework :: AsyncIO
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Classifier: Natural Language :: English
|
|
17
|
+
Classifier: Operating System :: MacOS
|
|
18
|
+
Classifier: Operating System :: POSIX
|
|
19
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
24
|
+
Classifier: Typing :: Typed
|
|
10
25
|
Requires-Python: >=3.13
|
|
11
26
|
Requires-Dist: aiofiles>=24.1.0
|
|
12
27
|
Requires-Dist: aiohttp-retry>=2.9.1
|
|
@@ -35,9 +50,14 @@ Description-Content-Type: text/markdown
|
|
|
35
50
|
[](https://www.bonvinlab.org/protein-quest/)
|
|
36
51
|
[](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
|
|
37
52
|
[](https://www.research-software.nl/software/protein-quest)
|
|
53
|
+
[](https://bio.tools/protein-quest)
|
|
38
54
|
[](https://pypi.org/project/protein-quest/)
|
|
39
55
|
[](https://doi.org/10.5281/zenodo.16941288)
|
|
40
56
|
[](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
|
|
57
|
+
[](https://fairsoftwarechecklist.net/v0.2?f=31&a=32113&i=32121&r=133)
|
|
58
|
+
[](https://fair-software.eu)
|
|
59
|
+
[](https://github.com/kucherenko/jscpd/)
|
|
60
|
+
|
|
41
61
|
|
|
42
62
|
Python package to search/retrieve/filter proteins and protein structures.
|
|
43
63
|
|
|
@@ -104,7 +124,7 @@ pip install git+https://github.com/haddocking/protein-quest.git
|
|
|
104
124
|
|
|
105
125
|
The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
|
|
106
126
|
|
|
107
|
-
To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/
|
|
127
|
+
To use it programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/protein_quest/).
|
|
108
128
|
|
|
109
129
|
While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
|
|
110
130
|
This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
|
|
@@ -302,7 +322,7 @@ The mcp server contains an prompt template to search/retrieve/filter candidate s
|
|
|
302
322
|
|
|
303
323
|
## Shell autocompletion
|
|
304
324
|
|
|
305
|
-
The `protein-quest` command line tool supports shell autocompletion using [shtab](https://
|
|
325
|
+
The `protein-quest` command line tool supports shell autocompletion using [shtab](https://docs.iterative.ai/shtab).
|
|
306
326
|
|
|
307
327
|
Initialize for bash shell with:
|
|
308
328
|
|
|
@@ -3,9 +3,14 @@
|
|
|
3
3
|
[](https://www.bonvinlab.org/protein-quest/)
|
|
4
4
|
[](https://github.com/haddocking/protein-quest/actions/workflows/ci.yml)
|
|
5
5
|
[](https://www.research-software.nl/software/protein-quest)
|
|
6
|
+
[](https://bio.tools/protein-quest)
|
|
6
7
|
[](https://pypi.org/project/protein-quest/)
|
|
7
8
|
[](https://doi.org/10.5281/zenodo.16941288)
|
|
8
9
|
[](https://app.codacy.com/gh/haddocking/protein-quest/coverage?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_coverage)
|
|
10
|
+
[](https://fairsoftwarechecklist.net/v0.2?f=31&a=32113&i=32121&r=133)
|
|
11
|
+
[](https://fair-software.eu)
|
|
12
|
+
[](https://github.com/kucherenko/jscpd/)
|
|
13
|
+
|
|
9
14
|
|
|
10
15
|
Python package to search/retrieve/filter proteins and protein structures.
|
|
11
16
|
|
|
@@ -72,7 +77,7 @@ pip install git+https://github.com/haddocking/protein-quest.git
|
|
|
72
77
|
|
|
73
78
|
The main entry point is the `protein-quest` command line tool which has multiple subcommands to perform actions.
|
|
74
79
|
|
|
75
|
-
To use programmaticly, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/
|
|
80
|
+
To use it programmatically, see the [Jupyter notebooks](https://www.bonvinlab.org/protein-quest/notebooks) and [API documentation](https://www.bonvinlab.org/protein-quest/autoapi/protein_quest/).
|
|
76
81
|
|
|
77
82
|
While downloading or copying files it uses a global cache (located at `~/.cache/protein-quest`) and hardlinks to save disk space and improve speed.
|
|
78
83
|
This behavior can be customized with the `--no-cache`, `--cache-dir`, and `--copy-method` command line arguments.
|
|
@@ -270,7 +275,7 @@ The mcp server contains an prompt template to search/retrieve/filter candidate s
|
|
|
270
275
|
|
|
271
276
|
## Shell autocompletion
|
|
272
277
|
|
|
273
|
-
The `protein-quest` command line tool supports shell autocompletion using [shtab](https://
|
|
278
|
+
The `protein-quest` command line tool supports shell autocompletion using [shtab](https://docs.iterative.ai/shtab).
|
|
274
279
|
|
|
275
280
|
Initialize for bash shell with:
|
|
276
281
|
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Frequently Asked Questions (FAQ)
|
|
2
|
+
|
|
3
|
+
## My search is taking a long time. What can I do?
|
|
4
|
+
|
|
5
|
+
Most likely your search is returning a lot of results.
|
|
6
|
+
The search commands have several options to reduce the number of results returned, such as `--limit`.
|
|
7
|
+
|
|
8
|
+
## My log is polluted with progress bar lines. How can I fix this?
|
|
9
|
+
|
|
10
|
+
To reduce the number of lines printed by the progress bar, you can increase the minimum interval between updates with the `TQDM_MININTERVAL` environment variable.
|
|
11
|
+
For example, setting it to `9` will update the progress bar every 9 seconds instead of every 0.1 seconds.
|
|
12
|
+
|
|
13
|
+
To disable progress bars entirely, set the `TQDM_DISABLE` environment variable to any non-empty value.
|
|
14
|
+
|
|
15
|
+
## My protein-quest question is not answered here. Where can I get help?
|
|
16
|
+
|
|
17
|
+
Please see the [Contributing](CONTRIBUTING.md#you-have-a-question) document for instructions on how to ask questions and report issues.
|
|
@@ -4,6 +4,30 @@ dynamic = ["version"]
|
|
|
4
4
|
description = "Search/retrieve/filter proteins and protein structures"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.13"
|
|
7
|
+
keywords = [
|
|
8
|
+
"alphafold",
|
|
9
|
+
"mmcif",
|
|
10
|
+
"pdb",
|
|
11
|
+
"protein structure",
|
|
12
|
+
"protein",
|
|
13
|
+
"uniprot",
|
|
14
|
+
]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 5 - Production/Stable",
|
|
17
|
+
"Environment :: Console",
|
|
18
|
+
"Framework :: AsyncIO",
|
|
19
|
+
"Intended Audience :: Science/Research",
|
|
20
|
+
"License :: OSI Approved :: Apache Software License",
|
|
21
|
+
"Natural Language :: English",
|
|
22
|
+
"Operating System :: MacOS",
|
|
23
|
+
"Operating System :: POSIX :: Linux",
|
|
24
|
+
"Operating System :: POSIX",
|
|
25
|
+
"Programming Language :: Python :: 3.13",
|
|
26
|
+
"Programming Language :: Python :: 3.14",
|
|
27
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
28
|
+
"Topic :: Scientific/Engineering :: Chemistry",
|
|
29
|
+
"Typing :: Typed",
|
|
30
|
+
]
|
|
7
31
|
dependencies = [
|
|
8
32
|
"aiofiles>=24.1.0",
|
|
9
33
|
"aiohttp[speedups]>=3.11.18",
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "1.0.0"
|
|
2
2
|
"""The version of the package."""
|
|
@@ -114,7 +114,6 @@ class AlphaFoldEntry:
|
|
|
114
114
|
"""Convert paths in an AlphaFoldEntry to be relative to the session directory.
|
|
115
115
|
|
|
116
116
|
Args:
|
|
117
|
-
entry: An AlphaFoldEntry instance with absolute paths.
|
|
118
117
|
session_dir: The session directory to which the paths should be made relative.
|
|
119
118
|
|
|
120
119
|
Returns:
|
|
@@ -483,6 +482,7 @@ def fetch_many_async(
|
|
|
483
482
|
)
|
|
484
483
|
|
|
485
484
|
|
|
485
|
+
# jscpd:ignore-start # noqa: ERA001
|
|
486
486
|
def fetch_many(
|
|
487
487
|
uniprot_accessions: Iterable[str],
|
|
488
488
|
save_dir: Path,
|
|
@@ -492,6 +492,7 @@ def fetch_many(
|
|
|
492
492
|
cacher: Cacher | None = None,
|
|
493
493
|
gzip_files: bool = False,
|
|
494
494
|
all_isoforms: bool = False,
|
|
495
|
+
# jscpd:ignore-end # noqa: ERA001
|
|
495
496
|
) -> list[AlphaFoldEntry]:
|
|
496
497
|
"""Synchronously fetches summaries and/or files like cif from AlphaFold Protein Structure Database.
|
|
497
498
|
|
|
@@ -2,13 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
+
import sys
|
|
6
|
+
import warnings
|
|
5
7
|
from collections.abc import Callable, Collection, Iterator
|
|
6
|
-
from contextlib import contextmanager
|
|
8
|
+
from contextlib import contextmanager, suppress
|
|
7
9
|
from typing import Concatenate, ParamSpec, cast
|
|
8
10
|
|
|
9
|
-
from dask.distributed import Client, LocalCluster
|
|
11
|
+
from dask.distributed import Client, LocalCluster
|
|
10
12
|
from distributed.deploy.cluster import Cluster
|
|
13
|
+
from distributed.diagnostics.progress import format_time
|
|
14
|
+
from distributed.diagnostics.progressbar import ProgressBar
|
|
15
|
+
from distributed.utils import LoopRunner
|
|
11
16
|
from psutil import cpu_count
|
|
17
|
+
from tornado.ioloop import IOLoop
|
|
12
18
|
|
|
13
19
|
logger = logging.getLogger(__name__)
|
|
14
20
|
|
|
@@ -80,6 +86,72 @@ def _configure_cpu_dask_scheduler(nproc: int, name: str) -> LocalCluster:
|
|
|
80
86
|
return LocalCluster(name=name, threads_per_worker=1, n_workers=n_workers)
|
|
81
87
|
|
|
82
88
|
|
|
89
|
+
class MyProgressBar(ProgressBar):
|
|
90
|
+
"""Show progress of Dask computations.
|
|
91
|
+
|
|
92
|
+
Copy of distributed.diagnostics.progressbar.TextProgressBar that:
|
|
93
|
+
|
|
94
|
+
- prints to stderr instead of stdout
|
|
95
|
+
- Can have its interval (in seconds) set with `TQDM_MININTERVAL` environment variable
|
|
96
|
+
|
|
97
|
+
"""
|
|
98
|
+
|
|
99
|
+
__loop: IOLoop | None = None
|
|
100
|
+
|
|
101
|
+
def __init__(
|
|
102
|
+
self,
|
|
103
|
+
keys,
|
|
104
|
+
scheduler=None,
|
|
105
|
+
interval="100ms",
|
|
106
|
+
width=40,
|
|
107
|
+
loop=None,
|
|
108
|
+
complete=True,
|
|
109
|
+
start=True,
|
|
110
|
+
**kwargs, # noqa: ARG002
|
|
111
|
+
):
|
|
112
|
+
self._loop_runner = loop_runner = LoopRunner(loop=loop)
|
|
113
|
+
if interval == "100ms":
|
|
114
|
+
interval_env = os.getenv("TQDM_MININTERVAL")
|
|
115
|
+
if interval_env is not None:
|
|
116
|
+
interval = interval_env + "s"
|
|
117
|
+
|
|
118
|
+
super().__init__(keys, scheduler, interval, complete)
|
|
119
|
+
self.width = width
|
|
120
|
+
|
|
121
|
+
if start:
|
|
122
|
+
loop_runner.run_sync(self.listen)
|
|
123
|
+
|
|
124
|
+
@property
|
|
125
|
+
def loop(self) -> IOLoop | None:
|
|
126
|
+
loop = self.__loop
|
|
127
|
+
if loop is None:
|
|
128
|
+
# If the loop is not running when this is called, the LoopRunner.loop
|
|
129
|
+
# property will raise a DeprecationWarning
|
|
130
|
+
# However subsequent calls might occur - eg atexit, where a stopped
|
|
131
|
+
# loop is still acceptable - so we cache access to the loop.
|
|
132
|
+
self.__loop = loop = self._loop_runner.loop
|
|
133
|
+
return loop
|
|
134
|
+
|
|
135
|
+
@loop.setter
|
|
136
|
+
def loop(self, value: IOLoop) -> None:
|
|
137
|
+
warnings.warn("setting the loop property is deprecated", DeprecationWarning, stacklevel=2)
|
|
138
|
+
self.__loop = value
|
|
139
|
+
|
|
140
|
+
def _draw_bar(self, remaining, all, **kwargs): # noqa: A002, ARG002
|
|
141
|
+
frac = (1 - remaining / all) if all else 1.0
|
|
142
|
+
bar = "#" * int(self.width * frac)
|
|
143
|
+
percent = int(100 * frac)
|
|
144
|
+
elapsed = format_time(self.elapsed)
|
|
145
|
+
msg = "\r[{0:<{1}}] | {2}% Completed | {3}".format(bar, self.width, percent, elapsed)
|
|
146
|
+
with suppress(ValueError):
|
|
147
|
+
sys.stderr.write(msg)
|
|
148
|
+
sys.stderr.flush()
|
|
149
|
+
|
|
150
|
+
def _draw_stop(self, **kwargs): # noqa: ARG002
|
|
151
|
+
sys.stderr.write("\33[2K\r")
|
|
152
|
+
sys.stderr.flush()
|
|
153
|
+
|
|
154
|
+
|
|
83
155
|
# Generic type parameters used across helpers
|
|
84
156
|
P = ParamSpec("P")
|
|
85
157
|
|
|
@@ -94,6 +166,10 @@ def dask_map_with_progress[T, R, **P](
|
|
|
94
166
|
"""
|
|
95
167
|
Wrapper for map, progress, and gather of Dask that returns a correctly typed list.
|
|
96
168
|
|
|
169
|
+
Environment variables:
|
|
170
|
+
- Set interval (in seconds) of progress updates with `TQDM_MININTERVAL`
|
|
171
|
+
- Disable the progress bar by setting `TQDM_DISABLE` to any non-empty value
|
|
172
|
+
|
|
97
173
|
Args:
|
|
98
174
|
client: Dask client.
|
|
99
175
|
func: Function to map; first parameter comes from ``iterable`` and any
|
|
@@ -109,6 +185,7 @@ def dask_map_with_progress[T, R, **P](
|
|
|
109
185
|
if client.dashboard_link:
|
|
110
186
|
logger.info(f"Follow progress on dask dashboard at: {client.dashboard_link}")
|
|
111
187
|
futures = client.map(func, iterable, *args, **kwargs)
|
|
112
|
-
|
|
188
|
+
if not os.getenv("TQDM_DISABLE"):
|
|
189
|
+
MyProgressBar(futures)
|
|
113
190
|
results = client.gather(futures)
|
|
114
191
|
return cast("list[R]", results)
|
|
@@ -132,6 +132,18 @@ class ChainNotFoundError(IndexError):
|
|
|
132
132
|
"""Helper for pickling the exception."""
|
|
133
133
|
return (self.__class__, (self.chain_id, self.file, self.available_chains))
|
|
134
134
|
|
|
135
|
+
def __eq__(self, other):
|
|
136
|
+
if not isinstance(other, ChainNotFoundError):
|
|
137
|
+
return NotImplemented
|
|
138
|
+
return (
|
|
139
|
+
self.chain_id == other.chain_id
|
|
140
|
+
and self.file == other.file
|
|
141
|
+
and self.available_chains == other.available_chains
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
def __hash__(self):
|
|
145
|
+
return hash((self.chain_id, str(self.file), frozenset(self.available_chains)))
|
|
146
|
+
|
|
135
147
|
|
|
136
148
|
def write_single_chain_structure_file(
|
|
137
149
|
input_file: Path,
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
interactions:
|
|
2
|
+
- request:
|
|
3
|
+
body: null
|
|
4
|
+
headers:
|
|
5
|
+
Accept:
|
|
6
|
+
- application/sparql-results+json,application/json,text/javascript,application/javascript
|
|
7
|
+
Connection:
|
|
8
|
+
- close
|
|
9
|
+
Host:
|
|
10
|
+
- sparql.uniprot.org
|
|
11
|
+
User-Agent:
|
|
12
|
+
- sparqlwrapper 2.0.0 (rdflib.github.io/sparqlwrapper)
|
|
13
|
+
method: GET
|
|
14
|
+
uri: https://sparql.uniprot.org/sparql?query=%0A++++++++PREFIX+up%3A+%3Chttp%3A//purl.uniprot.org/core/%3E%0A++++++++PREFIX+taxon%3A+%3Chttp%3A//purl.uniprot.org/taxonomy/%3E%0A++++++++PREFIX+rdf%3A+%3Chttp%3A//www.w3.org/1999/02/22-rdf-syntax-ns%23%3E%0A++++++++PREFIX+rdfs%3A+%3Chttp%3A//www.w3.org/2000/01/rdf-schema%23%3E%0A++++++++PREFIX+skos%3A+%3Chttp%3A//www.w3.org/2004/02/skos/core%23%3E%0A++++++++PREFIX+GO%3A%3Chttp%3A//purl.obolibrary.org/obo/GO_%3E%0A%0A++++++++SELECT+%3Fprotein+%3Faf_db%0A++++++++WHERE+%7B%0A%0A++++++++%23+---+Protein+Selection+---%0A++++++++VALUES+%28%3Fac%29+%7B+%28%22P00811%22%29%7D%0A++++++++BIND+%28IRI%28CONCAT%28%22http%3A//purl.uniprot.org/uniprot/%22%2C%3Fac%29%29+AS+%3Fprotein%29%0A++++++++%3Fprotein+a+up%3AProtein+.%0A%0A%0A%23+---+Protein+Selection+---%0A%3Fprotein+a+up%3AProtein+.%0A%0A%23+---+AlphaFoldDB+Info+---%0A%3Fprotein+rdfs%3AseeAlso+%3Faf_db+.%0A%3Faf_db+up%3Adatabase+%3Chttp%3A//purl.uniprot.org/database/AlphaFoldDB%3E+.%0A%0A%0A++++++++%7D%0A%0A++++++++LIMIT+10000%0A&format=json&output=json&results=json
|
|
15
|
+
response:
|
|
16
|
+
body:
|
|
17
|
+
string: "{\n \"head\" : {\n \"vars\" : [\n \"protein\",\n \"af_db\"\n
|
|
18
|
+
\ ]\n },\n \"results\" : {\n \"bindings\" : [\n {\n \"protein\"
|
|
19
|
+
: {\n \"type\" : \"uri\",\n \"value\" : \"http://purl.uniprot.org/uniprot/P00811\"\n
|
|
20
|
+
\ },\n \"af_db\" : {\n \"type\" : \"uri\",\n \"value\"
|
|
21
|
+
: \"http://purl.uniprot.org/alphafolddb/P00811\"\n }\n }\n ]\n
|
|
22
|
+
\ }\n}"
|
|
23
|
+
headers:
|
|
24
|
+
Access-Control-Allow-Headers:
|
|
25
|
+
- origin, x-requested-with, content-type, X-Release, queryid
|
|
26
|
+
Access-Control-Allow-Origin:
|
|
27
|
+
- '*'
|
|
28
|
+
Access-Control-Expose-Headers:
|
|
29
|
+
- X-Total-Results, X-Release, queryid, content-type, user-agent, cache-control,
|
|
30
|
+
etag, range
|
|
31
|
+
Cache-Control:
|
|
32
|
+
- public
|
|
33
|
+
Connection:
|
|
34
|
+
- close
|
|
35
|
+
Content-Disposition:
|
|
36
|
+
- attachment; filename="sparql-CA32A0B92DC5589CE5CD9BF33CF492F9.srj"
|
|
37
|
+
Content-Length:
|
|
38
|
+
- '375'
|
|
39
|
+
Content-Type:
|
|
40
|
+
- application/sparql-results+json
|
|
41
|
+
Date:
|
|
42
|
+
- Mon, 17 Nov 2025 11:45:48 GMT
|
|
43
|
+
ETag:
|
|
44
|
+
- W/"2025_04"
|
|
45
|
+
Expires:
|
|
46
|
+
- Tue, 18 Nov 2025 11:45:48 GMT
|
|
47
|
+
Server:
|
|
48
|
+
- Apache
|
|
49
|
+
Strict-Transport-Security:
|
|
50
|
+
- max-age=31536001; includeSubDomains
|
|
51
|
+
Vary:
|
|
52
|
+
- Negotiate,Accept,Accept-Encoding,Content-Type
|
|
53
|
+
X-Content-Type-Options:
|
|
54
|
+
- nosniff
|
|
55
|
+
X-Frame-Options:
|
|
56
|
+
- SAMEORIGIN
|
|
57
|
+
X-Powered-By:
|
|
58
|
+
- sib.swiss
|
|
59
|
+
X-Release:
|
|
60
|
+
- '2025_04'
|
|
61
|
+
queryid:
|
|
62
|
+
- '770887'
|
|
63
|
+
status:
|
|
64
|
+
code: 200
|
|
65
|
+
message: ''
|
|
66
|
+
version: 1
|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
import csv
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from textwrap import dedent
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from protein_quest.cli import main, make_parser
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_make_parser_help(capsys: pytest.CaptureFixture[str]):
|
|
11
|
+
in_args = ["--help"]
|
|
12
|
+
parser = make_parser()
|
|
13
|
+
with pytest.raises(SystemExit):
|
|
14
|
+
parser.parse_args(in_args)
|
|
15
|
+
|
|
16
|
+
captured = capsys.readouterr()
|
|
17
|
+
assert "Protein Quest CLI" in captured.out
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@pytest.mark.vcr
|
|
21
|
+
def test_search_uniprot(capsys: pytest.CaptureFixture[str], caplog: pytest.LogCaptureFixture):
|
|
22
|
+
argv = [
|
|
23
|
+
"search",
|
|
24
|
+
"uniprot",
|
|
25
|
+
"--taxon-id",
|
|
26
|
+
"9606",
|
|
27
|
+
"--reviewed",
|
|
28
|
+
"--limit",
|
|
29
|
+
"1",
|
|
30
|
+
"-",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
main(argv)
|
|
34
|
+
|
|
35
|
+
captured = capsys.readouterr()
|
|
36
|
+
expected = "A0A024R1R8\n"
|
|
37
|
+
assert captured.out == expected
|
|
38
|
+
assert "Searching for UniProt accessions" in captured.err
|
|
39
|
+
assert "Found 1 UniProt accessions, written to <stdout>" in captured.err
|
|
40
|
+
assert "There may be more results available" in caplog.text
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@pytest.mark.vcr
|
|
44
|
+
def test_search_pdbe(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
|
|
45
|
+
input_text = tmp_path / "uniprot_accessions.txt"
|
|
46
|
+
input_text.write_text("P00811\n")
|
|
47
|
+
output_file = tmp_path / "pdbe_results.csv"
|
|
48
|
+
argv = [
|
|
49
|
+
"search",
|
|
50
|
+
"pdbe",
|
|
51
|
+
"--limit",
|
|
52
|
+
"150",
|
|
53
|
+
"--min-residues",
|
|
54
|
+
"360", # P00811 has 377 residues and 5 full PDB entries
|
|
55
|
+
str(input_text),
|
|
56
|
+
str(output_file),
|
|
57
|
+
]
|
|
58
|
+
|
|
59
|
+
main(argv)
|
|
60
|
+
|
|
61
|
+
result = output_file.read_text()
|
|
62
|
+
expected = dedent("""\
|
|
63
|
+
uniprot_accession,pdb_id,method,resolution,uniprot_chains,chain,chain_length
|
|
64
|
+
P00811,9C6P,X-Ray_Crystallography,1.66,A/B=1-377,A,377
|
|
65
|
+
P00811,9C81,X-Ray_Crystallography,1.7,A/B=1-377,A,377
|
|
66
|
+
P00811,9C83,X-Ray_Crystallography,2.9,A/B=1-377,A,377
|
|
67
|
+
P00811,9C84,X-Ray_Crystallography,1.7,A/B=1-377,A,377
|
|
68
|
+
P00811,9DHL,X-Ray_Crystallography,1.88,A/B=1-377,A,377
|
|
69
|
+
""")
|
|
70
|
+
assert result == expected
|
|
71
|
+
|
|
72
|
+
captured = capsys.readouterr()
|
|
73
|
+
assert "Finding PDB entries for 1 uniprot accessions" in captured.err
|
|
74
|
+
assert "Before filtering found 120 PDB entries for 1 uniprot accessions." in captured.err
|
|
75
|
+
assert "After filtering on chain length (360, None) remained 5 PDB entries for 1 uniprot" in captured.err
|
|
76
|
+
assert "Written to " in captured.err
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@pytest.mark.vcr
|
|
80
|
+
def test_search_uniprot_details(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
|
|
81
|
+
input_text = tmp_path / "uniprot_accessions.txt"
|
|
82
|
+
input_text.write_text("P05067\nA0A0B5AC95\n")
|
|
83
|
+
output_csv = tmp_path / "uniprot_details.csv"
|
|
84
|
+
argv = [
|
|
85
|
+
"search",
|
|
86
|
+
"uniprot-details",
|
|
87
|
+
str(input_text),
|
|
88
|
+
str(output_csv),
|
|
89
|
+
]
|
|
90
|
+
|
|
91
|
+
main(argv)
|
|
92
|
+
|
|
93
|
+
result = output_csv.read_text()
|
|
94
|
+
expected = dedent("""\
|
|
95
|
+
uniprot_accession,uniprot_id,sequence_length,reviewed,protein_name,taxon_id,taxon_name
|
|
96
|
+
A0A0B5AC95,INS1A_CONGE,115,True,Con-Ins G1a,6491,Conus geographus
|
|
97
|
+
P05067,A4_HUMAN,770,True,Amyloid-beta precursor protein,9606,Homo sapiens
|
|
98
|
+
""")
|
|
99
|
+
assert result == expected
|
|
100
|
+
captured = capsys.readouterr()
|
|
101
|
+
assert "Retrieving UniProt entry details for 2 uniprot accessions" in captured.err
|
|
102
|
+
assert "Retrieved details for 2 UniProt entries, written to " in captured.err
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@pytest.mark.vcr
|
|
106
|
+
def test_search_alphafold(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
|
|
107
|
+
input_text = tmp_path / "uniprot_accessions.txt"
|
|
108
|
+
input_text.write_text("P00811\n")
|
|
109
|
+
output_file = tmp_path / "af_results.csv"
|
|
110
|
+
|
|
111
|
+
argv = [
|
|
112
|
+
"search",
|
|
113
|
+
"alphafold",
|
|
114
|
+
str(input_text),
|
|
115
|
+
str(output_file),
|
|
116
|
+
]
|
|
117
|
+
|
|
118
|
+
main(argv)
|
|
119
|
+
|
|
120
|
+
result = output_file.read_text()
|
|
121
|
+
|
|
122
|
+
expected = dedent("""\
|
|
123
|
+
uniprot_accession,af_id
|
|
124
|
+
P00811,P00811
|
|
125
|
+
""")
|
|
126
|
+
assert result == expected
|
|
127
|
+
|
|
128
|
+
captured = capsys.readouterr()
|
|
129
|
+
assert "Finding AlphaFold entries for 1 uniprot accessions" in captured.err
|
|
130
|
+
assert "Found 1 AlphaFold entries, written to " in captured.err
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def test_filter_chain_happy_path(sample2_cif: Path, tmp_path: Path, capsys: pytest.CaptureFixture[str]):
|
|
134
|
+
chains_fn = tmp_path / "chains.csv"
|
|
135
|
+
chains_fn.write_text("pdb_id,chain\n2Y29,A\n")
|
|
136
|
+
|
|
137
|
+
argv = [
|
|
138
|
+
"filter",
|
|
139
|
+
"chain",
|
|
140
|
+
str(chains_fn),
|
|
141
|
+
str(sample2_cif.parent),
|
|
142
|
+
str(tmp_path),
|
|
143
|
+
]
|
|
144
|
+
|
|
145
|
+
main(argv)
|
|
146
|
+
|
|
147
|
+
output_file = tmp_path / "2Y29_A2A.cif.gz"
|
|
148
|
+
assert output_file.exists()
|
|
149
|
+
|
|
150
|
+
captured = capsys.readouterr()
|
|
151
|
+
assert "Wrote 1 single-chain PDB/mmCIF files to" in captured.err
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def test_filter_chain_input_file_notfound(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
|
|
155
|
+
input_dir = tmp_path / "input"
|
|
156
|
+
input_dir.mkdir()
|
|
157
|
+
output_dir = tmp_path / "output"
|
|
158
|
+
output_dir.mkdir()
|
|
159
|
+
chains_fn = tmp_path / "chains.csv"
|
|
160
|
+
chains_fn.write_text("pdb_id,chain\n2Y29,A\n")
|
|
161
|
+
|
|
162
|
+
argv = [
|
|
163
|
+
"filter",
|
|
164
|
+
"chain",
|
|
165
|
+
str(chains_fn),
|
|
166
|
+
str(input_dir),
|
|
167
|
+
str(output_dir),
|
|
168
|
+
]
|
|
169
|
+
|
|
170
|
+
with pytest.raises(SystemExit):
|
|
171
|
+
main(argv)
|
|
172
|
+
|
|
173
|
+
assert not any(output_dir.iterdir())
|
|
174
|
+
|
|
175
|
+
captured = capsys.readouterr()
|
|
176
|
+
assert "No structure file found for 2Y29" in captured.err
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def test_filter_residue(sample_cif: Path, sample2_cif: Path, tmp_path: Path, capsys: pytest.CaptureFixture[str]):
|
|
180
|
+
input_dir = tmp_path / "input"
|
|
181
|
+
input_dir.mkdir()
|
|
182
|
+
local_sample = input_dir / sample_cif.name
|
|
183
|
+
local_sample.symlink_to(sample_cif)
|
|
184
|
+
local_sample2 = input_dir / sample2_cif.name
|
|
185
|
+
local_sample2.symlink_to(sample2_cif)
|
|
186
|
+
output_dir = tmp_path / "output"
|
|
187
|
+
output_dir.mkdir()
|
|
188
|
+
stats_fn = tmp_path / "stats.csv"
|
|
189
|
+
|
|
190
|
+
argv = [
|
|
191
|
+
"filter",
|
|
192
|
+
"residue",
|
|
193
|
+
str(input_dir),
|
|
194
|
+
str(output_dir),
|
|
195
|
+
"--min-residues",
|
|
196
|
+
"100",
|
|
197
|
+
"--max-residues",
|
|
198
|
+
"200",
|
|
199
|
+
"--copy-method",
|
|
200
|
+
"symlink",
|
|
201
|
+
"--write-stats",
|
|
202
|
+
str(stats_fn),
|
|
203
|
+
]
|
|
204
|
+
|
|
205
|
+
main(argv)
|
|
206
|
+
|
|
207
|
+
# Check output files
|
|
208
|
+
output_files = list(output_dir.iterdir())
|
|
209
|
+
assert len(output_files) == 1
|
|
210
|
+
expected_passed_file = output_dir / sample_cif.name
|
|
211
|
+
assert expected_passed_file in output_files
|
|
212
|
+
|
|
213
|
+
# Check stats file
|
|
214
|
+
with stats_fn.open() as f:
|
|
215
|
+
rows = list(csv.DictReader(f))
|
|
216
|
+
# Input files processed in alphabetical order
|
|
217
|
+
expected_stats = [
|
|
218
|
+
{
|
|
219
|
+
"input_file": str(local_sample2),
|
|
220
|
+
"residue_count": "8",
|
|
221
|
+
"passed": "False",
|
|
222
|
+
"output_file": "",
|
|
223
|
+
},
|
|
224
|
+
{
|
|
225
|
+
"input_file": str(local_sample),
|
|
226
|
+
"residue_count": "173",
|
|
227
|
+
"passed": "True",
|
|
228
|
+
"output_file": str(expected_passed_file),
|
|
229
|
+
},
|
|
230
|
+
]
|
|
231
|
+
assert rows == expected_stats
|
|
232
|
+
|
|
233
|
+
# Check captured output
|
|
234
|
+
captured = capsys.readouterr()
|
|
235
|
+
assert "by number of residues in chain A" in captured.err
|
|
236
|
+
assert "Wrote 1 files to" in captured.err
|
|
237
|
+
assert "Statistics written to" in captured.err
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def test_filter_secondary_structure(
|
|
241
|
+
sample_cif: Path, sample2_cif: Path, tmp_path: Path, capsys: pytest.CaptureFixture[str]
|
|
242
|
+
):
|
|
243
|
+
input_dir = tmp_path / "input"
|
|
244
|
+
input_dir.mkdir()
|
|
245
|
+
local_sample = input_dir / sample_cif.name
|
|
246
|
+
local_sample.symlink_to(sample_cif)
|
|
247
|
+
local_sample2 = input_dir / sample2_cif.name
|
|
248
|
+
local_sample2.symlink_to(sample2_cif)
|
|
249
|
+
output_dir = tmp_path / "output"
|
|
250
|
+
output_dir.mkdir()
|
|
251
|
+
stats_fn = tmp_path / "ss_stats.csv"
|
|
252
|
+
|
|
253
|
+
argv = [
|
|
254
|
+
"filter",
|
|
255
|
+
"secondary-structure",
|
|
256
|
+
str(input_dir),
|
|
257
|
+
str(output_dir),
|
|
258
|
+
"--abs-min-helix-residues",
|
|
259
|
+
"10",
|
|
260
|
+
"--copy-method",
|
|
261
|
+
"symlink",
|
|
262
|
+
"--write-stats",
|
|
263
|
+
str(stats_fn),
|
|
264
|
+
]
|
|
265
|
+
|
|
266
|
+
main(argv)
|
|
267
|
+
|
|
268
|
+
# Check output files
|
|
269
|
+
output_files = list(output_dir.iterdir())
|
|
270
|
+
assert len(output_files) == 1
|
|
271
|
+
expected_passed_file = output_dir / sample_cif.name
|
|
272
|
+
assert expected_passed_file in output_files
|
|
273
|
+
|
|
274
|
+
# Check stats file
|
|
275
|
+
with stats_fn.open() as f:
|
|
276
|
+
rows = list(csv.DictReader(f))
|
|
277
|
+
expected_stats = [
|
|
278
|
+
{
|
|
279
|
+
"helix_ratio": "0.0",
|
|
280
|
+
"input_file": str(local_sample2),
|
|
281
|
+
"nr_helix_residues": "0",
|
|
282
|
+
"nr_residues": "8",
|
|
283
|
+
"nr_sheet_residues": "0",
|
|
284
|
+
"output_file": "",
|
|
285
|
+
"passed": "False",
|
|
286
|
+
"sheet_ratio": "0.0",
|
|
287
|
+
},
|
|
288
|
+
{
|
|
289
|
+
"input_file": str(local_sample),
|
|
290
|
+
"nr_residues": "173",
|
|
291
|
+
"nr_helix_residues": "58",
|
|
292
|
+
"nr_sheet_residues": "59",
|
|
293
|
+
"helix_ratio": f"{58 / 173:.3f}",
|
|
294
|
+
"sheet_ratio": f"{59 / 173:.3f}",
|
|
295
|
+
"passed": "True",
|
|
296
|
+
"output_file": str(expected_passed_file),
|
|
297
|
+
},
|
|
298
|
+
]
|
|
299
|
+
assert rows == expected_stats
|
|
300
|
+
|
|
301
|
+
# Check captured output
|
|
302
|
+
captured = capsys.readouterr()
|
|
303
|
+
assert "by secondary structure" in captured.err
|
|
304
|
+
assert "Wrote 1 files to" in captured.err
|
|
305
|
+
assert "Statistics written to" in captured.err
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from protein_quest.filters import (
|
|
6
|
+
ChainFilterStatistics,
|
|
7
|
+
ResidueFilterStatistics,
|
|
8
|
+
filter_files_on_chain,
|
|
9
|
+
filter_files_on_residues,
|
|
10
|
+
)
|
|
11
|
+
from protein_quest.structure import ChainNotFoundError
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@pytest.mark.parametrize(
|
|
15
|
+
"scheduler_address,expected_progress_bar",
|
|
16
|
+
[
|
|
17
|
+
(None, "Completed"), # creates a local cluster
|
|
18
|
+
("sequential", "file/s"),
|
|
19
|
+
],
|
|
20
|
+
)
|
|
21
|
+
def test_filter_files_on_chain_local_cluster(
|
|
22
|
+
sample2_cif: Path,
|
|
23
|
+
tmp_path: Path,
|
|
24
|
+
capsys: pytest.CaptureFixture[str],
|
|
25
|
+
scheduler_address: str | None,
|
|
26
|
+
expected_progress_bar: str,
|
|
27
|
+
):
|
|
28
|
+
file2chains = [
|
|
29
|
+
(sample2_cif, "A"), # should pass
|
|
30
|
+
(sample2_cif, "B"), # should be discarded
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
results = filter_files_on_chain(file2chains, tmp_path, scheduler_address=scheduler_address)
|
|
34
|
+
|
|
35
|
+
expected_passed = ChainFilterStatistics(
|
|
36
|
+
input_file=sample2_cif,
|
|
37
|
+
chain_id="A",
|
|
38
|
+
passed=True,
|
|
39
|
+
output_file=tmp_path / "2Y29_A2A.cif.gz",
|
|
40
|
+
)
|
|
41
|
+
assert expected_passed.output_file and expected_passed.output_file.exists()
|
|
42
|
+
expected_discarded = ChainFilterStatistics(
|
|
43
|
+
input_file=sample2_cif,
|
|
44
|
+
chain_id="B",
|
|
45
|
+
passed=False,
|
|
46
|
+
output_file=None,
|
|
47
|
+
discard_reason=ChainNotFoundError("B", sample2_cif, {"A"}),
|
|
48
|
+
)
|
|
49
|
+
assert results == [expected_passed, expected_discarded]
|
|
50
|
+
|
|
51
|
+
_, stderr = capsys.readouterr()
|
|
52
|
+
assert expected_progress_bar in stderr
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_filter_files_on_residues(sample_cif: Path, sample2_cif: Path, tmp_path: Path):
|
|
56
|
+
results = list(
|
|
57
|
+
filter_files_on_residues(
|
|
58
|
+
input_files=[sample_cif, sample2_cif],
|
|
59
|
+
output_dir=tmp_path,
|
|
60
|
+
min_residues=100,
|
|
61
|
+
max_residues=200,
|
|
62
|
+
)
|
|
63
|
+
)
|
|
64
|
+
expected_passed = ResidueFilterStatistics(
|
|
65
|
+
input_file=sample_cif,
|
|
66
|
+
residue_count=173,
|
|
67
|
+
passed=True,
|
|
68
|
+
output_file=tmp_path / sample_cif.name,
|
|
69
|
+
)
|
|
70
|
+
assert expected_passed.output_file and expected_passed.output_file.exists()
|
|
71
|
+
expected_discarded = ResidueFilterStatistics(
|
|
72
|
+
input_file=sample2_cif,
|
|
73
|
+
residue_count=8,
|
|
74
|
+
passed=False,
|
|
75
|
+
output_file=None,
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
assert results == [expected_passed, expected_discarded]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from distributed import Client
|
|
3
|
+
|
|
4
|
+
from protein_quest.parallel import MyProgressBar, dask_map_with_progress
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_MyProgressBar_interval_env(monkeypatch):
|
|
8
|
+
monkeypatch.setenv("TQDM_MININTERVAL", "1234")
|
|
9
|
+
|
|
10
|
+
with Client():
|
|
11
|
+
progress_bar = MyProgressBar([])
|
|
12
|
+
assert progress_bar.interval == 1234
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def run_dask_map_with_progress():
|
|
16
|
+
def square(x: int) -> int:
|
|
17
|
+
return x**2
|
|
18
|
+
|
|
19
|
+
with Client() as client:
|
|
20
|
+
result = dask_map_with_progress(
|
|
21
|
+
client,
|
|
22
|
+
square,
|
|
23
|
+
range(5),
|
|
24
|
+
)
|
|
25
|
+
assert result == [0, 1, 4, 9, 16]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_dask_map_with_progress(capsys: pytest.CaptureFixture, caplog: pytest.LogCaptureFixture):
|
|
29
|
+
caplog.set_level("INFO")
|
|
30
|
+
|
|
31
|
+
run_dask_map_with_progress()
|
|
32
|
+
|
|
33
|
+
captured = capsys.readouterr()
|
|
34
|
+
assert "Completed" in captured.err
|
|
35
|
+
|
|
36
|
+
assert "Follow progress on dask dashboard at" in caplog.text
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_dask_map_with_progress_disabled(monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture):
|
|
40
|
+
monkeypatch.setenv("TQDM_DISABLE", "1")
|
|
41
|
+
|
|
42
|
+
run_dask_map_with_progress()
|
|
43
|
+
|
|
44
|
+
captured = capsys.readouterr()
|
|
45
|
+
assert "Completed" not in captured.err
|
|
@@ -1,101 +0,0 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
from textwrap import dedent
|
|
3
|
-
|
|
4
|
-
import pytest
|
|
5
|
-
|
|
6
|
-
from protein_quest.cli import main, make_parser
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def test_make_parser_help(capsys: pytest.CaptureFixture[str]):
|
|
10
|
-
in_args = ["--help"]
|
|
11
|
-
parser = make_parser()
|
|
12
|
-
with pytest.raises(SystemExit):
|
|
13
|
-
parser.parse_args(in_args)
|
|
14
|
-
|
|
15
|
-
captured = capsys.readouterr()
|
|
16
|
-
assert "Protein Quest CLI" in captured.out
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
@pytest.mark.vcr
|
|
20
|
-
def test_search_uniprot(capsys: pytest.CaptureFixture[str], caplog: pytest.LogCaptureFixture):
|
|
21
|
-
argv = [
|
|
22
|
-
"search",
|
|
23
|
-
"uniprot",
|
|
24
|
-
"--taxon-id",
|
|
25
|
-
"9606",
|
|
26
|
-
"--reviewed",
|
|
27
|
-
"--limit",
|
|
28
|
-
"1",
|
|
29
|
-
"-",
|
|
30
|
-
]
|
|
31
|
-
|
|
32
|
-
main(argv)
|
|
33
|
-
|
|
34
|
-
captured = capsys.readouterr()
|
|
35
|
-
expected = "A0A024R1R8\n"
|
|
36
|
-
assert captured.out == expected
|
|
37
|
-
assert "Searching for UniProt accessions" in captured.err
|
|
38
|
-
assert "Found 1 UniProt accessions, written to <stdout>" in captured.err
|
|
39
|
-
assert "There may be more results available" in caplog.text
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
@pytest.mark.vcr
|
|
43
|
-
def test_search_pdbe(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
|
|
44
|
-
input_text = tmp_path / "uniprot_accessions.txt"
|
|
45
|
-
input_text.write_text("P00811\n")
|
|
46
|
-
output_file = tmp_path / "pdbe_results.csv"
|
|
47
|
-
argv = [
|
|
48
|
-
"search",
|
|
49
|
-
"pdbe",
|
|
50
|
-
"--limit",
|
|
51
|
-
"150",
|
|
52
|
-
"--min-residues",
|
|
53
|
-
"360", # P00811 has 377 residues and 5 full PDB entries
|
|
54
|
-
str(input_text),
|
|
55
|
-
str(output_file),
|
|
56
|
-
]
|
|
57
|
-
|
|
58
|
-
main(argv)
|
|
59
|
-
|
|
60
|
-
result = output_file.read_text()
|
|
61
|
-
expected = dedent("""\
|
|
62
|
-
uniprot_accession,pdb_id,method,resolution,uniprot_chains,chain,chain_length
|
|
63
|
-
P00811,9C6P,X-Ray_Crystallography,1.66,A/B=1-377,A,377
|
|
64
|
-
P00811,9C81,X-Ray_Crystallography,1.7,A/B=1-377,A,377
|
|
65
|
-
P00811,9C83,X-Ray_Crystallography,2.9,A/B=1-377,A,377
|
|
66
|
-
P00811,9C84,X-Ray_Crystallography,1.7,A/B=1-377,A,377
|
|
67
|
-
P00811,9DHL,X-Ray_Crystallography,1.88,A/B=1-377,A,377
|
|
68
|
-
""")
|
|
69
|
-
assert result == expected
|
|
70
|
-
|
|
71
|
-
captured = capsys.readouterr()
|
|
72
|
-
assert "Finding PDB entries for 1 uniprot accessions" in captured.err
|
|
73
|
-
assert "Before filtering found 120 PDB entries for 1 uniprot accessions." in captured.err
|
|
74
|
-
assert "After filtering on chain length (360, None) remained 5 PDB entries for 1 uniprot" in captured.err
|
|
75
|
-
assert "Written to " in captured.err
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
@pytest.mark.vcr
|
|
79
|
-
def test_search_uniprot_details(tmp_path: Path, capsys: pytest.CaptureFixture[str]):
|
|
80
|
-
input_text = tmp_path / "uniprot_accessions.txt"
|
|
81
|
-
input_text.write_text("P05067\nA0A0B5AC95\n")
|
|
82
|
-
output_csv = tmp_path / "uniprot_details.csv"
|
|
83
|
-
argv = [
|
|
84
|
-
"search",
|
|
85
|
-
"uniprot-details",
|
|
86
|
-
str(input_text),
|
|
87
|
-
str(output_csv),
|
|
88
|
-
]
|
|
89
|
-
|
|
90
|
-
main(argv)
|
|
91
|
-
|
|
92
|
-
result = output_csv.read_text()
|
|
93
|
-
expected = dedent("""\
|
|
94
|
-
uniprot_accession,uniprot_id,sequence_length,reviewed,protein_name,taxon_id,taxon_name
|
|
95
|
-
A0A0B5AC95,INS1A_CONGE,115,True,Con-Ins G1a,6491,Conus geographus
|
|
96
|
-
P05067,A4_HUMAN,770,True,Amyloid-beta precursor protein,9606,Homo sapiens
|
|
97
|
-
""")
|
|
98
|
-
assert result == expected
|
|
99
|
-
captured = capsys.readouterr()
|
|
100
|
-
assert "Retrieving UniProt entry details for 2 uniprot accessions" in captured.err
|
|
101
|
-
assert "Retrieved details for 2 UniProt entries, written to " in captured.err
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_cli/test_search_uniprot.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_taxonomy/test_search_taxon.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4af.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4emdb.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4pdb.yaml
RENAMED
|
File without changes
|
{protein_quest-0.10.0 → protein_quest-1.0.0}/tests/cassettes/test_uniprot/test_search4uniprot.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{protein_quest-0.10.0 → protein_quest-1.0.0}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|