protein-quest 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of protein-quest might be problematic. Click here for more details.
- protein_quest/__version__.py +1 -1
- protein_quest/alphafold/confidence.py +2 -2
- protein_quest/alphafold/entry_summary.py +46 -22
- protein_quest/alphafold/fetch.py +76 -42
- protein_quest/cli.py +385 -114
- protein_quest/filters.py +2 -5
- protein_quest/io.py +350 -0
- protein_quest/mcp_server.py +21 -7
- protein_quest/ss.py +3 -7
- protein_quest/{pdbe/io.py → structure.py} +77 -126
- protein_quest/uniprot.py +287 -15
- protein_quest/utils.py +26 -2
- {protein_quest-0.5.1.dist-info → protein_quest-0.7.0.dist-info}/METADATA +42 -5
- protein_quest-0.7.0.dist-info/RECORD +27 -0
- protein_quest-0.5.1.dist-info/RECORD +0 -26
- {protein_quest-0.5.1.dist-info → protein_quest-0.7.0.dist-info}/WHEEL +0 -0
- {protein_quest-0.5.1.dist-info → protein_quest-0.7.0.dist-info}/entry_points.txt +0 -0
- {protein_quest-0.5.1.dist-info → protein_quest-0.7.0.dist-info}/licenses/LICENSE +0 -0
protein_quest/cli.py
CHANGED
|
@@ -6,14 +6,15 @@ import csv
|
|
|
6
6
|
import logging
|
|
7
7
|
import os
|
|
8
8
|
import sys
|
|
9
|
-
from collections.abc import Callable, Generator, Iterable
|
|
9
|
+
from collections.abc import Callable, Generator, Iterable, Sequence
|
|
10
|
+
from contextlib import suppress
|
|
10
11
|
from importlib.util import find_spec
|
|
11
|
-
from io import TextIOWrapper
|
|
12
|
+
from io import BytesIO, TextIOWrapper
|
|
12
13
|
from pathlib import Path
|
|
13
14
|
from textwrap import dedent
|
|
14
15
|
|
|
15
16
|
from cattrs import structure
|
|
16
|
-
from rich import
|
|
17
|
+
from rich.console import Console
|
|
17
18
|
from rich.logging import RichHandler
|
|
18
19
|
from rich.markdown import Markdown
|
|
19
20
|
from rich.panel import Panel
|
|
@@ -24,18 +25,28 @@ from protein_quest.__version__ import __version__
|
|
|
24
25
|
from protein_quest.alphafold.confidence import ConfidenceFilterQuery, filter_files_on_confidence
|
|
25
26
|
from protein_quest.alphafold.fetch import DownloadableFormat, downloadable_formats
|
|
26
27
|
from protein_quest.alphafold.fetch import fetch_many as af_fetch
|
|
27
|
-
from protein_quest.converter import converter
|
|
28
|
+
from protein_quest.converter import PositiveInt, converter
|
|
28
29
|
from protein_quest.emdb import fetch as emdb_fetch
|
|
29
30
|
from protein_quest.filters import filter_files_on_chain, filter_files_on_residues
|
|
30
31
|
from protein_quest.go import Aspect, allowed_aspects, search_gene_ontology_term, write_go_terms_to_csv
|
|
32
|
+
from protein_quest.io import (
|
|
33
|
+
convert_to_cif_files,
|
|
34
|
+
glob_structure_files,
|
|
35
|
+
locate_structure_file,
|
|
36
|
+
read_structure,
|
|
37
|
+
valid_structure_file_extensions,
|
|
38
|
+
)
|
|
31
39
|
from protein_quest.pdbe import fetch as pdbe_fetch
|
|
32
|
-
from protein_quest.pdbe.io import glob_structure_files, locate_structure_file
|
|
33
40
|
from protein_quest.ss import SecondaryStructureFilterQuery, filter_files_on_secondary_structure
|
|
41
|
+
from protein_quest.structure import structure2uniprot_accessions
|
|
34
42
|
from protein_quest.taxonomy import SearchField, _write_taxonomy_csv, search_fields, search_taxon
|
|
35
43
|
from protein_quest.uniprot import (
|
|
36
44
|
ComplexPortalEntry,
|
|
37
|
-
|
|
45
|
+
PdbResults,
|
|
38
46
|
Query,
|
|
47
|
+
UniprotDetails,
|
|
48
|
+
filter_pdb_results_on_chain_length,
|
|
49
|
+
map_uniprot_accessions2uniprot_details,
|
|
39
50
|
search4af,
|
|
40
51
|
search4emdb,
|
|
41
52
|
search4interaction_partners,
|
|
@@ -53,6 +64,8 @@ from protein_quest.utils import (
|
|
|
53
64
|
user_cache_root_dir,
|
|
54
65
|
)
|
|
55
66
|
|
|
67
|
+
console = Console(stderr=True)
|
|
68
|
+
rprint = console.print
|
|
56
69
|
logger = logging.getLogger(__name__)
|
|
57
70
|
|
|
58
71
|
|
|
@@ -93,6 +106,8 @@ def _add_search_uniprot_parser(subparsers: argparse._SubParsersAction):
|
|
|
93
106
|
action="append",
|
|
94
107
|
help="GO term(s) for molecular function (e.g. GO:0003677). Can be given multiple times.",
|
|
95
108
|
)
|
|
109
|
+
parser.add_argument("--min-sequence-length", type=int, help="Minimum length of the canonical sequence.")
|
|
110
|
+
parser.add_argument("--max-sequence-length", type=int, help="Maximum length of the canonical sequence.")
|
|
96
111
|
parser.add_argument("--limit", type=int, default=10_000, help="Maximum number of uniprot accessions to return")
|
|
97
112
|
parser.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
|
|
98
113
|
|
|
@@ -106,7 +121,7 @@ def _add_search_pdbe_parser(subparsers: argparse._SubParsersAction):
|
|
|
106
121
|
formatter_class=ArgumentDefaultsRichHelpFormatter,
|
|
107
122
|
)
|
|
108
123
|
parser.add_argument(
|
|
109
|
-
"
|
|
124
|
+
"uniprot_accessions",
|
|
110
125
|
type=argparse.FileType("r", encoding="UTF-8"),
|
|
111
126
|
help="Text file with UniProt accessions (one per line). Use `-` for stdin.",
|
|
112
127
|
)
|
|
@@ -114,15 +129,27 @@ def _add_search_pdbe_parser(subparsers: argparse._SubParsersAction):
|
|
|
114
129
|
"output_csv",
|
|
115
130
|
type=argparse.FileType("w", encoding="UTF-8"),
|
|
116
131
|
help=dedent("""\
|
|
117
|
-
Output CSV with
|
|
132
|
+
Output CSV with following columns:
|
|
133
|
+
`uniprot_accession`, `pdb_id`, `method`, `resolution`, `uniprot_chains`, `chain`, `chain_length`.
|
|
118
134
|
Where `uniprot_chains` is the raw UniProt chain string, for example `A=1-100`.
|
|
119
|
-
and where `chain` is the first chain from `uniprot_chains`, for example `A
|
|
135
|
+
and where `chain` is the first chain from `uniprot_chains`, for example `A`
|
|
136
|
+
and `chain_length` is the length of the chain, for example `100`.
|
|
120
137
|
Use `-` for stdout.
|
|
121
138
|
"""),
|
|
122
139
|
)
|
|
123
140
|
parser.add_argument(
|
|
124
141
|
"--limit", type=int, default=10_000, help="Maximum number of PDB uniprot accessions combinations to return"
|
|
125
142
|
)
|
|
143
|
+
parser.add_argument(
|
|
144
|
+
"--min-residues",
|
|
145
|
+
type=int,
|
|
146
|
+
help="Minimum number of residues required in the chain mapped to the UniProt accession.",
|
|
147
|
+
)
|
|
148
|
+
parser.add_argument(
|
|
149
|
+
"--max-residues",
|
|
150
|
+
type=int,
|
|
151
|
+
help="Maximum number of residues allowed in chain mapped to the UniProt accession.",
|
|
152
|
+
)
|
|
126
153
|
parser.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
|
|
127
154
|
|
|
128
155
|
|
|
@@ -135,7 +162,7 @@ def _add_search_alphafold_parser(subparsers: argparse._SubParsersAction):
|
|
|
135
162
|
formatter_class=ArgumentDefaultsRichHelpFormatter,
|
|
136
163
|
)
|
|
137
164
|
parser.add_argument(
|
|
138
|
-
"
|
|
165
|
+
"uniprot_accessions",
|
|
139
166
|
type=argparse.FileType("r", encoding="UTF-8"),
|
|
140
167
|
help="Text file with UniProt accessions (one per line). Use `-` for stdin.",
|
|
141
168
|
)
|
|
@@ -144,6 +171,8 @@ def _add_search_alphafold_parser(subparsers: argparse._SubParsersAction):
|
|
|
144
171
|
type=argparse.FileType("w", encoding="UTF-8"),
|
|
145
172
|
help="Output CSV with AlphaFold IDs per UniProt accession. Use `-` for stdout.",
|
|
146
173
|
)
|
|
174
|
+
parser.add_argument("--min-sequence-length", type=int, help="Minimum length of the canonical sequence.")
|
|
175
|
+
parser.add_argument("--max-sequence-length", type=int, help="Maximum length of the canonical sequence.")
|
|
147
176
|
parser.add_argument(
|
|
148
177
|
"--limit", type=int, default=10_000, help="Maximum number of Alphafold entry identifiers to return"
|
|
149
178
|
)
|
|
@@ -242,7 +271,7 @@ def _add_search_interaction_partners_parser(subparsers: argparse._SubParsersActi
|
|
|
242
271
|
formatter_class=ArgumentDefaultsRichHelpFormatter,
|
|
243
272
|
)
|
|
244
273
|
parser.add_argument(
|
|
245
|
-
"
|
|
274
|
+
"uniprot_accession",
|
|
246
275
|
type=str,
|
|
247
276
|
help="UniProt accession (for example P12345).",
|
|
248
277
|
)
|
|
@@ -284,7 +313,7 @@ def _add_search_complexes_parser(subparsers: argparse._SubParsersAction):
|
|
|
284
313
|
formatter_class=ArgumentDefaultsRichHelpFormatter,
|
|
285
314
|
)
|
|
286
315
|
parser.add_argument(
|
|
287
|
-
"
|
|
316
|
+
"uniprot_accessions",
|
|
288
317
|
type=argparse.FileType("r", encoding="UTF-8"),
|
|
289
318
|
help="Text file with UniProt accessions (one per line) as query for searching complexes. Use `-` for stdin.",
|
|
290
319
|
)
|
|
@@ -297,6 +326,76 @@ def _add_search_complexes_parser(subparsers: argparse._SubParsersAction):
|
|
|
297
326
|
parser.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
|
|
298
327
|
|
|
299
328
|
|
|
329
|
+
def _add_search_uniprot_details_parser(subparsers: argparse._SubParsersAction):
|
|
330
|
+
"""Add search uniprot details subcommand parser."""
|
|
331
|
+
description = dedent("""\
|
|
332
|
+
Retrieve UniProt details for given UniProt accessions
|
|
333
|
+
from the Uniprot SPARQL endpoint.
|
|
334
|
+
|
|
335
|
+
The output CSV file has the following columns:
|
|
336
|
+
|
|
337
|
+
- uniprot_accession: UniProt accession.
|
|
338
|
+
- uniprot_id: UniProt ID (mnemonic).
|
|
339
|
+
- sequence_length: Length of the canonical sequence.
|
|
340
|
+
- reviewed: Whether the entry is reviewed (Swiss-Prot) or unreviewed (TrEMBL).
|
|
341
|
+
- protein_name: Recommended protein name.
|
|
342
|
+
- taxon_id: NCBI Taxonomy ID of the organism.
|
|
343
|
+
- taxon_name: Scientific name of the organism.
|
|
344
|
+
|
|
345
|
+
The order of the output CSV can be different from the input order.
|
|
346
|
+
""")
|
|
347
|
+
parser = subparsers.add_parser(
|
|
348
|
+
"uniprot-details",
|
|
349
|
+
help="Retrieve UniProt details for given UniProt accessions",
|
|
350
|
+
description=Markdown(description, style="argparse.text"), # type: ignore using rich formatter makes this OK
|
|
351
|
+
formatter_class=ArgumentDefaultsRichHelpFormatter,
|
|
352
|
+
)
|
|
353
|
+
parser.add_argument(
|
|
354
|
+
"uniprot_accessions",
|
|
355
|
+
type=argparse.FileType("r", encoding="UTF-8"),
|
|
356
|
+
help="Text file with UniProt accessions (one per line). Use `-` for stdin.",
|
|
357
|
+
)
|
|
358
|
+
parser.add_argument(
|
|
359
|
+
"output_csv",
|
|
360
|
+
type=argparse.FileType("w", encoding="UTF-8"),
|
|
361
|
+
help="Output CSV with UniProt details. Use `-` for stdout.",
|
|
362
|
+
)
|
|
363
|
+
parser.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
|
|
364
|
+
parser.add_argument("--batch-size", type=int, default=1_000, help="Number of accessions to query per batch")
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _add_copy_method_arguments(parser):
|
|
368
|
+
parser.add_argument(
|
|
369
|
+
"--copy-method",
|
|
370
|
+
type=str,
|
|
371
|
+
choices=copy_methods,
|
|
372
|
+
default="hardlink",
|
|
373
|
+
help=dedent("""\
|
|
374
|
+
How to make target file be same file as source file.
|
|
375
|
+
By default uses hardlinks to save disk space.
|
|
376
|
+
Note that hardlinks only work within the same filesystem and are harder to track.
|
|
377
|
+
If you want to track cached files easily then use 'symlink'.
|
|
378
|
+
On Windows you need developer mode or admin privileges to create symlinks.
|
|
379
|
+
"""),
|
|
380
|
+
)
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def _add_cacher_arguments(parser: argparse.ArgumentParser):
|
|
384
|
+
"""Add cacher arguments to parser."""
|
|
385
|
+
parser.add_argument(
|
|
386
|
+
"--no-cache",
|
|
387
|
+
action="store_true",
|
|
388
|
+
help="Disable caching of files to central location.",
|
|
389
|
+
)
|
|
390
|
+
parser.add_argument(
|
|
391
|
+
"--cache-dir",
|
|
392
|
+
type=Path,
|
|
393
|
+
default=user_cache_root_dir(),
|
|
394
|
+
help="Directory to use as cache for files.",
|
|
395
|
+
)
|
|
396
|
+
_add_copy_method_arguments(parser)
|
|
397
|
+
|
|
398
|
+
|
|
300
399
|
def _add_retrieve_pdbe_parser(subparsers: argparse._SubParsersAction):
|
|
301
400
|
"""Add retrieve pdbe subcommand parser."""
|
|
302
401
|
parser = subparsers.add_parser(
|
|
@@ -345,6 +444,19 @@ def _add_retrieve_alphafold_parser(subparsers: argparse._SubParsersAction):
|
|
|
345
444
|
help=dedent("""AlphaFold formats to retrieve. Can be specified multiple times.
|
|
346
445
|
Default is 'summary' and 'cif'."""),
|
|
347
446
|
)
|
|
447
|
+
parser.add_argument(
|
|
448
|
+
"--gzip-files",
|
|
449
|
+
action="store_true",
|
|
450
|
+
help="Whether to gzip the downloaded files. Excludes summary files, they are always uncompressed.",
|
|
451
|
+
)
|
|
452
|
+
parser.add_argument(
|
|
453
|
+
"--all-isoforms",
|
|
454
|
+
action="store_true",
|
|
455
|
+
help=(
|
|
456
|
+
"Whether to return all isoforms of each uniprot entry. "
|
|
457
|
+
"If not given then only the Alphafold entry for the canonical sequence is returned."
|
|
458
|
+
),
|
|
459
|
+
)
|
|
348
460
|
parser.add_argument(
|
|
349
461
|
"--max-parallel-downloads",
|
|
350
462
|
type=int,
|
|
@@ -533,6 +645,7 @@ def _add_search_subcommands(subparsers: argparse._SubParsersAction):
|
|
|
533
645
|
_add_search_taxonomy_parser(subsubparsers)
|
|
534
646
|
_add_search_interaction_partners_parser(subsubparsers)
|
|
535
647
|
_add_search_complexes_parser(subsubparsers)
|
|
648
|
+
_add_search_uniprot_details_parser(subsubparsers)
|
|
536
649
|
|
|
537
650
|
|
|
538
651
|
def _add_retrieve_subcommands(subparsers: argparse._SubParsersAction):
|
|
@@ -561,6 +674,75 @@ def _add_filter_subcommands(subparsers: argparse._SubParsersAction):
|
|
|
561
674
|
_add_filter_ss_parser(subsubparsers)
|
|
562
675
|
|
|
563
676
|
|
|
677
|
+
def _add_convert_uniprot_parser(subparsers: argparse._SubParsersAction):
|
|
678
|
+
"""Add convert uniprot subcommand parser."""
|
|
679
|
+
parser = subparsers.add_parser(
|
|
680
|
+
"uniprot",
|
|
681
|
+
help="Convert structure files to list of UniProt accessions.",
|
|
682
|
+
description="Convert structure files to list of UniProt accessions. "
|
|
683
|
+
"Uniprot accessions are read from database reference of each structure.",
|
|
684
|
+
formatter_class=ArgumentDefaultsRichHelpFormatter,
|
|
685
|
+
)
|
|
686
|
+
parser.add_argument(
|
|
687
|
+
"input_dir",
|
|
688
|
+
type=Path,
|
|
689
|
+
help=f"Directory with structure files. Supported extensions are {valid_structure_file_extensions}",
|
|
690
|
+
)
|
|
691
|
+
parser.add_argument(
|
|
692
|
+
"output",
|
|
693
|
+
type=argparse.FileType("wt", encoding="UTF-8"),
|
|
694
|
+
help="Output text file with UniProt accessions (one per line). Use '-' for stdout.",
|
|
695
|
+
)
|
|
696
|
+
parser.add_argument(
|
|
697
|
+
"--grouped",
|
|
698
|
+
action="store_true",
|
|
699
|
+
help="Whether to group accessions by structure file. "
|
|
700
|
+
"If set output changes to `<structure_file1>,<acc1>\\n<structure_file1>,<acc2>` format.",
|
|
701
|
+
)
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
def _add_convert_structures_parser(subparsers: argparse._SubParsersAction):
|
|
705
|
+
"""Add convert structures subcommand parser."""
|
|
706
|
+
parser = subparsers.add_parser(
|
|
707
|
+
"structures",
|
|
708
|
+
help="Convert structure files between formats",
|
|
709
|
+
formatter_class=ArgumentDefaultsRichHelpFormatter,
|
|
710
|
+
)
|
|
711
|
+
parser.add_argument(
|
|
712
|
+
"input_dir",
|
|
713
|
+
type=Path,
|
|
714
|
+
help=f"Directory with structure files. Supported extensions are {valid_structure_file_extensions}",
|
|
715
|
+
)
|
|
716
|
+
parser.add_argument(
|
|
717
|
+
"--output-dir",
|
|
718
|
+
type=Path,
|
|
719
|
+
help=dedent("""\
|
|
720
|
+
Directory to write converted structure files. If not given, files are written to `input_dir`.
|
|
721
|
+
"""),
|
|
722
|
+
)
|
|
723
|
+
parser.add_argument(
|
|
724
|
+
"--format",
|
|
725
|
+
type=str,
|
|
726
|
+
choices=("cif",),
|
|
727
|
+
default="cif",
|
|
728
|
+
help="Output format to convert to.",
|
|
729
|
+
)
|
|
730
|
+
_add_copy_method_arguments(parser)
|
|
731
|
+
|
|
732
|
+
|
|
733
|
+
def _add_convert_subcommands(subparsers: argparse._SubParsersAction):
|
|
734
|
+
"""Add convert command and its subcommands."""
|
|
735
|
+
parser = subparsers.add_parser(
|
|
736
|
+
"convert",
|
|
737
|
+
help="Convert files between formats",
|
|
738
|
+
formatter_class=ArgumentDefaultsRichHelpFormatter,
|
|
739
|
+
)
|
|
740
|
+
subsubparsers = parser.add_subparsers(dest="convert_cmd", required=True)
|
|
741
|
+
|
|
742
|
+
_add_convert_structures_parser(subsubparsers)
|
|
743
|
+
_add_convert_uniprot_parser(subsubparsers)
|
|
744
|
+
|
|
745
|
+
|
|
564
746
|
def _add_mcp_command(subparsers: argparse._SubParsersAction):
|
|
565
747
|
"""Add MCP command."""
|
|
566
748
|
|
|
@@ -580,38 +762,6 @@ def _add_mcp_command(subparsers: argparse._SubParsersAction):
|
|
|
580
762
|
parser.add_argument("--port", default=8000, type=int, help="Port to bind the server to")
|
|
581
763
|
|
|
582
764
|
|
|
583
|
-
def _add_copy_method_arguments(parser):
|
|
584
|
-
parser.add_argument(
|
|
585
|
-
"--copy-method",
|
|
586
|
-
type=str,
|
|
587
|
-
choices=copy_methods,
|
|
588
|
-
default="hardlink",
|
|
589
|
-
help=dedent("""\
|
|
590
|
-
How to make target file be same file as source file.
|
|
591
|
-
By default uses hardlinks to save disk space.
|
|
592
|
-
Note that hardlinks only work within the same filesystem and are harder to track.
|
|
593
|
-
If you want to track cached files easily then use 'symlink'.
|
|
594
|
-
On Windows you need developer mode or admin privileges to create symlinks.
|
|
595
|
-
"""),
|
|
596
|
-
)
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
def _add_cacher_arguments(parser: argparse.ArgumentParser):
|
|
600
|
-
"""Add cacher arguments to parser."""
|
|
601
|
-
parser.add_argument(
|
|
602
|
-
"--no-cache",
|
|
603
|
-
action="store_true",
|
|
604
|
-
help="Disable caching of files to central location.",
|
|
605
|
-
)
|
|
606
|
-
parser.add_argument(
|
|
607
|
-
"--cache-dir",
|
|
608
|
-
type=Path,
|
|
609
|
-
default=user_cache_root_dir(),
|
|
610
|
-
help="Directory to use as cache for files.",
|
|
611
|
-
)
|
|
612
|
-
_add_copy_method_arguments(parser)
|
|
613
|
-
|
|
614
|
-
|
|
615
765
|
def make_parser() -> argparse.ArgumentParser:
|
|
616
766
|
parser = argparse.ArgumentParser(
|
|
617
767
|
description="Protein Quest CLI", prog="protein-quest", formatter_class=ArgumentDefaultsRichHelpFormatter
|
|
@@ -624,25 +774,18 @@ def make_parser() -> argparse.ArgumentParser:
|
|
|
624
774
|
_add_search_subcommands(subparsers)
|
|
625
775
|
_add_retrieve_subcommands(subparsers)
|
|
626
776
|
_add_filter_subcommands(subparsers)
|
|
777
|
+
_add_convert_subcommands(subparsers)
|
|
627
778
|
_add_mcp_command(subparsers)
|
|
628
779
|
|
|
629
780
|
return parser
|
|
630
781
|
|
|
631
782
|
|
|
632
|
-
def
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
# Dispatch table to reduce complexity
|
|
639
|
-
cmd = args.command
|
|
640
|
-
sub = getattr(args, f"{cmd}_cmd", None)
|
|
641
|
-
handler = HANDLERS.get((cmd, sub))
|
|
642
|
-
if handler is None:
|
|
643
|
-
msg = f"Unknown command: {cmd} {sub}"
|
|
644
|
-
raise SystemExit(msg)
|
|
645
|
-
handler(args)
|
|
783
|
+
def _name_of(file: TextIOWrapper | BytesIO) -> str:
|
|
784
|
+
try:
|
|
785
|
+
return file.name
|
|
786
|
+
except AttributeError:
|
|
787
|
+
# In pytest BytesIO is used stdout which has no 'name' attribute
|
|
788
|
+
return "<stdout>"
|
|
646
789
|
|
|
647
790
|
|
|
648
791
|
def _handle_search_uniprot(args):
|
|
@@ -651,6 +794,8 @@ def _handle_search_uniprot(args):
|
|
|
651
794
|
subcellular_location_uniprot = args.subcellular_location_uniprot
|
|
652
795
|
subcellular_location_go = args.subcellular_location_go
|
|
653
796
|
molecular_function_go = args.molecular_function_go
|
|
797
|
+
min_sequence_length = args.min_sequence_length
|
|
798
|
+
max_sequence_length = args.max_sequence_length
|
|
654
799
|
limit = args.limit
|
|
655
800
|
timeout = args.timeout
|
|
656
801
|
output_file = args.output
|
|
@@ -662,54 +807,78 @@ def _handle_search_uniprot(args):
|
|
|
662
807
|
"subcellular_location_uniprot": subcellular_location_uniprot,
|
|
663
808
|
"subcellular_location_go": subcellular_location_go,
|
|
664
809
|
"molecular_function_go": molecular_function_go,
|
|
810
|
+
"min_sequence_length": min_sequence_length,
|
|
811
|
+
"max_sequence_length": max_sequence_length,
|
|
665
812
|
},
|
|
666
813
|
Query,
|
|
667
814
|
)
|
|
668
815
|
rprint("Searching for UniProt accessions")
|
|
669
816
|
accs = search4uniprot(query=query, limit=limit, timeout=timeout)
|
|
670
|
-
rprint(f"Found {len(accs)} UniProt accessions, written to {output_file
|
|
817
|
+
rprint(f"Found {len(accs)} UniProt accessions, written to {_name_of(output_file)}")
|
|
671
818
|
_write_lines(output_file, sorted(accs))
|
|
672
819
|
|
|
673
820
|
|
|
674
821
|
def _handle_search_pdbe(args):
|
|
675
|
-
|
|
822
|
+
uniprot_accessions = args.uniprot_accessions
|
|
676
823
|
limit = args.limit
|
|
677
824
|
timeout = args.timeout
|
|
678
825
|
output_csv = args.output_csv
|
|
826
|
+
min_residues = converter.structure(args.min_residues, PositiveInt | None) # pyright: ignore[reportArgumentType]
|
|
827
|
+
max_residues = converter.structure(args.max_residues, PositiveInt | None) # pyright: ignore[reportArgumentType]
|
|
679
828
|
|
|
680
|
-
accs = set(_read_lines(
|
|
829
|
+
accs = set(_read_lines(uniprot_accessions))
|
|
681
830
|
rprint(f"Finding PDB entries for {len(accs)} uniprot accessions")
|
|
682
831
|
results = search4pdb(accs, limit=limit, timeout=timeout)
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
832
|
+
|
|
833
|
+
raw_nr_results = len(results)
|
|
834
|
+
raw_total_pdbs = sum([len(v) for v in results.values()])
|
|
835
|
+
if min_residues or max_residues:
|
|
836
|
+
results = filter_pdb_results_on_chain_length(results, min_residues, max_residues)
|
|
837
|
+
total_pdbs = sum([len(v) for v in results.values()])
|
|
838
|
+
rprint(f"Before filtering found {raw_total_pdbs} PDB entries for {raw_nr_results} uniprot accessions.")
|
|
839
|
+
rprint(
|
|
840
|
+
f"After filtering on chain length ({min_residues}, {max_residues}) "
|
|
841
|
+
f"remained {total_pdbs} PDB entries for {len(results)} uniprot accessions."
|
|
842
|
+
)
|
|
843
|
+
else:
|
|
844
|
+
rprint(f"Found {raw_total_pdbs} PDB entries for {raw_nr_results} uniprot accessions")
|
|
845
|
+
|
|
686
846
|
_write_pdbe_csv(output_csv, results)
|
|
847
|
+
rprint(f"Written to {_name_of(output_csv)}")
|
|
687
848
|
|
|
688
849
|
|
|
689
850
|
def _handle_search_alphafold(args):
|
|
690
|
-
|
|
851
|
+
uniprot_accessions = args.uniprot_accessions
|
|
852
|
+
min_sequence_length = converter.structure(args.min_sequence_length, PositiveInt | None) # pyright: ignore[reportArgumentType]
|
|
853
|
+
max_sequence_length = converter.structure(args.max_sequence_length, PositiveInt | None) # pyright: ignore[reportArgumentType]
|
|
691
854
|
limit = args.limit
|
|
692
855
|
timeout = args.timeout
|
|
693
856
|
output_csv = args.output_csv
|
|
694
857
|
|
|
695
|
-
accs = _read_lines(
|
|
858
|
+
accs = _read_lines(uniprot_accessions)
|
|
696
859
|
rprint(f"Finding AlphaFold entries for {len(accs)} uniprot accessions")
|
|
697
|
-
results = search4af(
|
|
698
|
-
|
|
860
|
+
results = search4af(
|
|
861
|
+
accs,
|
|
862
|
+
min_sequence_length=min_sequence_length,
|
|
863
|
+
max_sequence_length=max_sequence_length,
|
|
864
|
+
limit=limit,
|
|
865
|
+
timeout=timeout,
|
|
866
|
+
)
|
|
867
|
+
rprint(f"Found {len(results)} AlphaFold entries, written to {_name_of(output_csv)}")
|
|
699
868
|
_write_dict_of_sets2csv(output_csv, results, "af_id")
|
|
700
869
|
|
|
701
870
|
|
|
702
871
|
def _handle_search_emdb(args):
|
|
703
|
-
|
|
872
|
+
uniprot_accessions = args.uniprot_accessions
|
|
704
873
|
limit = args.limit
|
|
705
874
|
timeout = args.timeout
|
|
706
875
|
output_csv = args.output_csv
|
|
707
876
|
|
|
708
|
-
accs = _read_lines(
|
|
877
|
+
accs = _read_lines(uniprot_accessions)
|
|
709
878
|
rprint(f"Finding EMDB entries for {len(accs)} uniprot accessions")
|
|
710
879
|
results = search4emdb(accs, limit=limit, timeout=timeout)
|
|
711
880
|
total_emdbs = sum([len(v) for v in results.values()])
|
|
712
|
-
rprint(f"Found {total_emdbs} EMDB entries, written to {output_csv
|
|
881
|
+
rprint(f"Found {total_emdbs} EMDB entries, written to {_name_of(output_csv)}")
|
|
713
882
|
_write_dict_of_sets2csv(output_csv, results, "emdb_id")
|
|
714
883
|
|
|
715
884
|
|
|
@@ -724,7 +893,7 @@ def _handle_search_go(args):
|
|
|
724
893
|
else:
|
|
725
894
|
rprint(f"Searching for GO terms matching '{term}'")
|
|
726
895
|
results = asyncio.run(search_gene_ontology_term(term, aspect=aspect, limit=limit))
|
|
727
|
-
rprint(f"Found {len(results)} GO terms, written to {output_csv
|
|
896
|
+
rprint(f"Found {len(results)} GO terms, written to {_name_of(output_csv)}")
|
|
728
897
|
write_go_terms_to_csv(results, output_csv)
|
|
729
898
|
|
|
730
899
|
|
|
@@ -739,36 +908,49 @@ def _handle_search_taxonomy(args):
|
|
|
739
908
|
else:
|
|
740
909
|
rprint(f"Searching for taxon information matching '{query}'")
|
|
741
910
|
results = asyncio.run(search_taxon(query=query, field=field, limit=limit))
|
|
742
|
-
rprint(f"Found {len(results)} taxons, written to {output_csv
|
|
911
|
+
rprint(f"Found {len(results)} taxons, written to {_name_of(output_csv)}")
|
|
743
912
|
_write_taxonomy_csv(results, output_csv)
|
|
744
913
|
|
|
745
914
|
|
|
746
915
|
def _handle_search_interaction_partners(args: argparse.Namespace):
|
|
747
|
-
|
|
916
|
+
uniprot_accession: str = args.uniprot_accession
|
|
748
917
|
excludes: set[str] = set(args.exclude) if args.exclude else set()
|
|
749
918
|
limit: int = args.limit
|
|
750
919
|
timeout: int = args.timeout
|
|
751
920
|
output_csv: TextIOWrapper = args.output_csv
|
|
752
921
|
|
|
753
|
-
rprint(f"Searching for interaction partners of '{
|
|
754
|
-
results = search4interaction_partners(
|
|
755
|
-
rprint(f"Found {len(results)} interaction partners, written to {output_csv
|
|
922
|
+
rprint(f"Searching for interaction partners of '{uniprot_accession}'")
|
|
923
|
+
results = search4interaction_partners(uniprot_accession, excludes=excludes, limit=limit, timeout=timeout)
|
|
924
|
+
rprint(f"Found {len(results)} interaction partners, written to {_name_of(output_csv)}")
|
|
756
925
|
_write_lines(output_csv, results.keys())
|
|
757
926
|
|
|
758
927
|
|
|
759
928
|
def _handle_search_complexes(args: argparse.Namespace):
|
|
760
|
-
|
|
929
|
+
uniprot_accessions = args.uniprot_accessions
|
|
761
930
|
limit = args.limit
|
|
762
931
|
timeout = args.timeout
|
|
763
932
|
output_csv = args.output_csv
|
|
764
933
|
|
|
765
|
-
accs = _read_lines(
|
|
934
|
+
accs = _read_lines(uniprot_accessions)
|
|
766
935
|
rprint(f"Finding complexes for {len(accs)} uniprot accessions")
|
|
767
936
|
results = search4macromolecular_complexes(accs, limit=limit, timeout=timeout)
|
|
768
|
-
rprint(f"Found {len(results)} complexes, written to {output_csv
|
|
937
|
+
rprint(f"Found {len(results)} complexes, written to {_name_of(output_csv)}")
|
|
769
938
|
_write_complexes_csv(results, output_csv)
|
|
770
939
|
|
|
771
940
|
|
|
941
|
+
def _handle_search_uniprot_details(args: argparse.Namespace):
|
|
942
|
+
uniprot_accessions = args.uniprot_accessions
|
|
943
|
+
timeout = args.timeout
|
|
944
|
+
batch_size = args.batch_size
|
|
945
|
+
output_csv: TextIOWrapper = args.output_csv
|
|
946
|
+
|
|
947
|
+
accs = _read_lines(uniprot_accessions)
|
|
948
|
+
rprint(f"Retrieving UniProt entry details for {len(accs)} uniprot accessions")
|
|
949
|
+
results = list(map_uniprot_accessions2uniprot_details(accs, timeout=timeout, batch_size=batch_size))
|
|
950
|
+
_write_uniprot_details_csv(output_csv, results)
|
|
951
|
+
rprint(f"Retrieved details for {len(results)} UniProt entries, written to {_name_of(output_csv)}")
|
|
952
|
+
|
|
953
|
+
|
|
772
954
|
def _initialize_cacher(args: argparse.Namespace) -> Cacher:
|
|
773
955
|
if args.no_cache:
|
|
774
956
|
return PassthroughCacher()
|
|
@@ -798,17 +980,25 @@ def _handle_retrieve_alphafold(args):
|
|
|
798
980
|
alphafold_csv = args.alphafold_csv
|
|
799
981
|
max_parallel_downloads = args.max_parallel_downloads
|
|
800
982
|
cacher = _initialize_cacher(args)
|
|
983
|
+
gzip_files = args.gzip_files
|
|
984
|
+
all_isoforms = args.all_isoforms
|
|
801
985
|
|
|
802
986
|
if what_formats is None:
|
|
803
987
|
what_formats = {"summary", "cif"}
|
|
804
988
|
|
|
805
|
-
# TODO besides `
|
|
989
|
+
# TODO besides `uniprot_accession,af_id\n` csv also allow headless single column format
|
|
806
990
|
#
|
|
807
991
|
af_ids = _read_column_from_csv(alphafold_csv, "af_id")
|
|
808
992
|
validated_what: set[DownloadableFormat] = structure(what_formats, set[DownloadableFormat])
|
|
809
993
|
rprint(f"Retrieving {len(af_ids)} AlphaFold entries with formats {validated_what}")
|
|
810
994
|
afs = af_fetch(
|
|
811
|
-
af_ids,
|
|
995
|
+
af_ids,
|
|
996
|
+
download_dir,
|
|
997
|
+
what=validated_what,
|
|
998
|
+
max_parallel_downloads=max_parallel_downloads,
|
|
999
|
+
cacher=cacher,
|
|
1000
|
+
gzip_files=gzip_files,
|
|
1001
|
+
all_isoforms=all_isoforms,
|
|
812
1002
|
)
|
|
813
1003
|
total_nr_files = sum(af.nr_of_files() for af in afs)
|
|
814
1004
|
rprint(f"Retrieved {total_nr_files} AlphaFold files and {len(afs)} summaries, written to {download_dir}")
|
|
@@ -863,11 +1053,11 @@ def _handle_filter_confidence(args: argparse.Namespace):
|
|
|
863
1053
|
if r.filtered_file:
|
|
864
1054
|
passed_count += 1
|
|
865
1055
|
if stats_file:
|
|
866
|
-
writer.writerow([r.input_file, r.count, r.filtered_file is not None, r.filtered_file])
|
|
1056
|
+
writer.writerow([r.input_file, r.count, r.filtered_file is not None, r.filtered_file]) # pyright: ignore[reportPossiblyUnboundVariable]
|
|
867
1057
|
|
|
868
1058
|
rprint(f"Filtered {passed_count} mmcif/PDB files by confidence, written to {output_dir} directory")
|
|
869
1059
|
if stats_file:
|
|
870
|
-
rprint(f"Statistics written to {stats_file
|
|
1060
|
+
rprint(f"Statistics written to {_name_of(stats_file)}")
|
|
871
1061
|
|
|
872
1062
|
|
|
873
1063
|
def _handle_filter_chain(args):
|
|
@@ -933,13 +1123,13 @@ def _handle_filter_residue(args):
|
|
|
933
1123
|
input_files, output_dir, min_residues=min_residues, max_residues=max_residues, copy_method=copy_method
|
|
934
1124
|
):
|
|
935
1125
|
if stats_file:
|
|
936
|
-
writer.writerow([r.input_file, r.residue_count, r.passed, r.output_file])
|
|
1126
|
+
writer.writerow([r.input_file, r.residue_count, r.passed, r.output_file]) # pyright: ignore[reportPossiblyUnboundVariable]
|
|
937
1127
|
if r.passed:
|
|
938
1128
|
nr_passed += 1
|
|
939
1129
|
|
|
940
1130
|
rprint(f"Wrote {nr_passed} files to {output_dir} directory.")
|
|
941
1131
|
if stats_file:
|
|
942
|
-
rprint(f"Statistics written to {stats_file
|
|
1132
|
+
rprint(f"Statistics written to {_name_of(stats_file)}")
|
|
943
1133
|
|
|
944
1134
|
|
|
945
1135
|
def _handle_filter_ss(args):
|
|
@@ -987,7 +1177,7 @@ def _handle_filter_ss(args):
|
|
|
987
1177
|
copyfile(input_file, output_file, copy_method)
|
|
988
1178
|
nr_passed += 1
|
|
989
1179
|
if stats_file:
|
|
990
|
-
writer.writerow(
|
|
1180
|
+
writer.writerow( # pyright: ignore[reportPossiblyUnboundVariable]
|
|
991
1181
|
[
|
|
992
1182
|
input_file,
|
|
993
1183
|
result.stats.nr_residues,
|
|
@@ -1001,7 +1191,7 @@ def _handle_filter_ss(args):
|
|
|
1001
1191
|
)
|
|
1002
1192
|
rprint(f"Wrote {nr_passed} files to {output_dir} directory.")
|
|
1003
1193
|
if stats_file:
|
|
1004
|
-
rprint(f"Statistics written to {stats_file
|
|
1194
|
+
rprint(f"Statistics written to {_name_of(stats_file)}")
|
|
1005
1195
|
|
|
1006
1196
|
|
|
1007
1197
|
def _handle_mcp(args):
|
|
@@ -1017,24 +1207,45 @@ def _handle_mcp(args):
|
|
|
1017
1207
|
mcp.run(transport=args.transport, host=args.host, port=args.port)
|
|
1018
1208
|
|
|
1019
1209
|
|
|
1020
|
-
|
|
1021
|
-
(
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
(
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1210
|
+
def _handle_convert_uniprot(args):
    """Extract UniProt accessions from structure files and write them as text.

    With ``--grouped`` each output line is ``<file>,<accession>`` per input
    file; otherwise a single deduplicated, sorted accession list is written.
    """
    source_dir = structure(args.input_dir, Path)
    out: TextIOWrapper = args.output
    files = sorted(glob_structure_files(source_dir))
    if not args.grouped:
        # Merge accessions from every file into one deduplicated set.
        accessions: set[str] = set()
        for path in tqdm(files, unit="file"):
            accessions.update(structure2uniprot_accessions(read_structure(path)))
        _write_lines(out, sorted(accessions))
    else:
        # One CSV-ish line per (file, accession) pair, accessions sorted per file.
        for path in tqdm(files, unit="file"):
            found = structure2uniprot_accessions(read_structure(path))
            _write_lines(out, [f"{path},{accession}" for accession in sorted(found)])
|
|
1228
|
+
|
|
1229
|
+
|
|
1230
|
+
def _handle_convert_structures(args):
    """Convert every structure file in a directory to mmCIF format.

    Output goes to ``--output-dir`` when given, otherwise files are written
    next to the inputs.
    """
    source_dir = structure(args.input_dir, Path)
    target_dir = structure(args.output_dir, Path) if args.output_dir is not None else source_dir
    target_dir.mkdir(parents=True, exist_ok=True)
    copy_method: CopyMethod = structure(args.copy_method, CopyMethod)  # pyright: ignore[reportArgumentType]

    files = sorted(glob_structure_files(source_dir))
    rprint(f"Converting {len(files)} files in {source_dir} directory to cif format.")
    conversions = convert_to_cif_files(files, target_dir, copy_method=copy_method)
    # Drain the generator so the conversions actually run; tqdm shows progress.
    for _converted in tqdm(conversions, total=len(files), unit="file"):
        pass
    rprint(f"Converted {len(files)} files into {target_dir}.")
|
|
1038
1249
|
|
|
1039
1250
|
|
|
1040
1251
|
def _read_lines(file: TextIOWrapper) -> list[str]:
|
|
@@ -1042,7 +1253,8 @@ def _read_lines(file: TextIOWrapper) -> list[str]:
|
|
|
1042
1253
|
|
|
1043
1254
|
|
|
1044
1255
|
def _make_sure_parent_exists(file: TextIOWrapper):
|
|
1045
|
-
|
|
1256
|
+
# Can not create dir for stdout
|
|
1257
|
+
with suppress(AttributeError):
|
|
1046
1258
|
Path(file.name).parent.mkdir(parents=True, exist_ok=True)
|
|
1047
1259
|
|
|
1048
1260
|
|
|
@@ -1051,34 +1263,35 @@ def _write_lines(file: TextIOWrapper, lines: Iterable[str]):
|
|
|
1051
1263
|
file.writelines(line + os.linesep for line in lines)
|
|
1052
1264
|
|
|
1053
1265
|
|
|
1054
|
-
def _write_pdbe_csv(path: TextIOWrapper, data:
|
|
1266
|
+
def _write_pdbe_csv(path: TextIOWrapper, data: PdbResults):
    """Write PDBe search results as CSV, one row per (accession, entry) pair.

    Rows are ordered by accession, then by PDB id and method, so the output
    is deterministic for a given input.
    """
    _make_sure_parent_exists(path)
    columns = ["uniprot_accession", "pdb_id", "method", "resolution", "uniprot_chains", "chain", "chain_length"]
    writer = csv.DictWriter(path, fieldnames=columns)
    writer.writeheader()
    for accession, entries in sorted(data.items()):
        ordered_entries = sorted(entries, key=lambda entry: (entry.id, entry.method))
        for entry in ordered_entries:
            row = {
                "uniprot_accession": accession,
                "pdb_id": entry.id,
                "method": entry.method,
                # Empty cell rather than "None" when resolution is unknown.
                "resolution": entry.resolution or "",
                "uniprot_chains": entry.uniprot_chains,
                "chain": entry.chain,
                "chain_length": entry.chain_length,
            }
            writer.writerow(row)
|
|
1071
1284
|
|
|
1072
1285
|
|
|
1073
1286
|
def _write_dict_of_sets2csv(file: TextIOWrapper, data: dict[str, set[str]], ref_id_field: str):
    """Write a mapping of accession -> reference ids as a two-column CSV.

    One row is emitted per (accession, ref_id) pair, sorted on both columns
    for deterministic output. The second column header is *ref_id_field*.
    """
    _make_sure_parent_exists(file)
    writer = csv.DictWriter(file, fieldnames=["uniprot_accession", ref_id_field])
    writer.writeheader()
    for accession, ref_ids in sorted(data.items()):
        writer.writerows(
            {"uniprot_accession": accession, ref_id_field: ref_id} for ref_id in sorted(ref_ids)
        )
|
|
1082
1295
|
|
|
1083
1296
|
|
|
1084
1297
|
def _iter_csv_rows(file: TextIOWrapper) -> Generator[dict[str, str]]:
|
|
@@ -1118,3 +1331,61 @@ def _write_complexes_csv(complexes: list[ComplexPortalEntry], output_csv: TextIO
|
|
|
1118
1331
|
members_str,
|
|
1119
1332
|
]
|
|
1120
1333
|
)
|
|
1334
|
+
|
|
1335
|
+
|
|
1336
|
+
def _write_uniprot_details_csv(
    output_csv: TextIOWrapper,
    uniprot_details_list: Iterable[UniprotDetails],
) -> None:
    """Write UniProt entry details to a CSV file.

    Args:
        output_csv: Writable text stream that receives the CSV.
        uniprot_details_list: Details to write, one row per entry.

    Raises:
        ValueError: If no details were given.
    """
    # Materialize first: a generator/iterator is always truthy, so testing
    # the iterable directly would skip the emptiness check and fail later
    # at rows[0] with a confusing IndexError instead of the intended error.
    details = list(uniprot_details_list)
    if not details:
        msg = "No UniProt entries found for given accessions"
        raise ValueError(msg)
    # As all props of UniprotDetails are scalar, we can directly unstructure to dicts
    rows = converter.unstructure(details)
    fieldnames = rows[0].keys()
    writer = csv.DictWriter(output_csv, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)
|
|
1349
|
+
|
|
1350
|
+
|
|
1351
|
+
# Dispatch table mapping (command, subcommand) to its handler. Commands
# without subcommands (e.g. ``mcp``) use None as the subcommand key.
HANDLERS: dict[tuple[str, str | None], Callable] = dict(
    [
        (("search", "uniprot"), _handle_search_uniprot),
        (("search", "pdbe"), _handle_search_pdbe),
        (("search", "alphafold"), _handle_search_alphafold),
        (("search", "emdb"), _handle_search_emdb),
        (("search", "go"), _handle_search_go),
        (("search", "taxonomy"), _handle_search_taxonomy),
        (("search", "interaction-partners"), _handle_search_interaction_partners),
        (("search", "complexes"), _handle_search_complexes),
        (("search", "uniprot-details"), _handle_search_uniprot_details),
        (("retrieve", "pdbe"), _handle_retrieve_pdbe),
        (("retrieve", "alphafold"), _handle_retrieve_alphafold),
        (("retrieve", "emdb"), _handle_retrieve_emdb),
        (("filter", "confidence"), _handle_filter_confidence),
        (("filter", "chain"), _handle_filter_chain),
        (("filter", "residue"), _handle_filter_residue),
        (("filter", "secondary-structure"), _handle_filter_ss),
        (("mcp", None), _handle_mcp),
        (("convert", "structures"), _handle_convert_structures),
        (("convert", "uniprot"), _handle_convert_uniprot),
    ]
)
|
|
1372
|
+
|
|
1373
|
+
|
|
1374
|
+
def main(argv: Sequence[str] | None = None):
    """Main entry point for the CLI.

    Args:
        argv: List of command line arguments. If None, uses sys.argv.
    """
    args = make_parser().parse_args(argv)
    logging.basicConfig(level=args.log_level, handlers=[RichHandler(show_level=False, console=console)])

    # Dispatch table to reduce complexity
    command = args.command
    subcommand = getattr(args, f"{command}_cmd", None)
    try:
        handler = HANDLERS[(command, subcommand)]
    except KeyError:
        msg = f"Unknown command: {command} {subcommand}"
        raise SystemExit(msg) from None
    handler(args)