protein-quest 0.5.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of protein-quest might be problematic. Click here for more details.

Files changed (79)
  1. {protein_quest-0.5.0 → protein_quest-0.6.0}/PKG-INFO +12 -1
  2. {protein_quest-0.5.0 → protein_quest-0.6.0}/README.md +10 -0
  3. {protein_quest-0.5.0 → protein_quest-0.6.0}/docs/notebooks/pdbe.ipynb +12 -8
  4. {protein_quest-0.5.0 → protein_quest-0.6.0}/pyproject.toml +1 -0
  5. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/__version__.py +1 -1
  6. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/alphafold/confidence.py +2 -2
  7. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/alphafold/fetch.py +28 -19
  8. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/cli.py +133 -68
  9. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/filters.py +2 -5
  10. protein_quest-0.6.0/src/protein_quest/io.py +350 -0
  11. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/mcp_server.py +8 -5
  12. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/ss.py +3 -7
  13. protein_quest-0.5.0/src/protein_quest/pdbe/io.py → protein_quest-0.6.0/src/protein_quest/structure.py +53 -126
  14. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/uniprot.py +7 -3
  15. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/utils.py +26 -2
  16. protein_quest-0.6.0/tests/alphafold/cassettes/test_fetch/test_fetch_many_gzipped.yaml +4789 -0
  17. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/alphafold/test_confidence.py +3 -2
  18. protein_quest-0.6.0/tests/alphafold/test_fetch.py +39 -0
  19. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/cassettes/test_uniprot/test_search4interaction_partners.yaml +47 -49
  20. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/cassettes/test_uniprot/test_search4macromolecular_complexes.yaml +46 -46
  21. protein_quest-0.6.0/tests/conftest.py +18 -0
  22. protein_quest-0.6.0/tests/fixtures/2Y29.cif.gz +0 -0
  23. protein_quest-0.6.0/tests/test_io.py +230 -0
  24. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/test_mcp.py +3 -8
  25. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/test_ss.py +2 -10
  26. protein_quest-0.6.0/tests/test_structure.py +89 -0
  27. {protein_quest-0.5.0 → protein_quest-0.6.0}/uv.lock +29 -0
  28. protein_quest-0.5.0/tests/alphafold/test_fetch.py +0 -20
  29. protein_quest-0.5.0/tests/pdbe/fixtures/2y29.cif +0 -940
  30. protein_quest-0.5.0/tests/pdbe/test_io.py +0 -142
  31. {protein_quest-0.5.0 → protein_quest-0.6.0}/.github/workflows/ci.yml +0 -0
  32. {protein_quest-0.5.0 → protein_quest-0.6.0}/.github/workflows/pages.yml +0 -0
  33. {protein_quest-0.5.0 → protein_quest-0.6.0}/.github/workflows/pypi-publish.yml +0 -0
  34. {protein_quest-0.5.0 → protein_quest-0.6.0}/.gitignore +0 -0
  35. {protein_quest-0.5.0 → protein_quest-0.6.0}/.vscode/extensions.json +0 -0
  36. {protein_quest-0.5.0 → protein_quest-0.6.0}/CITATION.cff +0 -0
  37. {protein_quest-0.5.0 → protein_quest-0.6.0}/CODE_OF_CONDUCT.md +0 -0
  38. {protein_quest-0.5.0 → protein_quest-0.6.0}/CONTRIBUTING.md +0 -0
  39. {protein_quest-0.5.0 → protein_quest-0.6.0}/LICENSE +0 -0
  40. {protein_quest-0.5.0 → protein_quest-0.6.0}/docs/CONTRIBUTING.md +0 -0
  41. {protein_quest-0.5.0 → protein_quest-0.6.0}/docs/index.md +0 -0
  42. {protein_quest-0.5.0 → protein_quest-0.6.0}/docs/notebooks/.gitignore +0 -0
  43. {protein_quest-0.5.0 → protein_quest-0.6.0}/docs/notebooks/alphafold.ipynb +0 -0
  44. {protein_quest-0.5.0 → protein_quest-0.6.0}/docs/notebooks/index.md +0 -0
  45. {protein_quest-0.5.0 → protein_quest-0.6.0}/docs/notebooks/uniprot.ipynb +0 -0
  46. {protein_quest-0.5.0 → protein_quest-0.6.0}/docs/protein-quest-mcp.png +0 -0
  47. {protein_quest-0.5.0 → protein_quest-0.6.0}/mkdocs.yml +0 -0
  48. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/__init__.py +0 -0
  49. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/alphafold/__init__.py +0 -0
  50. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/alphafold/entry_summary.py +0 -0
  51. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/converter.py +0 -0
  52. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/emdb.py +0 -0
  53. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/go.py +0 -0
  54. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/parallel.py +0 -0
  55. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/pdbe/__init__.py +0 -0
  56. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/pdbe/fetch.py +0 -0
  57. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/py.typed +0 -0
  58. {protein_quest-0.5.0 → protein_quest-0.6.0}/src/protein_quest/taxonomy.py +0 -0
  59. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/alphafold/AF-A1YPR0-F1-model_v4.pdb +0 -0
  60. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/alphafold/cassettes/test_fetch/test_fetch_many.yaml +0 -0
  61. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/alphafold/test_entry_summary.py +0 -0
  62. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/cassettes/test_emdb/test_fetch.yaml +0 -0
  63. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/cassettes/test_go/test_search_gene_ontology_term.yaml +0 -0
  64. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/cassettes/test_taxonomy/test_search_taxon.yaml +0 -0
  65. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/cassettes/test_taxonomy/test_search_taxon_by_id.yaml +0 -0
  66. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/cassettes/test_uniprot/test_search4af.yaml +0 -0
  67. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/cassettes/test_uniprot/test_search4emdb.yaml +0 -0
  68. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/cassettes/test_uniprot/test_search4pdb.yaml +0 -0
  69. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/cassettes/test_uniprot/test_search4uniprot.yaml +0 -0
  70. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/fixtures/3JRS_B2A.cif.gz +0 -0
  71. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/pdbe/cassettes/test_fetch/test_fetch.yaml +0 -0
  72. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/pdbe/test_fetch.py +0 -0
  73. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/test_cli.py +0 -0
  74. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/test_converter.py +0 -0
  75. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/test_emdb.py +0 -0
  76. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/test_go.py +0 -0
  77. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/test_taxonomy.py +0 -0
  78. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/test_uniprot.py +0 -0
  79. {protein_quest-0.5.0 → protein_quest-0.6.0}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: protein_quest
3
- Version: 0.5.0
3
+ Version: 0.6.0
4
4
  Summary: Search/retrieve/filter proteins and protein structures
5
5
  Project-URL: Homepage, https://github.com/haddocking/protein-quest
6
6
  Project-URL: Issues, https://github.com/haddocking/protein-quest/issues
@@ -17,6 +17,7 @@ Requires-Dist: cattrs[orjson]>=24.1.3
17
17
  Requires-Dist: dask>=2025.5.1
18
18
  Requires-Dist: distributed>=2025.5.1
19
19
  Requires-Dist: gemmi>=0.7.3
20
+ Requires-Dist: mmcif>=0.92.0
20
21
  Requires-Dist: platformdirs>=4.3.8
21
22
  Requires-Dist: psutil>=7.0.0
22
23
  Requires-Dist: rich-argparse>=1.7.1
@@ -71,6 +72,7 @@ graph TB;
71
72
  fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
72
73
  confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
73
74
  residuefilter --> |mmcif_files| ssfilter
75
+ ssfilter -. mmcif_files .-> convert2cif([Convert to cif])
74
76
  classDef dashedBorder stroke-dasharray: 5 5;
75
77
  goterm:::dashedBorder
76
78
  taxonomy:::dashedBorder
@@ -78,6 +80,7 @@ graph TB;
78
80
  fetchemdb:::dashedBorder
79
81
  searchintactionpartners:::dashedBorder
80
82
  searchcomplexes:::dashedBorder
83
+ convert2cif:::dashedBorder
81
84
  ```
82
85
 
83
86
  (Dotted nodes and edges are side-quests.)
@@ -242,6 +245,14 @@ query_protein,complex_id,complex_url,complex_title,members
242
245
  Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
243
246
  ```
244
247
 
248
+ ### Convert structure files to .cif format
249
+
250
+ Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only work with `.cif` files and not `*.cif.gz` or `*.bcif` files.
251
+
252
+ ```shell
253
+ protein-quest convert --output-dir ./filtered-cif ./filtered-ss
254
+ ```
255
+
245
256
  ## Model Context Protocol (MCP) server
246
257
 
247
258
  Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
@@ -40,6 +40,7 @@ graph TB;
40
40
  fetchad -->|mmcif_files| confidencefilter{{Filter out low confidence}}
41
41
  confidencefilter --> |mmcif_files| ssfilter{{Filter on secondary structure}}
42
42
  residuefilter --> |mmcif_files| ssfilter
43
+ ssfilter -. mmcif_files .-> convert2cif([Convert to cif])
43
44
  classDef dashedBorder stroke-dasharray: 5 5;
44
45
  goterm:::dashedBorder
45
46
  taxonomy:::dashedBorder
@@ -47,6 +48,7 @@ graph TB;
47
48
  fetchemdb:::dashedBorder
48
49
  searchintactionpartners:::dashedBorder
49
50
  searchcomplexes:::dashedBorder
51
+ convert2cif:::dashedBorder
50
52
  ```
51
53
 
52
54
  (Dotted nodes and edges are side-quests.)
@@ -211,6 +213,14 @@ query_protein,complex_id,complex_url,complex_title,members
211
213
  Q05471,CPX-2122,https://www.ebi.ac.uk/complexportal/complex/CPX-2122,Swr1 chromatin remodelling complex,P31376;P35817;P38326;P53201;P53930;P60010;P80428;Q03388;Q03433;Q03940;Q05471;Q06707;Q12464;Q12509
212
214
  ```
213
215
 
216
+ ### Convert structure files to .cif format
217
+
218
+ Some tools (for example [powerfit](https://github.com/haddocking/powerfit)) only work with `.cif` files and not `*.cif.gz` or `*.bcif` files.
219
+
220
+ ```shell
221
+ protein-quest convert --output-dir ./filtered-cif ./filtered-ss
222
+ ```
223
+
214
224
  ## Model Context Protocol (MCP) server
215
225
 
216
226
  Protein quest can also help LLMs like Claude Sonnet 4 by providing a [set of tools](https://modelcontextprotocol.io/docs/learn/server-concepts#tools-ai-actions) for protein structures.
@@ -111,12 +111,12 @@
111
111
  },
112
112
  {
113
113
  "cell_type": "code",
114
- "execution_count": 7,
114
+ "execution_count": null,
115
115
  "id": "52a42ec2",
116
116
  "metadata": {},
117
117
  "outputs": [],
118
118
  "source": [
119
- "from protein_quest.pdbe.io import write_single_chain_pdb_file"
119
+ "from protein_quest.structure import write_single_chain_structure_file"
120
120
  ]
121
121
  },
122
122
  {
@@ -139,7 +139,9 @@
139
139
  "source": [
140
140
  "# A8MT69\t4NDY\tB/D/H/L/M/N/U/V/W/X=8-81\n",
141
141
  "# above is the identifier, chain and position info for the structure from https://www.uniprot.org/uniprotkb/A8MT69/entry#structure\n",
142
- "output_4dny_file = write_single_chain_pdb_file(input_file=save_dir / \"4ndy.cif.gz\", chain2keep=\"B\", output_dir=save_dir)\n",
142
+ "output_4dny_file = write_single_chain_structure_file(\n",
143
+ " input_file=save_dir / \"4ndy.cif.gz\", chain2keep=\"B\", output_dir=save_dir\n",
144
+ ")\n",
143
145
  "output_4dny_file"
144
146
  ]
145
147
  },
@@ -162,13 +164,15 @@
162
164
  ],
163
165
  "source": [
164
166
  "# A8MT69\t4DRA\tE/F/G/H=1-81\n",
165
- "output_4dra_file = write_single_chain_pdb_file(input_file=save_dir / \"4dra.cif.gz\", chain2keep=\"E\", output_dir=save_dir)\n",
167
+ "output_4dra_file = write_single_chain_structure_file(\n",
168
+ " input_file=save_dir / \"4dra.cif.gz\", chain2keep=\"E\", output_dir=save_dir\n",
169
+ ")\n",
166
170
  "output_4dra_file"
167
171
  ]
168
172
  },
169
173
  {
170
174
  "cell_type": "code",
171
- "execution_count": 12,
175
+ "execution_count": null,
172
176
  "id": "673d1274",
173
177
  "metadata": {},
174
178
  "outputs": [
@@ -184,7 +188,7 @@
184
188
  }
185
189
  ],
186
190
  "source": [
187
- "output_1xwh_file = write_single_chain_pdb_file(\n",
191
+ "output_1xwh_file = write_single_chain_structure_file(\n",
188
192
  " input_file=save_dir / \"1xwh.cif.gz\",\n",
189
193
  " chain2keep=\"A\",\n",
190
194
  " output_dir=save_dir,\n",
@@ -194,7 +198,7 @@
194
198
  },
195
199
  {
196
200
  "cell_type": "code",
197
- "execution_count": 14,
201
+ "execution_count": null,
198
202
  "id": "5f03eb2c",
199
203
  "metadata": {},
200
204
  "outputs": [
@@ -211,7 +215,7 @@
211
215
  ],
212
216
  "source": [
213
217
  "# O00268 │ 8WAS │ D/d=1-1085\n",
214
- "output_8was_file = write_single_chain_pdb_file(\n",
218
+ "output_8was_file = write_single_chain_structure_file(\n",
215
219
  " input_file=save_dir / \"8was.cif.gz\",\n",
216
220
  " chain2keep=\"D\",\n",
217
221
  " output_dir=save_dir,\n",
@@ -21,6 +21,7 @@ dependencies = [
21
21
  "tqdm>=4.67.1",
22
22
  "yarl>=1.20.1",
23
23
  "platformdirs>=4.3.8",
24
+ "mmcif>=0.92.0",
24
25
  ]
25
26
 
26
27
  [project.urls]
@@ -1,2 +1,2 @@
1
- __version__ = "0.5.0"
1
+ __version__ = "0.6.0"
2
2
  """The version of the package."""
@@ -8,7 +8,7 @@ from pathlib import Path
8
8
  import gemmi
9
9
 
10
10
  from protein_quest.converter import Percentage, PositiveInt, converter
11
- from protein_quest.pdbe.io import write_structure
11
+ from protein_quest.io import read_structure, write_structure
12
12
  from protein_quest.ss import nr_of_residues_in_total
13
13
  from protein_quest.utils import CopyMethod, copyfile
14
14
 
@@ -127,7 +127,7 @@ def filter_file_on_residues(
127
127
  result with filtered_file property set to Path where filtered PDB file is saved.
128
128
  or None if structure was filtered out.
129
129
  """
130
- structure = gemmi.read_structure(str(file))
130
+ structure = read_structure(file)
131
131
  residues = set(find_high_confidence_residues(structure, query.confidence))
132
132
  count = len(residues)
133
133
  if count < query.min_residues or count > query.max_residues:
@@ -125,15 +125,15 @@ async def fetch_summary(
125
125
  fn: AsyncPath | None = None
126
126
  if save_dir is not None:
127
127
  fn = AsyncPath(save_dir / f"{qualifier}.json")
128
+ if await fn.exists():
129
+ logger.debug(f"File {fn} already exists. Skipping download from {url}.")
130
+ raw_data = await fn.read_bytes()
131
+ return converter.loads(raw_data, list[EntrySummary])
128
132
  cached_file = await cacher.copy_from_cache(Path(fn))
129
133
  if cached_file is not None:
130
134
  logger.debug(f"Using cached file {cached_file} for summary of {qualifier}.")
131
135
  raw_data = await AsyncPath(cached_file).read_bytes()
132
136
  return converter.loads(raw_data, list[EntrySummary])
133
- if await fn.exists():
134
- logger.debug(f"File {fn} already exists. Skipping download from {url}.")
135
- raw_data = await fn.read_bytes()
136
- return converter.loads(raw_data, list[EntrySummary])
137
137
  async with semaphore, session.get(url) as response:
138
138
  response.raise_for_status()
139
139
  raw_data = await response.content.read()
@@ -170,6 +170,7 @@ async def fetch_many_async(
170
170
  what: set[DownloadableFormat],
171
171
  max_parallel_downloads: int = 5,
172
172
  cacher: Cacher | None = None,
173
+ gzip_files: bool = False,
173
174
  ) -> AsyncGenerator[AlphaFoldEntry]:
174
175
  """Asynchronously fetches summaries and files from
175
176
  [AlphaFold Protein Structure Database](https://alphafold.ebi.ac.uk/).
@@ -180,6 +181,7 @@ async def fetch_many_async(
180
181
  what: A set of formats to download.
181
182
  max_parallel_downloads: The maximum number of parallel downloads.
182
183
  cacher: A cacher to use for caching the fetched files. Only used if summary is in what set.
184
+ gzip_files: Whether to gzip the downloaded files.
183
185
 
184
186
  Yields:
185
187
  A dataclass containing the summary, pdb file, and pae file.
@@ -193,7 +195,7 @@ async def fetch_many_async(
193
195
  )
194
196
  ]
195
197
 
196
- files = files_to_download(what, summaries)
198
+ files = files_to_download(what, summaries, gzip_files)
197
199
 
198
200
  await retrieve_files(
199
201
  files,
@@ -201,36 +203,40 @@ async def fetch_many_async(
201
203
  desc="Downloading AlphaFold files",
202
204
  max_parallel_downloads=max_parallel_downloads,
203
205
  cacher=cacher,
206
+ gzip_files=gzip_files,
204
207
  )
208
+ gzext = ".gz" if gzip_files else ""
205
209
  for summary in summaries:
206
210
  yield AlphaFoldEntry(
207
211
  uniprot_acc=summary.uniprotAccession,
208
212
  summary=summary,
209
213
  summary_file=save_dir / f"{summary.uniprotAccession}.json" if save_dir_for_summaries is not None else None,
210
- bcif_file=save_dir / summary.bcifUrl.name if "bcif" in what else None,
211
- cif_file=save_dir / summary.cifUrl.name if "cif" in what else None,
212
- pdb_file=save_dir / summary.pdbUrl.name if "pdb" in what else None,
213
- pae_image_file=save_dir / summary.paeImageUrl.name if "paeImage" in what else None,
214
- pae_doc_file=save_dir / summary.paeDocUrl.name if "paeDoc" in what else None,
214
+ bcif_file=save_dir / (summary.bcifUrl.name + gzext) if "bcif" in what else None,
215
+ cif_file=save_dir / (summary.cifUrl.name + gzext) if "cif" in what else None,
216
+ pdb_file=save_dir / (summary.pdbUrl.name + gzext) if "pdb" in what else None,
217
+ pae_image_file=save_dir / (summary.paeImageUrl.name + gzext) if "paeImage" in what else None,
218
+ pae_doc_file=save_dir / (summary.paeDocUrl.name + gzext) if "paeDoc" in what else None,
215
219
  am_annotations_file=(
216
- save_dir / summary.amAnnotationsUrl.name
220
+ save_dir / (summary.amAnnotationsUrl.name + gzext)
217
221
  if "amAnnotations" in what and summary.amAnnotationsUrl
218
222
  else None
219
223
  ),
220
224
  am_annotations_hg19_file=(
221
- save_dir / summary.amAnnotationsHg19Url.name
225
+ save_dir / (summary.amAnnotationsHg19Url.name + gzext)
222
226
  if "amAnnotationsHg19" in what and summary.amAnnotationsHg19Url
223
227
  else None
224
228
  ),
225
229
  am_annotations_hg38_file=(
226
- save_dir / summary.amAnnotationsHg38Url.name
230
+ save_dir / (summary.amAnnotationsHg38Url.name + gzext)
227
231
  if "amAnnotationsHg38" in what and summary.amAnnotationsHg38Url
228
232
  else None
229
233
  ),
230
234
  )
231
235
 
232
236
 
233
- def files_to_download(what: set[DownloadableFormat], summaries: Iterable[EntrySummary]) -> set[tuple[URL, str]]:
237
+ def files_to_download(
238
+ what: set[DownloadableFormat], summaries: Iterable[EntrySummary], gzip_files: bool
239
+ ) -> set[tuple[URL, str]]:
234
240
  if not (set(what) <= downloadable_formats):
235
241
  msg = (
236
242
  f"Invalid format(s) specified: {set(what) - downloadable_formats}. "
@@ -238,7 +244,7 @@ def files_to_download(what: set[DownloadableFormat], summaries: Iterable[EntrySu
238
244
  )
239
245
  raise ValueError(msg)
240
246
 
241
- files: set[tuple[URL, str]] = set()
247
+ url_filename_pairs: set[tuple[URL, str]] = set()
242
248
  for summary in summaries:
243
249
  for fmt in what:
244
250
  if fmt == "summary":
@@ -248,9 +254,10 @@ def files_to_download(what: set[DownloadableFormat], summaries: Iterable[EntrySu
248
254
  if url is None:
249
255
  logger.warning(f"Summary {summary.uniprotAccession} does not have a URL for format '{fmt}'. Skipping.")
250
256
  continue
251
- file = (url, url.name)
252
- files.add(file)
253
- return files
257
+ fn = url.name + (".gz" if gzip_files else "")
258
+ url_filename_pair = (url, fn)
259
+ url_filename_pairs.add(url_filename_pair)
260
+ return url_filename_pairs
254
261
 
255
262
 
256
263
  def fetch_many(
@@ -259,6 +266,7 @@ def fetch_many(
259
266
  what: set[DownloadableFormat],
260
267
  max_parallel_downloads: int = 5,
261
268
  cacher: Cacher | None = None,
269
+ gzip_files: bool = False,
262
270
  ) -> list[AlphaFoldEntry]:
263
271
  """Synchronously fetches summaries and pdb and pae files from AlphaFold Protein Structure Database.
264
272
 
@@ -268,6 +276,7 @@ def fetch_many(
268
276
  what: A set of formats to download.
269
277
  max_parallel_downloads: The maximum number of parallel downloads.
270
278
  cacher: A cacher to use for caching the fetched files. Only used if summary is in what set.
279
+ gzip_files: Whether to gzip the downloaded files.
271
280
 
272
281
  Returns:
273
282
  A list of AlphaFoldEntry dataclasses containing the summary, pdb file, and pae file.
@@ -277,7 +286,7 @@ def fetch_many(
277
286
  return [
278
287
  entry
279
288
  async for entry in fetch_many_async(
280
- ids, save_dir, what, max_parallel_downloads=max_parallel_downloads, cacher=cacher
289
+ ids, save_dir, what, max_parallel_downloads=max_parallel_downloads, cacher=cacher, gzip_files=gzip_files
281
290
  )
282
291
  ]
283
292
 
@@ -28,8 +28,13 @@ from protein_quest.converter import converter
28
28
  from protein_quest.emdb import fetch as emdb_fetch
29
29
  from protein_quest.filters import filter_files_on_chain, filter_files_on_residues
30
30
  from protein_quest.go import Aspect, allowed_aspects, search_gene_ontology_term, write_go_terms_to_csv
31
+ from protein_quest.io import (
32
+ convert_to_cif_files,
33
+ glob_structure_files,
34
+ locate_structure_file,
35
+ valid_structure_file_extensions,
36
+ )
31
37
  from protein_quest.pdbe import fetch as pdbe_fetch
32
- from protein_quest.pdbe.io import glob_structure_files, locate_structure_file
33
38
  from protein_quest.ss import SecondaryStructureFilterQuery, filter_files_on_secondary_structure
34
39
  from protein_quest.taxonomy import SearchField, _write_taxonomy_csv, search_fields, search_taxon
35
40
  from protein_quest.uniprot import (
@@ -297,6 +302,38 @@ def _add_search_complexes_parser(subparsers: argparse._SubParsersAction):
297
302
  parser.add_argument("--timeout", type=int, default=1_800, help="Maximum seconds to wait for query to complete")
298
303
 
299
304
 
305
+ def _add_copy_method_arguments(parser):
306
+ parser.add_argument(
307
+ "--copy-method",
308
+ type=str,
309
+ choices=copy_methods,
310
+ default="hardlink",
311
+ help=dedent("""\
312
+ How to make target file be same file as source file.
313
+ By default uses hardlinks to save disk space.
314
+ Note that hardlinks only work within the same filesystem and are harder to track.
315
+ If you want to track cached files easily then use 'symlink'.
316
+ On Windows you need developer mode or admin privileges to create symlinks.
317
+ """),
318
+ )
319
+
320
+
321
+ def _add_cacher_arguments(parser: argparse.ArgumentParser):
322
+ """Add cacher arguments to parser."""
323
+ parser.add_argument(
324
+ "--no-cache",
325
+ action="store_true",
326
+ help="Disable caching of files to central location.",
327
+ )
328
+ parser.add_argument(
329
+ "--cache-dir",
330
+ type=Path,
331
+ default=user_cache_root_dir(),
332
+ help="Directory to use as cache for files.",
333
+ )
334
+ _add_copy_method_arguments(parser)
335
+
336
+
300
337
  def _add_retrieve_pdbe_parser(subparsers: argparse._SubParsersAction):
301
338
  """Add retrieve pdbe subcommand parser."""
302
339
  parser = subparsers.add_parser(
@@ -345,6 +382,11 @@ def _add_retrieve_alphafold_parser(subparsers: argparse._SubParsersAction):
345
382
  help=dedent("""AlphaFold formats to retrieve. Can be specified multiple times.
346
383
  Default is 'summary' and 'cif'."""),
347
384
  )
385
+ parser.add_argument(
386
+ "--gzip-files",
387
+ action="store_true",
388
+ help="Whether to gzip the downloaded files. Excludes summary files, they are always uncompressed.",
389
+ )
348
390
  parser.add_argument(
349
391
  "--max-parallel-downloads",
350
392
  type=int,
@@ -561,6 +603,33 @@ def _add_filter_subcommands(subparsers: argparse._SubParsersAction):
561
603
  _add_filter_ss_parser(subsubparsers)
562
604
 
563
605
 
606
+ def _add_convert_subcommands(subparsers: argparse._SubParsersAction):
607
+ """Add convert command."""
608
+ parser = subparsers.add_parser(
609
+ "convert", help="Convert structure files between formats", formatter_class=ArgumentDefaultsRichHelpFormatter
610
+ )
611
+ parser.add_argument(
612
+ "input_dir",
613
+ type=Path,
614
+ help=f"Directory with structure files. Supported extensions are {valid_structure_file_extensions}",
615
+ )
616
+ parser.add_argument(
617
+ "--output-dir",
618
+ type=Path,
619
+ help=dedent("""\
620
+ Directory to write converted structure files. If not given, files are written to `input_dir`.
621
+ """),
622
+ )
623
+ parser.add_argument(
624
+ "--format",
625
+ type=str,
626
+ choices=("cif",),
627
+ default="cif",
628
+ help="Output format to convert to.",
629
+ )
630
+ _add_copy_method_arguments(parser)
631
+
632
+
564
633
  def _add_mcp_command(subparsers: argparse._SubParsersAction):
565
634
  """Add MCP command."""
566
635
 
@@ -580,38 +649,6 @@ def _add_mcp_command(subparsers: argparse._SubParsersAction):
580
649
  parser.add_argument("--port", default=8000, type=int, help="Port to bind the server to")
581
650
 
582
651
 
583
- def _add_copy_method_arguments(parser):
584
- parser.add_argument(
585
- "--copy-method",
586
- type=str,
587
- choices=copy_methods,
588
- default="hardlink",
589
- help=dedent("""\
590
- How to make target file be same file as source file.
591
- By default uses hardlinks to save disk space.
592
- Note that hardlinks only work within the same filesystem and are harder to track.
593
- If you want to track cached files easily then use 'symlink'.
594
- On Windows you need developer mode or admin privileges to create symlinks.
595
- """),
596
- )
597
-
598
-
599
- def _add_cacher_arguments(parser: argparse.ArgumentParser):
600
- """Add cacher arguments to parser."""
601
- parser.add_argument(
602
- "--no-cache",
603
- action="store_true",
604
- help="Disable caching of files to central location.",
605
- )
606
- parser.add_argument(
607
- "--cache-dir",
608
- type=Path,
609
- default=user_cache_root_dir(),
610
- help="Directory to use as cache for files.",
611
- )
612
- _add_copy_method_arguments(parser)
613
-
614
-
615
652
  def make_parser() -> argparse.ArgumentParser:
616
653
  parser = argparse.ArgumentParser(
617
654
  description="Protein Quest CLI", prog="protein-quest", formatter_class=ArgumentDefaultsRichHelpFormatter
@@ -624,27 +661,12 @@ def make_parser() -> argparse.ArgumentParser:
624
661
  _add_search_subcommands(subparsers)
625
662
  _add_retrieve_subcommands(subparsers)
626
663
  _add_filter_subcommands(subparsers)
664
+ _add_convert_subcommands(subparsers)
627
665
  _add_mcp_command(subparsers)
628
666
 
629
667
  return parser
630
668
 
631
669
 
632
- def main():
633
- """Main entry point for the CLI."""
634
- parser = make_parser()
635
- args = parser.parse_args()
636
- logging.basicConfig(level=args.log_level, handlers=[RichHandler(show_level=False)])
637
-
638
- # Dispatch table to reduce complexity
639
- cmd = args.command
640
- sub = getattr(args, f"{cmd}_cmd", None)
641
- handler = HANDLERS.get((cmd, sub))
642
- if handler is None:
643
- msg = f"Unknown command: {cmd} {sub}"
644
- raise SystemExit(msg)
645
- handler(args)
646
-
647
-
648
670
  def _handle_search_uniprot(args):
649
671
  taxon_id = args.taxon_id
650
672
  reviewed = args.reviewed
@@ -798,6 +820,7 @@ def _handle_retrieve_alphafold(args):
798
820
  alphafold_csv = args.alphafold_csv
799
821
  max_parallel_downloads = args.max_parallel_downloads
800
822
  cacher = _initialize_cacher(args)
823
+ gzip_files = args.gzip_files
801
824
 
802
825
  if what_formats is None:
803
826
  what_formats = {"summary", "cif"}
@@ -808,7 +831,12 @@ def _handle_retrieve_alphafold(args):
808
831
  validated_what: set[DownloadableFormat] = structure(what_formats, set[DownloadableFormat])
809
832
  rprint(f"Retrieving {len(af_ids)} AlphaFold entries with formats {validated_what}")
810
833
  afs = af_fetch(
811
- af_ids, download_dir, what=validated_what, max_parallel_downloads=max_parallel_downloads, cacher=cacher
834
+ af_ids,
835
+ download_dir,
836
+ what=validated_what,
837
+ max_parallel_downloads=max_parallel_downloads,
838
+ cacher=cacher,
839
+ gzip_files=gzip_files,
812
840
  )
813
841
  total_nr_files = sum(af.nr_of_files() for af in afs)
814
842
  rprint(f"Retrieved {total_nr_files} AlphaFold files and {len(afs)} summaries, written to {download_dir}")
@@ -1017,24 +1045,24 @@ def _handle_mcp(args):
1017
1045
  mcp.run(transport=args.transport, host=args.host, port=args.port)
1018
1046
 
1019
1047
 
1020
- HANDLERS: dict[tuple[str, str | None], Callable] = {
1021
- ("search", "uniprot"): _handle_search_uniprot,
1022
- ("search", "pdbe"): _handle_search_pdbe,
1023
- ("search", "alphafold"): _handle_search_alphafold,
1024
- ("search", "emdb"): _handle_search_emdb,
1025
- ("search", "go"): _handle_search_go,
1026
- ("search", "taxonomy"): _handle_search_taxonomy,
1027
- ("search", "interaction-partners"): _handle_search_interaction_partners,
1028
- ("search", "complexes"): _handle_search_complexes,
1029
- ("retrieve", "pdbe"): _handle_retrieve_pdbe,
1030
- ("retrieve", "alphafold"): _handle_retrieve_alphafold,
1031
- ("retrieve", "emdb"): _handle_retrieve_emdb,
1032
- ("filter", "confidence"): _handle_filter_confidence,
1033
- ("filter", "chain"): _handle_filter_chain,
1034
- ("filter", "residue"): _handle_filter_residue,
1035
- ("filter", "secondary-structure"): _handle_filter_ss,
1036
- ("mcp", None): _handle_mcp,
1037
- }
1048
+ def _handle_convert(args):
1049
+ input_dir = structure(args.input_dir, Path)
1050
+ output_dir = input_dir if args.output_dir is None else structure(args.output_dir, Path)
1051
+ copy_method: CopyMethod = structure(args.copy_method, CopyMethod) # pyright: ignore[reportArgumentType]
1052
+
1053
+ input_files = sorted(glob_structure_files(input_dir))
1054
+ rprint(f"Converting {len(input_files)} files in {input_dir} directory to cif format.")
1055
+ for _ in tqdm(
1056
+ convert_to_cif_files(
1057
+ input_files,
1058
+ output_dir,
1059
+ copy_method=copy_method,
1060
+ ),
1061
+ total=len(input_files),
1062
+ unit="file",
1063
+ ):
1064
+ pass
1065
+ rprint(f"Converted {len(input_files)} files into {output_dir}.")
1038
1066
 
1039
1067
 
1040
1068
  def _read_lines(file: TextIOWrapper) -> list[str]:
@@ -1118,3 +1146,40 @@ def _write_complexes_csv(complexes: list[ComplexPortalEntry], output_csv: TextIO
1118
1146
  members_str,
1119
1147
  ]
1120
1148
  )
1149
+
1150
+
1151
+ HANDLERS: dict[tuple[str, str | None], Callable] = {
1152
+ ("search", "uniprot"): _handle_search_uniprot,
1153
+ ("search", "pdbe"): _handle_search_pdbe,
1154
+ ("search", "alphafold"): _handle_search_alphafold,
1155
+ ("search", "emdb"): _handle_search_emdb,
1156
+ ("search", "go"): _handle_search_go,
1157
+ ("search", "taxonomy"): _handle_search_taxonomy,
1158
+ ("search", "interaction-partners"): _handle_search_interaction_partners,
1159
+ ("search", "complexes"): _handle_search_complexes,
1160
+ ("retrieve", "pdbe"): _handle_retrieve_pdbe,
1161
+ ("retrieve", "alphafold"): _handle_retrieve_alphafold,
1162
+ ("retrieve", "emdb"): _handle_retrieve_emdb,
1163
+ ("filter", "confidence"): _handle_filter_confidence,
1164
+ ("filter", "chain"): _handle_filter_chain,
1165
+ ("filter", "residue"): _handle_filter_residue,
1166
+ ("filter", "secondary-structure"): _handle_filter_ss,
1167
+ ("mcp", None): _handle_mcp,
1168
+ ("convert", None): _handle_convert,
1169
+ }
1170
+
1171
+
1172
+ def main():
1173
+ """Main entry point for the CLI."""
1174
+ parser = make_parser()
1175
+ args = parser.parse_args()
1176
+ logging.basicConfig(level=args.log_level, handlers=[RichHandler(show_level=False)])
1177
+
1178
+ # Dispatch table to reduce complexity
1179
+ cmd = args.command
1180
+ sub = getattr(args, f"{cmd}_cmd", None)
1181
+ handler = HANDLERS.get((cmd, sub))
1182
+ if handler is None:
1183
+ msg = f"Unknown command: {cmd} {sub}"
1184
+ raise SystemExit(msg)
1185
+ handler(args)
@@ -11,10 +11,7 @@ from distributed.deploy.cluster import Cluster
11
11
  from tqdm.auto import tqdm
12
12
 
13
13
  from protein_quest.parallel import configure_dask_scheduler, dask_map_with_progress
14
- from protein_quest.pdbe.io import (
15
- nr_residues_in_chain,
16
- write_single_chain_pdb_file,
17
- )
14
+ from protein_quest.structure import nr_residues_in_chain, write_single_chain_structure_file
18
15
  from protein_quest.utils import CopyMethod, copyfile
19
16
 
20
17
  logger = logging.getLogger(__name__)
@@ -38,7 +35,7 @@ def filter_file_on_chain(
38
35
  input_file, chain_id = file_and_chain
39
36
  logger.debug("Filtering %s on chain %s", input_file, chain_id)
40
37
  try:
41
- output_file = write_single_chain_pdb_file(
38
+ output_file = write_single_chain_structure_file(
42
39
  input_file, chain_id, output_dir, out_chain=out_chain, copy_method=copy_method
43
40
  )
44
41
  return ChainFilterStatistics(