PyPI - cool-seq-tool - Versions diffs - 0.4.0.dev2__py3-none-any.whl → 0.4.1__py3-none-any.whl - Mend

cool-seq-tool 0.4.0.dev2__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

cool_seq_tool/__init__.py +1 -3
cool_seq_tool/api.py +1 -2
cool_seq_tool/app.py +42 -24
cool_seq_tool/handlers/__init__.py +1 -0
cool_seq_tool/handlers/seqrepo_access.py +13 -15
cool_seq_tool/mappers/__init__.py +1 -0
cool_seq_tool/mappers/alignment.py +5 -6
cool_seq_tool/mappers/exon_genomic_coords.py +232 -68
cool_seq_tool/mappers/mane_transcript.py +84 -86
cool_seq_tool/resources/__init__.py +1 -0
cool_seq_tool/resources/data_files.py +93 -0
cool_seq_tool/resources/status.py +151 -0
cool_seq_tool/routers/__init__.py +1 -0
cool_seq_tool/routers/default.py +1 -0
cool_seq_tool/routers/mane.py +4 -4
cool_seq_tool/routers/mappings.py +2 -2
cool_seq_tool/schemas.py +83 -37
cool_seq_tool/sources/__init__.py +1 -0
cool_seq_tool/sources/mane_transcript_mappings.py +14 -7
cool_seq_tool/sources/transcript_mappings.py +41 -32
cool_seq_tool/sources/uta_database.py +120 -69
cool_seq_tool/utils.py +2 -2
cool_seq_tool/version.py +2 -1
{cool_seq_tool-0.4.0.dev2.dist-info → cool_seq_tool-0.4.1.dist-info}/LICENSE +1 -1
{cool_seq_tool-0.4.0.dev2.dist-info → cool_seq_tool-0.4.1.dist-info}/METADATA +15 -8
cool_seq_tool-0.4.1.dist-info/RECORD +29 -0
{cool_seq_tool-0.4.0.dev2.dist-info → cool_seq_tool-0.4.1.dist-info}/WHEEL +1 -1
cool_seq_tool/data/__init__.py +0 -2
cool_seq_tool/data/data_downloads.py +0 -89
cool_seq_tool/paths.py +0 -28
cool_seq_tool-0.4.0.dev2.dist-info/RECORD +0 -29
/cool_seq_tool/{data → resources}/transcript_mapping.tsv +0 -0
{cool_seq_tool-0.4.0.dev2.dist-info → cool_seq_tool-0.4.1.dist-info}/top_level.txt +0 -0

cool_seq_tool/mappers/mane_transcript.py CHANGED Viewed

@@ -11,10 +11,11 @@ Steps:
 In addition to a mapper utility class, this module also defines several vocabulary
 constraints and data models for coordinate representation.
 """
 import logging
 import math
 from enum import Enum
-from typing import Dict, List, Optional, Set, Tuple, Union
+from typing import Literal
 import polars as pl
 from pydantic import BaseModel
@@ -50,10 +51,10 @@ class EndAnnotationLayer(str, Enum):
 class DataRepresentation(BaseModel):
     """Define object model for final output representation"""
-    gene: Optional[str] = None
+    gene: str | None = None
     refseq: str
-    ensembl: Optional[str] = None
-    pos: Tuple[int, int]
+    ensembl: str | None = None
+    pos: tuple[int, int]
     strand: Strand
     status: TranscriptPriority
@@ -63,14 +64,14 @@ class CdnaRepresentation(DataRepresentation):
     coding_start_site: int
     coding_end_site: int
-    alt_ac: Optional[str] = None
+    alt_ac: str | None = None
 class GenomicRepresentation(BaseModel):
     """Define object model for genomic representation"""
     refseq: str
-    pos: Tuple[int, int]
+    pos: tuple[int, int]
     status: TranscriptPriority
     alt_ac: str
@@ -105,7 +106,7 @@ class ManeTranscript:
         >>> import asyncio
         >>> result = asyncio.run(mane_mapper.g_to_grch38("NC_000001.11", 100, 200))
-        >>> result['ac']
+        >>> result["ac"]
         'NC_000001.11'
         See the :ref:`Usage section <async_note>` for more information.
@@ -135,7 +136,7 @@ class ManeTranscript:
         return pos_mod_3
     @staticmethod
-    def _p_to_c_pos(start: int, end: int) -> Tuple[int, int]:
+    def _p_to_c_pos(start: int, end: int) -> tuple[int, int]:
         """Return cDNA position given a protein position.
         :param start: Start protein position. Inter-residue coordinates
@@ -148,7 +149,7 @@ class ManeTranscript:
     async def _p_to_c(
         self, ac: str, start_pos: int, end_pos: int
-    ) -> Optional[Tuple[str, Tuple[int, int]]]:
+    ) -> tuple[str, tuple[int, int]] | None:
         """Convert protein (p.) annotation to cDNA (c.) annotation.
         :param ac: Protein accession
@@ -176,7 +177,7 @@ class ManeTranscript:
         pos = self._p_to_c_pos(start_pos, end_pos)
         return ac, pos
-    async def _c_to_g(self, ac: str, pos: Tuple[int, int]) -> Optional[Dict]:
+    async def _c_to_g(self, ac: str, pos: tuple[int, int]) -> dict | None:
         """Get g. annotation from c. annotation.
         :param ac: cDNA accession
@@ -217,13 +218,12 @@ class ManeTranscript:
     async def _get_and_validate_genomic_tx_data(
         self,
         tx_ac: str,
-        pos: Tuple[int, int],
-        annotation_layer: Union[
-            AnnotationLayer.CDNA, AnnotationLayer.GENOMIC
-        ] = AnnotationLayer.CDNA,
-        coding_start_site: Optional[int] = None,
-        alt_ac: Optional[str] = None,
-    ) -> Optional[Dict]:
+        pos: tuple[int, int],
+        annotation_layer: Literal[AnnotationLayer.CDNA]
+        | Literal[AnnotationLayer.GENOMIC] = AnnotationLayer.CDNA,
+        coding_start_site: int | None = None,
+        alt_ac: str | None = None,
+    ) -> dict | None:
         """Get and validate genomic_tx_data
         :param tx_ac: Accession on c. coordinate
@@ -266,14 +266,14 @@ class ManeTranscript:
     @staticmethod
     def _get_c_data(
-        cds_start_end: Tuple[int, int],
-        c_pos_change: Tuple[int, int],
+        cds_start_end: tuple[int, int],
+        c_pos_change: tuple[int, int],
         strand: Strand,
         status: TranscriptPriority,
         refseq_c_ac: str,
-        gene: Optional[str] = None,
-        ensembl_c_ac: Optional[str] = None,
-        alt_ac: Optional[str] = None,
+        gene: str | None = None,
+        ensembl_c_ac: str | None = None,
+        alt_ac: str | None = None,
     ) -> CdnaRepresentation:
         """Return transcript data on c. coordinate.
@@ -311,7 +311,7 @@ class ManeTranscript:
             alt_ac=alt_ac,
         )
-    def _c_to_p_pos(self, c_pos: Tuple[int, int]) -> Tuple[int, int]:
+    def _c_to_p_pos(self, c_pos: tuple[int, int]) -> tuple[int, int]:
         """Get protein position from cdna position
         :param c_pos: cdna position. inter-residue coordinates
@@ -325,7 +325,7 @@ class ManeTranscript:
         return start, end
     def _get_mane_p(
-        self, mane_data: Dict, mane_c_pos_range: Tuple[int, int]
+        self, mane_data: dict, mane_c_pos_range: tuple[int, int]
     ) -> DataRepresentation:
         """Translate MANE Transcript c. annotation to p. annotation
@@ -349,13 +349,13 @@ class ManeTranscript:
     async def _g_to_c(
         self,
-        g: Dict,
+        g: dict,
         refseq_c_ac: str,
         status: TranscriptPriority,
-        ensembl_c_ac: Optional[str] = None,
-        alt_ac: Optional[str] = None,
+        ensembl_c_ac: str | None = None,
+        alt_ac: str | None = None,
         found_result: bool = False,
-    ) -> Optional[CdnaRepresentation]:
+    ) -> CdnaRepresentation | None:
         """Get transcript c. annotation data from g. annotation.
         :param g: Genomic data
@@ -459,9 +459,9 @@ class ManeTranscript:
         coding_start_site: int,
         start_pos: int,
         end_pos: int,
-        mane_transcript: Union[
-            DataRepresentation, CdnaRepresentation, GenomicRepresentation
-        ],
+        mane_transcript: DataRepresentation
+        | CdnaRepresentation
+        | GenomicRepresentation,
         expected_ref: str,
         anno: AnnotationLayer,
         residue_mode: ResidueMode,
@@ -522,7 +522,7 @@ class ManeTranscript:
         return True
     def _validate_index(
-        self, ac: str, pos: Tuple[int, int], coding_start_site: int
+        self, ac: str, pos: tuple[int, int], coding_start_site: int
     ) -> bool:
         """Validate that positions actually exist on accession
@@ -533,13 +533,13 @@ class ManeTranscript:
         """
         start_pos = pos[0] + coding_start_site
         end_pos = pos[1] + coding_start_site
-        if self.seqrepo_access.get_reference_sequence(
-            ac, start=start_pos, end=end_pos, residue_mode=ResidueMode.INTER_RESIDUE
-        )[0]:
-            return True
-        return False
+        return bool(
+            self.seqrepo_access.get_reference_sequence(
+                ac, start=start_pos, end=end_pos, residue_mode=ResidueMode.INTER_RESIDUE
+            )[0]
+        )
-    def _get_prioritized_transcripts_from_gene(self, df: pl.DataFrame) -> List:
+    def _get_prioritized_transcripts_from_gene(self, df: pl.DataFrame) -> list:
         """Sort and filter transcripts from gene to get priority list
         :param df: Data frame containing transcripts from gene
@@ -550,7 +550,7 @@ class ManeTranscript:
             most recent version of a transcript associated with an assembly will be kept
         """
         copy_df = df.clone()
-        copy_df = copy_df.drop(columns="alt_ac").unique()
+        copy_df = copy_df.drop("alt_ac").unique()
         copy_df = copy_df.with_columns(
             [
                 pl.col("tx_ac")
@@ -590,15 +590,13 @@ class ManeTranscript:
         start_pos: int,
         end_pos: int,
         start_annotation_layer: AnnotationLayer,
-        gene: Optional[str] = None,
-        ref: Optional[str] = None,
+        gene: str | None = None,
+        ref: str | None = None,
         residue_mode: ResidueMode = ResidueMode.RESIDUE,
-        mane_transcripts: Optional[Set] = None,
-        alt_ac: Optional[str] = None,
-        end_annotation_layer: Optional[EndAnnotationLayer] = None,
-    ) -> Optional[
-        Union[DataRepresentation, CdnaRepresentation, ProteinAndCdnaRepresentation]
-    ]:
+        mane_transcripts: set | None = None,
+        alt_ac: str | None = None,
+        end_annotation_layer: EndAnnotationLayer | None = None,
+    ) -> DataRepresentation | CdnaRepresentation | ProteinAndCdnaRepresentation | None:
         """Get longest compatible transcript from a gene. See the documentation for
         the :ref:`transcript compatibility policy <transcript_compatibility>` for more
         information.
@@ -613,14 +611,16 @@ class ManeTranscript:
         ...     "NM_004333.6",
         ...     "ENST00000644969.2",
         ... }
-        >>> result = asyncio.run(mane_mapper.get_longest_compatible_transcript(
-        ...     599,
-        ...     599,
-        ...     gene="BRAF",
-        ...     start_annotation_layer=AnnotationLayer.PROTEIN,
-        ...     residue_mode=ResidueMode.INTER_RESIDUE,
-        ...     mane_transcripts=mane_transcripts,
-        ... ))
+        >>> result = asyncio.run(
+        ...     mane_mapper.get_longest_compatible_transcript(
+        ...         599,
+        ...         599,
+        ...         gene="BRAF",
+        ...         start_annotation_layer=AnnotationLayer.PROTEIN,
+        ...         residue_mode=ResidueMode.INTER_RESIDUE,
+        ...         mane_transcripts=mane_transcripts,
+        ...     )
+        ... )
         >>> result.refseq
         'NP_001365396.1'
@@ -645,9 +645,9 @@ class ManeTranscript:
         """
         def _get_protein_rep(
-            gene: Optional[str],
+            gene: str | None,
             pro_ac: str,
-            lcr_c_data_pos: Tuple[int, int],
+            lcr_c_data_pos: tuple[int, int],
             strand: Strand,
             status: TranscriptPriority,
         ) -> DataRepresentation:
@@ -731,7 +731,7 @@ class ManeTranscript:
             # Get prioritized transcript data for gene
             # grch38 -> c
-            lcr_c_data: Optional[CdnaRepresentation] = await self._g_to_c(
+            lcr_c_data: CdnaRepresentation | None = await self._g_to_c(
                 g=g,
                 refseq_c_ac=tx_ac,
                 status=TranscriptPriority.LONGEST_COMPATIBLE_REMAINING,
@@ -859,25 +859,26 @@ class ManeTranscript:
         start_pos: int,
         end_pos: int,
         start_annotation_layer: AnnotationLayer,
-        gene: Optional[str] = None,
-        ref: Optional[str] = None,
+        gene: str | None = None,
+        ref: str | None = None,
         try_longest_compatible: bool = False,
-        residue_mode: Union[
-            ResidueMode.RESIDUE, ResidueMode.INTER_RESIDUE
-        ] = ResidueMode.RESIDUE,
-    ) -> Optional[Union[DataRepresentation, CdnaRepresentation]]:
+        residue_mode: Literal[ResidueMode.RESIDUE]
+        | Literal[ResidueMode.INTER_RESIDUE] = ResidueMode.RESIDUE,
+    ) -> DataRepresentation | CdnaRepresentation | None:
         """Return MANE transcript.
         >>> from cool_seq_tool.app import CoolSeqTool
         >>> from cool_seq_tool.schemas import AnnotationLayer, ResidueMode
         >>> import asyncio
         >>> mane_mapper = CoolSeqTool().mane_transcript
-        >>> result = asyncio.run(mane_mapper.get_mane_transcript(
-        ...     "NP_004324.2",
-        ...     599,
-        ...     AnnotationLayer.PROTEIN,
-        ...     residue_mode=ResidueMode.INTER_RESIDUE,
-        ... ))
+        >>> result = asyncio.run(
+        ...     mane_mapper.get_mane_transcript(
+        ...         "NP_004324.2",
+        ...         599,
+        ...         AnnotationLayer.PROTEIN,
+        ...         residue_mode=ResidueMode.INTER_RESIDUE,
+        ...     )
+        ... )
         >>> result.gene, result.refseq, result.status
         ('BRAF', 'NP_004324.2', <TranscriptPriority.MANE_SELECT: 'mane_select'>)
@@ -930,7 +931,7 @@ class ManeTranscript:
                     current_mane_data["RefSeq_nuc"],
                     current_mane_data["Ensembl_nuc"],
                 }
-                mane: Optional[CdnaRepresentation] = await self._g_to_c(
+                mane: CdnaRepresentation | None = await self._g_to_c(
                     g=g,
                     refseq_c_ac=current_mane_data["RefSeq_nuc"],
                     status=TranscriptPriority(
@@ -1001,9 +1002,7 @@ class ManeTranscript:
         logger.warning("Annotation layer not supported: %s", start_annotation_layer)
         return None
-    async def g_to_grch38(
-        self, ac: str, start_pos: int, end_pos: int
-    ) -> Optional[Dict]:
+    async def g_to_grch38(self, ac: str, start_pos: int, end_pos: int) -> dict | None:
         """Return genomic coordinate on GRCh38 when not given gene context.
         :param ac: Genomic accession
@@ -1055,8 +1054,8 @@ class ManeTranscript:
     @staticmethod
     def get_mane_c_pos_change(
-        mane_tx_genomic_data: Dict, coding_start_site: int
-    ) -> Tuple[int, int]:
+        mane_tx_genomic_data: dict, coding_start_site: int
+    ) -> tuple[int, int]:
         """Get mane c position change
         :param mane_tx_genomic_data: MANE transcript and genomic data
@@ -1080,9 +1079,9 @@ class ManeTranscript:
         ac: str,
         start_pos: int,
         end_pos: int,
-        gene: Optional[str] = None,
+        gene: str | None = None,
         residue_mode: ResidueMode = ResidueMode.RESIDUE,
-    ) -> Optional[Union[GenomicRepresentation, CdnaRepresentation]]:
+    ) -> GenomicRepresentation | CdnaRepresentation | None:
         """Return MANE Transcript on the c. coordinate.
         If an arg for ``gene`` is provided, lifts to GRCh38, then gets MANE cDNA
@@ -1091,12 +1090,11 @@ class ManeTranscript:
         >>> import asyncio
         >>> from cool_seq_tool.app import CoolSeqTool
         >>> cst = CoolSeqTool()
-        >>> result = asyncio.run(cst.mane_transcript.g_to_mane_c(
-        ...     "NC_000007.13",
-        ...     55259515,
-        ...     None,
-        ...     gene="EGFR"
-        ... ))
+        >>> result = asyncio.run(
+        ...     cst.mane_transcript.g_to_mane_c(
+        ...         "NC_000007.13", 55259515, None, gene="EGFR"
+        ...     )
+        ... )
         >>> type(result)
         <class 'cool_seq_tool.mappers.mane_transcript.CdnaRepresentation'>
         >>> result.status
@@ -1198,10 +1196,10 @@ class ManeTranscript:
         alt_ac: str,
         start_pos: int,
         end_pos: int,
-        gene: Optional[str] = None,
+        gene: str | None = None,
         residue_mode: ResidueMode = ResidueMode.RESIDUE,
         try_longest_compatible: bool = False,
-    ) -> Optional[Dict]:
+    ) -> dict | None:
         """Given GRCh38 genomic representation, return protein representation.
         Will try MANE Select and then MANE Plus Clinical. If neither is found and

cool_seq_tool/resources/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Provide tools for acquiring and managing Cool-Seq-Tool data resources."""

cool_seq_tool/resources/data_files.py ADDED Viewed

@@ -0,0 +1,93 @@
+"""Fetch data files regarding transcript mapping and annotation."""
+import logging
+from enum import Enum
+from importlib import resources
+from os import environ
+from pathlib import Path
+from wags_tails import NcbiLrgRefSeqGeneData, NcbiManeSummaryData
+_logger = logging.getLogger(__name__)
+class DataFile(str, Enum):
+    """Enumerate the file resources that :py:meth:`get_data_file() <cool_seq_tool.resources.data_files.get_data_file>` accepts."""
+    TRANSCRIPT_MAPPINGS = "transcript_mappings"
+    MANE_SUMMARY = "mane_summary"
+    LRG_REFSEQGENE = "lrg_refseqgene"
+    def lower(self) -> str:
+        """Provide the member's value in lower case.
+        :return: lower-cased member value as a plain string
+        """
+        lowered: str = self.value.lower()
+        return lowered
+def _fetch_transcript_mappings(_from_local):  # noqa: ANN001, ANN202
+    """Locate ``transcript_mapping.tsv`` inside this package; the flag is unused."""
+    return resources.files(__package__) / "transcript_mapping.tsv"
+def _fetch_mane_summary(from_local):  # noqa: ANN001, ANN202
+    """Return the first item of ``NcbiManeSummaryData.get_latest()``."""
+    latest = NcbiManeSummaryData(silent=True).get_latest(from_local=from_local)
+    return latest[0]
+def _fetch_lrg_refseqgene(from_local):  # noqa: ANN001, ANN202
+    """Return the first item of ``NcbiLrgRefSeqGeneData.get_latest()``."""
+    latest = NcbiLrgRefSeqGeneData(silent=True).get_latest(from_local=from_local)
+    return latest[0]
+# Maps each resource to ``(environment variable name, fetcher)``. The fetcher is
+# called positionally with the ``from_local`` flag.
+_resource_acquisition_params = {
+    DataFile.TRANSCRIPT_MAPPINGS: (
+        "TRANSCRIPT_MAPPINGS_PATH",
+        _fetch_transcript_mappings,
+    ),
+    DataFile.MANE_SUMMARY: ("MANE_SUMMARY_PATH", _fetch_mane_summary),
+    DataFile.LRG_REFSEQGENE: ("LRG_REFSEQGENE_PATH", _fetch_lrg_refseqgene),
+}
+def get_data_file(resource: DataFile, from_local: bool = False) -> Path:
+    """Acquire Cool-Seq-Tool file dependency.
+    Each resource can be defined using an environment variable:
+    * ``DataFile.TRANSCRIPT_MAPPINGS`` -> ``TRANSCRIPT_MAPPINGS_PATH``
+    * ``DataFile.MANE_SUMMARY`` -> ``MANE_SUMMARY_PATH``
+    * ``DataFile.LRG_REFSEQGENE`` -> ``LRG_REFSEQGENE_PATH``
+    Otherwise, this function falls back on default expected locations:
+    * ``transcript_mapping.tsv`` is bundled with this library.
+    * LRG RefseqGene and MANE summary files are acquired from NCBI using the `wags-tails <https://wags-tails.readthedocs.io/stable/>`_ package if unavailable locally, or out of date.
+    :param resource: resource to fetch
+    :param from_local: if ``True``, don't check for or acquire latest version -- just
+        provide most recent locally available file and raise FileNotFoundError otherwise.
+        Ignored when the resource's environment variable is set.
+    :return: path to file. Consuming functions can assume that it exists and is a file.
+    :raise FileNotFoundError: if file location configured by env var doesn't exist
+    :raise ValueError: if file location configured by env var isn't a file
+    """
+    params = _resource_acquisition_params[resource]
+    configured_path = environ.get(params[0])
+    if configured_path:
+        _logger.debug(
+            "Acquiring %s via env var %s:%s", resource, params[0], configured_path
+        )
+        path = Path(configured_path)
+        loc_descr = (
+            "the default file bundled with Cool-Seq-Tool"
+            if resource == DataFile.TRANSCRIPT_MAPPINGS
+            else "the default file pattern and possibly acquire from source via the `wags-tails` package"
+        )
+        msg = f'No {params[0].replace("_", " ").title()} file exists at path {configured_path} defined under env var {params[0]}. Either unset to use {loc_descr}, or ensure that it is available at this location. See the "Environment configuration" section under the Usage page within the documentation for more: https://coolseqtool.readthedocs.io/stable/usage.html#environment-configuration'
+        # A missing path and a non-file path are reported with the same message but
+        # different exception types, so callers can tell the two cases apart.
+        if not path.exists():
+            raise FileNotFoundError(msg)
+        if not path.is_file():
+            raise ValueError(msg)
+    else:
+        _logger.debug("Acquiring %s from default location/method.", resource)
+        # params[1] is the resource fetcher function -- use `from_local` param to
+        # optionally avoid unnecessary fetches
+        path = params[1](from_local)
+    _logger.debug("Acquired %s at %s", resource, path)
+    return path

cool_seq_tool/resources/status.py ADDED Viewed

@@ -0,0 +1,151 @@
+"""Enable quick status check of Cool-Seq-Tool resources."""
+import logging
+from collections import namedtuple
+from pathlib import Path
+from agct._core import ChainfileError
+from asyncpg import InvalidCatalogNameError, UndefinedTableError
+from biocommons.seqrepo import SeqRepo
+from cool_seq_tool.handlers.seqrepo_access import SEQREPO_ROOT_DIR, SeqRepoAccess
+from cool_seq_tool.resources.data_files import DataFile, get_data_file
+from cool_seq_tool.sources.uta_database import UTA_DB_URL, UtaDatabase, get_liftover
+_logger = logging.getLogger(__name__)
+# Field names of the status record: one per resource, with the data-file fields
+# named after the lower-cased ``DataFile`` values.
+_resource_status_fields = [
+    "uta",
+    "seqrepo",
+    DataFile.TRANSCRIPT_MAPPINGS.lower(),
+    DataFile.MANE_SUMMARY.lower(),
+    DataFile.LRG_REFSEQGENE.lower(),
+    "liftover",
+]
+ResourceStatus = namedtuple("ResourceStatus", _resource_status_fields)
+async def check_status(
+    transcript_file_path: Path | None = None,
+    lrg_refseqgene_path: Path | None = None,
+    mane_data_path: Path | None = None,
+    db_url: str = UTA_DB_URL,
+    sr: SeqRepo | None = None,
+    chain_file_37_to_38: str | None = None,
+    chain_file_38_to_37: str | None = None,
+) -> ResourceStatus:
+    """Perform basic status checks on availability of required data resources.
+    Arguments are intended to mirror arguments to :py:meth:`cool_seq_tool.app.CoolSeqTool.__init__`.
+    Additional arguments are available for testing paths to specific chainfiles (same
+    signature as :py:meth:`cool_seq_tool.sources.uta_database.UtaDatabase.__init__`).
+    Note that chainfile failures also entail UTA initialization failure; this status is
+    reported separately to enable more precise debugging.
+    >>> from cool_seq_tool.resources.status import check_status
+    >>> await check_status()
+    ResourceStatus(uta=True, seqrepo=True, transcript_mappings=True, mane_summary=True, lrg_refseqgene=True, liftover=True)
+    :param transcript_file_path: The path to ``transcript_mapping.tsv``
+    :param lrg_refseqgene_path: The path to the LRG_RefSeqGene file
+    :param mane_data_path: Path to RefSeq MANE summary data
+    :param db_url: PostgreSQL connection URL
+        Format: ``driver://user:password@host/database/schema``
+    :param sr: SeqRepo instance. If not provided, one is created from
+        ``SEQREPO_ROOT_DIR``
+    :param chain_file_37_to_38: Optional path to chain file for 37 to 38 assembly. This
+        is used for ``agct``. If this is not provided, will check to see if
+        ``LIFTOVER_CHAIN_37_TO_38`` env var is set. If neither is provided, will allow
+        ``agct`` to download a chain file from UCSC
+    :param chain_file_38_to_37: Optional path to chain file for 38 to 37 assembly. This
+        is used for ``agct``. If this is not provided, will check to see if
+        ``LIFTOVER_CHAIN_38_TO_37`` env var is set. If neither is provided, will allow
+        ``agct`` to download a chain file from UCSC
+    :return: boolean description of availability of each resource, given current
+        environment configurations
+    """
+    file_path_params = {
+        DataFile.TRANSCRIPT_MAPPINGS.lower(): transcript_file_path,
+        DataFile.LRG_REFSEQGENE.lower(): lrg_refseqgene_path,
+        DataFile.MANE_SUMMARY.lower(): mane_data_path,
+    }
+    # Every check starts out failed and is flipped only on demonstrated success.
+    status = {
+        DataFile.TRANSCRIPT_MAPPINGS.lower(): False,
+        DataFile.LRG_REFSEQGENE.lower(): False,
+        DataFile.MANE_SUMMARY.lower(): False,
+        "liftover": False,
+        "uta": False,
+        "seqrepo": False,
+    }
+    for r in DataFile:
+        name_lower = r.lower()
+        declared_path = file_path_params[name_lower]
+        # An explicitly declared, existing file settles the check without any fetch.
+        if declared_path and declared_path.exists() and declared_path.is_file():
+            status[name_lower] = True
+            continue
+        # NOTE(review): the handlers below log ``declared_path``, which may be None
+        # when the failing location came from an environment variable instead.
+        try:
+            get_data_file(r)
+        except FileNotFoundError:
+            _logger.error(
+                "%s does not exist at configured location %s", name_lower, declared_path
+            )
+        except ValueError:
+            _logger.error(
+                "%s configured at %s is not a valid file.", name_lower, declared_path
+            )
+        except Exception as e:
+            _logger.critical(
+                "Encountered unexpected error fetching %s: %s", name_lower, e
+            )
+        else:
+            status[name_lower] = True
+    try:
+        get_liftover(chain_file_37_to_38, chain_file_38_to_37)
+    except (FileNotFoundError, ChainfileError) as e:
+        _logger.error("agct converter setup failed: %s", e)
+    except Exception as e:
+        _logger.critical("Encountered unexpected error setting up agct: %s", e)
+    else:
+        status["liftover"] = True
+    try:
+        await UtaDatabase.create(db_url)
+    except (OSError, InvalidCatalogNameError, UndefinedTableError) as e:
+        # Report the URL that was actually used, not the module-level default.
+        _logger.error("Encountered error instantiating UTA at URI %s: %s", db_url, e)
+    except Exception as e:
+        _logger.critical(
+            "Encountered unexpected error instantiating UTA from URI %s: %s",
+            db_url,
+            e,
+        )
+    else:
+        status["uta"] = True
+    try:
+        if not sr:
+            sr = SeqRepo(root_dir=SEQREPO_ROOT_DIR)
+        sra = SeqRepoAccess(sr)
+        # Fetch a single base as a smoke test that sequence data is retrievable.
+        sra.sr["NC_000001.11"][1000:1001]
+    except OSError as e:
+        _logger.error("Encountered error while instantiating SeqRepo: %s", e)
+    except KeyError:
+        _logger.error("SeqRepo data fetch test failed -- is it populated?")
+    except Exception as e:
+        _logger.critical("Encountered unexpected error setting up SeqRepo: %s", e)
+    else:
+        status["seqrepo"] = True
+    structured_status = ResourceStatus(**status)
+    if all(status.values()):
+        _logger.info("Cool-Seq-Tool resource status passed")
+    else:
+        _logger.error(
+            "Cool-Seq-Tool resource check failed. Result: %s", structured_status
+        )
+    return structured_status

cool_seq_tool/routers/__init__.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Module for routers"""
 from enum import Enum
 from cool_seq_tool.app import CoolSeqTool

cool_seq_tool/routers/default.py CHANGED Viewed

@@ -1,4 +1,5 @@
 """Module containing default routes"""
 import logging
 import os
 import tempfile

cool_seq_tool/routers/mane.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """Module containing routes related to MANE data"""
 import logging
-from typing import Optional
 from fastapi import APIRouter, Query
@@ -45,11 +45,11 @@ async def get_mane_data(
     start_annotation_layer: AnnotationLayer = Query(
         ..., description="Starting annotation layer for query"
     ),
-    end_pos: Optional[int] = Query(
+    end_pos: int | None = Query(
         None, description="End position. If not set, will set to `start_pos`."
     ),
-    gene: Optional[str] = Query(None, description="HGNC gene symbol"),
-    ref: Optional[str] = Query(None, description=ref_descr),
+    gene: str | None = Query(None, description="HGNC gene symbol"),
+    ref: str | None = Query(None, description=ref_descr),
     try_longest_compatible: bool = Query(
         True, description=try_longest_compatible_descr
     ),

cool-seq-tool 0.4.0.dev2__py3-none-any.whl → 0.4.1__py3-none-any.whl

cool-seq-tool 0.4.0.dev2__py3-none-any.whl → 0.4.1__py3-none-any.whl